@truto/truto-jsonata 1.0.17 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.cjs CHANGED
@@ -11,9 +11,6 @@ var $dxT2C$json2md = require("json2md");
11
11
  var $dxT2C$mime = require("mime");
12
12
  var $dxT2C$pmap = require("p-map");
13
13
  var $dxT2C$pretry = require("p-retry");
14
- var $dxT2C$officeparser = require("officeparser");
15
- var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
16
- var $dxT2C$xlsx = require("xlsx");
17
14
  var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
18
15
 
19
16
  function $parcel$interopDefault(a) {
@@ -1401,11 +1398,16 @@ async function $caaee789061bb8bb$var$generateEmbeddingsCohere(body, api_key) {
1401
1398
  texts: chunk
1402
1399
  })
1403
1400
  });
1404
- if (!response.ok) throw new Error(await response.text());
1401
+ if (!response.ok) {
1402
+ if (response.status === 429) throw new Error("Rate limit exceeded");
1403
+ if (response.status >= 500) throw new Error("Server error");
1404
+ throw new (0, $dxT2C$pretry.AbortError)(await response.text());
1405
+ }
1405
1406
  return await response.json();
1406
1407
  }, {
1407
- retries: 10,
1408
- maxTimeout: 600000
1408
+ retries: 5,
1409
+ maxTimeout: 5000,
1410
+ minTimeout: 2500
1409
1411
  });
1410
1412
  }, {
1411
1413
  concurrency: 1
@@ -1421,89 +1423,49 @@ async function $caaee789061bb8bb$var$generateEmbeddingsCohere(body, api_key) {
1421
1423
  },
1422
1424
  body: JSON.stringify(body)
1423
1425
  });
1424
- if (!response.ok) throw new Error(await response.text());
1426
+ if (!response.ok) {
1427
+ if (response.status === 429) throw new Error("Rate limit exceeded");
1428
+ if (response.status >= 500) throw new Error("Server error");
1429
+ throw new (0, $dxT2C$pretry.AbortError)(await response.text());
1430
+ }
1425
1431
  return await response.json();
1426
1432
  }, {
1427
- retries: 10,
1428
- maxTimeout: 600000
1433
+ retries: 5,
1434
+ maxTimeout: 5000,
1435
+ minTimeout: 2500
1429
1436
  });
1430
1437
  }
1431
1438
  var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmbeddingsCohere;
1432
1439
 
1433
1440
 
1434
1441
 
1435
-
1436
-
1437
- async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
1438
- if (value instanceof ReadableStream) {
1439
- const chunks = [];
1440
- for await (const chunk of value)chunks.push(Buffer.from(chunk));
1441
- const buffer = Buffer.concat(chunks);
1442
- return buffer.toString(encoding);
1443
- }
1444
- return value.toString(encoding);
1445
- }
1446
- var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1447
-
1448
-
1449
-
1450
- async function $05e3378f7c17d263$var$parsePdf(buffer) {
1451
- const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
1452
- const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
1453
- const doc = await getDocument({
1454
- data: data,
1455
- useSystemFonts: true
1456
- }).promise;
1457
- // Get metadata and initialize output object
1458
- const output = [];
1459
- // Iterate through each page and fetch the text content
1460
- for(let i = 1; i <= doc.numPages; i++){
1461
- const page = await doc.getPage(i);
1462
- const textContent = await page.getTextContent();
1463
- const contents = textContent.items.map((item)=>(0, $dxT2C$lodashes.get)(item, "str")).join(" ");
1464
- // Add page content to output
1465
- if (contents) output.push((0, $dxT2C$lodashes.join)([
1466
- `Page Number: ${i}`,
1467
- contents
1468
- ], "/\n"));
1469
- }
1470
- // Return the results as JSON
1471
- return (0, $dxT2C$lodashes.join)(output, "\n");
1472
- }
1473
1442
  async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
1474
- let buffer;
1475
- if (file instanceof ReadableStream) {
1476
- const chunks = [];
1477
- for await (const chunk of file)chunks.push(Buffer.from(chunk));
1478
- buffer = Buffer.concat(chunks);
1479
- }
1480
- if ((0, $dxT2C$lodashes.includes)([
1481
- "application/vnd.ms-excel",
1482
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
1483
- ], fileType)) {
1484
- const workbook = $dxT2C$xlsx.read(buffer, {
1485
- type: "buffer"
1486
- });
1487
- const allData = workbook.SheetNames.flatMap((sheetName)=>{
1488
- const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
1489
- return sheetData.map((record)=>{
1490
- const newRecord = {
1491
- sheetName: sheetName
1492
- };
1493
- return Object.assign(newRecord, record);
1494
- });
1443
+ const documentParserApiUrl = this.environment.lookup("documentParserApiUrl");
1444
+ const documentParserApiKey = this.environment.lookup("documentParserApiKey");
1445
+ if (!documentParserApiKey) throw new Error("API key not found in environment");
1446
+ return await (0, ($parcel$interopDefault($dxT2C$pretry)))(async ()=>{
1447
+ const response = await fetch(`${documentParserApiUrl}/parse`, {
1448
+ method: "POST",
1449
+ headers: {
1450
+ accept: "application/json",
1451
+ "content-type": fileType,
1452
+ "user-agent": "truto",
1453
+ Authorization: `Bearer ${documentParserApiKey}`
1454
+ },
1455
+ body: file
1495
1456
  });
1496
- return allData;
1497
- }
1498
- if ((0, $dxT2C$lodashes.includes)([
1499
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1500
- "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1501
- "application/vnd.oasis.opendocument.text",
1502
- "application/vnd.oasis.opendocument.spreadsheet",
1503
- "application/vnd.oasis.opendocument.presentation"
1504
- ], fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
1505
- if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
1506
- return (0, $0f748d6318103cdf$export$2e2bcd8739ae039)(buffer, "utf-8");
1457
+ if (!response.ok) {
1458
+ if (response.status === 429) throw new Error("Rate limit exceeded");
1459
+ if (response.status >= 500) throw new Error("Server error");
1460
+ throw new (0, $dxT2C$pretry.AbortError)(await response.text());
1461
+ }
1462
+ const data = await response.json();
1463
+ return data.content;
1464
+ }, {
1465
+ retries: 5,
1466
+ maxTimeout: 5000,
1467
+ minTimeout: 2500
1468
+ });
1507
1469
  }
1508
1470
  var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
1509
1471
 
@@ -1547,6 +1509,17 @@ async function $9a2529096849a04f$var$teeStream(stream) {
1547
1509
  var $9a2529096849a04f$export$2e2bcd8739ae039 = $9a2529096849a04f$var$teeStream;
1548
1510
 
1549
1511
 
1512
+ async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
1513
+ if (value instanceof ReadableStream) {
1514
+ const chunks = [];
1515
+ for await (const chunk of value)chunks.push(Buffer.from(chunk));
1516
+ const buffer = Buffer.concat(chunks);
1517
+ return buffer.toString(encoding);
1518
+ }
1519
+ return value.toString(encoding);
1520
+ }
1521
+ var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1522
+
1550
1523
 
1551
1524
  function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
1552
1525
  expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));