@truto/truto-jsonata 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.cjs CHANGED
@@ -11,10 +11,8 @@ var $dxT2C$json2md = require("json2md");
11
11
  var $dxT2C$mime = require("mime");
12
12
  var $dxT2C$pmap = require("p-map");
13
13
  var $dxT2C$pretry = require("p-retry");
14
- var $dxT2C$officeparser = require("officeparser");
15
- var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
16
- var $dxT2C$xlsx = require("xlsx");
17
14
  var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
15
+ var $dxT2C$qs = require("qs");
18
16
 
19
17
  function $parcel$interopDefault(a) {
20
18
  return a && a.__esModule ? a.default : a;
@@ -1442,78 +1440,33 @@ var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmb
1442
1440
 
1443
1441
 
1444
1442
 
1445
-
1446
-
1447
- async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
1448
- if (value instanceof ReadableStream) {
1449
- const chunks = [];
1450
- for await (const chunk of value)chunks.push(Buffer.from(chunk));
1451
- const buffer = Buffer.concat(chunks);
1452
- return buffer.toString(encoding);
1453
- }
1454
- return value.toString(encoding);
1455
- }
1456
- var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1457
-
1458
-
1459
-
1460
- async function $05e3378f7c17d263$var$parsePdf(buffer) {
1461
- const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
1462
- const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
1463
- const doc = await getDocument({
1464
- data: data,
1465
- useSystemFonts: true
1466
- }).promise;
1467
- // Get metadata and initialize output object
1468
- const output = [];
1469
- // Iterate through each page and fetch the text content
1470
- for(let i = 1; i <= doc.numPages; i++){
1471
- const page = await doc.getPage(i);
1472
- const textContent = await page.getTextContent();
1473
- const contents = textContent.items.map((item)=>(0, $dxT2C$lodashes.get)(item, "str")).join(" ");
1474
- // Add page content to output
1475
- if (contents) output.push((0, $dxT2C$lodashes.join)([
1476
- `Page Number: ${i}`,
1477
- contents
1478
- ], "/\n"));
1479
- }
1480
- // Return the results as JSON
1481
- return (0, $dxT2C$lodashes.join)(output, "\n");
1482
- }
1483
1443
  async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
1484
- let buffer;
1485
- if (file instanceof ReadableStream) {
1486
- const chunks = [];
1487
- for await (const chunk of file)chunks.push(Buffer.from(chunk));
1488
- buffer = Buffer.concat(chunks);
1489
- }
1490
- if ((0, $dxT2C$lodashes.includes)([
1491
- "application/vnd.ms-excel",
1492
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
1493
- ], fileType)) {
1494
- const workbook = $dxT2C$xlsx.read(buffer, {
1495
- type: "buffer"
1496
- });
1497
- const allData = workbook.SheetNames.flatMap((sheetName)=>{
1498
- const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
1499
- return sheetData.map((record)=>{
1500
- const newRecord = {
1501
- sheetName: sheetName
1502
- };
1503
- return Object.assign(newRecord, record);
1504
- });
1444
+ const documentParserApiUrl = this.environment.lookup("documentParserApiUrl");
1445
+ const documentParserApiKey = this.environment.lookup("documentParserApiKey");
1446
+ if (!documentParserApiKey) throw new Error("API key not found in environment");
1447
+ return await (0, ($parcel$interopDefault($dxT2C$pretry)))(async ()=>{
1448
+ const response = await fetch(`${documentParserApiUrl}/parse`, {
1449
+ method: "POST",
1450
+ headers: {
1451
+ accept: "application/json",
1452
+ "content-type": fileType,
1453
+ "user-agent": "truto",
1454
+ Authorization: `Bearer ${documentParserApiKey}`
1455
+ },
1456
+ body: file
1505
1457
  });
1506
- return allData;
1507
- }
1508
- if ((0, $dxT2C$lodashes.includes)([
1509
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1510
- "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1511
- "application/vnd.oasis.opendocument.text",
1512
- "application/vnd.oasis.opendocument.spreadsheet",
1513
- "application/vnd.oasis.opendocument.presentation"
1514
- ], fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
1515
- if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
1516
- return (0, $0f748d6318103cdf$export$2e2bcd8739ae039)(buffer, "utf-8");
1458
+ if (!response.ok) {
1459
+ if (response.status === 429) throw new Error("Rate limit exceeded");
1460
+ if (response.status >= 500) throw new Error("Server error");
1461
+ throw new (0, $dxT2C$pretry.AbortError)(await response.text());
1462
+ }
1463
+ const data = await response.json();
1464
+ return data.content;
1465
+ }, {
1466
+ retries: 5,
1467
+ maxTimeout: 5000,
1468
+ minTimeout: 2500
1469
+ });
1517
1470
  }
1518
1471
  var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
1519
1472
 
@@ -1557,6 +1510,29 @@ async function $9a2529096849a04f$var$teeStream(stream) {
1557
1510
  var $9a2529096849a04f$export$2e2bcd8739ae039 = $9a2529096849a04f$var$teeStream;
1558
1511
 
1559
1512
 
1513
+ async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
1514
+ if (value instanceof ReadableStream) {
1515
+ const chunks = [];
1516
+ for await (const chunk of value)chunks.push(Buffer.from(chunk));
1517
+ const buffer = Buffer.concat(chunks);
1518
+ return buffer.toString(encoding);
1519
+ }
1520
+ return value.toString(encoding);
1521
+ }
1522
+ var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1523
+
1524
+
1525
+
1526
+ function $7161d3cc11fbd965$export$2e2bcd8739ae039(query, options) {
1527
+ return (0, ($parcel$interopDefault($dxT2C$qs))).parse(query, options);
1528
+ }
1529
+
1530
+
1531
+
1532
+ function $d3ad656830753df3$export$2e2bcd8739ae039(query, options) {
1533
+ return (0, ($parcel$interopDefault($dxT2C$qs))).stringify(query, options);
1534
+ }
1535
+
1560
1536
 
1561
1537
  function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
1562
1538
  expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));
@@ -1640,6 +1616,8 @@ function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
1640
1616
  expression.registerFunction("getDataUri", (0, $15c432f5f036a88a$export$2e2bcd8739ae039));
1641
1617
  expression.registerFunction("teeStream", (0, $9a2529096849a04f$export$2e2bcd8739ae039));
1642
1618
  expression.registerFunction("bufferToString", (0, $0f748d6318103cdf$export$2e2bcd8739ae039));
1619
+ expression.registerFunction("parseQuery", (0, $7161d3cc11fbd965$export$2e2bcd8739ae039));
1620
+ expression.registerFunction("stringifyQuery", (0, $d3ad656830753df3$export$2e2bcd8739ae039));
1643
1621
  return expression;
1644
1622
  }
1645
1623