@truto/truto-jsonata 1.0.18 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.cjs CHANGED
@@ -11,9 +11,6 @@ var $dxT2C$json2md = require("json2md");
11
11
  var $dxT2C$mime = require("mime");
12
12
  var $dxT2C$pmap = require("p-map");
13
13
  var $dxT2C$pretry = require("p-retry");
14
- var $dxT2C$officeparser = require("officeparser");
15
- var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
16
- var $dxT2C$xlsx = require("xlsx");
17
14
  var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
18
15
 
19
16
  function $parcel$interopDefault(a) {
@@ -1442,78 +1439,33 @@ var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmb
1442
1439
 
1443
1440
 
1444
1441
 
1445
-
1446
-
1447
- async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
1448
- if (value instanceof ReadableStream) {
1449
- const chunks = [];
1450
- for await (const chunk of value)chunks.push(Buffer.from(chunk));
1451
- const buffer = Buffer.concat(chunks);
1452
- return buffer.toString(encoding);
1453
- }
1454
- return value.toString(encoding);
1455
- }
1456
- var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1457
-
1458
-
1459
-
1460
- async function $05e3378f7c17d263$var$parsePdf(buffer) {
1461
- const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
1462
- const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
1463
- const doc = await getDocument({
1464
- data: data,
1465
- useSystemFonts: true
1466
- }).promise;
1467
- // Get metadata and initialize output object
1468
- const output = [];
1469
- // Iterate through each page and fetch the text content
1470
- for(let i = 1; i <= doc.numPages; i++){
1471
- const page = await doc.getPage(i);
1472
- const textContent = await page.getTextContent();
1473
- const contents = textContent.items.map((item)=>(0, $dxT2C$lodashes.get)(item, "str")).join(" ");
1474
- // Add page content to output
1475
- if (contents) output.push((0, $dxT2C$lodashes.join)([
1476
- `Page Number: ${i}`,
1477
- contents
1478
- ], "/\n"));
1479
- }
1480
- // Return the results as JSON
1481
- return (0, $dxT2C$lodashes.join)(output, "\n");
1482
- }
1483
1442
  async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
1484
- let buffer;
1485
- if (file instanceof ReadableStream) {
1486
- const chunks = [];
1487
- for await (const chunk of file)chunks.push(Buffer.from(chunk));
1488
- buffer = Buffer.concat(chunks);
1489
- }
1490
- if ((0, $dxT2C$lodashes.includes)([
1491
- "application/vnd.ms-excel",
1492
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
1493
- ], fileType)) {
1494
- const workbook = $dxT2C$xlsx.read(buffer, {
1495
- type: "buffer"
1496
- });
1497
- const allData = workbook.SheetNames.flatMap((sheetName)=>{
1498
- const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
1499
- return sheetData.map((record)=>{
1500
- const newRecord = {
1501
- sheetName: sheetName
1502
- };
1503
- return Object.assign(newRecord, record);
1504
- });
1443
+ const documentParserApiUrl = this.environment.lookup("documentParserApiUrl");
1444
+ const documentParserApiKey = this.environment.lookup("documentParserApiKey");
1445
+ if (!documentParserApiKey) throw new Error("API key not found in environment");
1446
+ return await (0, ($parcel$interopDefault($dxT2C$pretry)))(async ()=>{
1447
+ const response = await fetch(`${documentParserApiUrl}/parse`, {
1448
+ method: "POST",
1449
+ headers: {
1450
+ accept: "application/json",
1451
+ "content-type": fileType,
1452
+ "user-agent": "truto",
1453
+ Authorization: `Bearer ${documentParserApiKey}`
1454
+ },
1455
+ body: file
1505
1456
  });
1506
- return allData;
1507
- }
1508
- if ((0, $dxT2C$lodashes.includes)([
1509
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1510
- "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1511
- "application/vnd.oasis.opendocument.text",
1512
- "application/vnd.oasis.opendocument.spreadsheet",
1513
- "application/vnd.oasis.opendocument.presentation"
1514
- ], fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
1515
- if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
1516
- return (0, $0f748d6318103cdf$export$2e2bcd8739ae039)(buffer, "utf-8");
1457
+ if (!response.ok) {
1458
+ if (response.status === 429) throw new Error("Rate limit exceeded");
1459
+ if (response.status >= 500) throw new Error("Server error");
1460
+ throw new (0, $dxT2C$pretry.AbortError)(await response.text());
1461
+ }
1462
+ const data = await response.json();
1463
+ return data.content;
1464
+ }, {
1465
+ retries: 5,
1466
+ maxTimeout: 5000,
1467
+ minTimeout: 2500
1468
+ });
1517
1469
  }
1518
1470
  var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
1519
1471
 
@@ -1557,6 +1509,17 @@ async function $9a2529096849a04f$var$teeStream(stream) {
1557
1509
  var $9a2529096849a04f$export$2e2bcd8739ae039 = $9a2529096849a04f$var$teeStream;
1558
1510
 
1559
1511
 
1512
+ async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
1513
+ if (value instanceof ReadableStream) {
1514
+ const chunks = [];
1515
+ for await (const chunk of value)chunks.push(Buffer.from(chunk));
1516
+ const buffer = Buffer.concat(chunks);
1517
+ return buffer.toString(encoding);
1518
+ }
1519
+ return value.toString(encoding);
1520
+ }
1521
+ var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1522
+
1560
1523
 
1561
1524
  function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
1562
1525
  expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));