@truto/truto-jsonata 1.0.17 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.cjs +52 -79
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +57 -86
- package/dist/module.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -4
package/dist/main.cjs
CHANGED
|
@@ -11,9 +11,6 @@ var $dxT2C$json2md = require("json2md");
|
|
|
11
11
|
var $dxT2C$mime = require("mime");
|
|
12
12
|
var $dxT2C$pmap = require("p-map");
|
|
13
13
|
var $dxT2C$pretry = require("p-retry");
|
|
14
|
-
var $dxT2C$officeparser = require("officeparser");
|
|
15
|
-
var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
|
|
16
|
-
var $dxT2C$xlsx = require("xlsx");
|
|
17
14
|
var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
|
|
18
15
|
|
|
19
16
|
function $parcel$interopDefault(a) {
|
|
@@ -1401,11 +1398,16 @@ async function $caaee789061bb8bb$var$generateEmbeddingsCohere(body, api_key) {
|
|
|
1401
1398
|
texts: chunk
|
|
1402
1399
|
})
|
|
1403
1400
|
});
|
|
1404
|
-
if (!response.ok)
|
|
1401
|
+
if (!response.ok) {
|
|
1402
|
+
if (response.status === 429) throw new Error("Rate limit exceeded");
|
|
1403
|
+
if (response.status >= 500) throw new Error("Server error");
|
|
1404
|
+
throw new (0, $dxT2C$pretry.AbortError)(await response.text());
|
|
1405
|
+
}
|
|
1405
1406
|
return await response.json();
|
|
1406
1407
|
}, {
|
|
1407
|
-
retries:
|
|
1408
|
-
maxTimeout:
|
|
1408
|
+
retries: 5,
|
|
1409
|
+
maxTimeout: 5000,
|
|
1410
|
+
minTimeout: 2500
|
|
1409
1411
|
});
|
|
1410
1412
|
}, {
|
|
1411
1413
|
concurrency: 1
|
|
@@ -1421,89 +1423,49 @@ async function $caaee789061bb8bb$var$generateEmbeddingsCohere(body, api_key) {
|
|
|
1421
1423
|
},
|
|
1422
1424
|
body: JSON.stringify(body)
|
|
1423
1425
|
});
|
|
1424
|
-
if (!response.ok)
|
|
1426
|
+
if (!response.ok) {
|
|
1427
|
+
if (response.status === 429) throw new Error("Rate limit exceeded");
|
|
1428
|
+
if (response.status >= 500) throw new Error("Server error");
|
|
1429
|
+
throw new (0, $dxT2C$pretry.AbortError)(await response.text());
|
|
1430
|
+
}
|
|
1425
1431
|
return await response.json();
|
|
1426
1432
|
}, {
|
|
1427
|
-
retries:
|
|
1428
|
-
maxTimeout:
|
|
1433
|
+
retries: 5,
|
|
1434
|
+
maxTimeout: 5000,
|
|
1435
|
+
minTimeout: 2500
|
|
1429
1436
|
});
|
|
1430
1437
|
}
|
|
1431
1438
|
var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmbeddingsCohere;
|
|
1432
1439
|
|
|
1433
1440
|
|
|
1434
1441
|
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
|
|
1438
|
-
if (value instanceof ReadableStream) {
|
|
1439
|
-
const chunks = [];
|
|
1440
|
-
for await (const chunk of value)chunks.push(Buffer.from(chunk));
|
|
1441
|
-
const buffer = Buffer.concat(chunks);
|
|
1442
|
-
return buffer.toString(encoding);
|
|
1443
|
-
}
|
|
1444
|
-
return value.toString(encoding);
|
|
1445
|
-
}
|
|
1446
|
-
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
async function $05e3378f7c17d263$var$parsePdf(buffer) {
|
|
1451
|
-
const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
|
|
1452
|
-
const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
|
|
1453
|
-
const doc = await getDocument({
|
|
1454
|
-
data: data,
|
|
1455
|
-
useSystemFonts: true
|
|
1456
|
-
}).promise;
|
|
1457
|
-
// Get metadata and initialize output object
|
|
1458
|
-
const output = [];
|
|
1459
|
-
// Iterate through each page and fetch the text content
|
|
1460
|
-
for(let i = 1; i <= doc.numPages; i++){
|
|
1461
|
-
const page = await doc.getPage(i);
|
|
1462
|
-
const textContent = await page.getTextContent();
|
|
1463
|
-
const contents = textContent.items.map((item)=>(0, $dxT2C$lodashes.get)(item, "str")).join(" ");
|
|
1464
|
-
// Add page content to output
|
|
1465
|
-
if (contents) output.push((0, $dxT2C$lodashes.join)([
|
|
1466
|
-
`Page Number: ${i}`,
|
|
1467
|
-
contents
|
|
1468
|
-
], "/\n"));
|
|
1469
|
-
}
|
|
1470
|
-
// Return the results as JSON
|
|
1471
|
-
return (0, $dxT2C$lodashes.join)(output, "\n");
|
|
1472
|
-
}
|
|
1473
1442
|
async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
const allData = workbook.SheetNames.flatMap((sheetName)=>{
|
|
1488
|
-
const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
|
|
1489
|
-
return sheetData.map((record)=>{
|
|
1490
|
-
const newRecord = {
|
|
1491
|
-
sheetName: sheetName
|
|
1492
|
-
};
|
|
1493
|
-
return Object.assign(newRecord, record);
|
|
1494
|
-
});
|
|
1443
|
+
const documentParserApiUrl = this.environment.lookup("documentParserApiUrl");
|
|
1444
|
+
const documentParserApiKey = this.environment.lookup("documentParserApiKey");
|
|
1445
|
+
if (!documentParserApiKey) throw new Error("API key not found in environment");
|
|
1446
|
+
return await (0, ($parcel$interopDefault($dxT2C$pretry)))(async ()=>{
|
|
1447
|
+
const response = await fetch(`${documentParserApiUrl}/parse`, {
|
|
1448
|
+
method: "POST",
|
|
1449
|
+
headers: {
|
|
1450
|
+
accept: "application/json",
|
|
1451
|
+
"content-type": fileType,
|
|
1452
|
+
"user-agent": "truto",
|
|
1453
|
+
Authorization: `Bearer ${documentParserApiKey}`
|
|
1454
|
+
},
|
|
1455
|
+
body: file
|
|
1495
1456
|
});
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1457
|
+
if (!response.ok) {
|
|
1458
|
+
if (response.status === 429) throw new Error("Rate limit exceeded");
|
|
1459
|
+
if (response.status >= 500) throw new Error("Server error");
|
|
1460
|
+
throw new (0, $dxT2C$pretry.AbortError)(await response.text());
|
|
1461
|
+
}
|
|
1462
|
+
const data = await response.json();
|
|
1463
|
+
return data.content;
|
|
1464
|
+
}, {
|
|
1465
|
+
retries: 5,
|
|
1466
|
+
maxTimeout: 5000,
|
|
1467
|
+
minTimeout: 2500
|
|
1468
|
+
});
|
|
1507
1469
|
}
|
|
1508
1470
|
var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
|
|
1509
1471
|
|
|
@@ -1547,6 +1509,17 @@ async function $9a2529096849a04f$var$teeStream(stream) {
|
|
|
1547
1509
|
var $9a2529096849a04f$export$2e2bcd8739ae039 = $9a2529096849a04f$var$teeStream;
|
|
1548
1510
|
|
|
1549
1511
|
|
|
1512
|
+
async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
|
|
1513
|
+
if (value instanceof ReadableStream) {
|
|
1514
|
+
const chunks = [];
|
|
1515
|
+
for await (const chunk of value)chunks.push(Buffer.from(chunk));
|
|
1516
|
+
const buffer = Buffer.concat(chunks);
|
|
1517
|
+
return buffer.toString(encoding);
|
|
1518
|
+
}
|
|
1519
|
+
return value.toString(encoding);
|
|
1520
|
+
}
|
|
1521
|
+
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1522
|
+
|
|
1550
1523
|
|
|
1551
1524
|
function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
1552
1525
|
expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));
|