@truto/truto-jsonata 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.cjs +51 -73
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +55 -79
- package/dist/module.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -4
package/dist/main.cjs
CHANGED
|
@@ -11,10 +11,8 @@ var $dxT2C$json2md = require("json2md");
|
|
|
11
11
|
var $dxT2C$mime = require("mime");
|
|
12
12
|
var $dxT2C$pmap = require("p-map");
|
|
13
13
|
var $dxT2C$pretry = require("p-retry");
|
|
14
|
-
var $dxT2C$officeparser = require("officeparser");
|
|
15
|
-
var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
|
|
16
|
-
var $dxT2C$xlsx = require("xlsx");
|
|
17
14
|
var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
|
|
15
|
+
var $dxT2C$qs = require("qs");
|
|
18
16
|
|
|
19
17
|
function $parcel$interopDefault(a) {
|
|
20
18
|
return a && a.__esModule ? a.default : a;
|
|
@@ -1442,78 +1440,33 @@ var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmb
|
|
|
1442
1440
|
|
|
1443
1441
|
|
|
1444
1442
|
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
|
|
1448
|
-
if (value instanceof ReadableStream) {
|
|
1449
|
-
const chunks = [];
|
|
1450
|
-
for await (const chunk of value)chunks.push(Buffer.from(chunk));
|
|
1451
|
-
const buffer = Buffer.concat(chunks);
|
|
1452
|
-
return buffer.toString(encoding);
|
|
1453
|
-
}
|
|
1454
|
-
return value.toString(encoding);
|
|
1455
|
-
}
|
|
1456
|
-
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
async function $05e3378f7c17d263$var$parsePdf(buffer) {
|
|
1461
|
-
const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
|
|
1462
|
-
const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
|
|
1463
|
-
const doc = await getDocument({
|
|
1464
|
-
data: data,
|
|
1465
|
-
useSystemFonts: true
|
|
1466
|
-
}).promise;
|
|
1467
|
-
// Get metadata and initialize output object
|
|
1468
|
-
const output = [];
|
|
1469
|
-
// Iterate through each page and fetch the text content
|
|
1470
|
-
for(let i = 1; i <= doc.numPages; i++){
|
|
1471
|
-
const page = await doc.getPage(i);
|
|
1472
|
-
const textContent = await page.getTextContent();
|
|
1473
|
-
const contents = textContent.items.map((item)=>(0, $dxT2C$lodashes.get)(item, "str")).join(" ");
|
|
1474
|
-
// Add page content to output
|
|
1475
|
-
if (contents) output.push((0, $dxT2C$lodashes.join)([
|
|
1476
|
-
`Page Number: ${i}`,
|
|
1477
|
-
contents
|
|
1478
|
-
], "/\n"));
|
|
1479
|
-
}
|
|
1480
|
-
// Return the results as JSON
|
|
1481
|
-
return (0, $dxT2C$lodashes.join)(output, "\n");
|
|
1482
|
-
}
|
|
1483
1443
|
async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
const allData = workbook.SheetNames.flatMap((sheetName)=>{
|
|
1498
|
-
const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
|
|
1499
|
-
return sheetData.map((record)=>{
|
|
1500
|
-
const newRecord = {
|
|
1501
|
-
sheetName: sheetName
|
|
1502
|
-
};
|
|
1503
|
-
return Object.assign(newRecord, record);
|
|
1504
|
-
});
|
|
1444
|
+
const documentParserApiUrl = this.environment.lookup("documentParserApiUrl");
|
|
1445
|
+
const documentParserApiKey = this.environment.lookup("documentParserApiKey");
|
|
1446
|
+
if (!documentParserApiKey) throw new Error("API key not found in environment");
|
|
1447
|
+
return await (0, ($parcel$interopDefault($dxT2C$pretry)))(async ()=>{
|
|
1448
|
+
const response = await fetch(`${documentParserApiUrl}/parse`, {
|
|
1449
|
+
method: "POST",
|
|
1450
|
+
headers: {
|
|
1451
|
+
accept: "application/json",
|
|
1452
|
+
"content-type": fileType,
|
|
1453
|
+
"user-agent": "truto",
|
|
1454
|
+
Authorization: `Bearer ${documentParserApiKey}`
|
|
1455
|
+
},
|
|
1456
|
+
body: file
|
|
1505
1457
|
});
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1458
|
+
if (!response.ok) {
|
|
1459
|
+
if (response.status === 429) throw new Error("Rate limit exceeded");
|
|
1460
|
+
if (response.status >= 500) throw new Error("Server error");
|
|
1461
|
+
throw new (0, $dxT2C$pretry.AbortError)(await response.text());
|
|
1462
|
+
}
|
|
1463
|
+
const data = await response.json();
|
|
1464
|
+
return data.content;
|
|
1465
|
+
}, {
|
|
1466
|
+
retries: 5,
|
|
1467
|
+
maxTimeout: 5000,
|
|
1468
|
+
minTimeout: 2500
|
|
1469
|
+
});
|
|
1517
1470
|
}
|
|
1518
1471
|
var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
|
|
1519
1472
|
|
|
@@ -1557,6 +1510,29 @@ async function $9a2529096849a04f$var$teeStream(stream) {
|
|
|
1557
1510
|
var $9a2529096849a04f$export$2e2bcd8739ae039 = $9a2529096849a04f$var$teeStream;
|
|
1558
1511
|
|
|
1559
1512
|
|
|
1513
|
+
async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
|
|
1514
|
+
if (value instanceof ReadableStream) {
|
|
1515
|
+
const chunks = [];
|
|
1516
|
+
for await (const chunk of value)chunks.push(Buffer.from(chunk));
|
|
1517
|
+
const buffer = Buffer.concat(chunks);
|
|
1518
|
+
return buffer.toString(encoding);
|
|
1519
|
+
}
|
|
1520
|
+
return value.toString(encoding);
|
|
1521
|
+
}
|
|
1522
|
+
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1523
|
+
|
|
1524
|
+
|
|
1525
|
+
|
|
1526
|
+
function $7161d3cc11fbd965$export$2e2bcd8739ae039(query, options) {
|
|
1527
|
+
return (0, ($parcel$interopDefault($dxT2C$qs))).parse(query, options);
|
|
1528
|
+
}
|
|
1529
|
+
|
|
1530
|
+
|
|
1531
|
+
|
|
1532
|
+
function $d3ad656830753df3$export$2e2bcd8739ae039(query, options) {
|
|
1533
|
+
return (0, ($parcel$interopDefault($dxT2C$qs))).stringify(query, options);
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1560
1536
|
|
|
1561
1537
|
function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
1562
1538
|
expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));
|
|
@@ -1640,6 +1616,8 @@ function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
|
1640
1616
|
expression.registerFunction("getDataUri", (0, $15c432f5f036a88a$export$2e2bcd8739ae039));
|
|
1641
1617
|
expression.registerFunction("teeStream", (0, $9a2529096849a04f$export$2e2bcd8739ae039));
|
|
1642
1618
|
expression.registerFunction("bufferToString", (0, $0f748d6318103cdf$export$2e2bcd8739ae039));
|
|
1619
|
+
expression.registerFunction("parseQuery", (0, $7161d3cc11fbd965$export$2e2bcd8739ae039));
|
|
1620
|
+
expression.registerFunction("stringifyQuery", (0, $d3ad656830753df3$export$2e2bcd8739ae039));
|
|
1643
1621
|
return expression;
|
|
1644
1622
|
}
|
|
1645
1623
|
|