@truto/truto-jsonata 1.0.18 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.cjs +36 -73
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +40 -79
- package/dist/module.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -4
package/dist/main.cjs
CHANGED
|
@@ -11,9 +11,6 @@ var $dxT2C$json2md = require("json2md");
|
|
|
11
11
|
var $dxT2C$mime = require("mime");
|
|
12
12
|
var $dxT2C$pmap = require("p-map");
|
|
13
13
|
var $dxT2C$pretry = require("p-retry");
|
|
14
|
-
var $dxT2C$officeparser = require("officeparser");
|
|
15
|
-
var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
|
|
16
|
-
var $dxT2C$xlsx = require("xlsx");
|
|
17
14
|
var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
|
|
18
15
|
|
|
19
16
|
function $parcel$interopDefault(a) {
|
|
@@ -1442,78 +1439,33 @@ var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmb
|
|
|
1442
1439
|
|
|
1443
1440
|
|
|
1444
1441
|
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
/**
 * Decode binary or streamed input into a string.
 *
 * @param {*} value - A ReadableStream of chunks, a Buffer, a typed array /
 *   DataView, an ArrayBuffer, or any other object exposing `toString`.
 * @param {string} [encoding] - Encoding handed to `Buffer#toString`
 *   (for non-binary values it is forwarded to the value's own `toString`).
 * @returns {Promise<string>} The decoded text.
 * @throws {TypeError} When `value` is null or undefined.
 */
async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
    // `ReadableStream` is not a global on every runtime; the typeof guard keeps
    // the check itself from throwing a ReferenceError there.
    if (typeof ReadableStream !== "undefined" && value instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of value) chunks.push(Buffer.from(chunk));
        return Buffer.concat(chunks).toString(encoding);
    }
    if (Buffer.isBuffer(value)) return value.toString(encoding);
    // Plain typed arrays and DataViews ignore `encoding` in their own
    // toString() (e.g. "104,105"), so wrap the exact bytes of the view first.
    if (ArrayBuffer.isView(value)) return Buffer.from(value.buffer, value.byteOffset, value.byteLength).toString(encoding);
    // A bare ArrayBuffer would otherwise stringify to "[object ArrayBuffer]".
    if (value instanceof ArrayBuffer) return Buffer.from(value).toString(encoding);
    return value.toString(encoding);
}
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
/**
 * Extract the text of every page of a PDF.
 *
 * @param {Buffer|Uint8Array} buffer - View over the PDF bytes; only the
 *   region described by `byteOffset`/`byteLength` is handed to pdf.js.
 * @returns {Promise<string>} One entry per non-empty page, formatted as
 *   "Page Number: <n>/\n<text>", with entries joined by "\n". Pages with no
 *   text are omitted.
 */
async function $05e3378f7c17d263$var$parsePdf(buffer) {
    // Copy out exactly the bytes of this view: the backing ArrayBuffer may be
    // larger than the view (e.g. a pooled Buffer).
    const data = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
    const { getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
    const doc = await getDocument({
        data,
        useSystemFonts: true
    }).promise;
    try {
        const output = [];
        // pdf.js pages are 1-indexed.
        for (let i = 1; i <= doc.numPages; i++) {
            const page = await doc.getPage(i);
            const textContent = await page.getTextContent();
            const contents = textContent.items.map((item) => (0, $dxT2C$lodashes.get)(item, "str")).join(" ");
            // Skip pages without any extractable text.
            if (contents) output.push((0, $dxT2C$lodashes.join)([
                `Page Number: ${i}`,
                contents
            ], "/\n"));
        }
        // The result is plain text, one block per page.
        return (0, $dxT2C$lodashes.join)(output, "\n");
    } finally {
        // Release the document (and its worker) even when extraction fails.
        await doc.destroy();
    }
}
|
|
1483
1442
|
/**
 * Send a document to the external document-parser service and return the
 * extracted content.
 *
 * Must be called with `this` bound to an object whose
 * `environment.lookup(name)` resolves "documentParserApiUrl" and
 * "documentParserApiKey".
 *
 * @param {*} file - Request body, passed to `fetch` unchanged.
 *   NOTE(review): if this is a one-shot stream, a retry would resend an
 *   already-consumed body — confirm what callers pass.
 * @param {string} fileType - MIME type sent as the `content-type` header.
 * @returns {Promise<*>} The `content` field of the service's JSON response.
 * @throws {Error} When the API key or API URL is missing from the environment,
 *   or when the request keeps failing after all retries.
 */
async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
    const documentParserApiUrl = this.environment.lookup("documentParserApiUrl");
    const documentParserApiKey = this.environment.lookup("documentParserApiKey");
    if (!documentParserApiKey) throw new Error("API key not found in environment");
    // Without this guard the request would target "undefined/parse" and fail
    // with an opaque URL error instead of a configuration error.
    if (!documentParserApiUrl) throw new Error("Document parser API URL not found in environment");
    return await (0, ($parcel$interopDefault($dxT2C$pretry)))(async () => {
        const response = await fetch(`${documentParserApiUrl}/parse`, {
            method: "POST",
            headers: {
                accept: "application/json",
                "content-type": fileType,
                "user-agent": "truto",
                Authorization: `Bearer ${documentParserApiKey}`
            },
            body: file
        });
        if (!response.ok) {
            // Plain errors are retried: rate limiting and server-side failures.
            if (response.status === 429) throw new Error("Rate limit exceeded");
            if (response.status >= 500) throw new Error("Server error");
            // Any other failure status will not improve on retry: abort at once.
            throw new (0, $dxT2C$pretry.AbortError)(await response.text());
        }
        const data = await response.json();
        return data.content;
    }, {
        retries: 5,
        maxTimeout: 5000,
        minTimeout: 2500
    });
}
var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
|
|
1519
1471
|
|
|
@@ -1557,6 +1509,17 @@ async function $9a2529096849a04f$var$teeStream(stream) {
|
|
|
1557
1509
|
var $9a2529096849a04f$export$2e2bcd8739ae039 = $9a2529096849a04f$var$teeStream;
|
|
1558
1510
|
|
|
1559
1511
|
|
|
1512
|
+
/**
 * Decode binary or streamed input into a string.
 *
 * @param {*} value - A ReadableStream of chunks, a Buffer, a typed array /
 *   DataView, an ArrayBuffer, or any other object exposing `toString`.
 * @param {string} [encoding] - Encoding handed to `Buffer#toString`
 *   (for non-binary values it is forwarded to the value's own `toString`).
 * @returns {Promise<string>} The decoded text.
 * @throws {TypeError} When `value` is null or undefined.
 */
async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
    // `ReadableStream` is not a global on every runtime; the typeof guard keeps
    // the check itself from throwing a ReferenceError there.
    if (typeof ReadableStream !== "undefined" && value instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of value) chunks.push(Buffer.from(chunk));
        return Buffer.concat(chunks).toString(encoding);
    }
    if (Buffer.isBuffer(value)) return value.toString(encoding);
    // Plain typed arrays and DataViews ignore `encoding` in their own
    // toString() (e.g. "104,105"), so wrap the exact bytes of the view first.
    if (ArrayBuffer.isView(value)) return Buffer.from(value.buffer, value.byteOffset, value.byteLength).toString(encoding);
    // A bare ArrayBuffer would otherwise stringify to "[object ArrayBuffer]".
    if (value instanceof ArrayBuffer) return Buffer.from(value).toString(encoding);
    return value.toString(encoding);
}
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1522
|
+
|
|
1560
1523
|
|
|
1561
1524
|
function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
1562
1525
|
expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));
|