@truto/truto-jsonata 1.0.15 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.cjs +29 -5
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +30 -6
- package/dist/module.js.map +1 -1
- package/package.json +2 -1
package/dist/main.cjs
CHANGED
|
@@ -13,6 +13,7 @@ var $dxT2C$pmap = require("p-map");
|
|
|
13
13
|
var $dxT2C$pretry = require("p-retry");
|
|
14
14
|
var $dxT2C$officeparser = require("officeparser");
|
|
15
15
|
var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
|
|
16
|
+
var $dxT2C$xlsx = require("xlsx");
|
|
16
17
|
var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
|
|
17
18
|
|
|
18
19
|
function $parcel$interopDefault(a) {
|
|
@@ -1445,6 +1446,7 @@ async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
|
|
|
1445
1446
|
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1446
1447
|
|
|
1447
1448
|
|
|
1449
|
+
|
|
1448
1450
|
async function $05e3378f7c17d263$var$parsePdf(buffer) {
|
|
1449
1451
|
const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
|
|
1450
1452
|
const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
|
|
@@ -1475,13 +1477,30 @@ async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
|
|
|
1475
1477
|
for await (const chunk of file)chunks.push(Buffer.from(chunk));
|
|
1476
1478
|
buffer = Buffer.concat(chunks);
|
|
1477
1479
|
}
|
|
1480
|
+
if ((0, $dxT2C$lodashes.includes)([
|
|
1481
|
+
"application/vnd.ms-excel",
|
|
1482
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
1483
|
+
], fileType)) {
|
|
1484
|
+
const workbook = $dxT2C$xlsx.read(buffer, {
|
|
1485
|
+
type: "buffer"
|
|
1486
|
+
});
|
|
1487
|
+
const allData = workbook.SheetNames.flatMap((sheetName)=>{
|
|
1488
|
+
const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
|
|
1489
|
+
return sheetData.map((record)=>{
|
|
1490
|
+
const newRecord = {
|
|
1491
|
+
sheetName: sheetName
|
|
1492
|
+
};
|
|
1493
|
+
return Object.assign(newRecord, record);
|
|
1494
|
+
});
|
|
1495
|
+
});
|
|
1496
|
+
return allData;
|
|
1497
|
+
}
|
|
1478
1498
|
if ((0, $dxT2C$lodashes.includes)([
|
|
1479
1499
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
1480
1500
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
1481
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
1482
1501
|
"application/vnd.oasis.opendocument.text",
|
|
1483
|
-
"application/vnd.oasis.opendocument.presentation",
|
|
1484
|
-
"application/vnd.oasis.opendocument.spreadsheet"
|
|
1502
|
+
"application/vnd.oasis.opendocument.spreadsheet",
|
|
1503
|
+
"application/vnd.oasis.opendocument.presentation"
|
|
1485
1504
|
], fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
|
|
1486
1505
|
if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
|
|
1487
1506
|
return (0, $0f748d6318103cdf$export$2e2bcd8739ae039)(buffer, "utf-8");
|
|
@@ -1490,12 +1509,17 @@ var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocume
|
|
|
1490
1509
|
|
|
1491
1510
|
|
|
1492
1511
|
|
|
1493
|
-
|
|
1512
|
+
|
|
1513
|
+
async function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
|
|
1494
1514
|
chunkSize: 200,
|
|
1495
1515
|
chunkOverlap: 60
|
|
1496
1516
|
}) {
|
|
1517
|
+
let textToSplit;
|
|
1518
|
+
if ((0, $dxT2C$lodashes.isString)(text)) textToSplit = text;
|
|
1519
|
+
else if ((0, $dxT2C$lodashes.isPlainObject)(text) || (0, $dxT2C$lodashes.isArray)(text)) textToSplit = JSON.stringify(text, null, 2);
|
|
1520
|
+
else textToSplit = (0, $dxT2C$lodashes.toString)(text);
|
|
1497
1521
|
const splitter = new (0, $dxT2C$langchaintextsplitters.RecursiveCharacterTextSplitter)(options);
|
|
1498
|
-
return splitter.splitText(text);
|
|
1522
|
+
return splitter.splitText(textToSplit);
|
|
1499
1523
|
}
|
|
1500
1524
|
|
|
1501
1525
|
|