@truto/truto-jsonata 1.0.14 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.cjs +51 -18
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +52 -19
- package/dist/module.js.map +1 -1
- package/package.json +2 -1
package/dist/main.cjs
CHANGED
|
@@ -13,6 +13,7 @@ var $dxT2C$pmap = require("p-map");
|
|
|
13
13
|
var $dxT2C$pretry = require("p-retry");
|
|
14
14
|
var $dxT2C$officeparser = require("officeparser");
|
|
15
15
|
var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
|
|
16
|
+
var $dxT2C$xlsx = require("xlsx");
|
|
16
17
|
var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
|
|
17
18
|
|
|
18
19
|
function $parcel$interopDefault(a) {
|
|
@@ -642,19 +643,28 @@ const $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
|
|
|
642
643
|
if (!text) return acc;
|
|
643
644
|
const textToInsert = (0, $5a7cb266718aeaae$export$2e2bcd8739ae039)(text);
|
|
644
645
|
// chunk the text into 2000 character chunks, should handle emojis and multi-byte characters
|
|
645
|
-
const
|
|
646
|
-
(0, $dxT2C$lodashes.
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
646
|
+
const textSplitByNewLine = textToInsert.split("\n");
|
|
647
|
+
const chunksOfChunks = (0, $dxT2C$lodashes.compact)((0, $dxT2C$lodashes.map)(textSplitByNewLine, (chunk)=>$bc8e7b2fdb012b3a$var$chunkText(chunk)));
|
|
648
|
+
const chunksWithNewLines = (0, $dxT2C$lodashes.flattenDeep)((0, $233848446ca33fe6$export$2e2bcd8739ae039)((0, $dxT2C$lodashes.map)(chunksOfChunks, (chunk)=>{
|
|
649
|
+
return (0, $dxT2C$lodashes.map)(chunk, (_chunk)=>({
|
|
650
|
+
type: "text",
|
|
651
|
+
text: {
|
|
652
|
+
content: _chunk
|
|
653
|
+
},
|
|
654
|
+
...token.type === "codespan" ? {
|
|
655
|
+
annotations: {
|
|
656
|
+
code: true
|
|
657
|
+
}
|
|
658
|
+
} : {}
|
|
659
|
+
}));
|
|
660
|
+
}), {
|
|
661
|
+
type: "text",
|
|
662
|
+
text: {
|
|
663
|
+
content: "\n"
|
|
664
|
+
}
|
|
665
|
+
}));
|
|
666
|
+
(0, $dxT2C$lodashes.each)(chunksWithNewLines, (chunk)=>{
|
|
667
|
+
acc.push(chunk);
|
|
658
668
|
});
|
|
659
669
|
return acc;
|
|
660
670
|
}, acc);
|
|
@@ -1436,6 +1446,7 @@ async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
|
|
|
1436
1446
|
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1437
1447
|
|
|
1438
1448
|
|
|
1449
|
+
|
|
1439
1450
|
async function $05e3378f7c17d263$var$parsePdf(buffer) {
|
|
1440
1451
|
const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
|
|
1441
1452
|
const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
|
|
@@ -1466,13 +1477,30 @@ async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
|
|
|
1466
1477
|
for await (const chunk of file)chunks.push(Buffer.from(chunk));
|
|
1467
1478
|
buffer = Buffer.concat(chunks);
|
|
1468
1479
|
}
|
|
1480
|
+
if ((0, $dxT2C$lodashes.includes)([
|
|
1481
|
+
"application/vnd.ms-excel",
|
|
1482
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
1483
|
+
], fileType)) {
|
|
1484
|
+
const workbook = $dxT2C$xlsx.read(buffer, {
|
|
1485
|
+
type: "buffer"
|
|
1486
|
+
});
|
|
1487
|
+
const allData = workbook.SheetNames.flatMap((sheetName)=>{
|
|
1488
|
+
const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
|
|
1489
|
+
return sheetData.map((record)=>{
|
|
1490
|
+
const newRecord = {
|
|
1491
|
+
sheetName: sheetName
|
|
1492
|
+
};
|
|
1493
|
+
return Object.assign(newRecord, record);
|
|
1494
|
+
});
|
|
1495
|
+
});
|
|
1496
|
+
return allData;
|
|
1497
|
+
}
|
|
1469
1498
|
if ((0, $dxT2C$lodashes.includes)([
|
|
1470
1499
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
1471
1500
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
1472
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
1473
1501
|
"application/vnd.oasis.opendocument.text",
|
|
1474
|
-
"application/vnd.oasis.opendocument.
|
|
1475
|
-
"application/vnd.oasis.opendocument.
|
|
1502
|
+
"application/vnd.oasis.opendocument.spreadsheet",
|
|
1503
|
+
"application/vnd.oasis.opendocument.presentation"
|
|
1476
1504
|
], fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
|
|
1477
1505
|
if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
|
|
1478
1506
|
return (0, $0f748d6318103cdf$export$2e2bcd8739ae039)(buffer, "utf-8");
|
|
@@ -1481,12 +1509,17 @@ var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocume
|
|
|
1481
1509
|
|
|
1482
1510
|
|
|
1483
1511
|
|
|
1484
|
-
|
|
1512
|
+
|
|
1513
|
+
/**
 * Split a value into text chunks with LangChain's RecursiveCharacterTextSplitter.
 *
 * The input is first normalised to a string:
 *   - strings are used unchanged;
 *   - plain objects and arrays are serialised with JSON.stringify (2-space indent);
 *   - every other value is converted with lodash `toString`.
 *
 * @param {*} text - Value to split.
 * @param {object} [options] - Passed unchanged to the RecursiveCharacterTextSplitter
 *   constructor. The `{ chunkSize: 200, chunkOverlap: 60 }` default is used only
 *   when the argument is omitted entirely; a partial options object is not merged
 *   with it.
 * @returns {Promise<*>} Resolves to whatever `splitter.splitText` yields
 *   (presumably an array of string chunks; confirm against the LangChain docs).
 */
async function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
    chunkSize: 200,
    chunkOverlap: 60
}) {
    // Normalise the incoming value to the string that will actually be split.
    const stringify = (value)=>{
        if ((0, $dxT2C$lodashes.isString)(value)) return value;
        const isStructured = (0, $dxT2C$lodashes.isPlainObject)(value) || (0, $dxT2C$lodashes.isArray)(value);
        return isStructured ? JSON.stringify(value, null, 2) : (0, $dxT2C$lodashes.toString)(value);
    };
    const input = stringify(text);
    // The splitter is built after normalisation, so a serialisation failure
    // surfaces before any splitter is constructed.
    return new (0, $dxT2C$langchaintextsplitters.RecursiveCharacterTextSplitter)(options).splitText(input);
}
|
|
1491
1524
|
|
|
1492
1525
|
|