@truto/truto-jsonata 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.cjs +29 -5
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +30 -6
- package/dist/module.js.map +1 -1
- package/package.json +4 -3
package/dist/module.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import $hgUW1$jsonata from "jsonata";
|
|
2
|
-
import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, flattenDeep as $hgUW1$flattenDeep, set as $hgUW1$set, isString as $hgUW1$isString, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber, includes as $hgUW1$includes} from "lodash-es";
|
|
2
|
+
import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, flattenDeep as $hgUW1$flattenDeep, set as $hgUW1$set, isString as $hgUW1$isString, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber, includes as $hgUW1$includes, isPlainObject as $hgUW1$isPlainObject, isArray as $hgUW1$isArray, toString as $hgUW1$toString} from "lodash-es";
|
|
3
3
|
import {DateTime as $hgUW1$DateTime} from "luxon";
|
|
4
4
|
import {Lexer as $hgUW1$Lexer, marked as $hgUW1$marked} from "marked";
|
|
5
5
|
import {diceCoefficient as $hgUW1$diceCoefficient} from "dice-coefficient";
|
|
@@ -14,6 +14,7 @@ import $hgUW1$pretry from "p-retry";
|
|
|
14
14
|
import {Buffer as $hgUW1$Buffer} from "buffer";
|
|
15
15
|
import {parseOfficeAsync as $hgUW1$parseOfficeAsync} from "officeparser";
|
|
16
16
|
import {resolvePDFJS as $hgUW1$resolvePDFJS} from "pdfjs-serverless";
|
|
17
|
+
import {read as $hgUW1$read, utils as $hgUW1$utils} from "xlsx";
|
|
17
18
|
import {RecursiveCharacterTextSplitter as $hgUW1$RecursiveCharacterTextSplitter} from "@langchain/textsplitters";
|
|
18
19
|
|
|
19
20
|
function $parcel$interopDefault(a) {
|
|
@@ -1439,6 +1440,7 @@ var $65fc862361d7fe06$export$2e2bcd8739ae039 = $65fc862361d7fe06$var$bufferToStr
|
|
|
1439
1440
|
|
|
1440
1441
|
|
|
1441
1442
|
|
|
1443
|
+
|
|
1442
1444
|
var $f29f00a7536a3143$require$Buffer = $hgUW1$Buffer;
|
|
1443
1445
|
async function $f29f00a7536a3143$var$parsePdf(buffer) {
|
|
1444
1446
|
const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
|
|
@@ -1470,13 +1472,30 @@ async function $f29f00a7536a3143$var$parseDocument(file, fileType) {
|
|
|
1470
1472
|
for await (const chunk of file)chunks.push($f29f00a7536a3143$require$Buffer.from(chunk));
|
|
1471
1473
|
buffer = $f29f00a7536a3143$require$Buffer.concat(chunks);
|
|
1472
1474
|
}
|
|
1475
|
+
if ((0, $hgUW1$includes)([
|
|
1476
|
+
"application/vnd.ms-excel",
|
|
1477
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
1478
|
+
], fileType)) {
|
|
1479
|
+
const workbook = $hgUW1$read(buffer, {
|
|
1480
|
+
type: "buffer"
|
|
1481
|
+
});
|
|
1482
|
+
const allData = workbook.SheetNames.flatMap((sheetName)=>{
|
|
1483
|
+
const sheetData = $hgUW1$utils.sheet_to_json(workbook.Sheets[sheetName]);
|
|
1484
|
+
return sheetData.map((record)=>{
|
|
1485
|
+
const newRecord = {
|
|
1486
|
+
sheetName: sheetName
|
|
1487
|
+
};
|
|
1488
|
+
return Object.assign(newRecord, record);
|
|
1489
|
+
});
|
|
1490
|
+
});
|
|
1491
|
+
return allData;
|
|
1492
|
+
}
|
|
1473
1493
|
if ((0, $hgUW1$includes)([
|
|
1474
1494
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
1475
1495
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
1476
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
1477
1496
|
"application/vnd.oasis.opendocument.text",
|
|
1478
|
-
"application/vnd.oasis.opendocument.presentation",
|
|
1479
|
-
"application/vnd.oasis.opendocument.spreadsheet"
|
|
1497
|
+
"application/vnd.oasis.opendocument.spreadsheet",
|
|
1498
|
+
"application/vnd.oasis.opendocument.presentation"
|
|
1480
1499
|
], fileType)) return await (0, $hgUW1$parseOfficeAsync)(buffer);
|
|
1481
1500
|
if (fileType === "application/pdf") return await $f29f00a7536a3143$var$parsePdf(buffer);
|
|
1482
1501
|
return (0, $65fc862361d7fe06$export$2e2bcd8739ae039)(buffer, "utf-8");
|
|
@@ -1485,12 +1504,17 @@ var $f29f00a7536a3143$export$2e2bcd8739ae039 = $f29f00a7536a3143$var$parseDocume
|
|
|
1485
1504
|
|
|
1486
1505
|
|
|
1487
1506
|
|
|
1488
|
-
|
|
1507
|
+
|
|
1508
|
+
async function $a12871c9a32cbf3b$export$e600492876ee595b(text, options = {
|
|
1489
1509
|
chunkSize: 200,
|
|
1490
1510
|
chunkOverlap: 60
|
|
1491
1511
|
}) {
|
|
1512
|
+
let textToSplit;
|
|
1513
|
+
if ((0, $hgUW1$isString)(text)) textToSplit = text;
|
|
1514
|
+
else if ((0, $hgUW1$isPlainObject)(text) || (0, $hgUW1$isArray)(text)) textToSplit = JSON.stringify(text, null, 2);
|
|
1515
|
+
else textToSplit = (0, $hgUW1$toString)(text);
|
|
1492
1516
|
const splitter = new (0, $hgUW1$RecursiveCharacterTextSplitter)(options);
|
|
1493
|
-
return splitter.splitText(text);
|
|
1517
|
+
return splitter.splitText(textToSplit);
|
|
1494
1518
|
}
|
|
1495
1519
|
|
|
1496
1520
|
|