@truto/truto-jsonata 1.0.15 → 1.0.16

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/main.cjs CHANGED
@@ -13,6 +13,7 @@ var $dxT2C$pmap = require("p-map");
13
13
  var $dxT2C$pretry = require("p-retry");
14
14
  var $dxT2C$officeparser = require("officeparser");
15
15
  var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
16
+ var $dxT2C$xlsx = require("xlsx");
16
17
  var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
17
18
 
18
19
  function $parcel$interopDefault(a) {
@@ -1445,6 +1446,7 @@ async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
1445
1446
  var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1446
1447
 
1447
1448
 
1449
+
1448
1450
  async function $05e3378f7c17d263$var$parsePdf(buffer) {
1449
1451
  const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
1450
1452
  const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
@@ -1475,13 +1477,30 @@ async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
1475
1477
  for await (const chunk of file)chunks.push(Buffer.from(chunk));
1476
1478
  buffer = Buffer.concat(chunks);
1477
1479
  }
1480
+ if ((0, $dxT2C$lodashes.includes)([
1481
+ "application/vnd.ms-excel",
1482
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
1483
+ ], fileType)) {
1484
+ const workbook = $dxT2C$xlsx.read(buffer, {
1485
+ type: "buffer"
1486
+ });
1487
+ const allData = workbook.SheetNames.flatMap((sheetName)=>{
1488
+ const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
1489
+ return sheetData.map((record)=>{
1490
+ const newRecord = {
1491
+ sheetName: sheetName
1492
+ };
1493
+ return Object.assign(newRecord, record);
1494
+ });
1495
+ });
1496
+ return allData;
1497
+ }
1478
1498
  if ((0, $dxT2C$lodashes.includes)([
1479
1499
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1480
1500
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1481
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1482
1501
  "application/vnd.oasis.opendocument.text",
1483
- "application/vnd.oasis.opendocument.presentation",
1484
- "application/vnd.oasis.opendocument.spreadsheet"
1502
+ "application/vnd.oasis.opendocument.spreadsheet",
1503
+ "application/vnd.oasis.opendocument.presentation"
1485
1504
  ], fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
1486
1505
  if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
1487
1506
  return (0, $0f748d6318103cdf$export$2e2bcd8739ae039)(buffer, "utf-8");
@@ -1490,12 +1509,17 @@ var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocume
1490
1509
 
1491
1510
 
1492
1511
 
1493
- function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
1512
+
1513
+ async function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
1494
1514
  chunkSize: 200,
1495
1515
  chunkOverlap: 60
1496
1516
  }) {
1517
+ let textToSplit;
1518
+ if ((0, $dxT2C$lodashes.isString)(text)) textToSplit = text;
1519
+ else if ((0, $dxT2C$lodashes.isPlainObject)(text) || (0, $dxT2C$lodashes.isArray)(text)) textToSplit = JSON.stringify(text, null, 2);
1520
+ else textToSplit = (0, $dxT2C$lodashes.toString)(text);
1497
1521
  const splitter = new (0, $dxT2C$langchaintextsplitters.RecursiveCharacterTextSplitter)(options);
1498
- return splitter.splitText(text);
1522
+ return splitter.splitText(textToSplit);
1499
1523
  }
1500
1524
 
1501
1525