@truto/truto-jsonata 1.0.14 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.cjs CHANGED
@@ -13,6 +13,7 @@ var $dxT2C$pmap = require("p-map");
13
13
  var $dxT2C$pretry = require("p-retry");
14
14
  var $dxT2C$officeparser = require("officeparser");
15
15
  var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
16
+ var $dxT2C$xlsx = require("xlsx");
16
17
  var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
17
18
 
18
19
  function $parcel$interopDefault(a) {
@@ -642,19 +643,28 @@ const $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
642
643
  if (!text) return acc;
643
644
  const textToInsert = (0, $5a7cb266718aeaae$export$2e2bcd8739ae039)(text);
644
645
  // chunk the text into 2000 character chunks, should handle emojis and multi-byte characters
645
- const chunks = $bc8e7b2fdb012b3a$var$chunkText(textToInsert);
646
- (0, $dxT2C$lodashes.each)(chunks, (chunk)=>{
647
- acc.push({
648
- type: "text",
649
- text: {
650
- content: chunk
651
- },
652
- ...token.type === "codespan" ? {
653
- annotations: {
654
- code: true
655
- }
656
- } : {}
657
- });
646
+ const textSplitByNewLine = textToInsert.split("\n");
647
+ const chunksOfChunks = (0, $dxT2C$lodashes.compact)((0, $dxT2C$lodashes.map)(textSplitByNewLine, (chunk)=>$bc8e7b2fdb012b3a$var$chunkText(chunk)));
648
+ const chunksWithNewLines = (0, $dxT2C$lodashes.flattenDeep)((0, $233848446ca33fe6$export$2e2bcd8739ae039)((0, $dxT2C$lodashes.map)(chunksOfChunks, (chunk)=>{
649
+ return (0, $dxT2C$lodashes.map)(chunk, (_chunk)=>({
650
+ type: "text",
651
+ text: {
652
+ content: _chunk
653
+ },
654
+ ...token.type === "codespan" ? {
655
+ annotations: {
656
+ code: true
657
+ }
658
+ } : {}
659
+ }));
660
+ }), {
661
+ type: "text",
662
+ text: {
663
+ content: "\n"
664
+ }
665
+ }));
666
+ (0, $dxT2C$lodashes.each)(chunksWithNewLines, (chunk)=>{
667
+ acc.push(chunk);
658
668
  });
659
669
  return acc;
660
670
  }, acc);
@@ -1436,6 +1446,7 @@ async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
1436
1446
  var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1437
1447
 
1438
1448
 
1449
+
1439
1450
  async function $05e3378f7c17d263$var$parsePdf(buffer) {
1440
1451
  const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
1441
1452
  const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
@@ -1466,13 +1477,30 @@ async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
1466
1477
  for await (const chunk of file)chunks.push(Buffer.from(chunk));
1467
1478
  buffer = Buffer.concat(chunks);
1468
1479
  }
1480
+ if ((0, $dxT2C$lodashes.includes)([
1481
+ "application/vnd.ms-excel",
1482
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
1483
+ ], fileType)) {
1484
+ const workbook = $dxT2C$xlsx.read(buffer, {
1485
+ type: "buffer"
1486
+ });
1487
+ const allData = workbook.SheetNames.flatMap((sheetName)=>{
1488
+ const sheetData = $dxT2C$xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
1489
+ return sheetData.map((record)=>{
1490
+ const newRecord = {
1491
+ sheetName: sheetName
1492
+ };
1493
+ return Object.assign(newRecord, record);
1494
+ });
1495
+ });
1496
+ return allData;
1497
+ }
1469
1498
  if ((0, $dxT2C$lodashes.includes)([
1470
1499
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1471
1500
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1472
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1473
1501
  "application/vnd.oasis.opendocument.text",
1474
- "application/vnd.oasis.opendocument.presentation",
1475
- "application/vnd.oasis.opendocument.spreadsheet"
1502
+ "application/vnd.oasis.opendocument.spreadsheet",
1503
+ "application/vnd.oasis.opendocument.presentation"
1476
1504
  ], fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
1477
1505
  if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
1478
1506
  return (0, $0f748d6318103cdf$export$2e2bcd8739ae039)(buffer, "utf-8");
@@ -1481,12 +1509,17 @@ var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocume
1481
1509
 
1482
1510
 
1483
1511
 
1484
- function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
1512
+
1513
+ async function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
1485
1514
  chunkSize: 200,
1486
1515
  chunkOverlap: 60
1487
1516
  }) {
1517
+ let textToSplit;
1518
+ if ((0, $dxT2C$lodashes.isString)(text)) textToSplit = text;
1519
+ else if ((0, $dxT2C$lodashes.isPlainObject)(text) || (0, $dxT2C$lodashes.isArray)(text)) textToSplit = JSON.stringify(text, null, 2);
1520
+ else textToSplit = (0, $dxT2C$lodashes.toString)(text);
1488
1521
  const splitter = new (0, $dxT2C$langchaintextsplitters.RecursiveCharacterTextSplitter)(options);
1489
- return splitter.splitText(text);
1522
+ return splitter.splitText(textToSplit);
1490
1523
  }
1491
1524
 
1492
1525