@truto/truto-jsonata 1.0.12 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.cjs CHANGED
@@ -12,7 +12,6 @@ var $dxT2C$mime = require("mime");
12
12
  var $dxT2C$pmap = require("p-map");
13
13
  var $dxT2C$pretry = require("p-retry");
14
14
  var $dxT2C$officeparser = require("officeparser");
15
- var $dxT2C$filetype = require("file-type");
16
15
  var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
17
16
  var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
18
17
 
@@ -459,6 +458,7 @@ var $90c566adb85cb52e$export$2e2bcd8739ae039 = $90c566adb85cb52e$var$convertMark
459
458
 
460
459
 
461
460
 
461
+
462
462
  const $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])=>{
463
463
  return (0, $dxT2C$lodashes.reduce)(tokens, (acc, token)=>{
464
464
  const childTokens = token.type === "blockquote" ? (0, $dxT2C$lodashes.get)(token, "tokens[0].tokens") : (0, $dxT2C$lodashes.get)(token, "tokens");
@@ -520,17 +520,25 @@ const $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
520
520
  return acc;
521
521
  }
522
522
  if (token.type === "code") {
523
+ const textSplitByNewLine = token.text.split("\n");
524
+ const chunksOfChunks = (0, $dxT2C$lodashes.compact)((0, $dxT2C$lodashes.map)(textSplitByNewLine, (chunk)=>$bc8e7b2fdb012b3a$var$chunkText(chunk)));
525
+ const chunksWithNewLines = (0, $dxT2C$lodashes.flattenDeep)((0, $233848446ca33fe6$export$2e2bcd8739ae039)((0, $dxT2C$lodashes.map)(chunksOfChunks, (chunk)=>{
526
+ return (0, $dxT2C$lodashes.map)(chunk, (_chunk)=>({
527
+ type: "text",
528
+ text: {
529
+ content: _chunk
530
+ }
531
+ }));
532
+ }), {
533
+ type: "text",
534
+ text: {
535
+ content: "\n\n"
536
+ }
537
+ }));
523
538
  acc.push({
524
539
  type: "code",
525
540
  code: {
526
- rich_text: [
527
- {
528
- type: "text",
529
- text: {
530
- content: token.text
531
- }
532
- }
533
- ],
541
+ rich_text: chunksWithNewLines,
534
542
  language: token.lang || "plain text"
535
543
  }
536
544
  });
@@ -633,20 +641,27 @@ const $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
633
641
  const text = (0, $dxT2C$lodashes.get)(token, "text");
634
642
  if (!text) return acc;
635
643
  const textToInsert = (0, $5a7cb266718aeaae$export$2e2bcd8739ae039)(text);
636
- acc.push({
637
- type: "text",
638
- text: {
639
- content: textToInsert
640
- },
641
- ...token.type === "codespan" ? {
642
- annotations: {
643
- code: true
644
- }
645
- } : {}
644
+ // chunk the text into 2000 character chunks, should handle emojis and multi-byte characters
645
+ const chunks = $bc8e7b2fdb012b3a$var$chunkText(textToInsert);
646
+ (0, $dxT2C$lodashes.each)(chunks, (chunk)=>{
647
+ acc.push({
648
+ type: "text",
649
+ text: {
650
+ content: chunk
651
+ },
652
+ ...token.type === "codespan" ? {
653
+ annotations: {
654
+ code: true
655
+ }
656
+ } : {}
657
+ });
646
658
  });
647
659
  return acc;
648
660
  }, acc);
649
661
  };
662
/**
 * Splits a string into consecutive chunks of at most `numChars` UTF-16 code
 * units, preserving every character of the input.
 *
 * Differences from the previous regex-based implementation
 * (`text.match(/.{1,N}/g)`):
 *  - line terminators are kept (an un-flagged `.` does not match them, so
 *    they were silently dropped from the output);
 *  - a chunk boundary never falls between the two halves of a surrogate
 *    pair, so astral characters such as emoji stay intact.
 *
 * NOTE(review): the 2000 default presumably mirrors a per-rich-text-item size
 * limit of the target API - confirm against the API documentation.
 *
 * @param {string} text - The text to split.
 * @param {number} [numChars=2000] - Maximum chunk length in UTF-16 code units.
 * @returns {string[] | null} The chunks in order, or `null` for an empty
 *   string (kept for compatibility: callers feed the result to lodash
 *   `compact` / `each`, which treat `null` as "nothing to emit").
 * @throws {TypeError} If `text` is not a string.
 * @throws {RangeError} If `numChars` is not a positive integer.
 */
const $bc8e7b2fdb012b3a$var$chunkText = (text, numChars = 2000)=>{
    if (typeof text !== "string") throw new TypeError("text must be a string");
    if (!Number.isInteger(numChars) || numChars < 1) throw new RangeError("numChars must be a positive integer");
    if (text.length === 0) return null;
    const isHighSurrogate = (code)=>code >= 0xd800 && code <= 0xdbff;
    const isLowSurrogate = (code)=>code >= 0xdc00 && code <= 0xdfff;
    const chunks = [];
    let start = 0;
    while(start < text.length){
        let end = Math.min(start + numChars, text.length);
        const splitsPair = end < text.length && isHighSurrogate(text.charCodeAt(end - 1)) && isLowSurrogate(text.charCodeAt(end));
        if (splitsPair) {
            // Prefer a shorter chunk; if that would leave the chunk empty
            // (numChars === 1), keep the pair whole instead so we always advance.
            end = end - 1 > start ? end - 1 : end + 1;
        }
        chunks.push(text.slice(start, end));
        start = end;
    }
    return chunks;
};
650
665
  const $bc8e7b2fdb012b3a$var$convertMarkdownToNotion = (text)=>{
651
666
  const tokens = (0, $dxT2C$marked.Lexer).lex(text);
652
667
  const parsedTokens = $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest(tokens);
@@ -1409,6 +1424,17 @@ var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmb
1409
1424
 
1410
1425
 
1411
1426
 
1427
/**
 * Decodes binary content into a string using the given encoding.
 *
 * Supported inputs:
 *  - `ReadableStream`: drained completely, chunks concatenated, then decoded
 *    (decoding after concatenation keeps multi-byte sequences that straddle
 *    chunk boundaries intact);
 *  - `Blob`, `ArrayBuffer`, typed arrays / `DataView`: their bytes are decoded.
 *    Previously these fell through to the generic `toString` and produced
 *    values such as "[object ArrayBuffer]" or "97,98,99";
 *  - anything else (e.g. `Buffer`, string): `value.toString(encoding)`.
 *
 * @param {ReadableStream | Blob | ArrayBuffer | ArrayBufferView | Buffer | string} value
 *   The content to decode.
 * @param {string} [encoding] - Encoding name forwarded to `Buffer#toString`.
 * @returns {Promise<string>} The decoded text.
 */
async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
    if (value instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of value)chunks.push(Buffer.from(chunk));
        return Buffer.concat(chunks).toString(encoding);
    }
    if (value instanceof Blob) return Buffer.from(await value.arrayBuffer()).toString(encoding);
    if (value instanceof ArrayBuffer) return Buffer.from(value).toString(encoding);
    // Buffers are views too, but already decode correctly via toString below.
    if (ArrayBuffer.isView(value) && !Buffer.isBuffer(value)) return Buffer.from(value.buffer, value.byteOffset, value.byteLength).toString(encoding);
    return value.toString(encoding);
}
1436
+ var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
1437
+
1412
1438
 
1413
1439
  async function $05e3378f7c17d263$var$parsePdf(buffer) {
1414
1440
  const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
@@ -1433,16 +1459,23 @@ async function $05e3378f7c17d263$var$parsePdf(buffer) {
1433
1459
  // Return the results as JSON
1434
1460
  return (0, $dxT2C$lodashes.join)(output, "\n");
1435
1461
  }
1436
/**
 * Extracts text from a document, choosing the parser from its MIME type.
 *
 *  - OOXML / OpenDocument word-processing, presentation and spreadsheet
 *    types are handed to `officeparser`'s `parseOfficeAsync`;
 *  - "application/pdf" is handed to the PDF parser in this module;
 *  - any other (or missing) `fileType` is decoded as UTF-8 text.
 *
 * A `ReadableStream` input is drained into a single Buffer first. Any other
 * input is passed to the parser as-is. Previously `buffer` was only assigned
 * in the stream branch, so non-stream inputs reached the parsers as
 * `undefined`.
 *
 * @param {ReadableStream | Buffer} file - The document content.
 *   NOTE(review): the PDF path reads `byteOffset`/`byteLength`/`buffer`, so a
 *   non-stream `file` is presumably a Buffer or typed array - confirm with callers.
 * @param {string} [fileType] - MIME type of the document.
 * @returns {Promise<string>} The extracted text.
 */
async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
    // Default to the input itself so Buffer (non-stream) callers keep working.
    let buffer = file;
    if (file instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of file)chunks.push(Buffer.from(chunk));
        buffer = Buffer.concat(chunks);
    }
    const officeMimeTypes = [
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.oasis.opendocument.presentation",
        "application/vnd.oasis.opendocument.spreadsheet"
    ];
    if (officeMimeTypes.includes(fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
    if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
    return (0, $0f748d6318103cdf$export$2e2bcd8739ae039)(buffer, "utf-8");
}
1447
1480
  var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
1448
1481
 
@@ -1457,21 +1490,19 @@ function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
1457
1490
  }
1458
1491
 
1459
1492
 
1460
-
1461
/**
 * Builds a base64 `data:` URI from binary content.
 *
 * The content may be a `ReadableStream` (drained fully), a `Blob`, or any
 * value accepted by `Buffer.from` (Buffer, ArrayBuffer, typed array, ...).
 *
 * The MIME type is taken from `mimeType`. When it is omitted and `file` is a
 * `Blob` with a non-empty `type`, that type is used instead. If neither is
 * available the call fails before any content is read.
 *
 * @param {ReadableStream | Blob | ArrayBuffer | Buffer} file - Content to encode.
 * @param {string} [mimeType] - MIME type to embed in the URI.
 * @returns {Promise<string>} A URI of the form `data:<mime>;base64,<payload>`.
 * @throws {Error} "Mime type is required" when no MIME type can be determined.
 */
async function $15c432f5f036a88a$var$getDataUri(file, mimeType) {
    const resolvedMimeType = mimeType || (file instanceof Blob ? file.type : "");
    if (!resolvedMimeType) throw new Error("Mime type is required");
    let buffer;
    if (file instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of file)chunks.push(Buffer.from(chunk));
        buffer = Buffer.concat(chunks);
    } else {
        const bytes = file instanceof Blob ? await file.arrayBuffer() : file;
        buffer = Buffer.from(bytes);
    }
    return `data:${resolvedMimeType};base64,${buffer.toString("base64")}`;
}
1477
1508
  var $15c432f5f036a88a$export$2e2bcd8739ae039 = $15c432f5f036a88a$var$getDataUri;
@@ -1483,6 +1514,7 @@ async function $9a2529096849a04f$var$teeStream(stream) {
1483
1514
  var $9a2529096849a04f$export$2e2bcd8739ae039 = $9a2529096849a04f$var$teeStream;
1484
1515
 
1485
1516
 
1517
+
1486
1518
  function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
1487
1519
  expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));
1488
1520
  expression.registerFunction("base64decode", (0, $77081a2d6d46cd50$export$2e2bcd8739ae039));
@@ -1564,6 +1596,7 @@ function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
1564
1596
  expression.registerFunction("recursiveCharacterTextSplitter", (0, $fe4dcef142601b8c$export$e600492876ee595b));
1565
1597
  expression.registerFunction("getDataUri", (0, $15c432f5f036a88a$export$2e2bcd8739ae039));
1566
1598
  expression.registerFunction("teeStream", (0, $9a2529096849a04f$export$2e2bcd8739ae039));
1599
+ expression.registerFunction("bufferToString", (0, $0f748d6318103cdf$export$2e2bcd8739ae039));
1567
1600
  return expression;
1568
1601
  }
1569
1602