@truto/truto-jsonata 1.0.12 → 1.0.14

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/module.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import $hgUW1$jsonata from "jsonata";
2
- import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber} from "lodash-es";
2
+ import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, flattenDeep as $hgUW1$flattenDeep, set as $hgUW1$set, isString as $hgUW1$isString, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber, includes as $hgUW1$includes} from "lodash-es";
3
3
  import {DateTime as $hgUW1$DateTime} from "luxon";
4
4
  import {Lexer as $hgUW1$Lexer, marked as $hgUW1$marked} from "marked";
5
5
  import {diceCoefficient as $hgUW1$diceCoefficient} from "dice-coefficient";
@@ -13,7 +13,6 @@ import $hgUW1$pmap from "p-map";
13
13
  import $hgUW1$pretry from "p-retry";
14
14
  import {Buffer as $hgUW1$Buffer} from "buffer";
15
15
  import {parseOfficeAsync as $hgUW1$parseOfficeAsync} from "officeparser";
16
- import {fileTypeFromBuffer as $hgUW1$fileTypeFromBuffer} from "file-type";
17
16
  import {resolvePDFJS as $hgUW1$resolvePDFJS} from "pdfjs-serverless";
18
17
  import {RecursiveCharacterTextSplitter as $hgUW1$RecursiveCharacterTextSplitter} from "@langchain/textsplitters";
19
18
 
@@ -450,6 +449,7 @@ var $1856284278f92c61$export$2e2bcd8739ae039 = $1856284278f92c61$var$convertMark
450
449
 
451
450
 
452
451
 
452
+
453
453
  const $d584c575eabd606d$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])=>{
454
454
  return (0, $hgUW1$reduce)(tokens, (acc, token)=>{
455
455
  const childTokens = token.type === "blockquote" ? (0, $hgUW1$get)(token, "tokens[0].tokens") : (0, $hgUW1$get)(token, "tokens");
@@ -511,17 +511,25 @@ const $d584c575eabd606d$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
511
511
  return acc;
512
512
  }
513
513
  if (token.type === "code") {
514
+ const textSplitByNewLine = token.text.split("\n");
515
+ const chunksOfChunks = (0, $hgUW1$compact)((0, $hgUW1$map)(textSplitByNewLine, (chunk)=>$d584c575eabd606d$var$chunkText(chunk)));
516
+ const chunksWithNewLines = (0, $hgUW1$flattenDeep)((0, $8f5fb75395ee9099$export$2e2bcd8739ae039)((0, $hgUW1$map)(chunksOfChunks, (chunk)=>{
517
+ return (0, $hgUW1$map)(chunk, (_chunk)=>({
518
+ type: "text",
519
+ text: {
520
+ content: _chunk
521
+ }
522
+ }));
523
+ }), {
524
+ type: "text",
525
+ text: {
526
+ content: "\n\n"
527
+ }
528
+ }));
514
529
  acc.push({
515
530
  type: "code",
516
531
  code: {
517
- rich_text: [
518
- {
519
- type: "text",
520
- text: {
521
- content: token.text
522
- }
523
- }
524
- ],
532
+ rich_text: chunksWithNewLines,
525
533
  language: token.lang || "plain text"
526
534
  }
527
535
  });
@@ -624,20 +632,27 @@ const $d584c575eabd606d$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
624
632
  const text = (0, $hgUW1$get)(token, "text");
625
633
  if (!text) return acc;
626
634
  const textToInsert = (0, $e8d3ca61f6a67481$export$2e2bcd8739ae039)(text);
627
- acc.push({
628
- type: "text",
629
- text: {
630
- content: textToInsert
631
- },
632
- ...token.type === "codespan" ? {
633
- annotations: {
634
- code: true
635
- }
636
- } : {}
635
+ // chunk the text into 2000 character chunks, should handle emojis and multi-byte characters
636
+ const chunks = $d584c575eabd606d$var$chunkText(textToInsert);
637
+ (0, $hgUW1$each)(chunks, (chunk)=>{
638
+ acc.push({
639
+ type: "text",
640
+ text: {
641
+ content: chunk
642
+ },
643
+ ...token.type === "codespan" ? {
644
+ annotations: {
645
+ code: true
646
+ }
647
+ } : {}
648
+ });
637
649
  });
638
650
  return acc;
639
651
  }, acc);
640
652
  };
653
+ const $d584c575eabd606d$var$chunkText = (text, numChars = 2000)=>{
654
+ return text.match(new RegExp(`.{1,${numChars}}`, "g"));
655
+ };
641
656
  const $d584c575eabd606d$var$convertMarkdownToNotion = (text)=>{
642
657
  const tokens = (0, $hgUW1$Lexer).lex(text);
643
658
  const parsedTokens = $d584c575eabd606d$var$parseMarkedTokenToNotionRequest(tokens);
@@ -1401,6 +1416,19 @@ var $bf2fdaa075f6a5e5$export$2e2bcd8739ae039 = $bf2fdaa075f6a5e5$var$generateEmb
1401
1416
 
1402
1417
 
1403
1418
 
1419
+ var $65fc862361d7fe06$require$Buffer = $hgUW1$Buffer;
1420
+ async function $65fc862361d7fe06$var$bufferToString(value, encoding) {
1421
+ if (value instanceof ReadableStream) {
1422
+ const chunks = [];
1423
+ for await (const chunk of value)chunks.push($65fc862361d7fe06$require$Buffer.from(chunk));
1424
+ const buffer = $65fc862361d7fe06$require$Buffer.concat(chunks);
1425
+ return buffer.toString(encoding);
1426
+ }
1427
+ return value.toString(encoding);
1428
+ }
1429
+ var $65fc862361d7fe06$export$2e2bcd8739ae039 = $65fc862361d7fe06$var$bufferToString;
1430
+
1431
+
1404
1432
 
1405
1433
  var $f29f00a7536a3143$require$Buffer = $hgUW1$Buffer;
1406
1434
  async function $f29f00a7536a3143$var$parsePdf(buffer) {
@@ -1426,16 +1454,23 @@ async function $f29f00a7536a3143$var$parsePdf(buffer) {
1426
1454
  // Return the results as JSON
1427
1455
  return (0, $hgUW1$join)(output, "\n");
1428
1456
  }
1429
- async function $f29f00a7536a3143$var$parseDocument(file) {
1457
+ async function $f29f00a7536a3143$var$parseDocument(file, fileType) {
1458
+ let buffer;
1430
1459
  if (file instanceof ReadableStream) {
1431
1460
  const chunks = [];
1432
1461
  for await (const chunk of file)chunks.push($f29f00a7536a3143$require$Buffer.from(chunk));
1433
- const buffer = $f29f00a7536a3143$require$Buffer.concat(chunks);
1434
- const fileExt = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.ext;
1435
- if (fileExt === "pdf") return await $f29f00a7536a3143$var$parsePdf(buffer);
1436
- return await (0, $hgUW1$parseOfficeAsync)(buffer);
1462
+ buffer = $f29f00a7536a3143$require$Buffer.concat(chunks);
1437
1463
  }
1438
- return await (0, $hgUW1$parseOfficeAsync)(file);
1464
+ if ((0, $hgUW1$includes)([
1465
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1466
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1467
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1468
+ "application/vnd.oasis.opendocument.text",
1469
+ "application/vnd.oasis.opendocument.presentation",
1470
+ "application/vnd.oasis.opendocument.spreadsheet"
1471
+ ], fileType)) return await (0, $hgUW1$parseOfficeAsync)(buffer);
1472
+ if (fileType === "application/pdf") return await $f29f00a7536a3143$var$parsePdf(buffer);
1473
+ return (0, $65fc862361d7fe06$export$2e2bcd8739ae039)(buffer, "utf-8");
1439
1474
  }
1440
1475
  var $f29f00a7536a3143$export$2e2bcd8739ae039 = $f29f00a7536a3143$var$parseDocument;
1441
1476
 
@@ -1451,22 +1486,20 @@ function $a12871c9a32cbf3b$export$e600492876ee595b(text, options = {
1451
1486
 
1452
1487
 
1453
1488
 
1454
-
1455
1489
  var $6b4f9d8bcf8ab446$require$Buffer = $hgUW1$Buffer;
1456
- async function $6b4f9d8bcf8ab446$var$getDataUri(file) {
1490
+ async function $6b4f9d8bcf8ab446$var$getDataUri(file, mimeType) {
1491
+ if (!mimeType) throw new Error("Mime type is required");
1457
1492
  if (file instanceof ReadableStream) {
1458
1493
  const chunks = [];
1459
1494
  for await (const chunk of file)chunks.push($6b4f9d8bcf8ab446$require$Buffer.from(chunk));
1460
1495
  const buffer = $6b4f9d8bcf8ab446$require$Buffer.concat(chunks);
1461
1496
  const base64Image = buffer.toString("base64");
1462
- const mimeType = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.mime;
1463
1497
  // Construct the data URI for a PNG image
1464
1498
  return `data:${mimeType};base64,${base64Image}`;
1465
1499
  }
1466
1500
  const arrayBuffer = file instanceof Blob ? await file.arrayBuffer() : file;
1467
1501
  const buffer = $6b4f9d8bcf8ab446$require$Buffer.from(arrayBuffer);
1468
1502
  const base64Image = buffer.toString("base64");
1469
- const mimeType = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.mime;
1470
1503
  return `data:${mimeType};base64,${base64Image}`;
1471
1504
  }
1472
1505
  var $6b4f9d8bcf8ab446$export$2e2bcd8739ae039 = $6b4f9d8bcf8ab446$var$getDataUri;
@@ -1478,6 +1511,7 @@ async function $5e08a3608e719b52$var$teeStream(stream) {
1478
1511
  var $5e08a3608e719b52$export$2e2bcd8739ae039 = $5e08a3608e719b52$var$teeStream;
1479
1512
 
1480
1513
 
1514
+
1481
1515
  function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1482
1516
  expression.registerFunction("dtFromIso", (0, $9e00534fc92334a4$export$2e2bcd8739ae039));
1483
1517
  expression.registerFunction("base64decode", (0, $52da679a67e0b066$export$2e2bcd8739ae039));
@@ -1559,6 +1593,7 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1559
1593
  expression.registerFunction("recursiveCharacterTextSplitter", (0, $a12871c9a32cbf3b$export$e600492876ee595b));
1560
1594
  expression.registerFunction("getDataUri", (0, $6b4f9d8bcf8ab446$export$2e2bcd8739ae039));
1561
1595
  expression.registerFunction("teeStream", (0, $5e08a3608e719b52$export$2e2bcd8739ae039));
1596
+ expression.registerFunction("bufferToString", (0, $65fc862361d7fe06$export$2e2bcd8739ae039));
1562
1597
  return expression;
1563
1598
  }
1564
1599