@truto/truto-jsonata 1.0.12 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.cjs +62 -29
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +65 -30
- package/dist/module.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -4
package/dist/module.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import $hgUW1$jsonata from "jsonata";
|
|
2
|
-
import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map,
|
|
2
|
+
import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, flattenDeep as $hgUW1$flattenDeep, set as $hgUW1$set, isString as $hgUW1$isString, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber, includes as $hgUW1$includes} from "lodash-es";
|
|
3
3
|
import {DateTime as $hgUW1$DateTime} from "luxon";
|
|
4
4
|
import {Lexer as $hgUW1$Lexer, marked as $hgUW1$marked} from "marked";
|
|
5
5
|
import {diceCoefficient as $hgUW1$diceCoefficient} from "dice-coefficient";
|
|
@@ -13,7 +13,6 @@ import $hgUW1$pmap from "p-map";
|
|
|
13
13
|
import $hgUW1$pretry from "p-retry";
|
|
14
14
|
import {Buffer as $hgUW1$Buffer} from "buffer";
|
|
15
15
|
import {parseOfficeAsync as $hgUW1$parseOfficeAsync} from "officeparser";
|
|
16
|
-
import {fileTypeFromBuffer as $hgUW1$fileTypeFromBuffer} from "file-type";
|
|
17
16
|
import {resolvePDFJS as $hgUW1$resolvePDFJS} from "pdfjs-serverless";
|
|
18
17
|
import {RecursiveCharacterTextSplitter as $hgUW1$RecursiveCharacterTextSplitter} from "@langchain/textsplitters";
|
|
19
18
|
|
|
@@ -450,6 +449,7 @@ var $1856284278f92c61$export$2e2bcd8739ae039 = $1856284278f92c61$var$convertMark
|
|
|
450
449
|
|
|
451
450
|
|
|
452
451
|
|
|
452
|
+
|
|
453
453
|
const $d584c575eabd606d$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])=>{
|
|
454
454
|
return (0, $hgUW1$reduce)(tokens, (acc, token)=>{
|
|
455
455
|
const childTokens = token.type === "blockquote" ? (0, $hgUW1$get)(token, "tokens[0].tokens") : (0, $hgUW1$get)(token, "tokens");
|
|
@@ -511,17 +511,25 @@ const $d584c575eabd606d$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
|
|
|
511
511
|
return acc;
|
|
512
512
|
}
|
|
513
513
|
if (token.type === "code") {
|
|
514
|
+
const textSplitByNewLine = token.text.split("\n");
|
|
515
|
+
const chunksOfChunks = (0, $hgUW1$compact)((0, $hgUW1$map)(textSplitByNewLine, (chunk)=>$d584c575eabd606d$var$chunkText(chunk)));
|
|
516
|
+
const chunksWithNewLines = (0, $hgUW1$flattenDeep)((0, $8f5fb75395ee9099$export$2e2bcd8739ae039)((0, $hgUW1$map)(chunksOfChunks, (chunk)=>{
|
|
517
|
+
return (0, $hgUW1$map)(chunk, (_chunk)=>({
|
|
518
|
+
type: "text",
|
|
519
|
+
text: {
|
|
520
|
+
content: _chunk
|
|
521
|
+
}
|
|
522
|
+
}));
|
|
523
|
+
}), {
|
|
524
|
+
type: "text",
|
|
525
|
+
text: {
|
|
526
|
+
content: "\n\n"
|
|
527
|
+
}
|
|
528
|
+
}));
|
|
514
529
|
acc.push({
|
|
515
530
|
type: "code",
|
|
516
531
|
code: {
|
|
517
|
-
rich_text:
|
|
518
|
-
{
|
|
519
|
-
type: "text",
|
|
520
|
-
text: {
|
|
521
|
-
content: token.text
|
|
522
|
-
}
|
|
523
|
-
}
|
|
524
|
-
],
|
|
532
|
+
rich_text: chunksWithNewLines,
|
|
525
533
|
language: token.lang || "plain text"
|
|
526
534
|
}
|
|
527
535
|
});
|
|
@@ -624,20 +632,27 @@ const $d584c575eabd606d$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
|
|
|
624
632
|
const text = (0, $hgUW1$get)(token, "text");
|
|
625
633
|
if (!text) return acc;
|
|
626
634
|
const textToInsert = (0, $e8d3ca61f6a67481$export$2e2bcd8739ae039)(text);
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
635
|
+
// chunk the text into 2000 character chunks, should handle emojis and multi-byte characters
|
|
636
|
+
const chunks = $d584c575eabd606d$var$chunkText(textToInsert);
|
|
637
|
+
(0, $hgUW1$each)(chunks, (chunk)=>{
|
|
638
|
+
acc.push({
|
|
639
|
+
type: "text",
|
|
640
|
+
text: {
|
|
641
|
+
content: chunk
|
|
642
|
+
},
|
|
643
|
+
...token.type === "codespan" ? {
|
|
644
|
+
annotations: {
|
|
645
|
+
code: true
|
|
646
|
+
}
|
|
647
|
+
} : {}
|
|
648
|
+
});
|
|
637
649
|
});
|
|
638
650
|
return acc;
|
|
639
651
|
}, acc);
|
|
640
652
|
};
|
|
653
|
+
/**
 * Split `text` into chunks of at most `numChars` characters (default 2000,
 * matching the Notion rich-text content limit the callers target).
 *
 * The `u` flag makes `.` consume whole Unicode code points, so a chunk
 * boundary can never split a surrogate pair (emoji / astral characters) —
 * without it, `.{1,n}` counts UTF-16 units and can cut an emoji in half.
 *
 * Returns `null` for empty input (String.prototype.match semantics); both
 * call sites rely on that via lodash `compact`/`each`, so do not coerce to [].
 * NOTE(review): `.` does not match newlines, so embedded "\n" characters are
 * dropped from the chunks — one caller pre-splits on "\n", the other may not;
 * confirm that is intended.
 */ const $d584c575eabd606d$var$chunkText = (text, numChars = 2000)=>{
    return text.match(new RegExp(`.{1,${numChars}}`, "gu"));
};
|
|
641
656
|
const $d584c575eabd606d$var$convertMarkdownToNotion = (text)=>{
|
|
642
657
|
const tokens = (0, $hgUW1$Lexer).lex(text);
|
|
643
658
|
const parsedTokens = $d584c575eabd606d$var$parseMarkedTokenToNotionRequest(tokens);
|
|
@@ -1401,6 +1416,19 @@ var $bf2fdaa075f6a5e5$export$2e2bcd8739ae039 = $bf2fdaa075f6a5e5$var$generateEmb
|
|
|
1401
1416
|
|
|
1402
1417
|
|
|
1403
1418
|
|
|
1419
|
+
var $65fc862361d7fe06$require$Buffer = $hgUW1$Buffer;
/**
 * Decode `value` to a string using `encoding`.
 *
 * A ReadableStream is drained via async iteration, its chunks joined into a
 * single Buffer, and that Buffer decoded. Anything else (Buffer, string, …)
 * is decoded directly with its own `toString(encoding)`.
 */ async function $65fc862361d7fe06$var$bufferToString(value, encoding) {
    if (!(value instanceof ReadableStream)) return value.toString(encoding);
    const collected = [];
    for await (const piece of value)collected.push($65fc862361d7fe06$require$Buffer.from(piece));
    const joined = $65fc862361d7fe06$require$Buffer.concat(collected);
    return joined.toString(encoding);
}
var $65fc862361d7fe06$export$2e2bcd8739ae039 = $65fc862361d7fe06$var$bufferToString;
|
|
1430
|
+
|
|
1431
|
+
|
|
1404
1432
|
|
|
1405
1433
|
var $f29f00a7536a3143$require$Buffer = $hgUW1$Buffer;
|
|
1406
1434
|
async function $f29f00a7536a3143$var$parsePdf(buffer) {
|
|
@@ -1426,16 +1454,23 @@ async function $f29f00a7536a3143$var$parsePdf(buffer) {
|
|
|
1426
1454
|
// Return the results as JSON
|
|
1427
1455
|
return (0, $hgUW1$join)(output, "\n");
|
|
1428
1456
|
}
|
|
1429
|
-
/**
 * Extract text from a document, routed by its declared `fileType` MIME type:
 * PDFs go through parsePdf, OOXML/ODF office formats through parseOfficeAsync,
 * and everything else is decoded as UTF-8 text via bufferToString.
 *
 * A ReadableStream input is first drained into a single Buffer.
 * NOTE(review): when `file` is not a ReadableStream, `buffer` stays undefined
 * and is passed through to the parsers as-is — confirm callers always supply
 * a stream here.
 */ async function $f29f00a7536a3143$var$parseDocument(file, fileType) {
    let buffer;
    if (file instanceof ReadableStream) {
        const pieces = [];
        for await (const piece of file)pieces.push($f29f00a7536a3143$require$Buffer.from(piece));
        buffer = $f29f00a7536a3143$require$Buffer.concat(pieces);
    }
    if (fileType === "application/pdf") return await $f29f00a7536a3143$var$parsePdf(buffer);
    const officeMimeTypes = [
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.oasis.opendocument.presentation",
        "application/vnd.oasis.opendocument.spreadsheet"
    ];
    if ((0, $hgUW1$includes)(officeMimeTypes, fileType)) return await (0, $hgUW1$parseOfficeAsync)(buffer);
    return (0, $65fc862361d7fe06$export$2e2bcd8739ae039)(buffer, "utf-8");
}
var $f29f00a7536a3143$export$2e2bcd8739ae039 = $f29f00a7536a3143$var$parseDocument;
|
|
1441
1476
|
|
|
@@ -1451,22 +1486,20 @@ function $a12871c9a32cbf3b$export$e600492876ee595b(text, options = {
|
|
|
1451
1486
|
|
|
1452
1487
|
|
|
1453
1488
|
|
|
1454
|
-
|
|
1455
1489
|
var $6b4f9d8bcf8ab446$require$Buffer = $hgUW1$Buffer;
/**
 * Encode `file` as a base64 data URI using the caller-supplied `mimeType`.
 *
 * Accepts a ReadableStream (drained into one Buffer), a Blob (read via
 * arrayBuffer()), or raw bytes handed straight to Buffer.from.
 * Throws when `mimeType` is missing — the URI cannot be built without it.
 */ async function $6b4f9d8bcf8ab446$var$getDataUri(file, mimeType) {
    if (!mimeType) throw new Error("Mime type is required");
    let buffer;
    if (file instanceof ReadableStream) {
        const collected = [];
        for await (const part of file)collected.push($6b4f9d8bcf8ab446$require$Buffer.from(part));
        buffer = $6b4f9d8bcf8ab446$require$Buffer.concat(collected);
    } else {
        const raw = file instanceof Blob ? await file.arrayBuffer() : file;
        buffer = $6b4f9d8bcf8ab446$require$Buffer.from(raw);
    }
    const base64Image = buffer.toString("base64");
    return `data:${mimeType};base64,${base64Image}`;
}
var $6b4f9d8bcf8ab446$export$2e2bcd8739ae039 = $6b4f9d8bcf8ab446$var$getDataUri;
|
|
@@ -1478,6 +1511,7 @@ async function $5e08a3608e719b52$var$teeStream(stream) {
|
|
|
1478
1511
|
var $5e08a3608e719b52$export$2e2bcd8739ae039 = $5e08a3608e719b52$var$teeStream;
|
|
1479
1512
|
|
|
1480
1513
|
|
|
1514
|
+
|
|
1481
1515
|
function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
|
|
1482
1516
|
expression.registerFunction("dtFromIso", (0, $9e00534fc92334a4$export$2e2bcd8739ae039));
|
|
1483
1517
|
expression.registerFunction("base64decode", (0, $52da679a67e0b066$export$2e2bcd8739ae039));
|
|
@@ -1559,6 +1593,7 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
|
|
|
1559
1593
|
expression.registerFunction("recursiveCharacterTextSplitter", (0, $a12871c9a32cbf3b$export$e600492876ee595b));
|
|
1560
1594
|
expression.registerFunction("getDataUri", (0, $6b4f9d8bcf8ab446$export$2e2bcd8739ae039));
|
|
1561
1595
|
expression.registerFunction("teeStream", (0, $5e08a3608e719b52$export$2e2bcd8739ae039));
|
|
1596
|
+
expression.registerFunction("bufferToString", (0, $65fc862361d7fe06$export$2e2bcd8739ae039));
|
|
1562
1597
|
return expression;
|
|
1563
1598
|
}
|
|
1564
1599
|
|