@truto/truto-jsonata 1.0.12 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.cjs +62 -29
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +65 -30
- package/dist/module.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -4
package/dist/main.cjs
CHANGED
|
@@ -12,7 +12,6 @@ var $dxT2C$mime = require("mime");
|
|
|
12
12
|
var $dxT2C$pmap = require("p-map");
|
|
13
13
|
var $dxT2C$pretry = require("p-retry");
|
|
14
14
|
var $dxT2C$officeparser = require("officeparser");
|
|
15
|
-
var $dxT2C$filetype = require("file-type");
|
|
16
15
|
var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
|
|
17
16
|
var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
|
|
18
17
|
|
|
@@ -459,6 +458,7 @@ var $90c566adb85cb52e$export$2e2bcd8739ae039 = $90c566adb85cb52e$var$convertMark
|
|
|
459
458
|
|
|
460
459
|
|
|
461
460
|
|
|
461
|
+
|
|
462
462
|
const $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])=>{
|
|
463
463
|
return (0, $dxT2C$lodashes.reduce)(tokens, (acc, token)=>{
|
|
464
464
|
const childTokens = token.type === "blockquote" ? (0, $dxT2C$lodashes.get)(token, "tokens[0].tokens") : (0, $dxT2C$lodashes.get)(token, "tokens");
|
|
@@ -520,17 +520,25 @@ const $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
|
|
|
520
520
|
return acc;
|
|
521
521
|
}
|
|
522
522
|
if (token.type === "code") {
|
|
523
|
+
const textSplitByNewLine = token.text.split("\n");
|
|
524
|
+
const chunksOfChunks = (0, $dxT2C$lodashes.compact)((0, $dxT2C$lodashes.map)(textSplitByNewLine, (chunk)=>$bc8e7b2fdb012b3a$var$chunkText(chunk)));
|
|
525
|
+
const chunksWithNewLines = (0, $dxT2C$lodashes.flattenDeep)((0, $233848446ca33fe6$export$2e2bcd8739ae039)((0, $dxT2C$lodashes.map)(chunksOfChunks, (chunk)=>{
|
|
526
|
+
return (0, $dxT2C$lodashes.map)(chunk, (_chunk)=>({
|
|
527
|
+
type: "text",
|
|
528
|
+
text: {
|
|
529
|
+
content: _chunk
|
|
530
|
+
}
|
|
531
|
+
}));
|
|
532
|
+
}), {
|
|
533
|
+
type: "text",
|
|
534
|
+
text: {
|
|
535
|
+
content: "\n\n"
|
|
536
|
+
}
|
|
537
|
+
}));
|
|
523
538
|
acc.push({
|
|
524
539
|
type: "code",
|
|
525
540
|
code: {
|
|
526
|
-
rich_text:
|
|
527
|
-
{
|
|
528
|
-
type: "text",
|
|
529
|
-
text: {
|
|
530
|
-
content: token.text
|
|
531
|
-
}
|
|
532
|
-
}
|
|
533
|
-
],
|
|
541
|
+
rich_text: chunksWithNewLines,
|
|
534
542
|
language: token.lang || "plain text"
|
|
535
543
|
}
|
|
536
544
|
});
|
|
@@ -633,20 +641,27 @@ const $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest = (tokens, acc = [])
|
|
|
633
641
|
const text = (0, $dxT2C$lodashes.get)(token, "text");
|
|
634
642
|
if (!text) return acc;
|
|
635
643
|
const textToInsert = (0, $5a7cb266718aeaae$export$2e2bcd8739ae039)(text);
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
644
|
+
// chunk the text into 2000 character chunks, should handle emojis and multi-byte characters
|
|
645
|
+
const chunks = $bc8e7b2fdb012b3a$var$chunkText(textToInsert);
|
|
646
|
+
(0, $dxT2C$lodashes.each)(chunks, (chunk)=>{
|
|
647
|
+
acc.push({
|
|
648
|
+
type: "text",
|
|
649
|
+
text: {
|
|
650
|
+
content: chunk
|
|
651
|
+
},
|
|
652
|
+
...token.type === "codespan" ? {
|
|
653
|
+
annotations: {
|
|
654
|
+
code: true
|
|
655
|
+
}
|
|
656
|
+
} : {}
|
|
657
|
+
});
|
|
646
658
|
});
|
|
647
659
|
return acc;
|
|
648
660
|
}, acc);
|
|
649
661
|
};
|
|
662
|
+
/**
 * Split `text` into consecutive chunks of at most `numChars` UTF-16 code units.
 *
 * Unlike a `.{1,n}` regex, this keeps every character of the input: line
 * terminators (`\n`, `\r`, U+2028, U+2029) are preserved, so joining the
 * returned chunks always reproduces `text` exactly.
 *
 * A chunk boundary is never placed between the two halves of a surrogate
 * pair, so emoji and other astral characters stay intact. The only time a
 * chunk exceeds `numChars` is when `numChars` is 1 and the next character
 * is a surrogate pair (2 code units).
 *
 * @param {string} text - Text to split.
 * @param {number} [numChars=2000] - Maximum chunk size, a positive integer.
 * @returns {string[] | null} The chunks in order, or `null` when `text` is
 *   empty (the same value `String.prototype.match` gave for no match; the
 *   callers in this file pass the result through lodash `compact` / `each`,
 *   which tolerate `null`).
 * @throws {TypeError} If `text` is not a string.
 * @throws {RangeError} If `numChars` is not a positive integer.
 */
const $bc8e7b2fdb012b3a$var$chunkText = (text, numChars = 2000)=>{
    if (typeof text !== "string") throw new TypeError("chunkText: text must be a string");
    if (!Number.isInteger(numChars) || numChars < 1) throw new RangeError("chunkText: numChars must be a positive integer");
    // Keep the historical "no match" result for empty input.
    if (text.length === 0) return null;
    const chunks = [];
    let start = 0;
    while(start < text.length){
        let end = Math.min(start + numChars, text.length);
        if (end < text.length) {
            const prev = text.charCodeAt(end - 1);
            const next = text.charCodeAt(end);
            const splitsPair = prev >= 0xd800 && prev <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
            // Move the cut before the pair; if the chunk would become empty
            // (numChars === 1), take the whole pair instead.
            if (splitsPair) end = end - start > 1 ? end - 1 : end + 1;
        }
        chunks.push(text.slice(start, end));
        start = end;
    }
    return chunks;
};
|
|
650
665
|
const $bc8e7b2fdb012b3a$var$convertMarkdownToNotion = (text)=>{
|
|
651
666
|
const tokens = (0, $dxT2C$marked.Lexer).lex(text);
|
|
652
667
|
const parsedTokens = $bc8e7b2fdb012b3a$var$parseMarkedTokenToNotionRequest(tokens);
|
|
@@ -1409,6 +1424,17 @@ var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmb
|
|
|
1409
1424
|
|
|
1410
1425
|
|
|
1411
1426
|
|
|
1427
|
+
/**
 * Decode binary data into a string using the given encoding.
 *
 * Supported inputs:
 *  - `ReadableStream`: fully drained, chunks concatenated, then decoded.
 *  - `Blob`: read via `arrayBuffer()`, then decoded.
 *  - `Buffer`: decoded directly.
 *  - `ArrayBuffer` / typed arrays / `DataView`: their bytes are wrapped in a
 *    Buffer first. Calling `.toString(encoding)` on these directly ignores
 *    the encoding and yields e.g. "104,105" or "[object ArrayBuffer]".
 *  - Anything else: falls back to `value.toString(encoding)`.
 *
 * @param {*} value - Data to decode.
 * @param {string} [encoding] - Buffer encoding name (e.g. "utf-8", "base64").
 * @returns {Promise<string>} The decoded string.
 * @throws {TypeError} If `value` is null or undefined.
 */
async function $0f748d6318103cdf$var$bufferToString(value, encoding) {
    if (value == null) throw new TypeError("bufferToString: value is required");
    if (value instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of value)chunks.push(Buffer.from(chunk));
        return Buffer.concat(chunks).toString(encoding);
    }
    if (value instanceof Blob) return Buffer.from(await value.arrayBuffer()).toString(encoding);
    if (Buffer.isBuffer(value)) return value.toString(encoding);
    if (value instanceof ArrayBuffer) return Buffer.from(value).toString(encoding);
    // Wrap the view's exact byte window (no copy) so multi-byte element types
    // and offset views decode their real bytes.
    if (ArrayBuffer.isView(value)) return Buffer.from(value.buffer, value.byteOffset, value.byteLength).toString(encoding);
    return value.toString(encoding);
}
var $0f748d6318103cdf$export$2e2bcd8739ae039 = $0f748d6318103cdf$var$bufferToString;
|
|
1437
|
+
|
|
1412
1438
|
|
|
1413
1439
|
async function $05e3378f7c17d263$var$parsePdf(buffer) {
|
|
1414
1440
|
const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
|
|
@@ -1433,16 +1459,23 @@ async function $05e3378f7c17d263$var$parsePdf(buffer) {
|
|
|
1433
1459
|
// Return the collected output as a single newline-joined string
|
|
1434
1460
|
return (0, $dxT2C$lodashes.join)(output, "\n");
|
|
1435
1461
|
}
|
|
1436
|
-
async function $05e3378f7c17d263$var$parseDocument(file) {
|
|
1462
|
+
/**
 * Extract text from a document, choosing the parser from its MIME type.
 *
 * The input is first normalized to a Buffer. Previously only ReadableStream
 * inputs were materialized, so any other input reached the parsers as
 * `undefined`.
 *
 * Dispatch:
 *  - OOXML / OpenDocument word, presentation and spreadsheet types are passed
 *    to officeparser's `parseOfficeAsync`.
 *  - "application/pdf" is passed to the local PDF parser.
 *  - Anything else is decoded as UTF-8 text.
 *
 * @param {ReadableStream|Blob|Buffer|ArrayBuffer|ArrayBufferView|string} file
 *   Document contents.
 * @param {string} [fileType] - MIME type of the document.
 * @returns {Promise<*>} Whatever the selected parser returns; a string for the
 *   PDF and plain-text paths.
 * @throws {TypeError} If `file` is not one of the supported input kinds.
 */
async function $05e3378f7c17d263$var$parseDocument(file, fileType) {
    let buffer;
    if (file instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of file)chunks.push(Buffer.from(chunk));
        buffer = Buffer.concat(chunks);
    } else if (file instanceof Blob) buffer = Buffer.from(await file.arrayBuffer());
    else if (Buffer.isBuffer(file)) buffer = file;
    else if (file instanceof ArrayBuffer) buffer = Buffer.from(file);
    else if (ArrayBuffer.isView(file)) buffer = Buffer.from(file.buffer, file.byteOffset, file.byteLength);
    else if (typeof file === "string") buffer = Buffer.from(file, "utf-8");
    else throw new TypeError("parseDocument: unsupported file input");
    const officeMimeTypes = [
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.oasis.opendocument.presentation",
        "application/vnd.oasis.opendocument.spreadsheet"
    ];
    if (officeMimeTypes.includes(fileType)) return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
    if (fileType === "application/pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
    // The bytes are already in memory, so decode them directly.
    return buffer.toString("utf-8");
}
var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
|
|
1448
1481
|
|
|
@@ -1457,21 +1490,19 @@ function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
|
|
|
1457
1490
|
}
|
|
1458
1491
|
|
|
1459
1492
|
|
|
1460
|
-
|
|
1461
|
-
|
|
1493
|
+
/**
 * Build a base64 `data:` URI for the given file contents.
 *
 * @param {ReadableStream|Blob|Buffer|ArrayBuffer|ArrayBufferView|string} file
 *   Contents to embed. Streams are fully drained; Blobs are read via
 *   `arrayBuffer()`; typed arrays / DataViews contribute their exact bytes
 *   (`Buffer.from(view)` would copy element values instead, truncating
 *   non-8-bit views); anything else is handed to `Buffer.from` as before.
 * @param {string} mimeType - MIME type placed in the URI header.
 * @returns {Promise<string>} A URI of the form `data:<mimeType>;base64,<payload>`.
 * @throws {Error} If `mimeType` is missing or empty.
 */
async function $15c432f5f036a88a$var$getDataUri(file, mimeType) {
    if (!mimeType) throw new Error("Mime type is required");
    let buffer;
    if (file instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of file)chunks.push(Buffer.from(chunk));
        buffer = Buffer.concat(chunks);
    } else if (file instanceof Blob) buffer = Buffer.from(await file.arrayBuffer());
    else if (ArrayBuffer.isView(file)) buffer = Buffer.from(file.buffer, file.byteOffset, file.byteLength);
    else buffer = Buffer.from(file);
    // The payload is not limited to images: the caller-supplied mime type is used as-is.
    return `data:${mimeType};base64,${buffer.toString("base64")}`;
}
var $15c432f5f036a88a$export$2e2bcd8739ae039 = $15c432f5f036a88a$var$getDataUri;
|
|
@@ -1483,6 +1514,7 @@ async function $9a2529096849a04f$var$teeStream(stream) {
|
|
|
1483
1514
|
var $9a2529096849a04f$export$2e2bcd8739ae039 = $9a2529096849a04f$var$teeStream;
|
|
1484
1515
|
|
|
1485
1516
|
|
|
1517
|
+
|
|
1486
1518
|
function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
1487
1519
|
expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));
|
|
1488
1520
|
expression.registerFunction("base64decode", (0, $77081a2d6d46cd50$export$2e2bcd8739ae039));
|
|
@@ -1564,6 +1596,7 @@ function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
|
1564
1596
|
expression.registerFunction("recursiveCharacterTextSplitter", (0, $fe4dcef142601b8c$export$e600492876ee595b));
|
|
1565
1597
|
expression.registerFunction("getDataUri", (0, $15c432f5f036a88a$export$2e2bcd8739ae039));
|
|
1566
1598
|
expression.registerFunction("teeStream", (0, $9a2529096849a04f$export$2e2bcd8739ae039));
|
|
1599
|
+
expression.registerFunction("bufferToString", (0, $0f748d6318103cdf$export$2e2bcd8739ae039));
|
|
1567
1600
|
return expression;
|
|
1568
1601
|
}
|
|
1569
1602
|
|