@truto/truto-jsonata 1.0.10 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -0
- package/dist/main.cjs +130 -11
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +136 -12
- package/dist/module.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +8 -1
package/dist/module.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import $hgUW1$jsonata from "jsonata";
|
|
2
|
-
import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber} from "lodash-es";
|
|
2
|
+
import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber} from "lodash-es";
|
|
3
3
|
import {DateTime as $hgUW1$DateTime} from "luxon";
|
|
4
4
|
import {Lexer as $hgUW1$Lexer, marked as $hgUW1$marked} from "marked";
|
|
5
5
|
import {diceCoefficient as $hgUW1$diceCoefficient} from "dice-coefficient";
|
|
@@ -9,6 +9,14 @@ import {gfm as $hgUW1$gfm} from "turndown-plugin-gfm";
|
|
|
9
9
|
import {xml2js as $hgUW1$xml2js, js2xml as $hgUW1$js2xml} from "xml-js";
|
|
10
10
|
import $hgUW1$json2md from "json2md";
|
|
11
11
|
import $hgUW1$mime from "mime";
|
|
12
|
+
import $hgUW1$pmap from "p-map";
|
|
13
|
+
import $hgUW1$pretry from "p-retry";
|
|
14
|
+
import {Buffer as $hgUW1$Buffer} from "buffer";
|
|
15
|
+
import {parseOfficeAsync as $hgUW1$parseOfficeAsync} from "officeparser";
|
|
16
|
+
import {fileTypeFromBuffer as $hgUW1$fileTypeFromBuffer} from "file-type";
|
|
17
|
+
import {resolvePDFJS as $hgUW1$resolvePDFJS} from "pdfjs-serverless";
|
|
18
|
+
import {RecursiveCharacterTextSplitter as $hgUW1$RecursiveCharacterTextSplitter} from "@langchain/textsplitters";
|
|
19
|
+
import {ReadableStream as $hgUW1$ReadableStream} from "node:stream/web";
|
|
12
20
|
|
|
13
21
|
function $parcel$interopDefault(a) {
|
|
14
22
|
return a && a.__esModule ? a.default : a;
|
|
@@ -1339,23 +1347,133 @@ async function $22f22d9ed76050e5$var$convertMarkdownToHtml(markdown, options) {
|
|
|
1339
1347
|
var $22f22d9ed76050e5$export$2e2bcd8739ae039 = $22f22d9ed76050e5$var$convertMarkdownToHtml;
|
|
1340
1348
|
|
|
1341
1349
|
|
|
1350
|
+
|
|
1351
|
+
|
|
1352
|
+
|
|
1342
1353
|
/**
 * Tells whether an HTTP status is worth retrying: request timeout (408),
 * rate limiting (429) and server-side failures (5xx). Other statuses
 * (bad request, bad credentials, ...) fail identically on every attempt.
 *
 * @param {number} status - HTTP status code of the failed response.
 * @returns {boolean} true when a later attempt may succeed.
 */
function $bf2fdaa075f6a5e5$var$isRetryableStatus(status) {
    return status === 408 || status === 429 || status >= 500;
}

/**
 * POSTs one payload to the Cohere v2 embed endpoint, retrying transient failures.
 *
 * @param {object} payload - JSON-serialisable request body.
 * @param {string} api_key - Sent as a Bearer token.
 * @returns {Promise<object>} The parsed JSON response.
 * @throws {Error} With the response text as message (and the HTTP status on
 *   `.status`) once the request fails for good.
 */
async function $bf2fdaa075f6a5e5$var$postCohereEmbed(payload, api_key) {
    return await (0, $hgUW1$pretry)(async ()=>{
        const response = await fetch("https://api.cohere.com/v2/embed", {
            method: "POST",
            headers: {
                accept: "application/json",
                "content-type": "application/json",
                "user-agent": "truto",
                Authorization: `Bearer ${api_key}`
            },
            body: JSON.stringify(payload)
        });
        if (!response.ok) {
            const error = new Error(await response.text());
            // Remember the status so the retry policy below can inspect it.
            error.status = response.status;
            throw error;
        }
        return await response.json();
    }, {
        retries: 10,
        maxTimeout: 600000,
        onFailedAttempt: (failure)=>{
            // Depending on the p-retry major version the callback receives either
            // the error itself or a context object carrying it under `.error`.
            const error = failure?.error ?? failure;
            // Throwing from this hook aborts the remaining retries, so permanent
            // client errors surface immediately instead of after ten backoffs.
            if (typeof error?.status === "number" && !$bf2fdaa075f6a5e5$var$isRetryableStatus(error.status)) throw error;
        }
    });
}

/**
 * Requests embeddings from Cohere for the texts or images in `body`.
 *
 * - When `body.texts` is non-empty it is split into batches of 20 and the
 *   batches are sent one at a time; every other field of `body` is repeated
 *   in each request. The result is an ARRAY with one response per batch.
 * - Otherwise, when `body.images` is non-empty, `body` is sent as-is and the
 *   result is the single response OBJECT.
 * - When both are empty nothing is sent and the result is `undefined`.
 *
 * @param {object} body - Cohere embed request body (`texts` or `images` plus options).
 * @param {string} api_key - Cohere API key.
 * @returns {Promise<object[]|object|undefined>} See the cases above.
 * @throws {Error} When a request ultimately fails.
 */
async function $bf2fdaa075f6a5e5$var$generateEmbeddingsCohere(body, api_key) {
    if (!(0, $hgUW1$isEmpty)(body.texts)) {
        const batches = (0, $hgUW1$chunk)((0, $hgUW1$castArray)(body.texts), 20);
        return await (0, $hgUW1$pmap)(batches, (batch)=>$bf2fdaa075f6a5e5$var$postCohereEmbed({
                ...body,
                texts: batch
            }, api_key), {
            concurrency: 1
        });
    }
    if (!(0, $hgUW1$isEmpty)(body.images)) return await $bf2fdaa075f6a5e5$var$postCohereEmbed(body, api_key);
    // Neither texts nor images: nothing to embed, resolve to undefined.
    return undefined;
}
|
|
1356
1398
|
// Alias under which generateEmbeddingsCohere is referenced outside its own module section.
var $bf2fdaa075f6a5e5$export$2e2bcd8739ae039 = $bf2fdaa075f6a5e5$var$generateEmbeddingsCohere;
|
|
1357
1399
|
|
|
1358
1400
|
|
|
1401
|
+
|
|
1402
|
+
|
|
1403
|
+
|
|
1404
|
+
|
|
1405
|
+
|
|
1406
|
+
// Section-local alias of the Buffer binding imported from "buffer"; used by parseDocument below.
var $f29f00a7536a3143$require$Buffer = $hgUW1$Buffer;
|
|
1407
|
+
/**
 * Extracts the text of a PDF, page by page.
 *
 * Each page that yields any text contributes a `Page Number: <n>` header
 * followed by the page's text items joined with single spaces; pages are
 * separated by a newline. Pages without text are left out.
 *
 * @param {Buffer|Uint8Array} buffer - The raw PDF bytes.
 * @returns {Promise<string>} The concatenated text of all pages.
 * @throws Whatever the PDF library rejects with when loading or reading fails.
 */
async function $f29f00a7536a3143$var$parsePdf(buffer) {
    // Copy exactly the bytes this view covers: the view may start at an offset
    // inside a larger underlying ArrayBuffer.
    const data = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
    const { getDocument } = await (0, $hgUW1$resolvePDFJS)();
    const doc = await getDocument({
        data,
        useSystemFonts: true
    }).promise;
    try {
        const output = [];
        // PDF page numbers are 1-based.
        for(let i = 1; i <= doc.numPages; i++){
            const page = await doc.getPage(i);
            const textContent = await page.getTextContent();
            const contents = textContent.items.map((item)=>(0, $hgUW1$get)(item, "str")).join(" ");
            // NOTE(review): the "/\n" separator puts a literal slash after the page
            // header; it looks like a typo for "\n" but is kept because callers may
            // already depend on the exact output — confirm before changing.
            if (contents) output.push((0, $hgUW1$join)([
                `Page Number: ${i}`,
                contents
            ], "/\n"));
        }
        return (0, $hgUW1$join)(output, "\n");
    } finally{
        // Release the document's resources even when a page fails to load.
        await doc.destroy();
    }
}
|
|
1430
|
+
/**
 * Collects the bytes of `file` into a single Buffer.
 *
 * In-memory inputs (Buffer, typed array / DataView, ArrayBuffer, Blob) are
 * converted directly; anything else is consumed as a sync or async iterable
 * of chunks, e.g. a stream.
 *
 * @param {*} file - Byte source.
 * @returns {Promise<Buffer>} All bytes of `file`.
 */
async function $f29f00a7536a3143$var$readIntoBuffer(file) {
    const NodeBuffer = $f29f00a7536a3143$require$Buffer;
    if (NodeBuffer.isBuffer(file)) return file;
    // Iterating a typed array would yield single numbers, which Buffer.from
    // rejects, so wrap the bytes the view covers instead.
    if (ArrayBuffer.isView(file)) return NodeBuffer.from(file.buffer, file.byteOffset, file.byteLength);
    if (file instanceof ArrayBuffer) return NodeBuffer.from(file);
    if (typeof Blob !== "undefined" && file instanceof Blob) return NodeBuffer.from(await file.arrayBuffer());
    const chunks = [];
    for await (const chunk of file)chunks.push(NodeBuffer.from(chunk));
    return NodeBuffer.concat(chunks);
}

/**
 * Extracts the text of a document.
 *
 * The content is read into memory and its type is sniffed from the bytes:
 * PDFs go through parsePdf, everything else is handed to officeparser.
 * A falsy `file` is passed to officeparser unchanged.
 *
 * @param {*} file - Stream / iterable of chunks, Buffer, typed array,
 *   ArrayBuffer or Blob holding the document.
 * @returns {Promise<string>} Whatever parsePdf or parseOfficeAsync resolves to.
 */
async function $f29f00a7536a3143$var$parseDocument(file) {
    if (!file) return await (0, $hgUW1$parseOfficeAsync)(file);
    const buffer = await $f29f00a7536a3143$var$readIntoBuffer(file);
    const fileExt = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.ext;
    if (fileExt === "pdf") return await $f29f00a7536a3143$var$parsePdf(buffer);
    return await (0, $hgUW1$parseOfficeAsync)(buffer);
}
|
|
1441
|
+
// Alias under which parseDocument is registered as the "parseDocument" expression function.
var $f29f00a7536a3143$export$2e2bcd8739ae039 = $f29f00a7536a3143$var$parseDocument;
|
|
1442
|
+
|
|
1443
|
+
|
|
1444
|
+
|
|
1445
|
+
/**
 * Splits `text` into chunks with a RecursiveCharacterTextSplitter.
 *
 * The defaults (chunkSize 200, chunkOverlap 60) are applied per option, so a
 * caller overriding only one of them keeps the other. Previously a partial
 * options object replaced the defaults wholesale, leaving the omitted option
 * to the splitter's own default.
 *
 * @param {string} text - Text to split.
 * @param {object} [options] - Splitter options; merged over the defaults.
 *   `null` is treated like an empty object.
 * @returns {*} Whatever `splitText` returns (not awaited here).
 */
function $a12871c9a32cbf3b$export$e600492876ee595b(text, options = {
    chunkSize: 200,
    chunkOverlap: 60
}) {
    const settings = {
        chunkSize: 200,
        chunkOverlap: 60,
        ...options
    };
    const splitter = new (0, $hgUW1$RecursiveCharacterTextSplitter)(settings);
    return splitter.splitText(text);
}
|
|
1452
|
+
|
|
1453
|
+
|
|
1454
|
+
|
|
1455
|
+
|
|
1456
|
+
|
|
1457
|
+
// Section-local alias of the Buffer binding imported from "buffer"; used by getDataUri below.
var $6b4f9d8bcf8ab446$require$Buffer = $hgUW1$Buffer;
|
|
1458
|
+
/**
 * Builds a base64 `data:` URI from a file's bytes.
 *
 * The MIME type is sniffed from the content. When sniffing finds nothing,
 * a Blob's own `type` is used if it has one, and
 * `application/octet-stream` otherwise — never the literal "undefined".
 *
 * @param {*} file - A web ReadableStream, a Blob, or anything `Buffer.from`
 *   accepts (Buffer, ArrayBuffer, typed array, string, ...).
 * @returns {Promise<string>} `data:<mime>;base64,<payload>`.
 */
async function $6b4f9d8bcf8ab446$var$getDataUri(file) {
    const NodeBuffer = $6b4f9d8bcf8ab446$require$Buffer;
    const isBlob = file instanceof Blob;
    let buffer;
    if (file instanceof (0, $hgUW1$ReadableStream)) {
        // Drain the stream chunk by chunk, then stitch the chunks together.
        const chunks = [];
        for await (const chunk of file)chunks.push(NodeBuffer.from(chunk));
        buffer = NodeBuffer.concat(chunks);
    } else buffer = NodeBuffer.from(isBlob ? await file.arrayBuffer() : file);
    const detectedMime = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.mime;
    const declaredMime = isBlob ? file.type : undefined;
    // `||` on purpose: an empty Blob type ("") must fall through as well.
    const mimeType = detectedMime || declaredMime || "application/octet-stream";
    return `data:${mimeType};base64,${buffer.toString("base64")}`;
}
|
|
1474
|
+
// Alias under which getDataUri is registered as the "getDataUri" expression function.
var $6b4f9d8bcf8ab446$export$2e2bcd8739ae039 = $6b4f9d8bcf8ab446$var$getDataUri;
|
|
1475
|
+
|
|
1476
|
+
|
|
1359
1477
|
function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
|
|
1360
1478
|
expression.registerFunction("dtFromIso", (0, $9e00534fc92334a4$export$2e2bcd8739ae039));
|
|
1361
1479
|
expression.registerFunction("base64decode", (0, $52da679a67e0b066$export$2e2bcd8739ae039));
|
|
@@ -1423,6 +1541,9 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
|
|
|
1423
1541
|
expression.registerFunction("values", function(obj) {
|
|
1424
1542
|
return (0, $hgUW1$values)(obj);
|
|
1425
1543
|
});
|
|
1544
|
+
expression.registerFunction("chunk", function(arr, size) {
|
|
1545
|
+
return (0, $hgUW1$chunk)((0, $hgUW1$castArray)(arr), size);
|
|
1546
|
+
});
|
|
1426
1547
|
expression.registerFunction("wrap", function(value, wrapper, endWrapper) {
|
|
1427
1548
|
return (0, $hgUW1$join)([
|
|
1428
1549
|
wrapper,
|
|
@@ -1430,6 +1551,9 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
|
|
|
1430
1551
|
endWrapper || wrapper
|
|
1431
1552
|
], "");
|
|
1432
1553
|
});
|
|
1554
|
+
expression.registerFunction("parseDocument", (0, $f29f00a7536a3143$export$2e2bcd8739ae039));
|
|
1555
|
+
expression.registerFunction("recursiveCharacterTextSplitter", (0, $a12871c9a32cbf3b$export$e600492876ee595b));
|
|
1556
|
+
expression.registerFunction("getDataUri", (0, $6b4f9d8bcf8ab446$export$2e2bcd8739ae039));
|
|
1433
1557
|
return expression;
|
|
1434
1558
|
}
|
|
1435
1559
|
|