@truto/truto-jsonata 1.0.10 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/module.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import $hgUW1$jsonata from "jsonata";
2
- import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber} from "lodash-es";
2
+ import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber} from "lodash-es";
3
3
  import {DateTime as $hgUW1$DateTime} from "luxon";
4
4
  import {Lexer as $hgUW1$Lexer, marked as $hgUW1$marked} from "marked";
5
5
  import {diceCoefficient as $hgUW1$diceCoefficient} from "dice-coefficient";
@@ -9,6 +9,13 @@ import {gfm as $hgUW1$gfm} from "turndown-plugin-gfm";
9
9
  import {xml2js as $hgUW1$xml2js, js2xml as $hgUW1$js2xml} from "xml-js";
10
10
  import $hgUW1$json2md from "json2md";
11
11
  import $hgUW1$mime from "mime";
12
+ import $hgUW1$pmap from "p-map";
13
+ import $hgUW1$pretry from "p-retry";
14
+ import {Buffer as $hgUW1$Buffer} from "buffer";
15
+ import {parseOfficeAsync as $hgUW1$parseOfficeAsync} from "officeparser";
16
+ import {fileTypeFromBuffer as $hgUW1$fileTypeFromBuffer} from "file-type";
17
+ import {resolvePDFJS as $hgUW1$resolvePDFJS} from "pdfjs-serverless";
18
+ import {RecursiveCharacterTextSplitter as $hgUW1$RecursiveCharacterTextSplitter} from "@langchain/textsplitters";
12
19
 
13
20
  function $parcel$interopDefault(a) {
14
21
  return a && a.__esModule ? a.default : a;
@@ -1339,23 +1346,138 @@ async function $22f22d9ed76050e5$var$convertMarkdownToHtml(markdown, options) {
1339
1346
  var $22f22d9ed76050e5$export$2e2bcd8739ae039 = $22f22d9ed76050e5$var$convertMarkdownToHtml;
1340
1347
 
1341
1348
 
1349
+
1350
+
1351
+
1342
1352
  async function $bf2fdaa075f6a5e5$var$generateEmbeddingsCohere(body, api_key) { // Calls the Cohere v2 embed endpoint with body, sending api_key as a Bearer token.
1343
- const response = await fetch("https://api.cohere.com/v2/embed", {
1344
- method: "POST",
1345
- headers: {
1346
- accept: "application/json",
1347
- "content-type": "application/json",
1348
- "user-agent": "truto",
1349
- Authorization: `Bearer ${api_key}`
1350
- },
1351
- body: JSON.stringify(body)
1353
+ if (!(0, $hgUW1$isEmpty)(body.texts)) { // Text branch: taken only when body.texts is non-empty.
1354
+ const chunks = (0, $hgUW1$chunk)((0, $hgUW1$castArray)(body.texts), 20); // Split the texts into batches of at most 20 per request.
1355
+ return await (0, $hgUW1$pmap)(chunks, async (chunk)=>{ // Resolves to an array holding one parsed response per batch, not a single merged response.
1356
+ return await (0, $hgUW1$pretry)(async ()=>{
1357
+ const response = await fetch("https://api.cohere.com/v2/embed", {
1358
+ method: "POST",
1359
+ headers: {
1360
+ accept: "application/json",
1361
+ "content-type": "application/json",
1362
+ "user-agent": "truto",
1363
+ Authorization: `Bearer ${api_key}`
1364
+ },
1365
+ body: JSON.stringify({
1366
+ ...body,
1367
+ texts: chunk
1368
+ })
1369
+ });
1370
+ if (!response.ok) throw new Error(await response.text()); // Any non-ok status throws and is therefore retried. NOTE(review): that includes 4xx client errors - confirm this is intended.
1371
+ return await response.json();
1372
+ }, {
1373
+ retries: 10,
1374
+ maxTimeout: 600000 // Upper bound passed to p-retry for the delay between attempts (600000 ms = 10 minutes).
1375
+ });
1376
+ }, {
1377
+ concurrency: 1 // Batches are sent one at a time.
1378
+ });
1379
+ } else if (!(0, $hgUW1$isEmpty)(body.images)) return await (0, $hgUW1$pretry)(async ()=>{ // Image branch: body is sent unchanged in a single retried request.
1380
+ const response = await fetch("https://api.cohere.com/v2/embed", {
1381
+ method: "POST",
1382
+ headers: {
1383
+ accept: "application/json",
1384
+ "content-type": "application/json",
1385
+ "user-agent": "truto",
1386
+ Authorization: `Bearer ${api_key}`
1387
+ },
1388
+ body: JSON.stringify(body)
1389
+ });
1390
+ if (!response.ok) throw new Error(await response.text());
1391
+ return await response.json();
1392
+ }, {
1393
+ retries: 10,
1394
+ maxTimeout: 600000
1352
1395
  });
1353
- if (!response.ok) throw new Error(await response.text());
1354
- return await response.json();
1355
1396
  } // NOTE(review): when body.texts and body.images are both empty, neither branch runs; no request is made and the function resolves to undefined (the removed version always sent the request).
1356
1397
  var $bf2fdaa075f6a5e5$export$2e2bcd8739ae039 = $bf2fdaa075f6a5e5$var$generateEmbeddingsCohere;
1357
1398
 
1358
1399
 
1400
+
1401
+
1402
+
1403
+
1404
+
1405
+ var $f29f00a7536a3143$require$Buffer = $hgUW1$Buffer;
1406
+ async function $f29f00a7536a3143$var$parsePdf(buffer) { // Extracts the text of every page of the PDF held in buffer and returns it as one string.
1407
+ const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength); // Copy exactly this view's bytes out of its backing ArrayBuffer. NOTE(review): buffer.buffer is read without optional chaining, so the ?. on the other accesses cannot guard against a nullish buffer.
1408
+ const { getDocument: getDocument } = await (0, $hgUW1$resolvePDFJS)();
1409
+ const doc = await getDocument({
1410
+ data: data,
1411
+ useSystemFonts: true
1412
+ }).promise;
1413
+ // Collects one text entry per page that has text (no document metadata is read here)
1414
+ const output = [];
1415
+ // Pages are numbered from 1: visit 1..numPages and fetch each page's text content
1416
+ for(let i = 1; i <= doc.numPages; i++){
1417
+ const page = await doc.getPage(i);
1418
+ const textContent = await page.getTextContent();
1419
+ const contents = textContent.items.map((item)=>(0, $hgUW1$get)(item, "str")).join(" "); // Join the str field of every text item with single spaces.
1420
+ // Skip pages whose joined text is empty; otherwise store the page number label followed by the text
1421
+ if (contents) output.push((0, $hgUW1$join)([
1422
+ `Page Number: ${i}`,
1423
+ contents
1424
+ ], "/\n")); // NOTE(review): the separator is a slash followed by a newline - confirm the slash is intentional.
1425
+ }
1426
+ // Return all page entries as a single newline-separated string (plain text, not JSON)
1427
+ return (0, $hgUW1$join)(output, "\n");
1428
+ }
1429
+ async function $f29f00a7536a3143$var$parseDocument(file) { // Extracts text from a document: a streamed PDF goes through parsePdf, everything else through parseOfficeAsync.
1430
+ if (file instanceof ReadableStream) { // Streams are read fully into memory so the file type can be detected from the bytes.
1431
+ const chunks = [];
1432
+ for await (const chunk of file)chunks.push($f29f00a7536a3143$require$Buffer.from(chunk));
1433
+ const buffer = $f29f00a7536a3143$require$Buffer.concat(chunks);
1434
+ const fileExt = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.ext; // The ?. leaves fileExt undefined when no detection result is returned.
1435
+ if (fileExt === "pdf") return await $f29f00a7536a3143$var$parsePdf(buffer);
1436
+ return await (0, $hgUW1$parseOfficeAsync)(buffer); // Any non-pdf or undetected type is handed to parseOfficeAsync.
1437
+ }
1438
+ return await (0, $hgUW1$parseOfficeAsync)(file); // Non-stream input is passed through as-is; no PDF detection happens on this path.
1439
+ }
1440
+ var $f29f00a7536a3143$export$2e2bcd8739ae039 = $f29f00a7536a3143$var$parseDocument;
1441
+
1442
+
1443
+
1444
+ function $a12871c9a32cbf3b$export$e600492876ee595b(text, options = { // Splits text with RecursiveCharacterTextSplitter. The defaults below apply only when options is omitted or undefined; a partial options object is not merged with them.
1445
+ chunkSize: 200,
1446
+ chunkOverlap: 60
1447
+ }) {
1448
+ const splitter = new (0, $hgUW1$RecursiveCharacterTextSplitter)(options);
1449
+ return splitter.splitText(text); // Returned unchanged from the splitter. NOTE(review): presumably a promise of string chunks - confirm against the library.
1450
+ }
1451
+
1452
+
1453
+
1454
+
1455
+ var $6b4f9d8bcf8ab446$require$Buffer = $hgUW1$Buffer;
1456
+ async function $6b4f9d8bcf8ab446$var$getDataUri(file) { // Builds a base64 data URI for file (a ReadableStream, a Blob, or a value accepted by Buffer.from), detecting the MIME type from the bytes.
1457
+ if (file instanceof ReadableStream) { // Stream path: read every chunk into one Buffer first.
1458
+ const chunks = [];
1459
+ for await (const chunk of file)chunks.push($6b4f9d8bcf8ab446$require$Buffer.from(chunk));
1460
+ const buffer = $6b4f9d8bcf8ab446$require$Buffer.concat(chunks);
1461
+ const base64Image = buffer.toString("base64");
1462
+ const mimeType = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.mime;
1463
+ // Construct the data URI using the detected MIME type (not necessarily PNG)
1464
+ return `data:${mimeType};base64,${base64Image}`; // NOTE(review): mimeType is undefined when no detection result is returned, yielding a URI that starts with data:undefined.
1465
+ }
1466
+ const arrayBuffer = file instanceof Blob ? await file.arrayBuffer() : file; // Blobs are read into an ArrayBuffer; any other value is passed to Buffer.from unchanged.
1467
+ const buffer = $6b4f9d8bcf8ab446$require$Buffer.from(arrayBuffer);
1468
+ const base64Image = buffer.toString("base64");
1469
+ const mimeType = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.mime;
1470
+ return `data:${mimeType};base64,${base64Image}`; // Same encoding and MIME detection as the stream path above.
1471
+ }
1472
+ var $6b4f9d8bcf8ab446$export$2e2bcd8739ae039 = $6b4f9d8bcf8ab446$var$getDataUri;
1473
+
1474
+
1475
+ async function $5e08a3608e719b52$var$teeStream(stream) { // Async wrapper that resolves to whatever stream.tee() returns; no awaiting happens inside.
1476
+ return stream.tee(); // For a web ReadableStream this is a two-element array of branches; the original stream becomes locked.
1477
+ }
1478
+ var $5e08a3608e719b52$export$2e2bcd8739ae039 = $5e08a3608e719b52$var$teeStream;
1479
+
1480
+
1359
1481
  function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1360
1482
  expression.registerFunction("dtFromIso", (0, $9e00534fc92334a4$export$2e2bcd8739ae039));
1361
1483
  expression.registerFunction("base64decode", (0, $52da679a67e0b066$export$2e2bcd8739ae039));
@@ -1423,6 +1545,9 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1423
1545
  expression.registerFunction("values", function(obj) {
1424
1546
  return (0, $hgUW1$values)(obj);
1425
1547
  });
1548
+ expression.registerFunction("chunk", function(arr, size) { // Registers chunk: splits arr into groups of size elements using lodash chunk.
1549
+ return (0, $hgUW1$chunk)((0, $hgUW1$castArray)(arr), size); // castArray wraps a non-array arr so a single value is treated as a one-element array.
1550
+ });
1426
1551
  expression.registerFunction("wrap", function(value, wrapper, endWrapper) {
1427
1552
  return (0, $hgUW1$join)([
1428
1553
  wrapper,
@@ -1430,6 +1555,10 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1430
1555
  endWrapper || wrapper
1431
1556
  ], "");
1432
1557
  });
1558
+ expression.registerFunction("parseDocument", (0, $f29f00a7536a3143$export$2e2bcd8739ae039)); // This and the next three added statements register helpers on the expression under the given names.
1559
+ expression.registerFunction("recursiveCharacterTextSplitter", (0, $a12871c9a32cbf3b$export$e600492876ee595b));
1560
+ expression.registerFunction("getDataUri", (0, $6b4f9d8bcf8ab446$export$2e2bcd8739ae039));
1561
+ expression.registerFunction("teeStream", (0, $5e08a3608e719b52$export$2e2bcd8739ae039));
1433
1562
  return expression;
1434
1563
  }
1435
1564