@truto/truto-jsonata 1.0.10 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/module.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import $hgUW1$jsonata from "jsonata";
2
- import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber} from "lodash-es";
2
+ import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber} from "lodash-es";
3
3
  import {DateTime as $hgUW1$DateTime} from "luxon";
4
4
  import {Lexer as $hgUW1$Lexer, marked as $hgUW1$marked} from "marked";
5
5
  import {diceCoefficient as $hgUW1$diceCoefficient} from "dice-coefficient";
@@ -9,6 +9,14 @@ import {gfm as $hgUW1$gfm} from "turndown-plugin-gfm";
9
9
  import {xml2js as $hgUW1$xml2js, js2xml as $hgUW1$js2xml} from "xml-js";
10
10
  import $hgUW1$json2md from "json2md";
11
11
  import $hgUW1$mime from "mime";
12
+ import $hgUW1$pmap from "p-map";
13
+ import $hgUW1$pretry from "p-retry";
14
+ import {Buffer as $hgUW1$Buffer} from "buffer";
15
+ import {parseOfficeAsync as $hgUW1$parseOfficeAsync} from "officeparser";
16
+ import {fileTypeFromBuffer as $hgUW1$fileTypeFromBuffer} from "file-type";
17
+ import {resolvePDFJS as $hgUW1$resolvePDFJS} from "pdfjs-serverless";
18
+ import {RecursiveCharacterTextSplitter as $hgUW1$RecursiveCharacterTextSplitter} from "@langchain/textsplitters";
19
+ import {ReadableStream as $hgUW1$ReadableStream} from "node:stream/web";
12
20
 
13
21
  function $parcel$interopDefault(a) {
14
22
  return a && a.__esModule ? a.default : a;
@@ -1339,23 +1347,133 @@ async function $22f22d9ed76050e5$var$convertMarkdownToHtml(markdown, options) {
1339
1347
  var $22f22d9ed76050e5$export$2e2bcd8739ae039 = $22f22d9ed76050e5$var$convertMarkdownToHtml;
1340
1348
 
1341
1349
 
1350
+
1351
+
1352
+
1342
1353
  async function $bf2fdaa075f6a5e5$var$generateEmbeddingsCohere(body, api_key) {
1343
- const response = await fetch("https://api.cohere.com/v2/embed", {
1344
- method: "POST",
1345
- headers: {
1346
- accept: "application/json",
1347
- "content-type": "application/json",
1348
- "user-agent": "truto",
1349
- Authorization: `Bearer ${api_key}`
1350
- },
1351
- body: JSON.stringify(body)
1354
+ if (!(0, $hgUW1$isEmpty)(body.texts)) {
1355
+ const chunks = (0, $hgUW1$chunk)((0, $hgUW1$castArray)(body.texts), 20);
1356
+ return await (0, $hgUW1$pmap)(chunks, async (chunk)=>{
1357
+ return await (0, $hgUW1$pretry)(async ()=>{
1358
+ const response = await fetch("https://api.cohere.com/v2/embed", {
1359
+ method: "POST",
1360
+ headers: {
1361
+ accept: "application/json",
1362
+ "content-type": "application/json",
1363
+ "user-agent": "truto",
1364
+ Authorization: `Bearer ${api_key}`
1365
+ },
1366
+ body: JSON.stringify({
1367
+ ...body,
1368
+ texts: chunk
1369
+ })
1370
+ });
1371
+ if (!response.ok) throw new Error(await response.text());
1372
+ return await response.json();
1373
+ }, {
1374
+ retries: 10,
1375
+ maxTimeout: 600000
1376
+ });
1377
+ }, {
1378
+ concurrency: 1
1379
+ });
1380
+ } else if (!(0, $hgUW1$isEmpty)(body.images)) return await (0, $hgUW1$pretry)(async ()=>{
1381
+ const response = await fetch("https://api.cohere.com/v2/embed", {
1382
+ method: "POST",
1383
+ headers: {
1384
+ accept: "application/json",
1385
+ "content-type": "application/json",
1386
+ "user-agent": "truto",
1387
+ Authorization: `Bearer ${api_key}`
1388
+ },
1389
+ body: JSON.stringify(body)
1390
+ });
1391
+ if (!response.ok) throw new Error(await response.text());
1392
+ return await response.json();
1393
+ }, {
1394
+ retries: 10,
1395
+ maxTimeout: 600000
1352
1396
  });
1353
- if (!response.ok) throw new Error(await response.text());
1354
- return await response.json();
1355
1397
  }
1356
1398
  var $bf2fdaa075f6a5e5$export$2e2bcd8739ae039 = $bf2fdaa075f6a5e5$var$generateEmbeddingsCohere;
1357
1399
 
1358
1400
 
1401
+
1402
+
1403
+
1404
+
1405
+
1406
var $f29f00a7536a3143$require$Buffer = $hgUW1$Buffer;
/**
 * Extract the text of a PDF, page by page.
 *
 * Each page that yields any text contributes a "Page Number: N" header
 * followed by the page's text items joined with single spaces; pages without
 * text are skipped. Page entries are joined with newlines.
 *
 * @param {Buffer|Uint8Array} buffer - Typed-array view over the raw PDF bytes.
 * @returns {Promise<string>} The extracted plain text (not JSON).
 * @throws {TypeError} When `buffer` is not a typed-array view.
 */
async function $f29f00a7536a3143$var$parsePdf(buffer) {
    const { byteOffset, byteLength } = buffer;
    // Copy out exactly the bytes this view covers: the backing ArrayBuffer may
    // be larger than the view itself.
    const data = buffer.buffer.slice(byteOffset, byteOffset + byteLength);
    const { getDocument } = await (0, $hgUW1$resolvePDFJS)();
    const doc = await getDocument({
        data: data,
        useSystemFonts: true
    }).promise;
    try {
        const output = [];
        // PDF page numbers are 1-based.
        for(let pageNumber = 1; pageNumber <= doc.numPages; pageNumber++){
            const page = await doc.getPage(pageNumber);
            const textContent = await page.getTextContent();
            const contents = textContent.items.map((item)=>item?.str).join(" ");
            // NOTE(review): the "/\n" separator looks like a typo for "\n"; it is kept
            // because it is part of the emitted text — confirm before changing.
            if (contents) output.push([
                `Page Number: ${pageNumber}`,
                contents
            ].join("/\n"));
        }
        return output.join("\n");
    } finally{
        // Release the resources held by the loaded document.
        await doc.destroy();
    }
}
1430
/**
 * Load the complete contents of `file` into a single Buffer.
 *
 * In-memory inputs (Buffer, Uint8Array, ArrayBuffer, Blob) are converted
 * directly; anything else is consumed as a (sync or async) iterable of chunks.
 *
 * @param {*} file - Bytes or an iterable of chunks.
 * @returns {Promise<Buffer>} All bytes of the input.
 */
async function $f29f00a7536a3143$var$readIntoBuffer(file) {
    const NodeBuffer = $f29f00a7536a3143$require$Buffer;
    if (NodeBuffer.isBuffer(file)) return file;
    // Iterating a typed array yields numbers, which Buffer.from() rejects, so
    // wrap the same memory instead.
    if (file instanceof Uint8Array) return NodeBuffer.from(file.buffer, file.byteOffset, file.byteLength);
    if (file instanceof ArrayBuffer) return NodeBuffer.from(file);
    if (typeof Blob !== "undefined" && file instanceof Blob) return NodeBuffer.from(await file.arrayBuffer());
    const chunks = [];
    for await (const chunk of file)chunks.push(NodeBuffer.from(chunk));
    return NodeBuffer.concat(chunks);
}

/**
 * Extract text from a document.
 *
 * The input is read fully into memory and its type is sniffed from the bytes:
 * PDFs are handled by the PDF parser, everything else is handed to the office
 * parser. A falsy `file` is forwarded to the office parser unchanged.
 *
 * @param {*} file - Stream/iterable of chunks, Buffer, Uint8Array, ArrayBuffer or Blob.
 * @returns {Promise<*>} Whatever the selected parser resolves to.
 */
async function $f29f00a7536a3143$var$parseDocument(file) {
    if (!file) return await (0, $hgUW1$parseOfficeAsync)(file);
    const buffer = await $f29f00a7536a3143$var$readIntoBuffer(file);
    const fileExt = (await (0, $hgUW1$fileTypeFromBuffer)(buffer))?.ext;
    if (fileExt === "pdf") return await $f29f00a7536a3143$var$parsePdf(buffer);
    return await (0, $hgUW1$parseOfficeAsync)(buffer);
}
var $f29f00a7536a3143$export$2e2bcd8739ae039 = $f29f00a7536a3143$var$parseDocument;
1442
+
1443
+
1444
+
1445
/**
 * Split `text` into chunks using a RecursiveCharacterTextSplitter.
 *
 * @param {string} text - Text to split.
 * @param {object} [options] - Passed verbatim to the splitter constructor.
 *   When omitted, a chunk size of 200 and a chunk overlap of 60 are used.
 * @returns {*} Whatever the splitter's `splitText` returns.
 */
function $a12871c9a32cbf3b$export$e600492876ee595b(text, options = {
    chunkSize: 200,
    chunkOverlap: 60
}) {
    return new $hgUW1$RecursiveCharacterTextSplitter(options).splitText(text);
}
1452
+
1453
+
1454
+
1455
+
1456
+
1457
var $6b4f9d8bcf8ab446$require$Buffer = $hgUW1$Buffer;
/**
 * Read the whole of `file` into a Buffer.
 *
 * A ReadableStream is drained chunk by chunk, a Blob is read through
 * `arrayBuffer()`, and any other value is handed to `Buffer.from` as-is.
 *
 * @param {*} file - ReadableStream, Blob, or anything `Buffer.from` accepts.
 * @returns {Promise<Buffer>} All bytes of the input.
 */
async function $6b4f9d8bcf8ab446$var$readBytes(file) {
    if (file instanceof (0, $hgUW1$ReadableStream)) {
        const chunks = [];
        for await (const chunk of file)chunks.push($6b4f9d8bcf8ab446$require$Buffer.from(chunk));
        return $6b4f9d8bcf8ab446$require$Buffer.concat(chunks);
    }
    const source = file instanceof Blob ? await file.arrayBuffer() : file;
    return $6b4f9d8bcf8ab446$require$Buffer.from(source);
}

/**
 * Encode a file as a base64 `data:` URI.
 *
 * The MIME type is detected from the file's bytes; when detection fails,
 * "application/octet-stream" is used so the result is still a well-formed
 * data URI rather than "data:undefined;...".
 *
 * @param {*} file - ReadableStream, Blob, or anything `Buffer.from` accepts.
 * @returns {Promise<string>} `data:<mime>;base64,<payload>`.
 */
async function $6b4f9d8bcf8ab446$var$getDataUri(file) {
    const buffer = await $6b4f9d8bcf8ab446$var$readBytes(file);
    const detected = await (0, $hgUW1$fileTypeFromBuffer)(buffer);
    const mimeType = detected?.mime ?? "application/octet-stream";
    return `data:${mimeType};base64,${buffer.toString("base64")}`;
}
var $6b4f9d8bcf8ab446$export$2e2bcd8739ae039 = $6b4f9d8bcf8ab446$var$getDataUri;
1475
+
1476
+
1359
1477
  function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1360
1478
  expression.registerFunction("dtFromIso", (0, $9e00534fc92334a4$export$2e2bcd8739ae039));
1361
1479
  expression.registerFunction("base64decode", (0, $52da679a67e0b066$export$2e2bcd8739ae039));
@@ -1423,6 +1541,9 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1423
1541
  expression.registerFunction("values", function(obj) {
1424
1542
  return (0, $hgUW1$values)(obj);
1425
1543
  });
1544
+ expression.registerFunction("chunk", function(arr, size) {
1545
+ return (0, $hgUW1$chunk)((0, $hgUW1$castArray)(arr), size);
1546
+ });
1426
1547
  expression.registerFunction("wrap", function(value, wrapper, endWrapper) {
1427
1548
  return (0, $hgUW1$join)([
1428
1549
  wrapper,
@@ -1430,6 +1551,9 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1430
1551
  endWrapper || wrapper
1431
1552
  ], "");
1432
1553
  });
1554
+ expression.registerFunction("parseDocument", (0, $f29f00a7536a3143$export$2e2bcd8739ae039));
1555
+ expression.registerFunction("recursiveCharacterTextSplitter", (0, $a12871c9a32cbf3b$export$e600492876ee595b));
1556
+ expression.registerFunction("getDataUri", (0, $6b4f9d8bcf8ab446$export$2e2bcd8739ae039));
1433
1557
  return expression;
1434
1558
  }
1435
1559