@truto/truto-jsonata 1.0.12 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/module.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import $hgUW1$jsonata from "jsonata";
2
- import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber} from "lodash-es";
2
+ import {difference as $hgUW1$difference, groupBy as $hgUW1$groupBy, castArray as $hgUW1$castArray, keyBy as $hgUW1$keyBy, pick as $hgUW1$pick, omit as $hgUW1$omit, compact as $hgUW1$compact, join as $hgUW1$join, orderBy as $hgUW1$orderBy, find as $hgUW1$find, filter as $hgUW1$filter, values as $hgUW1$values, chunk as $hgUW1$chunk, reduce as $hgUW1$reduce, each as $hgUW1$each, get as $hgUW1$get, isEmpty as $hgUW1$isEmpty, concat as $hgUW1$concat, isEqual as $hgUW1$isEqual, flatMap as $hgUW1$flatMap, map as $hgUW1$map, set as $hgUW1$set, isString as $hgUW1$isString, flattenDeep as $hgUW1$flattenDeep, sortBy as $hgUW1$sortBy, repeat as $hgUW1$repeat, flatten as $hgUW1$flatten, reject as $hgUW1$reject, isNil as $hgUW1$isNil, isFunction as $hgUW1$isFunction, toNumber as $hgUW1$toNumber, includes as $hgUW1$includes} from "lodash-es";
3
3
  import {DateTime as $hgUW1$DateTime} from "luxon";
4
4
  import {Lexer as $hgUW1$Lexer, marked as $hgUW1$marked} from "marked";
5
5
  import {diceCoefficient as $hgUW1$diceCoefficient} from "dice-coefficient";
@@ -13,7 +13,6 @@ import $hgUW1$pmap from "p-map";
13
13
  import $hgUW1$pretry from "p-retry";
14
14
  import {Buffer as $hgUW1$Buffer} from "buffer";
15
15
  import {parseOfficeAsync as $hgUW1$parseOfficeAsync} from "officeparser";
16
- import {fileTypeFromBuffer as $hgUW1$fileTypeFromBuffer} from "file-type";
17
16
  import {resolvePDFJS as $hgUW1$resolvePDFJS} from "pdfjs-serverless";
18
17
  import {RecursiveCharacterTextSplitter as $hgUW1$RecursiveCharacterTextSplitter} from "@langchain/textsplitters";
19
18
 
@@ -1401,6 +1400,19 @@ var $bf2fdaa075f6a5e5$export$2e2bcd8739ae039 = $bf2fdaa075f6a5e5$var$generateEmb
1401
1400
 
1402
1401
 
1403
1402
 
var $65fc862361d7fe06$require$Buffer = $hgUW1$Buffer;
/**
 * Decode binary-like input into a string.
 *
 * A ReadableStream is drained and its chunks concatenated before decoding.
 * Blob, ArrayBuffer and typed-array/DataView inputs are wrapped in a Buffer so
 * that `encoding` is honoured; calling `toString(encoding)` on them directly
 * would ignore the encoding (e.g. a Uint8Array yields "104,105").
 * Any other value (Buffer, string, ...) is decoded with its own `toString`.
 *
 * @param {*} value - Stream, Blob, ArrayBuffer, ArrayBuffer view, Buffer or string.
 * @param {string} [encoding] - Encoding name forwarded to `toString`.
 * @returns {Promise<string>} The decoded text.
 * @throws {TypeError} If `value` is null or undefined.
 */
async function $65fc862361d7fe06$var$bufferToString(value, encoding) {
    const Buf = $65fc862361d7fe06$require$Buffer;
    if (value instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of value) chunks.push(Buf.from(chunk));
        return Buf.concat(chunks).toString(encoding);
    }
    if (typeof Blob !== "undefined" && value instanceof Blob) {
        return Buf.from(await value.arrayBuffer()).toString(encoding);
    }
    if (value instanceof ArrayBuffer) return Buf.from(value).toString(encoding);
    if (ArrayBuffer.isView(value) && !Buf.isBuffer(value)) {
        // Wrap the same memory region (no copy) so offset views decode correctly.
        return Buf.from(value.buffer, value.byteOffset, value.byteLength).toString(encoding);
    }
    return value.toString(encoding);
}
var $65fc862361d7fe06$export$2e2bcd8739ae039 = $65fc862361d7fe06$var$bufferToString;
1414
+
1415
+
1404
1416
 
1405
1417
  var $f29f00a7536a3143$require$Buffer = $hgUW1$Buffer;
1406
1418
  async function $f29f00a7536a3143$var$parsePdf(buffer) {
@@ -1426,16 +1438,23 @@ async function $f29f00a7536a3143$var$parsePdf(buffer) {
1426
1438
  // Return the results as JSON
1427
1439
  return (0, $hgUW1$join)(output, "\n");
1428
1440
  }
/**
 * Extract text from a document, choosing the parser from the caller-supplied
 * MIME type.
 *
 * - Office / OpenDocument MIME types are handed to `parseOfficeAsync`.
 * - "application/pdf" is handed to `parsePdf`.
 * - Anything else is decoded as UTF-8 text via `bufferToString`.
 *
 * A ReadableStream is drained into a single Buffer first. Any other input is
 * forwarded to the parser unchanged; previously the parser received
 * `undefined` for non-stream input because the buffer was only assigned inside
 * the stream branch.
 *
 * @param {*} file - ReadableStream, or a value the selected parser accepts directly.
 * @param {string} [fileType] - MIME type used to pick the parser.
 * @returns {Promise<*>} Whatever the selected parser resolves with.
 */
async function $f29f00a7536a3143$var$parseDocument(file, fileType) {
    // Default to the input itself so non-stream callers are not parsed as `undefined`.
    let buffer = file;
    if (file instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of file) chunks.push($f29f00a7536a3143$require$Buffer.from(chunk));
        buffer = $f29f00a7536a3143$require$Buffer.concat(chunks);
    }
    const officeMimeTypes = [
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.oasis.opendocument.presentation",
        "application/vnd.oasis.opendocument.spreadsheet"
    ];
    if ((0, $hgUW1$includes)(officeMimeTypes, fileType)) return await (0, $hgUW1$parseOfficeAsync)(buffer);
    if (fileType === "application/pdf") return await $f29f00a7536a3143$var$parsePdf(buffer);
    return (0, $65fc862361d7fe06$export$2e2bcd8739ae039)(buffer, "utf-8");
}
var $f29f00a7536a3143$export$2e2bcd8739ae039 = $f29f00a7536a3143$var$parseDocument;
1441
1460
 
@@ -1451,22 +1470,20 @@ function $a12871c9a32cbf3b$export$e600492876ee595b(text, options = {
1451
1470
 
1452
1471
 
1453
1472
 
1454
-
var $6b4f9d8bcf8ab446$require$Buffer = $hgUW1$Buffer;
/**
 * Build a base64 `data:` URI from binary content and a caller-supplied MIME type.
 *
 * The input is normalised to a single Buffer (a ReadableStream is drained, a
 * Blob is read through `arrayBuffer()`, anything else is passed to
 * `Buffer.from`) and the URI is assembled in one place.
 *
 * @param {*} file - ReadableStream, Blob, or any value accepted by `Buffer.from`.
 * @param {string} mimeType - MIME type embedded in the URI; required.
 * @returns {Promise<string>} A string of the form `data:<mimeType>;base64,<payload>`.
 * @throws {Error} "Mime type is required" when `mimeType` is falsy.
 */
async function $6b4f9d8bcf8ab446$var$getDataUri(file, mimeType) {
    if (!mimeType) throw new Error("Mime type is required");
    const Buf = $6b4f9d8bcf8ab446$require$Buffer;
    let buffer;
    if (file instanceof ReadableStream) {
        const chunks = [];
        for await (const chunk of file) chunks.push(Buf.from(chunk));
        buffer = Buf.concat(chunks);
    } else {
        const source = file instanceof Blob ? await file.arrayBuffer() : file;
        buffer = Buf.from(source);
    }
    return `data:${mimeType};base64,${buffer.toString("base64")}`;
}
var $6b4f9d8bcf8ab446$export$2e2bcd8739ae039 = $6b4f9d8bcf8ab446$var$getDataUri;
@@ -1478,6 +1495,7 @@ async function $5e08a3608e719b52$var$teeStream(stream) {
1478
1495
  var $5e08a3608e719b52$export$2e2bcd8739ae039 = $5e08a3608e719b52$var$teeStream;
1479
1496
 
1480
1497
 
1498
+
1481
1499
  function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1482
1500
  expression.registerFunction("dtFromIso", (0, $9e00534fc92334a4$export$2e2bcd8739ae039));
1483
1501
  expression.registerFunction("base64decode", (0, $52da679a67e0b066$export$2e2bcd8739ae039));
@@ -1559,6 +1577,7 @@ function $3e1c03caddb74419$export$2e2bcd8739ae039(expression) {
1559
1577
  expression.registerFunction("recursiveCharacterTextSplitter", (0, $a12871c9a32cbf3b$export$e600492876ee595b));
1560
1578
  expression.registerFunction("getDataUri", (0, $6b4f9d8bcf8ab446$export$2e2bcd8739ae039));
1561
1579
  expression.registerFunction("teeStream", (0, $5e08a3608e719b52$export$2e2bcd8739ae039));
1580
+ expression.registerFunction("bufferToString", (0, $65fc862361d7fe06$export$2e2bcd8739ae039));
1562
1581
  return expression;
1563
1582
  }
1564
1583