@truto/truto-jsonata 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -0
- package/dist/main.cjs +138 -11
- package/dist/main.cjs.map +1 -1
- package/dist/module.js +144 -12
- package/dist/module.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +8 -1
package/README.md
CHANGED
|
@@ -881,6 +881,23 @@ expression.evaluate({ file}).then(result => { console.log(result); });
|
|
|
881
881
|
|
|
882
882
|
</details>
|
|
883
883
|
|
|
884
|
+
<details>
|
|
885
|
+
<summary>getDataUri(file)</summary>
|
|
886
|
+
|
|
887
|
+
Converts a `Blob`, `Buffer`, or `ReadableStream` to a Base64-encoded data URI.
|
|
888
|
+
|
|
889
|
+
**Example:**
|
|
890
|
+
|
|
891
|
+
```javascript
|
|
892
|
+
import trutoJsonata from '@truto/truto-jsonata';
|
|
893
|
+
const file = new Blob(['Hello, World!'], { type: 'text/plain' });
|
|
894
|
+
const expression = trutoJsonata("$getDataUri(file)");
|
|
895
|
+
expression.evaluate({ file}).then(result => { console.log(result); });
|
|
896
|
+
// Output: 'data:text/plain;base64,SGVsbG8sIFdvcmxkIQ=='
|
|
897
|
+
```
|
|
898
|
+
|
|
899
|
+
</details>
|
|
900
|
+
|
|
884
901
|
<details>
|
|
885
902
|
<summary>blob(content, options)</summary>
|
|
886
903
|
|
|
@@ -1241,6 +1258,25 @@ expression3.evaluate({ jsonData: jsonData3, options: options3 }).then(result =>
|
|
|
1241
1258
|
|
|
1242
1259
|
</details>
|
|
1243
1260
|
|
|
1261
|
+
<details>
|
|
1262
|
+
<summary>parseDocument(file)</summary>
|
|
1263
|
+
|
|
1264
|
+
Parses a document file (e.g., PDF, DOCX) and extracts text content.
|
|
1265
|
+
|
|
1266
|
+
**Example:**
|
|
1267
|
+
|
|
1268
|
+
```javascript
|
|
1269
|
+
import trutoJsonata from '@truto/truto-jsonata';
|
|
1270
|
+
const file = new Blob(['Hello, World!'], { type: 'application/pdf' });
|
|
1271
|
+
const buffer = await file.arrayBuffer();
|
|
1272
|
+
const expression = trutoJsonata("$parseDocument(buffer)");
|
|
1273
|
+
expression.evaluate({ buffer }).then(result => { console.log(result); });
|
|
1274
|
+
// Output: 'Hello, World!'
|
|
1275
|
+
|
|
1276
|
+
```
|
|
1277
|
+
|
|
1278
|
+
</details>
|
|
1279
|
+
|
|
1244
1280
|
### Markdown and Text Conversion
|
|
1245
1281
|
|
|
1246
1282
|
<details>
|
|
@@ -1855,6 +1891,35 @@ expression.evaluate({ body, api_key }).then(result => { console.log(result); });
|
|
|
1855
1891
|
|
|
1856
1892
|
</details>
|
|
1857
1893
|
|
|
1894
|
+
<details>
|
|
1895
|
+
<summary>recursiveCharacterTextSplitter(text, options)</summary>
|
|
1896
|
+
|
|
1897
|
+
Recursively splits text into an array of smaller chunks using LangChain's `RecursiveCharacterTextSplitter`.
|
|
1898
|
+
|
|
1899
|
+
**Parameters:**
|
|
1900
|
+
|
|
1901
|
+
- **text**: The input text to split.
|
|
1902
|
+
- **options**: An object containing the following properties:
|
|
1903
|
+
- **chunkSize**: The maximum number of characters, words, or sentences per chunk (default is `200`).
|
|
1904
|
+
- **chunkOverlap**: The number of characters, words, or sentences to overlap between chunks (default is `60`).
|
|
1905
|
+
|
|
1906
|
+
**Example Usage:**
|
|
1907
|
+
|
|
1908
|
+
```javascript
|
|
1909
|
+
import trutoJsonata from '@truto/truto-jsonata';
|
|
1910
|
+
|
|
1911
|
+
const text = "Hello, World! This is a sample text.";
|
|
1912
|
+
const options = {
|
|
1913
|
+
chunkSize: 10,
|
|
1914
|
+
chunkOverlap: 3
|
|
1915
|
+
};
|
|
1916
|
+
const expression = trutoJsonata("$recursiveCharacterTextSplitter(text, options)");
|
|
1917
|
+
expression.evaluate({ text, options }).then(result => { console.log(result); });
|
|
1918
|
+
// Output: ["Hello, Wo", "lo, World", "rld! This", "is a samp", "ample text", "text."]
|
|
1919
|
+
```
|
|
1920
|
+
|
|
1921
|
+
</details>
|
|
1922
|
+
|
|
1858
1923
|
|
|
1859
1924
|
### Miscellaneous
|
|
1860
1925
|
|
|
@@ -2015,3 +2080,23 @@ expression.evaluate({}).then(result => { console.log(result); });
|
|
|
2015
2080
|
```
|
|
2016
2081
|
|
|
2017
2082
|
</details>
|
|
2083
|
+
|
|
2084
|
+
|
|
2085
|
+
|
|
2086
|
+
<details>
|
|
2087
|
+
<summary>chunk(arr, size)</summary>
|
|
2088
|
+
|
|
2089
|
+
Chunks an array into smaller arrays of a specified size.
|
|
2090
|
+
|
|
2091
|
+
**Example:**
|
|
2092
|
+
|
|
2093
|
+
```javascript
|
|
2094
|
+
import trutoJsonata from '@truto/truto-jsonata';
|
|
2095
|
+
|
|
2096
|
+
|
|
2097
|
+
const expression = trutoJsonata("$chunk([1, 2, 3, 4, 5], 2)");
|
|
2098
|
+
expression.evaluate({}).then(result => { console.log(result); });
|
|
2099
|
+
// Output: [[1,2],[3,4],[5]]
|
|
2100
|
+
```
|
|
2101
|
+
|
|
2102
|
+
</details>
|
package/dist/main.cjs
CHANGED
|
@@ -9,6 +9,13 @@ var $dxT2C$turndownplugingfm = require("turndown-plugin-gfm");
|
|
|
9
9
|
var $dxT2C$xmljs = require("xml-js");
|
|
10
10
|
var $dxT2C$json2md = require("json2md");
|
|
11
11
|
var $dxT2C$mime = require("mime");
|
|
12
|
+
var $dxT2C$pmap = require("p-map");
|
|
13
|
+
var $dxT2C$pretry = require("p-retry");
|
|
14
|
+
var $dxT2C$officeparser = require("officeparser");
|
|
15
|
+
var $dxT2C$filetype = require("file-type");
|
|
16
|
+
var $dxT2C$pdfjsserverless = require("pdfjs-serverless");
|
|
17
|
+
var $dxT2C$langchaintextsplitters = require("@langchain/textsplitters");
|
|
18
|
+
var $dxT2C$nodestreamweb = require("node:stream/web");
|
|
12
19
|
|
|
13
20
|
function $parcel$interopDefault(a) {
|
|
14
21
|
return a && a.__esModule ? a.default : a;
|
|
@@ -1335,6 +1342,13 @@ var $77081a2d6d46cd50$export$2e2bcd8739ae039 = $77081a2d6d46cd50$var$base64decod
|
|
|
1335
1342
|
|
|
1336
1343
|
|
|
1337
1344
|
|
|
1345
|
+
function $49ae26210d3b6a4c$var$toNumber(value) {
|
|
1346
|
+
return (0, $dxT2C$lodashes.toNumber)(value);
|
|
1347
|
+
}
|
|
1348
|
+
var $49ae26210d3b6a4c$export$2e2bcd8739ae039 = $49ae26210d3b6a4c$var$toNumber;
|
|
1349
|
+
|
|
1350
|
+
|
|
1351
|
+
|
|
1338
1352
|
|
|
1339
1353
|
async function $2c639152271462a6$var$convertMarkdownToHtml(markdown, options) {
|
|
1340
1354
|
return (0, $dxT2C$marked.marked).parse(markdown, options);
|
|
@@ -1342,23 +1356,129 @@ async function $2c639152271462a6$var$convertMarkdownToHtml(markdown, options) {
|
|
|
1342
1356
|
var $2c639152271462a6$export$2e2bcd8739ae039 = $2c639152271462a6$var$convertMarkdownToHtml;
|
|
1343
1357
|
|
|
1344
1358
|
|
|
1359
|
+
|
|
1360
|
+
|
|
1361
|
+
|
|
1345
1362
|
async function $caaee789061bb8bb$var$generateEmbeddingsCohere(body, api_key) {
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1363
|
+
if (!(0, $dxT2C$lodashes.isEmpty)(body.texts)) {
|
|
1364
|
+
const chunks = (0, $dxT2C$lodashes.chunk)((0, $dxT2C$lodashes.castArray)(body.texts), 20);
|
|
1365
|
+
return await (0, ($parcel$interopDefault($dxT2C$pmap)))(chunks, async (chunk)=>{
|
|
1366
|
+
return await (0, ($parcel$interopDefault($dxT2C$pretry)))(async ()=>{
|
|
1367
|
+
const response = await fetch("https://api.cohere.com/v2/embed", {
|
|
1368
|
+
method: "POST",
|
|
1369
|
+
headers: {
|
|
1370
|
+
accept: "application/json",
|
|
1371
|
+
"content-type": "application/json",
|
|
1372
|
+
"user-agent": "truto",
|
|
1373
|
+
Authorization: `Bearer ${api_key}`
|
|
1374
|
+
},
|
|
1375
|
+
body: JSON.stringify({
|
|
1376
|
+
...body,
|
|
1377
|
+
texts: chunk
|
|
1378
|
+
})
|
|
1379
|
+
});
|
|
1380
|
+
if (!response.ok) throw new Error(await response.text());
|
|
1381
|
+
return await response.json();
|
|
1382
|
+
}, {
|
|
1383
|
+
retries: 10,
|
|
1384
|
+
maxTimeout: 600000
|
|
1385
|
+
});
|
|
1386
|
+
}, {
|
|
1387
|
+
concurrency: 1
|
|
1388
|
+
});
|
|
1389
|
+
} else if (!(0, $dxT2C$lodashes.isEmpty)(body.images)) return await (0, ($parcel$interopDefault($dxT2C$pretry)))(async ()=>{
|
|
1390
|
+
const response = await fetch("https://api.cohere.com/v2/embed", {
|
|
1391
|
+
method: "POST",
|
|
1392
|
+
headers: {
|
|
1393
|
+
accept: "application/json",
|
|
1394
|
+
"content-type": "application/json",
|
|
1395
|
+
"user-agent": "truto",
|
|
1396
|
+
Authorization: `Bearer ${api_key}`
|
|
1397
|
+
},
|
|
1398
|
+
body: JSON.stringify(body)
|
|
1399
|
+
});
|
|
1400
|
+
if (!response.ok) throw new Error(await response.text());
|
|
1401
|
+
return await response.json();
|
|
1402
|
+
}, {
|
|
1403
|
+
retries: 10,
|
|
1404
|
+
maxTimeout: 600000
|
|
1355
1405
|
});
|
|
1356
|
-
if (!response.ok) throw new Error(await response.text());
|
|
1357
|
-
return await response.json();
|
|
1358
1406
|
}
|
|
1359
1407
|
var $caaee789061bb8bb$export$2e2bcd8739ae039 = $caaee789061bb8bb$var$generateEmbeddingsCohere;
|
|
1360
1408
|
|
|
1361
1409
|
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
|
|
1413
|
+
|
|
1414
|
+
async function $05e3378f7c17d263$var$parsePdf(buffer) {
|
|
1415
|
+
const data = buffer.buffer.slice(buffer?.byteOffset, buffer?.byteOffset + buffer?.byteLength);
|
|
1416
|
+
const { getDocument: getDocument } = await (0, $dxT2C$pdfjsserverless.resolvePDFJS)();
|
|
1417
|
+
const doc = await getDocument({
|
|
1418
|
+
data: data,
|
|
1419
|
+
useSystemFonts: true
|
|
1420
|
+
}).promise;
|
|
1421
|
+
// Collect the extracted text of each page in an array
|
|
1422
|
+
const output = [];
|
|
1423
|
+
// Iterate through each page and fetch the text content
|
|
1424
|
+
for(let i = 1; i <= doc.numPages; i++){
|
|
1425
|
+
const page = await doc.getPage(i);
|
|
1426
|
+
const textContent = await page.getTextContent();
|
|
1427
|
+
const contents = textContent.items.map((item)=>(0, $dxT2C$lodashes.get)(item, "str")).join(" ");
|
|
1428
|
+
// Add page content to output
|
|
1429
|
+
if (contents) output.push((0, $dxT2C$lodashes.join)([
|
|
1430
|
+
`Page Number: ${i}`,
|
|
1431
|
+
contents
|
|
1432
|
+
], "/\n"));
|
|
1433
|
+
}
|
|
1434
|
+
// Return the page texts joined into a single newline-separated string
|
|
1435
|
+
return (0, $dxT2C$lodashes.join)(output, "\n");
|
|
1436
|
+
}
|
|
1437
|
+
async function $05e3378f7c17d263$var$parseDocument(file) {
|
|
1438
|
+
if (file) {
|
|
1439
|
+
const chunks = [];
|
|
1440
|
+
for await (const chunk of file)chunks.push(Buffer.from(chunk));
|
|
1441
|
+
const buffer = Buffer.concat(chunks);
|
|
1442
|
+
const fileExt = (await (0, $dxT2C$filetype.fileTypeFromBuffer)(buffer))?.ext;
|
|
1443
|
+
if (fileExt === "pdf") return await $05e3378f7c17d263$var$parsePdf(buffer);
|
|
1444
|
+
return await (0, $dxT2C$officeparser.parseOfficeAsync)(buffer);
|
|
1445
|
+
}
|
|
1446
|
+
return await (0, $dxT2C$officeparser.parseOfficeAsync)(file);
|
|
1447
|
+
}
|
|
1448
|
+
var $05e3378f7c17d263$export$2e2bcd8739ae039 = $05e3378f7c17d263$var$parseDocument;
|
|
1449
|
+
|
|
1450
|
+
|
|
1451
|
+
|
|
1452
|
+
function $fe4dcef142601b8c$export$e600492876ee595b(text, options = {
|
|
1453
|
+
chunkSize: 200,
|
|
1454
|
+
chunkOverlap: 60
|
|
1455
|
+
}) {
|
|
1456
|
+
const splitter = new (0, $dxT2C$langchaintextsplitters.RecursiveCharacterTextSplitter)(options);
|
|
1457
|
+
return splitter.splitText(text);
|
|
1458
|
+
}
|
|
1459
|
+
|
|
1460
|
+
|
|
1461
|
+
|
|
1462
|
+
|
|
1463
|
+
async function $15c432f5f036a88a$var$getDataUri(file) {
|
|
1464
|
+
if (file instanceof (0, $dxT2C$nodestreamweb.ReadableStream)) {
|
|
1465
|
+
const chunks = [];
|
|
1466
|
+
for await (const chunk of file)chunks.push(Buffer.from(chunk));
|
|
1467
|
+
const buffer = Buffer.concat(chunks);
|
|
1468
|
+
const base64Image = buffer.toString("base64");
|
|
1469
|
+
const mimeType = (await (0, $dxT2C$filetype.fileTypeFromBuffer)(buffer))?.mime;
|
|
1470
|
+
// Construct the data URI using the MIME type detected from the buffer
|
|
1471
|
+
return `data:${mimeType};base64,${base64Image}`;
|
|
1472
|
+
}
|
|
1473
|
+
const arrayBuffer = file instanceof Blob ? await file.arrayBuffer() : file;
|
|
1474
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
1475
|
+
const base64Image = buffer.toString("base64");
|
|
1476
|
+
const mimeType = (await (0, $dxT2C$filetype.fileTypeFromBuffer)(buffer))?.mime;
|
|
1477
|
+
return `data:${mimeType};base64,${base64Image}`;
|
|
1478
|
+
}
|
|
1479
|
+
var $15c432f5f036a88a$export$2e2bcd8739ae039 = $15c432f5f036a88a$var$getDataUri;
|
|
1480
|
+
|
|
1481
|
+
|
|
1362
1482
|
function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
1363
1483
|
expression.registerFunction("dtFromIso", (0, $bab42b5e4be720d3$export$2e2bcd8739ae039));
|
|
1364
1484
|
expression.registerFunction("base64decode", (0, $77081a2d6d46cd50$export$2e2bcd8739ae039));
|
|
@@ -1381,6 +1501,7 @@ function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
|
1381
1501
|
expression.registerFunction("difference", function(arr1, arr2) {
|
|
1382
1502
|
return (0, $dxT2C$lodashes.difference)(arr1, arr2);
|
|
1383
1503
|
});
|
|
1504
|
+
expression.registerFunction("toNumber", (0, $49ae26210d3b6a4c$export$2e2bcd8739ae039));
|
|
1384
1505
|
expression.registerFunction("jsonParse", (0, $491a3e93ed46e445$export$2e2bcd8739ae039));
|
|
1385
1506
|
expression.registerFunction("getMimeType", (0, $cff411daa1ea9f52$export$2e2bcd8739ae039));
|
|
1386
1507
|
expression.registerFunction("uuid", (0, $b3c3220fc07098c9$export$2e2bcd8739ae039));
|
|
@@ -1425,6 +1546,9 @@ function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
|
1425
1546
|
expression.registerFunction("values", function(obj) {
|
|
1426
1547
|
return (0, $dxT2C$lodashes.values)(obj);
|
|
1427
1548
|
});
|
|
1549
|
+
expression.registerFunction("chunk", function(arr, size) {
|
|
1550
|
+
return (0, $dxT2C$lodashes.chunk)((0, $dxT2C$lodashes.castArray)(arr), size);
|
|
1551
|
+
});
|
|
1428
1552
|
expression.registerFunction("wrap", function(value, wrapper, endWrapper) {
|
|
1429
1553
|
return (0, $dxT2C$lodashes.join)([
|
|
1430
1554
|
wrapper,
|
|
@@ -1432,6 +1556,9 @@ function $af351c41b7fd6f79$export$2e2bcd8739ae039(expression) {
|
|
|
1432
1556
|
endWrapper || wrapper
|
|
1433
1557
|
], "");
|
|
1434
1558
|
});
|
|
1559
|
+
expression.registerFunction("parseDocument", (0, $05e3378f7c17d263$export$2e2bcd8739ae039));
|
|
1560
|
+
expression.registerFunction("recursiveCharacterTextSplitter", (0, $fe4dcef142601b8c$export$e600492876ee595b));
|
|
1561
|
+
expression.registerFunction("getDataUri", (0, $15c432f5f036a88a$export$2e2bcd8739ae039));
|
|
1435
1562
|
return expression;
|
|
1436
1563
|
}
|
|
1437
1564
|
|