@mastra/rag 0.0.2-alpha.7 → 0.0.2-alpha.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/CHANGELOG.md +485 -0
  2. package/LICENSE +44 -0
  3. package/README.md +26 -0
  4. package/dist/document/document.d.ts +32 -0
  5. package/dist/document/document.d.ts.map +1 -0
  6. package/dist/document/index.d.ts +3 -0
  7. package/dist/document/index.d.ts.map +1 -0
  8. package/dist/document/transformers/character.d.ts +45 -0
  9. package/dist/document/transformers/character.d.ts.map +1 -0
  10. package/dist/document/transformers/html.d.ts +24 -0
  11. package/dist/document/transformers/html.d.ts.map +1 -0
  12. package/dist/document/transformers/json.d.ts +57 -0
  13. package/dist/document/transformers/json.d.ts.map +1 -0
  14. package/dist/document/transformers/latex.d.ts +12 -0
  15. package/dist/document/transformers/latex.d.ts.map +1 -0
  16. package/dist/document/transformers/markdown.d.ts +25 -0
  17. package/dist/document/transformers/markdown.d.ts.map +1 -0
  18. package/dist/document/transformers/text.d.ts +22 -0
  19. package/dist/document/transformers/text.d.ts.map +1 -0
  20. package/dist/document/transformers/token.d.ts +46 -0
  21. package/dist/document/transformers/token.d.ts.map +1 -0
  22. package/dist/document/transformers/transformer.d.ts +5 -0
  23. package/dist/document/transformers/transformer.d.ts.map +1 -0
  24. package/dist/document/types.d.ts +88 -0
  25. package/dist/document/types.d.ts.map +1 -0
  26. package/dist/embeddings/index.d.ts +5 -0
  27. package/dist/embeddings/index.d.ts.map +1 -0
  28. package/dist/index.d.ts +4 -0
  29. package/dist/index.d.ts.map +1 -0
  30. package/dist/index.js +8 -0
  31. package/dist/rag.cjs.development.js +2756 -0
  32. package/dist/rag.cjs.development.js.map +1 -0
  33. package/dist/rag.cjs.production.min.js +2 -0
  34. package/dist/rag.cjs.production.min.js.map +1 -0
  35. package/dist/rag.esm.js +2745 -0
  36. package/dist/rag.esm.js.map +1 -0
  37. package/dist/utils/graph-rag/index.d.ts +52 -0
  38. package/dist/utils/graph-rag/index.d.ts.map +1 -0
  39. package/dist/utils/index.d.ts +4 -0
  40. package/dist/utils/index.d.ts.map +1 -0
  41. package/dist/utils/rag-tools.d.ts +246 -0
  42. package/dist/utils/rag-tools.d.ts.map +1 -0
  43. package/dist/utils/rerank/index.d.ts +36 -0
  44. package/dist/utils/rerank/index.d.ts.map +1 -0
  45. package/docker-compose.yaml +4 -0
  46. package/package.json +22 -17
  47. package/src/document/document.test.ts +201 -412
  48. package/src/document/document.ts +55 -58
  49. package/src/document/transformers/character.ts +6 -6
  50. package/src/document/transformers/json.ts +7 -7
  51. package/src/document/transformers/latex.ts +2 -2
  52. package/src/document/transformers/text.ts +14 -16
  53. package/src/document/transformers/token.ts +17 -28
  54. package/src/document/types.ts +10 -5
  55. package/src/embeddings/index.ts +22 -0
  56. package/src/index.ts +2 -4
  57. package/src/utils/graph-rag/index.test.ts +230 -0
  58. package/src/utils/graph-rag/index.ts +296 -0
  59. package/src/utils/index.ts +3 -0
  60. package/src/utils/rag-tools.ts +266 -0
  61. package/src/utils/rerank/index.test.ts +178 -0
  62. package/src/utils/rerank/index.ts +162 -0
  63. package/vitest.config.ts +11 -0
  64. package/jest.config.ts +0 -19
  65. package/src/astra-db/index.test.ts +0 -201
  66. package/src/astra-db/index.ts +0 -146
  67. package/src/pg/index.ts +0 -255
  68. package/src/pg/index_test.ts +0 -212
  69. package/src/pinecone/index.test.ts +0 -130
  70. package/src/pinecone/index.ts +0 -118
  71. package/src/qdrant/index.test.ts +0 -119
  72. package/src/qdrant/index.ts +0 -116
package/CHANGELOG.md CHANGED
@@ -1,5 +1,490 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 0.0.2-alpha.70
4
+
5
+ ### Patch Changes
6
+
7
+ - 327ece7: Updates for ts versions
8
+ - Updated dependencies [327ece7]
9
+ - @mastra/core@0.1.27-alpha.80
10
+
11
+ ## 0.0.2-alpha.69
12
+
13
+ ### Patch Changes
14
+
15
+ - 21fe536: add keyword tags for packages and update readmes
16
+ - Updated dependencies [21fe536]
17
+ - @mastra/core@0.1.27-alpha.79
18
+
19
+ ## 0.0.2-alpha.68
20
+
21
+ ### Patch Changes
22
+
23
+ - 685108a: Remove syncs and excess rag
24
+ - Updated dependencies [685108a]
25
+ - Updated dependencies [685108a]
26
+ - @mastra/core@0.1.27-alpha.78
27
+
28
+ ## 0.0.2-alpha.67
29
+
30
+ ### Patch Changes
31
+
32
+ - 8105fae: Split embed into embed and embedMany to handle different return types
33
+ - ecdee97: Update return type for chunk to Chunk[]
34
+ - Updated dependencies [8105fae]
35
+ - @mastra/core@0.1.27-alpha.77
36
+
37
+ ## 0.0.2-alpha.66
38
+
39
+ ### Patch Changes
40
+
41
+ - 9c0d010: Deprecated Reranker in favor of rerank function (BREAKING CHANGE)
42
+
43
+ ## 0.0.2-alpha.65
44
+
45
+ ### Patch Changes
46
+
47
+ - Updated dependencies [ae7bf94]
48
+ - Updated dependencies [ae7bf94]
49
+ - @mastra/core@0.1.27-alpha.76
50
+
51
+ ## 0.0.2-alpha.64
52
+
53
+ ### Patch Changes
54
+
55
+ - Updated dependencies [23dcb23]
56
+ - @mastra/core@0.1.27-alpha.75
57
+
58
+ ## 0.0.2-alpha.63
59
+
60
+ ### Patch Changes
61
+
62
+ - Updated dependencies [7b87567]
63
+ - @mastra/core@0.1.27-alpha.74
64
+
65
+ ## 0.0.2-alpha.62
66
+
67
+ ### Patch Changes
68
+
69
+ - Updated dependencies [3427b95]
70
+ - @mastra/core@0.1.27-alpha.73
71
+
72
+ ## 0.0.2-alpha.61
73
+
74
+ ### Patch Changes
75
+
76
+ - Updated dependencies [e4d4ede]
77
+ - Updated dependencies [06b2c0a]
78
+ - @mastra/core@0.1.27-alpha.72
79
+
80
+ ## 0.0.2-alpha.60
81
+
82
+ ### Patch Changes
83
+
84
+ - Updated dependencies [d9c8dd0]
85
+ - @mastra/core@0.1.27-alpha.71
86
+
87
+ ## 0.0.2-alpha.59
88
+
89
+ ### Patch Changes
90
+
91
+ - bdaf834: publish packages
92
+
93
+ ## 0.0.2-alpha.58
94
+
95
+ ### Patch Changes
96
+
97
+ - Updated dependencies [dd6d87f]
98
+ - Updated dependencies [04434b6]
99
+ - @mastra/core@0.1.27-alpha.70
100
+
101
+ ## 0.0.2-alpha.57
102
+
103
+ ### Patch Changes
104
+
105
+ - ab1dc59: Add Cloudflare vectorize
106
+ - 4769753: Added turso/libsql support for RAG
107
+
108
+ ## 0.0.2-alpha.56
109
+
110
+ ### Patch Changes
111
+
112
+ - ee856f3: Updated pg filter function and updated docs and example
113
+ - c3047a7: Added tests
114
+
115
+ ## 0.0.2-alpha.55
116
+
117
+ ### Patch Changes
118
+
119
+ - 1944807: Unified logger and major step in better logs
120
+ - Updated dependencies [1944807]
121
+ - Updated dependencies [9ade36e]
122
+ - @mastra/core@0.1.27-alpha.69
123
+
124
+ ## 0.0.2-alpha.54
125
+
126
+ ### Patch Changes
127
+
128
+ - Updated dependencies [0be7181]
129
+ - Updated dependencies [0be7181]
130
+ - @mastra/core@0.1.27-alpha.68
131
+
132
+ ## 0.0.2-alpha.53
133
+
134
+ ### Patch Changes
135
+
136
+ - Updated dependencies [c8ff2f5]
137
+ - @mastra/core@0.1.27-alpha.67
138
+
139
+ ## 0.0.2-alpha.52
140
+
141
+ ### Patch Changes
142
+
143
+ - Updated dependencies [14064f2]
144
+ - @mastra/core@0.1.27-alpha.66
145
+
146
+ ## 0.0.2-alpha.51
147
+
148
+ ### Patch Changes
149
+
150
+ - Updated dependencies [e66643a]
151
+ - @mastra/core@0.1.27-alpha.65
152
+
153
+ ## 0.0.2-alpha.50
154
+
155
+ ### Patch Changes
156
+
157
+ - 1616f70: Added transaction handling to pgvector
158
+
159
+ ## 0.0.2-alpha.49
160
+
161
+ ### Patch Changes
162
+
163
+ - Updated dependencies [f368477]
164
+ - Updated dependencies [d5ec619]
165
+ - @mastra/core@0.1.27-alpha.64
166
+
167
+ ## 0.0.2-alpha.48
168
+
169
+ ### Patch Changes
170
+
171
+ - 5826db3: Updated reference docs and exports for Reranker and GraphRag
172
+
173
+ ## 0.0.2-alpha.47
174
+
175
+ ### Patch Changes
176
+
177
+ - Updated dependencies [e097800]
178
+ - @mastra/core@0.1.27-alpha.63
179
+
180
+ ## 0.0.2-alpha.46
181
+
182
+ ### Patch Changes
183
+
184
+ - Updated dependencies [93a3719]
185
+ - @mastra/core@0.1.27-alpha.62
186
+
187
+ ## 0.0.2-alpha.45
188
+
189
+ ### Patch Changes
190
+
191
+ - Updated dependencies [dc90663]
192
+ - @mastra/core@0.1.27-alpha.61
193
+
194
+ ## 0.0.2-alpha.44
195
+
196
+ ### Patch Changes
197
+
198
+ - 3967e69: Added GraphRAG implementation and updated docs
199
+ - Updated dependencies [3967e69]
200
+ - @mastra/core@0.1.27-alpha.60
201
+
202
+ ## 0.0.2-alpha.43
203
+
204
+ ### Patch Changes
205
+
206
+ - Updated dependencies [b524c22]
207
+ - @mastra/core@0.1.27-alpha.59
208
+
209
+ ## 0.0.2-alpha.42
210
+
211
+ ### Patch Changes
212
+
213
+ - 1874f40: Added re-ranking tool to RAG
214
+ - Updated dependencies [1874f40]
215
+ - Updated dependencies [4b1ce2c]
216
+ - @mastra/core@0.1.27-alpha.58
217
+
218
+ ## 0.0.2-alpha.41
219
+
220
+ ### Patch Changes
221
+
222
+ - 7de6d71: Update filter for vector query to work with more stores
223
+ - Updated dependencies [fd494a3]
224
+ - @mastra/core@0.1.27-alpha.57
225
+
226
+ ## 0.0.2-alpha.40
227
+
228
+ ### Patch Changes
229
+
230
+ - ae638a4: make useFilter option optional
231
+ - Updated dependencies [9f3ab05]
232
+ - @mastra/core@0.1.27-alpha.56
233
+
234
+ ## 0.0.2-alpha.39
235
+
236
+ ### Patch Changes
237
+
238
+ - 592e3cf: Add custom rag tools, add vector retrieval, and update docs
239
+ - 837a288: MAJOR Revamp of tools, workflows, syncs.
240
+ - 0b74006: Workflow updates
241
+ - Updated dependencies [592e3cf]
242
+ - Updated dependencies [837a288]
243
+ - Updated dependencies [0b74006]
244
+ - @mastra/core@0.1.27-alpha.55
245
+
246
+ ## 0.0.2-alpha.38
247
+
248
+ ### Patch Changes
249
+
250
+ - eb45d76: Updated PG Vector filter and added rag examples in docs
251
+ - Updated dependencies [d2cd535]
252
+ - @mastra/core@0.1.27-alpha.54
253
+
254
+ ## 0.0.2-alpha.37
255
+
256
+ ### Patch Changes
257
+
258
+ - Updated dependencies [8e7814f]
259
+ - @mastra/core@0.1.27-alpha.53
260
+
261
+ ## 0.0.2-alpha.36
262
+
263
+ ### Patch Changes
264
+
265
+ - Updated dependencies [eedb829]
266
+ - @mastra/core@0.1.27-alpha.52
267
+
268
+ ## 0.0.2-alpha.35
269
+
270
+ ### Patch Changes
271
+
272
+ - Updated dependencies [a7b016d]
273
+ - Updated dependencies [da2e8d3]
274
+ - Updated dependencies [538a136]
275
+ - @mastra/core@0.1.27-alpha.51
276
+
277
+ ## 0.0.2-alpha.34
278
+
279
+ ### Patch Changes
280
+
281
+ - Updated dependencies [401a4d9]
282
+ - @mastra/core@0.1.27-alpha.50
283
+
284
+ ## 0.0.2-alpha.33
285
+
286
+ ### Patch Changes
287
+
288
+ - a621c34: Add validation for indexName in pgVector and dimension for all vector dbs
289
+ - Updated dependencies [79acad0]
290
+ - Updated dependencies [f5dfa20]
291
+ - @mastra/core@0.1.27-alpha.49
292
+
293
+ ## 0.0.2-alpha.32
294
+
295
+ ### Patch Changes
296
+
297
+ - Updated dependencies [b726bf5]
298
+ - @mastra/core@0.1.27-alpha.48
299
+
300
+ ## 0.0.2-alpha.31
301
+
302
+ ### Patch Changes
303
+
304
+ - Updated dependencies [f6ba259]
305
+ - @mastra/core@0.1.27-alpha.47
306
+
307
+ ## 0.0.2-alpha.30
308
+
309
+ ### Patch Changes
310
+
311
+ - 8ae2bbc: Dane publishing
312
+ - 0bd142c: Fixes learned from docs
313
+ - ee4de15: Dane fixes
314
+ - Updated dependencies [8ae2bbc]
315
+ - Updated dependencies [0bd142c]
316
+ - Updated dependencies [ee4de15]
317
+ - @mastra/core@0.1.27-alpha.46
318
+
319
+ ## 0.0.2-alpha.29
320
+
321
+ ### Patch Changes
322
+
323
+ - Updated dependencies [e608d8c]
324
+ - Updated dependencies [002d6d8]
325
+ - @mastra/core@0.1.27-alpha.45
326
+
327
+ ## 0.0.2-alpha.28
328
+
329
+ ### Patch Changes
330
+
331
+ - Updated dependencies [2fa7f53]
332
+ - @mastra/core@0.1.27-alpha.44
333
+
334
+ ## 0.0.2-alpha.27
335
+
336
+ ### Patch Changes
337
+
338
+ - Updated dependencies [2e099d2]
339
+ - Updated dependencies [d6d8159]
340
+ - @mastra/core@0.1.27-alpha.43
341
+
342
+ ## 0.0.2-alpha.26
343
+
344
+ ### Patch Changes
345
+
346
+ - Updated dependencies [4a54c82]
347
+ - @mastra/core@0.1.27-alpha.42
348
+
349
+ ## 0.0.2-alpha.25
350
+
351
+ ### Patch Changes
352
+
353
+ - Updated dependencies [5cdfb88]
354
+ - @mastra/core@0.1.27-alpha.41
355
+
356
+ ## 0.0.2-alpha.24
357
+
358
+ ### Patch Changes
359
+
360
+ - Updated dependencies [9029796]
361
+ - @mastra/core@0.1.27-alpha.40
362
+
363
+ ## 0.0.2-alpha.23
364
+
365
+ ### Patch Changes
366
+
367
+ - 6101f2d: Fix module incompatibility, and dane cli build
368
+ - Updated dependencies [2b01511]
369
+ - @mastra/core@0.1.27-alpha.39
370
+
371
+ ## 0.0.2-alpha.22
372
+
373
+ ### Patch Changes
374
+
375
+ - f031a1f: expose embed from rag, and refactor embed
376
+ - Updated dependencies [f031a1f]
377
+ - @mastra/core@0.1.27-alpha.38
378
+
379
+ ## 0.0.2-alpha.21
380
+
381
+ ### Patch Changes
382
+
383
+ - 45fd5b8: rename MastraDocument to MDocument
384
+ - Updated dependencies [c872875]
385
+ - Updated dependencies [f6da688]
386
+ - Updated dependencies [b5393f1]
387
+ - @mastra/core@0.1.27-alpha.37
388
+
389
+ ## 0.0.2-alpha.20
390
+
391
+ ### Patch Changes
392
+
393
+ - Updated dependencies [f537e33]
394
+ - Updated dependencies [bc40916]
395
+ - Updated dependencies [f7d1131]
396
+ - Updated dependencies [75bf3f0]
397
+ - Updated dependencies [3c4488b]
398
+ - Updated dependencies [d38f7a6]
399
+ - @mastra/core@0.1.27-alpha.36
400
+
401
+ ## 0.0.2-alpha.19
402
+
403
+ ### Patch Changes
404
+
405
+ - 033eda6: More fixes for refactor
406
+ - Updated dependencies [033eda6]
407
+ - @mastra/core@0.1.27-alpha.35
408
+
409
+ ## 0.0.2-alpha.18
410
+
411
+ ### Patch Changes
412
+
413
+ - 837a288: MAJOR Revamp of tools, workflows, syncs.
414
+ - 5811de6: Updates spec-writer example to use new workflows constructs. Small improvements to workflow internals. Switch transformer tokenizer for js compatible one.
415
+ - Updated dependencies [837a288]
416
+ - Updated dependencies [5811de6]
417
+ - @mastra/core@0.1.27-alpha.34
418
+
419
+ ## 0.0.2-alpha.17
420
+
421
+ ### Patch Changes
422
+
423
+ - e1dd94a: update the api for embeddings
424
+ - Updated dependencies [e1dd94a]
425
+ - @mastra/core@0.1.27-alpha.33
426
+
427
+ ## 0.0.2-alpha.16
428
+
429
+ ### Patch Changes
430
+
431
+ - Updated dependencies [2712098]
432
+ - @mastra/core@0.1.27-alpha.32
433
+
434
+ ## 0.0.2-alpha.15
435
+
436
+ ### Patch Changes
437
+
438
+ - Updated dependencies [c2dd6b5]
439
+ - @mastra/core@0.1.27-alpha.31
440
+
441
+ ## 0.0.2-alpha.14
442
+
443
+ ### Patch Changes
444
+
445
+ - 1c3232a: ChromaDB
446
+
447
+ ## 0.0.2-alpha.13
448
+
449
+ ### Patch Changes
450
+
451
+ - Updated dependencies [963c15a]
452
+ - @mastra/core@0.1.27-alpha.30
453
+
454
+ ## 0.0.2-alpha.12
455
+
456
+ ### Patch Changes
457
+
458
+ - Updated dependencies [7d87a15]
459
+ - @mastra/core@0.1.27-alpha.29
460
+
461
+ ## 0.0.2-alpha.11
462
+
463
+ ### Patch Changes
464
+
465
+ - Updated dependencies [1ebd071]
466
+ - @mastra/core@0.1.27-alpha.28
467
+
468
+ ## 0.0.2-alpha.10
469
+
470
+ ### Patch Changes
471
+
472
+ - 779702b: Upstash vector
473
+
474
+ ## 0.0.2-alpha.9
475
+
476
+ ### Patch Changes
477
+
478
+ - Updated dependencies [cd02c56]
479
+ - @mastra/core@0.1.27-alpha.27
480
+
481
+ ## 0.0.2-alpha.8
482
+
483
+ ### Patch Changes
484
+
485
+ - Updated dependencies [d5e12de]
486
+ - @mastra/core@0.1.27-alpha.26
487
+
3
488
  ## 0.0.2-alpha.7
4
489
 
5
490
  ### Patch Changes
package/LICENSE ADDED
@@ -0,0 +1,44 @@
1
+ Elastic License 2.0 (ELv2)
2
+
3
+ **Acceptance**
4
+ By using the software, you agree to all of the terms and conditions below.
5
+
6
+ **Copyright License**
7
+ The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below.
8
+
9
+ **Limitations**
10
+ You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software.
11
+
12
+ You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key.
13
+
14
+ You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor’s trademarks is subject to applicable law.
15
+
16
+ **Patents**
17
+ The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
18
+
19
+ **Notices**
20
+ You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms.
21
+
22
+ If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software.
23
+
24
+ **No Other Rights**
25
+ These terms do not imply any licenses other than those expressly granted in these terms.
26
+
27
+ **Termination**
28
+ If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently.
29
+
30
+ **No Liability**
31
+ As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.
32
+
33
+ **Definitions**
34
+ The _licensor_ is the entity offering these terms, and the _software_ is the software the licensor makes available under these terms, including any portion of it.
35
+
36
+ _you_ refers to the individual or entity agreeing to these terms.
37
+
38
+ _your company_ is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. _control_ means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
39
+
40
+ _your licenses_ are all the licenses granted to you for the software under these terms.
41
+
42
+ _use_ means anything you do with the software requiring one of your licenses.
43
+
44
+ _trademark_ means trademarks, service marks, and similar rights.
package/README.md ADDED
@@ -0,0 +1,26 @@
1
+ # @mastra/rag
2
+
3
+ The Retrieval-Augmented Generation (RAG) module contains document processing and embedding utilities.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @mastra/rag
9
+ ```
10
+
11
+ ## Components
12
+
13
+ ### Document
14
+
15
+ The `MDocument` class represents text content with associated metadata:
16
+
17
+ ```typescript
18
+ import { MDocument } from '@mastra/rag';
19
+
20
+ const doc = MDocument.fromText(
21
+ 'Document content',
22
+ { source: 'example.txt' },
23
+ );
24
+ ```
25
+
26
+ [Documentation](https://mastra.ai/docs/reference/rag/document)
package/dist/document/document.d.ts ADDED
@@ -0,0 +1,32 @@
1
+ import { Document as Chunk } from 'llamaindex';
2
+ import { ChunkOptions, ChunkParams, ExtractParams } from './types';
3
+ export declare class MDocument {
4
+ private chunks;
5
+ private type;
6
+ constructor({ docs, type }: {
7
+ docs: {
8
+ text: string;
9
+ metadata?: Record<string, any>;
10
+ }[];
11
+ type: string;
12
+ });
13
+ extractMetadata({ title, summary, questions, keywords }: ExtractParams): Promise<MDocument>;
14
+ static fromText(text: string, metadata?: Record<string, any>): MDocument;
15
+ static fromHTML(html: string, metadata?: Record<string, any>): MDocument;
16
+ static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument;
17
+ static fromJSON(jsonString: string, metadata?: Record<string, any>): MDocument;
18
+ private defaultStrategy;
19
+ private chunkBy;
20
+ chunkRecursive(options?: ChunkOptions): Promise<void>;
21
+ chunkCharacter(options?: ChunkOptions): Promise<void>;
22
+ chunkHTML(options?: ChunkOptions): Promise<void>;
23
+ chunkJSON(options?: ChunkOptions): Promise<void>;
24
+ chunkLatex(options?: ChunkOptions): Promise<void>;
25
+ chunkToken(options?: ChunkOptions): Promise<void>;
26
+ chunkMarkdown(options?: ChunkOptions): Promise<void>;
27
+ chunk(params?: ChunkParams): Promise<Chunk[]>;
28
+ getDocs(): Chunk[];
29
+ getText(): string[];
30
+ getMetadata(): Record<string, any>[];
31
+ }
32
+ //# sourceMappingURL=document.d.ts.map
package/dist/document/document.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,KAAK,EAMlB,MAAM,YAAY,CAAC;AAQpB,OAAO,EAAE,YAAY,EAAE,WAAW,EAAiB,aAAa,EAAE,MAAM,SAAS,CAAC;AAElF,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAwCjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;YAeT,OAAO;IA4Bf,cAAc,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAiBrD,cAAc,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAUrD,SAAS,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAoBhD,SAAS,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBhD,UAAU,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAMjD,UAAU,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAUjD,aAAa,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAapD,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAenD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
package/dist/document/index.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ export * from './document';
2
+ export * from './types';
3
+ //# sourceMappingURL=index.d.ts.map
package/dist/document/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/document/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC"}
package/dist/document/transformers/character.d.ts ADDED
@@ -0,0 +1,45 @@
1
+ import { ChunkOptions, Language } from '../types';
2
+ import { TextTransformer } from './text';
3
+ export declare class CharacterTransformer extends TextTransformer {
4
+ protected separator: string;
5
+ protected isSeparatorRegex: boolean;
6
+ constructor({ separator, isSeparatorRegex, options, }: {
7
+ separator?: string;
8
+ isSeparatorRegex?: boolean;
9
+ options?: {
10
+ size?: number;
11
+ overlap?: number;
12
+ lengthFunction?: (text: string) => number;
13
+ keepSeparator?: boolean | 'start' | 'end';
14
+ addStartIndex?: boolean;
15
+ stripWhitespace?: boolean;
16
+ };
17
+ });
18
+ splitText({ text }: {
19
+ text: string;
20
+ }): string[];
21
+ private __splitChunk;
22
+ }
23
+ export declare class RecursiveCharacterTransformer extends TextTransformer {
24
+ protected separators: string[];
25
+ protected isSeparatorRegex: boolean;
26
+ constructor({ separators, isSeparatorRegex, options, }: {
27
+ separators?: string[];
28
+ isSeparatorRegex?: boolean;
29
+ options?: ChunkOptions;
30
+ });
31
+ private _splitText;
32
+ splitText({ text }: {
33
+ text: string;
34
+ }): string[];
35
+ static fromLanguage(language: Language, options?: {
36
+ size?: number;
37
+ chunkOverlap?: number;
38
+ lengthFunction?: (text: string) => number;
39
+ keepSeparator?: boolean | 'start' | 'end';
40
+ addStartIndex?: boolean;
41
+ stripWhitespace?: boolean;
42
+ }): RecursiveCharacterTransformer;
43
+ static getSeparatorsForLanguage(language: Language): string[];
44
+ }
45
+ //# sourceMappingURL=character.d.ts.map
package/dist/document/transformers/character.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"character.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/character.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AA+CzC,qBAAa,oBAAqB,SAAQ,eAAe;IACvD,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;gBAExB,EACV,SAAkB,EAClB,gBAAwB,EACxB,OAAY,GACb,EAAE;QACD,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,OAAO,CAAC,EAAE;YACR,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,OAAO,CAAC,EAAE,MAAM,CAAC;YACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;YAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;YAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;YACxB,eAAe,CAAC,EAAE,OAAO,CAAC;SAC3B,CAAC;KACH;IAMD,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAqB/C,OAAO,CAAC,YAAY;CAwBrB;AAED,qBAAa,6BAA8B,SAAQ,eAAe;IAChE,SAAS,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC;IAC/B,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;gBAExB,EACV,UAAU,EACV,gBAAwB,EACxB,OAAY,GACb,EAAE;QACD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;QACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,OAAO,CAAC,EAAE,YAAY,CAAC;KACxB;IAMD,OAAO,CAAC,UAAU;IAuDlB,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAI/C,MAAM,CAAC,YAAY,CACjB,QAAQ,EAAE,QAAQ,EAClB,OAAO,GAAE;QACP,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,eAAe,CAAC,EAAE,OAAO,CAAC;KACtB,GACL,6BAA6B;IAKhC,MAAM,CAAC,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,EAAE;CAgE9D"}
package/dist/document/transformers/html.d.ts ADDED
@@ -0,0 +1,24 @@
1
+ import { Document } from 'llamaindex';
2
+ export declare class HTMLHeaderTransformer {
3
+ private headersToSplitOn;
4
+ private returnEachElement;
5
+ constructor(headersToSplitOn: [string, string][], returnEachElement?: boolean);
6
+ splitText({ text }: {
7
+ text: string;
8
+ }): Document[];
9
+ private getXPath;
10
+ private aggregateElementsToChunks;
11
+ createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
12
+ transformDocuments(documents: Document[]): Document[];
13
+ }
14
+ export declare class HTMLSectionTransformer {
15
+ private headersToSplitOn;
16
+ private options;
17
+ constructor(headersToSplitOn: [string, string][], options?: Record<string, any>);
18
+ splitText(text: string): Document[];
19
+ splitDocuments(documents: Document[]): Promise<Document[]>;
20
+ createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
21
+ private splitHtmlByHeaders;
22
+ transformDocuments(documents: Document[]): Document[];
23
+ }
24
+ //# sourceMappingURL=html.d.ts.map
package/dist/document/transformers/html.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAWtC,qBAAa,qBAAqB;IAChC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,iBAAiB,CAAU;gBAEvB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,iBAAiB,GAAE,OAAe;IAKpF,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAwCjD,OAAO,CAAC,QAAQ;IAkBhB,OAAO,CAAC,yBAAyB;IAyBjC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,gBAAgB,CAAyB;IACjD,OAAO,CAAC,OAAO,CAAsB;gBAEzB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,OAAO,GAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAM;IAKnF,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IAc7B,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAchE,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,OAAO,CAAC,kBAAkB;IAuD1B,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}