@mastra/rag 0.0.2-alpha.4 → 0.0.2-alpha.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/CHANGELOG.md +298 -0
  2. package/dist/astra-db/index.d.ts +54 -0
  3. package/dist/astra-db/index.d.ts.map +1 -0
  4. package/dist/chroma/index.d.ts +21 -0
  5. package/dist/chroma/index.d.ts.map +1 -0
  6. package/dist/document/document.d.ts +32 -0
  7. package/dist/document/document.d.ts.map +1 -0
  8. package/dist/document/index.d.ts +3 -0
  9. package/dist/document/index.d.ts.map +1 -0
  10. package/dist/document/transformers/character.d.ts +45 -0
  11. package/dist/document/transformers/character.d.ts.map +1 -0
  12. package/dist/document/transformers/html.d.ts +24 -0
  13. package/dist/document/transformers/html.d.ts.map +1 -0
  14. package/dist/document/transformers/json.d.ts +57 -0
  15. package/dist/document/transformers/json.d.ts.map +1 -0
  16. package/dist/document/transformers/latex.d.ts +12 -0
  17. package/dist/document/transformers/latex.d.ts.map +1 -0
  18. package/dist/document/transformers/markdown.d.ts +25 -0
  19. package/dist/document/transformers/markdown.d.ts.map +1 -0
  20. package/dist/document/transformers/text.d.ts +22 -0
  21. package/dist/document/transformers/text.d.ts.map +1 -0
  22. package/dist/document/transformers/token.d.ts +46 -0
  23. package/dist/document/transformers/token.d.ts.map +1 -0
  24. package/dist/document/transformers/transformer.d.ts +5 -0
  25. package/dist/document/transformers/transformer.d.ts.map +1 -0
  26. package/dist/document/types.d.ts +88 -0
  27. package/dist/document/types.d.ts.map +1 -0
  28. package/dist/embeddings/index.d.ts +4 -0
  29. package/dist/embeddings/index.d.ts.map +1 -0
  30. package/dist/index.d.ts +9 -0
  31. package/dist/index.d.ts.map +1 -0
  32. package/dist/index.js +8 -0
  33. package/dist/pg/filter.d.ts +18 -0
  34. package/dist/pg/filter.d.ts.map +1 -0
  35. package/dist/pg/index.d.ts +15 -0
  36. package/dist/pg/index.d.ts.map +1 -0
  37. package/dist/pinecone/index.d.ts +12 -0
  38. package/dist/pinecone/index.d.ts.map +1 -0
  39. package/dist/qdrant/index.d.ts +12 -0
  40. package/dist/qdrant/index.d.ts.map +1 -0
  41. package/dist/rag.cjs.development.js +3729 -0
  42. package/dist/rag.cjs.development.js.map +1 -0
  43. package/dist/rag.cjs.production.min.js +2 -0
  44. package/dist/rag.cjs.production.min.js.map +1 -0
  45. package/dist/rag.esm.js +3715 -0
  46. package/dist/rag.esm.js.map +1 -0
  47. package/dist/upstash/index.d.ts +20 -0
  48. package/dist/upstash/index.d.ts.map +1 -0
  49. package/dist/utils/index.d.ts +3 -0
  50. package/dist/utils/index.d.ts.map +1 -0
  51. package/dist/utils/rag-tools.d.ts +91 -0
  52. package/dist/utils/rag-tools.d.ts.map +1 -0
  53. package/dist/utils/re-ranker.d.ts +47 -0
  54. package/dist/utils/re-ranker.d.ts.map +1 -0
  55. package/docker-compose.yaml +4 -0
  56. package/package.json +10 -3
  57. package/src/astra-db/index.test.ts +201 -0
  58. package/src/astra-db/index.ts +149 -0
  59. package/src/chroma/index.test.ts +154 -0
  60. package/src/chroma/index.ts +138 -0
  61. package/src/document/document.test.ts +809 -0
  62. package/src/document/document.ts +281 -0
  63. package/src/document/index.ts +2 -129
  64. package/src/document/transformers/character.ts +278 -0
  65. package/src/document/transformers/html.ts +283 -0
  66. package/src/document/transformers/json.ts +265 -0
  67. package/src/document/transformers/latex.ts +19 -0
  68. package/src/document/transformers/markdown.ts +244 -0
  69. package/src/document/transformers/text.ts +134 -0
  70. package/src/document/transformers/token.ts +147 -0
  71. package/src/document/transformers/transformer.ts +5 -0
  72. package/src/document/types.ts +103 -0
  73. package/src/embeddings/index.ts +16 -0
  74. package/src/index.ts +5 -1
  75. package/src/pg/filter.ts +77 -0
  76. package/src/pg/index.test.ts +405 -0
  77. package/src/pg/index.ts +71 -24
  78. package/src/pinecone/index.ts +3 -0
  79. package/src/qdrant/index.ts +3 -0
  80. package/src/upstash/index.test.ts +98 -0
  81. package/src/upstash/index.ts +92 -0
  82. package/src/utils/index.ts +2 -0
  83. package/src/utils/rag-tools.ts +149 -0
  84. package/src/utils/re-ranker.test.ts +222 -0
  85. package/src/utils/re-ranker.ts +159 -0
  86. package/src/document/index.test.ts +0 -229
  87. package/src/pg/index_test.ts +0 -212
package/CHANGELOG.md CHANGED
@@ -1,5 +1,303 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 0.0.2-alpha.42
4
+
5
+ ### Patch Changes
6
+
7
+ - 1874f40: Added re-ranking tool to RAG
8
+ - Updated dependencies [1874f40]
9
+ - Updated dependencies [4b1ce2c]
10
+ - @mastra/core@0.1.27-alpha.58
11
+
12
+ ## 0.0.2-alpha.41
13
+
14
+ ### Patch Changes
15
+
16
+ - 7de6d71: Update filter for vector query to work with more stores
17
+ - Updated dependencies [fd494a3]
18
+ - @mastra/core@0.1.27-alpha.57
19
+
20
+ ## 0.0.2-alpha.40
21
+
22
+ ### Patch Changes
23
+
24
+ - ae638a4: Make the useFilter option optional
25
+ - Updated dependencies [9f3ab05]
26
+ - @mastra/core@0.1.27-alpha.56
27
+
28
+ ## 0.0.2-alpha.39
29
+
30
+ ### Patch Changes
31
+
32
+ - 592e3cf: Add custom rag tools, add vector retrieval, and update docs
33
+ - 837a288: MAJOR Revamp of tools, workflows, syncs.
34
+ - 0b74006: Workflow updates
35
+ - Updated dependencies [592e3cf]
36
+ - Updated dependencies [837a288]
37
+ - Updated dependencies [0b74006]
38
+ - @mastra/core@0.1.27-alpha.55
39
+
40
+ ## 0.0.2-alpha.38
41
+
42
+ ### Patch Changes
43
+
44
+ - eb45d76: Updated PG Vector filter and added rag examples in docs
45
+ - Updated dependencies [d2cd535]
46
+ - @mastra/core@0.1.27-alpha.54
47
+
48
+ ## 0.0.2-alpha.37
49
+
50
+ ### Patch Changes
51
+
52
+ - Updated dependencies [8e7814f]
53
+ - @mastra/core@0.1.27-alpha.53
54
+
55
+ ## 0.0.2-alpha.36
56
+
57
+ ### Patch Changes
58
+
59
+ - Updated dependencies [eedb829]
60
+ - @mastra/core@0.1.27-alpha.52
61
+
62
+ ## 0.0.2-alpha.35
63
+
64
+ ### Patch Changes
65
+
66
+ - Updated dependencies [a7b016d]
67
+ - Updated dependencies [da2e8d3]
68
+ - Updated dependencies [538a136]
69
+ - @mastra/core@0.1.27-alpha.51
70
+
71
+ ## 0.0.2-alpha.34
72
+
73
+ ### Patch Changes
74
+
75
+ - Updated dependencies [401a4d9]
76
+ - @mastra/core@0.1.27-alpha.50
77
+
78
+ ## 0.0.2-alpha.33
79
+
80
+ ### Patch Changes
81
+
82
+ - a621c34: Add validation for indexName in pgVector and dimension for all vector dbs
83
+ - Updated dependencies [79acad0]
84
+ - Updated dependencies [f5dfa20]
85
+ - @mastra/core@0.1.27-alpha.49
86
+
87
+ ## 0.0.2-alpha.32
88
+
89
+ ### Patch Changes
90
+
91
+ - Updated dependencies [b726bf5]
92
+ - @mastra/core@0.1.27-alpha.48
93
+
94
+ ## 0.0.2-alpha.31
95
+
96
+ ### Patch Changes
97
+
98
+ - Updated dependencies [f6ba259]
99
+ - @mastra/core@0.1.27-alpha.47
100
+
101
+ ## 0.0.2-alpha.30
102
+
103
+ ### Patch Changes
104
+
105
+ - 8ae2bbc: Dane publishing
106
+ - 0bd142c: Fixes learned from docs
107
+ - ee4de15: Dane fixes
108
+ - Updated dependencies [8ae2bbc]
109
+ - Updated dependencies [0bd142c]
110
+ - Updated dependencies [ee4de15]
111
+ - @mastra/core@0.1.27-alpha.46
112
+
113
+ ## 0.0.2-alpha.29
114
+
115
+ ### Patch Changes
116
+
117
+ - Updated dependencies [e608d8c]
118
+ - Updated dependencies [002d6d8]
119
+ - @mastra/core@0.1.27-alpha.45
120
+
121
+ ## 0.0.2-alpha.28
122
+
123
+ ### Patch Changes
124
+
125
+ - Updated dependencies [2fa7f53]
126
+ - @mastra/core@0.1.27-alpha.44
127
+
128
+ ## 0.0.2-alpha.27
129
+
130
+ ### Patch Changes
131
+
132
+ - Updated dependencies [2e099d2]
133
+ - Updated dependencies [d6d8159]
134
+ - @mastra/core@0.1.27-alpha.43
135
+
136
+ ## 0.0.2-alpha.26
137
+
138
+ ### Patch Changes
139
+
140
+ - Updated dependencies [4a54c82]
141
+ - @mastra/core@0.1.27-alpha.42
142
+
143
+ ## 0.0.2-alpha.25
144
+
145
+ ### Patch Changes
146
+
147
+ - Updated dependencies [5cdfb88]
148
+ - @mastra/core@0.1.27-alpha.41
149
+
150
+ ## 0.0.2-alpha.24
151
+
152
+ ### Patch Changes
153
+
154
+ - Updated dependencies [9029796]
155
+ - @mastra/core@0.1.27-alpha.40
156
+
157
+ ## 0.0.2-alpha.23
158
+
159
+ ### Patch Changes
160
+
161
+ - 6101f2d: Fix module incompatibility and Dane CLI build
162
+ - Updated dependencies [2b01511]
163
+ - @mastra/core@0.1.27-alpha.39
164
+
165
+ ## 0.0.2-alpha.22
166
+
167
+ ### Patch Changes
168
+
169
+ - f031a1f: Expose embed from RAG and refactor embed
170
+ - Updated dependencies [f031a1f]
171
+ - @mastra/core@0.1.27-alpha.38
172
+
173
+ ## 0.0.2-alpha.21
174
+
175
+ ### Patch Changes
176
+
177
+ - 45fd5b8: rename MastraDocument to MDocument
178
+ - Updated dependencies [c872875]
179
+ - Updated dependencies [f6da688]
180
+ - Updated dependencies [b5393f1]
181
+ - @mastra/core@0.1.27-alpha.37
182
+
183
+ ## 0.0.2-alpha.20
184
+
185
+ ### Patch Changes
186
+
187
+ - Updated dependencies [f537e33]
188
+ - Updated dependencies [bc40916]
189
+ - Updated dependencies [f7d1131]
190
+ - Updated dependencies [75bf3f0]
191
+ - Updated dependencies [3c4488b]
192
+ - Updated dependencies [d38f7a6]
193
+ - @mastra/core@0.1.27-alpha.36
194
+
195
+ ## 0.0.2-alpha.19
196
+
197
+ ### Patch Changes
198
+
199
+ - 033eda6: More fixes for refactor
200
+ - Updated dependencies [033eda6]
201
+ - @mastra/core@0.1.27-alpha.35
202
+
203
+ ## 0.0.2-alpha.18
204
+
205
+ ### Patch Changes
206
+
207
+ - 837a288: MAJOR Revamp of tools, workflows, syncs.
208
+ - 5811de6: Updates spec-writer example to use new workflows constructs. Small improvements to workflow internals. Switch transformer tokenizer for js compatible one.
209
+ - Updated dependencies [837a288]
210
+ - Updated dependencies [5811de6]
211
+ - @mastra/core@0.1.27-alpha.34
212
+
213
+ ## 0.0.2-alpha.17
214
+
215
+ ### Patch Changes
216
+
217
+ - e1dd94a: Update the API for embeddings
218
+ - Updated dependencies [e1dd94a]
219
+ - @mastra/core@0.1.27-alpha.33
220
+
221
+ ## 0.0.2-alpha.16
222
+
223
+ ### Patch Changes
224
+
225
+ - Updated dependencies [2712098]
226
+ - @mastra/core@0.1.27-alpha.32
227
+
228
+ ## 0.0.2-alpha.15
229
+
230
+ ### Patch Changes
231
+
232
+ - Updated dependencies [c2dd6b5]
233
+ - @mastra/core@0.1.27-alpha.31
234
+
235
+ ## 0.0.2-alpha.14
236
+
237
+ ### Patch Changes
238
+
239
+ - 1c3232a: ChromaDB
240
+
241
+ ## 0.0.2-alpha.13
242
+
243
+ ### Patch Changes
244
+
245
+ - Updated dependencies [963c15a]
246
+ - @mastra/core@0.1.27-alpha.30
247
+
248
+ ## 0.0.2-alpha.12
249
+
250
+ ### Patch Changes
251
+
252
+ - Updated dependencies [7d87a15]
253
+ - @mastra/core@0.1.27-alpha.29
254
+
255
+ ## 0.0.2-alpha.11
256
+
257
+ ### Patch Changes
258
+
259
+ - Updated dependencies [1ebd071]
260
+ - @mastra/core@0.1.27-alpha.28
261
+
262
+ ## 0.0.2-alpha.10
263
+
264
+ ### Patch Changes
265
+
266
+ - 779702b: Upstash vector
267
+
268
+ ## 0.0.2-alpha.9
269
+
270
+ ### Patch Changes
271
+
272
+ - Updated dependencies [cd02c56]
273
+ - @mastra/core@0.1.27-alpha.27
274
+
275
+ ## 0.0.2-alpha.8
276
+
277
+ ### Patch Changes
278
+
279
+ - Updated dependencies [d5e12de]
280
+ - @mastra/core@0.1.27-alpha.26
281
+
282
+ ## 0.0.2-alpha.7
283
+
284
+ ### Patch Changes
285
+
286
+ - 24fe87e: Change document semantics
287
+
288
+ ## 0.0.2-alpha.6
289
+
290
+ ### Patch Changes
291
+
292
+ - Updated dependencies [01502b0]
293
+ - @mastra/core@0.1.27-alpha.25
294
+
295
+ ## 0.0.2-alpha.5
296
+
297
+ ### Patch Changes
298
+
299
+ - 036ee5e: Add astra-db to rag
300
+
3
301
  ## 0.0.2-alpha.4
4
302
 
5
303
  ### Patch Changes
@@ -0,0 +1,54 @@
1
+ import { MastraVector, QueryResult, IndexStats } from '@mastra/core';
2
+ export interface AstraDbOptions {
3
+ token: string;
4
+ endpoint: string;
5
+ keyspace?: string;
6
+ }
7
+ export declare class AstraVector extends MastraVector {
8
+ #private;
9
+ constructor({ token, endpoint, keyspace }: AstraDbOptions);
10
+ /**
11
+ * Creates a new collection with the specified configuration.
12
+ *
13
+ * @param {string} indexName - The name of the collection to create.
14
+ * @param {number} dimension - The dimension of the vectors to be stored in the collection.
15
+ * @param {'cosine' | 'euclidean' | 'dotproduct'} [metric=cosine] - The metric to use to sort vectors in the collection.
16
+ * @returns {Promise<void>} A promise that resolves when the collection is created.
17
+ */
18
+ createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
19
+ /**
20
+ * Inserts or updates vectors in the specified collection.
21
+ *
22
+ * @param {string} indexName - The name of the collection to upsert into.
23
+ * @param {number[][]} vectors - An array of vectors to upsert.
24
+ * @param {Record<string, any>[]} [metadata] - An optional array of metadata objects corresponding to each vector.
25
+ * @param {string[]} [ids] - An optional array of IDs corresponding to each vector. If not provided, new IDs will be generated.
26
+ * @returns {Promise<string[]>} A promise that resolves to an array of IDs of the upserted vectors.
27
+ */
28
+ upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
29
+ /**
30
+ * Queries the specified collection using a vector and optional filter.
31
+ *
32
+ * @param {string} indexName - The name of the collection to query.
33
+ * @param {number[]} queryVector - The vector to query with.
34
+ * @param {number} [topK] - The maximum number of results to return.
35
+ * @param {Record<string, any>} [filter] - An optional filter to apply to the query. For more on filters in Astra DB, see the filtering reference: https://docs.datastax.com/en/astra-db-serverless/api-reference/documents.html#operators
36
+ * @returns {Promise<QueryResult[]>} A promise that resolves to an array of query results.
37
+ */
38
+ query(indexName: string, queryVector: number[], topK?: number, filter?: Record<string, any>): Promise<QueryResult[]>;
39
+ /**
40
+ * Lists all collections in the database.
41
+ *
42
+ * @returns {Promise<string[]>} A promise that resolves to an array of collection names.
43
+ */
44
+ listIndexes(): Promise<string[]>;
45
+ describeIndex(indexName: string): Promise<IndexStats>;
46
+ /**
47
+ * Deletes the specified collection.
48
+ *
49
+ * @param {string} indexName - The name of the collection to delete.
50
+ * @returns {Promise<void>} A promise that resolves when the collection is deleted.
51
+ */
52
+ deleteIndex(indexName: string): Promise<void>;
53
+ }
54
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/astra-db/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AASrE,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,WAAY,SAAQ,YAAY;;gBAG/B,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,EAAE,cAAc;IAMzD;;;;;;;OAOG;IACG,WAAW,CACf,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,QAAQ,GAAG,WAAW,GAAG,YAAuB,GACvD,OAAO,CAAC,IAAI,CAAC;IAahB;;;;;;;;OAQG;IACG,MAAM,CACV,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EAAE,EAAE,EACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,EAChC,GAAG,CAAC,EAAE,MAAM,EAAE,GACb,OAAO,CAAC,MAAM,EAAE,CAAC;IAgBpB;;;;;;;;OAQG;IACG,KAAK,CACT,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,EAAE,EACrB,IAAI,CAAC,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC3B,OAAO,CAAC,WAAW,EAAE,CAAC;IAkBzB;;;;OAIG;IACH,WAAW,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;IAI1B,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC;IAiB3D;;;;;OAKG;IACG,WAAW,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAIpD"}
@@ -0,0 +1,21 @@
1
+ import { MastraVector, QueryResult, IndexStats } from '@mastra/core';
2
+ export declare class ChromaVector extends MastraVector {
3
+ private client;
4
+ private collections;
5
+ constructor({ path, auth, }: {
6
+ path: string;
7
+ auth?: {
8
+ provider: string;
9
+ credentials: string;
10
+ };
11
+ });
12
+ private getCollection;
13
+ private validateVectorDimensions;
14
+ upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
15
+ createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
16
+ query(indexName: string, queryVector: number[], topK?: number, filter?: Record<string, any>): Promise<QueryResult[]>;
17
+ listIndexes(): Promise<string[]>;
18
+ describeIndex(indexName: string): Promise<IndexStats>;
19
+ deleteIndex(indexName: string): Promise<void>;
20
+ }
21
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/chroma/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAGrE,qBAAa,YAAa,SAAQ,YAAY;IAC5C,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,WAAW,CAAmB;gBAE1B,EACV,IAAI,EACJ,IAAI,GACL,EAAE;QACD,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,CAAC,EAAE;YACL,QAAQ,EAAE,MAAM,CAAC;YACjB,WAAW,EAAE,MAAM,CAAC;SACrB,CAAC;KACH;YASa,aAAa;IAa3B,OAAO,CAAC,wBAAwB;IAU1B,MAAM,CACV,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EAAE,EAAE,EACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,EAChC,GAAG,CAAC,EAAE,MAAM,EAAE,GACb,OAAO,CAAC,MAAM,EAAE,CAAC;IAwBd,WAAW,CACf,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,QAAQ,GAAG,WAAW,GAAG,YAAuB,GACvD,OAAO,CAAC,IAAI,CAAC;IAaV,KAAK,CACT,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,EAAE,EACrB,IAAI,GAAE,MAAW,EACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC3B,OAAO,CAAC,WAAW,EAAE,CAAC;IAiBnB,WAAW,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;IAKhC,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC;IAYrD,WAAW,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAIpD"}
@@ -0,0 +1,32 @@
1
+ import { Document as Chunk } from 'llamaindex';
2
+ import { ChunkOptions, ChunkParams, ExtractParams } from './types';
3
+ export declare class MDocument {
4
+ private chunks;
5
+ private type;
6
+ constructor({ docs, type }: {
7
+ docs: {
8
+ text: string;
9
+ metadata?: Record<string, any>;
10
+ }[];
11
+ type: string;
12
+ });
13
+ extractMetadata({ title, summary, questions, keywords }: ExtractParams): Promise<MDocument>;
14
+ static fromText(text: string, metadata?: Record<string, any>): MDocument;
15
+ static fromHTML(html: string, metadata?: Record<string, any>): MDocument;
16
+ static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument;
17
+ static fromJSON(jsonString: string, metadata?: Record<string, any>): MDocument;
18
+ private defaultStrategy;
19
+ private chunkBy;
20
+ chunkRecursive(options?: ChunkOptions): Promise<void>;
21
+ chunkCharacter(options?: ChunkOptions): Promise<void>;
22
+ chunkHTML(options?: ChunkOptions): Promise<void>;
23
+ chunkJSON(options?: ChunkOptions): Promise<void>;
24
+ chunkLatex(options?: ChunkOptions): Promise<void>;
25
+ chunkToken(options?: ChunkOptions): Promise<void>;
26
+ chunkMarkdown(options?: ChunkOptions): Promise<void>;
27
+ chunk(params?: ChunkParams): Promise<MDocument['chunks']>;
28
+ getDocs(): Chunk[];
29
+ getText(): string[];
30
+ getMetadata(): Record<string, any>[];
31
+ }
32
+ //# sourceMappingURL=document.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,IAAI,KAAK,EAMlB,MAAM,YAAY,CAAC;AAQpB,OAAO,EAAE,YAAY,EAAE,WAAW,EAAiB,aAAa,EAAE,MAAM,SAAS,CAAC;AAElF,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAwCjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;YAeT,OAAO;IA4Bf,cAAc,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAiBrD,cAAc,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAUrD,SAAS,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAoBhD,SAAS,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBhD,UAAU,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAMjD,UAAU,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAUjD,aAAa,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAapD,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAe/D,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
@@ -0,0 +1,3 @@
1
+ export * from './document';
2
+ export * from './types';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/document/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC"}
@@ -0,0 +1,45 @@
1
+ import { ChunkOptions, Language } from '../types';
2
+ import { TextTransformer } from './text';
3
+ export declare class CharacterTransformer extends TextTransformer {
4
+ protected separator: string;
5
+ protected isSeparatorRegex: boolean;
6
+ constructor({ separator, isSeparatorRegex, options, }: {
7
+ separator?: string;
8
+ isSeparatorRegex?: boolean;
9
+ options?: {
10
+ size?: number;
11
+ overlap?: number;
12
+ lengthFunction?: (text: string) => number;
13
+ keepSeparator?: boolean | 'start' | 'end';
14
+ addStartIndex?: boolean;
15
+ stripWhitespace?: boolean;
16
+ };
17
+ });
18
+ splitText({ text }: {
19
+ text: string;
20
+ }): string[];
21
+ private __splitChunk;
22
+ }
23
+ export declare class RecursiveCharacterTransformer extends TextTransformer {
24
+ protected separators: string[];
25
+ protected isSeparatorRegex: boolean;
26
+ constructor({ separators, isSeparatorRegex, options, }: {
27
+ separators?: string[];
28
+ isSeparatorRegex?: boolean;
29
+ options?: ChunkOptions;
30
+ });
31
+ private _splitText;
32
+ splitText({ text }: {
33
+ text: string;
34
+ }): string[];
35
+ static fromLanguage(language: Language, options?: {
36
+ size?: number;
37
+ chunkOverlap?: number;
38
+ lengthFunction?: (text: string) => number;
39
+ keepSeparator?: boolean | 'start' | 'end';
40
+ addStartIndex?: boolean;
41
+ stripWhitespace?: boolean;
42
+ }): RecursiveCharacterTransformer;
43
+ static getSeparatorsForLanguage(language: Language): string[];
44
+ }
45
+ //# sourceMappingURL=character.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"character.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/character.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AA+CzC,qBAAa,oBAAqB,SAAQ,eAAe;IACvD,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;gBAExB,EACV,SAAkB,EAClB,gBAAwB,EACxB,OAAY,GACb,EAAE;QACD,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,OAAO,CAAC,EAAE;YACR,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,OAAO,CAAC,EAAE,MAAM,CAAC;YACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;YAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;YAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;YACxB,eAAe,CAAC,EAAE,OAAO,CAAC;SAC3B,CAAC;KACH;IAMD,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAqB/C,OAAO,CAAC,YAAY;CAwBrB;AAED,qBAAa,6BAA8B,SAAQ,eAAe;IAChE,SAAS,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC;IAC/B,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;gBAExB,EACV,UAAU,EACV,gBAAwB,EACxB,OAAY,GACb,EAAE;QACD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;QACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,OAAO,CAAC,EAAE,YAAY,CAAC;KACxB;IAMD,OAAO,CAAC,UAAU;IAuDlB,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAI/C,MAAM,CAAC,YAAY,CACjB,QAAQ,EAAE,QAAQ,EAClB,OAAO,GAAE;QACP,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,eAAe,CAAC,EAAE,OAAO,CAAC;KACtB,GACL,6BAA6B;IAKhC,MAAM,CAAC,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,EAAE;CAgE9D"}
@@ -0,0 +1,24 @@
1
+ import { Document } from 'llamaindex';
2
+ export declare class HTMLHeaderTransformer {
3
+ private headersToSplitOn;
4
+ private returnEachElement;
5
+ constructor(headersToSplitOn: [string, string][], returnEachElement?: boolean);
6
+ splitText({ text }: {
7
+ text: string;
8
+ }): Document[];
9
+ private getXPath;
10
+ private aggregateElementsToChunks;
11
+ createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
12
+ transformDocuments(documents: Document[]): Document[];
13
+ }
14
+ export declare class HTMLSectionTransformer {
15
+ private headersToSplitOn;
16
+ private options;
17
+ constructor(headersToSplitOn: [string, string][], options?: Record<string, any>);
18
+ splitText(text: string): Document[];
19
+ splitDocuments(documents: Document[]): Promise<Document[]>;
20
+ createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
21
+ private splitHtmlByHeaders;
22
+ transformDocuments(documents: Document[]): Document[];
23
+ }
24
+ //# sourceMappingURL=html.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAWtC,qBAAa,qBAAqB;IAChC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,iBAAiB,CAAU;gBAEvB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,iBAAiB,GAAE,OAAe;IAKpF,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAwCjD,OAAO,CAAC,QAAQ;IAkBhB,OAAO,CAAC,yBAAyB;IAyBjC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,gBAAgB,CAAyB;IACjD,OAAO,CAAC,OAAO,CAAsB;gBAEzB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,OAAO,GAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAM;IAKnF,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IAc7B,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAchE,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,OAAO,CAAC,kBAAkB;IAuD1B,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
@@ -0,0 +1,57 @@
1
+ import { Document } from 'llamaindex';
2
+ export declare class RecursiveJsonTransformer {
3
+ private maxSize;
4
+ private minSize;
5
+ constructor({ maxSize, minSize }: {
6
+ maxSize: number;
7
+ minSize?: number;
8
+ });
9
+ private static jsonSize;
10
+ /**
11
+ * Transform JSON data while handling circular references
12
+ */
13
+ transform(data: Record<string, any>): Record<string, any>;
14
+ /**
15
+ * Set a value in a nested dictionary based on the given path
16
+ */
17
+ private static setNestedDict;
18
+ /**
19
+ * Convert lists in the JSON structure to dictionaries with index-based keys
20
+ */
21
+ private listToDictPreprocessing;
22
+ /**
23
+ * Split json into maximum size dictionaries while preserving structure
24
+ */
25
+ private jsonSplit;
26
+ /**
27
+ * Splits JSON into a list of JSON chunks
28
+ */
29
+ splitJson({ jsonData, convertLists, }: {
30
+ jsonData: Record<string, any>;
31
+ convertLists?: boolean;
32
+ }): Record<string, any>[];
33
+ private escapeNonAscii;
34
+ /**
35
+ * Splits JSON into a list of JSON formatted strings
36
+ */
37
+ splitText({ jsonData, convertLists, ensureAscii, }: {
38
+ jsonData: Record<string, any>;
39
+ convertLists?: boolean;
40
+ ensureAscii?: boolean;
41
+ }): string[];
42
+ /**
43
+ * Create documents from a list of json objects
44
+ */
45
+ createDocuments({ texts, convertLists, ensureAscii, metadatas, }: {
46
+ texts: string[];
47
+ convertLists?: boolean;
48
+ ensureAscii?: boolean;
49
+ metadatas?: Record<string, any>[];
50
+ }): Document[];
51
+ transformDocuments({ ensureAscii, documents, convertLists, }: {
52
+ ensureAscii?: boolean;
53
+ convertLists?: boolean;
54
+ documents: Document[];
55
+ }): Document[];
56
+ }
57
+ //# sourceMappingURL=json.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEtC,qBAAa,wBAAwB;IACnC,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAS;gBAEZ,EAAE,OAAc,EAAE,OAAO,EAAE,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE;IAK9E,OAAO,CAAC,MAAM,CAAC,QAAQ;IAmCvB;;OAEG;IACI,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAmChE;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAS5B;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAU/B;;OAEG;IACH,OAAO,CAAC,SAAS;IAsCjB;;OAEG;IACH,SAAS,CAAC,EACR,QAAQ,EACR,YAAoB,GACrB,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;KACxB,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;IAYzB,OAAO,CAAC,cAAc;IAatB;;OAEG;IACH,SAAS,CAAC,EACR,QAAQ,EACR,YAAoB,EACpB,WAAkB,GACnB,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,WAAW,CAAC,EAAE,OAAO,CAAC;KACvB,GAAG,MAAM,EAAE;IAWZ;;OAEG;IACH,eAAe,CAAC,EACd,KAAK,EACL,YAAoB,EACpB,WAAkB,EAClB,SAAS,GACV,EAAE;QACD,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC;KACnC,GAAG,QAAQ,EAAE;IAoBd,kBAAkB,CAAC,EACjB,WAAW,EACX,SAAS,EACT,YAAY,GACb,EAAE;QACD,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,SAAS,EAAE,QAAQ,EAAE,CAAC;KACvB,GAAG,QAAQ,EAAE;CAiBf"}
@@ -0,0 +1,12 @@
1
+ import { RecursiveCharacterTransformer } from './character';
2
+ export declare class LatexTransformer extends RecursiveCharacterTransformer {
3
+ constructor(options?: {
4
+ size?: number;
5
+ overlap?: number;
6
+ lengthFunction?: (text: string) => number;
7
+ keepSeparator?: boolean | 'start' | 'end';
8
+ addStartIndex?: boolean;
9
+ stripWhitespace?: boolean;
10
+ });
11
+ }
12
+ //# sourceMappingURL=latex.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"latex.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/latex.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAE5D,qBAAa,gBAAiB,SAAQ,6BAA6B;gBAE/D,OAAO,GAAE;QACP,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,eAAe,CAAC,EAAE,OAAO,CAAC;KACtB;CAKT"}
@@ -0,0 +1,25 @@
1
+ import { Document } from 'llamaindex';
2
+ import { RecursiveCharacterTransformer } from './character';
3
+ export declare class MarkdownTransformer extends RecursiveCharacterTransformer {
4
+ constructor(options?: {
5
+ chunkSize?: number;
6
+ chunkOverlap?: number;
7
+ lengthFunction?: (text: string) => number;
8
+ keepSeparator?: boolean | 'start' | 'end';
9
+ addStartIndex?: boolean;
10
+ stripWhitespace?: boolean;
11
+ });
12
+ }
13
+ export declare class MarkdownHeaderTransformer {
14
+ private headersToSplitOn;
15
+ private returnEachLine;
16
+ private stripHeaders;
17
+ constructor(headersToSplitOn: [string, string][], returnEachLine?: boolean, stripHeaders?: boolean);
18
+ private aggregateLinesToChunks;
19
+ splitText({ text }: {
20
+ text: string;
21
+ }): Document[];
22
+ createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
23
+ transformDocuments(documents: Document[]): Document[];
24
+ }
25
+ //# sourceMappingURL=markdown.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAItC,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAa5D,qBAAa,mBAAoB,SAAQ,6BAA6B;gBAElE,OAAO,GAAE;QACP,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,eAAe,CAAC,EAAE,OAAO,CAAC;KACtB;CAKT;AAED,qBAAa,yBAAyB;IACpC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,YAAY,CAAU;gBAElB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,cAAc,GAAE,OAAe,EAAE,YAAY,GAAE,OAAc;IAM/G,OAAO,CAAC,sBAAsB;IAsD9B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAmHjD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IAmB/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
@@ -0,0 +1,22 @@
1
+ import { Document } from 'llamaindex';
2
+ import { ChunkOptions } from '../types';
3
+ import { Transformer } from './transformer';
4
+ export declare abstract class TextTransformer implements Transformer {
5
+ protected size: number;
6
+ protected overlap: number;
7
+ protected lengthFunction: (text: string) => number;
8
+ protected keepSeparator: boolean | 'start' | 'end';
9
+ protected addStartIndex: boolean;
10
+ protected stripWhitespace: boolean;
11
+ constructor({ size, overlap, lengthFunction, keepSeparator, addStartIndex, stripWhitespace, }: ChunkOptions);
12
+ setAddStartIndex(value: boolean): void;
13
+ abstract splitText({ text }: {
14
+ text: string;
15
+ }): string[];
16
+ createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
17
+ splitDocuments(documents: Document[]): Document[];
18
+ transformDocuments(documents: Document[]): Document[];
19
+ protected joinDocs(docs: string[], separator: string): string | null;
20
+ protected mergeSplits(splits: string[], separator: string): string[];
21
+ }
22
+ //# sourceMappingURL=text.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEtC,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAExC,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAE5C,8BAAsB,eAAgB,YAAW,WAAW;IAC1D,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,CAAC;IACjC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;gBAEvB,EACV,IAAW,EACX,OAAa,EACb,cAA8C,EAC9C,aAAqB,EACrB,aAAqB,EACrB,eAAsB,GACvB,EAAE,YAAY;IAYf,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAItC,QAAQ,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAExD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA4B/E,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAUjD,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAYrD,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAQpE,SAAS,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE;CAoCrE"}