langchain 0.1.35 → 0.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/dist/chains/conversational_retrieval_chain.cjs +61 -19
  2. package/dist/chains/conversational_retrieval_chain.d.ts +61 -19
  3. package/dist/chains/conversational_retrieval_chain.js +61 -19
  4. package/dist/chains/llm_chain.cjs +10 -5
  5. package/dist/chains/llm_chain.d.ts +10 -5
  6. package/dist/chains/llm_chain.js +10 -5
  7. package/dist/chains/openai_functions/base.cjs +2 -0
  8. package/dist/chains/openai_functions/base.d.ts +2 -0
  9. package/dist/chains/openai_functions/base.js +2 -0
  10. package/dist/chains/query_constructor/index.cjs +5 -8
  11. package/dist/chains/query_constructor/index.d.ts +5 -4
  12. package/dist/chains/query_constructor/index.js +3 -6
  13. package/dist/chains/query_constructor/ir.cjs +15 -139
  14. package/dist/chains/query_constructor/ir.d.ts +1 -138
  15. package/dist/chains/query_constructor/ir.js +1 -132
  16. package/dist/chains/query_constructor/prompt.cjs +2 -2
  17. package/dist/chains/query_constructor/prompt.d.ts +1 -1
  18. package/dist/chains/query_constructor/prompt.js +1 -1
  19. package/dist/chains/retrieval_qa.cjs +23 -14
  20. package/dist/chains/retrieval_qa.d.ts +23 -14
  21. package/dist/chains/retrieval_qa.js +23 -14
  22. package/dist/document_loaders/fs/unstructured.cjs +1 -1
  23. package/dist/document_loaders/fs/unstructured.js +1 -1
  24. package/dist/document_loaders/web/browserbase.cjs +87 -0
  25. package/dist/document_loaders/web/browserbase.d.ts +49 -0
  26. package/dist/document_loaders/web/browserbase.js +80 -0
  27. package/dist/document_loaders/web/firecrawl.cjs +88 -0
  28. package/dist/document_loaders/web/firecrawl.d.ts +48 -0
  29. package/dist/document_loaders/web/firecrawl.js +81 -0
  30. package/dist/document_loaders/web/s3.cjs +2 -2
  31. package/dist/document_loaders/web/s3.js +2 -2
  32. package/dist/load/import_constants.cjs +2 -0
  33. package/dist/load/import_constants.js +2 -0
  34. package/dist/output_parsers/expression.cjs +1 -1
  35. package/dist/output_parsers/expression.d.ts +1 -1
  36. package/dist/output_parsers/expression.js +1 -1
  37. package/dist/retrievers/self_query/base.cjs +3 -136
  38. package/dist/retrievers/self_query/base.d.ts +1 -69
  39. package/dist/retrievers/self_query/base.js +1 -134
  40. package/dist/retrievers/self_query/chroma.cjs +9 -10
  41. package/dist/retrievers/self_query/chroma.d.ts +1 -1
  42. package/dist/retrievers/self_query/chroma.js +1 -2
  43. package/dist/retrievers/self_query/functional.cjs +2 -195
  44. package/dist/retrievers/self_query/functional.d.ts +1 -87
  45. package/dist/retrievers/self_query/functional.js +1 -194
  46. package/dist/retrievers/self_query/index.cjs +9 -13
  47. package/dist/retrievers/self_query/index.d.ts +11 -8
  48. package/dist/retrievers/self_query/index.js +7 -11
  49. package/dist/retrievers/self_query/pinecone.cjs +9 -10
  50. package/dist/retrievers/self_query/pinecone.d.ts +1 -1
  51. package/dist/retrievers/self_query/pinecone.js +1 -2
  52. package/dist/retrievers/self_query/supabase.cjs +28 -30
  53. package/dist/retrievers/self_query/supabase.d.ts +1 -2
  54. package/dist/retrievers/self_query/supabase.js +1 -3
  55. package/dist/retrievers/self_query/supabase_utils.cjs +2 -2
  56. package/dist/retrievers/self_query/supabase_utils.d.ts +1 -1
  57. package/dist/retrievers/self_query/supabase_utils.js +1 -1
  58. package/dist/retrievers/self_query/vectara.cjs +15 -17
  59. package/dist/retrievers/self_query/vectara.d.ts +1 -2
  60. package/dist/retrievers/self_query/vectara.js +1 -3
  61. package/dist/retrievers/self_query/weaviate.cjs +19 -21
  62. package/dist/retrievers/self_query/weaviate.d.ts +1 -2
  63. package/dist/retrievers/self_query/weaviate.js +1 -3
  64. package/dist/smith/config.d.ts +4 -4
  65. package/dist/storage/in_memory.cjs +2 -81
  66. package/dist/storage/in_memory.d.ts +1 -49
  67. package/dist/storage/in_memory.js +1 -80
  68. package/dist/text_splitter.cjs +15 -727
  69. package/dist/text_splitter.d.ts +1 -77
  70. package/dist/text_splitter.js +1 -720
  71. package/dist/vectorstores/qdrant.cjs +2 -0
  72. package/dist/vectorstores/qdrant.js +2 -0
  73. package/document_loaders/web/browserbase.cjs +1 -0
  74. package/document_loaders/web/browserbase.d.cts +1 -0
  75. package/document_loaders/web/browserbase.d.ts +1 -0
  76. package/document_loaders/web/browserbase.js +1 -0
  77. package/document_loaders/web/firecrawl.cjs +1 -0
  78. package/document_loaders/web/firecrawl.d.cts +1 -0
  79. package/document_loaders/web/firecrawl.d.ts +1 -0
  80. package/document_loaders/web/firecrawl.js +1 -0
  81. package/package.json +40 -3
  82. package/dist/retrievers/self_query/utils.cjs +0 -94
  83. package/dist/retrievers/self_query/utils.d.ts +0 -29
  84. package/dist/retrievers/self_query/utils.js +0 -85
@@ -1,729 +1,17 @@
1
1
  "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
2
16
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.LatexTextSplitter = exports.MarkdownTextSplitter = exports.TokenTextSplitter = exports.RecursiveCharacterTextSplitter = exports.SupportedTextSplitterLanguages = exports.CharacterTextSplitter = exports.TextSplitter = void 0;
4
- const documents_1 = require("@langchain/core/documents");
5
- const tiktoken_1 = require("@langchain/core/utils/tiktoken");
6
- class TextSplitter extends documents_1.BaseDocumentTransformer {
7
- constructor(fields) {
8
- super(fields);
9
- Object.defineProperty(this, "lc_namespace", {
10
- enumerable: true,
11
- configurable: true,
12
- writable: true,
13
- value: ["langchain", "document_transformers", "text_splitters"]
14
- });
15
- Object.defineProperty(this, "chunkSize", {
16
- enumerable: true,
17
- configurable: true,
18
- writable: true,
19
- value: 1000
20
- });
21
- Object.defineProperty(this, "chunkOverlap", {
22
- enumerable: true,
23
- configurable: true,
24
- writable: true,
25
- value: 200
26
- });
27
- Object.defineProperty(this, "keepSeparator", {
28
- enumerable: true,
29
- configurable: true,
30
- writable: true,
31
- value: false
32
- });
33
- Object.defineProperty(this, "lengthFunction", {
34
- enumerable: true,
35
- configurable: true,
36
- writable: true,
37
- value: void 0
38
- });
39
- this.chunkSize = fields?.chunkSize ?? this.chunkSize;
40
- this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
41
- this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator;
42
- this.lengthFunction =
43
- fields?.lengthFunction ?? ((text) => text.length);
44
- if (this.chunkOverlap >= this.chunkSize) {
45
- throw new Error("Cannot have chunkOverlap >= chunkSize");
46
- }
47
- }
48
- async transformDocuments(documents, chunkHeaderOptions = {}) {
49
- return this.splitDocuments(documents, chunkHeaderOptions);
50
- }
51
- splitOnSeparator(text, separator) {
52
- let splits;
53
- if (separator) {
54
- if (this.keepSeparator) {
55
- const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, "\\$&");
56
- splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
57
- }
58
- else {
59
- splits = text.split(separator);
60
- }
61
- }
62
- else {
63
- splits = text.split("");
64
- }
65
- return splits.filter((s) => s !== "");
66
- }
67
- async createDocuments(texts,
68
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
69
- metadatas = [], chunkHeaderOptions = {}) {
70
- // if no metadata is provided, we create an empty one for each text
71
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
72
- const _metadatas = metadatas.length > 0
73
- ? metadatas
74
- : [...Array(texts.length)].map(() => ({}));
75
- const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;
76
- const documents = new Array();
77
- for (let i = 0; i < texts.length; i += 1) {
78
- const text = texts[i];
79
- let lineCounterIndex = 1;
80
- let prevChunk = null;
81
- let indexPrevChunk = -1;
82
- for (const chunk of await this.splitText(text)) {
83
- let pageContent = chunkHeader;
84
- // we need to count the \n that are in the text before getting removed by the splitting
85
- const indexChunk = text.indexOf(chunk, indexPrevChunk + 1);
86
- if (prevChunk === null) {
87
- const newLinesBeforeFirstChunk = this.numberOfNewLines(text, 0, indexChunk);
88
- lineCounterIndex += newLinesBeforeFirstChunk;
89
- }
90
- else {
91
- const indexEndPrevChunk = indexPrevChunk + (await this.lengthFunction(prevChunk));
92
- if (indexEndPrevChunk < indexChunk) {
93
- const numberOfIntermediateNewLines = this.numberOfNewLines(text, indexEndPrevChunk, indexChunk);
94
- lineCounterIndex += numberOfIntermediateNewLines;
95
- }
96
- else if (indexEndPrevChunk > indexChunk) {
97
- const numberOfIntermediateNewLines = this.numberOfNewLines(text, indexChunk, indexEndPrevChunk);
98
- lineCounterIndex -= numberOfIntermediateNewLines;
99
- }
100
- if (appendChunkOverlapHeader) {
101
- pageContent += chunkOverlapHeader;
102
- }
103
- }
104
- const newLinesCount = this.numberOfNewLines(chunk);
105
- const loc = _metadatas[i].loc && typeof _metadatas[i].loc === "object"
106
- ? { ..._metadatas[i].loc }
107
- : {};
108
- loc.lines = {
109
- from: lineCounterIndex,
110
- to: lineCounterIndex + newLinesCount,
111
- };
112
- const metadataWithLinesNumber = {
113
- ..._metadatas[i],
114
- loc,
115
- };
116
- pageContent += chunk;
117
- documents.push(new documents_1.Document({
118
- pageContent,
119
- metadata: metadataWithLinesNumber,
120
- }));
121
- lineCounterIndex += newLinesCount;
122
- prevChunk = chunk;
123
- indexPrevChunk = indexChunk;
124
- }
125
- }
126
- return documents;
127
- }
128
- numberOfNewLines(text, start, end) {
129
- const textSection = text.slice(start, end);
130
- return (textSection.match(/\n/g) || []).length;
131
- }
132
- async splitDocuments(documents, chunkHeaderOptions = {}) {
133
- const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);
134
- const texts = selectedDocuments.map((doc) => doc.pageContent);
135
- const metadatas = selectedDocuments.map((doc) => doc.metadata);
136
- return this.createDocuments(texts, metadatas, chunkHeaderOptions);
137
- }
138
- joinDocs(docs, separator) {
139
- const text = docs.join(separator).trim();
140
- return text === "" ? null : text;
141
- }
142
- async mergeSplits(splits, separator) {
143
- const docs = [];
144
- const currentDoc = [];
145
- let total = 0;
146
- for (const d of splits) {
147
- const _len = await this.lengthFunction(d);
148
- if (total + _len + currentDoc.length * separator.length >
149
- this.chunkSize) {
150
- if (total > this.chunkSize) {
151
- console.warn(`Created a chunk of size ${total}, +
152
- which is longer than the specified ${this.chunkSize}`);
153
- }
154
- if (currentDoc.length > 0) {
155
- const doc = this.joinDocs(currentDoc, separator);
156
- if (doc !== null) {
157
- docs.push(doc);
158
- }
159
- // Keep on popping if:
160
- // - we have a larger chunk than in the chunk overlap
161
- // - or if we still have any chunks and the length is long
162
- while (total > this.chunkOverlap ||
163
- (total + _len + currentDoc.length * separator.length >
164
- this.chunkSize &&
165
- total > 0)) {
166
- total -= await this.lengthFunction(currentDoc[0]);
167
- currentDoc.shift();
168
- }
169
- }
170
- }
171
- currentDoc.push(d);
172
- total += _len;
173
- }
174
- const doc = this.joinDocs(currentDoc, separator);
175
- if (doc !== null) {
176
- docs.push(doc);
177
- }
178
- return docs;
179
- }
180
- }
181
- exports.TextSplitter = TextSplitter;
182
- class CharacterTextSplitter extends TextSplitter {
183
- static lc_name() {
184
- return "CharacterTextSplitter";
185
- }
186
- constructor(fields) {
187
- super(fields);
188
- Object.defineProperty(this, "separator", {
189
- enumerable: true,
190
- configurable: true,
191
- writable: true,
192
- value: "\n\n"
193
- });
194
- this.separator = fields?.separator ?? this.separator;
195
- }
196
- async splitText(text) {
197
- // First we naively split the large input into a bunch of smaller ones.
198
- const splits = this.splitOnSeparator(text, this.separator);
199
- return this.mergeSplits(splits, this.keepSeparator ? "" : this.separator);
200
- }
201
- }
202
- exports.CharacterTextSplitter = CharacterTextSplitter;
203
- exports.SupportedTextSplitterLanguages = [
204
- "cpp",
205
- "go",
206
- "java",
207
- "js",
208
- "php",
209
- "proto",
210
- "python",
211
- "rst",
212
- "ruby",
213
- "rust",
214
- "scala",
215
- "swift",
216
- "markdown",
217
- "latex",
218
- "html",
219
- "sol",
220
- ];
221
- class RecursiveCharacterTextSplitter extends TextSplitter {
222
- static lc_name() {
223
- return "RecursiveCharacterTextSplitter";
224
- }
225
- constructor(fields) {
226
- super(fields);
227
- Object.defineProperty(this, "separators", {
228
- enumerable: true,
229
- configurable: true,
230
- writable: true,
231
- value: ["\n\n", "\n", " ", ""]
232
- });
233
- this.separators = fields?.separators ?? this.separators;
234
- this.keepSeparator = fields?.keepSeparator ?? true;
235
- }
236
- async _splitText(text, separators) {
237
- const finalChunks = [];
238
- // Get appropriate separator to use
239
- let separator = separators[separators.length - 1];
240
- let newSeparators;
241
- for (let i = 0; i < separators.length; i += 1) {
242
- const s = separators[i];
243
- if (s === "") {
244
- separator = s;
245
- break;
246
- }
247
- if (text.includes(s)) {
248
- separator = s;
249
- newSeparators = separators.slice(i + 1);
250
- break;
251
- }
252
- }
253
- // Now that we have the separator, split the text
254
- const splits = this.splitOnSeparator(text, separator);
255
- // Now go merging things, recursively splitting longer texts.
256
- let goodSplits = [];
257
- const _separator = this.keepSeparator ? "" : separator;
258
- for (const s of splits) {
259
- if ((await this.lengthFunction(s)) < this.chunkSize) {
260
- goodSplits.push(s);
261
- }
262
- else {
263
- if (goodSplits.length) {
264
- const mergedText = await this.mergeSplits(goodSplits, _separator);
265
- finalChunks.push(...mergedText);
266
- goodSplits = [];
267
- }
268
- if (!newSeparators) {
269
- finalChunks.push(s);
270
- }
271
- else {
272
- const otherInfo = await this._splitText(s, newSeparators);
273
- finalChunks.push(...otherInfo);
274
- }
275
- }
276
- }
277
- if (goodSplits.length) {
278
- const mergedText = await this.mergeSplits(goodSplits, _separator);
279
- finalChunks.push(...mergedText);
280
- }
281
- return finalChunks;
282
- }
283
- async splitText(text) {
284
- return this._splitText(text, this.separators);
285
- }
286
- static fromLanguage(language, options) {
287
- return new RecursiveCharacterTextSplitter({
288
- ...options,
289
- separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language),
290
- });
291
- }
292
- static getSeparatorsForLanguage(language) {
293
- if (language === "cpp") {
294
- return [
295
- // Split along class definitions
296
- "\nclass ",
297
- // Split along function definitions
298
- "\nvoid ",
299
- "\nint ",
300
- "\nfloat ",
301
- "\ndouble ",
302
- // Split along control flow statements
303
- "\nif ",
304
- "\nfor ",
305
- "\nwhile ",
306
- "\nswitch ",
307
- "\ncase ",
308
- // Split by the normal type of lines
309
- "\n\n",
310
- "\n",
311
- " ",
312
- "",
313
- ];
314
- }
315
- else if (language === "go") {
316
- return [
317
- // Split along function definitions
318
- "\nfunc ",
319
- "\nvar ",
320
- "\nconst ",
321
- "\ntype ",
322
- // Split along control flow statements
323
- "\nif ",
324
- "\nfor ",
325
- "\nswitch ",
326
- "\ncase ",
327
- // Split by the normal type of lines
328
- "\n\n",
329
- "\n",
330
- " ",
331
- "",
332
- ];
333
- }
334
- else if (language === "java") {
335
- return [
336
- // Split along class definitions
337
- "\nclass ",
338
- // Split along method definitions
339
- "\npublic ",
340
- "\nprotected ",
341
- "\nprivate ",
342
- "\nstatic ",
343
- // Split along control flow statements
344
- "\nif ",
345
- "\nfor ",
346
- "\nwhile ",
347
- "\nswitch ",
348
- "\ncase ",
349
- // Split by the normal type of lines
350
- "\n\n",
351
- "\n",
352
- " ",
353
- "",
354
- ];
355
- }
356
- else if (language === "js") {
357
- return [
358
- // Split along function definitions
359
- "\nfunction ",
360
- "\nconst ",
361
- "\nlet ",
362
- "\nvar ",
363
- "\nclass ",
364
- // Split along control flow statements
365
- "\nif ",
366
- "\nfor ",
367
- "\nwhile ",
368
- "\nswitch ",
369
- "\ncase ",
370
- "\ndefault ",
371
- // Split by the normal type of lines
372
- "\n\n",
373
- "\n",
374
- " ",
375
- "",
376
- ];
377
- }
378
- else if (language === "php") {
379
- return [
380
- // Split along function definitions
381
- "\nfunction ",
382
- // Split along class definitions
383
- "\nclass ",
384
- // Split along control flow statements
385
- "\nif ",
386
- "\nforeach ",
387
- "\nwhile ",
388
- "\ndo ",
389
- "\nswitch ",
390
- "\ncase ",
391
- // Split by the normal type of lines
392
- "\n\n",
393
- "\n",
394
- " ",
395
- "",
396
- ];
397
- }
398
- else if (language === "proto") {
399
- return [
400
- // Split along message definitions
401
- "\nmessage ",
402
- // Split along service definitions
403
- "\nservice ",
404
- // Split along enum definitions
405
- "\nenum ",
406
- // Split along option definitions
407
- "\noption ",
408
- // Split along import statements
409
- "\nimport ",
410
- // Split along syntax declarations
411
- "\nsyntax ",
412
- // Split by the normal type of lines
413
- "\n\n",
414
- "\n",
415
- " ",
416
- "",
417
- ];
418
- }
419
- else if (language === "python") {
420
- return [
421
- // First, try to split along class definitions
422
- "\nclass ",
423
- "\ndef ",
424
- "\n\tdef ",
425
- // Now split by the normal type of lines
426
- "\n\n",
427
- "\n",
428
- " ",
429
- "",
430
- ];
431
- }
432
- else if (language === "rst") {
433
- return [
434
- // Split along section titles
435
- "\n===\n",
436
- "\n---\n",
437
- "\n***\n",
438
- // Split along directive markers
439
- "\n.. ",
440
- // Split by the normal type of lines
441
- "\n\n",
442
- "\n",
443
- " ",
444
- "",
445
- ];
446
- }
447
- else if (language === "ruby") {
448
- return [
449
- // Split along method definitions
450
- "\ndef ",
451
- "\nclass ",
452
- // Split along control flow statements
453
- "\nif ",
454
- "\nunless ",
455
- "\nwhile ",
456
- "\nfor ",
457
- "\ndo ",
458
- "\nbegin ",
459
- "\nrescue ",
460
- // Split by the normal type of lines
461
- "\n\n",
462
- "\n",
463
- " ",
464
- "",
465
- ];
466
- }
467
- else if (language === "rust") {
468
- return [
469
- // Split along function definitions
470
- "\nfn ",
471
- "\nconst ",
472
- "\nlet ",
473
- // Split along control flow statements
474
- "\nif ",
475
- "\nwhile ",
476
- "\nfor ",
477
- "\nloop ",
478
- "\nmatch ",
479
- "\nconst ",
480
- // Split by the normal type of lines
481
- "\n\n",
482
- "\n",
483
- " ",
484
- "",
485
- ];
486
- }
487
- else if (language === "scala") {
488
- return [
489
- // Split along class definitions
490
- "\nclass ",
491
- "\nobject ",
492
- // Split along method definitions
493
- "\ndef ",
494
- "\nval ",
495
- "\nvar ",
496
- // Split along control flow statements
497
- "\nif ",
498
- "\nfor ",
499
- "\nwhile ",
500
- "\nmatch ",
501
- "\ncase ",
502
- // Split by the normal type of lines
503
- "\n\n",
504
- "\n",
505
- " ",
506
- "",
507
- ];
508
- }
509
- else if (language === "swift") {
510
- return [
511
- // Split along function definitions
512
- "\nfunc ",
513
- // Split along class definitions
514
- "\nclass ",
515
- "\nstruct ",
516
- "\nenum ",
517
- // Split along control flow statements
518
- "\nif ",
519
- "\nfor ",
520
- "\nwhile ",
521
- "\ndo ",
522
- "\nswitch ",
523
- "\ncase ",
524
- // Split by the normal type of lines
525
- "\n\n",
526
- "\n",
527
- " ",
528
- "",
529
- ];
530
- }
531
- else if (language === "markdown") {
532
- return [
533
- // First, try to split along Markdown headings (starting with level 2)
534
- "\n## ",
535
- "\n### ",
536
- "\n#### ",
537
- "\n##### ",
538
- "\n###### ",
539
- // Note the alternative syntax for headings (below) is not handled here
540
- // Heading level 2
541
- // ---------------
542
- // End of code block
543
- "```\n\n",
544
- // Horizontal lines
545
- "\n\n***\n\n",
546
- "\n\n---\n\n",
547
- "\n\n___\n\n",
548
- // Note that this splitter doesn't handle horizontal lines defined
549
- // by *three or more* of ***, ---, or ___, but this is not handled
550
- "\n\n",
551
- "\n",
552
- " ",
553
- "",
554
- ];
555
- }
556
- else if (language === "latex") {
557
- return [
558
- // First, try to split along Latex sections
559
- "\n\\chapter{",
560
- "\n\\section{",
561
- "\n\\subsection{",
562
- "\n\\subsubsection{",
563
- // Now split by environments
564
- "\n\\begin{enumerate}",
565
- "\n\\begin{itemize}",
566
- "\n\\begin{description}",
567
- "\n\\begin{list}",
568
- "\n\\begin{quote}",
569
- "\n\\begin{quotation}",
570
- "\n\\begin{verse}",
571
- "\n\\begin{verbatim}",
572
- // Now split by math environments
573
- "\n\\begin{align}",
574
- "$$",
575
- "$",
576
- // Now split by the normal type of lines
577
- "\n\n",
578
- "\n",
579
- " ",
580
- "",
581
- ];
582
- }
583
- else if (language === "html") {
584
- return [
585
- // First, try to split along HTML tags
586
- "<body>",
587
- "<div>",
588
- "<p>",
589
- "<br>",
590
- "<li>",
591
- "<h1>",
592
- "<h2>",
593
- "<h3>",
594
- "<h4>",
595
- "<h5>",
596
- "<h6>",
597
- "<span>",
598
- "<table>",
599
- "<tr>",
600
- "<td>",
601
- "<th>",
602
- "<ul>",
603
- "<ol>",
604
- "<header>",
605
- "<footer>",
606
- "<nav>",
607
- // Head
608
- "<head>",
609
- "<style>",
610
- "<script>",
611
- "<meta>",
612
- "<title>",
613
- // Normal type of lines
614
- " ",
615
- "",
616
- ];
617
- }
618
- else if (language === "sol") {
619
- return [
620
- // Split along compiler informations definitions
621
- "\npragma ",
622
- "\nusing ",
623
- // Split along contract definitions
624
- "\ncontract ",
625
- "\ninterface ",
626
- "\nlibrary ",
627
- // Split along method definitions
628
- "\nconstructor ",
629
- "\ntype ",
630
- "\nfunction ",
631
- "\nevent ",
632
- "\nmodifier ",
633
- "\nerror ",
634
- "\nstruct ",
635
- "\nenum ",
636
- // Split along control flow statements
637
- "\nif ",
638
- "\nfor ",
639
- "\nwhile ",
640
- "\ndo while ",
641
- "\nassembly ",
642
- // Split by the normal type of lines
643
- "\n\n",
644
- "\n",
645
- " ",
646
- "",
647
- ];
648
- }
649
- else {
650
- throw new Error(`Language ${language} is not supported.`);
651
- }
652
- }
653
- }
654
- exports.RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter;
655
- /**
656
- * Implementation of splitter which looks at tokens.
657
- */
658
- class TokenTextSplitter extends TextSplitter {
659
- static lc_name() {
660
- return "TokenTextSplitter";
661
- }
662
- constructor(fields) {
663
- super(fields);
664
- Object.defineProperty(this, "encodingName", {
665
- enumerable: true,
666
- configurable: true,
667
- writable: true,
668
- value: void 0
669
- });
670
- Object.defineProperty(this, "allowedSpecial", {
671
- enumerable: true,
672
- configurable: true,
673
- writable: true,
674
- value: void 0
675
- });
676
- Object.defineProperty(this, "disallowedSpecial", {
677
- enumerable: true,
678
- configurable: true,
679
- writable: true,
680
- value: void 0
681
- });
682
- Object.defineProperty(this, "tokenizer", {
683
- enumerable: true,
684
- configurable: true,
685
- writable: true,
686
- value: void 0
687
- });
688
- this.encodingName = fields?.encodingName ?? "gpt2";
689
- this.allowedSpecial = fields?.allowedSpecial ?? [];
690
- this.disallowedSpecial = fields?.disallowedSpecial ?? "all";
691
- }
692
- async splitText(text) {
693
- if (!this.tokenizer) {
694
- this.tokenizer = await (0, tiktoken_1.getEncoding)(this.encodingName);
695
- }
696
- const splits = [];
697
- const input_ids = this.tokenizer.encode(text, this.allowedSpecial, this.disallowedSpecial);
698
- let start_idx = 0;
699
- while (start_idx < input_ids.length) {
700
- if (start_idx > 0) {
701
- start_idx -= this.chunkOverlap;
702
- }
703
- const end_idx = Math.min(start_idx + this.chunkSize, input_ids.length);
704
- const chunk_ids = input_ids.slice(start_idx, end_idx);
705
- splits.push(this.tokenizer.decode(chunk_ids));
706
- start_idx = end_idx;
707
- }
708
- return splits;
709
- }
710
- }
711
- exports.TokenTextSplitter = TokenTextSplitter;
712
- class MarkdownTextSplitter extends RecursiveCharacterTextSplitter {
713
- constructor(fields) {
714
- super({
715
- ...fields,
716
- separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("markdown"),
717
- });
718
- }
719
- }
720
- exports.MarkdownTextSplitter = MarkdownTextSplitter;
721
- class LatexTextSplitter extends RecursiveCharacterTextSplitter {
722
- constructor(fields) {
723
- super({
724
- ...fields,
725
- separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("latex"),
726
- });
727
- }
728
- }
729
- exports.LatexTextSplitter = LatexTextSplitter;
17
+ __exportStar(require("@langchain/textsplitters"), exports);