langchain 0.0.85 → 0.0.87
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/callbacks/handlers/tracer_langchain.cjs +12 -67
- package/dist/callbacks/handlers/tracer_langchain.d.ts +5 -23
- package/dist/callbacks/handlers/tracer_langchain.js +12 -67
- package/dist/chains/conversational_retrieval_chain.cjs +20 -2
- package/dist/chains/conversational_retrieval_chain.d.ts +3 -2
- package/dist/chains/conversational_retrieval_chain.js +20 -2
- package/dist/client/langchainplus.cjs +37 -56
- package/dist/client/langchainplus.d.ts +8 -5
- package/dist/client/langchainplus.js +38 -57
- package/dist/memory/zep.cjs +13 -1
- package/dist/memory/zep.js +14 -2
- package/dist/text_splitter.cjs +406 -79
- package/dist/text_splitter.d.ts +8 -2
- package/dist/text_splitter.js +405 -78
- package/package.json +1 -1
package/dist/text_splitter.js
CHANGED
|
@@ -14,12 +14,35 @@ export class TextSplitter {
|
|
|
14
14
|
writable: true,
|
|
15
15
|
value: 200
|
|
16
16
|
});
|
|
17
|
+
Object.defineProperty(this, "keepSeparator", {
|
|
18
|
+
enumerable: true,
|
|
19
|
+
configurable: true,
|
|
20
|
+
writable: true,
|
|
21
|
+
value: false
|
|
22
|
+
});
|
|
17
23
|
this.chunkSize = fields?.chunkSize ?? this.chunkSize;
|
|
18
24
|
this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
|
|
25
|
+
this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator;
|
|
19
26
|
if (this.chunkOverlap >= this.chunkSize) {
|
|
20
27
|
throw new Error("Cannot have chunkOverlap >= chunkSize");
|
|
21
28
|
}
|
|
22
29
|
}
|
|
30
|
+
splitOnSeparator(text, separator) {
|
|
31
|
+
let splits;
|
|
32
|
+
if (separator) {
|
|
33
|
+
if (this.keepSeparator) {
|
|
34
|
+
const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, "\\$&");
|
|
35
|
+
splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
splits = text.split(separator);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
else {
|
|
42
|
+
splits = text.split("");
|
|
43
|
+
}
|
|
44
|
+
return splits.filter((s) => s !== "");
|
|
45
|
+
}
|
|
23
46
|
async createDocuments(texts,
|
|
24
47
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
25
48
|
metadatas = [], chunkHeaderOptions = {}) {
|
|
@@ -128,16 +151,27 @@ export class CharacterTextSplitter extends TextSplitter {
|
|
|
128
151
|
}
|
|
129
152
|
async splitText(text) {
|
|
130
153
|
// First we naively split the large input into a bunch of smaller ones.
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
splits = text.split(this.separator);
|
|
134
|
-
}
|
|
135
|
-
else {
|
|
136
|
-
splits = text.split("");
|
|
137
|
-
}
|
|
138
|
-
return this.mergeSplits(splits, this.separator);
|
|
154
|
+
const splits = this.splitOnSeparator(text, this.separator);
|
|
155
|
+
return this.mergeSplits(splits, this.keepSeparator ? "" : this.separator);
|
|
139
156
|
}
|
|
140
157
|
}
|
|
158
|
+
export const SupportedTextSplitterLanguages = [
|
|
159
|
+
"cpp",
|
|
160
|
+
"go",
|
|
161
|
+
"java",
|
|
162
|
+
"js",
|
|
163
|
+
"php",
|
|
164
|
+
"proto",
|
|
165
|
+
"python",
|
|
166
|
+
"rst",
|
|
167
|
+
"ruby",
|
|
168
|
+
"rust",
|
|
169
|
+
"scala",
|
|
170
|
+
"swift",
|
|
171
|
+
"markdown",
|
|
172
|
+
"latex",
|
|
173
|
+
"html",
|
|
174
|
+
];
|
|
141
175
|
export class RecursiveCharacterTextSplitter extends TextSplitter {
|
|
142
176
|
constructor(fields) {
|
|
143
177
|
super(fields);
|
|
@@ -148,51 +182,394 @@ export class RecursiveCharacterTextSplitter extends TextSplitter {
|
|
|
148
182
|
value: ["\n\n", "\n", " ", ""]
|
|
149
183
|
});
|
|
150
184
|
this.separators = fields?.separators ?? this.separators;
|
|
185
|
+
this.keepSeparator = fields?.keepSeparator ?? true;
|
|
151
186
|
}
|
|
152
|
-
async
|
|
187
|
+
async _splitText(text, separators) {
|
|
153
188
|
const finalChunks = [];
|
|
154
189
|
// Get appropriate separator to use
|
|
155
|
-
let separator =
|
|
156
|
-
|
|
190
|
+
let separator = separators[separators.length - 1];
|
|
191
|
+
let newSeparators;
|
|
192
|
+
for (let i = 0; i < separators.length; i += 1) {
|
|
193
|
+
const s = separators[i];
|
|
157
194
|
if (s === "") {
|
|
158
195
|
separator = s;
|
|
159
196
|
break;
|
|
160
197
|
}
|
|
161
198
|
if (text.includes(s)) {
|
|
162
199
|
separator = s;
|
|
200
|
+
newSeparators = separators.slice(i + 1);
|
|
163
201
|
break;
|
|
164
202
|
}
|
|
165
203
|
}
|
|
166
204
|
// Now that we have the separator, split the text
|
|
167
|
-
|
|
168
|
-
if (separator) {
|
|
169
|
-
splits = text.split(separator);
|
|
170
|
-
}
|
|
171
|
-
else {
|
|
172
|
-
splits = text.split("");
|
|
173
|
-
}
|
|
205
|
+
const splits = this.splitOnSeparator(text, separator);
|
|
174
206
|
// Now go merging things, recursively splitting longer texts.
|
|
175
207
|
let goodSplits = [];
|
|
208
|
+
const _separator = this.keepSeparator ? "" : separator;
|
|
176
209
|
for (const s of splits) {
|
|
177
210
|
if (s.length < this.chunkSize) {
|
|
178
211
|
goodSplits.push(s);
|
|
179
212
|
}
|
|
180
213
|
else {
|
|
181
214
|
if (goodSplits.length) {
|
|
182
|
-
const mergedText = this.mergeSplits(goodSplits,
|
|
215
|
+
const mergedText = this.mergeSplits(goodSplits, _separator);
|
|
183
216
|
finalChunks.push(...mergedText);
|
|
184
217
|
goodSplits = [];
|
|
185
218
|
}
|
|
186
|
-
|
|
187
|
-
|
|
219
|
+
if (!newSeparators) {
|
|
220
|
+
finalChunks.push(s);
|
|
221
|
+
}
|
|
222
|
+
else {
|
|
223
|
+
const otherInfo = await this._splitText(s, newSeparators);
|
|
224
|
+
finalChunks.push(...otherInfo);
|
|
225
|
+
}
|
|
188
226
|
}
|
|
189
227
|
}
|
|
190
228
|
if (goodSplits.length) {
|
|
191
|
-
const mergedText = this.mergeSplits(goodSplits,
|
|
229
|
+
const mergedText = this.mergeSplits(goodSplits, _separator);
|
|
192
230
|
finalChunks.push(...mergedText);
|
|
193
231
|
}
|
|
194
232
|
return finalChunks;
|
|
195
233
|
}
|
|
234
|
+
async splitText(text) {
|
|
235
|
+
return this._splitText(text, this.separators);
|
|
236
|
+
}
|
|
237
|
+
static fromLanguage(language, options) {
|
|
238
|
+
return new RecursiveCharacterTextSplitter({
|
|
239
|
+
...options,
|
|
240
|
+
separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language),
|
|
241
|
+
});
|
|
242
|
+
}
|
|
243
|
+
static getSeparatorsForLanguage(language) {
|
|
244
|
+
if (language === "cpp") {
|
|
245
|
+
return [
|
|
246
|
+
// Split along class definitions
|
|
247
|
+
"\nclass ",
|
|
248
|
+
// Split along function definitions
|
|
249
|
+
"\nvoid ",
|
|
250
|
+
"\nint ",
|
|
251
|
+
"\nfloat ",
|
|
252
|
+
"\ndouble ",
|
|
253
|
+
// Split along control flow statements
|
|
254
|
+
"\nif ",
|
|
255
|
+
"\nfor ",
|
|
256
|
+
"\nwhile ",
|
|
257
|
+
"\nswitch ",
|
|
258
|
+
"\ncase ",
|
|
259
|
+
// Split by the normal type of lines
|
|
260
|
+
"\n\n",
|
|
261
|
+
"\n",
|
|
262
|
+
" ",
|
|
263
|
+
"",
|
|
264
|
+
];
|
|
265
|
+
}
|
|
266
|
+
else if (language === "go") {
|
|
267
|
+
return [
|
|
268
|
+
// Split along function definitions
|
|
269
|
+
"\nfunc ",
|
|
270
|
+
"\nvar ",
|
|
271
|
+
"\nconst ",
|
|
272
|
+
"\ntype ",
|
|
273
|
+
// Split along control flow statements
|
|
274
|
+
"\nif ",
|
|
275
|
+
"\nfor ",
|
|
276
|
+
"\nswitch ",
|
|
277
|
+
"\ncase ",
|
|
278
|
+
// Split by the normal type of lines
|
|
279
|
+
"\n\n",
|
|
280
|
+
"\n",
|
|
281
|
+
" ",
|
|
282
|
+
"",
|
|
283
|
+
];
|
|
284
|
+
}
|
|
285
|
+
else if (language === "java") {
|
|
286
|
+
return [
|
|
287
|
+
// Split along class definitions
|
|
288
|
+
"\nclass ",
|
|
289
|
+
// Split along method definitions
|
|
290
|
+
"\npublic ",
|
|
291
|
+
"\nprotected ",
|
|
292
|
+
"\nprivate ",
|
|
293
|
+
"\nstatic ",
|
|
294
|
+
// Split along control flow statements
|
|
295
|
+
"\nif ",
|
|
296
|
+
"\nfor ",
|
|
297
|
+
"\nwhile ",
|
|
298
|
+
"\nswitch ",
|
|
299
|
+
"\ncase ",
|
|
300
|
+
// Split by the normal type of lines
|
|
301
|
+
"\n\n",
|
|
302
|
+
"\n",
|
|
303
|
+
" ",
|
|
304
|
+
"",
|
|
305
|
+
];
|
|
306
|
+
}
|
|
307
|
+
else if (language === "js") {
|
|
308
|
+
return [
|
|
309
|
+
// Split along function definitions
|
|
310
|
+
"\nfunction ",
|
|
311
|
+
"\nconst ",
|
|
312
|
+
"\nlet ",
|
|
313
|
+
"\nvar ",
|
|
314
|
+
"\nclass ",
|
|
315
|
+
// Split along control flow statements
|
|
316
|
+
"\nif ",
|
|
317
|
+
"\nfor ",
|
|
318
|
+
"\nwhile ",
|
|
319
|
+
"\nswitch ",
|
|
320
|
+
"\ncase ",
|
|
321
|
+
"\ndefault ",
|
|
322
|
+
// Split by the normal type of lines
|
|
323
|
+
"\n\n",
|
|
324
|
+
"\n",
|
|
325
|
+
" ",
|
|
326
|
+
"",
|
|
327
|
+
];
|
|
328
|
+
}
|
|
329
|
+
else if (language === "php") {
|
|
330
|
+
return [
|
|
331
|
+
// Split along function definitions
|
|
332
|
+
"\nfunction ",
|
|
333
|
+
// Split along class definitions
|
|
334
|
+
"\nclass ",
|
|
335
|
+
// Split along control flow statements
|
|
336
|
+
"\nif ",
|
|
337
|
+
"\nforeach ",
|
|
338
|
+
"\nwhile ",
|
|
339
|
+
"\ndo ",
|
|
340
|
+
"\nswitch ",
|
|
341
|
+
"\ncase ",
|
|
342
|
+
// Split by the normal type of lines
|
|
343
|
+
"\n\n",
|
|
344
|
+
"\n",
|
|
345
|
+
" ",
|
|
346
|
+
"",
|
|
347
|
+
];
|
|
348
|
+
}
|
|
349
|
+
else if (language === "proto") {
|
|
350
|
+
return [
|
|
351
|
+
// Split along message definitions
|
|
352
|
+
"\nmessage ",
|
|
353
|
+
// Split along service definitions
|
|
354
|
+
"\nservice ",
|
|
355
|
+
// Split along enum definitions
|
|
356
|
+
"\nenum ",
|
|
357
|
+
// Split along option definitions
|
|
358
|
+
"\noption ",
|
|
359
|
+
// Split along import statements
|
|
360
|
+
"\nimport ",
|
|
361
|
+
// Split along syntax declarations
|
|
362
|
+
"\nsyntax ",
|
|
363
|
+
// Split by the normal type of lines
|
|
364
|
+
"\n\n",
|
|
365
|
+
"\n",
|
|
366
|
+
" ",
|
|
367
|
+
"",
|
|
368
|
+
];
|
|
369
|
+
}
|
|
370
|
+
else if (language === "python") {
|
|
371
|
+
return [
|
|
372
|
+
// First, try to split along class definitions
|
|
373
|
+
"\nclass ",
|
|
374
|
+
"\ndef ",
|
|
375
|
+
"\n\tdef ",
|
|
376
|
+
// Now split by the normal type of lines
|
|
377
|
+
"\n\n",
|
|
378
|
+
"\n",
|
|
379
|
+
" ",
|
|
380
|
+
"",
|
|
381
|
+
];
|
|
382
|
+
}
|
|
383
|
+
else if (language === "rst") {
|
|
384
|
+
return [
|
|
385
|
+
// Split along section titles
|
|
386
|
+
"\n===\n",
|
|
387
|
+
"\n---\n",
|
|
388
|
+
"\n***\n",
|
|
389
|
+
// Split along directive markers
|
|
390
|
+
"\n.. ",
|
|
391
|
+
// Split by the normal type of lines
|
|
392
|
+
"\n\n",
|
|
393
|
+
"\n",
|
|
394
|
+
" ",
|
|
395
|
+
"",
|
|
396
|
+
];
|
|
397
|
+
}
|
|
398
|
+
else if (language === "ruby") {
|
|
399
|
+
return [
|
|
400
|
+
// Split along method definitions
|
|
401
|
+
"\ndef ",
|
|
402
|
+
"\nclass ",
|
|
403
|
+
// Split along control flow statements
|
|
404
|
+
"\nif ",
|
|
405
|
+
"\nunless ",
|
|
406
|
+
"\nwhile ",
|
|
407
|
+
"\nfor ",
|
|
408
|
+
"\ndo ",
|
|
409
|
+
"\nbegin ",
|
|
410
|
+
"\nrescue ",
|
|
411
|
+
// Split by the normal type of lines
|
|
412
|
+
"\n\n",
|
|
413
|
+
"\n",
|
|
414
|
+
" ",
|
|
415
|
+
"",
|
|
416
|
+
];
|
|
417
|
+
}
|
|
418
|
+
else if (language === "rust") {
|
|
419
|
+
return [
|
|
420
|
+
// Split along function definitions
|
|
421
|
+
"\nfn ",
|
|
422
|
+
"\nconst ",
|
|
423
|
+
"\nlet ",
|
|
424
|
+
// Split along control flow statements
|
|
425
|
+
"\nif ",
|
|
426
|
+
"\nwhile ",
|
|
427
|
+
"\nfor ",
|
|
428
|
+
"\nloop ",
|
|
429
|
+
"\nmatch ",
|
|
430
|
+
"\nconst ",
|
|
431
|
+
// Split by the normal type of lines
|
|
432
|
+
"\n\n",
|
|
433
|
+
"\n",
|
|
434
|
+
" ",
|
|
435
|
+
"",
|
|
436
|
+
];
|
|
437
|
+
}
|
|
438
|
+
else if (language === "scala") {
|
|
439
|
+
return [
|
|
440
|
+
// Split along class definitions
|
|
441
|
+
"\nclass ",
|
|
442
|
+
"\nobject ",
|
|
443
|
+
// Split along method definitions
|
|
444
|
+
"\ndef ",
|
|
445
|
+
"\nval ",
|
|
446
|
+
"\nvar ",
|
|
447
|
+
// Split along control flow statements
|
|
448
|
+
"\nif ",
|
|
449
|
+
"\nfor ",
|
|
450
|
+
"\nwhile ",
|
|
451
|
+
"\nmatch ",
|
|
452
|
+
"\ncase ",
|
|
453
|
+
// Split by the normal type of lines
|
|
454
|
+
"\n\n",
|
|
455
|
+
"\n",
|
|
456
|
+
" ",
|
|
457
|
+
"",
|
|
458
|
+
];
|
|
459
|
+
}
|
|
460
|
+
else if (language === "swift") {
|
|
461
|
+
return [
|
|
462
|
+
// Split along function definitions
|
|
463
|
+
"\nfunc ",
|
|
464
|
+
// Split along class definitions
|
|
465
|
+
"\nclass ",
|
|
466
|
+
"\nstruct ",
|
|
467
|
+
"\nenum ",
|
|
468
|
+
// Split along control flow statements
|
|
469
|
+
"\nif ",
|
|
470
|
+
"\nfor ",
|
|
471
|
+
"\nwhile ",
|
|
472
|
+
"\ndo ",
|
|
473
|
+
"\nswitch ",
|
|
474
|
+
"\ncase ",
|
|
475
|
+
// Split by the normal type of lines
|
|
476
|
+
"\n\n",
|
|
477
|
+
"\n",
|
|
478
|
+
" ",
|
|
479
|
+
"",
|
|
480
|
+
];
|
|
481
|
+
}
|
|
482
|
+
else if (language === "markdown") {
|
|
483
|
+
return [
|
|
484
|
+
// First, try to split along Markdown headings (starting with level 2)
|
|
485
|
+
"\n## ",
|
|
486
|
+
"\n### ",
|
|
487
|
+
"\n#### ",
|
|
488
|
+
"\n##### ",
|
|
489
|
+
"\n###### ",
|
|
490
|
+
// Note the alternative syntax for headings (below) is not handled here
|
|
491
|
+
// Heading level 2
|
|
492
|
+
// ---------------
|
|
493
|
+
// End of code block
|
|
494
|
+
"```\n\n",
|
|
495
|
+
// Horizontal lines
|
|
496
|
+
"\n\n***\n\n",
|
|
497
|
+
"\n\n---\n\n",
|
|
498
|
+
"\n\n___\n\n",
|
|
499
|
+
// Note that this splitter doesn't handle horizontal lines defined
|
|
500
|
+
// by *three or more* of ***, ---, or ___, but this is not handled
|
|
501
|
+
"\n\n",
|
|
502
|
+
"\n",
|
|
503
|
+
" ",
|
|
504
|
+
"",
|
|
505
|
+
];
|
|
506
|
+
}
|
|
507
|
+
else if (language === "latex") {
|
|
508
|
+
return [
|
|
509
|
+
// First, try to split along Latex sections
|
|
510
|
+
"\n\\chapter{",
|
|
511
|
+
"\n\\section{",
|
|
512
|
+
"\n\\subsection{",
|
|
513
|
+
"\n\\subsubsection{",
|
|
514
|
+
// Now split by environments
|
|
515
|
+
"\n\\begin{enumerate}",
|
|
516
|
+
"\n\\begin{itemize}",
|
|
517
|
+
"\n\\begin{description}",
|
|
518
|
+
"\n\\begin{list}",
|
|
519
|
+
"\n\\begin{quote}",
|
|
520
|
+
"\n\\begin{quotation}",
|
|
521
|
+
"\n\\begin{verse}",
|
|
522
|
+
"\n\\begin{verbatim}",
|
|
523
|
+
// Now split by math environments
|
|
524
|
+
"\n\\begin{align}",
|
|
525
|
+
"$$",
|
|
526
|
+
"$",
|
|
527
|
+
// Now split by the normal type of lines
|
|
528
|
+
"\n\n",
|
|
529
|
+
"\n",
|
|
530
|
+
" ",
|
|
531
|
+
"",
|
|
532
|
+
];
|
|
533
|
+
}
|
|
534
|
+
else if (language === "html") {
|
|
535
|
+
return [
|
|
536
|
+
// First, try to split along HTML tags
|
|
537
|
+
"<body>",
|
|
538
|
+
"<div>",
|
|
539
|
+
"<p>",
|
|
540
|
+
"<br>",
|
|
541
|
+
"<li>",
|
|
542
|
+
"<h1>",
|
|
543
|
+
"<h2>",
|
|
544
|
+
"<h3>",
|
|
545
|
+
"<h4>",
|
|
546
|
+
"<h5>",
|
|
547
|
+
"<h6>",
|
|
548
|
+
"<span>",
|
|
549
|
+
"<table>",
|
|
550
|
+
"<tr>",
|
|
551
|
+
"<td>",
|
|
552
|
+
"<th>",
|
|
553
|
+
"<ul>",
|
|
554
|
+
"<ol>",
|
|
555
|
+
"<header>",
|
|
556
|
+
"<footer>",
|
|
557
|
+
"<nav>",
|
|
558
|
+
// Head
|
|
559
|
+
"<head>",
|
|
560
|
+
"<style>",
|
|
561
|
+
"<script>",
|
|
562
|
+
"<meta>",
|
|
563
|
+
"<title>",
|
|
564
|
+
// Normal type of lines
|
|
565
|
+
" ",
|
|
566
|
+
"",
|
|
567
|
+
];
|
|
568
|
+
}
|
|
569
|
+
else {
|
|
570
|
+
throw new Error(`Language ${language} is not supported.`);
|
|
571
|
+
}
|
|
572
|
+
}
|
|
196
573
|
}
|
|
197
574
|
/**
|
|
198
575
|
* Implementation of splitter which looks at tokens.
|
|
@@ -248,67 +625,17 @@ export class TokenTextSplitter extends TextSplitter {
|
|
|
248
625
|
}
|
|
249
626
|
export class MarkdownTextSplitter extends RecursiveCharacterTextSplitter {
|
|
250
627
|
constructor(fields) {
|
|
251
|
-
super(
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
configurable: true,
|
|
255
|
-
writable: true,
|
|
256
|
-
value: [
|
|
257
|
-
// First, try to split along Markdown headings (starting with level 2)
|
|
258
|
-
"\n## ",
|
|
259
|
-
"\n### ",
|
|
260
|
-
"\n#### ",
|
|
261
|
-
"\n##### ",
|
|
262
|
-
"\n###### ",
|
|
263
|
-
// Note the alternative syntax for headings (below) is not handled here
|
|
264
|
-
// Heading level 2
|
|
265
|
-
// ---------------
|
|
266
|
-
// End of code block
|
|
267
|
-
"```\n\n",
|
|
268
|
-
// Horizontal lines
|
|
269
|
-
"\n\n***\n\n",
|
|
270
|
-
"\n\n---\n\n",
|
|
271
|
-
"\n\n___\n\n",
|
|
272
|
-
// Note that this splitter doesn't handle horizontal lines defined
|
|
273
|
-
// by *three or more* of ***, ---, or ___, but this is not handled
|
|
274
|
-
"\n\n",
|
|
275
|
-
"\n",
|
|
276
|
-
" ",
|
|
277
|
-
"",
|
|
278
|
-
]
|
|
628
|
+
super({
|
|
629
|
+
...fields,
|
|
630
|
+
separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("markdown"),
|
|
279
631
|
});
|
|
280
632
|
}
|
|
281
633
|
}
|
|
282
634
|
export class LatexTextSplitter extends RecursiveCharacterTextSplitter {
|
|
283
635
|
constructor(fields) {
|
|
284
|
-
super(
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
configurable: true,
|
|
288
|
-
writable: true,
|
|
289
|
-
value: [
|
|
290
|
-
// First, try to split along Latex sections
|
|
291
|
-
"\n\\chapter{",
|
|
292
|
-
"\n\\section{",
|
|
293
|
-
"\n\\subsection{",
|
|
294
|
-
"\n\\subsubsection{",
|
|
295
|
-
// Now split by environments
|
|
296
|
-
"\n\\begin{enumerate}",
|
|
297
|
-
"\n\\begin{itemize}",
|
|
298
|
-
"\n\\begin{description}",
|
|
299
|
-
"\n\\begin{list}",
|
|
300
|
-
"\n\\begin{quote}",
|
|
301
|
-
"\n\\begin{quotation}",
|
|
302
|
-
"\n\\begin{verse}",
|
|
303
|
-
"\n\\begin{verbatim}",
|
|
304
|
-
// Now split by math environments
|
|
305
|
-
"\n\\begin{align}",
|
|
306
|
-
"$$",
|
|
307
|
-
"$",
|
|
308
|
-
// Now split by the normal type of lines
|
|
309
|
-
" ",
|
|
310
|
-
"",
|
|
311
|
-
]
|
|
636
|
+
super({
|
|
637
|
+
...fields,
|
|
638
|
+
separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("latex"),
|
|
312
639
|
});
|
|
313
640
|
}
|
|
314
641
|
}
|