vectra 0.7.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +10 -10
  2. package/package.json +7 -6
  3. package/src/ItemSelector.ts +7 -1
  4. package/src/LocalDocumentIndex.ts +10 -4
  5. package/src/LocalDocumentResult.ts +70 -22
  6. package/src/LocalIndex.ts +77 -10
  7. package/src/TextSplitter.ts +10 -15
  8. package/src/internals/Colorize.ts +3 -3
  9. package/src/internals/wink-bm25-text-search.d.ts +4 -0
  10. package/src/types.ts +2 -1
  11. package/src/vectra-cli.ts +13 -2
  12. package/lib/FileFetcher.d.ts +0 -5
  13. package/lib/FileFetcher.d.ts.map +0 -1
  14. package/lib/FileFetcher.js +0 -69
  15. package/lib/FileFetcher.js.map +0 -1
  16. package/lib/GPT3Tokenizer.d.ts +0 -9
  17. package/lib/GPT3Tokenizer.d.ts.map +0 -1
  18. package/lib/GPT3Tokenizer.js +0 -17
  19. package/lib/GPT3Tokenizer.js.map +0 -1
  20. package/lib/ItemSelector.d.ts +0 -41
  21. package/lib/ItemSelector.d.ts.map +0 -1
  22. package/lib/ItemSelector.js +0 -162
  23. package/lib/ItemSelector.js.map +0 -1
  24. package/lib/LocalDocument.d.ts +0 -54
  25. package/lib/LocalDocument.d.ts.map +0 -1
  26. package/lib/LocalDocument.js +0 -146
  27. package/lib/LocalDocument.js.map +0 -1
  28. package/lib/LocalDocumentIndex.d.ts +0 -128
  29. package/lib/LocalDocumentIndex.d.ts.map +0 -1
  30. package/lib/LocalDocumentIndex.js +0 -446
  31. package/lib/LocalDocumentIndex.js.map +0 -1
  32. package/lib/LocalDocumentResult.d.ts +0 -45
  33. package/lib/LocalDocumentResult.d.ts.map +0 -1
  34. package/lib/LocalDocumentResult.js +0 -282
  35. package/lib/LocalDocumentResult.js.map +0 -1
  36. package/lib/LocalIndex.d.ts +0 -136
  37. package/lib/LocalIndex.d.ts.map +0 -1
  38. package/lib/LocalIndex.js +0 -413
  39. package/lib/LocalIndex.js.map +0 -1
  40. package/lib/OpenAIEmbeddings.d.ts +0 -126
  41. package/lib/OpenAIEmbeddings.d.ts.map +0 -1
  42. package/lib/OpenAIEmbeddings.js +0 -174
  43. package/lib/OpenAIEmbeddings.js.map +0 -1
  44. package/lib/TextSplitter.d.ts +0 -20
  45. package/lib/TextSplitter.d.ts.map +0 -1
  46. package/lib/TextSplitter.js +0 -543
  47. package/lib/TextSplitter.js.map +0 -1
  48. package/lib/WebFetcher.d.ts +0 -15
  49. package/lib/WebFetcher.d.ts.map +0 -1
  50. package/lib/WebFetcher.js +0 -224
  51. package/lib/WebFetcher.js.map +0 -1
  52. package/lib/index.d.ts +0 -12
  53. package/lib/index.d.ts.map +0 -1
  54. package/lib/index.js +0 -28
  55. package/lib/index.js.map +0 -1
  56. package/lib/internals/Colorize.d.ts +0 -14
  57. package/lib/internals/Colorize.d.ts.map +0 -1
  58. package/lib/internals/Colorize.js +0 -64
  59. package/lib/internals/Colorize.js.map +0 -1
  60. package/lib/internals/index.d.ts +0 -3
  61. package/lib/internals/index.d.ts.map +0 -1
  62. package/lib/internals/index.js +0 -19
  63. package/lib/internals/index.js.map +0 -1
  64. package/lib/internals/types.d.ts +0 -43
  65. package/lib/internals/types.d.ts.map +0 -1
  66. package/lib/internals/types.js +0 -3
  67. package/lib/internals/types.js.map +0 -1
  68. package/lib/types.d.ts +0 -145
  69. package/lib/types.d.ts.map +0 -1
  70. package/lib/types.js +0 -3
  71. package/lib/types.js.map +0 -1
  72. package/lib/vectra-cli.d.ts +0 -2
  73. package/lib/vectra-cli.d.ts.map +0 -1
  74. package/lib/vectra-cli.js +0 -303
  75. package/lib/vectra-cli.js.map +0 -1
@@ -1,543 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.TextSplitter = void 0;
4
- const GPT3Tokenizer_1 = require("./GPT3Tokenizer");
5
- const ALPHANUMERIC_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
6
- class TextSplitter {
7
- constructor(config) {
8
- this._config = Object.assign({
9
- keepSeparators: false,
10
- chunkSize: 400,
11
- chunkOverlap: 40,
12
- }, config);
13
- // Create a default tokenizer if none is provided
14
- if (!this._config.tokenizer) {
15
- this._config.tokenizer = new GPT3Tokenizer_1.GPT3Tokenizer();
16
- }
17
- // Use default separators if none are provided
18
- if (!this._config.separators || this._config.separators.length === 0) {
19
- this._config.separators = this.getSeparators(this._config.docType);
20
- }
21
- // Validate the config settings
22
- if (this._config.chunkSize < 1) {
23
- throw new Error("chunkSize must be >= 1");
24
- }
25
- else if (this._config.chunkOverlap < 0) {
26
- throw new Error("chunkOverlap must be >= 0");
27
- }
28
- else if (this._config.chunkOverlap > this._config.chunkSize) {
29
- throw new Error("chunkOverlap must be <= chunkSize");
30
- }
31
- }
32
- split(text) {
33
- // Get basic chunks
34
- const chunks = this.recursiveSplit(text, this._config.separators, 0);
35
- const that = this;
36
- function getOverlapTokens(tokens) {
37
- if (tokens != undefined) {
38
- const len = tokens.length > that._config.chunkOverlap ? that._config.chunkOverlap : tokens.length;
39
- return tokens.slice(0, len);
40
- }
41
- else {
42
- return [];
43
- }
44
- }
45
- // Add overlap tokens and text to the start and end of each chunk
46
- if (this._config.chunkOverlap > 0) {
47
- for (let i = 1; i < chunks.length; i++) {
48
- const previousChunk = chunks[i - 1];
49
- const chunk = chunks[i];
50
- const nextChunk = i < chunks.length - 1 ? chunks[i + 1] : undefined;
51
- chunk.startOverlap = getOverlapTokens(previousChunk.tokens.reverse()).reverse();
52
- chunk.endOverlap = getOverlapTokens(nextChunk === null || nextChunk === void 0 ? void 0 : nextChunk.tokens);
53
- }
54
- }
55
- return chunks;
56
- }
57
- recursiveSplit(text, separators, startPos) {
58
- const chunks = [];
59
- if (text.length > 0) {
60
- // Split text into parts
61
- let parts;
62
- let separator = '';
63
- const nextSeparators = separators.length > 1 ? separators.slice(1) : [];
64
- if (separators.length > 0) {
65
- // Split by separator
66
- separator = separators[0];
67
- parts = separator == ' ' ? this.splitBySpaces(text) : text.split(separator);
68
- }
69
- else {
70
- // Cut text in half
71
- const half = Math.floor(text.length / 2);
72
- parts = [text.substring(0, half), text.substring(half)];
73
- }
74
- // Iterate over parts
75
- for (let i = 0; i < parts.length; i++) {
76
- const lastChunk = (i === parts.length - 1);
77
- // Get chunk text and endPos
78
- let chunk = parts[i];
79
- const endPos = (startPos + (chunk.length - 1)) + (lastChunk ? 0 : separator.length);
80
- if (this._config.keepSeparators && !lastChunk) {
81
- chunk += separator;
82
- }
83
- // Ensure chunk contains text
84
- if (!this.containsAlphanumeric(chunk)) {
85
- continue;
86
- }
87
- // Optimization to avoid encoding really large chunks
88
- if (chunk.length / 6 > this._config.chunkSize) {
89
- // Break the text into smaller chunks
90
- const subChunks = this.recursiveSplit(chunk, nextSeparators, startPos);
91
- chunks.push(...subChunks);
92
- }
93
- else {
94
- // Encode chunk text
95
- const tokens = this._config.tokenizer.encode(chunk);
96
- if (tokens.length > this._config.chunkSize) {
97
- // Break the text into smaller chunks
98
- const subChunks = this.recursiveSplit(chunk, nextSeparators, startPos);
99
- chunks.push(...subChunks);
100
- }
101
- else {
102
- // Append chunk to output
103
- chunks.push({
104
- text: chunk,
105
- tokens: tokens,
106
- startPos: startPos,
107
- endPos: endPos,
108
- startOverlap: [],
109
- endOverlap: [],
110
- });
111
- }
112
- }
113
- // Update startPos
114
- startPos = endPos + 1;
115
- }
116
- }
117
- return this.combineChunks(chunks);
118
- }
119
- combineChunks(chunks) {
120
- const combinedChunks = [];
121
- let currentChunk;
122
- let currentLength = 0;
123
- const separator = this._config.keepSeparators ? '' : ' ';
124
- for (let i = 0; i < chunks.length; i++) {
125
- const chunk = chunks[i];
126
- if (currentChunk) {
127
- const length = currentChunk.tokens.length + chunk.tokens.length;
128
- if (length > this._config.chunkSize) {
129
- combinedChunks.push(currentChunk);
130
- currentChunk = chunk;
131
- currentLength = chunk.tokens.length;
132
- }
133
- else {
134
- currentChunk.text += separator + chunk.text;
135
- currentChunk.endPos = chunk.endPos;
136
- currentChunk.tokens.push(...chunk.tokens);
137
- currentLength += chunk.tokens.length;
138
- }
139
- }
140
- else {
141
- currentChunk = chunk;
142
- currentLength = chunk.tokens.length;
143
- }
144
- }
145
- if (currentChunk) {
146
- combinedChunks.push(currentChunk);
147
- }
148
- return combinedChunks;
149
- }
150
- containsAlphanumeric(text) {
151
- for (let i = 0; i < text.length; i++) {
152
- if (ALPHANUMERIC_CHARS.includes(text[i])) {
153
- return true;
154
- }
155
- }
156
- return false;
157
- }
158
- splitBySpaces(text) {
159
- const parts = [];
160
- const words = text.split(' ');
161
- if (words.length > 0) {
162
- let part = words[0];
163
- for (let i = 1; i < words.length; i++) {
164
- const nextWord = words[i];
165
- if (this._config.tokenizer.encode(part + ' ' + nextWord).length <= this._config.chunkSize) {
166
- part += ' ' + nextWord;
167
- }
168
- else {
169
- parts.push(part);
170
- part = nextWord;
171
- }
172
- }
173
- parts.push(part);
174
- }
175
- else {
176
- parts.push(text);
177
- }
178
- return parts;
179
- }
180
- getSeparators(docType) {
181
- switch (docType !== null && docType !== void 0 ? docType : '') {
182
- case "cpp":
183
- return [
184
- // Split along class definitions
185
- "\nclass ",
186
- // Split along function definitions
187
- "\nvoid ",
188
- "\nint ",
189
- "\nfloat ",
190
- "\ndouble ",
191
- // Split along control flow statements
192
- "\nif ",
193
- "\nfor ",
194
- "\nwhile ",
195
- "\nswitch ",
196
- "\ncase ",
197
- // Split by the normal type of lines
198
- "\n\n",
199
- "\n",
200
- " "
201
- ];
202
- case "go":
203
- return [
204
- // Split along function definitions
205
- "\nfunc ",
206
- "\nvar ",
207
- "\nconst ",
208
- "\ntype ",
209
- // Split along control flow statements
210
- "\nif ",
211
- "\nfor ",
212
- "\nswitch ",
213
- "\ncase ",
214
- // Split by the normal type of lines
215
- "\n\n",
216
- "\n",
217
- " "
218
- ];
219
- case "java":
220
- case "c#":
221
- case "csharp":
222
- case "cs":
223
- case "ts":
224
- case "tsx":
225
- case "typescript":
226
- return [
227
- // split along regions
228
- "// LLM-REGION",
229
- "/* LLM-REGION",
230
- "/** LLM-REGION",
231
- // Split along class definitions
232
- "\nclass ",
233
- // Split along method definitions
234
- "\npublic ",
235
- "\nprotected ",
236
- "\nprivate ",
237
- "\nstatic ",
238
- // Split along control flow statements
239
- "\nif ",
240
- "\nfor ",
241
- "\nwhile ",
242
- "\nswitch ",
243
- "\ncase ",
244
- // Split by the normal type of lines
245
- "\n\n",
246
- "\n",
247
- " "
248
- ];
249
- case "js":
250
- case "jsx":
251
- case "javascript":
252
- return [
253
- // split along regions
254
- "// LLM-REGION",
255
- "/* LLM-REGION",
256
- "/** LLM-REGION",
257
- // Split along class definitions
258
- "\nclass ",
259
- // Split along function definitions
260
- "\nfunction ",
261
- "\nconst ",
262
- "\nlet ",
263
- "\nvar ",
264
- "\nclass ",
265
- // Split along control flow statements
266
- "\nif ",
267
- "\nfor ",
268
- "\nwhile ",
269
- "\nswitch ",
270
- "\ncase ",
271
- "\ndefault ",
272
- // Split by the normal type of lines
273
- "\n\n",
274
- "\n",
275
- " "
276
- ];
277
- case "php":
278
- return [
279
- // Split along function definitions
280
- "\nfunction ",
281
- // Split along class definitions
282
- "\nclass ",
283
- // Split along control flow statements
284
- "\nif ",
285
- "\nforeach ",
286
- "\nwhile ",
287
- "\ndo ",
288
- "\nswitch ",
289
- "\ncase ",
290
- // Split by the normal type of lines
291
- "\n\n",
292
- "\n",
293
- " "
294
- ];
295
- case "proto":
296
- return [
297
- // Split along message definitions
298
- "\nmessage ",
299
- // Split along service definitions
300
- "\nservice ",
301
- // Split along enum definitions
302
- "\nenum ",
303
- // Split along option definitions
304
- "\noption ",
305
- // Split along import statements
306
- "\nimport ",
307
- // Split along syntax declarations
308
- "\nsyntax ",
309
- // Split by the normal type of lines
310
- "\n\n",
311
- "\n",
312
- " "
313
- ];
314
- case "python":
315
- case "py":
316
- return [
317
- // First, try to split along class definitions
318
- "\nclass ",
319
- "\ndef ",
320
- "\n\tdef ",
321
- // Now split by the normal type of lines
322
- "\n\n",
323
- "\n",
324
- " "
325
- ];
326
- case "rst":
327
- return [
328
- // Split along section titles
329
- "\n===\n",
330
- "\n---\n",
331
- "\n***\n",
332
- // Split along directive markers
333
- "\n.. ",
334
- // Split by the normal type of lines
335
- "\n\n",
336
- "\n",
337
- " "
338
- ];
339
- case "ruby":
340
- return [
341
- // Split along method definitions
342
- "\ndef ",
343
- "\nclass ",
344
- // Split along control flow statements
345
- "\nif ",
346
- "\nunless ",
347
- "\nwhile ",
348
- "\nfor ",
349
- "\ndo ",
350
- "\nbegin ",
351
- "\nrescue ",
352
- // Split by the normal type of lines
353
- "\n\n",
354
- "\n",
355
- " "
356
- ];
357
- case "rust":
358
- return [
359
- // Split along function definitions
360
- "\nfn ",
361
- "\nconst ",
362
- "\nlet ",
363
- // Split along control flow statements
364
- "\nif ",
365
- "\nwhile ",
366
- "\nfor ",
367
- "\nloop ",
368
- "\nmatch ",
369
- "\nconst ",
370
- // Split by the normal type of lines
371
- "\n\n",
372
- "\n",
373
- " "
374
- ];
375
- case "scala":
376
- return [
377
- // Split along class definitions
378
- "\nclass ",
379
- "\nobject ",
380
- // Split along method definitions
381
- "\ndef ",
382
- "\nval ",
383
- "\nvar ",
384
- // Split along control flow statements
385
- "\nif ",
386
- "\nfor ",
387
- "\nwhile ",
388
- "\nmatch ",
389
- "\ncase ",
390
- // Split by the normal type of lines
391
- "\n\n",
392
- "\n",
393
- " "
394
- ];
395
- case "swift":
396
- return [
397
- // Split along function definitions
398
- "\nfunc ",
399
- // Split along class definitions
400
- "\nclass ",
401
- "\nstruct ",
402
- "\nenum ",
403
- // Split along control flow statements
404
- "\nif ",
405
- "\nfor ",
406
- "\nwhile ",
407
- "\ndo ",
408
- "\nswitch ",
409
- "\ncase ",
410
- // Split by the normal type of lines
411
- "\n\n",
412
- "\n",
413
- " "
414
- ];
415
- case "md":
416
- case "markdown":
417
- return [
418
- // First, try to split along Markdown headings (starting with level 2)
419
- "\n## ",
420
- "\n### ",
421
- "\n#### ",
422
- "\n##### ",
423
- "\n###### ",
424
- // Note the alternative syntax for headings (below) is not handled here
425
- // Heading level 2
426
- // ---------------
427
- // End of code block
428
- "```\n\n",
429
- // Horizontal lines
430
- "\n\n***\n\n",
431
- "\n\n---\n\n",
432
- "\n\n___\n\n",
433
- // Note that this splitter doesn't handle horizontal lines defined
434
- // by *three or more* of ***, ---, or ___, but this is not handled
435
- // Github tables
436
- "<table>",
437
- // "<tr>",
438
- // "<td>",
439
- // "<td ",
440
- "\n\n",
441
- "\n",
442
- " "
443
- ];
444
- case "latex":
445
- return [
446
- // First, try to split along Latex sections
447
- "\n\\chapter{",
448
- "\n\\section{",
449
- "\n\\subsection{",
450
- "\n\\subsubsection{",
451
- // Now split by environments
452
- "\n\\begin{enumerate}",
453
- "\n\\begin{itemize}",
454
- "\n\\begin{description}",
455
- "\n\\begin{list}",
456
- "\n\\begin{quote}",
457
- "\n\\begin{quotation}",
458
- "\n\\begin{verse}",
459
- "\n\\begin{verbatim}",
460
- // Now split by math environments
461
- "\n\\begin{align}",
462
- "$$",
463
- "$",
464
- // Now split by the normal type of lines
465
- "\n\n",
466
- "\n",
467
- " "
468
- ];
469
- case "html":
470
- return [
471
- // First, try to split along HTML tags
472
- "<body>",
473
- "<div>",
474
- "<p>",
475
- "<br>",
476
- "<li>",
477
- "<h1>",
478
- "<h2>",
479
- "<h3>",
480
- "<h4>",
481
- "<h5>",
482
- "<h6>",
483
- "<span>",
484
- "<table>",
485
- "<tr>",
486
- "<td>",
487
- "<th>",
488
- "<ul>",
489
- "<ol>",
490
- "<header>",
491
- "<footer>",
492
- "<nav>",
493
- // Head
494
- "<head>",
495
- "<style>",
496
- "<script>",
497
- "<meta>",
498
- "<title>",
499
- // Normal type of lines
500
- " "
501
- ];
502
- case "sol":
503
- return [
504
- // Split along compiler informations definitions
505
- "\npragma ",
506
- "\nusing ",
507
- // Split along contract definitions
508
- "\ncontract ",
509
- "\ninterface ",
510
- "\nlibrary ",
511
- // Split along method definitions
512
- "\nconstructor ",
513
- "\ntype ",
514
- "\nfunction ",
515
- "\nevent ",
516
- "\nmodifier ",
517
- "\nerror ",
518
- "\nstruct ",
519
- "\nenum ",
520
- // Split along control flow statements
521
- "\nif ",
522
- "\nfor ",
523
- "\nwhile ",
524
- "\ndo while ",
525
- "\nassembly ",
526
- // Split by the normal type of lines
527
- "\n\n",
528
- "\n",
529
- " "
530
- ];
531
- default:
532
- return [
533
- // Split by the normal type of lines
534
- "\n\n",
535
- "\n",
536
- " ",
537
- "",
538
- ];
539
- }
540
- }
541
- }
542
- exports.TextSplitter = TextSplitter;
543
- //# sourceMappingURL=TextSplitter.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"TextSplitter.js","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":";;;AAAA,mDAAgD;AAGhD,MAAM,kBAAkB,GAAG,gEAAgE,CAAC;AAW5F,MAAa,YAAY;IAGrB,YAAmB,MAAoC;QACnD,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;YACzB,cAAc,EAAE,KAAK;YACrB,SAAS,EAAE,GAAG;YACd,YAAY,EAAE,EAAE;SACG,EAAE,MAAM,CAAC,CAAC;QAEjC,iDAAiD;QACjD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,6BAAa,EAAE,CAAC;SAChD;QAED,8CAA8C;QAC9C,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;YAClE,IAAI,CAAC,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;SACtE;QAED,+BAA+B;QAC/B,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,EAAE;YAC5B,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;SAC7C;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YACtC,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;SAChD;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YAC3D,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;SACxD;IACL,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,mBAAmB;QACnB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QAErE,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,SAAS,gBAAgB,CAAC,MAAiB;YACvC,IAAI,MAAM,IAAI,SAAS,EAAE;gBACrB,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;gBAClG,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;aAC/B;iBAAM;gBACH,OAAO,EAAE,CAAC;aACb;QACL,CAAC;QAED,iEAAiE;QACjE,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACpC,MAAM,aAAa,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACxB,MAAM,SAAS,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBACpE,KAAK,CAAC,YAAY,GAAG,gBAAgB,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;gBAChF,KAAK,CAAC,UAAU,GAAG,gBAAgB,CAAC,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,MAAM,CAAC,CAAC;aAC1D;SACJ;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,IAAY,EAAE,UAAoB,EAAE,QAAgB;QACvE,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;YACjB,wBAAwB;YACxB,IAAI,KAAe,CAAC;YACpB,IAAI,SAAS,GAAG,EAAE,CAAC;YACnB,MAAM,cAAc,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACxE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;gBACvB,qBAAqB;gBACrB,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBAC1B,KAAK,GAAG,SAAS,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;aAC/E;iBAAM;gBACH,mBAAmB;gBACnB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBACzC,KAAK,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;aAC3D;YAED,qBAAqB;YACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnC,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAE3C,4BAA4B;gBAC5B,IAAI,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,MAAM,MAAM,GAAG,CAAC,QAAQ,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBACpF,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,IAAI,CAAC,SAAS,EAAE;oBAC3C,KAAK,IAAI,SAAS,CAAC;iBACtB;gBAED,6BAA6B;gBAC7B,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC,EAAE;oBACnC,SAAS;iBACZ;gBAED,qDAAqD;gBACrD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBAC3C,qCAAqC;oBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;oBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;iBAC7B;qBAAM;oBACH,oBAAoB;oBACpB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBACpD,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;wBACxC,qCAAqC;wBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;wBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;qBAC7B;yBAAM;wBACH,yBAAyB;wBACzB,MAAM,CAAC,IAAI,CAAC;4BACR,IAAI,EAAE,KAAK;4BACX,MAAM,EAAE,MAAM;4BACd,QAAQ,EAAE,QAAQ;4BAClB,MAAM,EAAE,MAAM;4BACd,YAAY,EAAE,EAAE;4BAChB,UAAU,EAAE,EAAE;yBACjB,CAAC,CAAC;qBACN;iBAEJ;gBAGD,kBAAkB;gBAClB,QAAQ,GAAG,MAAM,GAAG,CAAC,CAAC;aACzB;SACJ;QAED,OAAO,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IACtC,CAAC;IAEO,aAAa,CAAC,MAAmB;QACrC,MAAM,cAAc,GAAgB,EAAE,CAAC;QACvC,IAAI,YAAiC,CAAC;QACtC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,YAAY,EAAE;gBACd,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;gBAChE,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBACjC,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAClC,YAAY,GAAG,KAAK,CAAC;oBACrB,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACvC;qBAAM;oBACH,YAAY,CAAC,IAAI,IAAI,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC;oBAC5C,YAAY,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;oBACnC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;oBAC1C,aAAa,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACxC;aACJ;iBAAM;gBACH,YAAY,GAAG,KAAK,CAAC;gBACrB,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;aACvC;SACJ;QACD,IAAI,YAAY,EAAE;YACd,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;SACrC;QACD,OAAO,cAAc,CAAC;IAC1B,CAAC;IAEO,oBAAoB,CAAC,IAAY;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAClC,IAAI,kBAAkB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE;gBACtC,OAAO,IAAI,CAAC;aACf;SACJ;QACD,OAAO,KAAK,CAAC;IACjB,CAAC;IAEO,aAAa,CAAC,IAAY;QAC9B,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YAClB,IAAI,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnC,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC1B,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,GAAG,GAAG,GAAG,QAAQ,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBACvF,IAAI,IAAI,GAAG,GAAG,QAAQ,CAAC;iBAC1B;qBAAM;oBACH,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACjB,IAAI,GAAG,QAAQ,CAAC;iBACnB;aACJ;YACD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACpB;aAAM;YACH,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACpB;QAED,OAAO,KAAK,CAAC;IACjB,CAAC;IAEO,aAAa,CAAC,OAAgB;QAClC,QAAQ,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,EAAE;YACnB,KAAK,KAAK;gBACN,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI;gBACL,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,sBAAsB;oBACtB,eAAe;oBACf,eAAe;oBACf,gBAAgB;oBAChB,gCAAgC;oBAChC,UAAU;oBACV,iCAAiC;oBACjC,WAAW;oBACX,cAAc;oBACd,YAAY;oBACZ,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,sBAAsB;oBACtB,eAAe;oBACf,eAAe;oBACf,gBAAgB;oBAChB,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,UAAU;oBACV,QAAQ;oBACR,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,YAAY;oBACZ,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,mCAAmC;oBACnC,aAAa;oBACb,gCAAgC;oBAChC,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,YAAY;oBACZ,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,kCAAkC;oBAClC,YAAY;oBACZ,kCAAkC;oBAClC,YAAY;oBACZ,+BAA+B;oBAC/B,SAAS;oBACT,iCAAiC;oBACjC,WAAW;oBACX,gCAAgC;oBAChC,WAAW;oBACX,kCAAkC;oBAClC,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI;gBACL,OAAO;oBACH,8CAA8C;oBAC9C,UAAU;oBACV,QAAQ;oBACR,UAAU;oBACV,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,6BAA6B;oBAC7B,SAAS;oBACT,SAAS;oBACT,SAAS;oBACT,gCAAgC;oBAChC,OAAO;oBACP,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,iCAAiC;oBACjC,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,WAAW;oBACX,UAAU;oBACV,QAAQ;oBACR,OAAO;oBACP,UAAU;oBACV,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,mCAAmC;oBACnC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,iCAAiC;oBACjC,QAAQ;oBACR,QAAQ;oBACR,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,UAAU;oBACV,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,UAAU;gBACX,OAAO;oBACH,sEAAsE;oBACtE,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,WAAW;oBACX,uEAAuE;oBACvE,kBAAkB;oBAClB,kBAAkB;oBAClB,oBAAoB;oBACpB,SAAS;oBACT,mBAAmB;oBACnB,aAAa;oBACb,aAAa;oBACb,aAAa;oBACb,kEAAkE;oBAClE,kEAAkE;oBAClE,gBAAgB;oBAChB,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,UAAU;oBACV,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,2CAA2C;oBAC3C,cAAc;oBACd,cAAc;oBACd,iBAAiB;oBACjB,oBAAoB;oBAEpB,4BAA4B;oBAC5B,sBAAsB;oBACtB,oBAAoB;oBACpB,wBAAwB;oBACxB,iBAAiB;oBACjB,kBAAkB;oBAClB,sBAAsB;oBACtB,kBAAkB;oBAClB,qBAAqB;oBAErB,iCAAiC;oBACjC,kBAAkB;oBAClB,IAAI;oBACJ,GAAG;oBAEH,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,sCAAsC;oBACtC,QAAQ;oBACR,OAAO;oBACP,KAAK;oBACL,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,QAAQ;oBACR,SAAS;oBACT,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,UAAU;oBACV,UAAU;oBACV,OAAO;oBACP,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,uBAAuB;oBACvB,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,gDAAgD;oBAChD,WAAW;oBACX,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,cAAc;oBACd,YAAY;oBACZ,iCAAiC;oBACjC,gBAAgB;oBAChB,SAAS;oBACT,aAAa;oBACb,UAAU;oBACV,aAAa;oBACb,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,aAAa;oBACb,aAAa;oBACb,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN;gBACI,OAAO;oBACH,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;SACT;IACL,CAAC;CACJ;AAviBD,oCAuiBC"}
@@ -1,15 +0,0 @@
1
- import { AxiosRequestConfig } from "axios";
2
- import { TextFetcher } from './types';
3
- export interface WebFetcherConfig {
4
- headers?: Record<string, string>;
5
- requestConfig?: AxiosRequestConfig;
6
- htmlToMarkdown: boolean;
7
- summarizeHtml: boolean;
8
- }
9
- export declare class WebFetcher implements TextFetcher {
10
- private readonly _config;
11
- constructor(config?: Partial<WebFetcherConfig>);
12
- fetch(uri: string, onDocument: (uri: string, text: string, docType?: string) => Promise<boolean>): Promise<boolean>;
13
- private htmlToMarkdown;
14
- }
15
- //# sourceMappingURL=WebFetcher.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"WebFetcher.d.ts","sourceRoot":"","sources":["../src/WebFetcher.ts"],"names":[],"mappings":"AAAA,OAAc,EAAE,kBAAkB,EAAE,MAAM,OAAO,CAAC;AAClD,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AA2BtC,MAAM,WAAW,gBAAgB;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAC,MAAM,CAAC,CAAC;IAChC,aAAa,CAAC,EAAE,kBAAkB,CAAC;IACnC,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;CAC1B;AAED,qBAAa,UAAW,YAAW,WAAW;IAC1C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;gBAExB,MAAM,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC;IAOxC,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC;IAyChI,OAAO,CAAC,cAAc;CAmCzB"}