vectra 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78):
  1. package/README.md +3 -3
  2. package/bin/vectra.js +3 -0
  3. package/lib/GPT3Tokenizer.d.ts +9 -0
  4. package/lib/GPT3Tokenizer.d.ts.map +1 -0
  5. package/lib/GPT3Tokenizer.js +17 -0
  6. package/lib/GPT3Tokenizer.js.map +1 -0
  7. package/lib/ItemSelector.d.ts +41 -0
  8. package/lib/ItemSelector.d.ts.map +1 -0
  9. package/lib/ItemSelector.js +156 -0
  10. package/lib/ItemSelector.js.map +1 -0
  11. package/lib/LocalDocument.d.ts +16 -0
  12. package/lib/LocalDocument.d.ts.map +1 -0
  13. package/lib/LocalDocument.js +99 -0
  14. package/lib/LocalDocument.js.map +1 -0
  15. package/lib/LocalDocumentIndex.d.ts +48 -0
  16. package/lib/LocalDocumentIndex.d.ts.map +1 -0
  17. package/lib/LocalDocumentIndex.js +367 -0
  18. package/lib/LocalDocumentIndex.js.map +1 -0
  19. package/lib/LocalDocumentResult.d.ts +12 -0
  20. package/lib/LocalDocumentResult.d.ts.map +1 -0
  21. package/lib/LocalDocumentResult.js +186 -0
  22. package/lib/LocalDocumentResult.js.map +1 -0
  23. package/lib/LocalIndex.d.ts +130 -0
  24. package/lib/LocalIndex.d.ts.map +1 -0
  25. package/lib/LocalIndex.js +405 -0
  26. package/lib/LocalIndex.js.map +1 -0
  27. package/lib/OpenAIEmbeddings.d.ts +98 -0
  28. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  29. package/lib/OpenAIEmbeddings.js +139 -0
  30. package/lib/OpenAIEmbeddings.js.map +1 -0
  31. package/lib/TextSplitter.d.ts +17 -0
  32. package/lib/TextSplitter.d.ts.map +1 -0
  33. package/lib/TextSplitter.js +460 -0
  34. package/lib/TextSplitter.js.map +1 -0
  35. package/lib/WebFetcher.d.ts +16 -0
  36. package/lib/WebFetcher.d.ts.map +1 -0
  37. package/lib/WebFetcher.js +144 -0
  38. package/lib/WebFetcher.js.map +1 -0
  39. package/lib/index.d.ts +11 -0
  40. package/lib/index.d.ts.map +1 -0
  41. package/lib/index.js +27 -0
  42. package/lib/index.js.map +1 -0
  43. package/lib/internals/Colorize.d.ts +14 -0
  44. package/lib/internals/Colorize.d.ts.map +1 -0
  45. package/lib/internals/Colorize.js +64 -0
  46. package/lib/internals/Colorize.js.map +1 -0
  47. package/lib/internals/index.d.ts +3 -0
  48. package/lib/internals/index.d.ts.map +1 -0
  49. package/lib/internals/index.js +19 -0
  50. package/lib/internals/index.js.map +1 -0
  51. package/lib/internals/types.d.ts +42 -0
  52. package/lib/internals/types.d.ts.map +1 -0
  53. package/lib/internals/types.js +3 -0
  54. package/lib/internals/types.js.map +1 -0
  55. package/lib/types.d.ts +133 -0
  56. package/lib/types.d.ts.map +1 -0
  57. package/lib/types.js +3 -0
  58. package/lib/types.js.map +1 -0
  59. package/lib/vectra-cli.d.ts +2 -0
  60. package/lib/vectra-cli.d.ts.map +1 -0
  61. package/lib/vectra-cli.js +276 -0
  62. package/lib/vectra-cli.js.map +1 -0
  63. package/package.json +21 -3
  64. package/src/GPT3Tokenizer.ts +15 -0
  65. package/src/ItemSelector.ts +9 -9
  66. package/src/LocalDocument.ts +70 -0
  67. package/src/LocalDocumentIndex.ts +355 -0
  68. package/src/LocalDocumentResult.ts +206 -0
  69. package/src/LocalIndex.ts +12 -78
  70. package/src/OpenAIEmbeddings.ts +205 -0
  71. package/src/TextSplitter.ts +480 -0
  72. package/src/WebFetcher.ts +128 -0
  73. package/src/index.ts +8 -0
  74. package/src/internals/Colorize.ts +64 -0
  75. package/src/internals/index.ts +2 -0
  76. package/src/internals/types.ts +46 -0
  77. package/src/types.ts +160 -0
  78. package/src/vectra-cli.ts +238 -0
@@ -0,0 +1,139 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // TypeScript-emitted async helper: runs `generator` to completion and returns a promise for its result; an existing this.__awaiter is reused
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // pass P instances through, wrap anything else in a P resolved with it
4
+ return new (P || (P = Promise))(function (resolve, reject) { // P falls back to the global Promise when not supplied
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // resume the generator with a resolved value; a throw rejects the outer promise
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // throw the rejection into the generator so its own try/catch can see it
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // settle when done, otherwise wait on the yielded value
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next()); // instantiate the generator with the caller's `this`/arguments and start it
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) { // TypeScript-emitted interop helper; an existing this.__importDefault is reused
12
+ return (mod && mod.__esModule) ? mod : { "default": mod }; // modules flagged __esModule are returned as-is, anything else is exposed as `.default`
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.OpenAIEmbeddings = void 0;
16
+ const axios_1 = __importDefault(require("axios"));
17
+ /**
18
+ * A `PromptCompletionModel` for calling OpenAI and Azure OpenAI hosted models.
19
+ * @remarks
20
+ */
21
+ class OpenAIEmbeddings {
22
+ /**
23
+ * Creates a new `OpenAIClient` instance.
24
+ * @param options Options for configuring an `OpenAIClient`.
25
+ */
26
+ constructor(options) {
27
+ this.UserAgent = 'AlphaWave';
28
+ // Check for azure config
29
+ if (options.azureApiKey) {
30
+ this._useAzure = true;
31
+ this.options = Object.assign({
32
+ retryPolicy: [2000, 5000],
33
+ azureApiVersion: '2023-05-15',
34
+ }, options);
35
+ // Cleanup and validate endpoint
36
+ let endpoint = this.options.azureEndpoint.trim();
37
+ if (endpoint.endsWith('/')) {
38
+ endpoint = endpoint.substring(0, endpoint.length - 1);
39
+ }
40
+ if (!endpoint.toLowerCase().startsWith('https://')) {
41
+ throw new Error(`Client created with an invalid endpoint of '${endpoint}'. The endpoint must be a valid HTTPS url.`);
42
+ }
43
+ this.options.azureEndpoint = endpoint;
44
+ }
45
+ else {
46
+ this._useAzure = false;
47
+ this.options = Object.assign({
48
+ retryPolicy: [2000, 5000]
49
+ }, options);
50
+ }
51
+ // Create client
52
+ this._httpClient = axios_1.default.create({
53
+ validateStatus: (status) => status < 400 || status == 429
54
+ });
55
+ }
56
+ /**
57
+ * Creates embeddings for the given inputs using the OpenAI API.
58
+ * @param model Name of the model to use (or deployment for Azure).
59
+ * @param inputs Text inputs to create embeddings for.
60
+ * @returns A `EmbeddingsResponse` with a status and the generated embeddings or a message when an error occurs.
61
+ */
62
+ createEmbeddings(inputs) {
63
+ return __awaiter(this, void 0, void 0, function* () {
64
+ const response = yield this.createEmbeddingRequest({
65
+ input: inputs,
66
+ });
67
+ // Process response
68
+ if (response.status < 300) {
69
+ return { status: 'success', output: response.data.data.sort((a, b) => a.index - b.index).map((item) => item.embedding) };
70
+ }
71
+ else if (response.status == 429) {
72
+ return { status: 'rate_limited', message: `The embeddings API returned a rate limit error.` };
73
+ }
74
+ else {
75
+ return { status: 'error', message: `The embeddings API returned an error status of ${response.status}: ${response.statusText}` };
76
+ }
77
+ });
78
+ }
79
+ /**
80
+ * @private
81
+ */
82
+ createEmbeddingRequest(request) {
83
+ var _a;
84
+ if (this._useAzure) {
85
+ const options = this.options;
86
+ const url = `${options.azureEndpoint}/openai/deployments/${options.azureDeployment}/embeddings?api-version=${options.azureApiVersion}`;
87
+ return this.post(url, request);
88
+ }
89
+ else {
90
+ const options = this.options;
91
+ const url = `${(_a = options.endpoint) !== null && _a !== void 0 ? _a : 'https://api.openai.com'}/v1/embeddings`;
92
+ request.model = options.model;
93
+ return this.post(url, request);
94
+ }
95
+ }
96
+ /**
97
+ * @private
98
+ */
99
+ post(url, body, retryCount = 0) {
100
+ return __awaiter(this, void 0, void 0, function* () {
101
+ // Initialize request config
102
+ const requestConfig = Object.assign({}, this.options.requestConfig);
103
+ // Initialize request headers
104
+ if (!requestConfig.headers) {
105
+ requestConfig.headers = {};
106
+ }
107
+ if (!requestConfig.headers['Content-Type']) {
108
+ requestConfig.headers['Content-Type'] = 'application/json';
109
+ }
110
+ if (!requestConfig.headers['User-Agent']) {
111
+ requestConfig.headers['User-Agent'] = this.UserAgent;
112
+ }
113
+ if (this._useAzure) {
114
+ const options = this.options;
115
+ requestConfig.headers['api-key'] = options.azureApiKey;
116
+ }
117
+ else {
118
+ const options = this.options;
119
+ requestConfig.headers['Authorization'] = `Bearer ${options.apiKey}`;
120
+ if (options.organization) {
121
+ requestConfig.headers['OpenAI-Organization'] = options.organization;
122
+ }
123
+ }
124
+ // Send request
125
+ const response = yield this._httpClient.post(url, body, requestConfig);
126
+ // Check for rate limit error
127
+ if (response.status == 429 && Array.isArray(this.options.retryPolicy) && retryCount < this.options.retryPolicy.length) {
128
+ const delay = this.options.retryPolicy[retryCount];
129
+ yield new Promise((resolve) => setTimeout(resolve, delay));
130
+ return this.post(url, body, retryCount + 1);
131
+ }
132
+ else {
133
+ return response;
134
+ }
135
+ });
136
+ }
137
+ }
138
+ exports.OpenAIEmbeddings = OpenAIEmbeddings;
139
+ //# sourceMappingURL=OpenAIEmbeddings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"OpenAIEmbeddings.js","sourceRoot":"","sources":["../src/OpenAIEmbeddings.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,kDAAgF;AA2EhF;;;GAGG;AACH,MAAa,gBAAgB;IAWzB;;;OAGG;IACH,YAAmB,OAA6D;QAX/D,cAAS,GAAG,WAAW,CAAC;QAYrC,yBAAyB;QACzB,IAAK,OAAwC,CAAC,WAAW,EAAE;YACvD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YACtB,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;gBACzB,WAAW,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;gBACzB,eAAe,EAAE,YAAY;aAChC,EAAE,OAAO,CAAiC,CAAC;YAE5C,gCAAgC;YAChC,IAAI,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;YACjD,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;gBACxB,QAAQ,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;aACzD;YAED,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE;gBAChD,MAAM,IAAI,KAAK,CAAC,+CAA+C,QAAQ,4CAA4C,CAAC,CAAC;aACxH;YAED,IAAI,CAAC,OAAO,CAAC,aAAa,GAAG,QAAQ,CAAC;SACzC;aAAM;YACH,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;YACvB,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;gBACzB,WAAW,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;aAC5B,EAAE,OAAO,CAA4B,CAAC;SAC1C;QAED,gBAAgB;QAChB,IAAI,CAAC,WAAW,GAAG,eAAK,CAAC,MAAM,CAAC;YAC5B,cAAc,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,GAAG,GAAG,IAAI,MAAM,IAAI,GAAG;SAC5D,CAAC,CAAC;IACP,CAAC;IAED;;;;;OAKG;IACU,gBAAgB,CAAC,MAAyB;;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,sBAAsB,CAAC;gBAC/C,KAAK,EAAE,MAAM;aAChB,CAAC,CAAC;YAEH,mBAAmB;YACnB,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG,EAAE;gBACvB,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;aAC5H;iBAAM,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE;gBAC/B,OAAO,EAAE,MAAM,EAAE,cAAc,EAAE,OAAO,EAAE,iDAAiD,EAAE,CAAA;aAChG;iBAAM;gBACH,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,kDAAkD,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE,EAAE,CAAC;aACpI;QACL,CAAC;KAAA;IAED;;OAEG;IACO,sBAAsB,CAAC,OAA+B;;QAC5D,IAAI,IAAI,CAAC,SAAS,EAAE;YAChB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAuC,CAAC;YAC7D,MAAM,GAAG,GAAG,GAAG,OAAO,CAAC,aAAa,uBAAuB,OAAO,CAAC,eAAe,2BAA2
B,OAAO,CAAC,eAAgB,EAAE,CAAC;YACxI,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;SAClC;aAAM;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,OAAkC,CAAC;YACxD,MAAM,GAAG,GAAG,GAAG,MAAA,OAAO,CAAC,QAAQ,mCAAI,wBAAwB,gBAAgB,CAAC;YAC3E,OAAwC,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;YAChE,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;SAClC;IACL,CAAC;IAED;;OAEG;IACa,IAAI,CAAQ,GAAW,EAAE,IAAY,EAAE,UAAU,GAAG,CAAC;;YACjE,4BAA4B;YAC5B,MAAM,aAAa,GAAuB,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;YAExF,6BAA6B;YAC7B,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE;gBACxB,aAAa,CAAC,OAAO,GAAG,EAAE,CAAC;aAC9B;YACD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE;gBACxC,aAAa,CAAC,OAAO,CAAC,cAAc,CAAC,GAAG,kBAAkB,CAAC;aAC9D;YACD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE;gBACtC,aAAa,CAAC,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC;aACxD;YACD,IAAI,IAAI,CAAC,SAAS,EAAE;gBAChB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAuC,CAAC;gBAC7D,aAAa,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC;aAC1D;iBAAM;gBACH,MAAM,OAAO,GAAG,IAAI,CAAC,OAAkC,CAAC;gBACxD,aAAa,CAAC,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,OAAO,CAAC,MAAM,EAAE,CAAC;gBACpE,IAAI,OAAO,CAAC,YAAY,EAAE;oBACtB,aAAa,CAAC,OAAO,CAAC,qBAAqB,CAAC,GAAG,OAAO,CAAC,YAAY,CAAC;iBACvE;aACJ;YAED,eAAe;YACf,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,aAAa,CAAC,CAAC;YAEvE,6BAA6B;YAC7B,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,MAAM,EAAE;gBACnH,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;gBACnD,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;gBAC3D,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;aAC/C;iBAAM;gBACH,OAAO,QAAQ,CAAC;aACnB;QACL,CAAC;KAAA;CACJ;AA7HD,4CA6HC"}
@@ -0,0 +1,17 @@
1
+ import { TextChunk, Tokenizer } from "./types";
2
+ export interface TextSplitterConfig { // Settings for a TextSplitter; the constructor accepts any subset (Partial)
3
+ separators: string[]; // separator strings the text is split on
4
+ keepSeparators: boolean; // whether a separator stays attached to the chunk text it followed
5
+ chunkSize: number; // size limit for a chunk — presumably counted in tokenizer tokens; confirm against the implementation
6
+ chunkOverlap: number; // amount of overlap between neighbouring chunks — presumably in tokens; confirm against the implementation
7
+ tokenizer: Tokenizer; // Tokenizer (declared in ./types) used by the splitter
8
+ docType?: string; // optional document type hint
9
+ }
10
+ export declare class TextSplitter { // Splits a string into TextChunk objects according to a TextSplitterConfig
11
+ private readonly _config; // configuration held by the instance
12
+ constructor(config?: Partial<TextSplitterConfig>); // every setting is optional
13
+ split(text: string): TextChunk[]; // returns the chunks produced for `text`
14
+ private recursiveSplit; // internal helper; signature not exposed in this declaration
15
+ private getSeparators; // internal helper; signature not exposed in this declaration
16
+ }
17
+ //# sourceMappingURL=TextSplitter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TextSplitter.d.ts","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAE/C,MAAM,WAAW,kBAAkB;IAC/B,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,SAAS,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,YAAY;IACrB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAqB;gBAE1B,MAAM,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC;IA4BhD,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IA4BvC,OAAO,CAAC,cAAc;IA0CtB,OAAO,CAAC,aAAa;CA8WxB"}
@@ -0,0 +1,460 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.TextSplitter = void 0;
4
+ const GPT3Tokenizer_1 = require("./GPT3Tokenizer");
5
+ class TextSplitter {
6
+ constructor(config) {
7
+ this._config = Object.assign({
8
+ separators: ["\n\n", "\n", " ", ""],
9
+ keepSeparators: false,
10
+ chunkSize: 400,
11
+ chunkOverlap: 40,
12
+ }, config);
13
+ // Create a default tokenizer if none is provided
14
+ if (!this._config.tokenizer) {
15
+ this._config.tokenizer = new GPT3Tokenizer_1.GPT3Tokenizer();
16
+ }
17
+ // Use default separators if none are provided
18
+ if (!this._config.separators || this._config.separators.length === 0) {
19
+ this._config.separators = this.getSeparators(this._config.docType);
20
+ }
21
+ // Validate the config settings
22
+ if (this._config.chunkSize < 1) {
23
+ throw new Error("chunkSize must be >= 1");
24
+ }
25
+ else if (this._config.chunkOverlap < 0) {
26
+ throw new Error("chunkOverlap must be >= 0");
27
+ }
28
+ else if (this._config.chunkOverlap > this._config.chunkSize) {
29
+ throw new Error("chunkOverlap must be <= chunkSize");
30
+ }
31
+ }
32
+ split(text) {
33
+ // Get basic chunks
34
+ const chunks = this.recursiveSplit(text, this._config.separators, 0);
35
+ const that = this;
36
+ function getOverlapTokens(tokens) {
37
+ if (tokens != undefined) {
38
+ const len = tokens.length > that._config.chunkOverlap ? that._config.chunkOverlap : tokens.length;
39
+ return tokens.slice(tokens.length);
40
+ }
41
+ else {
42
+ return [];
43
+ }
44
+ }
45
+ // Add overlap tokens and text to the start and end of each chunk
46
+ if (this._config.chunkOverlap > 0) {
47
+ for (let i = 1; i < chunks.length; i++) {
48
+ const previousChunk = chunks[i - 1];
49
+ const chunk = chunks[i];
50
+ const nextChunk = i < chunks.length - 1 ? chunks[i + 1] : undefined;
51
+ chunk.startOverlap = getOverlapTokens(previousChunk.tokens.reverse()).reverse();
52
+ chunk.endOverlap = getOverlapTokens(nextChunk === null || nextChunk === void 0 ? void 0 : nextChunk.tokens);
53
+ }
54
+ }
55
+ return chunks;
56
+ }
57
+ recursiveSplit(text, separators, startPos) {
58
+ const chunks = [];
59
+ if (text.length > 0 && separators.length > 0) {
60
+ const separator = separators[0];
61
+ const nextSeparators = separators.length > 1 ? separators.slice(1) : [];
62
+ const parts = text.split(separator);
63
+ for (let i = 0; i < parts.length; i++) {
64
+ const lastChunk = (i === parts.length - 1);
65
+ // Get chunk text and endPos
66
+ let chunk = parts[i];
67
+ const endPos = (startPos + (chunk.length - 1)) + (lastChunk ? 0 : separator.length);
68
+ if (this._config.keepSeparators && !lastChunk) {
69
+ chunk += separator;
70
+ }
71
+ // Encode chunk text
72
+ const tokens = this._config.tokenizer.encode(chunk);
73
+ if (tokens.length > this._config.chunkSize) {
74
+ // Break the text into smaller chunks
75
+ const subChunks = this.recursiveSplit(chunk, nextSeparators, startPos);
76
+ chunks.push(...subChunks);
77
+ }
78
+ else {
79
+ // Append chunk to output
80
+ chunks.push({
81
+ text: chunk,
82
+ tokens: tokens,
83
+ startPos: startPos,
84
+ endPos: endPos,
85
+ startOverlap: [],
86
+ endOverlap: [],
87
+ });
88
+ }
89
+ // Update startPos
90
+ startPos = endPos + 1;
91
+ }
92
+ }
93
+ return chunks;
94
+ }
95
+ getSeparators(docType) {
96
+ switch (docType !== null && docType !== void 0 ? docType : '') {
97
+ case "cpp":
98
+ return [
99
+ // Split along class definitions
100
+ "\nclass ",
101
+ // Split along function definitions
102
+ "\nvoid ",
103
+ "\nint ",
104
+ "\nfloat ",
105
+ "\ndouble ",
106
+ // Split along control flow statements
107
+ "\nif ",
108
+ "\nfor ",
109
+ "\nwhile ",
110
+ "\nswitch ",
111
+ "\ncase ",
112
+ // Split by the normal type of lines
113
+ "\n\n",
114
+ "\n",
115
+ " ",
116
+ "",
117
+ ];
118
+ case "go":
119
+ return [
120
+ // Split along function definitions
121
+ "\nfunc ",
122
+ "\nvar ",
123
+ "\nconst ",
124
+ "\ntype ",
125
+ // Split along control flow statements
126
+ "\nif ",
127
+ "\nfor ",
128
+ "\nswitch ",
129
+ "\ncase ",
130
+ // Split by the normal type of lines
131
+ "\n\n",
132
+ "\n",
133
+ " ",
134
+ "",
135
+ ];
136
+ case "java":
137
+ case "c#":
138
+ case "csharp":
139
+ case "cs":
140
+ case "ts":
141
+ case "tsx":
142
+ case "typescript":
143
+ return [
144
+ // Split along class definitions
145
+ "\nclass ",
146
+ // Split along method definitions
147
+ "\npublic ",
148
+ "\nprotected ",
149
+ "\nprivate ",
150
+ "\nstatic ",
151
+ // Split along control flow statements
152
+ "\nif ",
153
+ "\nfor ",
154
+ "\nwhile ",
155
+ "\nswitch ",
156
+ "\ncase ",
157
+ // Split by the normal type of lines
158
+ "\n\n",
159
+ "\n",
160
+ " ",
161
+ "",
162
+ ];
163
+ case "js":
164
+ case "jsx":
165
+ case "javascript":
166
+ return [
167
+ // Split along class definitions
168
+ "\nclass ",
169
+ // Split along function definitions
170
+ "\nfunction ",
171
+ "\nconst ",
172
+ "\nlet ",
173
+ "\nvar ",
174
+ "\nclass ",
175
+ // Split along control flow statements
176
+ "\nif ",
177
+ "\nfor ",
178
+ "\nwhile ",
179
+ "\nswitch ",
180
+ "\ncase ",
181
+ "\ndefault ",
182
+ // Split by the normal type of lines
183
+ "\n\n",
184
+ "\n",
185
+ " ",
186
+ "",
187
+ ];
188
+ case "php":
189
+ return [
190
+ // Split along function definitions
191
+ "\nfunction ",
192
+ // Split along class definitions
193
+ "\nclass ",
194
+ // Split along control flow statements
195
+ "\nif ",
196
+ "\nforeach ",
197
+ "\nwhile ",
198
+ "\ndo ",
199
+ "\nswitch ",
200
+ "\ncase ",
201
+ // Split by the normal type of lines
202
+ "\n\n",
203
+ "\n",
204
+ " ",
205
+ "",
206
+ ];
207
+ case "proto":
208
+ return [
209
+ // Split along message definitions
210
+ "\nmessage ",
211
+ // Split along service definitions
212
+ "\nservice ",
213
+ // Split along enum definitions
214
+ "\nenum ",
215
+ // Split along option definitions
216
+ "\noption ",
217
+ // Split along import statements
218
+ "\nimport ",
219
+ // Split along syntax declarations
220
+ "\nsyntax ",
221
+ // Split by the normal type of lines
222
+ "\n\n",
223
+ "\n",
224
+ " ",
225
+ "",
226
+ ];
227
+ case "python":
228
+ case "py":
229
+ return [
230
+ // First, try to split along class definitions
231
+ "\nclass ",
232
+ "\ndef ",
233
+ "\n\tdef ",
234
+ // Now split by the normal type of lines
235
+ "\n\n",
236
+ "\n",
237
+ " ",
238
+ "",
239
+ ];
240
+ case "rst":
241
+ return [
242
+ // Split along section titles
243
+ "\n===\n",
244
+ "\n---\n",
245
+ "\n***\n",
246
+ // Split along directive markers
247
+ "\n.. ",
248
+ // Split by the normal type of lines
249
+ "\n\n",
250
+ "\n",
251
+ " ",
252
+ "",
253
+ ];
254
+ case "ruby":
255
+ return [
256
+ // Split along method definitions
257
+ "\ndef ",
258
+ "\nclass ",
259
+ // Split along control flow statements
260
+ "\nif ",
261
+ "\nunless ",
262
+ "\nwhile ",
263
+ "\nfor ",
264
+ "\ndo ",
265
+ "\nbegin ",
266
+ "\nrescue ",
267
+ // Split by the normal type of lines
268
+ "\n\n",
269
+ "\n",
270
+ " ",
271
+ "",
272
+ ];
273
+ case "rust":
274
+ return [
275
+ // Split along function definitions
276
+ "\nfn ",
277
+ "\nconst ",
278
+ "\nlet ",
279
+ // Split along control flow statements
280
+ "\nif ",
281
+ "\nwhile ",
282
+ "\nfor ",
283
+ "\nloop ",
284
+ "\nmatch ",
285
+ "\nconst ",
286
+ // Split by the normal type of lines
287
+ "\n\n",
288
+ "\n",
289
+ " ",
290
+ "",
291
+ ];
292
+ case "scala":
293
+ return [
294
+ // Split along class definitions
295
+ "\nclass ",
296
+ "\nobject ",
297
+ // Split along method definitions
298
+ "\ndef ",
299
+ "\nval ",
300
+ "\nvar ",
301
+ // Split along control flow statements
302
+ "\nif ",
303
+ "\nfor ",
304
+ "\nwhile ",
305
+ "\nmatch ",
306
+ "\ncase ",
307
+ // Split by the normal type of lines
308
+ "\n\n",
309
+ "\n",
310
+ " ",
311
+ "",
312
+ ];
313
+ case "swift":
314
+ return [
315
+ // Split along function definitions
316
+ "\nfunc ",
317
+ // Split along class definitions
318
+ "\nclass ",
319
+ "\nstruct ",
320
+ "\nenum ",
321
+ // Split along control flow statements
322
+ "\nif ",
323
+ "\nfor ",
324
+ "\nwhile ",
325
+ "\ndo ",
326
+ "\nswitch ",
327
+ "\ncase ",
328
+ // Split by the normal type of lines
329
+ "\n\n",
330
+ "\n",
331
+ " ",
332
+ "",
333
+ ];
334
+ case "markdown":
335
+ return [
336
+ // First, try to split along Markdown headings (starting with level 2)
337
+ "\n## ",
338
+ "\n### ",
339
+ "\n#### ",
340
+ "\n##### ",
341
+ "\n###### ",
342
+ // Note the alternative syntax for headings (below) is not handled here
343
+ // Heading level 2
344
+ // ---------------
345
+ // End of code block
346
+ "```\n\n",
347
+ // Horizontal lines
348
+ "\n\n***\n\n",
349
+ "\n\n---\n\n",
350
+ "\n\n___\n\n",
351
+ // Note that this splitter doesn't handle horizontal lines defined
352
+ // by *three or more* of ***, ---, or ___, but this is not handled
353
+ "\n\n",
354
+ "\n",
355
+ " ",
356
+ "",
357
+ ];
358
+ case "latex":
359
+ return [
360
+ // First, try to split along Latex sections
361
+ "\n\\chapter{",
362
+ "\n\\section{",
363
+ "\n\\subsection{",
364
+ "\n\\subsubsection{",
365
+ // Now split by environments
366
+ "\n\\begin{enumerate}",
367
+ "\n\\begin{itemize}",
368
+ "\n\\begin{description}",
369
+ "\n\\begin{list}",
370
+ "\n\\begin{quote}",
371
+ "\n\\begin{quotation}",
372
+ "\n\\begin{verse}",
373
+ "\n\\begin{verbatim}",
374
+ // Now split by math environments
375
+ "\n\\begin{align}",
376
+ "$$",
377
+ "$",
378
+ // Now split by the normal type of lines
379
+ "\n\n",
380
+ "\n",
381
+ " ",
382
+ "",
383
+ ];
384
+ case "html":
385
+ return [
386
+ // First, try to split along HTML tags
387
+ "<body>",
388
+ "<div>",
389
+ "<p>",
390
+ "<br>",
391
+ "<li>",
392
+ "<h1>",
393
+ "<h2>",
394
+ "<h3>",
395
+ "<h4>",
396
+ "<h5>",
397
+ "<h6>",
398
+ "<span>",
399
+ "<table>",
400
+ "<tr>",
401
+ "<td>",
402
+ "<th>",
403
+ "<ul>",
404
+ "<ol>",
405
+ "<header>",
406
+ "<footer>",
407
+ "<nav>",
408
+ // Head
409
+ "<head>",
410
+ "<style>",
411
+ "<script>",
412
+ "<meta>",
413
+ "<title>",
414
+ // Normal type of lines
415
+ " ",
416
+ "",
417
+ ];
418
+ case "sol":
419
+ return [
420
+ // Split along compiler informations definitions
421
+ "\npragma ",
422
+ "\nusing ",
423
+ // Split along contract definitions
424
+ "\ncontract ",
425
+ "\ninterface ",
426
+ "\nlibrary ",
427
+ // Split along method definitions
428
+ "\nconstructor ",
429
+ "\ntype ",
430
+ "\nfunction ",
431
+ "\nevent ",
432
+ "\nmodifier ",
433
+ "\nerror ",
434
+ "\nstruct ",
435
+ "\nenum ",
436
+ // Split along control flow statements
437
+ "\nif ",
438
+ "\nfor ",
439
+ "\nwhile ",
440
+ "\ndo while ",
441
+ "\nassembly ",
442
+ // Split by the normal type of lines
443
+ "\n\n",
444
+ "\n",
445
+ " ",
446
+ "",
447
+ ];
448
+ default:
449
+ return [
450
+ // Split by the normal type of lines
451
+ "\n\n",
452
+ "\n",
453
+ " ",
454
+ "",
455
+ ];
456
+ }
457
+ }
458
+ }
459
+ exports.TextSplitter = TextSplitter;
460
+ //# sourceMappingURL=TextSplitter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TextSplitter.js","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":";;;AAAA,mDAAgD;AAYhD,MAAa,YAAY;IAGrB,YAAmB,MAAoC;QACnD,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;YACzB,UAAU,EAAE,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC;YACnC,cAAc,EAAE,KAAK;YACrB,SAAS,EAAE,GAAG;YACd,YAAY,EAAE,EAAE;SACG,EAAE,MAAM,CAAC,CAAC;QAEjC,iDAAiD;QACjD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,6BAAa,EAAE,CAAC;SAChD;QAED,8CAA8C;QAC9C,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;YAClE,IAAI,CAAC,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;SACtE;QAED,+BAA+B;QAC/B,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,EAAE;YAC5B,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;SAC7C;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YACtC,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;SAChD;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YAC3D,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;SACxD;IACL,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,mBAAmB;QACnB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QAErE,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,SAAS,gBAAgB,CAAC,MAAiB;YACvC,IAAI,MAAM,IAAI,SAAS,EAAE;gBACrB,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;gBAClG,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;aACtC;iBAAM;gBACH,OAAO,EAAE,CAAC;aACb;QACL,CAAC;QAED,iEAAiE;QACjE,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACpC,MAAM,aAAa,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACxB,MAAM,SAAS,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBACpE,KAAK,CAAC,YAAY,GAAG,gBAAgB,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;gBAChF,KAAK,CAAC,UAAU,GAAG,gBAAgB,CAAC
,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,MAAM,CAAC,CAAC;aAC1D;SACJ;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,IAAY,EAAE,UAAoB,EAAE,QAAgB;QACvE,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;YAC1C,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,cAAc,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACxE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnC,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAE3C,4BAA4B;gBAC5B,IAAI,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,MAAM,MAAM,GAAG,CAAC,QAAQ,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBACpF,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,IAAI,CAAC,SAAS,EAAE;oBAC3C,KAAK,IAAI,SAAS,CAAC;iBACtB;gBAED,oBAAoB;gBACpB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACpD,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBACxC,qCAAqC;oBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;oBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;iBAC7B;qBAAM;oBACH,yBAAyB;oBACzB,MAAM,CAAC,IAAI,CAAC;wBACR,IAAI,EAAE,KAAK;wBACX,MAAM,EAAE,MAAM;wBACd,QAAQ,EAAE,QAAQ;wBAClB,MAAM,EAAE,MAAM;wBACd,YAAY,EAAE,EAAE;wBAChB,UAAU,EAAE,EAAE;qBACjB,CAAC,CAAC;iBACN;gBAED,kBAAkB;gBAClB,QAAQ,GAAG,MAAM,GAAG,CAAC,CAAC;aACzB;SACJ;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAEO,aAAa,CAAC,OAAgB;QAClC,QAAQ,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,EAAE;YACnB,KAAK,KAAK;gBACN,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,IAAI;gBACL,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,G
AAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,MAAM,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,iCAAiC;oBACjC,WAAW;oBACX,cAAc;oBACd,YAAY;oBACZ,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,UAAU;oBACV,QAAQ;oBACR,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,YAAY;oBACZ,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,mCAAmC;oBACnC,aAAa;oBACb,gCAAgC;oBAChC,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,YAAY;oBACZ,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,kCAAkC;oBAClC,YAAY;oBACZ,kCAAkC;oBAClC,YAAY;oBACZ,+BAA+B;oBAC/B,SAAS;oBACT,iCAAiC;oBACjC,WAAW;oBACX,gCAAgC;oBAChC,WAAW;oBACX,kCAAkC;oBAClC,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI;gBACL,OAAO;oBACH,8CAA8C;oBAC9C,UAAU;oBACV,QAAQ;oBACR,UAAU;oBACV,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,6BAA6B;oBAC7B,SAAS;oBACT,SAAS;oBACT,SAAS;oBACT,gCAAgC;oBAChC,OAAO;oBACP,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,iCAAiC;oBACjC,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,WAAW;oBACX,UAAU;oBACV,QAAQ;oBACR,OAAO;oBACP,UAAU;oBACV,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,mCAAmC;oBACnC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBAC
V,WAAW;oBACX,iCAAiC;oBACjC,QAAQ;oBACR,QAAQ;oBACR,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,UAAU;oBACV,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,UAAU;gBACX,OAAO;oBACH,sEAAsE;oBACtE,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,WAAW;oBACX,uEAAuE;oBACvE,kBAAkB;oBAClB,kBAAkB;oBAClB,oBAAoB;oBACpB,SAAS;oBACT,mBAAmB;oBACnB,aAAa;oBACb,aAAa;oBACb,aAAa;oBACb,kEAAkE;oBAClE,kEAAkE;oBAClE,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,2CAA2C;oBAC3C,cAAc;oBACd,cAAc;oBACd,iBAAiB;oBACjB,oBAAoB;oBAEpB,4BAA4B;oBAC5B,sBAAsB;oBACtB,oBAAoB;oBACpB,wBAAwB;oBACxB,iBAAiB;oBACjB,kBAAkB;oBAClB,sBAAsB;oBACtB,kBAAkB;oBAClB,qBAAqB;oBAErB,iCAAiC;oBACjC,kBAAkB;oBAClB,IAAI;oBACJ,GAAG;oBAEH,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,sCAAsC;oBACtC,QAAQ;oBACR,OAAO;oBACP,KAAK;oBACL,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,QAAQ;oBACR,SAAS;oBACT,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,UAAU;oBACV,UAAU;oBACV,OAAO;oBACP,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,uBAAuB;oBACvB,GAAG;oBACH,EAAE;iBACL,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,gDAAgD;oBAChD,WAAW;oBACX,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,cAAc;oBACd,YAAY;oBACZ,iCAAiC;oBACjC,gBAAgB;oBAChB,SAAS;oBACT,aAAa;oBACb,UAAU;oBACV,aAAa;oBACb,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,aAAa;oBACb,aAAa;oBACb,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;YACN;gBACI,OAAO;oBACH,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;SACT;IACL,CAAC;CACJ;AAndD,oCAmdC"}
@@ -0,0 +1,16 @@
1
+ import { AxiosRequestConfig } from "axios";
2
+ import { TextFetcher } from './types';
3
+ export interface WebFetcherConfig {
4
+ headers?: Record<string, string>;
5
+ requestConfig?: AxiosRequestConfig;
6
+ htmlToText: boolean;
7
+ summarizeHtml: boolean;
8
+ }
9
+ export declare class WebFetcher implements TextFetcher {
10
+ private readonly _config;
11
+ constructor(config?: Partial<WebFetcherConfig>);
12
+ fetch(uri: string): Promise<string>;
13
+ private extractText;
14
+ private fetchPage;
15
+ }
16
+ //# sourceMappingURL=WebFetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"WebFetcher.d.ts","sourceRoot":"","sources":["../src/WebFetcher.ts"],"names":[],"mappings":"AAAA,OAAc,EAAE,kBAAkB,EAAE,MAAM,OAAO,CAAC;AAElD,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AA2BtC,MAAM,WAAW,gBAAgB;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAC,MAAM,CAAC,CAAC;IAChC,aAAa,CAAC,EAAE,kBAAkB,CAAC;IACnC,UAAU,EAAE,OAAO,CAAC;IACpB,aAAa,EAAE,OAAO,CAAC;CAC1B;AAED,qBAAa,UAAW,YAAW,WAAW;IAC1C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;gBAExB,MAAM,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC;IAOxC,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAShD,OAAO,CAAC,WAAW;YAyCL,SAAS;CA+B1B"}