vectra 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +3 -3
  2. package/bin/vectra.js +3 -0
  3. package/lib/GPT3Tokenizer.d.ts +9 -0
  4. package/lib/GPT3Tokenizer.d.ts.map +1 -0
  5. package/lib/GPT3Tokenizer.js +17 -0
  6. package/lib/GPT3Tokenizer.js.map +1 -0
  7. package/lib/ItemSelector.d.ts +41 -0
  8. package/lib/ItemSelector.d.ts.map +1 -0
  9. package/lib/ItemSelector.js +156 -0
  10. package/lib/ItemSelector.js.map +1 -0
  11. package/lib/LocalDocument.d.ts +16 -0
  12. package/lib/LocalDocument.d.ts.map +1 -0
  13. package/lib/LocalDocument.js +99 -0
  14. package/lib/LocalDocument.js.map +1 -0
  15. package/lib/LocalDocumentIndex.d.ts +48 -0
  16. package/lib/LocalDocumentIndex.d.ts.map +1 -0
  17. package/lib/LocalDocumentIndex.js +367 -0
  18. package/lib/LocalDocumentIndex.js.map +1 -0
  19. package/lib/LocalDocumentResult.d.ts +12 -0
  20. package/lib/LocalDocumentResult.d.ts.map +1 -0
  21. package/lib/LocalDocumentResult.js +186 -0
  22. package/lib/LocalDocumentResult.js.map +1 -0
  23. package/lib/LocalIndex.d.ts +130 -0
  24. package/lib/LocalIndex.d.ts.map +1 -0
  25. package/lib/LocalIndex.js +405 -0
  26. package/lib/LocalIndex.js.map +1 -0
  27. package/lib/OpenAIEmbeddings.d.ts +98 -0
  28. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  29. package/lib/OpenAIEmbeddings.js +139 -0
  30. package/lib/OpenAIEmbeddings.js.map +1 -0
  31. package/lib/TextSplitter.d.ts +17 -0
  32. package/lib/TextSplitter.d.ts.map +1 -0
  33. package/lib/TextSplitter.js +460 -0
  34. package/lib/TextSplitter.js.map +1 -0
  35. package/lib/WebFetcher.d.ts +16 -0
  36. package/lib/WebFetcher.d.ts.map +1 -0
  37. package/lib/WebFetcher.js +144 -0
  38. package/lib/WebFetcher.js.map +1 -0
  39. package/lib/index.d.ts +11 -0
  40. package/lib/index.d.ts.map +1 -0
  41. package/lib/index.js +27 -0
  42. package/lib/index.js.map +1 -0
  43. package/lib/internals/Colorize.d.ts +14 -0
  44. package/lib/internals/Colorize.d.ts.map +1 -0
  45. package/lib/internals/Colorize.js +64 -0
  46. package/lib/internals/Colorize.js.map +1 -0
  47. package/lib/internals/index.d.ts +3 -0
  48. package/lib/internals/index.d.ts.map +1 -0
  49. package/lib/internals/index.js +19 -0
  50. package/lib/internals/index.js.map +1 -0
  51. package/lib/internals/types.d.ts +42 -0
  52. package/lib/internals/types.d.ts.map +1 -0
  53. package/lib/internals/types.js +3 -0
  54. package/lib/internals/types.js.map +1 -0
  55. package/lib/types.d.ts +133 -0
  56. package/lib/types.d.ts.map +1 -0
  57. package/lib/types.js +3 -0
  58. package/lib/types.js.map +1 -0
  59. package/lib/vectra-cli.d.ts +2 -0
  60. package/lib/vectra-cli.d.ts.map +1 -0
  61. package/lib/vectra-cli.js +276 -0
  62. package/lib/vectra-cli.js.map +1 -0
  63. package/package.json +21 -3
  64. package/src/GPT3Tokenizer.ts +15 -0
  65. package/src/ItemSelector.ts +9 -9
  66. package/src/LocalDocument.ts +70 -0
  67. package/src/LocalDocumentIndex.ts +355 -0
  68. package/src/LocalDocumentResult.ts +206 -0
  69. package/src/LocalIndex.ts +12 -78
  70. package/src/OpenAIEmbeddings.ts +205 -0
  71. package/src/TextSplitter.ts +480 -0
  72. package/src/WebFetcher.ts +128 -0
  73. package/src/index.ts +8 -0
  74. package/src/internals/Colorize.ts +64 -0
  75. package/src/internals/index.ts +2 -0
  76. package/src/internals/types.ts +46 -0
  77. package/src/types.ts +160 -0
  78. package/src/vectra-cli.ts +238 -0
@@ -0,0 +1,205 @@
1
+ import axios, { AxiosInstance, AxiosResponse, AxiosRequestConfig } from 'axios';
2
+ import { EmbeddingsModel, EmbeddingsResponse } from "./types";
3
+ import { CreateEmbeddingRequest, CreateEmbeddingResponse, OpenAICreateEmbeddingRequest } from "./internals";
4
+
5
/**
 * Settings shared by the OpenAI and Azure OpenAI flavours of the embeddings options.
 */
export interface BaseOpenAIEmbeddingsOptions {
    /**
     * Optional. Delays, in milliseconds, to wait before each successive retry of a
     * rate limited (HTTP 429) request.
     * @remarks
     * When omitted, `OpenAIEmbeddings` applies `[2000, 5000]`: one retry after 2 seconds
     * followed by a second retry after 5 seconds.
     */
    retryPolicy?: Array<number>;

    /**
     * Optional. Axios request configuration merged into every request sent to the API.
     */
    requestConfig?: AxiosRequestConfig;
}
19
+
20
/**
 * Options for configuring an `OpenAIEmbeddings` to generate embeddings using an OpenAI hosted model.
 * @remarks
 * To target Azure OpenAI use `AzureOpenAIEmbeddingsOptions` instead; `OpenAIEmbeddings` only
 * switches to its Azure code path when an `azureApiKey` is present.
 */
export interface OpenAIEmbeddingsOptions extends BaseOpenAIEmbeddingsOptions {
    /**
     * API key to use when calling the OpenAI API. Sent as a `Bearer` token in the
     * `Authorization` header.
     * @remarks
     * A new API key can be created at https://platform.openai.com/account/api-keys.
     */
    apiKey: string;

    /**
     * Name of the model to use for generating embeddings. Sent as the `model` field of
     * every embeddings request.
     */
    model: string;

    /**
     * Optional. Organization to use when calling the OpenAI API. Sent in the
     * `OpenAI-Organization` header when provided.
     */
    organization?: string;

    /**
     * Optional. Base endpoint to use when calling the OpenAI API.
     * @remarks
     * Defaults to `https://api.openai.com`. The path `/v1/embeddings` is appended to this
     * value, so it should not end with a trailing slash.
     */
    endpoint?: string;
}
50
+
51
/**
 * Options that configure an `OpenAIEmbeddings` to generate embeddings with a model hosted
 * by Azure OpenAI.
 */
export interface AzureOpenAIEmbeddingsOptions extends BaseOpenAIEmbeddingsOptions {
    /**
     * Key used to authenticate requests made to Azure OpenAI. Supplying it is what selects
     * the Azure code path in `OpenAIEmbeddings`.
     */
    azureApiKey: string;

    /**
     * Endpoint of the Azure OpenAI resource to call. Must be an HTTPS url.
     */
    azureEndpoint: string;

    /**
     * Name of the Azure OpenAI deployment (model) that should generate the embeddings.
     */
    azureDeployment: string;

    /**
     * Optional. API version to request. `2023-05-15` is used when not specified.
     */
    azureApiVersion?: string;
}
75
+
76
/**
 * An `EmbeddingsModel` that generates embeddings by calling OpenAI or Azure OpenAI hosted models.
 * @remarks
 * Azure OpenAI is targeted when the supplied options contain an `azureApiKey`; otherwise the
 * OpenAI API is called. Responses with an HTTP 429 status are retried according to the
 * configured `retryPolicy`.
 */
export class OpenAIEmbeddings implements EmbeddingsModel {
    private readonly _httpClient: AxiosInstance;
    private readonly _useAzure: boolean;

    // Sent as the `User-Agent` header unless the caller's request config already provides one.
    private readonly UserAgent = 'AlphaWave';

    /**
     * Options the client was configured with, including the defaults that were applied.
     */
    public readonly options: OpenAIEmbeddingsOptions|AzureOpenAIEmbeddingsOptions;

    /**
     * Creates a new `OpenAIEmbeddings` instance.
     * @param options Options for configuring the `OpenAIEmbeddings` instance.
     * @throws Error when Azure options are supplied and `azureEndpoint` is not an HTTPS url.
     */
    public constructor(options: OpenAIEmbeddingsOptions|AzureOpenAIEmbeddingsOptions) {
        // Check for azure config
        if ((options as AzureOpenAIEmbeddingsOptions).azureApiKey) {
            this._useAzure = true;

            // Defaults come first so that caller supplied values win. The result is a copy,
            // so normalizing the endpoint below never touches the caller's object.
            const azureOptions = Object.assign({
                retryPolicy: [2000, 5000],
                azureApiVersion: '2023-05-15',
            }, options) as AzureOpenAIEmbeddingsOptions;

            // Cleanup and validate endpoint. A missing endpoint is treated as an empty
            // string so that it fails the HTTPS check with a descriptive error.
            let endpoint = (azureOptions.azureEndpoint ?? '').trim();
            if (endpoint.endsWith('/')) {
                endpoint = endpoint.substring(0, endpoint.length - 1);
            }

            if (!endpoint.toLowerCase().startsWith('https://')) {
                throw new Error(`Client created with an invalid endpoint of '${endpoint}'. The endpoint must be a valid HTTPS url.`);
            }

            azureOptions.azureEndpoint = endpoint;
            this.options = azureOptions;
        } else {
            this._useAzure = false;
            this.options = Object.assign({
                retryPolicy: [2000, 5000]
            }, options) as OpenAIEmbeddingsOptions;
        }

        // Create client. 429 is accepted as a "valid" status so that rate limiting reaches
        // the retry logic in `post()` as a normal response.
        this._httpClient = axios.create({
            validateStatus: (status) => status < 400 || status === 429
        });
    }

    /**
     * Creates embeddings for the given inputs using the configured model or deployment.
     * @param inputs Text inputs to create embeddings for.
     * @returns An `EmbeddingsResponse` with a status and the generated embeddings (ordered by
     * the `index` reported for each item), or a message when the request was not successful.
     */
    public async createEmbeddings(inputs: string | string[]): Promise<EmbeddingsResponse> {
        const response = await this.createEmbeddingRequest({
            input: inputs,
        });

        // Process response
        if (response.status < 300) {
            // Sort a copy so the response payload itself is left untouched.
            const embeddings = [...response.data.data]
                .sort((a, b) => a.index - b.index)
                .map((item) => item.embedding);
            return { status: 'success', output: embeddings };
        } else if (response.status === 429) {
            // Only reached once every retry in the retry policy has been used up.
            return { status: 'rate_limited', message: `The embeddings API returned a rate limit error.` }
        } else {
            return { status: 'error', message: `The embeddings API returned an error status of ${response.status}: ${response.statusText}` };
        }
    }

    /**
     * Sends an embeddings request to the endpoint selected by the configured options.
     * @param request Request body. It is not modified; the OpenAI path sends a copy with the
     * configured `model` added.
     * @returns The raw HTTP response returned by `post()`.
     * @private
     */
    protected createEmbeddingRequest(request: CreateEmbeddingRequest): Promise<AxiosResponse<CreateEmbeddingResponse>> {
        if (this._useAzure) {
            // The deployment is part of the url, so no model is added to the body.
            const options = this.options as AzureOpenAIEmbeddingsOptions;
            const url = `${options.azureEndpoint}/openai/deployments/${options.azureDeployment}/embeddings?api-version=${options.azureApiVersion!}`;
            return this.post(url, request);
        } else {
            const options = this.options as OpenAIEmbeddingsOptions;
            const url = `${options.endpoint ?? 'https://api.openai.com'}/v1/embeddings`;
            const body = Object.assign({}, request) as OpenAICreateEmbeddingRequest;
            body.model = options.model;
            return this.post(url, body);
        }
    }

    /**
     * POSTs a JSON body to the given url, adding the authentication headers and retrying
     * rate limited requests.
     * @param url Url to post to.
     * @param body Request body.
     * @param retryCount Number of retries already performed. Used as the index into
     * `retryPolicy` when choosing the next delay.
     * @returns The HTTP response. A 429 response is returned as-is once the retry policy is
     * exhausted (or when no retry policy is configured).
     * @private
     */
    protected async post<TData>(url: string, body: object, retryCount = 0): Promise<AxiosResponse<TData>> {
        // Initialize request config
        const requestConfig: AxiosRequestConfig = Object.assign({}, this.options.requestConfig);

        // Initialize request headers. The headers are copied as well: the copy of the config
        // above is shallow, so writing credentials into `requestConfig.headers` directly
        // would leak them into the object the caller passed in.
        requestConfig.headers = Object.assign({}, requestConfig.headers);
        if (!requestConfig.headers['Content-Type']) {
            requestConfig.headers['Content-Type'] = 'application/json';
        }
        if (!requestConfig.headers['User-Agent']) {
            requestConfig.headers['User-Agent'] = this.UserAgent;
        }
        if (this._useAzure) {
            const options = this.options as AzureOpenAIEmbeddingsOptions;
            requestConfig.headers['api-key'] = options.azureApiKey;
        } else {
            const options = this.options as OpenAIEmbeddingsOptions;
            requestConfig.headers['Authorization'] = `Bearer ${options.apiKey}`;
            if (options.organization) {
                requestConfig.headers['OpenAI-Organization'] = options.organization;
            }
        }

        // Send request
        const response = await this._httpClient.post(url, body, requestConfig);

        // Check for rate limit error
        const retryPolicy = this.options.retryPolicy;
        if (response.status === 429 && Array.isArray(retryPolicy) && retryCount < retryPolicy.length) {
            const delay = retryPolicy[retryCount];
            await new Promise((resolve) => setTimeout(resolve, delay));
            return this.post(url, body, retryCount + 1);
        } else {
            return response;
        }
    }
}
@@ -0,0 +1,480 @@
1
+ import { GPT3Tokenizer } from "./GPT3Tokenizer";
2
+ import { TextChunk, Tokenizer } from "./types";
3
+
4
/**
 * Settings that control how a `TextSplitter` breaks text into chunks.
 */
export interface TextSplitterConfig {
    /**
     * Separators to split on, tried in order. Text that is still too large after being
     * split by one separator is split again using the separators that follow it.
     */
    separators: Array<string>;

    /**
     * When `true`, the separator is appended to every part except the last one produced by
     * a split. The `TextSplitter` constructor defaults this to `false`.
     */
    keepSeparators: boolean;

    /**
     * Maximum number of tokens allowed in a chunk. Must be at least 1. The `TextSplitter`
     * constructor defaults this to `400`.
     */
    chunkSize: number;

    /**
     * Number of tokens from the neighbouring chunks to attach to a chunk as overlap. Must be
     * between 0 and `chunkSize`. The `TextSplitter` constructor defaults this to `40`.
     */
    chunkOverlap: number;

    /**
     * Tokenizer used to measure the size of a chunk. The `TextSplitter` constructor creates a
     * `GPT3Tokenizer` when none is supplied.
     */
    tokenizer: Tokenizer;

    /**
     * Optional. Type of document being split (for example `markdown` or `python`). Used to
     * pick a built-in list of separators when `separators` is empty.
     */
    docType?: string;
}
12
+
13
/**
 * Splits text into chunks that fit within a token budget.
 * @remarks
 * Text is split by the first configured separator. Any part that still encodes to more than
 * `chunkSize` tokens is split again using the remaining separators, and finally cut in half
 * repeatedly once the separators run out. Each chunk records the character range it covers
 * and, when `chunkOverlap` is greater than zero, the tokens it shares with its neighbours.
 */
export class TextSplitter {
    private readonly _config: TextSplitterConfig;

    /**
     * Creates a new `TextSplitter` instance.
     * @param config Optional. Settings to override. Defaults are `keepSeparators: false`,
     * `chunkSize: 400`, `chunkOverlap: 40`, a `GPT3Tokenizer`, and the separators that match
     * `docType`.
     * @throws Error when `chunkSize` is less than 1, or `chunkOverlap` is negative or larger
     * than `chunkSize`.
     */
    public constructor(config?: Partial<TextSplitterConfig>) {
        // `separators` is deliberately NOT given a default here. Defaulting it in this
        // literal would make the docType lookup below unreachable, so a configured
        // `docType` would never influence how text is split.
        this._config = Object.assign({
            keepSeparators: false,
            chunkSize: 400,
            chunkOverlap: 40,
        } as TextSplitterConfig, config);

        // Create a default tokenizer if none is provided
        if (!this._config.tokenizer) {
            this._config.tokenizer = new GPT3Tokenizer();
        }

        // Use the separators for the document type if none are provided
        if (!this._config.separators || this._config.separators.length === 0) {
            this._config.separators = this.getSeparators(this._config.docType);
        }

        // Validate the config settings
        if (this._config.chunkSize < 1) {
            throw new Error("chunkSize must be >= 1");
        } else if (this._config.chunkOverlap < 0) {
            throw new Error("chunkOverlap must be >= 0");
        } else if (this._config.chunkOverlap > this._config.chunkSize) {
            throw new Error("chunkOverlap must be <= chunkSize");
        }
    }

    /**
     * Splits the given text into chunks.
     * @param text Text to split.
     * @returns The chunks in document order. `startPos` and `endPos` are inclusive character
     * offsets into `text`. `startOverlap` holds up to `chunkOverlap` tokens from the end of
     * the previous chunk and `endOverlap` up to `chunkOverlap` tokens from the start of the
     * next chunk; both are empty when `chunkOverlap` is 0 or there is no such neighbour.
     */
    public split(text: string): TextChunk[] {
        // Get basic chunks
        const chunks = this.recursiveSplit(text, this._config.separators, 0);

        // Attach the overlap tokens. `slice()` is used in both directions so that the token
        // arrays of the neighbouring chunks are copied from and never modified.
        const overlap = this._config.chunkOverlap;
        if (overlap > 0) {
            chunks.forEach((chunk, i) => {
                const previousChunk = i > 0 ? chunks[i - 1] : undefined;
                const nextChunk = i < chunks.length - 1 ? chunks[i + 1] : undefined;
                chunk.startOverlap = previousChunk ? previousChunk.tokens.slice(-overlap) : [];
                chunk.endOverlap = nextChunk ? nextChunk.tokens.slice(0, overlap) : [];
            });
        }

        return chunks;
    }

    /**
     * Splits text by the first separator and recurses into the parts that are still too large.
     * @param text Text to split.
     * @param separators Separators that have not been applied yet, in priority order.
     * @param startPos Character offset of `text` within the original document.
     * @returns The chunks covering `text`, without any overlap information.
     */
    private recursiveSplit(text: string, separators: string[], startPos: number): TextChunk[] {
        const chunks: TextChunk[] = [];
        if (text.length === 0) {
            return chunks;
        }

        // Work out the parts to process
        let separator = '';
        let parts: string[];
        if (separators.length > 0) {
            separator = separators[0];
            parts = text.split(separator);
        } else if (text.length > 1) {
            // Out of separators but the text is still too large: cut it in half rather
            // than silently dropping it from the output.
            const half = Math.floor(text.length / 2);
            parts = [text.substring(0, half), text.substring(half)];
        } else {
            // A single character can't be split any further so it's emitted as-is, even
            // though it exceeds the chunk size.
            chunks.push({
                text: text,
                tokens: this._config.tokenizer.encode(text),
                startPos: startPos,
                endPos: startPos,
                startOverlap: [],
                endOverlap: [],
            });
            return chunks;
        }
        const nextSeparators = separators.slice(1);

        let pos = startPos;
        for (let i = 0; i < parts.length; i++) {
            const lastPart = (i === parts.length - 1);

            // Get chunk text and endPos. The separator that followed a part is counted as
            // belonging to that part, whether or not it is kept in the chunk text.
            let chunk = parts[i];
            const endPos = (pos + (chunk.length - 1)) + (lastPart ? 0 : separator.length);
            if (this._config.keepSeparators && !lastPart) {
                chunk += separator;
            }

            // Encode chunk text
            const tokens = this._config.tokenizer.encode(chunk);
            if (tokens.length > this._config.chunkSize) {
                // Break the text into smaller chunks. Pushed one at a time because
                // spreading a very large array into push() can exceed the argument limit.
                for (const subChunk of this.recursiveSplit(chunk, nextSeparators, pos)) {
                    chunks.push(subChunk);
                }
            } else {
                // Append chunk to output
                chunks.push({
                    text: chunk,
                    tokens: tokens,
                    startPos: pos,
                    endPos: endPos,
                    startOverlap: [],
                    endOverlap: [],
                });
            }

            // Move to the start of the next part
            pos = endPos + 1;
        }

        return chunks;
    }

    /**
     * Returns the built-in separators for a document type.
     * @param docType Optional. Document type to look up. The match is exact and case sensitive.
     * @returns Separators in priority order. Unknown or missing types get the generic
     * paragraph, line, word and character separators.
     */
    private getSeparators(docType?: string): string[] {
        switch (docType ?? '') {
            case "cpp":
                return [
                    // Split along class definitions
                    "\nclass ",
                    // Split along function definitions
                    "\nvoid ", "\nint ", "\nfloat ", "\ndouble ",
                    // Split along control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "go":
                return [
                    // Split along function definitions
                    "\nfunc ", "\nvar ", "\nconst ", "\ntype ",
                    // Split along control flow statements
                    "\nif ", "\nfor ", "\nswitch ", "\ncase ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "java":
            case "c#":
            case "csharp":
            case "cs":
            case "ts":
            case "tsx":
            case "typescript":
                return [
                    // Split along class definitions
                    "\nclass ",
                    // Split along method definitions
                    "\npublic ", "\nprotected ", "\nprivate ", "\nstatic ",
                    // Split along control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "js":
            case "jsx":
            case "javascript":
                return [
                    // Split along class definitions
                    "\nclass ",
                    // Split along function definitions
                    "\nfunction ", "\nconst ", "\nlet ", "\nvar ", "\nclass ",
                    // Split along control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ", "\ndefault ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "php":
                return [
                    // Split along function definitions
                    "\nfunction ",
                    // Split along class definitions
                    "\nclass ",
                    // Split along control flow statements
                    "\nif ", "\nforeach ", "\nwhile ", "\ndo ", "\nswitch ", "\ncase ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "proto":
                return [
                    // Split along message definitions
                    "\nmessage ",
                    // Split along service definitions
                    "\nservice ",
                    // Split along enum definitions
                    "\nenum ",
                    // Split along option definitions
                    "\noption ",
                    // Split along import statements
                    "\nimport ",
                    // Split along syntax declarations
                    "\nsyntax ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "python":
            case "py":
                return [
                    // First, try to split along class and function definitions
                    "\nclass ", "\ndef ", "\n\tdef ",
                    // Now split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "rst":
                return [
                    // Split along section titles
                    "\n===\n", "\n---\n", "\n***\n",
                    // Split along directive markers
                    "\n.. ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "ruby":
                return [
                    // Split along method definitions
                    "\ndef ", "\nclass ",
                    // Split along control flow statements
                    "\nif ", "\nunless ", "\nwhile ", "\nfor ", "\ndo ", "\nbegin ", "\nrescue ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "rust":
                return [
                    // Split along function definitions
                    "\nfn ", "\nconst ", "\nlet ",
                    // Split along control flow statements
                    "\nif ", "\nwhile ", "\nfor ", "\nloop ", "\nmatch ", "\nconst ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "scala":
                return [
                    // Split along class definitions
                    "\nclass ", "\nobject ",
                    // Split along method definitions
                    "\ndef ", "\nval ", "\nvar ",
                    // Split along control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\nmatch ", "\ncase ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "swift":
                return [
                    // Split along function definitions
                    "\nfunc ",
                    // Split along class definitions
                    "\nclass ", "\nstruct ", "\nenum ",
                    // Split along control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\ndo ", "\nswitch ", "\ncase ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "markdown":
                return [
                    // First, try to split along Markdown headings (starting with level 2).
                    // The alternative "underlined" heading syntax is not handled.
                    "\n## ", "\n### ", "\n#### ", "\n##### ", "\n###### ",
                    // End of code block
                    "```\n\n",
                    // Horizontal lines. Only lines made of exactly three characters are
                    // handled, not the longer runs that Markdown also allows.
                    "\n\n***\n\n", "\n\n---\n\n", "\n\n___\n\n",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "latex":
                return [
                    // First, try to split along Latex sections
                    "\n\\chapter{", "\n\\section{", "\n\\subsection{", "\n\\subsubsection{",
                    // Now split by environments
                    "\n\\begin{enumerate}", "\n\\begin{itemize}", "\n\\begin{description}", "\n\\begin{list}",
                    "\n\\begin{quote}", "\n\\begin{quotation}", "\n\\begin{verse}", "\n\\begin{verbatim}",
                    // Now split by math environments
                    "\n\\begin{align}", "$$", "$",
                    // Now split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            case "html":
                return [
                    // First, try to split along HTML tags
                    "<body>", "<div>", "<p>", "<br>", "<li>",
                    "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>",
                    "<span>", "<table>", "<tr>", "<td>", "<th>", "<ul>", "<ol>",
                    "<header>", "<footer>", "<nav>",
                    // Head
                    "<head>", "<style>", "<script>", "<meta>", "<title>",
                    // Normal type of lines
                    " ", "",
                ];
            case "sol":
                return [
                    // Split along compiler information definitions
                    "\npragma ", "\nusing ",
                    // Split along contract definitions
                    "\ncontract ", "\ninterface ", "\nlibrary ",
                    // Split along method definitions
                    "\nconstructor ", "\ntype ", "\nfunction ", "\nevent ",
                    "\nmodifier ", "\nerror ", "\nstruct ", "\nenum ",
                    // Split along control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\ndo while ", "\nassembly ",
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
            default:
                return [
                    // Split by the normal type of lines
                    "\n\n", "\n", " ", "",
                ];
        }
    }
}