voyageai-cli 1.15.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,341 @@
1
'use strict';

/**
 * Available chunking strategies.
 */
const STRATEGIES = ['fixed', 'sentence', 'paragraph', 'recursive', 'markdown'];

/**
 * Default chunk options.
 */
const DEFAULTS = {
  size: 512,    // target chunk size, in characters
  overlap: 50,  // characters shared between consecutive chunks
  minSize: 20,  // chunks shorter than this (after trimming) are dropped
};
16
+
17
// ── Sentence splitting ──

/**
 * Split text into sentences.
 * A boundary is sentence-ending punctuation ([.!?]) followed by whitespace
 * and an uppercase / accented-uppercase / quote character. Periods that
 * terminate common honorific abbreviations (Mr., Dr., ...) are NOT treated
 * as boundaries (the previous version's comment promised this but the
 * regex did not implement it).
 * @param {string} text
 * @returns {string[]} Trimmed, non-empty sentences.
 */
function splitSentences(text) {
  // Negative lookbehind suppresses splits right after known abbreviations;
  // positive lookbehind anchors the split after closing punctuation.
  const boundary =
    /(?<!\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|St|vs)\.)(?<=[.!?])\s+(?=[A-Z\u00C0-\u024F"])/;
  return text
    .split(boundary)
    .map((s) => s.trim())
    .filter((s) => s.length > 0);
}
30
+
31
// ── Strategy implementations ──

/**
 * Fixed-size chunking with character count and overlap.
 * Windows advance by (size - overlap) characters, clamped to a minimum
 * step of 1 — the previous implementation never advanced `start` when
 * overlap >= size and looped forever.
 * @param {string} text
 * @param {object} opts
 * @param {number} opts.size - Target chunk size in characters
 * @param {number} opts.overlap - Overlap between chunks in characters
 * @param {number} [opts.minSize] - Minimum chunk length kept (defaults to DEFAULTS.minSize)
 * @returns {string[]}
 */
function chunkFixed(text, opts) {
  const { size, overlap } = opts;
  const minSize = opts.minSize || DEFAULTS.minSize;
  // A non-positive step would re-slice the same window forever.
  const step = Math.max(1, size - overlap);
  const chunks = [];

  for (let start = 0; start < text.length; start += step) {
    const piece = text.slice(start, start + size).trim();
    if (piece.length >= minSize) chunks.push(piece);
    // The last window already reached the end of the text.
    if (start + size >= text.length) break;
  }

  return chunks;
}
57
+
58
/**
 * Sentence-boundary chunking: splits the text into sentences, then packs
 * consecutive sentences into chunks no larger than `opts.size` characters.
 * @param {string} text
 * @param {object} opts - { size, overlap, minSize }
 * @returns {string[]}
 */
function chunkSentence(text, opts) {
  const minSize = opts.minSize || DEFAULTS.minSize;
  return groupUnits(splitSentences(text), opts.size, opts.overlap, minSize);
}
69
+
70
/**
 * Paragraph chunking: splits on blank lines, then packs paragraphs into
 * chunks no larger than `opts.size` characters.
 * @param {string} text
 * @param {object} opts - { size, overlap, minSize }
 * @returns {string[]}
 */
function chunkParagraph(text, opts) {
  const paragraphs = [];
  for (const block of text.split(/\n\s*\n/)) {
    const trimmed = block.trim();
    if (trimmed.length > 0) paragraphs.push(trimmed);
  }
  return groupUnits(paragraphs, opts.size, opts.overlap, opts.minSize || DEFAULTS.minSize);
}
81
+
82
/**
 * Recursive chunking. Tries the coarsest delimiter present first and falls
 * back to progressively finer ones. The most commonly used strategy for
 * RAG pipelines.
 * @param {string} text
 * @param {object} opts - { size, minSize }
 * @returns {string[]}
 */
function chunkRecursive(text, opts) {
  // Ordered coarsest-to-finest: paragraphs, lines, sentence enders, clauses, words.
  const separators = ['\n\n', '\n', '. ', '! ', '? ', '; ', ', ', ' '];
  return recursiveSplit(text, separators, opts.size, opts.minSize || DEFAULTS.minSize);
}
95
+
96
/**
 * Internal recursive split implementation.
 * Strategy: if the text fits in maxSize, emit it whole; otherwise split on
 * the coarsest separator present, greedily re-merge pieces while they stay
 * under maxSize, and recurse with finer separators on any single piece
 * that is still too large. Pieces shorter than minSize after trimming are
 * silently dropped.
 * @param {string} text
 * @param {string[]} separators - Ordered coarsest-to-finest delimiters
 * @param {number} maxSize - Maximum chunk length in characters
 * @param {number} minSize - Minimum chunk length kept
 * @returns {string[]}
 */
function recursiveSplit(text, separators, maxSize, minSize) {
  // Base case: the whole text fits in one chunk.
  if (text.length <= maxSize) {
    return text.trim().length >= minSize ? [text.trim()] : [];
  }

  // Find the first (coarsest) separator that exists in the text.
  let sep = null;
  for (const s of separators) {
    if (text.includes(s)) {
      sep = s;
      break;
    }
  }

  // If no separator found, hard-split by characters.
  if (sep === null) {
    const chunks = [];
    for (let i = 0; i < text.length; i += maxSize) {
      const chunk = text.slice(i, i + maxSize).trim();
      if (chunk.length >= minSize) chunks.push(chunk);
    }
    return chunks;
  }

  // Split on this separator and greedily merge pieces under maxSize.
  // Rejoining with `sep` preserves the original delimiter inside a chunk.
  const parts = text.split(sep);
  const chunks = [];
  let current = '';

  for (const part of parts) {
    const candidate = current ? current + sep + part : part;

    if (candidate.length <= maxSize) {
      current = candidate;
    } else {
      // Flush the accumulated chunk (dropped if below minSize).
      if (current.trim().length >= minSize) {
        chunks.push(current.trim());
      }
      // If this single part exceeds maxSize, recurse using only the finer
      // separators that follow the current one.
      if (part.length > maxSize) {
        const remainingSeps = separators.slice(separators.indexOf(sep) + 1);
        chunks.push(...recursiveSplit(part, remainingSeps, maxSize, minSize));
        current = '';
      } else {
        current = part;
      }
    }
  }

  // Flush remainder.
  if (current.trim().length >= minSize) {
    chunks.push(current.trim());
  }

  return chunks;
}
161
+
162
/**
 * Markdown-aware chunking. Splits on ATX headings (# through ######);
 * each heading starts a new section and the content under it is grouped.
 * Sections larger than `opts.size` are split recursively, with the heading
 * re-attached to the first resulting sub-chunk.
 * @param {string} text
 * @param {object} opts - { size, minSize } (full opts passed to chunkRecursive)
 * @returns {string[]}
 */
function chunkMarkdown(text, opts) {
  const { size, minSize } = opts;

  // Split on markdown headings (# through ######)
  const headingPattern = /^(#{1,6}\s.+)$/gm;
  const sections = [];
  let lastIndex = 0;
  let match;

  while ((match = headingPattern.exec(text)) !== null) {
    // Content between the previous heading (or start of text) and this one.
    if (match.index > lastIndex) {
      const content = text.slice(lastIndex, match.index).trim();
      if (content) {
        if (sections.length > 0) {
          // Append to previous section's body.
          sections[sections.length - 1].content += '\n\n' + content;
        } else {
          // Preamble before the first heading becomes a heading-less section.
          sections.push({ heading: '', content });
        }
      }
    }
    sections.push({ heading: match[1], content: '' });
    lastIndex = match.index + match[0].length;
  }

  // Remaining content after the last heading.
  if (lastIndex < text.length) {
    const content = text.slice(lastIndex).trim();
    if (content) {
      if (sections.length > 0) {
        sections[sections.length - 1].content += '\n\n' + content;
      } else {
        sections.push({ heading: '', content });
      }
    }
  }

  // Build chunks from sections, splitting large sections recursively.
  const chunks = [];
  for (const section of sections) {
    const full = section.heading
      ? section.heading + '\n\n' + section.content.trim()
      : section.content.trim();

    // Skip empty or too-small sections entirely.
    if (!full || full.length < (minSize || DEFAULTS.minSize)) continue;

    if (full.length <= size) {
      chunks.push(full);
    } else {
      // Section too large — recursively split the content, prepend heading
      // to the first sub-chunk so context isn't lost.
      const subChunks = chunkRecursive(section.content.trim(), opts);
      for (let i = 0; i < subChunks.length; i++) {
        if (i === 0 && section.heading) {
          chunks.push(section.heading + '\n\n' + subChunks[i]);
        } else {
          chunks.push(subChunks[i]);
        }
      }
    }
  }

  return chunks;
}
233
+
234
// ── Shared helpers ──

/**
 * Group text units (sentences, paragraphs) into chunks under a size limit.
 * Supports overlap by carrying trailing units of a flushed chunk into the
 * next one, as long as they fit inside the overlap budget.
 * @param {string[]} units
 * @param {number} maxSize - Maximum chunk length in characters
 * @param {number} overlapChars - Overlap budget in characters
 * @param {number} minSize - Minimum length for the final flushed chunk
 * @returns {string[]}
 */
function groupUnits(units, maxSize, overlapChars, minSize) {
  const chunks = [];
  let buffer = [];
  let bufferLen = 0;

  // Trailing units of `parts` that fit within the overlap budget,
  // together with their accumulated (space-padded) length.
  const overlapTail = (parts) => {
    const kept = [];
    let len = 0;
    for (let i = parts.length - 1; i >= 0; i -= 1) {
      if (len + parts[i].length + 1 > overlapChars) break;
      kept.unshift(parts[i]);
      len += parts[i].length + 1;
    }
    return [kept, len];
  };

  for (const unit of units) {
    // +1 accounts for the joining space when the buffer is non-empty.
    const cost = buffer.length > 0 ? unit.length + 1 : unit.length;

    if (buffer.length > 0 && bufferLen + cost > maxSize) {
      chunks.push(buffer.join(' ').trim());

      if (overlapChars > 0) {
        [buffer, bufferLen] = overlapTail(buffer);
      } else {
        buffer = [];
        bufferLen = 0;
      }
    }

    buffer.push(unit);
    bufferLen += cost;
  }

  // Flush the remainder, dropping it if below the minimum size.
  if (buffer.length > 0) {
    const tail = buffer.join(' ').trim();
    if (tail.length >= minSize) chunks.push(tail);
  }

  return chunks;
}
285
+
286
// ── Token estimation ──

/**
 * Rough token estimate assuming ~4 characters per token (English prose).
 * @param {string} text
 * @returns {number} Estimated token count, rounded up.
 */
function estimateTokens(text) {
  const charsPerToken = 4; // rough average for English text
  return Math.ceil(text.length / charsPerToken);
}
296
+
297
// ── Public API ──

/**
 * Chunk text using the specified strategy.
 * @param {string} text - Input text
 * @param {object} [options]
 * @param {string} [options.strategy='recursive'] - Chunking strategy
 * @param {number} [options.size=512] - Target chunk size in characters
 * @param {number} [options.overlap=50] - Overlap between chunks in characters
 * @param {number} [options.minSize=20] - Minimum chunk size
 * @returns {string[]} Array of text chunks
 * @throws {Error} If the strategy is unknown.
 */
function chunk(text, options = {}) {
  // Nothing to do for empty or whitespace-only input.
  if (!text || text.trim().length === 0) return [];

  const opts = {
    strategy: options.strategy || 'recursive',
    size: options.size || DEFAULTS.size,
    // `overlap: 0` is a valid explicit choice, so only null/undefined fall back.
    overlap: options.overlap != null ? options.overlap : DEFAULTS.overlap,
    minSize: options.minSize || DEFAULTS.minSize,
  };

  // Map-based dispatch avoids accidental prototype-key matches.
  const implementations = new Map([
    ['fixed', chunkFixed],
    ['sentence', chunkSentence],
    ['paragraph', chunkParagraph],
    ['recursive', chunkRecursive],
    ['markdown', chunkMarkdown],
  ]);

  const impl = implementations.get(opts.strategy);
  if (!impl) {
    throw new Error(`Unknown chunking strategy: ${opts.strategy}. Available: ${STRATEGIES.join(', ')}`);
  }
  return impl(text, opts);
}
334
+
335
// Public API: the chunk() entry point plus helpers useful on their own.
module.exports = {
  chunk,
  splitSentences,
  estimateTokens,
  STRATEGIES,
  DEFAULTS,
};
@@ -0,0 +1,122 @@
1
'use strict';

const fs = require('fs');
const path = require('path');

// Name of the per-project config file, discovered by walking up from cwd.
const PROJECT_FILE = '.vai.json';
// Schema version stamped into saved config files.
const PROJECT_VERSION = 1;
8
+
9
/**
 * Search for .vai.json starting from startDir, walking up to (and
 * including) the filesystem root.
 * @param {string} [startDir] - Directory to start from (default: cwd)
 * @returns {string|null} Absolute path to .vai.json, or null if not found
 */
function findProjectFile(startDir) {
  let current = path.resolve(startDir || process.cwd());
  const { root } = path.parse(current);

  for (;;) {
    const candidate = path.join(current, PROJECT_FILE);
    if (fs.existsSync(candidate)) return candidate;
    // The root has just been checked; nowhere further up to look.
    if (current === root) return null;
    current = path.dirname(current);
  }
}
29
+
30
/**
 * Load project config from .vai.json.
 * Missing or unparseable files degrade to an empty config rather than
 * throwing, so callers can always destructure the result.
 * @param {string} [startDir] - Directory to start searching from
 * @returns {{ config: object, filePath: string|null }}
 */
function loadProject(startDir) {
  const filePath = findProjectFile(startDir);
  let config = {};

  if (filePath) {
    try {
      config = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    } catch (err) {
      // Best effort: an unreadable or corrupt file behaves like an empty one.
      config = {};
    }
  }

  return { config, filePath };
}
46
+
47
/**
 * Save project config to .vai.json (pretty-printed, trailing newline).
 * @param {object} config - Project configuration
 * @param {string} [targetPath] - Path to write (default: cwd/.vai.json)
 * @returns {string} The path that was written
 */
function saveProject(config, targetPath) {
  const filePath = targetPath || path.join(process.cwd(), PROJECT_FILE);
  // `version` is written first; any version key in `config` wins via spread.
  const payload = JSON.stringify({ version: PROJECT_VERSION, ...config }, null, 2);
  fs.writeFileSync(filePath, `${payload}\n`, 'utf-8');
  return filePath;
}
58
+
59
/**
 * Merge project config with CLI options. CLI options take precedence.
 * Only merges known keys — doesn't blindly spread everything.
 * @param {object} projectConfig - From .vai.json
 * @param {object} cliOpts - From commander
 * @returns {object} Merged options
 */
function mergeOptions(projectConfig, cliOpts) {
  // Allow-list of top-level keys shared by the project file and CLI flags.
  const knownKeys = [
    'model', 'db', 'collection', 'field', 'inputType',
    'dimensions', 'index',
  ];

  const merged = {};
  for (const key of knownKeys) {
    // Explicit CLI value wins; otherwise fall back to the project file.
    const value = cliOpts[key] !== undefined ? cliOpts[key] : projectConfig[key];
    if (value !== undefined) merged[key] = value;
  }

  // Chunking settings nest under `chunk` in the project file; copy shallowly.
  if (projectConfig.chunk) {
    merged.chunk = { ...projectConfig.chunk };
  }

  return merged;
}
91
+
92
/**
 * Default project config scaffold.
 * @returns {object} A fresh config object (safe for the caller to mutate)
 */
function defaultProjectConfig() {
  // Default chunking settings mirror the chunker module's defaults.
  const chunk = {
    strategy: 'recursive',
    size: 512,
    overlap: 50,
  };

  return {
    version: PROJECT_VERSION,
    model: 'voyage-4-large',
    db: '',
    collection: '',
    field: 'embedding',
    inputType: 'document',
    dimensions: 1024,
    index: 'vector_index',
    chunk,
  };
}
113
+
114
// Public API: project-file discovery, load/save, and option merging.
module.exports = {
  PROJECT_FILE,
  PROJECT_VERSION,
  findProjectFile,
  loadProject,
  saveProject,
  mergeOptions,
  defaultProjectConfig,
};
@@ -0,0 +1,239 @@
1
'use strict';

const fs = require('fs');
const path = require('path');

/**
 * Supported file extensions mapped to their reader types.
 * Keys are lowercase extensions including the leading dot.
 */
const SUPPORTED_EXTENSIONS = {
  '.txt': 'text',
  '.md': 'text',
  '.markdown': 'text',
  '.rst': 'text',
  '.html': 'html',
  '.htm': 'html',
  '.json': 'json',
  '.jsonl': 'jsonl',
  '.ndjson': 'jsonl',
  '.csv': 'text',   // read as raw text; no column parsing
  '.pdf': 'pdf',    // requires the optional pdf-parse dependency
};
22
+
23
/**
 * Check if a file extension is supported.
 * Uses an own-property check instead of the `in` operator, which would
 * also match inherited Object.prototype members.
 * @param {string} filePath
 * @returns {boolean}
 */
function isSupported(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  return Object.hasOwn(SUPPORTED_EXTENSIONS, ext);
}
32
+
33
/**
 * Get the reader type for a file.
 * @param {string} filePath
 * @returns {string|null} Reader type, or null when the extension is unknown
 */
function getReaderType(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  const type = SUPPORTED_EXTENSIONS[extension];
  return type === undefined ? null : type;
}
42
+
43
/**
 * Read a text file (txt, md, rst, csv) as UTF-8.
 * Uses the promise-based fs API so awaiting callers do not block the event
 * loop (the previous version did a synchronous read inside an async function).
 * @param {string} filePath
 * @returns {Promise<string>}
 */
async function readTextFile(filePath) {
  return fs.promises.readFile(filePath, 'utf-8');
}
51
+
52
/**
 * Read an HTML file and strip tags to plain text.
 * Lightweight — no external dependencies. Uses the promise-based fs API
 * so awaiting callers do not block the event loop (previously a sync read).
 * @param {string} filePath
 * @returns {Promise<string>}
 */
async function readHtmlFile(filePath) {
  const html = await fs.promises.readFile(filePath, 'utf-8');
  return stripHtml(html);
}
62
+
63
/**
 * Strip HTML tags and decode common entities, yielding plain text.
 * Block-level tags become newlines so paragraph structure survives.
 * @param {string} html
 * @returns {string}
 */
function stripHtml(html) {
  // Ordered transforms — order matters: scripts/styles removed before
  // generic tags, entities decoded after tag removal, whitespace last.
  const transforms = [
    [/<script[\s\S]*?<\/script>/gi, ''],
    [/<style[\s\S]*?<\/style>/gi, ''],
    // Block elements become newlines.
    [/<\/?(p|div|br|h[1-6]|li|tr|blockquote|section|article|header|footer|nav|pre)[^>]*>/gi, '\n'],
    // Any remaining tag is dropped.
    [/<[^>]+>/g, ''],
    // Common entity decoding.
    [/&amp;/g, '&'],
    [/&lt;/g, '<'],
    [/&gt;/g, '>'],
    [/&quot;/g, '"'],
    [/&#39;/g, "'"],
    [/&nbsp;/g, ' '],
    // Collapse runs of blank lines.
    [/\n{3,}/g, '\n\n'],
  ];

  let text = html;
  for (const [pattern, replacement] of transforms) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
88
+
89
/**
 * Read a JSON file. Extracts text from objects using a text field.
 * Supports a JSON array of objects or a single object with a text field.
 * Fixes: a top-level JSON `null` satisfied `typeof data === 'object'` and
 * crashed with a TypeError; it now reaches the intended error. Also reads
 * asynchronously instead of blocking inside an async function.
 * @param {string} filePath
 * @param {string} [textField='text'] - Field name containing text
 * @returns {Promise<Array<{text: string, metadata: object}>>}
 * @throws {Error} If an item lacks the text field or the top-level value
 *   is neither a suitable array nor object.
 */
async function readJsonFile(filePath, textField = 'text') {
  const raw = await fs.promises.readFile(filePath, 'utf-8');
  const data = JSON.parse(raw);

  if (Array.isArray(data)) {
    return data.map((item, i) => {
      // Rest-destructuring splits off the text without mutating the item.
      const { [textField]: text, ...metadata } = item;
      if (!text) throw new Error(`Missing "${textField}" field in array item ${i}`);
      return { text, metadata };
    });
  }

  // `typeof null === 'object'`, so exclude null explicitly.
  if (data !== null && typeof data === 'object' && data[textField]) {
    const { [textField]: text, ...metadata } = data;
    return [{ text, metadata }];
  }

  throw new Error(`JSON file must be an array of objects or an object with a "${textField}" field`);
}
118
+
119
/**
 * Read a JSONL/NDJSON file: one JSON object per non-blank line.
 * Line numbers in error messages count non-blank lines only.
 * @param {string} filePath
 * @param {string} [textField='text']
 * @returns {Promise<Array<{text: string, metadata: object}>>}
 */
async function readJsonlFile(filePath, textField = 'text') {
  const raw = fs.readFileSync(filePath, 'utf-8');
  const records = [];
  let lineNo = 0;

  for (const line of raw.split('\n')) {
    if (line.trim().length === 0) continue;
    lineNo += 1;

    const parsed = JSON.parse(line);
    const { [textField]: text, ...metadata } = parsed;
    if (!text) throw new Error(`Missing "${textField}" field on line ${lineNo}`);
    records.push({ text, metadata });
  }

  return records;
}
138
+
139
/**
 * Read a PDF file. Requires the optional `pdf-parse` dependency; when it
 * is absent, throws an error carrying install instructions.
 * @param {string} filePath
 * @returns {Promise<string>} Extracted text
 */
async function readPdfFile(filePath) {
  let pdfParse = null;
  try {
    pdfParse = require('pdf-parse');
  } catch {
    // Optional dependency not installed; handled below.
  }

  if (!pdfParse) {
    throw new Error(
      'PDF support requires the "pdf-parse" package.\n' +
      'Install it: npm install pdf-parse\n' +
      'Then retry your command.'
    );
  }

  const parsed = await pdfParse(fs.readFileSync(filePath));
  return parsed.text;
}
159
+
160
/**
 * Read a single file and return its content.
 * Structured files (JSON/JSONL) yield an array of {text, metadata};
 * text-like files (text/html/pdf) yield a plain string.
 * @param {string} filePath
 * @param {object} [opts]
 * @param {string} [opts.textField='text'] - Field name for JSON/JSONL
 * @returns {Promise<string|Array<{text: string, metadata: object}>>}
 * @throws {Error} For unsupported extensions.
 */
async function readFile(filePath, opts = {}) {
  const type = getReaderType(filePath);
  const textField = opts.textField || 'text';

  switch (type) {
    case 'text':
      return readTextFile(filePath);
    case 'html':
      return readHtmlFile(filePath);
    case 'json':
      return readJsonFile(filePath, textField);
    case 'jsonl':
      return readJsonlFile(filePath, textField);
    case 'pdf':
      return readPdfFile(filePath);
    case null:
      // Unknown extension.
      throw new Error(`Unsupported file type: ${path.extname(filePath)}. Supported: ${Object.keys(SUPPORTED_EXTENSIONS).join(', ')}`);
    default:
      // Extension table names a type with no reader — programming error.
      throw new Error(`No reader for type: ${type}`);
  }
}
190
+
191
/**
 * Recursively scan a directory for supported files.
 * Fix: removed the dead `entry.name.startsWith('.') && ignore.has(...)`
 * pre-check — it was strictly implied by the `ignore.has(entry.name)`
 * check that immediately followed it.
 * @param {string} dirPath
 * @param {object} [opts]
 * @param {string[]} [opts.extensions] - Filter to specific extensions (leading dot optional)
 * @param {string[]} [opts.ignore] - Entry names to skip (default: node_modules, .git, ...)
 * @returns {string[]} Sorted array of absolute file paths
 */
function scanDirectory(dirPath, opts = {}) {
  const ignore = new Set(opts.ignore || ['node_modules', '.git', '.vai', '__pycache__', '.DS_Store']);
  // Normalize filters to ".ext" form; null means "any supported extension".
  const extensions = opts.extensions
    ? new Set(opts.extensions.map((e) => (e.startsWith('.') ? e : `.${e}`)))
    : null;

  const results = [];

  function walk(dir) {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      if (ignore.has(entry.name)) continue;

      const fullPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        walk(fullPath);
      } else if (entry.isFile()) {
        const ext = path.extname(entry.name).toLowerCase();
        const wanted = extensions ? extensions.has(ext) : Boolean(SUPPORTED_EXTENSIONS[ext]);
        if (wanted) results.push(fullPath);
      }
    }
  }

  walk(path.resolve(dirPath));
  return results.sort();
}
231
+
232
// Public API: extension registry, type helpers, file reading and scanning.
module.exports = {
  SUPPORTED_EXTENSIONS,
  isSupported,
  getReaderType,
  readFile,
  scanDirectory,
  stripHtml,
};
+ };