codebasesearch 0.1.11 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +15 -14
- package/src/ignore-parser.js +293 -4
- package/src/scanner.js +1 -2
- package/src/store.js +20 -8
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -74,25 +74,26 @@ export async function run(args) {
|
|
|
74
74
|
// Always reindex to ensure freshness
|
|
75
75
|
console.log('Generating embeddings and indexing...');
|
|
76
76
|
|
|
77
|
-
// Generate embeddings in batches
|
|
77
|
+
// Generate embeddings in batches and upsert immediately to free memory
|
|
78
78
|
const batchSize = 32;
|
|
79
|
-
|
|
80
|
-
const allEmbeddings = [];
|
|
79
|
+
let processedCount = 0;
|
|
81
80
|
|
|
82
|
-
for (let i = 0; i <
|
|
83
|
-
const
|
|
81
|
+
for (let i = 0; i < chunks.length; i += batchSize) {
|
|
82
|
+
const batchChunks = chunks.slice(i, i + batchSize);
|
|
83
|
+
const batchTexts = batchChunks.map(c => c.content);
|
|
84
84
|
const batchEmbeddings = await generateEmbeddings(batchTexts);
|
|
85
|
-
allEmbeddings.push(...batchEmbeddings);
|
|
86
|
-
}
|
|
87
85
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
86
|
+
// Create batch with embeddings
|
|
87
|
+
const batchWithEmbeddings = batchChunks.map((chunk, idx) => ({
|
|
88
|
+
...chunk,
|
|
89
|
+
vector: batchEmbeddings[idx]
|
|
90
|
+
}));
|
|
91
|
+
|
|
92
|
+
// Upsert immediately to free memory
|
|
93
|
+
await upsertChunks(batchWithEmbeddings);
|
|
94
|
+
processedCount += batchWithEmbeddings.length;
|
|
95
|
+
}
|
|
93
96
|
|
|
94
|
-
// Upsert to store
|
|
95
|
-
await upsertChunks(chunksWithEmbeddings);
|
|
96
97
|
console.log('Index created\n');
|
|
97
98
|
|
|
98
99
|
// Execute search
|
package/src/ignore-parser.js
CHANGED
|
@@ -1,9 +1,87 @@
|
|
|
1
1
|
import { readFileSync, existsSync } from 'fs';
|
|
2
|
-
import { join, dirname } from 'path';
|
|
2
|
+
import { join, dirname, extname } from 'path';
|
|
3
3
|
import { fileURLToPath } from 'url';
|
|
4
4
|
|
|
5
5
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
6
6
|
|
|
7
|
+
// Whitelist of code file extensions to include
|
|
8
|
+
/**
 * Allow-list of source-code file extensions.
 *
 * Every entry is lower-case and includes the leading dot. isCodeFile lower-cases
 * the extension before the lookup, so upper-case variants (e.g. `.R`, `.S`) could
 * never match and are not listed; the lower-case entry already covers them.
 */
const CODE_EXTENSIONS = new Set([
  // JavaScript/TypeScript
  '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.mts', '.cts',
  // Python
  '.py', '.pyw', '.pyi',
  // Java
  '.java',
  // C/C++
  '.c', '.cpp', '.cc', '.cxx', '.h', '.hpp', '.hh', '.hxx',
  // C#
  '.cs',
  // Go
  '.go',
  // Rust
  '.rs',
  // Ruby
  '.rb',
  // PHP
  '.php', '.phtml',
  // Swift
  '.swift',
  // Kotlin
  '.kt', '.kts',
  // Scala
  '.scala', '.sc',
  // Perl
  '.pl', '.pm',
  // Shell/Bash
  '.sh', '.bash', '.zsh', '.fish',
  // PowerShell
  '.ps1', '.psm1', '.psd1',
  // Lua
  '.lua',
  // R (also matches files named `*.R`, because lookups are lower-cased)
  '.r',
  // MATLAB/Octave and Objective-C share `.m`, so it is listed once.
  // `.mat` is deliberately absent: it is MATLAB's binary data container, not source.
  '.m',
  // Julia
  '.jl',
  // Dart
  '.dart',
  // Elixir
  '.ex', '.exs',
  // Erlang
  '.erl', '.hrl',
  // Haskell
  '.hs', '.lhs',
  // Clojure
  '.clj', '.cljs', '.cljc',
  // Lisp
  '.lisp', '.lsp', '.scm', '.ss', '.rkt',
  // Fortran
  '.f', '.for', '.f90', '.f95', '.f03',
  // Assembly (also matches files named `*.S`, because lookups are lower-cased)
  '.asm', '.s',
  // Groovy
  '.groovy', '.gvy',
  // Visual Basic
  '.vb', '.vbs',
  // F#
  '.fs', '.fsx',
  // OCaml
  '.ml', '.mli',
  // Objective-C++ (plain Objective-C `.m` is listed above)
  '.mm',
  // Arduino
  '.ino',
  // Vue SFC
  '.vue',
  // Svelte
  '.svelte',
  // CoffeeScript
  '.coffee',
  // Reason
  '.re', '.rei',
]);
|
|
84
|
+
|
|
7
85
|
function loadDefaultIgnores() {
|
|
8
86
|
const ignorePath = join(__dirname, '..', '.thornsignore');
|
|
9
87
|
if (!existsSync(ignorePath)) {
|
|
@@ -32,7 +110,97 @@ function getHardcodedIgnores() {
|
|
|
32
110
|
'temp', 'tmp', '.tmp', '.DS_Store', 'Thumbs.db',
|
|
33
111
|
'.swp', '.swo', '*.swp', '*.swo', '.tern-port',
|
|
34
112
|
'dist-server', 'out-tsc', '.cache', '.parcel-cache',
|
|
35
|
-
'typings', '.env', '.env.local', '.env.*.local'
|
|
113
|
+
'typings', '.env', '.env.local', '.env.*.local',
|
|
114
|
+
// JSON files - PRIMARY PRIORITY for memory reduction
|
|
115
|
+
'*.json', 'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml',
|
|
116
|
+
'Gemfile.lock', 'poetry.lock', 'Pipfile.lock',
|
|
117
|
+
// Lock files
|
|
118
|
+
'*.lock',
|
|
119
|
+
// Build outputs
|
|
120
|
+
'public', 'static', 'site', '_site', '.docusaurus', '.gatsby',
|
|
121
|
+
// Cache/dependency directories
|
|
122
|
+
'.rush', '.lerna', '.nx',
|
|
123
|
+
// IDE/editor configs
|
|
124
|
+
'.cursor', '.replit', '.sublime-project', '.sublime-workspace',
|
|
125
|
+
'*.iml', '.project', '.classpath', '.settings', '*.sublime-*',
|
|
126
|
+
// OS files
|
|
127
|
+
'.Spotlight-V100', '.Trashes', 'ehthumbs.db', '.fseventsd',
|
|
128
|
+
'.TemporaryItems', '.AppleDouble', '.LSOverride', 'desktop.ini',
|
|
129
|
+
// Large data files
|
|
130
|
+
'*.db', '*.sqlite', '*.sqlite3', '*.bak', '*.dump',
|
|
131
|
+
'*.backup', '*.data', '*.orig',
|
|
132
|
+
// Logs and temp
|
|
133
|
+
'*.log', 'logs', 'npm-debug.log', 'yarn-error.log',
|
|
134
|
+
// Test coverage and reports
|
|
135
|
+
'lcov.info', '.coverage', 'test-results',
|
|
136
|
+
// Database related
|
|
137
|
+
'storage', 'fixtures',
|
|
138
|
+
// LLM/Vector related
|
|
139
|
+
'.llamaindex', '.chroma', '.vectorstore', '.embeddings',
|
|
140
|
+
'.langchain', '.autogen', '.semantic-kernel', '.openai-cache',
|
|
141
|
+
'.anthropic-cache', 'embeddings', 'vector-db', 'faiss-index',
|
|
142
|
+
'chromadb', 'pinecone-cache', 'weaviate-data',
|
|
143
|
+
// Compiled output
|
|
144
|
+
'*.min.js', '*.min.css', '*.bundle.js', '*.chunk.js', '*.map',
|
|
145
|
+
// Generated/build artifacts
|
|
146
|
+
'.assets', 'out-tsc', 'cmake_build_debug', 'cmake_build_release',
|
|
147
|
+
// Version managers
|
|
148
|
+
'.rbenv', '.nvm', '.nvmrc',
|
|
149
|
+
// Ruby specific
|
|
150
|
+
'*.gem', '*.rbc', '/pkg', '/spec/reports', '/spec/examples.txt',
|
|
151
|
+
'/test/tmp', '/test/version_tmp', 'lib/bundler/man', '.ruby-version',
|
|
152
|
+
// Go specific
|
|
153
|
+
'go.work',
|
|
154
|
+
// Rust specific
|
|
155
|
+
'Cargo.lock', '**/*.rs.bk', '*.pdb',
|
|
156
|
+
// Java specific
|
|
157
|
+
'*.class', '*.jar', '*.war', '*.ear', '*.nar', '*.nupkg', '*.snupkg',
|
|
158
|
+
// C# specific
|
|
159
|
+
'*.suo', '*.user', '*.userosscache', '*.sln.docstates',
|
|
160
|
+
'project.lock.json', 'project.fragment.lock.json', 'artifacts',
|
|
161
|
+
// C/C++ specific
|
|
162
|
+
'*.o', '*.a', '*.so', '*.exe', '*.obj', '*.dll', '*.dylib',
|
|
163
|
+
'CMakeFiles', 'CMakeCache.txt', '*.cmake',
|
|
164
|
+
// Swift/Xcode specific
|
|
165
|
+
'*.xcodeproj', '*.xcworkspace', '*.moved-aside', '*.pbxuser',
|
|
166
|
+
'*.mode1v3', '*.mode2v3', '*.perspectivev3',
|
|
167
|
+
// Scala/SBT specific
|
|
168
|
+
'lib_managed', 'src_managed', 'project/boot', 'project/plugins/project',
|
|
169
|
+
'.history', '.lib',
|
|
170
|
+
// PHP specific
|
|
171
|
+
'composer.lock', '*.phar',
|
|
172
|
+
// Docker
|
|
173
|
+
'.dockerignore', 'docker-compose.override.yml', '.docker',
|
|
174
|
+
// Documentation build
|
|
175
|
+
'docs/_build', '.vuepress',
|
|
176
|
+
// Testing frameworks
|
|
177
|
+
'jest.config', 'vitest.config', 'pytest.ini', 'tox.ini',
|
|
178
|
+
'__tests__', '__mocks__', 'spec', 'cypress', 'playwright',
|
|
179
|
+
// Monorepo workspace patterns (implicit through directory coverage)
|
|
180
|
+
'.turbo', '.nx',
|
|
181
|
+
// Python package patterns
|
|
182
|
+
'*.py[cod]', '*$py.class', '.Python', 'pip-log.txt',
|
|
183
|
+
'pip-delete-this-directory.txt', '.hypothesis', '.pyre', '.pytype',
|
|
184
|
+
'*.whl',
|
|
185
|
+
// Config/metadata that are typically low-value
|
|
186
|
+
'*.config.js', '*.config.ts', 'webpack.config.js', 'rollup.config.js',
|
|
187
|
+
'vite.config.js', 'tsconfig.json', 'jsconfig.json', 'babel.config',
|
|
188
|
+
'.babelrc', '.eslintrc', '.prettierrc', '.stylelintrc', '.editorconfig',
|
|
189
|
+
'*.local', '*.development', '*.production',
|
|
190
|
+
// Node specific
|
|
191
|
+
'.npm', '.node_repl_history', '*.tsbuildinfo', 'yarn-error.log',
|
|
192
|
+
// Documentation/reference files that don't help with search
|
|
193
|
+
'*.md', '*.txt', '*.rst', '*.adoc', 'docs', 'documentation', 'wiki',
|
|
194
|
+
'CHANGELOG', 'HISTORY', 'NEWS', 'UPGRADING', 'FAQ', 'CONTRIBUTING',
|
|
195
|
+
'SECURITY', 'LICENSE', 'LICENCE', 'COPYRIGHT', 'NOTICE', 'AUTHORS',
|
|
196
|
+
'THIRDPARTY',
|
|
197
|
+
// Test and coverage files
|
|
198
|
+
'*.test', '*.spec', 'test', 'tests', 'htmlcov',
|
|
199
|
+
// Profiling
|
|
200
|
+
'*.prof', '*.cpuprofile', '*.heapprofile',
|
|
201
|
+
// Misc
|
|
202
|
+
'.tern-port', 'firebase-debug.log', 'firestore-debug.log',
|
|
203
|
+
'ui-debug.log', '.firebaserc', '.stackdump'
|
|
36
204
|
]);
|
|
37
205
|
}
|
|
38
206
|
|
|
@@ -107,18 +275,139 @@ export function loadIgnorePatterns(rootPath) {
|
|
|
107
275
|
return merged;
|
|
108
276
|
}
|
|
109
277
|
|
|
278
|
+
// Directories to always ignore
|
|
279
|
+
/**
 * Path-segment names that cause a path to be skipped.
 *
 * Callers test individual path segments with `IGNORED_DIRECTORIES.has(segment)`,
 * i.e. by exact, case-sensitive string equality. Each name appears exactly once;
 * the set's membership is the same as before de-duplication.
 *
 * NOTE(review): several entries are very common *source* directory names
 * (`lib`, `server`, `client`, `types`, `data`, `examples`, `integration`, ...).
 * Confirm that excluding them from indexing is intended.
 */
const IGNORED_DIRECTORIES = new Set([
  // Dependencies - NEVER include
  'node_modules', 'bower_components', 'jspm_packages', 'web_modules',
  // Version control
  '.git', '.svn', '.hg', '.bzr',
  // IDE/editor
  '.vscode', '.idea', '.vs', '.atom', '.sublime-project', '.eclipse',
  '.settings', '.project', '.classpath',
  // Build outputs
  'dist', 'dist-server', 'dist-ssr', 'dist-client',
  'build', 'built', 'Build', 'BUILD',
  'out', 'output', 'Output', 'OUT', 'release', 'Release', 'RELEASE',
  'target', 'Target', 'TARGET',
  'bin', 'Bin', 'BIN', 'obj', 'Obj', 'OBJ',
  'public', 'static', 'assets', 'www', 'wwwroot',
  'site', '_site', '.site', '.docusaurus', '.gatsby', '.vuepress',
  'storybook-static', '.nuxt', 'nuxt', '.next', 'next',
  'out-tsc', 'tsc', '.tsc',
  'cmake_build_debug', 'cmake_build_release', 'CMakeFiles', 'CMakeCache.txt',
  'server', 'client', 'browser', 'esm', 'cjs', 'umd', 'lib', 'es',
  // Cache directories
  '.cache', 'cache', '.parcel-cache', '.vite', 'vite', '.turbo', 'turbo',
  // Package managers / monorepo tooling
  '.npm', '.yarn', '.pnp', '.pnpm-store', '.pnpm', '.bun', '.rush', '.lerna', '.nx',
  // Testing
  'coverage', '.nyc_output', '.coverage', 'htmlcov', 'test-results',
  'test', 'tests', 'Test', 'Tests', 'TEST', 'TESTS',
  '__tests__', '__mocks__', '__snapshots__', '__fixtures__',
  'cypress', 'playwright', 'e2e', 'integration', 'spec', 'specs',
  '.tox', '.eggs', '.hypothesis', '.pyre', '.pytype',
  // Python
  '__pycache__', '.pytest_cache', '.mypy_cache', '.venv', 'venv', 'env',
  'env.bak', 'venv.bak', '.Python', 'pip-wheel-metadata',
  // Java/Gradle/Maven
  '.gradle', '.mvn', 'gradle', 'mvn',
  // iOS/Android
  'Pods', 'DerivedData', '.bundle', 'xcuserdata', '.xcodeproj', '.xcworkspace',
  // Ruby / Rust / Go / PHP
  'vendor', '.ruby-version', 'pkg', 'Cargo.lock', 'Godeps', 'composer',
  // Infrastructure
  '.terraform', '.terragrunt-cache', '.pulumi', '.serverless', '.firebase',
  '.aws', '.azure', '.gcloud', '.vercel', '.netlify', '.now',
  // Docker
  '.docker', 'docker', '.dockerignore',
  // Temp files
  'temp', 'tmp', '.tmp', '.temp', 'tmpfs', 'scratch', '.scratch',
  // Documentation
  'docs', 'doc', 'documentation', 'wiki', 'guides', 'examples', 'demo', 'demos',
  'CHANGELOG', 'HISTORY', 'NEWS', 'LICENSE', 'LICENCE', 'COPYING', 'AUTHORS',
  // Logs
  'logs', 'log',
  // Data/Storage
  'storage', 'data', 'database', 'db', 'fixtures', 'seeds',
  'uploads', 'files', 'media', 'resources', 'images', 'img',
  // LLM/AI
  '.llamaindex', '.chroma', '.vectorstore', '.embeddings',
  '.langchain', '.autogen', '.semantic-kernel', '.openai-cache',
  '.anthropic-cache', 'embeddings', 'vector-db', 'faiss-index',
  'chromadb', 'pinecone-cache', 'weaviate-data',
  // Type definitions
  'typings', 'types', '@types', 'type-definitions',
  // FIXME: glob-style entries. With exact `Set.has` lookups these only match a
  // segment literally named "*.egg-info" / "*.log"; they are kept so membership
  // is unchanged, but they need wildcard support in the matcher to be effective.
  '*.egg-info', '*.log',
]);
|
|
349
|
+
|
|
350
|
+
/**
 * Report whether a path names a source-code file, judged only by its extension.
 *
 * Backslashes are treated as path separators. The extension is everything from
 * the last dot of the final path segment, lower-cased, and must be present in
 * CODE_EXTENSIONS.
 *
 * @param {string} filePath - File path using `/` or `\` separators.
 * @returns {boolean} `true` when the extension is on the allow-list; `false`
 *   when the name has no dot, starts with its only dot, or the extension is unlisted.
 */
export function isCodeFile(filePath) {
  const baseName = filePath.replace(/\\/g, '/').split('/').pop();
  const dotAt = baseName.lastIndexOf('.');

  // -1: no dot at all; 0: the only dot is the leading one (e.g. ".gitignore").
  if (dotAt <= 0) {
    return false;
  }

  return CODE_EXTENSIONS.has(baseName.slice(dotAt).toLowerCase());
}
|
|
364
|
+
|
|
365
|
+
/**
 * Report whether any segment of a directory path is an ignored directory name.
 *
 * Backslashes are treated as path separators, and every segment (including the
 * last one) is compared by exact match against IGNORED_DIRECTORIES.
 *
 * @param {string} dirPath - Directory path using `/` or `\` separators.
 * @returns {boolean} `true` if at least one segment is in IGNORED_DIRECTORIES.
 */
export function shouldIgnoreDirectory(dirPath) {
  return dirPath
    .replace(/\\/g, '/')
    .split('/')
    .some((segment) => IGNORED_DIRECTORIES.has(segment));
}
|
|
377
|
+
|
|
110
378
|
export function shouldIgnore(filePath, ignorePatterns) {
|
|
111
379
|
const normalizedPath = filePath.replace(/\\/g, '/');
|
|
112
380
|
const pathParts = normalizedPath.split('/');
|
|
381
|
+
const fileName = pathParts[pathParts.length - 1];
|
|
382
|
+
|
|
383
|
+
// Check if any directory in path should be ignored
|
|
384
|
+
for (const part of pathParts.slice(0, -1)) {
|
|
385
|
+
if (IGNORED_DIRECTORIES.has(part)) {
|
|
386
|
+
return true;
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
// Check if it's a code file using whitelist
|
|
391
|
+
if (!isCodeFile(filePath)) {
|
|
392
|
+
return true;
|
|
393
|
+
}
|
|
113
394
|
|
|
395
|
+
// Check against additional ignore patterns
|
|
114
396
|
for (const pattern of ignorePatterns) {
|
|
397
|
+
// Handle path patterns (contain /)
|
|
115
398
|
if (pattern.includes('/')) {
|
|
116
399
|
if (normalizedPath.includes(pattern)) {
|
|
117
400
|
return true;
|
|
118
401
|
}
|
|
119
|
-
}
|
|
402
|
+
}
|
|
403
|
+
// Handle exact file name patterns
|
|
404
|
+
else if (fileName === pattern) {
|
|
405
|
+
return true;
|
|
406
|
+
}
|
|
407
|
+
// Handle directory name patterns (match any path part)
|
|
408
|
+
else {
|
|
120
409
|
for (const part of pathParts) {
|
|
121
|
-
if (part === pattern) {
|
|
410
|
+
if (part === pattern || part.startsWith(pattern + '/')) {
|
|
122
411
|
return true;
|
|
123
412
|
}
|
|
124
413
|
}
|
package/src/scanner.js
CHANGED
|
@@ -23,7 +23,6 @@ const SUPPORTED_EXTENSIONS = new Set([
|
|
|
23
23
|
'.groovy',
|
|
24
24
|
'.gradle',
|
|
25
25
|
'.xml', '.xsd',
|
|
26
|
-
'.json', '.jsonc',
|
|
27
26
|
'.yaml', '.yml',
|
|
28
27
|
'.toml',
|
|
29
28
|
'.html', '.htm',
|
|
@@ -95,7 +94,7 @@ function walkDirectory(dirPath, ignorePatterns, relativePath = '') {
|
|
|
95
94
|
return files;
|
|
96
95
|
}
|
|
97
96
|
|
|
98
|
-
function chunkContent(content, chunkSize = 1000, overlapSize =
|
|
97
|
+
function chunkContent(content, chunkSize = 1000, overlapSize = 100) {
|
|
99
98
|
const lines = content.split('\n');
|
|
100
99
|
const chunks = [];
|
|
101
100
|
|
package/src/store.js
CHANGED
|
@@ -4,6 +4,7 @@ import { mkdirSync, existsSync } from 'fs';
|
|
|
4
4
|
|
|
5
5
|
let dbConnection = null;
|
|
6
6
|
let tableRef = null;
|
|
7
|
+
let isFirstBatch = true;
|
|
7
8
|
|
|
8
9
|
export async function initStore(dbPath) {
|
|
9
10
|
// Ensure directory exists
|
|
@@ -19,6 +20,7 @@ export async function initStore(dbPath) {
|
|
|
19
20
|
uri: dbDir,
|
|
20
21
|
mode: 'overwrite'
|
|
21
22
|
});
|
|
23
|
+
isFirstBatch = true;
|
|
22
24
|
console.error('Vector store initialized');
|
|
23
25
|
return true;
|
|
24
26
|
} catch (e) {
|
|
@@ -68,14 +70,24 @@ export async function upsertChunks(chunks) {
|
|
|
68
70
|
try {
|
|
69
71
|
let table = null;
|
|
70
72
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
73
|
+
if (isFirstBatch) {
|
|
74
|
+
// First batch: try to open existing table, or create new one
|
|
75
|
+
try {
|
|
76
|
+
table = await dbConnection.openTable(tableName);
|
|
77
|
+
await table.overwrite(data);
|
|
78
|
+
} catch (e) {
|
|
79
|
+
table = await dbConnection.createTable(tableName, data);
|
|
80
|
+
}
|
|
81
|
+
isFirstBatch = false;
|
|
82
|
+
} else {
|
|
83
|
+
// Subsequent batches: add to existing table
|
|
84
|
+
try {
|
|
85
|
+
table = await dbConnection.openTable(tableName);
|
|
86
|
+
await table.add(data);
|
|
87
|
+
} catch (e) {
|
|
88
|
+
console.error('Failed to add to table:', e.message);
|
|
89
|
+
throw e;
|
|
90
|
+
}
|
|
79
91
|
}
|
|
80
92
|
|
|
81
93
|
tableRef = table;
|