@lojban/semantic-search-mcp 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/index.ts +57 -25
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lojban/semantic-search-mcp",
3
- "version": "1.0.7",
3
+ "version": "1.0.8",
4
4
  "description": "Local-first MCP server for semantic search using transformers.js and SQLite",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/index.ts CHANGED
@@ -36,17 +36,31 @@ const indexStatus: IndexStatus = {
36
36
  directories: [],
37
37
  };
38
38
 
39
- async function startIndexing(storage: VectorStorage, directories: string[]): Promise<void> {
39
+ // Single "mutex": only one indexing job is allowed to run. Starting a new job aborts the previous one.
40
+ let currentIndexingAbortController: AbortController | null = null;
41
+ let currentJobId = 0;
42
+
43
+ /**
44
+ * Request indexing of directories. If another indexing job is running, it is aborted first.
45
+ * Then a new job is started (clears index and rebuilds).
46
+ */
47
+ function requestIndexing(storage: VectorStorage, directories: string[]): void {
40
48
  if (!directories.length) {
41
49
  console.error('No directories to index. Set SEMANTIC_SEARCH_INDEX_DIRS (comma-separated paths).');
42
50
  return;
43
51
  }
44
52
 
45
- if (indexStatus.isIndexing) {
46
- console.error('Indexing already in progress, not starting a new job.');
47
- return;
53
+ // Abort any in-progress indexing so it doesn't conflict or flush this job's work.
54
+ if (currentIndexingAbortController) {
55
+ currentIndexingAbortController.abort();
56
+ currentIndexingAbortController = null;
48
57
  }
49
58
 
59
+ currentJobId += 1;
60
+ const jobId = currentJobId;
61
+ currentIndexingAbortController = new AbortController();
62
+ const signal = currentIndexingAbortController.signal;
63
+
50
64
  indexStatus.isIndexing = true;
51
65
  indexStatus.startedAt = Date.now();
52
66
  indexStatus.finishedAt = null;
@@ -55,9 +69,21 @@ async function startIndexing(storage: VectorStorage, directories: string[]): Pro
55
69
  indexStatus.indexedLines = 0;
56
70
  indexStatus.indexedFiles = 0;
57
71
 
72
+ void startIndexing(storage, directories, signal, jobId);
73
+ }
74
+
75
+ async function startIndexing(
76
+ storage: VectorStorage,
77
+ directories: string[],
78
+ signal: AbortSignal,
79
+ jobId: number
80
+ ): Promise<void> {
81
+ const isCurrentJob = (): boolean => currentJobId === jobId;
82
+
58
83
  try {
59
- storage.clear();
84
+ if (signal.aborted) return;
60
85
 
86
+ storage.clear();
61
87
  console.error(`Scanning ${directories.length} directories (background indexing)...`);
62
88
 
63
89
  let indexedCount = 0;
@@ -79,43 +105,45 @@ async function startIndexing(storage: VectorStorage, directories: string[]): Pro
79
105
 
80
106
  await storage.upsertLinesBatch(batchData);
81
107
  indexedCount += batchToProcess.length;
82
- indexStatus.indexedLines = indexedCount;
108
+ if (isCurrentJob()) indexStatus.indexedLines = indexedCount;
83
109
  console.error(`Indexed ${indexedCount} lines...`);
84
110
  };
85
111
 
86
- // Pipelining: Read next batch while processing current batch
87
- // We allow ONE batch to be processed in parallel with reading the next one.
88
112
  let processingPromise: Promise<void> | null = null;
89
113
 
90
114
  for await (const line of scanDirectories(directories)) {
115
+ if (signal.aborted) break;
116
+
91
117
  currentBatch.push(line);
92
118
  if (currentBatch.length >= BATCH_SIZE) {
93
- // If there's a previous batch still processing, wait for it
94
119
  if (processingPromise) {
95
120
  await processingPromise;
96
121
  }
122
+ if (signal.aborted) break;
97
123
 
98
124
  const batchToProcess = currentBatch;
99
125
  currentBatch = [];
100
126
 
101
- // Start processing this batch, but don't await it yet!
102
- // This allows the loop to continue and read the next batch from disk.
103
127
  processingPromise = processBatch(batchToProcess).catch((err) => {
104
128
  console.error('Error in background batch processing:', err);
105
129
  });
106
130
  }
107
131
  }
108
132
 
109
- // Wait for the last async batch
133
+ if (signal.aborted) {
134
+ console.error('Indexing aborted (new job started or cancelled).');
135
+ return;
136
+ }
137
+
110
138
  if (processingPromise) {
111
139
  await processingPromise;
112
140
  }
113
-
114
- // Process any remaining lines
115
141
  if (currentBatch.length > 0) {
116
142
  await processBatch(currentBatch);
117
143
  }
118
144
 
145
+ if (!isCurrentJob()) return;
146
+
119
147
  const stats = await storage.getStats();
120
148
  indexStatus.indexedFiles = stats.totalFiles;
121
149
  indexStatus.indexedLines = stats.totalLines;
@@ -126,15 +154,22 @@ async function startIndexing(storage: VectorStorage, directories: string[]): Pro
126
154
  );
127
155
  } catch (err) {
128
156
  const message = err instanceof Error ? err.message : String(err);
129
- indexStatus.lastError = message;
130
- indexStatus.finishedAt = Date.now();
157
+ if (isCurrentJob()) {
158
+ indexStatus.lastError = message;
159
+ indexStatus.finishedAt = Date.now();
160
+ }
131
161
  console.error('Error during indexing job:', err);
132
162
  } finally {
133
- indexStatus.isIndexing = false;
163
+ if (isCurrentJob()) {
164
+ indexStatus.isIndexing = false;
165
+ }
166
+ if (currentIndexingAbortController && currentJobId === jobId) {
167
+ currentIndexingAbortController = null;
168
+ }
134
169
  }
135
170
  }
136
171
 
137
- async function ensureInitialIndexing(storage: VectorStorage): Promise<void> {
172
+ function ensureInitialIndexing(storage: VectorStorage): void {
138
173
  const envDirs = process.env.SEMANTIC_SEARCH_INDEX_DIRS;
139
174
  const directories = envDirs ? envDirs.split(',').map((d) => d.trim()).filter(Boolean) : [];
140
175
 
@@ -145,8 +180,7 @@ async function ensureInitialIndexing(storage: VectorStorage): Promise<void> {
145
180
  return;
146
181
  }
147
182
 
148
- // Fire-and-forget; indexing runs in background.
149
- void startIndexing(storage, directories);
183
+ requestIndexing(storage, directories);
150
184
  }
151
185
 
152
186
  async function main() {
@@ -222,10 +256,8 @@ async function main() {
222
256
  );
223
257
  }
224
258
 
225
- // Trigger (or reuse) background indexing job.
226
- if (!indexStatus.isIndexing) {
227
- void startIndexing(storage, directories);
228
- }
259
+ // Abort any in-progress indexing and start a new job (clears and rebuilds).
260
+ requestIndexing(storage, directories);
229
261
 
230
262
  const stats = await storage.getStats();
231
263
  return {
@@ -313,7 +345,7 @@ async function main() {
313
345
  console.error('Semantic Search MCP Server running on stdio');
314
346
 
315
347
  // Kick off initial background indexing when the MCP server is enabled.
316
- await ensureInitialIndexing(storage);
348
+ ensureInitialIndexing(storage);
317
349
  }
318
350
 
319
351
  main().catch(console.error);