@lojban/semantic-search-mcp 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +10 -0
  2. package/package.json +1 -1
  3. package/src/index.ts +29 -14
package/README.md CHANGED
@@ -101,6 +101,16 @@ To replace the entire index with new content from several places:
101
101
 
102
102
  Paths can be anywhere on disk (e.g. different drives or projects); the server reads and indexes all supported text/TSV/CSV files under each directory recursively.
103
103
 
104
+ ### Memory and batch size
105
+
106
+ Indexing uses an **adaptive batch size** based on free system RAM so the OS doesn’t freeze on low-memory machines. The server reads `os.freemem()`, keeps a reserve free (default 400MB), and clamps the batch size to between 32 and 512 lines. You can tune this with env vars:
107
+
108
+ - **`SEMANTIC_SEARCH_RESERVE_MB`** — MB of RAM to keep free (default `400`).
109
+ - **`SEMANTIC_SEARCH_MIN_BATCH`** — minimum lines per batch (default `32`).
110
+ - **`SEMANTIC_SEARCH_MAX_BATCH`** — maximum lines per batch (default `512`).
111
+
112
+ Example: `SEMANTIC_SEARCH_RESERVE_MB=800 SEMANTIC_SEARCH_MAX_BATCH=256` to leave more headroom and use smaller batches.
113
+
104
114
  ## Example: Lojban dictionary gaps
105
115
 
106
116
  1. Put your dictionary TSV (e.g. `jbo-eng.tsv`) in a folder.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lojban/semantic-search-mcp",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "Local-first MCP server for semantic search using transformers.js and SQLite",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/index.ts CHANGED
@@ -5,6 +5,7 @@ import {
5
5
  CallToolRequestSchema,
6
6
  ListToolsRequestSchema,
7
7
  } from '@modelcontextprotocol/sdk/types.js';
8
+ import os from 'node:os';
8
9
  import path from 'path';
9
10
  import { getEmbedding, getBatchEmbeddings } from './embeddings.js';
10
11
  import { createVectorStorage, type SearchResult, type VectorStorage } from './storage.js';
@@ -40,6 +41,26 @@ const indexStatus: IndexStatus = {
40
41
  let currentIndexingAbortController: AbortController | null = null;
41
42
  let currentJobId = 0;
42
43
 
44
+ // Adaptive batch size: reserve RAM so we don't freeze the OS (env overrides: reserve in MB, batch bounds in lines)
45
+ const RESERVE_MB = Number(process.env.SEMANTIC_SEARCH_RESERVE_MB) || 400;
46
+ const RESERVE_BYTES = RESERVE_MB * 1024 * 1024;
47
+ const MIN_BATCH = Number(process.env.SEMANTIC_SEARCH_MIN_BATCH) || 32;
48
+ const MAX_BATCH = Number(process.env.SEMANTIC_SEARCH_MAX_BATCH) || 512;
49
+
50
+ /** Rough bytes per indexed line in memory: line text + path + embedding (384 floats) + overhead */
51
+ const BYTES_PER_LINE_ESTIMATE = 4000;
52
+
53
+ /**
54
+ * Compute batch size from current free system RAM: the budget is free RAM minus the reserve (or half of free RAM when free RAM does not exceed the reserve), divided by the per-line byte estimate and clamped to [MIN_BATCH, MAX_BATCH].
55
+ */
56
+ function getAdaptiveBatchSize(): number {
57
+ const free = os.freemem();
58
+ const available = free > RESERVE_BYTES ? free - RESERVE_BYTES : Math.floor(free / 2);
59
+ const batch = Math.floor(available / BYTES_PER_LINE_ESTIMATE);
60
+ const clamped = Math.max(MIN_BATCH, Math.min(MAX_BATCH, batch));
61
+ return clamped;
62
+ }
63
+
43
64
  /**
44
65
  * Request indexing of directories. If another indexing job is running, it is aborted first.
45
66
  * Then a new job is started (clears index and rebuilds).
@@ -87,7 +108,6 @@ async function startIndexing(
87
108
  console.error(`Scanning ${directories.length} directories (background indexing)...`);
88
109
 
89
110
  let indexedCount = 0;
90
- const BATCH_SIZE = 512;
91
111
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
92
112
  let currentBatch: any[] = [];
93
113
 
@@ -109,24 +129,22 @@ async function startIndexing(
109
129
  console.error(`Indexed ${indexedCount} lines...`);
110
130
  };
111
131
 
112
- let processingPromise: Promise<void> | null = null;
132
+ // Single task queue: only one batch is processed at a time (no pipelining).
133
+ // We do not read the next batch until the current one is fully done, to avoid memory spikes and OS freezes.
134
+ let batchSize = getAdaptiveBatchSize();
135
+ console.error(`Adaptive batch size: ${batchSize} (free RAM: ${Math.round(os.freemem() / 1024 / 1024)}MB, reserve: ${RESERVE_MB}MB)`);
113
136
 
114
137
  for await (const line of scanDirectories(directories)) {
115
138
  if (signal.aborted) break;
116
139
 
117
140
  currentBatch.push(line);
118
- if (currentBatch.length >= BATCH_SIZE) {
119
- if (processingPromise) {
120
- await processingPromise;
121
- }
122
- if (signal.aborted) break;
123
-
141
+ if (currentBatch.length >= batchSize) {
124
142
  const batchToProcess = currentBatch;
125
143
  currentBatch = [];
144
+ batchSize = getAdaptiveBatchSize();
126
145
 
127
- processingPromise = processBatch(batchToProcess).catch((err) => {
128
- console.error('Error in background batch processing:', err);
129
- });
146
+ await processBatch(batchToProcess);
147
+ if (signal.aborted) break;
130
148
  }
131
149
  }
132
150
 
@@ -135,9 +153,6 @@ async function startIndexing(
135
153
  return;
136
154
  }
137
155
 
138
- if (processingPromise) {
139
- await processingPromise;
140
- }
141
156
  if (currentBatch.length > 0) {
142
157
  await processBatch(currentBatch);
143
158
  }