@lojban/semantic-search-mcp 1.0.10 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/package.json +1 -1
- package/src/index.ts +7 -1
- package/src/scanner.ts +21 -11
package/README.md
CHANGED
|
@@ -111,6 +111,8 @@ Indexing uses **adaptive batch size** based on free system RAM so the OS doesn
|
|
|
111
111
|
|
|
112
112
|
Example: `SEMANTIC_SEARCH_RESERVE_MB=800 SEMANTIC_SEARCH_MAX_BATCH=256` to leave more headroom and use smaller batches.
|
|
113
113
|
|
|
114
|
+
- **`SEMANTIC_SEARCH_GC`** — explicit GC after each batch is **on by default**, but it only takes effect when Node is run with `--expose-gc` (helps avoid OS freezes during long indexing). In an MCP client configuration, use e.g. `"args": ["--expose-gc", "-y", "@lojban/semantic-search-mcp"]`. Set `SEMANTIC_SEARCH_GC` to `0` or `false` to disable.
|
|
115
|
+
|
|
114
116
|
## Example: Lojban dictionary gaps
|
|
115
117
|
|
|
116
118
|
1. Put your dictionary TSV (e.g. `jbo-eng.tsv`) in a folder.
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -45,7 +45,9 @@ let currentJobId = 0;
|
|
|
45
45
|
const RESERVE_MB = Number(process.env.SEMANTIC_SEARCH_RESERVE_MB) || 400;
|
|
46
46
|
const RESERVE_BYTES = RESERVE_MB * 1024 * 1024;
|
|
47
47
|
const MIN_BATCH = Number(process.env.SEMANTIC_SEARCH_MIN_BATCH) || 32;
|
|
48
|
-
const MAX_BATCH = Number(process.env.SEMANTIC_SEARCH_MAX_BATCH) ||
|
|
48
|
+
const MAX_BATCH = Number(process.env.SEMANTIC_SEARCH_MAX_BATCH) || 128;
|
|
49
|
+
// Explicit GC after each batch (when --expose-gc is available). Default on; set SEMANTIC_SEARCH_GC=0 or false to disable.
|
|
50
|
+
const ENABLE_GC = process.env.SEMANTIC_SEARCH_GC !== '0' && process.env.SEMANTIC_SEARCH_GC !== 'false';
|
|
49
51
|
|
|
50
52
|
/** Rough bytes per indexed line in memory: line text + path + embedding (384 floats) + overhead */
|
|
51
53
|
const BYTES_PER_LINE_ESTIMATE = 4000;
|
|
@@ -127,6 +129,10 @@ async function startIndexing(
|
|
|
127
129
|
indexedCount += batchToProcess.length;
|
|
128
130
|
if (isCurrentJob()) indexStatus.indexedLines = indexedCount;
|
|
129
131
|
console.error(`Indexed ${indexedCount} lines...`);
|
|
132
|
+
// Explicit GC when enabled (default) and Node run with --expose-gc
|
|
133
|
+
if (ENABLE_GC && typeof (globalThis as { gc?: () => void }).gc === 'function') {
|
|
134
|
+
(globalThis as { gc: () => void }).gc();
|
|
135
|
+
}
|
|
130
136
|
};
|
|
131
137
|
|
|
132
138
|
// Single task queue: only one batch is processed at a time (no pipelining).
|
package/src/scanner.ts
CHANGED
|
@@ -38,6 +38,7 @@ export async function* scanDirectory(dirPath: string): AsyncGenerator<FileLine>
|
|
|
38
38
|
for (const filePath of files) {
|
|
39
39
|
if (!isTextFile(filePath)) continue;
|
|
40
40
|
|
|
41
|
+
let fileStream: ReturnType<typeof createReadStream> | null = null;
|
|
41
42
|
try {
|
|
42
43
|
const stats = statSync(filePath);
|
|
43
44
|
if (stats.size > MAX_FILE_SIZE) {
|
|
@@ -45,26 +46,35 @@ export async function* scanDirectory(dirPath: string): AsyncGenerator<FileLine>
|
|
|
45
46
|
continue;
|
|
46
47
|
}
|
|
47
48
|
|
|
48
|
-
|
|
49
|
+
fileStream = createReadStream(filePath);
|
|
49
50
|
const rl = readline.createInterface({
|
|
50
51
|
input: fileStream,
|
|
51
52
|
crlfDelay: Infinity,
|
|
52
53
|
});
|
|
53
54
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
55
|
+
try {
|
|
56
|
+
let lineNumber = 0;
|
|
57
|
+
for await (const line of rl) {
|
|
58
|
+
lineNumber++;
|
|
59
|
+
const trimmed = line.trim();
|
|
60
|
+
if (trimmed.length >= MIN_LINE_LENGTH) {
|
|
61
|
+
yield {
|
|
62
|
+
filePath,
|
|
63
|
+
lineNumber,
|
|
64
|
+
content: trimmed,
|
|
65
|
+
};
|
|
66
|
+
}
|
|
64
67
|
}
|
|
68
|
+
} finally {
|
|
69
|
+
rl.close();
|
|
70
|
+
fileStream.destroy();
|
|
71
|
+
fileStream = null;
|
|
65
72
|
}
|
|
66
73
|
} catch (err) {
|
|
67
74
|
console.error(`Error reading file ${filePath}:`, err);
|
|
75
|
+
if (fileStream) {
|
|
76
|
+
fileStream.destroy();
|
|
77
|
+
}
|
|
68
78
|
}
|
|
69
79
|
}
|
|
70
80
|
}
|