@lojban/semantic-search-mcp 1.0.10 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -111,6 +111,8 @@ Indexing uses **adaptive batch size** based on free system RAM so the OS doesn
111
111
 
112
112
  Example: `SEMANTIC_SEARCH_RESERVE_MB=800 SEMANTIC_SEARCH_MAX_BATCH=256` to leave more headroom and use smaller batches.
113
113
 
114
+ - **`SEMANTIC_SEARCH_GC`** — explicit GC after each batch is **on by default** when Node is run with `--expose-gc` (helps avoid OS freezes during long indexing). In MCP use e.g. `"args": ["--expose-gc", "-y", "@lojban/semantic-search-mcp"]`. Set to `0` or `false` to disable.
115
+
114
116
  ## Example: Lojban dictionary gaps
115
117
 
116
118
  1. Put your dictionary TSV (e.g. `jbo-eng.tsv`) in a folder.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lojban/semantic-search-mcp",
3
- "version": "1.0.10",
3
+ "version": "1.0.11",
4
4
  "description": "Local-first MCP server for semantic search using transformers.js and SQLite",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/index.ts CHANGED
@@ -45,7 +45,9 @@ let currentJobId = 0;
45
45
  const RESERVE_MB = Number(process.env.SEMANTIC_SEARCH_RESERVE_MB) || 400;
46
46
  const RESERVE_BYTES = RESERVE_MB * 1024 * 1024;
47
47
  const MIN_BATCH = Number(process.env.SEMANTIC_SEARCH_MIN_BATCH) || 32;
48
- const MAX_BATCH = Number(process.env.SEMANTIC_SEARCH_MAX_BATCH) || 512;
48
+ const MAX_BATCH = Number(process.env.SEMANTIC_SEARCH_MAX_BATCH) || 128;
49
+ // Explicit GC after each batch (when --expose-gc is available). Default on; set SEMANTIC_SEARCH_GC=0 or false to disable.
50
+ const ENABLE_GC = process.env.SEMANTIC_SEARCH_GC !== '0' && process.env.SEMANTIC_SEARCH_GC !== 'false';
49
51
 
50
52
  /** Rough bytes per indexed line in memory: line text + path + embedding (384 floats) + overhead */
51
53
  const BYTES_PER_LINE_ESTIMATE = 4000;
@@ -127,6 +129,10 @@ async function startIndexing(
127
129
  indexedCount += batchToProcess.length;
128
130
  if (isCurrentJob()) indexStatus.indexedLines = indexedCount;
129
131
  console.error(`Indexed ${indexedCount} lines...`);
132
+ // Explicit GC when enabled (default) and Node run with --expose-gc
133
+ if (ENABLE_GC && typeof (globalThis as { gc?: () => void }).gc === 'function') {
134
+ (globalThis as { gc: () => void }).gc();
135
+ }
130
136
  };
131
137
 
132
138
  // Single task queue: only one batch is processed at a time (no pipelining).
package/src/scanner.ts CHANGED
@@ -38,6 +38,7 @@ export async function* scanDirectory(dirPath: string): AsyncGenerator<FileLine>
38
38
  for (const filePath of files) {
39
39
  if (!isTextFile(filePath)) continue;
40
40
 
41
+ let fileStream: ReturnType<typeof createReadStream> | null = null;
41
42
  try {
42
43
  const stats = statSync(filePath);
43
44
  if (stats.size > MAX_FILE_SIZE) {
@@ -45,26 +46,35 @@ export async function* scanDirectory(dirPath: string): AsyncGenerator<FileLine>
45
46
  continue;
46
47
  }
47
48
 
48
- const fileStream = createReadStream(filePath);
49
+ fileStream = createReadStream(filePath);
49
50
  const rl = readline.createInterface({
50
51
  input: fileStream,
51
52
  crlfDelay: Infinity,
52
53
  });
53
54
 
54
- let lineNumber = 0;
55
- for await (const line of rl) {
56
- lineNumber++;
57
- const trimmed = line.trim();
58
- if (trimmed.length >= MIN_LINE_LENGTH) {
59
- yield {
60
- filePath,
61
- lineNumber,
62
- content: trimmed,
63
- };
55
+ try {
56
+ let lineNumber = 0;
57
+ for await (const line of rl) {
58
+ lineNumber++;
59
+ const trimmed = line.trim();
60
+ if (trimmed.length >= MIN_LINE_LENGTH) {
61
+ yield {
62
+ filePath,
63
+ lineNumber,
64
+ content: trimmed,
65
+ };
66
+ }
64
67
  }
68
+ } finally {
69
+ rl.close();
70
+ fileStream.destroy();
71
+ fileStream = null;
65
72
  }
66
73
  } catch (err) {
67
74
  console.error(`Error reading file ${filePath}:`, err);
75
+ if (fileStream) {
76
+ fileStream.destroy();
77
+ }
68
78
  }
69
79
  }
70
80
  }