@geminilight/mindos 0.5.21 → 0.5.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,202 @@
1
+ 'use client';
2
+
3
+ import { useEffect, useState, useCallback } from 'react';
4
+ import { Activity, Cpu, Database, HardDrive, Loader2, RefreshCw, Zap } from 'lucide-react';
5
+ import { apiFetch } from '@/lib/api';
6
+ import type { Messages } from '@/lib/i18n';
7
+
8
/** Response payload of `/api/monitoring` as consumed by the monitoring tab. */
interface MonitoringData {
  /** Process-level figures shown in the "System" section. */
  system: {
    /** Uptime in milliseconds (rendered through `formatUptime`). */
    uptimeMs: number;
    /** Memory figures in bytes (rendered through `formatBytes`). */
    memory: { heapUsed: number; heapTotal: number; rss: number };
    /** Node version string, displayed verbatim. */
    nodeVersion: string;
  };
  /** Counters shown in the "Application" section. */
  application: {
    agentRequests: number;
    toolExecutions: number;
    /** Token counts; the UI shows both the sum and the input/output split. */
    totalTokens: { input: number; output: number };
    /** Average response time in milliseconds; the UI shows a dash when it is not > 0. */
    avgResponseTimeMs: number;
    errors: number;
  };
  /** Figures shown in the "Knowledge Base" section. */
  knowledgeBase: {
    /** Root path; displayed in full plus its last segment. */
    root: string;
    fileCount: number;
    /** Total size in bytes (rendered through `formatBytes`). */
    totalSizeBytes: number;
  };
  /** MCP status shown in the "MCP" section. */
  mcp: {
    running: boolean;
    port: number;
  };
}
31
+
32
/**
 * Render a byte count as a short human-readable string.
 *
 * Values below 1024 are printed as-is with a `B` suffix; larger values are
 * scaled by powers of 1024 and shown with one decimal (KB, MB) or two (GB).
 */
function formatBytes(bytes: number): string {
  const kib = 1024;
  const mib = kib * 1024;
  const gib = mib * 1024;

  if (bytes < kib) {
    return `${bytes} B`;
  }
  if (bytes < mib) {
    const inKib = bytes / kib;
    return `${inKib.toFixed(1)} KB`;
  }
  if (bytes < gib) {
    const inMib = bytes / mib;
    return `${inMib.toFixed(1)} MB`;
  }
  // Everything else (including anything >= 1 GiB) is reported in GB.
  const inGib = bytes / gib;
  return `${inGib.toFixed(2)} GB`;
}
38
+
39
/**
 * Render a duration given in milliseconds using its two most significant
 * units: `Ns`, `Nm Ns`, `Nh Nm`, or `Nd Nh`.
 */
function formatUptime(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);
  const days = Math.floor(totalHours / 24);

  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }
  if (totalMinutes < 60) {
    return `${totalMinutes}m ${totalSeconds % 60}s`;
  }
  if (totalHours < 24) {
    return `${totalHours}h ${totalMinutes % 60}m`;
  }
  return `${days}d ${totalHours % 24}h`;
}
49
+
50
/**
 * Horizontal usage bar. The fill width is `value / max` as a percentage,
 * capped at 100 (0 when `max` is not positive); the fill switches from amber
 * to the destructive colour once it exceeds 85%.
 */
function ProgressBar({ value, max, className }: { value: number; max: number; className?: string }) {
  let pct = 0;
  if (max > 0) {
    pct = Math.min(100, (value / max) * 100);
  }

  const fillTone = pct > 85 ? 'bg-destructive' : 'bg-amber-500';
  const trackClasses = `h-2 w-full rounded-full bg-muted ${className ?? ''}`;
  const fillClasses = `h-full rounded-full transition-all duration-300 ${fillTone}`;

  return (
    <div className={trackClasses}>
      <div className={fillClasses} style={{ width: `${pct}%` }} />
    </div>
  );
}
61
+
62
/**
 * Small label/value pair with an optional third line (`sub`) rendered in a
 * smaller muted font beneath the value.
 */
function StatCard(props: { label: string; value: string | number; sub?: string }) {
  const { label, value, sub } = props;
  const labelClasses = 'text-xs text-muted-foreground';
  const valueClasses = 'text-sm font-medium tabular-nums';
  const subClasses = 'text-[10px] text-muted-foreground';

  return (
    <div className="flex flex-col gap-0.5">
      <span className={labelClasses}>{label}</span>
      <span className={valueClasses}>{value}</span>
      {sub && <span className={subClasses}>{sub}</span>}
    </div>
  );
}
71
+
72
/** Props accepted by `MonitoringTab`. */
export interface MonitoringTabProps {
  /** Translated message bundle; the tab reads its labels from `t.settings.monitoring`. */
  t: Messages;
}
75
+
76
/**
 * Settings tab that fetches `/api/monitoring` on mount and then every
 * 5 seconds, rendering system, application, knowledge-base and MCP figures.
 *
 * Rendering states:
 * - spinner while the first request is in flight and no data exists yet;
 * - an error message when a request failed and no data was ever loaded;
 * - otherwise the last successfully fetched snapshot (a failed refresh keeps
 *   showing the previous data).
 *
 * @param t - Translated messages; labels come from `t.settings.monitoring`
 *            with English fallbacks when a label is missing/empty.
 */
export function MonitoringTab({ t }: MonitoringTabProps) {
  const [data, setData] = useState<MonitoringData | null>(null);
  const [loading, setLoading] = useState(true);
  // Tracks every in-flight poll. `loading` only ever goes from true to false
  // (first load), so it cannot drive the refresh icon on later polls.
  const [refreshing, setRefreshing] = useState(false);
  const [error, setError] = useState(false);

  const mon = t.settings.monitoring;

  const fetchData = useCallback(async () => {
    setRefreshing(true);
    try {
      const d = await apiFetch<MonitoringData>('/api/monitoring', { timeout: 5000 });
      setData(d);
      setError(false);
    } catch {
      setError(true);
    } finally {
      setLoading(false);
      setRefreshing(false);
    }
  }, []);

  useEffect(() => {
    fetchData();
    const id = setInterval(fetchData, 5000);
    return () => clearInterval(id);
  }, [fetchData]);

  if (loading && !data) {
    return (
      <div className="flex justify-center py-8">
        <Loader2 size={18} className="animate-spin text-muted-foreground" />
      </div>
    );
  }

  if (error && !data) {
    return (
      <div className="text-center py-8 text-sm text-muted-foreground">
        {mon.fetchError || 'Failed to load monitoring data'}
      </div>
    );
  }

  if (!data) return null;

  const { system, application, knowledgeBase, mcp } = data;
  const heapPct = system.memory.heapTotal > 0
    ? Math.round((system.memory.heapUsed / system.memory.heapTotal) * 100)
    : 0;

  // Last non-empty path segment, accepting both `/` and `\` separators so a
  // trailing slash or a Windows-style path still yields a readable label.
  // Falls back to the full root when there is no segment at all (e.g. "/").
  const rootSegments = knowledgeBase.root.split(/[\\/]+/).filter(Boolean);
  const rootLabel = rootSegments[rootSegments.length - 1] ?? knowledgeBase.root;

  return (
    <div className="space-y-6">
      {/* System */}
      <section>
        <h3 className="flex items-center gap-1.5 text-xs font-semibold text-foreground mb-3">
          <Cpu size={13} className="text-muted-foreground" />
          {mon.system || 'System'}
        </h3>
        <div className="space-y-3">
          <div>
            <div className="flex justify-between text-xs mb-1">
              <span className="text-muted-foreground">{mon.heapMemory || 'Heap Memory'}</span>
              <span className="tabular-nums">{formatBytes(system.memory.heapUsed)} / {formatBytes(system.memory.heapTotal)} ({heapPct}%)</span>
            </div>
            <ProgressBar value={system.memory.heapUsed} max={system.memory.heapTotal} />
          </div>
          <div className="grid grid-cols-3 gap-4">
            <StatCard label={mon.rss || 'RSS'} value={formatBytes(system.memory.rss)} />
            <StatCard label={mon.uptime || 'Uptime'} value={formatUptime(system.uptimeMs)} />
            <StatCard label={mon.nodeVersion || 'Node'} value={system.nodeVersion} />
          </div>
        </div>
      </section>

      {/* Application */}
      <section>
        <h3 className="flex items-center gap-1.5 text-xs font-semibold text-foreground mb-3">
          <Zap size={13} className="text-muted-foreground" />
          {mon.application || 'Application'}
        </h3>
        <div className="grid grid-cols-3 gap-4">
          <StatCard label={mon.requests || 'Requests'} value={application.agentRequests} />
          <StatCard label={mon.toolCalls || 'Tool Calls'} value={application.toolExecutions} />
          <StatCard label={mon.avgResponse || 'Avg Response'} value={application.avgResponseTimeMs > 0 ? `${application.avgResponseTimeMs}ms` : '—'} />
          <StatCard
            label={mon.tokens || 'Tokens'}
            value={`${(application.totalTokens.input + application.totalTokens.output).toLocaleString()}`}
            sub={`↑${application.totalTokens.input.toLocaleString()} ↓${application.totalTokens.output.toLocaleString()}`}
          />
          <StatCard label={mon.errors || 'Errors'} value={application.errors} />
        </div>
      </section>

      {/* Knowledge Base */}
      <section>
        <h3 className="flex items-center gap-1.5 text-xs font-semibold text-foreground mb-3">
          <Database size={13} className="text-muted-foreground" />
          {mon.knowledgeBase || 'Knowledge Base'}
        </h3>
        <div className="grid grid-cols-3 gap-4">
          <StatCard label={mon.files || 'Files'} value={knowledgeBase.fileCount} />
          <StatCard label={mon.totalSize || 'Total Size'} value={formatBytes(knowledgeBase.totalSizeBytes)} />
          <StatCard label={mon.rootPath || 'Root'} value={rootLabel} sub={knowledgeBase.root} />
        </div>
      </section>

      {/* MCP */}
      <section>
        <h3 className="flex items-center gap-1.5 text-xs font-semibold text-foreground mb-3">
          <HardDrive size={13} className="text-muted-foreground" />
          MCP
        </h3>
        <div className="grid grid-cols-3 gap-4">
          <StatCard
            label={mon.mcpStatus || 'Status'}
            value={mcp.running ? (mon.mcpRunning || 'Running') : (mon.mcpStopped || 'Stopped')}
          />
          <StatCard label={mon.mcpPort || 'Port'} value={mcp.port} />
        </div>
      </section>

      {/* Refresh indicator: spins while a poll is in flight */}
      <div className="flex items-center gap-1.5 text-[10px] text-muted-foreground">
        <RefreshCw size={10} className={refreshing ? 'animate-spin' : ''} />
        {mon.autoRefresh || 'Auto-refresh every 5s'}
      </div>
    </div>
  );
}
@@ -33,7 +33,7 @@ export interface SettingsData {
33
33
  envValues?: Record<string, string>;
34
34
  }
35
35
 
36
- export type Tab = 'ai' | 'appearance' | 'knowledge' | 'mcp' | 'plugins' | 'sync';
36
+ export type Tab = 'ai' | 'appearance' | 'knowledge' | 'mcp' | 'plugins' | 'sync' | 'monitoring' | 'agents';
37
37
 
38
38
  export const CONTENT_WIDTHS = [
39
39
  { value: '680px', label: 'Narrow (680px)' },
@@ -7,9 +7,14 @@ export async function register() {
7
7
  const configPath = join(homedir(), '.mindos', 'config.json');
8
8
  const config = JSON.parse(readFileSync(configPath, 'utf-8'));
9
9
  if (config.sync?.enabled && config.mindRoot) {
10
- // Resolve absolute path to avoid Turbopack bundling issues
10
+ // Turbopack statically analyzes ALL forms of require/import — including
11
+ // createRequire() calls. The only way to load a runtime-computed path
12
+ // is to hide the require call inside a Function constructor, which is
13
+ // opaque to bundler static analysis.
11
14
  const syncModule = resolve(process.cwd(), '..', 'bin', 'lib', 'sync.js');
12
- const { startSyncDaemon } = await import(/* webpackIgnore: true */ syncModule);
15
+ // eslint-disable-next-line @typescript-eslint/no-implied-eval
16
+ const dynamicRequire = new Function('id', 'return require(id)') as (id: string) => any;
17
+ const { startSyncDaemon } = dynamicRequire(syncModule);
13
18
  await startSyncDaemon(config.mindRoot);
14
19
  }
15
20
  } catch {
@@ -6,3 +6,4 @@ export {
6
6
  truncateToolOutputs, compactMessages, hardPrune, createTransformContext,
7
7
  } from './context';
8
8
  export { toAgentMessages } from './to-agent-messages';
9
+ export { loadSkillRules } from './skill-rules';
@@ -11,6 +11,7 @@ interface AgentOpEntry {
11
11
  params: Record<string, unknown>;
12
12
  result: 'ok' | 'error';
13
13
  message?: string;
14
+ durationMs?: number;
14
15
  }
15
16
 
16
17
  /**
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Load skill-rules.md and user-rules.md from the user's knowledge base.
3
+ *
4
+ * Files are located at: {mindRoot}/.agents/skills/{skillName}/skill-rules.md
5
+ * These are part of the progressive skill loading system (v4).
6
+ *
7
+ * Designed to be called from route.ts during system prompt assembly.
8
+ * All errors are caught — missing files are normal (not all users have skill rules).
9
+ */
10
+ import fs from 'fs';
11
+ import path from 'path';
12
+ import { truncate } from './tools';
13
+
14
/** Outcome of attempting to read one rules file from disk. */
export interface SkillRuleFile {
  /** `true` when the file was read; `false` when reading threw. */
  ok: boolean;
  /** File text (possibly truncated); empty string when the read failed. */
  content: string;
  /** `true` when the raw text exceeded the size limit and was passed through `truncate`. */
  truncated: boolean;
  /** `true` when the file is whitespace-only or could not be read. */
  empty: boolean;
  /** Message of the read error; only set when `ok` is `false`. */
  error?: string;
}
21
+
22
/** Pair of read results returned by `loadSkillRules`. */
export interface SkillRulesResult {
  /** Result for `skill-rules.md`. */
  skillRules: SkillRuleFile;
  /** Result for `user-rules.md`. */
  userRules: SkillRuleFile;
}
27
/** Raw files longer than this many characters are passed through `truncate`. */
const MAX_SKILL_CHARS = 20_000;

/**
 * Read a single rules file synchronously and describe the outcome.
 *
 * Never throws: any error from the read is captured and reported via
 * `ok: false` plus the error message.
 *
 * @param absPath - Path handed directly to `fs.readFileSync`.
 * @returns A `SkillRuleFile` describing content, truncation and emptiness.
 */
function readSkillFile(absPath: string): SkillRuleFile {
  let raw: string;
  try {
    raw = fs.readFileSync(absPath, 'utf-8');
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return { ok: false, content: '', truncated: false, empty: true, error: message };
  }

  const overLimit = raw.length > MAX_SKILL_CHARS;
  if (overLimit) {
    // Oversized files are always reported as non-empty.
    return { ok: true, content: truncate(raw), truncated: true, empty: false };
  }

  const whitespaceOnly = raw.trim().length === 0;
  return { ok: true, content: raw, truncated: false, empty: whitespaceOnly };
}
57
+
58
/**
 * Read `skill-rules.md` and `user-rules.md` from
 * `{mindRoot}/.agents/skills/{skillName}/`.
 *
 * @param mindRoot - Absolute path to the knowledge base root
 * @param skillName - 'mindos' or 'mindos-zh'
 * @returns One `SkillRuleFile` per file, each produced by `readSkillFile`.
 */
export function loadSkillRules(mindRoot: string, skillName: string): SkillRulesResult {
  const readFromSkillDir = (fileName: string): SkillRuleFile =>
    readSkillFile(path.join(mindRoot, '.agents', 'skills', skillName, fileName));

  const skillRules = readFromSkillDir('skill-rules.md');
  const userRules = readFromSkillDir('user-rules.md');
  return { skillRules, userRules };
}
package/app/lib/api.ts CHANGED
@@ -8,10 +8,12 @@
8
8
 
9
9
  export class ApiError extends Error {
10
10
  status: number;
11
- constructor(message: string, status: number) {
11
+ code?: string;
12
+ constructor(message: string, status: number, code?: string) {
12
13
  super(message);
13
14
  this.name = 'ApiError';
14
15
  this.status = status;
16
+ this.code = code;
15
17
  }
16
18
  }
17
19
 
@@ -41,11 +43,18 @@ export async function apiFetch<T>(url: string, opts: ApiFetchOptions = {}): Prom
41
43
 
42
44
  if (!res.ok) {
43
45
  let msg = `Request failed (${res.status})`;
46
+ let code: string | undefined;
44
47
  try {
45
48
  const body = await res.json();
46
- if (body?.error) msg = body.error;
49
+ // Support structured { ok: false, error: { code, message } } envelope
50
+ if (body?.error?.code && body?.error?.message) {
51
+ msg = body.error.message;
52
+ code = body.error.code;
53
+ } else if (body?.error) {
54
+ msg = typeof body.error === 'string' ? body.error : body.error.message ?? msg;
55
+ }
47
56
  } catch { /* non-JSON error body */ }
48
- throw new ApiError(msg, res.status);
57
+ throw new ApiError(msg, res.status, code);
49
58
  }
50
59
 
51
60
  return (await res.json()) as T;
@@ -1,6 +1,7 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
3
  import { resolveSafe } from './security';
4
+ import { MindOSError, ErrorCodes } from '@/lib/errors';
4
5
 
5
6
  /**
6
7
  * Appends a single row to a CSV file with RFC 4180 escaping.
@@ -9,7 +10,7 @@ import { resolveSafe } from './security';
9
10
  */
10
11
  export function appendCsvRow(mindRoot: string, filePath: string, row: string[]): { newRowCount: number } {
11
12
  const resolved = resolveSafe(mindRoot, filePath);
12
- if (!filePath.endsWith('.csv')) throw new Error('Only .csv files support row append');
13
+ if (!filePath.endsWith('.csv')) throw new MindOSError(ErrorCodes.INVALID_FILE_TYPE, 'Only .csv files support row append', { filePath });
13
14
 
14
15
  const escaped = row.map((cell) => {
15
16
  if (cell.includes(',') || cell.includes('"') || cell.includes('\n')) {
@@ -1,6 +1,7 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
3
  import { resolveSafe, assertWithinRoot } from './security';
4
+ import { MindOSError, ErrorCodes } from '@/lib/errors';
4
5
 
5
6
  /**
6
7
  * Reads the content of a file given a relative path from mindRoot.
@@ -35,7 +36,7 @@ export function writeFile(mindRoot: string, filePath: string, content: string):
35
36
  export function createFile(mindRoot: string, filePath: string, initialContent = ''): void {
36
37
  const resolved = resolveSafe(mindRoot, filePath);
37
38
  if (fs.existsSync(resolved)) {
38
- throw new Error(`File already exists: ${filePath}`);
39
+ throw new MindOSError(ErrorCodes.FILE_ALREADY_EXISTS, `File already exists: ${filePath}`, { filePath });
39
40
  }
40
41
  fs.mkdirSync(path.dirname(resolved), { recursive: true });
41
42
  fs.writeFileSync(resolved, initialContent, 'utf-8');
@@ -47,7 +48,7 @@ export function createFile(mindRoot: string, filePath: string, initialContent =
47
48
  export function deleteFile(mindRoot: string, filePath: string): void {
48
49
  const resolved = resolveSafe(mindRoot, filePath);
49
50
  if (!fs.existsSync(resolved)) {
50
- throw new Error(`File not found: ${filePath}`);
51
+ throw new MindOSError(ErrorCodes.FILE_NOT_FOUND, `File not found: ${filePath}`, { filePath });
51
52
  }
52
53
  fs.unlinkSync(resolved);
53
54
  }
@@ -59,7 +60,7 @@ export function deleteFile(mindRoot: string, filePath: string): void {
59
60
  */
60
61
  export function renameFile(mindRoot: string, oldPath: string, newName: string): string {
61
62
  if (newName.includes('/') || newName.includes('\\')) {
62
- throw new Error('Invalid filename: must not contain path separators');
63
+ throw new MindOSError(ErrorCodes.INVALID_PATH, 'Invalid filename: must not contain path separators', { newName });
63
64
  }
64
65
  const root = path.resolve(mindRoot);
65
66
  const oldResolved = path.resolve(path.join(root, oldPath));
@@ -70,7 +71,7 @@ export function renameFile(mindRoot: string, oldPath: string, newName: string):
70
71
  assertWithinRoot(newResolved, root);
71
72
 
72
73
  if (fs.existsSync(newResolved)) {
73
- throw new Error('A file with that name already exists');
74
+ throw new MindOSError(ErrorCodes.FILE_ALREADY_EXISTS, 'A file with that name already exists', { newName });
74
75
  }
75
76
  fs.renameSync(oldResolved, newResolved);
76
77
  return path.relative(root, newResolved);
@@ -88,8 +89,8 @@ export function moveFile(
88
89
  ): { newPath: string; affectedFiles: string[] } {
89
90
  const fromResolved = resolveSafe(mindRoot, fromPath);
90
91
  const toResolved = resolveSafe(mindRoot, toPath);
91
- if (!fs.existsSync(fromResolved)) throw new Error(`Source not found: ${fromPath}`);
92
- if (fs.existsSync(toResolved)) throw new Error(`Destination already exists: ${toPath}`);
92
+ if (!fs.existsSync(fromResolved)) throw new MindOSError(ErrorCodes.FILE_NOT_FOUND, `Source not found: ${fromPath}`, { fromPath });
93
+ if (fs.existsSync(toResolved)) throw new MindOSError(ErrorCodes.FILE_ALREADY_EXISTS, `Destination already exists: ${toPath}`, { toPath });
93
94
  fs.mkdirSync(path.dirname(toResolved), { recursive: true });
94
95
  fs.renameSync(fromResolved, toResolved);
95
96
  const backlinks = findBacklinksFn(mindRoot, fromPath);
@@ -36,7 +36,7 @@ export {
36
36
  export type { TreeOptions } from './tree';
37
37
 
38
38
  // Search
39
- export { searchFiles } from './search';
39
+ export { searchFiles, invalidateSearchIndex } from './search';
40
40
 
41
41
  // Line-level operations
42
42
  export {
@@ -1,4 +1,5 @@
1
1
  import { readFile, writeFile } from './fs-ops';
2
+ import { MindOSError, ErrorCodes } from '@/lib/errors';
2
3
 
3
4
  /**
4
5
  * Reads a file and returns its content split into lines.
@@ -11,9 +12,9 @@ export function readLines(mindRoot: string, filePath: string): string[] {
11
12
  * Validates line indices are within bounds.
12
13
  */
13
14
  function validateLineRange(totalLines: number, start: number, end: number): void {
14
- if (start < 0 || end < 0) throw new Error('Invalid line index: indices must be >= 0');
15
- if (start > end) throw new Error(`Invalid range: start (${start}) > end (${end})`);
16
- if (start >= totalLines) throw new Error(`Invalid line index: start (${start}) >= total lines (${totalLines})`);
15
+ if (start < 0 || end < 0) throw new MindOSError(ErrorCodes.INVALID_RANGE, 'Invalid line index: indices must be >= 0', { start, end });
16
+ if (start > end) throw new MindOSError(ErrorCodes.INVALID_RANGE, `Invalid range: start (${start}) > end (${end})`, { start, end });
17
+ if (start >= totalLines) throw new MindOSError(ErrorCodes.INVALID_RANGE, `Invalid line index: start (${start}) >= total lines (${totalLines})`, { start, totalLines });
17
18
  }
18
19
 
19
20
  /**
@@ -23,7 +24,7 @@ function validateLineRange(totalLines: number, start: number, end: number): void
23
24
  export function insertLines(mindRoot: string, filePath: string, afterIndex: number, lines: string[]): void {
24
25
  const existing = readLines(mindRoot, filePath);
25
26
  if (afterIndex >= existing.length) {
26
- throw new Error(`Invalid after_index: ${afterIndex} >= total lines (${existing.length})`);
27
+ throw new MindOSError(ErrorCodes.INVALID_RANGE, `Invalid after_index: ${afterIndex} >= total lines (${existing.length})`, { afterIndex, totalLines: existing.length });
27
28
  }
28
29
  const insertAt = afterIndex < 0 ? 0 : afterIndex + 1;
29
30
  existing.splice(insertAt, 0, ...lines);
@@ -58,7 +59,7 @@ export function insertAfterHeading(mindRoot: string, filePath: string, heading:
58
59
  const trimmed = l.trim();
59
60
  return trimmed === heading || trimmed.replace(/^#+\s*/, '') === heading.replace(/^#+\s*/, '');
60
61
  });
61
- if (idx === -1) throw new Error(`Heading not found: "${heading}"`);
62
+ if (idx === -1) throw new MindOSError(ErrorCodes.HEADING_NOT_FOUND, `Heading not found: "${heading}"`, { heading });
62
63
  let insertAt = idx + 1;
63
64
  while (insertAt < lines.length && lines[insertAt].trim() === '') insertAt++;
64
65
  insertLines(mindRoot, filePath, insertAt - 1, ['', content]);
@@ -73,7 +74,7 @@ export function updateSection(mindRoot: string, filePath: string, heading: strin
73
74
  const trimmed = l.trim();
74
75
  return trimmed === heading || trimmed.replace(/^#+\s*/, '') === heading.replace(/^#+\s*/, '');
75
76
  });
76
- if (idx === -1) throw new Error(`Heading not found: "${heading}"`);
77
+ if (idx === -1) throw new MindOSError(ErrorCodes.HEADING_NOT_FOUND, `Heading not found: "${heading}"`, { heading });
77
78
 
78
79
  const headingLevel = (lines[idx].match(/^#+/) ?? [''])[0].length;
79
80
  let sectionEnd = lines.length - 1;
@@ -0,0 +1,174 @@
1
+ import { collectAllFiles } from './tree';
2
+ import { readFile } from './fs-ops';
3
+
4
/** Only the first this-many characters of a file's content are indexed. */
const MAX_CONTENT_LENGTH = 50_000;

// Character class covering the CJK ideograph, Hiragana, Katakana and Hangul
// syllable ranges listed below.
const CJK_REGEX = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/;

/**
 * Turn text into the set of index tokens.
 *
 * - Latin/ASCII: lowercased runs of `[a-z0-9_$@#]`, keeping only runs of
 *   length >= 2 (single characters are treated as noise).
 * - CJK: for every maximal run of CJK characters, each character (unigram)
 *   and each adjacent pair (bigram), via `emitCjkTokens`.
 * - Mixed text gets tokens from both strategies in the same set.
 */
function tokenize(text: string): Set<string> {
  const lowered = text.toLowerCase();

  const words: string[] = lowered.match(/[a-z0-9_$@#]+/g) ?? [];
  const tokens = new Set<string>(words.filter((word) => word.length >= 2));

  // Skip the per-character walk entirely when there is no CJK content.
  if (!CJK_REGEX.test(lowered)) {
    return tokens;
  }

  let run: string[] = [];
  for (const ch of lowered) {
    if (CJK_REGEX.test(ch)) {
      run.push(ch);
      continue;
    }
    // A non-CJK character ends the current run; flush it.
    if (run.length > 0) {
      emitCjkTokens(run, tokens);
      run = [];
    }
  }
  if (run.length > 0) {
    emitCjkTokens(run, tokens);
  }

  return tokens;
}
49
+
50
/**
 * Add every character of `chars` (unigram) and every adjacent pair (bigram)
 * to `tokens`, in left-to-right order.
 */
function emitCjkTokens(chars: string[], tokens: Set<string>): void {
  chars.forEach((current, position) => {
    tokens.add(current); // unigram
    const following = chars[position + 1];
    if (following !== undefined) {
      tokens.add(current + following); // bigram
    }
  });
}
58
+
59
/**
 * In-memory inverted index for core search acceleration.
 *
 * The index maps tokens → Set<filePath>. When a search query arrives,
 * we tokenize the query and intersect candidate sets from the index,
 * reducing the number of files that need full-text scanning.
 *
 * Candidates are a superset filter: a returned path may still fail the
 * caller's full-text check, but a file whose indexed text contains the query
 * tokens is never dropped.
 *
 * Lifecycle:
 * - `rebuild(mindRoot)` — full build from disk
 * - `invalidate()` — drop the index so the caller can rebuild it
 * - `getCandidates(query)` — return candidate file set, or null if no index / no tokens
 */
export class SearchIndex {
  private invertedIndex: Map<string, Set<string>> | null = null;
  private builtForRoot: string | null = null;
  private fileCount = 0;

  /** Full rebuild: read all files and build inverted index. */
  rebuild(mindRoot: string): void {
    const allFiles = collectAllFiles(mindRoot);
    const inverted = new Map<string, Set<string>>();

    for (const filePath of allFiles) {
      let content: string;
      try {
        content = readFile(mindRoot, filePath);
      } catch {
        // Unreadable files are simply left out of the index.
        continue;
      }

      if (content.length > MAX_CONTENT_LENGTH) {
        content = content.slice(0, MAX_CONTENT_LENGTH);
      }

      // Also index the file path itself
      const allText = filePath + '\n' + content;
      const tokens = tokenize(allText);

      for (const token of tokens) {
        let set = inverted.get(token);
        if (!set) {
          set = new Set<string>();
          inverted.set(token, set);
        }
        set.add(filePath);
      }
    }

    this.invertedIndex = inverted;
    this.builtForRoot = mindRoot;
    this.fileCount = allFiles.length;
  }

  /** Clear the index. Next search will trigger a lazy rebuild. */
  invalidate(): void {
    this.invertedIndex = null;
    this.builtForRoot = null;
    this.fileCount = 0;
  }

  /** Whether the index has been built for the given mindRoot. */
  isBuiltFor(mindRoot: string): boolean {
    return this.invertedIndex !== null && this.builtForRoot === mindRoot;
  }

  /** Whether the index has been built (for any root). */
  isBuilt(): boolean {
    return this.invertedIndex !== null;
  }

  /** Number of files seen by the last rebuild (0 when not built). */
  getFileCount(): number {
    return this.fileCount;
  }

  /**
   * Get candidate file paths for a query (single or multi-word).
   *
   * Tokenizes the query and intersects the per-token file sets. Latin query
   * tokens also match indexed tokens that merely contain them, so partial-word
   * queries ("monitor" vs. indexed "monitoring") keep their substring
   * semantics instead of being answered with an empty list.
   *
   * Returns:
   * - `null` if the index is not built, query is empty, or query produces no
   *   tokens (e.g. substring shorter than 2 chars). Callers should fall back
   *   to a full scan when null is returned.
   * - `string[]` (possibly empty) if the index can answer definitively.
   */
  getCandidates(query: string): string[] | null {
    if (!query.trim()) return null;
    const index = this.invertedIndex;
    if (!index) return null;

    const tokens = tokenize(query.toLowerCase().trim());
    // No tokens produced → query is a substring/single-char that the index
    // cannot resolve. Return null so the caller falls back to full scan.
    if (tokens.size === 0) return null;

    let result: Set<string> | null = null;

    for (const token of tokens) {
      const matches = this.filesForToken(index, token);
      if (matches.size === 0) return []; // No file can contain this token

      if (result === null) {
        result = matches;
      } else {
        // Intersect (deleting from a Set while iterating it is safe in JS)
        for (const filePath of result) {
          if (!matches.has(filePath)) result.delete(filePath);
        }
        if (result.size === 0) return [];
      }
    }

    return result ? Array.from(result) : [];
  }

  /**
   * Files whose indexed tokens can contain `token`.
   *
   * CJK tokens are unigrams/bigrams, so any text containing the query's CJK
   * run was indexed under exactly those keys — an exact lookup suffices.
   * A Latin query token may be only a fragment of a longer indexed word, so
   * every indexed key containing it contributes its files.
   */
  private filesForToken(index: Map<string, Set<string>>, token: string): Set<string> {
    // Copy so callers can mutate the result without touching the index.
    const matches = new Set<string>(index.get(token));
    if (CJK_REGEX.test(token)) return matches;

    for (const [indexedToken, files] of index) {
      if (indexedToken.length > token.length && indexedToken.includes(token)) {
        for (const filePath of files) matches.add(filePath);
      }
    }
    return matches;
  }
}