nano-brain 2026.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/AGENTS_SNIPPET.md +36 -0
  2. package/CHANGELOG.md +68 -0
  3. package/README.md +281 -0
  4. package/SKILL.md +153 -0
  5. package/bin/cli.js +18 -0
  6. package/index.html +929 -0
  7. package/nano-brain +4 -0
  8. package/opencode-mcp.json +9 -0
  9. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/.openspec.yaml +2 -0
  10. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/design.md +68 -0
  11. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/proposal.md +27 -0
  12. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-integration-testing/spec.md +50 -0
  13. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-server/spec.md +40 -0
  14. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/search-pipeline/spec.md +29 -0
  15. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/tasks.md +37 -0
  16. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/.openspec.yaml +2 -0
  17. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/design.md +111 -0
  18. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/proposal.md +30 -0
  19. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/mcp-server/spec.md +33 -0
  20. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/storage-limits/spec.md +90 -0
  21. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/workspace-scoping/spec.md +66 -0
  22. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/tasks.md +199 -0
  23. package/openspec/changes/codebase-indexing/.openspec.yaml +2 -0
  24. package/openspec/changes/codebase-indexing/design.md +169 -0
  25. package/openspec/changes/codebase-indexing/proposal.md +30 -0
  26. package/openspec/changes/codebase-indexing/specs/codebase-collection/spec.md +187 -0
  27. package/openspec/changes/codebase-indexing/specs/mcp-server/spec.md +36 -0
  28. package/openspec/changes/codebase-indexing/tasks.md +56 -0
  29. package/openspec/specs/mcp-integration-testing/spec.md +50 -0
  30. package/openspec/specs/mcp-server/spec.md +75 -0
  31. package/openspec/specs/search-pipeline/spec.md +29 -0
  32. package/openspec/specs/storage-limits/spec.md +94 -0
  33. package/openspec/specs/workspace-scoping/spec.md +70 -0
  34. package/package.json +34 -0
  35. package/site/build.js +66 -0
  36. package/site/partials/_api.html +83 -0
  37. package/site/partials/_compare.html +100 -0
  38. package/site/partials/_config.html +23 -0
  39. package/site/partials/_features.html +43 -0
  40. package/site/partials/_footer.html +6 -0
  41. package/site/partials/_hero.html +9 -0
  42. package/site/partials/_how-it-works.html +26 -0
  43. package/site/partials/_models.html +18 -0
  44. package/site/partials/_quick-start.html +15 -0
  45. package/site/partials/_stats.html +1 -0
  46. package/site/partials/_tech-stack.html +13 -0
  47. package/site/script.js +12 -0
  48. package/site/shell.html +44 -0
  49. package/site/styles.css +548 -0
  50. package/src/chunker.ts +427 -0
  51. package/src/codebase.ts +331 -0
  52. package/src/collections.ts +192 -0
  53. package/src/embeddings.ts +293 -0
  54. package/src/expansion.ts +79 -0
  55. package/src/harvester.ts +306 -0
  56. package/src/index.ts +503 -0
  57. package/src/reranker.ts +103 -0
  58. package/src/search.ts +294 -0
  59. package/src/server.ts +664 -0
  60. package/src/storage.ts +221 -0
  61. package/src/store.ts +623 -0
  62. package/src/types.ts +202 -0
  63. package/src/watcher.ts +384 -0
  64. package/test/chunker.test.ts +479 -0
  65. package/test/cli.test.ts +309 -0
  66. package/test/codebase-chunker.test.ts +446 -0
  67. package/test/codebase.test.ts +678 -0
  68. package/test/collections.test.ts +571 -0
  69. package/test/harvester.test.ts +636 -0
  70. package/test/integration.test.ts +150 -0
  71. package/test/llm.test.ts +322 -0
  72. package/test/search.test.ts +572 -0
  73. package/test/server.test.ts +541 -0
  74. package/test/storage.test.ts +302 -0
  75. package/test/store.test.ts +465 -0
  76. package/test/watcher.test.ts +656 -0
  77. package/test/workspace.test.ts +239 -0
  78. package/tsconfig.json +19 -0
  79. package/vitest.config.ts +16 -0
@@ -0,0 +1,293 @@
1
+ import { getLlama } from 'node-llama-cpp';
2
+ import { promises as fs } from 'fs';
3
+ import { join, dirname } from 'path';
4
+ import { homedir, cpus } from 'os';
5
+ import type { EmbeddingResult, EmbeddingConfig } from './types.js';
6
+
7
/**
 * Contract implemented by every embedding backend in this module.
 */
export interface EmbeddingProvider {
  /** Embeds a single piece of text. */
  embed(text: string): Promise<EmbeddingResult>;

  /** Embeds several texts in one call. */
  embedBatch(texts: string[]): Promise<EmbeddingResult[]>;

  /** Vector length the provider reports for its embeddings. */
  getDimensions(): number;

  /** Name of the model the provider reports. */
  getModel(): string;

  /** Lets the provider drop whatever it is holding on to. */
  dispose(): void;
}
14
+
15
/**
 * Optional settings accepted by `createEmbeddingProvider`.
 */
export interface EmbeddingProviderOptions {
  /** Model URI for the local backend; `DEFAULT_MODEL_URI` is used when this is not set. */
  modelPath?: string;

  /** Directory passed to `resolveModelPath` as the model cache location. */
  cacheDir?: string;

  /** Backend selection and connection settings (shape declared in `./types.js`). */
  embeddingConfig?: EmbeddingConfig;
}
20
+
21
/** `hf:<org>/<repo>/<file>.gguf` reference used by the local backend when no `modelPath` is supplied. */
const DEFAULT_MODEL_URI = 'hf:nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_K_M.gguf';

/** Model name reported by the local provider. */
const MODEL_NAME = 'nomic-embed-text-v1.5';

/** Embedding vector length; presumably the output size of the default model (confirm if the model changes). */
const DIMENSIONS = 768;
24
+
25
/** Components of a model reference written as `hf:<org>/<repo>/<file>.gguf`. */
interface ParsedModelURI {
  org: string;
  repo: string;
  file: string;
}

/**
 * Splits an `hf:<org>/<repo>/<file>.gguf` URI into its parts.
 *
 * `file` may contain `/` (a file nested inside the repository). Because the
 * parts are later joined into a filesystem path, any path segment that is
 * empty, `.` or `..` is rejected so a URI cannot point outside the cache
 * directory.
 *
 * @param uri Model reference to parse.
 * @returns The parsed parts, or `null` when `uri` is malformed or unsafe.
 */
function parseModelURI(uri: string): ParsedModelURI | null {
  const match = /^hf:([^/]+)\/([^/]+)\/(.+\.gguf)$/.exec(uri);
  if (!match) return null;

  const [, org, repo, file] = match;

  // Check both separator styles: `path.join` also honours `\` on Windows.
  const segments = [org, repo, file].flatMap(part => part.split(/[\\/]/));
  const hasUnsafeSegment = segments.some(
    segment => segment === '' || segment === '.' || segment === '..'
  );
  if (hasUnsafeSegment) return null;

  return { org, repo, file };
}
40
+
41
/**
 * Streams `url` into `destPath`, creating the parent directory when needed.
 *
 * The body is written to `<destPath>.tmp` and renamed into place only after
 * the stream has ended, so `destPath` never holds a partial download. If
 * anything fails after the temporary file was created, it is removed again
 * before the error is rethrown.
 *
 * @param url      Location to download from.
 * @param destPath Final path of the downloaded file.
 * @throws Error when the response status is not OK or the response has no body;
 *         filesystem and network errors propagate unchanged.
 */
async function downloadModel(url: string, destPath: string): Promise<void> {
  console.log(`Downloading model from ${url}...`);

  await fs.mkdir(dirname(destPath), { recursive: true });

  const response = await fetch(url);
  if (!response.ok) {
    // statusText may be empty (e.g. over HTTP/2), so always include the numeric status.
    throw new Error(`Failed to download model: ${response.status} ${response.statusText}`.trimEnd());
  }

  // Validate the body before creating any file on disk.
  const reader = response.body?.getReader();
  if (!reader) throw new Error('No response body');

  // Only used for the progress display; 0 (or NaN) disables it.
  const totalSize = parseInt(response.headers.get('content-length') || '0', 10);
  let downloadedSize = 0;

  const tempPath = `${destPath}.tmp`;

  try {
    const fileHandle = await fs.open(tempPath, 'w');

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        await fileHandle.write(value);
        downloadedSize += value.length;

        if (totalSize > 0) {
          const percent = ((downloadedSize / totalSize) * 100).toFixed(1);
          process.stdout.write(`\rDownload progress: ${percent}%`);
        }
      }

      console.log('\nDownload complete');
    } finally {
      await fileHandle.close();
    }

    await fs.rename(tempPath, destPath);
  } catch (error) {
    // Best-effort cleanup: never let a cleanup failure mask the original error.
    await fs.rm(tempPath, { force: true }).catch(() => undefined);
    throw error;
  }
}
81
+
82
/**
 * Maps an `hf:<org>/<repo>/<file>.gguf` URI to a local file path, downloading
 * the file from huggingface.co first when it is not already present.
 *
 * @param uri      Model reference understood by `parseModelURI`.
 * @param cacheDir Base directory for models; defaults to `~/.cache/nano-brain/models`.
 * @returns Path of the model file below the cache directory.
 * @throws Error when `uri` cannot be parsed; download errors propagate.
 */
export async function resolveModelPath(
  uri: string,
  cacheDir?: string
): Promise<string> {
  const parts = parseModelURI(uri);
  if (parts === null) {
    throw new Error(`Invalid model URI format: ${uri}`);
  }

  const { org, repo, file } = parts;
  const root = cacheDir || join(homedir(), '.cache', 'nano-brain', 'models');
  const localPath = join(root, org, repo, file);

  // `access` rejects when the file is missing or not reachable.
  const alreadyCached = await fs.access(localPath).then(
    () => true,
    () => false
  );

  if (!alreadyCached) {
    const remote = `https://huggingface.co/${org}/${repo}/resolve/main/${file}`;
    await downloadModel(remote, localPath);
  }

  return localPath;
}
103
+
104
/**
 * Prefixes a search query with the `search_query: ` task tag.
 *
 * @param query Raw query text.
 * @returns The tagged query string.
 */
function formatQueryPrompt(query: string): string {
  const prefix = 'search_query: ';
  return prefix.concat(query);
}
107
+
108
/**
 * Prefixes document text with the `search_document: ` task tag.
 *
 * @param title   Accepted for signature compatibility; not part of the output.
 * @param content Document text to tag.
 * @returns The tagged document string.
 */
function formatDocumentPrompt(title: string, content: string): string {
  const prefix = 'search_document: ';
  return prefix.concat(content);
}
111
+
112
// Ollama's truncate:true is broken (github.com/ollama/ollama/issues/14186)
// Client-side truncation: 1800 chars ≈ ~450 tokens, safe for 2048 context
const OLLAMA_MAX_CHARS = 1800;

/**
 * Caps `text` at `OLLAMA_MAX_CHARS` UTF-16 code units.
 *
 * If the cut would land between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well, so the result never ends in
 * half of a character.
 *
 * @param text Text about to be sent for embedding.
 * @returns `text` unchanged when short enough, otherwise its truncated prefix.
 */
function truncateForOllama(text: string): string {
  if (text.length <= OLLAMA_MAX_CHARS) return text;

  let end = OLLAMA_MAX_CHARS;
  const lastUnit = text.charCodeAt(end - 1);
  const isHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  if (isHighSurrogate) {
    end -= 1;
  }

  return text.substring(0, end);
}
120
/**
 * Embedding provider that talks to an Ollama server through `POST <url>/api/embed`.
 *
 * Inputs are shortened with `truncateForOllama` before they are sent.
 */
class OllamaEmbeddingProvider implements EmbeddingProvider {
  private readonly url: string;
  private readonly model: string;
  // Starts at the module default and is replaced by the vector length the
  // server actually returns, so a non-default model reports its real size.
  private dimensions = DIMENSIONS;

  /**
   * @param url   Base URL of the Ollama server; one trailing slash is removed.
   * @param model Model name sent with every request.
   */
  constructor(url: string, model: string) {
    this.url = url.replace(/\/$/, '');
    this.model = model;
  }

  /**
   * Embeds one text.
   * @throws Error when the request fails or the response has no embedding.
   */
  async embed(text: string): Promise<EmbeddingResult> {
    const [result] = await this.requestEmbeddings([text], 'embed');
    return result;
  }

  /**
   * Embeds several texts with a single request.
   * @throws Error when the request fails or the response does not contain
   *         exactly one embedding per input.
   */
  async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
    // Nothing to embed: skip the round trip.
    if (texts.length === 0) return [];
    return this.requestEmbeddings(texts, 'embedBatch');
  }

  getDimensions(): number {
    return this.dimensions;
  }

  getModel(): string {
    return this.model;
  }

  /** No-op: this provider holds nothing that needs releasing. */
  dispose(): void {
  }

  /** Sends `texts` to `/api/embed` and validates the shape of the reply. */
  private async requestEmbeddings(texts: string[], label: string): Promise<EmbeddingResult[]> {
    const response = await fetch(`${this.url}/api/embed`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.model,
        input: texts.map(truncateForOllama),
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama ${label} failed: ${response.status} ${response.statusText}`);
    }

    const data = (await response.json()) as { embeddings?: unknown };
    const embeddings = data?.embeddings;
    const wellFormed =
      Array.isArray(embeddings) &&
      embeddings.length === texts.length &&
      embeddings.every(item => Array.isArray(item));
    if (!wellFormed) {
      throw new Error(`Ollama ${label} failed: expected ${texts.length} embedding(s) in response`);
    }

    const vectors = embeddings as number[][];
    if (vectors.length > 0) {
      this.dimensions = vectors[0].length;
    }

    return vectors.map(vector => ({
      embedding: vector,
      model: this.model,
      dimensions: vector.length,
    }));
  }
}
178
+
179
/**
 * Embedding provider backed by a locally loaded model.
 *
 * `initialize()` must be awaited before `embed`/`embedBatch` are used: it asks
 * the model for `parallelism` embedding contexts, which the embed methods then
 * draw from.
 */
class EmbeddingProviderImpl implements EmbeddingProvider {
  // Untyped handles produced by `model.createEmbeddingContext()`.
  private contexts: any[] = [];

  /**
   * @param model       Loaded model handle exposing `createEmbeddingContext()`.
   * @param parallelism Number of embedding contexts to create.
   */
  constructor(
    private model: any,
    private parallelism: number
  ) {}

  /** Creates the embedding contexts one after another. */
  async initialize(): Promise<void> {
    for (let i = 0; i < this.parallelism; i++) {
      const context = await this.model.createEmbeddingContext();
      this.contexts.push(context);
    }
  }

  /**
   * Embeds one text using the first context.
   * @throws Error when no context is available (not initialized, or disposed).
   */
  async embed(text: string): Promise<EmbeddingResult> {
    const context = this.contextAt(0);
    const result = await context.getEmbeddingFor(text);

    return {
      embedding: Array.from(result.vector),
      model: MODEL_NAME,
      dimensions: DIMENSIONS,
    };
  }

  /**
   * Embeds `texts` in groups of at most `min(4, parallelism)`; the members of
   * a group run concurrently, each on its own context, and results keep the
   * input order.
   * @throws Error when no context is available (not initialized, or disposed).
   */
  async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
    const results: EmbeddingResult[] = [];
    // Never let the step drop to 0, which would make the loop below spin forever.
    const batchSize = Math.max(1, Math.min(4, this.parallelism));

    for (let i = 0; i < texts.length; i += batchSize) {
      const batch = texts.slice(i, i + batchSize);
      const batchPromises = batch.map(async (text, idx) => {
        const context = this.contextAt(idx);
        const result = await context.getEmbeddingFor(text);

        return {
          embedding: Array.from(result.vector) as number[],
          model: MODEL_NAME,
          dimensions: DIMENSIONS,
        };
      });

      const batchResults = await Promise.all(batchPromises);
      results.push(...batchResults);
    }

    return results;
  }

  getDimensions(): number {
    return DIMENSIONS;
  }

  getModel(): string {
    return MODEL_NAME;
  }

  /**
   * Forgets all contexts and asks each one to release itself when it exposes a
   * `dispose()` method. Disposal is fire-and-forget because this method is
   * synchronous; failures are ignored.
   */
  dispose(): void {
    const released = this.contexts;
    this.contexts = [];

    for (const context of released) {
      try {
        const pending = context?.dispose?.();
        if (pending && typeof pending.catch === 'function') {
          pending.catch(() => undefined);
        }
      } catch {
        // Best effort: one failing context must not stop the others.
      }
    }
  }

  /** Picks a context by index (wrapping around), failing loudly when none exist. */
  private contextAt(index: number): any {
    if (this.contexts.length === 0) {
      throw new Error('Embedding provider is not initialized');
    }
    return this.contexts[index % this.contexts.length];
  }
}
243
+
244
/**
 * Builds an embedding provider, preferring Ollama over the local model.
 *
 * Selection order:
 * 1. Unless `embeddingConfig.provider` is `'local'`, probe Ollama
 *    (`GET <url>/api/tags` with a 3 s timeout, then one test embedding).
 * 2. If the probe fails and Ollama was requested explicitly
 *    (`provider === 'ollama'`), give up and return `null`.
 * 3. Otherwise load the local model through node-llama-cpp.
 *
 * @param options Optional model location and backend configuration.
 * @returns A ready provider, or `null` when no backend could be set up.
 */
export async function createEmbeddingProvider(
  options?: EmbeddingProviderOptions
): Promise<EmbeddingProvider | null> {
  const config = options?.embeddingConfig;

  // Try Ollama if configured (or by default)
  if (!config || config.provider !== 'local') {
    const url = config?.url || 'http://host.docker.internal:11434';
    const model = config?.model || 'nomic-embed-text';
    // Avoid `//api/tags` when the configured URL ends with a slash.
    const healthUrl = `${url.replace(/\/$/, '')}/api/tags`;

    try {
      // Health check — verify Ollama is reachable
      const healthResp = await fetch(healthUrl, { signal: AbortSignal.timeout(3000) });
      if (!healthResp.ok) {
        // Route a non-OK answer through the same handling as a network error,
        // so an explicitly configured Ollama never silently falls back.
        throw new Error(`health check returned ${healthResp.status} ${healthResp.statusText}`.trimEnd());
      }

      const provider = new OllamaEmbeddingProvider(url, model);
      // Verify the model works with a test embed
      await provider.embed('test');
      console.error(`[embedding] Using Ollama provider: ${model} at ${url}`);
      return provider;
    } catch (err) {
      console.warn(`[embedding] Ollama not reachable at ${url}: ${err instanceof Error ? err.message : String(err)}`);
      if (config?.provider === 'ollama') {
        // Explicitly configured Ollama but it's not available
        console.error('[embedding] Ollama explicitly configured but not reachable, no fallback');
        return null;
      }
      console.warn('[embedding] Falling back to local node-llama-cpp...');
    }
  }

  // Fallback to local node-llama-cpp
  try {
    const modelUri = options?.modelPath || DEFAULT_MODEL_URI;
    const modelPath = await resolveModelPath(modelUri, options?.cacheDir);
    const llama = await getLlama();
    const model = await llama.loadModel({ modelPath });
    // One context per four CPU cores, clamped to the range 1..4.
    const cpuCount = cpus().length;
    const parallelism = Math.max(1, Math.min(4, Math.floor(cpuCount / 4)));
    const provider = new EmbeddingProviderImpl(model, parallelism);
    await provider.initialize();
    console.error(`[embedding] Using local provider: ${MODEL_NAME}`);
    return provider;
  } catch (error) {
    console.warn('Failed to load embedding model:', error instanceof Error ? error.message : String(error));
    return null;
  }
}
292
+
293
+ export { formatQueryPrompt, formatDocumentPrompt, parseModelURI };
@@ -0,0 +1,79 @@
1
+ import { getLlama } from 'node-llama-cpp';
2
+ import { resolveModelPath } from './embeddings.js';
3
+
4
/**
 * Produces alternative phrasings of a search query.
 */
export interface QueryExpander {
  /** Resolves to the list of queries to search with for `query`. */
  expand(query: string): Promise<string[]>;

  /** Lets the expander drop whatever it is holding on to. */
  dispose(): void;
}
8
+
9
/**
 * Optional settings accepted by `createQueryExpander`.
 */
export interface QueryExpanderOptions {
  /** Model URI to load; `DEFAULT_MODEL_URI` is used when this is not set. */
  modelPath?: string;

  /** Directory passed to `resolveModelPath` as the model cache location. */
  cacheDir?: string;
}
13
+
14
/** `hf:<org>/<repo>/<file>.gguf` reference of the query-expansion model loaded by default. */
const DEFAULT_MODEL_URI = 'hf:tobi/qmd-query-expansion-1.7B-GGUF/qmd-query-expansion-1.7B-Q8_0.gguf';

/** Short name of the default query-expansion model. */
const MODEL_NAME = 'qmd-query-expansion-1.7B';
16
+
17
/**
 * Query expander that asks a generative model for two alternative queries.
 */
class QueryExpanderImpl implements QueryExpander {
  /**
   * @param model   Loaded model handle (kept alongside the context).
   * @param context Handle whose `evaluate()` is used for generation.
   */
  constructor(
    private model: any,
    private context: any
  ) {}

  /**
   * Generates two alternative queries for `query`.
   *
   * The prompt already ends with `1.`, so the model's first line is normally
   * the first variant *without* a number; it is accepted as such unless the
   * output contains its own line numbered `1.`.
   *
   * @returns Two variants, or `[query]` when fewer than two could be parsed
   *          or generation failed.
   */
  async expand(query: string): Promise<string[]> {
    try {
      const prompt = `Generate 2 alternative search queries for: ${query}\n\n1.`;

      const result = await this.context.evaluate([prompt], {
        maxTokens: 200,
        temperature: 0.7,
      });

      const generated: string = result?.text || '';

      // Trim first so indented list items are still recognised.
      const lines = generated
        .split('\n')
        .map(line => line.trim())
        .filter(line => line.length > 0);

      const variants: string[] = [];
      let leadingLine: string | undefined;
      let sawItemOne = false;

      lines.forEach((line, index) => {
        const numbered = /^(\d+)\.\s*(.+)$/.exec(line);
        if (numbered) {
          if (numbered[1] === '1') sawItemOne = true;
          variants.push(numbered[2].trim());
        } else if (index === 0) {
          leadingLine = line;
        }
      });

      // Continuation of the "1." that the prompt ended with.
      if (leadingLine !== undefined && !sawItemOne) {
        variants.unshift(leadingLine);
      }

      if (variants.length >= 2) {
        return variants.slice(0, 2);
      }

      return [query];
    } catch (error) {
      console.warn('Query expansion failed:', error instanceof Error ? error.message : String(error));
      return [query];
    }
  }

  /**
   * Drops the context and asks it to release itself when it exposes a
   * `dispose()` method. Fire-and-forget because this method is synchronous.
   */
  dispose(): void {
    const context = this.context;
    this.context = null;

    try {
      const pending = context?.dispose?.();
      if (pending && typeof pending.catch === 'function') {
        pending.catch(() => undefined);
      }
    } catch {
      // Best effort: disposal problems must not surface to callers.
    }
  }
}
59
+
60
/**
 * Loads the query-expansion model and wraps it in a `QueryExpander`.
 *
 * @param options Optional model URI and cache directory.
 * @returns The expander, or `null` (after a warning) when any loading step fails.
 */
export async function createQueryExpander(
  options?: QueryExpanderOptions
): Promise<QueryExpander | null> {
  try {
    const uri = options?.modelPath || DEFAULT_MODEL_URI;
    const localPath = await resolveModelPath(uri, options?.cacheDir);

    const runtime = await getLlama();
    const loadedModel = await runtime.loadModel({ modelPath: localPath });
    const generationContext = await loadedModel.createContext({ contextSize: 2048 });

    return new QueryExpanderImpl(loadedModel, generationContext);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    console.warn('Failed to load query expander model:', reason);
    return null;
  }
}
@@ -0,0 +1,306 @@
1
+ import { readFileSync, readdirSync, existsSync, mkdirSync, writeFileSync, statSync } from 'fs';
2
+ import { join, dirname } from 'path';
3
+ import { createHash } from 'crypto';
4
+ import type { HarvestedSession } from './types.js';
5
+
6
/**
 * Inputs for `harvestSessions`.
 */
export interface HarvesterOptions {
  /** Storage root; sessions are read from `<sessionDir>/session/<project>/ses_*.json`. */
  sessionDir: string;

  /** Directory that receives the generated markdown files. */
  outputDir: string;

  /** Harvest-state file; defaults to `<outputDir>/.harvest-state.json`. */
  stateFile?: string;
}
11
+
12
/**
 * Fields that `parseSession` extracts from a session JSON file.
 */
interface SessionMetadata {
  /** `id` from the session file. */
  id: string;
  /** `slug` from the file, falling back to `id`, then to `'untitled'`. */
  slug: string;
  /** `title` from the file, or `''` when absent. */
  title: string;
  /** `projectID` from the session file. */
  projectID: string;
  /** `directory` from the session file. */
  directory: string;
  /** `time.created` from the file, or 0 when absent; presumably epoch milliseconds (it is fed to `new Date`). */
  created: number;
}
20
+
21
/**
 * Fields that `parseMessages` extracts from a message JSON file.
 */
interface ParsedMessage {
  /** `id` from the message file; also used to locate the message's parts. */
  id: string;
  /** Author of the message. */
  role: 'user' | 'assistant';
  /** `agent` from the message file, when present. */
  agent?: string;
  /** `time.created` from the file, or 0 when absent; used as the sort key. */
  created: number;
}
27
+
28
/**
 * Reads a session JSON file and extracts its metadata.
 *
 * @param sessionPath Path of the session file.
 * @returns The metadata, or `null` when the file is missing, unreadable or
 *          not valid JSON.
 */
export function parseSession(sessionPath: string): SessionMetadata | null {
  try {
    if (!existsSync(sessionPath)) return null;

    const raw = JSON.parse(readFileSync(sessionPath, 'utf-8'));

    const slug = raw.slug || raw.id || 'untitled';
    const title = raw.title || '';
    const created = raw.time?.created || 0;

    return {
      id: raw.id,
      slug,
      title,
      projectID: raw.projectID,
      directory: raw.directory,
      created
    };
  } catch {
    // Any read or parse problem is reported as "no session".
    return null;
  }
}
49
+
50
/**
 * Loads the message records of a session from
 * `<storageDir>/message/<sessionId>/msg_*.json`.
 *
 * @param sessionId  Session whose messages are wanted.
 * @param storageDir Storage root containing the `message` directory.
 * @returns Messages ordered by ascending `created`; `[]` when the directory
 *          is missing or when any file fails to read or parse.
 */
export function parseMessages(sessionId: string, storageDir: string): ParsedMessage[] {
  const dir = join(storageDir, 'message', sessionId);
  if (!existsSync(dir)) return [];

  let collected: ParsedMessage[];
  try {
    collected = readdirSync(dir)
      .filter(name => name.startsWith('msg_') && name.endsWith('.json'))
      .map(name => {
        const record = JSON.parse(readFileSync(join(dir, name), 'utf-8'));
        return {
          id: record.id,
          role: record.role,
          agent: record.agent,
          created: record.time?.created || 0
        };
      });
  } catch {
    // NOTE(review): a single unreadable file discards every message of the session.
    return [];
  }

  return collected.sort((first, second) => first.created - second.created);
}
82
+
83
/**
 * Collects the text of a message from `<storageDir>/part/<messageId>/prt_*.json`.
 *
 * Only parts with `type === 'text'`, a non-empty `text` and no `synthetic`
 * flag contribute. Part files are processed in sorted file-name order because
 * directory listings come back in no guaranteed order; without the sort the
 * assembled text could differ between runs or machines.
 * NOTE(review): this assumes part file names sort in the order the parts
 * should be read — confirm against the producer of these files.
 *
 * @param messageId  Message whose parts are wanted.
 * @param storageDir Storage root containing the `part` directory.
 * @returns The text parts joined with `\n`; `''` when the directory is missing
 *          or when any file fails to read or parse.
 */
export function parseParts(messageId: string, storageDir: string): string {
  const partDir = join(storageDir, 'part', messageId);

  if (!existsSync(partDir)) {
    return '';
  }

  const textParts: string[] = [];

  try {
    const files = readdirSync(partDir)
      .filter(f => f.startsWith('prt_') && f.endsWith('.json'))
      .sort();

    for (const file of files) {
      const filePath = join(partDir, file);
      const content = readFileSync(filePath, 'utf-8');
      const data = JSON.parse(content);

      if (data.type === 'text' && !data.synthetic && data.text) {
        textParts.push(data.text);
      }
    }
  } catch {
    return '';
  }

  return textParts.join('\n');
}
110
+
111
/**
 * Renders a harvested session as a markdown document with a front-matter header.
 *
 * The header carries session id, agent, date, title, project and project hash.
 * Each message follows as a `## User` or `## Assistant (<agent>)` section,
 * with `assistant` standing in for a missing agent name.
 *
 * @param session Session to render.
 * @returns The markdown text.
 */
export function sessionToMarkdown(session: HarvestedSession): string {
  const lines: string[] = [];

  // The title is free text: serialize it as a JSON string so quotes,
  // backslashes and line breaks are escaped and the double-quoted value stays
  // well-formed. Titles without such characters render exactly as before.
  const quotedTitle = JSON.stringify(`${session.title}`);

  lines.push('---');
  lines.push(`session: ${session.sessionId}`);
  lines.push(`agent: ${session.agent}`);
  lines.push(`date: "${session.date}"`);
  lines.push(`title: ${quotedTitle}`);
  lines.push(`project: ${session.project}`);
  lines.push(`projectHash: ${session.projectHash}`);
  lines.push('---');
  lines.push('');

  for (const message of session.messages) {
    if (message.role === 'user') {
      lines.push('## User');
    } else {
      const agentName = message.agent || 'assistant';
      lines.push(`## Assistant (${agentName})`);
    }
    lines.push('');
    lines.push(message.text);
    lines.push('');
  }

  return lines.join('\n');
}
138
+
139
/**
 * Computes where the markdown file of a session is stored:
 * `<outputDir>/<first 12 hex chars of sha256(projectPath)>/<date>-<slug>.md`.
 *
 * The slug is lower-cased, every run of characters outside `a-z0-9` becomes a
 * single `-`, and leading/trailing `-` are removed. A slug that is empty, or
 * that has nothing left after sanitizing (e.g. only punctuation or only
 * non-Latin characters), becomes `untitled` instead of producing a file named
 * `<date>-.md`.
 *
 * @param outputDir   Base output directory.
 * @param projectPath Project directory the session belongs to.
 * @param date        Date prefix of the file name.
 * @param slug        Human-readable session slug.
 * @returns The output file path.
 */
export function getOutputPath(outputDir: string, projectPath: string, date: string, slug: string): string {
  const hash = createHash('sha256').update(projectPath).digest('hex');
  const projectHash = hash.substring(0, 12);

  const sanitizedSlug =
    (slug || 'untitled')
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '') || 'untitled';

  return join(outputDir, projectHash, `${date}-${sanitizedSlug}.md`);
}
151
+
152
/**
 * Reads the harvest state (session file name → recorded mtime) from disk.
 *
 * The file content is validated: anything that is not a plain JSON object
 * yields an empty state, and entries whose value is not a finite number are
 * dropped, so callers can index and compare the result safely.
 *
 * @param stateFile Path of the state file.
 * @returns The stored state, or `{}` when the file is missing, unreadable,
 *          not valid JSON or not an object.
 */
export function loadHarvestState(stateFile: string): Record<string, number> {
  try {
    if (!existsSync(stateFile)) {
      return {};
    }

    const parsed: unknown = JSON.parse(readFileSync(stateFile, 'utf-8'));
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return {};
    }

    const state: Record<string, number> = {};
    for (const [key, value] of Object.entries(parsed)) {
      if (typeof value === 'number' && Number.isFinite(value)) {
        state[key] = value;
      }
    }
    return state;
  } catch {
    return {};
  }
}
164
+
165
/**
 * Writes the harvest state to `stateFile` as pretty-printed JSON (2-space
 * indent), creating the parent directory first when it does not exist.
 *
 * @param stateFile Path of the state file.
 * @param state     Mapping to persist.
 */
export function saveHarvestState(stateFile: string, state: Record<string, number>): void {
  const parentDir = dirname(stateFile);
  const parentMissing = !existsSync(parentDir);

  if (parentMissing) {
    mkdirSync(parentDir, { recursive: true });
  }

  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(stateFile, serialized, 'utf-8');
}
173
+
174
/**
 * Converts session files into markdown documents, skipping what was already
 * harvested.
 *
 * For every `ses_*.json` under `<sessionDir>/session/<project>/` the function
 * 1. skips it when the state file records an mtime at least as new as the
 *    file's and the markdown output still exists;
 * 2. otherwise loads its messages and their text parts;
 * 3. records sessions without messages or without any text as processed,
 *    without writing output;
 * 4. writes the markdown file and records the session's mtime once the write
 *    is confirmed.
 *
 * One bad entry never aborts the run: a stray non-directory under `session/`,
 * a file that disappears mid-scan, a session lacking `id`/`directory`, or a
 * session with an unusable creation time is skipped.
 *
 * @param options Session storage root, output directory and optional state file.
 * @returns The sessions written during this call.
 */
export async function harvestSessions(options: HarvesterOptions): Promise<HarvestedSession[]> {
  const { sessionDir, outputDir, stateFile: customStateFile } = options;
  const stateFile = customStateFile || join(outputDir, '.harvest-state.json');
  const state = loadHarvestState(stateFile);
  const harvested: HarvestedSession[] = [];

  const sessionRoot = join(sessionDir, 'session');

  if (!existsSync(sessionRoot)) {
    return [];
  }

  const projectDirs = readdirSync(sessionRoot);
  let stateChanged = false;

  for (const projectHash of projectDirs) {
    const projectSessionDir = join(sessionRoot, projectHash);

    let sessionFiles: string[];
    try {
      // Plain files next to the project directories must not break the scan.
      if (!statSync(projectSessionDir).isDirectory()) {
        continue;
      }
      sessionFiles = readdirSync(projectSessionDir).filter(f => f.startsWith('ses_') && f.endsWith('.json'));
    } catch (err) {
      console.warn(`[harvester] Skipping unreadable project directory ${projectSessionDir}:`, err);
      continue;
    }

    for (const sessionFile of sessionFiles) {
      const sessionPath = join(projectSessionDir, sessionFile);

      let lastMtime: number;
      try {
        lastMtime = statSync(sessionPath).mtimeMs;
      } catch {
        // The file vanished between listing and stat; pick it up next cycle if it returns.
        continue;
      }

      const session = parseSession(sessionPath);

      if (!session) {
        continue;
      }

      // Both values end up in path/hash computations that require strings.
      if (typeof session.id !== 'string' || typeof session.directory !== 'string') {
        console.warn(`[harvester] Skipping ${sessionFile}: missing id or directory`);
        continue;
      }

      const date = new Date(session.created);
      if (Number.isNaN(date.getTime())) {
        // toISOString() would throw on an invalid date.
        console.warn(`[harvester] Skipping ${sessionFile}: invalid creation time`);
        continue;
      }
      const dateStr = date.toISOString().split('T')[0];
      const outputPath = getOutputPath(outputDir, session.directory, dateStr, session.slug);

      // Check if already harvested AND output file still exists
      if (state[sessionFile] && state[sessionFile] >= lastMtime) {
        if (existsSync(outputPath)) {
          continue;
        }
        // Output file missing — fall through to re-harvest
        console.log(`[harvester] Re-harvesting ${sessionFile}: output file missing`);
      }

      const messages = parseMessages(session.id, sessionDir);

      // Skip sessions with no messages (nothing useful to index)
      if (messages.length === 0) {
        // Only flag a change when the recorded value really differs, so the
        // state file is not rewritten on every cycle for empty sessions.
        if (state[sessionFile] !== lastMtime) {
          state[sessionFile] = lastMtime;
          stateChanged = true;
        }
        continue;
      }

      const parsedMessages = messages.map(msg => ({
        role: msg.role,
        agent: msg.agent,
        text: parseParts(msg.id, sessionDir)
      }));

      // Skip sessions where all messages have empty text
      const hasContent = parsedMessages.some(m => m.text.trim().length > 0);
      if (!hasContent) {
        if (state[sessionFile] !== lastMtime) {
          state[sessionFile] = lastMtime;
          stateChanged = true;
        }
        continue;
      }

      const hash = createHash('sha256').update(session.directory).digest('hex');
      const projectHashStr = hash.substring(0, 12);

      const harvestedSession: HarvestedSession = {
        sessionId: session.id,
        slug: session.slug,
        title: session.title,
        agent: messages.find(m => m.role === 'assistant')?.agent || 'assistant',
        date: dateStr,
        project: session.directory,
        projectHash: projectHashStr,
        messages: parsedMessages
      };

      const markdown = sessionToMarkdown(harvestedSession);

      try {
        const outputDirPath = dirname(outputPath);
        if (!existsSync(outputDirPath)) {
          mkdirSync(outputDirPath, { recursive: true });
        }

        writeFileSync(outputPath, markdown, 'utf-8');

        // Verify the file was actually written before updating state
        if (!existsSync(outputPath)) {
          console.warn(`[harvester] Write succeeded but file not found: ${outputPath}`);
          continue;
        }

        harvested.push(harvestedSession);
        state[sessionFile] = lastMtime;
        stateChanged = true;
      } catch (err) {
        console.warn(`[harvester] Failed to write ${outputPath}:`, err);
        // Do NOT update state — will retry on next cycle
        continue;
      }
    }
  }

  if (stateChanged) {
    saveHarvestState(stateFile, state);
  }

  if (harvested.length > 0) {
    console.log(`[harvester] Harvested ${harvested.length} session(s)`);
  }

  return harvested;
}