@getlore/cli 0.5.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,10 +11,10 @@
  * - Generate embeddings
  * - Store in Supabase + local data dir
  */
- import { readdir, readFile } from 'fs/promises';
+ import { readdir, readFile, mkdir, writeFile } from 'fs/promises';
  import { existsSync } from 'fs';
  import path from 'path';
- import { getAllSources, addSource, resetDatabaseConnection, } from '../../core/vector-store.js';
+ import { getAllSources, addSource, getSourcesWithPaths, resetDatabaseConnection, } from '../../core/vector-store.js';
  import { generateEmbedding, createSearchableText } from '../../core/embedder.js';
  import { gitPull, gitCommitAndPush } from '../../core/git.js';
  import { loadSyncConfig, getEnabledSources } from '../../sync/config.js';
@@ -106,6 +106,65 @@ async function legacyDiskSync(dbPath, dataDir) {
  return result;
  }
  // ============================================================================
+ // Local Content Reconciliation
+ // ============================================================================
+ /**
+ * Ensures every source in Supabase with a source_path has a local
+ * ~/.lore/sources/{id}/content.md file. This handles:
+ * - Sources indexed before storeSourceToDisk was implemented
+ * - Sources from other machines (in shared Supabase but no local content)
+ * - Any edge case where Supabase write succeeded but disk write failed
+ *
+ * Cost: One Supabase query + local filesystem checks. No LLM calls.
+ */
+ async function reconcileLocalContent(dataDir) {
+ const sourcesDir = path.join(dataDir, 'sources');
+ const textExts = ['.md', '.txt', '.json', '.jsonl', '.csv', '.xml', '.yaml', '.yml', '.html', '.log'];
+ // Get all sources that have a source_path in Supabase
+ const sourcesWithPaths = await getSourcesWithPaths('');
+ if (sourcesWithPaths.length === 0)
+ return 0;
+ let reconciled = 0;
+ for (const source of sourcesWithPaths) {
+ const sourceDir = path.join(sourcesDir, source.id);
+ const contentPath = path.join(sourceDir, 'content.md');
+ // Skip if content.md already exists
+ if (existsSync(contentPath))
+ continue;
+ // Try to create content.md from the original source_path
+ let content = null;
+ if (existsSync(source.source_path)) {
+ const ext = path.extname(source.source_path).toLowerCase();
+ if (textExts.includes(ext)) {
+ try {
+ content = await readFile(source.source_path, 'utf-8');
+ }
+ catch {
+ // File can't be read — fall through to summary
+ }
+ }
+ }
+ // If we couldn't read the original file, use the summary from Supabase
+ if (!content) {
+ content = [
+ `# ${source.title}`,
+ '',
+ source.summary,
+ ].join('\n');
+ }
+ // Create the source directory and content.md
+ try {
+ await mkdir(sourceDir, { recursive: true });
+ await writeFile(contentPath, content);
+ reconciled++;
+ }
+ catch {
+ // Skip on write failure — will retry on next sync
+ }
+ }
+ return reconciled;
+ }
+ // ============================================================================
  // Universal Sync (new system)
  // ============================================================================
  async function universalSync(dataDir, dryRun, hookContext) {
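The new reconcileLocalContent reads only four fields off each row. A minimal sketch of the row shape it assumes getSourcesWithPaths('') returns, inferred from the usage above rather than from the package's type definitions:

```js
// Hypothetical row shape, inferred from how reconcileLocalContent uses each source.
// The real rows returned by getSourcesWithPaths may carry additional fields.
const exampleRow = {
  id: 'src_abc123',                   // names the ~/.lore/sources/{id}/ directory
  source_path: '/home/me/notes/a.md', // original location; may no longer exist
  title: 'Auth design notes',         // fallback heading when the file is unreadable
  summary: 'Two options considered.', // fallback body when the file is unreadable
};
```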
@@ -172,9 +231,12 @@ export async function handleSync(dbPath, dataDir, args, options = {}) {
  sources_found: 0,
  sources_indexed: 0,
  already_indexed: 0,
+ reconciled: 0,
  };
+ const { onProgress } = options;
  // 1. Git pull
  if (doPull) {
+ await onProgress?.(5, undefined, 'Pulling from git...');
  const pullResult = await gitPull(dataDir);
  result.git_pulled = pullResult.success && (pullResult.message?.includes('Pulled') || false);
  if (pullResult.error) {
@@ -188,20 +250,25 @@ export async function handleSync(dbPath, dataDir, args, options = {}) {
  const hasUniversalSources = getEnabledSources(config).length > 0;
  if (hasUniversalSources && !useLegacy) {
  // Use new universal sync
+ await onProgress?.(20, undefined, 'Discovering new files...');
  const { discovery, processing } = await universalSync(dataDir, dryRun, options.hookContext);
  result.discovery = discovery;
  result.processing = processing;
  }
  // Always run legacy disk sync for backward compatibility
  // (picks up sources added via old `lore ingest` command)
+ await onProgress?.(60, undefined, 'Running legacy sync...');
  const legacyResult = await legacyDiskSync(dbPath, dataDir);
  result.sources_found = legacyResult.sources_found;
  result.sources_indexed = legacyResult.sources_indexed;
  result.already_indexed = legacyResult.already_indexed;
+ // Reconcile: ensure every Supabase source has local content.md
+ await onProgress?.(80, undefined, 'Reconciling local content...');
+ result.reconciled = await reconcileLocalContent(dataDir);
  }
  // 3. Git push
  if (doPush && !dryRun) {
- const totalNew = (result.processing?.processed || 0) + result.sources_indexed;
+ const totalNew = (result.processing?.processed || 0) + result.sources_indexed + result.reconciled;
  if (totalNew > 0) {
  const pushResult = await gitCommitAndPush(dataDir, `Sync: Added ${totalNew} source(s)`);
  result.git_pushed = pushResult.success && (pushResult.message?.includes('pushed') || false);
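Taken together, handleSync now emits four fixed progress milestones (5, 20, 60, 80). A minimal caller sketch, with placeholder paths (the real values come from CLI configuration):

```js
// Minimal sketch: calling handleSync with a logging progress callback.
// '/tmp/lore.db' and '/home/me/.lore' are placeholders, not the package defaults.
const result = await handleSync('/tmp/lore.db', '/home/me/.lore', [], {
  onProgress: async (progress, total, message) => {
    // total is usually undefined here; progress is a fixed milestone (5/20/60/80)
    console.log(`[sync ${progress}%] ${message ?? ''}`);
  },
});
console.log(`reconciled ${result.reconciled} source(s)`);
```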
@@ -21,7 +21,7 @@ import { handleGetSource } from './handlers/get-source.js';
  import { handleListSources } from './handlers/list-sources.js';
  import { handleRetain } from './handlers/retain.js';
  import { handleIngest } from './handlers/ingest.js';
- import { handleResearch } from './handlers/research.js';
+ import { startResearchJob, getResearchJobStatus } from './handlers/research.js';
  import { handleListProjects } from './handlers/list-projects.js';
  import { handleSync } from './handlers/sync.js';
  import { handleArchiveProject } from './handlers/archive-project.js';
@@ -136,7 +136,7 @@ async function main() {
  }
  const server = new Server({
  name: 'lore',
- version: '0.1.0',
+ version: '0.7.0',
  }, {
  capabilities: {
  tools: {},
@@ -184,8 +184,25 @@ async function main() {
  return { tools: toolDefinitions };
  });
  // Handle tool calls (core tools only)
- server.setRequestHandler(CallToolRequestSchema, async (request) => {
+ server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
  const { name, arguments: args } = request.params;
+ // Build a progress callback for long-running tools.
+ // If the client sent a progressToken, we send notifications/progress back;
+ // otherwise, onProgress is a no-op.
+ const progressToken = request.params._meta?.progressToken;
+ const onProgress = progressToken
+ ? async (progress, total, message) => {
+ try {
+ await extra.sendNotification({
+ method: 'notifications/progress',
+ params: { progressToken, progress, ...(total != null ? { total } : {}), ...(message ? { message } : {}) },
+ });
+ }
+ catch {
+ // Progress notifications are best-effort
+ }
+ }
+ : undefined;
  try {
  let result;
  switch (name) {
@@ -215,16 +232,22 @@ async function main() {
  hookContext: { mode: 'mcp' },
  });
  break;
- // Agentic research tool (uses Claude Agent SDK internally)
+ // Agentic research tool runs async, returns job_id immediately
  case 'research':
- result = await handleResearch(DB_PATH, LORE_DATA_DIR, args, {
+ result = startResearchJob(DB_PATH, LORE_DATA_DIR, args, {
  hookContext: { mode: 'mcp' },
+ onProgress,
  });
  break;
+ // Poll for research results (long-polls up to 20s)
+ case 'research_status':
+ result = await getResearchJobStatus(args?.job_id);
+ break;
  // Sync tool
  case 'sync':
  result = await handleSync(DB_PATH, LORE_DATA_DIR, args, {
  hookContext: { mode: 'mcp' },
+ onProgress,
  });
  break;
  // Project management
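When a client sets _meta.progressToken on the tool call, each onProgress invocation above becomes a JSON-RPC progress notification. A sketch of the wire shape for onProgress(60, undefined, 'Running legacy sync...'), with a hypothetical token value; the field names follow the params object built above:

```js
// Wire shape of one progress notification; total is omitted because it was undefined.
({
  jsonrpc: '2.0',
  method: 'notifications/progress',
  params: { progressToken: 'tok-1', progress: 60, message: 'Running legacy sync...' },
});
```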
package/dist/mcp/tools.js CHANGED
@@ -241,7 +241,7 @@ USE 'ingest' INSTEAD for full documents, meeting notes, transcripts, or any cont
  name: 'research',
  description: `Run a comprehensive research query across the knowledge base. An internal agent iteratively searches, reads sources, cross-references findings, and synthesizes a research package with full citations.

- Returns: summary, key findings, supporting quotes with citations, conflicts detected between sources, and suggested follow-up queries.
+ ASYNC: This tool returns immediately with a job_id. You MUST then poll 'research_status' with that job_id to get results. Research typically takes 2-8 minutes depending on the amount of data. Poll every 15-20 seconds. Do NOT assume it is stuck — check the 'activity' array in the status response to see what the agent is doing.

  WHEN TO USE:
  - Questions that span multiple sources ("What do we know about authentication?")
@@ -249,9 +249,23 @@ WHEN TO USE:
  - Building a cited research package for decision-making
  - Open-ended exploration of a topic

- COST: This tool makes multiple LLM calls internally (typically 3-8 search + read cycles). For simple lookups, use 'search' instead — it's 10x cheaper and faster.`,
+ COST: This tool makes multiple LLM calls internally (typically 10-30 search + read cycles). For simple lookups, use 'search' instead — it's 10x cheaper and faster.`,
  inputSchema: zodToJsonSchema(ResearchSchema),
  },
+ // Research status (polling for async results)
+ {
+ name: 'research_status',
+ description: `Check the status of a running research job. Returns the full research package when complete.
+
+ Call this after 'research' returns a job_id. Research typically takes 2-8 minutes. Poll every 15-20 seconds. The response includes an 'activity' array showing exactly what the research agent is doing (searches, sources being read, reasoning). As long as 'total_steps' is increasing or 'elapsed_seconds' is under 8 minutes, the research is progressing normally — do NOT abandon it.`,
+ inputSchema: {
+ type: 'object',
+ properties: {
+ job_id: { type: 'string', description: 'The job_id returned by the research tool' },
+ },
+ required: ['job_id'],
+ },
+ },
  // Ingest tool
  {
  name: 'ingest',
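The two descriptions above define a poll-until-done protocol for callers. A hedged sketch of that loop, assuming a generic callTool helper and a status field whose terminal value is 'complete' (neither appears in this diff; only job_id, activity, total_steps, and elapsed_seconds are named):

```js
// Polling loop implied by the 'research' and 'research_status' descriptions.
// callTool and the 'complete' status value are assumptions for illustration.
const { job_id } = await callTool('research', { query: 'What do we know about auth?' });
let status = await callTool('research_status', { job_id }); // long-polls up to 20s
while (status.status !== 'complete') {
  console.log(`${status.total_steps} steps so far; latest: ${status.activity?.at(-1)}`);
  await new Promise((resolve) => setTimeout(resolve, 15_000)); // poll every 15-20s
  status = await callTool('research_status', { job_id });
}
```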
@@ -16,6 +16,7 @@ import { type ImageMediaType } from './processors.js';
  export interface ExtractedMetadata {
  title: string;
  summary: string;
+ description?: string;
  date: string | null;
  participants: string[];
  content_type: ContentType;
@@ -38,6 +39,13 @@ export declare function extractMetadata(content: string, filePath: string, optio
  base64: string;
  mediaType: ImageMediaType;
  };
+ fileMetadata?: {
+ filename: string;
+ sizeBytes: number;
+ createdAt: string;
+ modifiedAt: string;
+ exif?: Record<string, unknown>;
+ };
  }): Promise<ExtractedMetadata>;
  export declare function processFiles(files: DiscoveredFile[], dataDir: string, options?: {
  onProgress?: (completed: number, total: number, title: string) => void;
@@ -55,12 +55,24 @@

  Be specific in the summary. Include concrete details, names, numbers when present.`;
  export async function extractMetadata(content, filePath, options = {}) {
- const { model = 'claude-sonnet-4-20250514', image } = options;
+ const { model = 'claude-sonnet-4-20250514', image, fileMetadata } = options;
  const client = getAnthropic();
  // Build message content based on whether we have an image or text
  let messageContent;
  if (image) {
- // Image analysis with Claude Vision
+ // Image analysis with Claude Vision — extract metadata AND a detailed text description
+ const imagePrompt = `Analyze this image and return ONLY valid JSON with these fields:
+
+ {
+ "title": "A descriptive title for this image",
+ "summary": "2-4 sentences capturing the key takeaway or purpose of this image",
+ "description": "A comprehensive text description of everything in this image. Include all text, data, labels, numbers, charts, diagrams, and visual elements. Transcribe any visible text verbatim. For charts/graphs, describe the data points and trends. For screenshots, describe the UI elements and content. Be thorough — this description replaces the image in a text-only knowledge base.",
+ "date": "ISO date string (YYYY-MM-DD) if mentioned, otherwise null",
+ "participants": ["list", "of", "names"] if people are mentioned, otherwise [],
+ "content_type": "one of: interview|meeting|conversation|document|note|analysis"
+ }
+
+ Be specific and thorough in the description. Include ALL visible text, numbers, and data.`;
  messageContent = [
  {
  type: 'image',
@@ -72,7 +84,7 @@ export async function extractMetadata(content, filePath, options = {}) {
  },
  {
  type: 'text',
- text: `${EXTRACTION_PROMPT}\n\nFile: ${path.basename(filePath)}\n\nAnalyze this image and extract metadata. Describe what's in the image in detail in the summary.`,
+ text: `${imagePrompt}\n\nFile: ${path.basename(filePath)}${fileMetadata ? `\nFile size: ${(fileMetadata.sizeBytes / 1024).toFixed(0)} KB\nFile created: ${fileMetadata.createdAt}\nFile modified: ${fileMetadata.modifiedAt}${fileMetadata.exif ? `\nEXIF data: ${JSON.stringify(fileMetadata.exif)}` : ''}` : ''}`,
  },
  ];
  }
@@ -86,7 +98,7 @@ export async function extractMetadata(content, filePath, options = {}) {
  }
  const response = await client.messages.create({
  model,
- max_tokens: 1000,
+ max_tokens: image ? 4000 : 1000,
  messages: [
  {
  role: 'user',
@@ -111,6 +123,7 @@ export async function extractMetadata(content, filePath, options = {}) {
  return {
  title: parsed.title || path.basename(filePath),
  summary: parsed.summary || 'No summary available',
+ description: parsed.description || undefined,
  date: parsed.date || null,
  participants: Array.isArray(parsed.participants) ? parsed.participants : [],
  content_type: validateContentType(parsed.content_type),
@@ -150,9 +163,12 @@ async function storeSourceToDisk(sourceId, file, metadata, processedContent, dat
  const sourceDir = path.join(sourcesDir, sourceId);
  // Create source directory
  await mkdir(sourceDir, { recursive: true });
- // Copy original file
- const originalExt = path.extname(file.absolutePath);
- await copyFile(file.absolutePath, path.join(sourceDir, `original${originalExt}`));
+ // Copy original file (skip binary formats — knowledge store is text-based)
+ const originalExt = path.extname(file.absolutePath).toLowerCase();
+ const binaryExts = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico', '.svg'];
+ if (!binaryExts.includes(originalExt)) {
+ await copyFile(file.absolutePath, path.join(sourceDir, `original${originalExt}`));
+ }
  // Save processed content
  await writeFile(path.join(sourceDir, 'content.md'), processedContent);
  // Save metadata
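The copy gate above is the behavioral change in storeSourceToDisk: image originals are no longer duplicated into the store, since the vision-generated content.md stands in for them. A minimal sketch of the predicate, assuming the list stays in sync with the hunk above:

```js
import path from 'path';

// Mirrors the gate in storeSourceToDisk: originals are copied only for
// non-binary extensions; images rely on their generated content.md instead.
const binaryExts = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico', '.svg'];
const keepsOriginal = (p) => !binaryExts.includes(path.extname(p).toLowerCase());

keepsOriginal('/inbox/photo.PNG'); // false: description in content.md stands in
keepsOriginal('/inbox/notes.txt'); // true: copied as original.txt next to content.md
```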
@@ -223,17 +239,59 @@ export async function processFiles(files, dataDir, options = {}) {
  // 1. Read and preprocess file
  const processed = await processFile(file.absolutePath);
  // 2. Extract metadata with Claude (handles both text and images)
- const metadata = await extractMetadata(processed.text, file.absolutePath, { model, image: processed.image });
- // For images, use the summary as the text content
- const contentText = processed.image
- ? `# ${metadata.title}\n\n${metadata.summary}`
- : processed.text;
+ const metadata = await extractMetadata(processed.text, file.absolutePath, { model, image: processed.image, fileMetadata: processed.fileMetadata });
+ // For images, use the detailed description as the text content
+ let contentText;
+ if (processed.image) {
+ const lines = [
+ `# ${metadata.title}`,
+ '',
+ metadata.description || metadata.summary,
+ '',
+ '---',
+ '',
+ `*Original file: ${path.basename(file.absolutePath)}*`,
+ `*Synced from: ${file.sourceName}*`,
+ metadata.date ? `*Date: ${metadata.date}*` : '',
+ ];
+ // Append EXIF metadata if available
+ const exif = processed.fileMetadata?.exif;
+ if (exif && Object.keys(exif).length > 0) {
+ lines.push('');
+ lines.push('## Image Metadata');
+ for (const [key, value] of Object.entries(exif)) {
+ if (value != null && value !== '') {
+ const label = key.replace(/([A-Z])/g, ' $1').replace(/^./, s => s.toUpperCase()).trim();
+ lines.push(`- **${label}:** ${Array.isArray(value) ? value.join(', ') : String(value)}`);
+ }
+ }
+ }
+ contentText = lines.filter(Boolean).join('\n');
+ }
+ else {
+ contentText = processed.text;
+ }
  // 3. Use existing ID for edits, generate new ID for new files
  const sourceId = file.existingId || generateSourceId();
- // 4. Index in Supabase FIRST (may fail on duplicate content_hash)
- await indexSource(sourceId, file, metadata, dbPath);
- // 5. Store source to disk ONLY if Supabase succeeded
- await storeSourceToDisk(sourceId, file, metadata, contentText, dataDir);
+ // 4. Store to disk FIRST ensures content.md always exists
+ // If this fails, we skip Supabase so the file stays "new" for retry.
+ try {
+ await storeSourceToDisk(sourceId, file, metadata, contentText, dataDir);
+ }
+ catch (diskError) {
+ console.error(`[process] Disk write failed for ${file.relativePath}: ${diskError}`);
+ throw new Error(`Disk write failed for ${file.relativePath}: ${diskError}`);
+ }
+ // 5. Index in Supabase — if this fails, disk content still exists
+ // and legacy sync will pick it up on the next run.
+ try {
+ await indexSource(sourceId, file, metadata, dbPath);
+ }
+ catch (supabaseError) {
+ console.error(`[process] Supabase index failed for ${file.relativePath}: ${supabaseError}`);
+ console.error(`[process] Content saved to disk — will be indexed on next sync via legacy path`);
+ // Don't re-throw: disk write succeeded, source is safe
+ }
  if (extensionRegistry && hookContext) {
  await extensionRegistry.runHook('onSourceCreated', {
  id: sourceId,
@@ -265,9 +323,11 @@ export async function processFiles(files, dataDir, options = {}) {
  onProgress?.(result.processed.length + result.errors.length, files.length, batchResult.value.metadata.title);
  }
  else {
+ const errorMsg = batchResult.reason?.message || String(batchResult.reason);
+ console.error(`[process] Failed to process ${file.relativePath}: ${errorMsg}`);
  result.errors.push({
  file,
- error: batchResult.reason?.message || String(batchResult.reason),
+ error: errorMsg,
  });
  onProgress?.(result.processed.length + result.errors.length, files.length, `Error: ${file.relativePath}`);
  }
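For a concrete sense of what the lines array above assembles, here is a hypothetical content.md for one synced screenshot (every value is invented for illustration; only the layout follows the code):

```js
// Invented sample of the markdown that processFiles writes for an image source.
const sampleContentMd = `# Q3 revenue dashboard screenshot

Screenshot of the finance dashboard. Headline metric: Q3 revenue $1.2M, up 8%
quarter over quarter. Bar chart shows July $380k, August $410k, September $430k.

---

*Original file: Screenshot 2024-09-30.png*
*Synced from: desktop-screenshots*
*Date: 2024-09-30*

## Image Metadata
- **Width:** 2560
- **Height:** 1440
- **Software:** macOS 14.6 Screenshot`;
```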
@@ -17,6 +17,13 @@ export interface ProcessedContent {
  base64: string;
  mediaType: ImageMediaType;
  };
+ fileMetadata?: {
+ filename: string;
+ sizeBytes: number;
+ createdAt: string;
+ modifiedAt: string;
+ exif?: Record<string, unknown>;
+ };
  }
  export declare function processFile(filePath: string): Promise<ProcessedContent>;
  export declare function preprocessFiles(filePaths: string[], options?: {
@@ -4,7 +4,7 @@
  * Converts various file formats to plain text for Claude analysis.
  * All processing is IN MEMORY ONLY - original files are never modified.
  */
- import { readFile } from 'fs/promises';
+ import { readFile, stat } from 'fs/promises';
  import path from 'path';
  let pdfParser = null;
  async function getPdfParser() {
@@ -196,13 +196,107 @@ async function processImage(filePath) {
  }
  const buffer = await readFile(filePath);
  const base64 = buffer.toString('base64');
+ // Extract file-level metadata
+ const fileStat = await stat(filePath);
+ const filename = path.basename(filePath);
+ // Try to parse date from common filename patterns (e.g. WhatsApp, screenshots)
+ let dateFromFilename;
+ const whatsappMatch = filename.match(/(\d{4}-\d{2}-\d{2})/);
+ if (whatsappMatch) {
+ dateFromFilename = whatsappMatch[1];
+ }
+ // Extract EXIF metadata (GPS, camera, date, etc.)
+ let exifData;
+ try {
+ const exifr = await import('exifr');
+ const raw = await exifr.default.parse(buffer, {
+ // Request all available tags
+ tiff: true,
+ exif: true,
+ gps: true,
+ icc: false, // Skip color profile (not useful for knowledge)
+ iptc: true, // Keywords, captions, copyright
+ xmp: true, // Extended metadata
+ });
+ if (raw) {
+ // Extract the most useful fields
+ exifData = {};
+ // Camera info
+ if (raw.Make)
+ exifData.cameraMake = raw.Make;
+ if (raw.Model)
+ exifData.cameraModel = raw.Model;
+ if (raw.LensModel)
+ exifData.lens = raw.LensModel;
+ // Date
+ if (raw.DateTimeOriginal)
+ exifData.dateTaken = raw.DateTimeOriginal instanceof Date ? raw.DateTimeOriginal.toISOString() : String(raw.DateTimeOriginal);
+ if (raw.CreateDate)
+ exifData.dateCreated = raw.CreateDate instanceof Date ? raw.CreateDate.toISOString() : String(raw.CreateDate);
+ // GPS
+ if (raw.latitude != null && raw.longitude != null) {
+ exifData.gpsLatitude = raw.latitude;
+ exifData.gpsLongitude = raw.longitude;
+ }
+ if (raw.GPSAltitude != null)
+ exifData.gpsAltitude = raw.GPSAltitude;
+ // Image dimensions
+ if (raw.ImageWidth)
+ exifData.width = raw.ImageWidth;
+ if (raw.ImageHeight)
+ exifData.height = raw.ImageHeight;
+ if (raw.ExifImageWidth)
+ exifData.width = raw.ExifImageWidth;
+ if (raw.ExifImageHeight)
+ exifData.height = raw.ExifImageHeight;
+ // Software / source
+ if (raw.Software)
+ exifData.software = raw.Software;
+ if (raw.Artist)
+ exifData.artist = raw.Artist;
+ if (raw.Copyright)
+ exifData.copyright = raw.Copyright;
+ // IPTC/XMP tags
+ if (raw.Keywords)
+ exifData.keywords = raw.Keywords;
+ if (raw.Description)
+ exifData.description = raw.Description;
+ if (raw.Caption)
+ exifData.caption = raw.Caption;
+ if (raw.Subject)
+ exifData.subject = raw.Subject;
+ if (raw.Title)
+ exifData.title = raw.Title;
+ // Use EXIF date if no filename date
+ if (!dateFromFilename && exifData.dateTaken) {
+ const d = new Date(exifData.dateTaken);
+ if (!isNaN(d.getTime())) {
+ dateFromFilename = d.toISOString().split('T')[0];
+ }
+ }
+ // Drop empty objects
+ if (Object.keys(exifData).length === 0)
+ exifData = undefined;
+ }
+ }
+ catch (exifError) {
+ console.error(`[processors] EXIF extraction failed for ${path.basename(filePath)}: ${exifError}`);
+ }
  return {
  text: '', // Will be filled by Claude vision
  format: 'image',
+ metadata: dateFromFilename ? { date: dateFromFilename } : undefined,
  image: {
  base64,
  mediaType,
  },
+ fileMetadata: {
+ filename,
+ sizeBytes: fileStat.size,
+ createdAt: fileStat.birthtime.toISOString(),
+ modifiedAt: fileStat.mtime.toISOString(),
+ ...(exifData ? { exif: exifData } : {}),
+ },
  };
  }
  // ============================================================================
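An illustrative fileMetadata value for a phone photo, as processImage would now return it (all values invented; the field names match the object literal above, and exif is present only when tags were found):

```js
// Invented example of the fileMetadata attached to a processed image.
const fileMetadata = {
  filename: 'IMG_2024-06-14_1203.jpg', // also yields dateFromFilename = '2024-06-14'
  sizeBytes: 2348112,
  createdAt: '2024-06-14T10:03:41.000Z',
  modifiedAt: '2024-06-14T10:03:41.000Z',
  exif: {
    cameraMake: 'Apple',
    cameraModel: 'iPhone 15',
    dateTaken: '2024-06-14T10:03:40.000Z',
    gpsLatitude: 52.3702,
    gpsLongitude: 4.8952,
    width: 4032,
    height: 3024,
  },
};
```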
@@ -90,47 +90,85 @@ export async function loadFullContent(state, ui, dbPath, sourcesDir) {
  const source = getSelectedSource(state);
  if (!source)
  return;
- // Try to load from disk first
- const contentPath = path.join(sourcesDir, source.id, 'content.md');
+ // Try to load from disk first (content.md, then original file)
+ const sourceDir = path.join(sourcesDir, source.id);
+ const contentPath = path.join(sourceDir, 'content.md');
  try {
  const { readFile } = await import('fs/promises');
  state.fullContent = await readFile(contentPath, 'utf-8');
  }
  catch {
- // Fall back to database source details
- const details = await getSourceById(dbPath, source.id);
- if (details) {
- state.fullContent = [
- `# ${details.title}`,
- '',
- `**Type:** ${details.source_type} · ${details.content_type}`,
- `**Date:** ${formatDate(details.created_at)}`,
- `**Projects:** ${details.projects.join(', ') || '(none)'}`,
- '',
- '## Summary',
- details.summary,
- '',
- ].join('\n');
- if (details.themes && details.themes.length > 0) {
- state.fullContent += '## Themes\n';
- for (const theme of details.themes) {
- state.fullContent += `- **${theme.name}**`;
- if (theme.summary)
- state.fullContent += `: ${theme.summary}`;
- state.fullContent += '\n';
+ // content.md not found — try to find and read an original text file
+ let foundOriginal = false;
+ try {
+ const { readFile, readdir } = await import('fs/promises');
+ const files = await readdir(sourceDir);
+ const originalFile = files.find(f => f.startsWith('original.'));
+ if (originalFile) {
+ const textExts = ['.md', '.txt', '.json', '.jsonl', '.csv', '.xml', '.yaml', '.yml', '.html', '.log'];
+ const ext = path.extname(originalFile).toLowerCase();
+ if (textExts.includes(ext)) {
+ state.fullContent = await readFile(path.join(sourceDir, originalFile), 'utf-8');
+ foundOriginal = true;
  }
- state.fullContent += '\n';
  }
- if (details.quotes && details.quotes.length > 0) {
- state.fullContent += '## Key Quotes\n';
- for (const quote of details.quotes.slice(0, 10)) {
- const speaker = quote.speaker === 'user' ? '[You]' : `[${quote.speaker_name || 'Participant'}]`;
- state.fullContent += `> ${speaker} "${quote.text}"\n\n`;
+ }
+ catch {
+ // Source directory doesn't exist locally — fall through to DB
+ }
+ if (!foundOriginal) {
+ // Try reading from source_path (original file in sync directory)
+ const details = await getSourceById(dbPath, source.id);
+ if (details?.source_path) {
+ try {
+ const { readFile } = await import('fs/promises');
+ const ext = path.extname(details.source_path).toLowerCase();
+ const textExts = ['.md', '.txt', '.json', '.jsonl', '.csv', '.xml', '.yaml', '.yml', '.html', '.log'];
+ if (textExts.includes(ext)) {
+ state.fullContent = await readFile(details.source_path, 'utf-8');
+ foundOriginal = true;
+ }
+ }
+ catch {
+ // source_path file doesn't exist or can't be read
+ }
+ }
+ if (!foundOriginal) {
+ // Final fallback: database summary view
+ if (details) {
+ state.fullContent = [
+ `# ${details.title}`,
+ '',
+ `**Type:** ${details.source_type} · ${details.content_type}`,
+ `**Date:** ${formatDate(details.created_at)}`,
+ `**Projects:** ${details.projects.join(', ') || '(none)'}`,
+ '',
+ '## Summary',
+ details.summary,
+ '',
+ ].join('\n');
+ if (details.themes && details.themes.length > 0) {
+ state.fullContent += '## Themes\n';
+ for (const theme of details.themes) {
+ state.fullContent += `- **${theme.name}**`;
+ if (theme.summary)
+ state.fullContent += `: ${theme.summary}`;
+ state.fullContent += '\n';
+ }
+ state.fullContent += '\n';
+ }
+ if (details.quotes && details.quotes.length > 0) {
+ state.fullContent += '## Key Quotes\n';
+ for (const quote of details.quotes.slice(0, 10)) {
+ const speaker = quote.speaker === 'user' ? '[You]' : `[${quote.speaker_name || 'Participant'}]`;
+ state.fullContent += `> ${speaker} "${quote.text}"\n\n`;
+ }
+ }
+ }
+ else {
+ state.fullContent = `Could not load content for ${source.title}`;
  }
  }
- }
- else {
- state.fullContent = `Could not load content for ${source.title}`;
  }
  }
  // Store raw lines for searching
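Net effect of the rewrite above: the viewer now walks a longer fallback chain before giving up. A condensed reading aid (paths shown for the default data dir; the step numbering is an annotation, not the code's):

```js
// Order in which loadFullContent now resolves a source's text:
// 1. ~/.lore/sources/{id}/content.md
// 2. ~/.lore/sources/{id}/original.<ext>, text extensions only
// 3. details.source_path, the original file in the sync directory, text only
// 4. Rendered database summary: title, type, date, projects, themes, quotes
// 5. Literal 'Could not load content for <title>' message
```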
@@ -370,6 +408,7 @@ export async function applyFilter(state, ui, query, filterMode, dbPath, dataDir,
  content_type: r.content_type,
  projects: r.projects,
  created_at: r.created_at,
+ indexed_at: r.created_at,
  summary: r.summary,
  score: r.score,
  }));