@getlore/cli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +13 -0
- package/README.md +80 -0
- package/dist/cli/colors.d.ts +48 -0
- package/dist/cli/colors.js +48 -0
- package/dist/cli/commands/ask.d.ts +7 -0
- package/dist/cli/commands/ask.js +97 -0
- package/dist/cli/commands/auth.d.ts +10 -0
- package/dist/cli/commands/auth.js +484 -0
- package/dist/cli/commands/daemon.d.ts +22 -0
- package/dist/cli/commands/daemon.js +244 -0
- package/dist/cli/commands/docs.d.ts +7 -0
- package/dist/cli/commands/docs.js +188 -0
- package/dist/cli/commands/extensions.d.ts +7 -0
- package/dist/cli/commands/extensions.js +204 -0
- package/dist/cli/commands/misc.d.ts +7 -0
- package/dist/cli/commands/misc.js +172 -0
- package/dist/cli/commands/pending.d.ts +7 -0
- package/dist/cli/commands/pending.js +63 -0
- package/dist/cli/commands/projects.d.ts +7 -0
- package/dist/cli/commands/projects.js +136 -0
- package/dist/cli/commands/search.d.ts +7 -0
- package/dist/cli/commands/search.js +102 -0
- package/dist/cli/commands/skills.d.ts +24 -0
- package/dist/cli/commands/skills.js +447 -0
- package/dist/cli/commands/sources.d.ts +7 -0
- package/dist/cli/commands/sources.js +121 -0
- package/dist/cli/commands/sync.d.ts +31 -0
- package/dist/cli/commands/sync.js +768 -0
- package/dist/cli/helpers.d.ts +30 -0
- package/dist/cli/helpers.js +119 -0
- package/dist/core/auth.d.ts +62 -0
- package/dist/core/auth.js +330 -0
- package/dist/core/config.d.ts +41 -0
- package/dist/core/config.js +96 -0
- package/dist/core/data-repo.d.ts +31 -0
- package/dist/core/data-repo.js +146 -0
- package/dist/core/embedder.d.ts +22 -0
- package/dist/core/embedder.js +104 -0
- package/dist/core/git.d.ts +37 -0
- package/dist/core/git.js +140 -0
- package/dist/core/index.d.ts +4 -0
- package/dist/core/index.js +5 -0
- package/dist/core/insight-extractor.d.ts +26 -0
- package/dist/core/insight-extractor.js +114 -0
- package/dist/core/local-search.d.ts +43 -0
- package/dist/core/local-search.js +221 -0
- package/dist/core/themes.d.ts +15 -0
- package/dist/core/themes.js +77 -0
- package/dist/core/types.d.ts +177 -0
- package/dist/core/types.js +9 -0
- package/dist/core/user-settings.d.ts +15 -0
- package/dist/core/user-settings.js +42 -0
- package/dist/core/vector-store-lance.d.ts +98 -0
- package/dist/core/vector-store-lance.js +384 -0
- package/dist/core/vector-store-supabase.d.ts +89 -0
- package/dist/core/vector-store-supabase.js +295 -0
- package/dist/core/vector-store.d.ts +131 -0
- package/dist/core/vector-store.js +503 -0
- package/dist/daemon-runner.d.ts +8 -0
- package/dist/daemon-runner.js +246 -0
- package/dist/extensions/config.d.ts +22 -0
- package/dist/extensions/config.js +102 -0
- package/dist/extensions/proposals.d.ts +30 -0
- package/dist/extensions/proposals.js +178 -0
- package/dist/extensions/registry.d.ts +35 -0
- package/dist/extensions/registry.js +309 -0
- package/dist/extensions/sandbox.d.ts +16 -0
- package/dist/extensions/sandbox.js +17 -0
- package/dist/extensions/types.d.ts +114 -0
- package/dist/extensions/types.js +4 -0
- package/dist/extensions/worker.d.ts +1 -0
- package/dist/extensions/worker.js +49 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +105 -0
- package/dist/mcp/handlers/archive-project.d.ts +51 -0
- package/dist/mcp/handlers/archive-project.js +112 -0
- package/dist/mcp/handlers/get-quotes.d.ts +27 -0
- package/dist/mcp/handlers/get-quotes.js +61 -0
- package/dist/mcp/handlers/get-source.d.ts +9 -0
- package/dist/mcp/handlers/get-source.js +40 -0
- package/dist/mcp/handlers/ingest.d.ts +25 -0
- package/dist/mcp/handlers/ingest.js +305 -0
- package/dist/mcp/handlers/list-projects.d.ts +4 -0
- package/dist/mcp/handlers/list-projects.js +16 -0
- package/dist/mcp/handlers/list-sources.d.ts +11 -0
- package/dist/mcp/handlers/list-sources.js +20 -0
- package/dist/mcp/handlers/research-agent.d.ts +21 -0
- package/dist/mcp/handlers/research-agent.js +369 -0
- package/dist/mcp/handlers/research.d.ts +22 -0
- package/dist/mcp/handlers/research.js +225 -0
- package/dist/mcp/handlers/retain.d.ts +18 -0
- package/dist/mcp/handlers/retain.js +92 -0
- package/dist/mcp/handlers/search.d.ts +52 -0
- package/dist/mcp/handlers/search.js +145 -0
- package/dist/mcp/handlers/sync.d.ts +47 -0
- package/dist/mcp/handlers/sync.js +211 -0
- package/dist/mcp/server.d.ts +10 -0
- package/dist/mcp/server.js +268 -0
- package/dist/mcp/tools.d.ts +16 -0
- package/dist/mcp/tools.js +297 -0
- package/dist/sync/config.d.ts +26 -0
- package/dist/sync/config.js +140 -0
- package/dist/sync/discover.d.ts +51 -0
- package/dist/sync/discover.js +190 -0
- package/dist/sync/index.d.ts +11 -0
- package/dist/sync/index.js +11 -0
- package/dist/sync/process.d.ts +50 -0
- package/dist/sync/process.js +285 -0
- package/dist/sync/processors.d.ts +24 -0
- package/dist/sync/processors.js +351 -0
- package/dist/tui/browse-handlers-ask.d.ts +30 -0
- package/dist/tui/browse-handlers-ask.js +372 -0
- package/dist/tui/browse-handlers-autocomplete.d.ts +49 -0
- package/dist/tui/browse-handlers-autocomplete.js +270 -0
- package/dist/tui/browse-handlers-extensions.d.ts +18 -0
- package/dist/tui/browse-handlers-extensions.js +107 -0
- package/dist/tui/browse-handlers-pending.d.ts +22 -0
- package/dist/tui/browse-handlers-pending.js +100 -0
- package/dist/tui/browse-handlers-research.d.ts +32 -0
- package/dist/tui/browse-handlers-research.js +363 -0
- package/dist/tui/browse-handlers-tools.d.ts +42 -0
- package/dist/tui/browse-handlers-tools.js +289 -0
- package/dist/tui/browse-handlers.d.ts +239 -0
- package/dist/tui/browse-handlers.js +1944 -0
- package/dist/tui/browse-render-extensions.d.ts +14 -0
- package/dist/tui/browse-render-extensions.js +114 -0
- package/dist/tui/browse-render-tools.d.ts +18 -0
- package/dist/tui/browse-render-tools.js +259 -0
- package/dist/tui/browse-render.d.ts +51 -0
- package/dist/tui/browse-render.js +599 -0
- package/dist/tui/browse-types.d.ts +142 -0
- package/dist/tui/browse-types.js +70 -0
- package/dist/tui/browse-ui.d.ts +10 -0
- package/dist/tui/browse-ui.js +432 -0
- package/dist/tui/browse.d.ts +17 -0
- package/dist/tui/browse.js +625 -0
- package/dist/tui/markdown.d.ts +22 -0
- package/dist/tui/markdown.js +223 -0
- package/package.json +71 -0
- package/plugins/claude-code/.claude-plugin/plugin.json +10 -0
- package/plugins/claude-code/.mcp.json +6 -0
- package/plugins/claude-code/skills/lore/SKILL.md +63 -0
- package/plugins/codex/SKILL.md +36 -0
- package/plugins/codex/agents/openai.yaml +10 -0
- package/plugins/gemini/GEMINI.md +31 -0
- package/plugins/gemini/gemini-extension.json +11 -0
- package/skills/generic-agent.md +99 -0
- package/skills/openclaw.md +67 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lore - Claude Processing (Phase 2)
|
|
3
|
+
*
|
|
4
|
+
* Uses Claude to extract metadata from new files:
|
|
5
|
+
* - title: Descriptive title
|
|
6
|
+
* - summary: 2-4 sentence summary with key takeaways
|
|
7
|
+
* - date: ISO date if present
|
|
8
|
+
* - participants: List of names if present
|
|
9
|
+
* - content_type: interview|meeting|conversation|document|note|analysis
|
|
10
|
+
*
|
|
11
|
+
* Only called for NEW files (not already in Supabase).
|
|
12
|
+
*/
|
|
13
|
+
import { randomUUID } from 'crypto';
import { mkdir, writeFile, copyFile } from 'fs/promises';
import path from 'path';
import Anthropic from '@anthropic-ai/sdk';
import { processFile } from './processors.js';
import { generateEmbedding, createSearchableText } from '../core/embedder.js';
import { addSource } from '../core/vector-store.js';
import { gitCommitAndPush } from '../core/git.js';
import { getExtensionRegistry } from '../extensions/registry.js';
|
|
21
|
+
// ============================================================================
|
|
22
|
+
// Claude Client
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// Shared Anthropic SDK client, created lazily on first use.
let anthropic = null;
/**
 * Return the shared Anthropic client, constructing it on the first call.
 * @throws {Error} when ANTHROPIC_API_KEY is not set in the environment.
 */
function getAnthropic() {
    if (anthropic !== null) {
        return anthropic;
    }
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        throw new Error('ANTHROPIC_API_KEY environment variable is required');
    }
    anthropic = new Anthropic({ apiKey });
    return anthropic;
}
|
|
35
|
+
// ============================================================================
|
|
36
|
+
// Metadata Extraction
|
|
37
|
+
// ============================================================================
|
|
38
|
+
const EXTRACTION_PROMPT = `Analyze this document and extract metadata. Return ONLY valid JSON with these fields:
|
|
39
|
+
|
|
40
|
+
{
|
|
41
|
+
"title": "A descriptive title (create one if not obvious)",
|
|
42
|
+
"summary": "2-4 sentences capturing key takeaways, findings, or purpose",
|
|
43
|
+
"date": "ISO date string (YYYY-MM-DD) if mentioned, otherwise null",
|
|
44
|
+
"participants": ["list", "of", "names"] if this is a meeting/interview, otherwise [],
|
|
45
|
+
"content_type": "one of: interview|meeting|conversation|document|note|analysis"
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
Content type guidelines:
|
|
49
|
+
- interview: User research, customer interview, 1:1 feedback session
|
|
50
|
+
- meeting: Team meeting, standup, planning session
|
|
51
|
+
- conversation: AI chat (Claude, ChatGPT), chat logs
|
|
52
|
+
- document: Spec, design doc, report, article
|
|
53
|
+
- note: Personal notes, memo, quick thoughts
|
|
54
|
+
- analysis: Competitor analysis, market research, data analysis
|
|
55
|
+
|
|
56
|
+
Be specific in the summary. Include concrete details, names, numbers when present.`;
|
|
57
|
+
/**
 * Extract document metadata (title, summary, date, participants,
 * content_type) by sending the content to Claude.
 *
 * @param content  Plain-text content of the file (may be empty for images).
 * @param filePath Used only for its basename: included in the prompt and
 *                 used as the fallback title.
 * @param options  { model, image } — when `image` is provided, the request
 *                 is sent as a Claude Vision message instead of plain text.
 * @returns Normalized metadata object; on any parse failure a fallback
 *          built from the filename and a content prefix is returned.
 * @throws If ANTHROPIC_API_KEY is unset (via getAnthropic).
 */
export async function extractMetadata(content, filePath, options = {}) {
    const { model = 'claude-sonnet-4-20250514', image } = options;
    const client = getAnthropic();
    // Build message content based on whether we have an image or text
    let messageContent;
    if (image) {
        // Image analysis with Claude Vision: image block first, then the prompt
        messageContent = [
            {
                type: 'image',
                source: {
                    type: 'base64',
                    media_type: image.mediaType,
                    data: image.base64,
                },
            },
            {
                type: 'text',
                text: `${EXTRACTION_PROMPT}\n\nFile: ${path.basename(filePath)}\n\nAnalyze this image and extract metadata. Describe what's in the image in detail in the summary.`,
            },
        ];
    }
    else {
        // Text-based analysis; cap input size to keep the prompt bounded
        const maxChars = 50000;
        const truncatedContent = content.length > maxChars
            ? content.substring(0, maxChars) + '\n\n[Content truncated...]'
            : content;
        messageContent = `${EXTRACTION_PROMPT}\n\nFile: ${path.basename(filePath)}\n\n---\n\n${truncatedContent}`;
    }
    const response = await client.messages.create({
        model,
        max_tokens: 1000,
        messages: [
            {
                role: 'user',
                content: messageContent,
            },
        ],
    });
    // Extract text from response (non-text blocks are ignored)
    const responseText = response.content
        .filter((block) => block.type === 'text')
        .map(block => block.text)
        .join('');
    // Parse JSON from response
    try {
        // Find JSON in response (handle markdown code blocks); the greedy
        // match spans from the first '{' to the last '}'
        const jsonMatch = responseText.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            throw new Error('No JSON found in response');
        }
        const parsed = JSON.parse(jsonMatch[0]);
        // Validate and normalize each field with safe fallbacks
        return {
            title: parsed.title || path.basename(filePath),
            summary: parsed.summary || 'No summary available',
            date: parsed.date || null,
            participants: Array.isArray(parsed.participants) ? parsed.participants : [],
            content_type: validateContentType(parsed.content_type),
        };
    }
    catch (error) {
        console.error('[process] Error parsing Claude response:', error);
        console.error('[process] Raw response:', responseText);
        // Return fallback metadata derived from the filename and raw content
        return {
            title: path.basename(filePath),
            summary: content.substring(0, 200) + '...',
            date: null,
            participants: [],
            content_type: 'document',
        };
    }
}
|
|
132
|
+
/**
 * Clamp a Claude-provided content_type to the known vocabulary; anything
 * unrecognized (including null/undefined) falls back to 'document'.
 */
function validateContentType(type) {
    switch (type) {
        case 'interview':
        case 'meeting':
        case 'conversation':
        case 'document':
        case 'note':
        case 'analysis':
            return type;
        default:
            return 'document';
    }
}
|
|
141
|
+
// ============================================================================
|
|
142
|
+
// Source Storage
|
|
143
|
+
// ============================================================================
|
|
144
|
+
/**
 * Generate a fresh source ID as a v4 UUID (kept UUID-shaped for
 * compatibility with the Supabase schema).
 *
 * Uses randomUUID imported from node's crypto module rather than the
 * global WebCrypto `crypto` object, which is not available on older
 * Node.js runtimes and would throw a ReferenceError there.
 */
function generateSourceId() {
    return randomUUID();
}
|
|
148
|
+
/**
 * Persist a processed source to <dataDir>/sources/<sourceId>/.
 *
 * Writes four artifacts: a verbatim copy of the original file
 * (original<ext>), the processed text (content.md), metadata.json, and a
 * placeholder insights.json (summary only; themes/quotes empty).
 *
 * @param sourceId         UUID used as the source's directory name.
 * @param file             Discovered-file record — reads absolutePath,
 *                         relativePath, project, contentHash, sourceName;
 *                         shape assumed from usage, confirm against discover.js.
 * @param metadata         Result of extractMetadata().
 * @param processedContent Text rendering saved as content.md.
 * @param dataDir          Root of the lore data directory.
 */
async function storeSourceToDisk(sourceId, file, metadata, processedContent, dataDir) {
    const sourcesDir = path.join(dataDir, 'sources');
    const sourceDir = path.join(sourcesDir, sourceId);
    // Create source directory
    await mkdir(sourceDir, { recursive: true });
    // Copy original file, preserving its extension
    const originalExt = path.extname(file.absolutePath);
    await copyFile(file.absolutePath, path.join(sourceDir, `original${originalExt}`));
    // Save processed content
    await writeFile(path.join(sourceDir, 'content.md'), processedContent);
    // Save metadata
    const sourceMetadata = {
        id: sourceId,
        title: metadata.title,
        source_type: 'document', // Universal type for sync-ingested sources
        content_type: metadata.content_type,
        // Prefer the document's own date; fall back to import time
        created_at: metadata.date || new Date().toISOString(),
        imported_at: new Date().toISOString(),
        projects: [file.project],
        tags: [],
        source_path: file.absolutePath,
        content_hash: file.contentHash,
        sync_source: file.sourceName,
        original_file: file.relativePath,
    };
    await writeFile(path.join(sourceDir, 'metadata.json'), JSON.stringify(sourceMetadata, null, 2));
    // Save insights (summary + themes placeholder)
    await writeFile(path.join(sourceDir, 'insights.json'), JSON.stringify({ summary: metadata.summary, themes: [], quotes: [] }, null, 2));
}
|
|
177
|
+
/**
 * Make a source searchable: embed its summary and add a record to the
 * local vector store at dbPath.
 *
 * NOTE(review): the record carries `vector: []` while the computed
 * embedding is passed to addSource separately — presumably addSource fills
 * the vector column itself; confirm against core/vector-store.js.
 *
 * @param sourceId UUID of the source.
 * @param file     Discovered-file record (project, contentHash, absolutePath).
 * @param metadata Result of extractMetadata().
 * @param dbPath   Path to the lore.lance database.
 */
async function indexSource(sourceId, file, metadata, dbPath) {
    // Generate embedding from the summary plus project context
    const searchableText = createSearchableText({
        type: 'summary',
        text: metadata.summary,
        project: file.project,
    });
    const vector = await generateEmbedding(searchableText);
    // Create source record; list-valued fields are stored JSON-encoded
    const sourceRecord = {
        id: sourceId,
        title: metadata.title,
        source_type: 'document',
        content_type: metadata.content_type,
        projects: JSON.stringify([file.project]),
        tags: JSON.stringify([]),
        created_at: metadata.date || new Date().toISOString(),
        summary: metadata.summary,
        themes_json: JSON.stringify([]),
        quotes_json: JSON.stringify([]),
        has_full_content: true,
        vector: [],
    };
    // Add to vector store with content_hash and source_path
    await addSource(dbPath, sourceRecord, vector, {
        content_hash: file.contentHash,
        source_path: file.absolutePath,
    });
}
|
|
206
|
+
// ============================================================================
|
|
207
|
+
// Main Processing Function
|
|
208
|
+
// ============================================================================
|
|
209
|
+
/**
 * Phase-2 processing pipeline for newly discovered files.
 *
 * For each file: preprocess to text/image, extract metadata with Claude,
 * index it in the local vector store, write the source to disk, and fire
 * the onSourceCreated extension hook (when a hook context is given).
 * Files run in batches of `concurrency` via Promise.allSettled, with a
 * short pause between batches; per-file failures are collected rather
 * than aborting the run.
 *
 * @param files   Discovered-file records — reads absolutePath, relativePath,
 *                project, contentHash, sourceName, and optional existingId.
 * @param dataDir Root lore data directory; vector store lives at
 *                <dataDir>/lore.lance.
 * @param options { onProgress, model, concurrency, gitPush, hookContext }.
 * @returns { processed: [{file, metadata, sourceId}, ...],
 *            errors: [{file, error}, ...] }
 */
export async function processFiles(files, dataDir, options = {}) {
    const { onProgress, model = 'claude-sonnet-4-20250514', concurrency = 2, gitPush = true, hookContext, } = options;
    const dbPath = path.join(dataDir, 'lore.lance');
    const result = {
        processed: [],
        errors: [],
    };
    // Extensions are only loaded when the caller supplies a hook context
    const extensionRegistry = hookContext
        ? await getExtensionRegistry({ logger: (message) => console.error(message) })
        : null;
    // Process files with controlled concurrency
    for (let i = 0; i < files.length; i += concurrency) {
        const batch = files.slice(i, i + concurrency);
        const batchResults = await Promise.allSettled(batch.map(async (file) => {
            // 1. Read and preprocess file
            const processed = await processFile(file.absolutePath);
            // 2. Extract metadata with Claude (handles both text and images)
            const metadata = await extractMetadata(processed.text, file.absolutePath, { model, image: processed.image });
            // For images, use the summary as the text content
            const contentText = processed.image
                ? `# ${metadata.title}\n\n${metadata.summary}`
                : processed.text;
            // 3. Use existing ID for edits, generate new ID for new files
            const sourceId = file.existingId || generateSourceId();
            // 4. Index in Supabase FIRST (may fail on duplicate content_hash)
            await indexSource(sourceId, file, metadata, dbPath);
            // 5. Store source to disk ONLY if Supabase succeeded
            await storeSourceToDisk(sourceId, file, metadata, contentText, dataDir);
            if (extensionRegistry && hookContext) {
                // Hook payload mirrors the metadata.json written to disk
                await extensionRegistry.runHook('onSourceCreated', {
                    id: sourceId,
                    title: metadata.title,
                    source_type: 'document',
                    content_type: metadata.content_type,
                    created_at: metadata.date || new Date().toISOString(),
                    imported_at: new Date().toISOString(),
                    projects: [file.project],
                    tags: [],
                    source_path: file.absolutePath,
                    content_hash: file.contentHash,
                    sync_source: file.sourceName,
                    original_file: file.relativePath,
                }, {
                    mode: hookContext.mode,
                    dataDir,
                    dbPath,
                });
            }
            return { file, metadata, sourceId };
        }));
        // Collect results, pairing each settled promise with its input file
        for (let j = 0; j < batchResults.length; j++) {
            const batchResult = batchResults[j];
            const file = batch[j];
            if (batchResult.status === 'fulfilled') {
                result.processed.push(batchResult.value);
                onProgress?.(result.processed.length + result.errors.length, files.length, batchResult.value.metadata.title);
            }
            else {
                result.errors.push({
                    file,
                    error: batchResult.reason?.message || String(batchResult.reason),
                });
                onProgress?.(result.processed.length + result.errors.length, files.length, `Error: ${file.relativePath}`);
            }
        }
        // Small delay between batches to avoid rate limits
        if (i + concurrency < files.length) {
            await new Promise(resolve => setTimeout(resolve, 500));
        }
    }
    // Git commit and push if enabled and anything new was stored
    if (gitPush && result.processed.length > 0) {
        await gitCommitAndPush(dataDir, `Sync: Added ${result.processed.length} source(s) from universal sync`);
    }
    return result;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Lore - Format Preprocessors
 *
 * Converts various file formats to plain text for Claude analysis.
 * All processing is IN MEMORY ONLY - original files are never modified.
 */
/** Image media types accepted for base64 image blocks (Claude Vision). */
export type ImageMediaType = 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp';
/** Result of preprocessing a single file into analyzable text. */
export interface ProcessedContent {
    /** Plain-text rendering of the file (empty string for images). */
    text: string;
    /** Format tag, e.g. 'markdown', 'pdf', 'json-granola', 'image'. */
    format: string;
    /** Metadata recovered during preprocessing, when the format provides it. */
    metadata?: {
        title?: string;
        date?: string;
        participants?: string[];
    };
    /** Present for image files: base64 payload for Claude Vision. */
    image?: {
        base64: string;
        mediaType: ImageMediaType;
    };
}
/** Convert a single file to ProcessedContent, dispatching on its extension. */
export declare function processFile(filePath: string): Promise<ProcessedContent>;
/** Preprocess many files, keyed by path; reports progress per completed file. */
export declare function preprocessFiles(filePaths: string[], options?: {
    onProgress?: (completed: number, total: number) => void;
}): Promise<Map<string, ProcessedContent>>;
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lore - Format Preprocessors
|
|
3
|
+
*
|
|
4
|
+
* Converts various file formats to plain text for Claude analysis.
|
|
5
|
+
* All processing is IN MEMORY ONLY - original files are never modified.
|
|
6
|
+
*/
|
|
7
|
+
import { readFile } from 'fs/promises';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
// Cached PDF-extraction function; stays null until 'pdf-parse' loads.
let pdfParser = null;
/**
 * Lazily load the optional 'pdf-parse' dependency and wrap it as a simple
 * (buffer) => { text } function. Returns null when the module is not
 * installed. A successful load is cached; a failed import is NOT cached,
 * so it is retried on each call.
 */
async function getPdfParser() {
    if (!pdfParser) {
        try {
            const { PDFParse } = await import('pdf-parse');
            // Wrap the class in a function
            pdfParser = async (buffer) => {
                const parser = new PDFParse({ data: buffer });
                await parser.load();
                const text = await parser.getText();
                return { text: text || '' };
            };
        }
        catch {
            // pdf-parse not installed — caller renders a placeholder
            return null;
        }
    }
    return pdfParser;
}
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// Markdown Processing
|
|
30
|
+
// ============================================================================
|
|
31
|
+
/**
 * Pass markdown through unchanged; when the document has an ATX H1
 * heading, its text is surfaced as the title.
 */
function processMarkdown(content) {
    const h1 = /^#\s+(.+)$/m.exec(content);
    return {
        text: content,
        format: 'markdown',
        metadata: h1 ? { title: h1[1] } : undefined,
    };
}
|
|
41
|
+
/**
 * Pull plain text out of a message `content` field, which may be a bare
 * string or an array of typed blocks (only `text` blocks are kept).
 */
function extractTextContent(content) {
    if (typeof content === 'string') {
        return content;
    }
    if (!Array.isArray(content)) {
        return '';
    }
    const texts = [];
    for (const block of content) {
        if (block.type === 'text' && block.text) {
            texts.push(block.text);
        }
    }
    return texts.join('\n');
}
/**
 * Convert a JSONL chat log to a readable transcript. Each line is parsed
 * independently; both `{message: {role, content}}` and flat
 * `{role, content}` shapes are recognized, and invalid JSON lines are
 * silently skipped.
 */
function processJSONL(content) {
    const transcript = [];
    for (const rawLine of content.split('\n')) {
        if (!rawLine.trim()) {
            continue;
        }
        let obj;
        try {
            obj = JSON.parse(rawLine);
        }
        catch {
            continue; // skip lines that are not valid JSON
        }
        let role;
        let text;
        if (obj.message?.content) {
            role = obj.message.role || obj.type || 'unknown';
            text = extractTextContent(obj.message.content);
        }
        else if (obj.content) {
            role = obj.role || obj.type || 'unknown';
            text = extractTextContent(obj.content);
        }
        else {
            continue;
        }
        if (text) {
            transcript.push(`[${role.toUpperCase()}]: ${text}`);
        }
    }
    return {
        text: transcript.join('\n\n'),
        format: 'jsonl',
    };
}
|
|
83
|
+
/**
 * Flatten a ProseMirror document node to its concatenated text content.
 * Non-object input and nodes without text/children yield ''.
 */
function proseMirrorToText(node) {
    if (!node || typeof node !== 'object') {
        return '';
    }
    if (node.type === 'text') {
        return node.text || '';
    }
    const children = node.content;
    if (!Array.isArray(children)) {
        return '';
    }
    return children.map(proseMirrorToText).join('');
}
/**
 * Process a JSON file. Granola meeting exports (objects carrying `notes`
 * and/or `transcript`) are rendered as markdown sections with speaker
 * labels and mm:ss timestamps; any other valid JSON is pretty-printed;
 * unparseable input (or a processing failure) passes through as-is with
 * format 'json-invalid'.
 */
function processJSON(content, filePath) {
    try {
        const data = JSON.parse(content);
        if (data.notes || data.transcript) {
            const sections = [];
            // Notes section (ProseMirror document)
            if (data.notes?.content) {
                const notesText = proseMirrorToText(data.notes);
                if (notesText) {
                    sections.push('## Notes\n' + notesText);
                }
            }
            // Transcript section, one labeled utterance per paragraph
            if (data.transcript?.utterances) {
                const transcriptText = data.transcript.utterances
                    .map((u) => {
                    // 'microphone' audio is the local user; anything else a participant
                    const speaker = u.source === 'microphone' ? '[ME]' : '[PARTICIPANT]';
                    const mm = String(Math.floor(u.start / 60)).padStart(2, '0');
                    const ss = String(Math.floor(u.start % 60)).padStart(2, '0');
                    return `[${mm}:${ss}] ${speaker}: ${u.text}`;
                })
                    .join('\n\n');
                if (transcriptText) {
                    sections.push('## Transcript\n' + transcriptText);
                }
            }
            return {
                text: sections.join('\n\n'),
                format: 'json-granola',
                metadata: {
                    title: data.title,
                    date: data.created_at,
                },
            };
        }
        // Generic JSON — pretty-print for readability
        return {
            text: JSON.stringify(data, null, 2),
            format: 'json',
        };
    }
    catch {
        return {
            text: content,
            format: 'json-invalid',
        };
    }
}
|
|
145
|
+
// ============================================================================
|
|
146
|
+
// Plain Text Processing
|
|
147
|
+
// ============================================================================
|
|
148
|
+
/** Wrap already-plain text in the standard ProcessedContent shape. */
function processPlainText(content) {
    return { format: 'text', text: content };
}
|
|
154
|
+
// ============================================================================
|
|
155
|
+
// PDF Processing
|
|
156
|
+
// ============================================================================
|
|
157
|
+
/**
 * Extract text from a PDF via the optional pdf-parse dependency.
 * Returns a placeholder result when the parser is unavailable.
 */
async function processPdf(filePath) {
    const parser = await getPdfParser();
    if (parser === null) {
        return {
            text: '[PDF processing not available - install pdf-parse]',
            format: 'pdf-unsupported',
        };
    }
    const data = await parser(await readFile(filePath));
    return {
        text: data.text,
        format: 'pdf',
        metadata: { title: path.basename(filePath, '.pdf') },
    };
}
|
|
175
|
+
// ============================================================================
|
|
176
|
+
// Image Processing (for Claude Vision)
|
|
177
|
+
// ============================================================================
|
|
178
|
+
/**
 * Media type for a supported image extension (case-insensitive), or null
 * when the extension is not a recognized raster-image format.
 */
function getImageMediaType(ext) {
    switch (ext.toLowerCase()) {
        case '.jpg':
        case '.jpeg':
            return 'image/jpeg';
        case '.png':
            return 'image/png';
        case '.gif':
            return 'image/gif';
        case '.webp':
            return 'image/webp';
        default:
            return null;
    }
}
|
|
188
|
+
/**
 * Read an image file and package it as base64 for Claude Vision.
 * The text field is left empty — Claude fills in a description during
 * metadata extraction.
 */
async function processImage(filePath) {
    const mediaType = getImageMediaType(path.extname(filePath).toLowerCase());
    if (!mediaType) {
        return {
            text: '[Unsupported image format]',
            format: 'image-unsupported',
        };
    }
    const data = await readFile(filePath);
    return {
        text: '', // Will be filled by Claude vision
        format: 'image',
        image: {
            base64: data.toString('base64'),
            mediaType,
        },
    };
}
|
|
208
|
+
// ============================================================================
|
|
209
|
+
// CSV Processing
|
|
210
|
+
// ============================================================================
|
|
211
|
+
/**
 * Render CSV content as labeled "column: value" rows so Claude can read it.
 *
 * Fields are split with RFC 4180 quoting rules: quoted fields may contain
 * commas, and doubled quotes inside a quoted field decode to a literal
 * quote. Embedded newlines inside quoted fields are NOT supported, since
 * rows are split on '\n' before parsing.
 *
 * @param content  Raw CSV text; the first non-empty line is the header.
 * @param filePath Used only for the title (basename without .csv).
 */
function processCsv(content, filePath) {
    const lines = content.split('\n').filter(line => line.trim());
    if (lines.length === 0) {
        return { text: content, format: 'csv' };
    }
    // Parse header and data rows with quote-aware field splitting
    const header = splitCsvRow(lines[0]);
    const rows = lines.slice(1);
    // Convert to readable format, one "Row N" block per record
    const formatted = rows.map((row, idx) => {
        const values = splitCsvRow(row);
        const pairs = header.map((h, i) => `${h}: ${values[i] || ''}`);
        return `Row ${idx + 1}:\n  ${pairs.join('\n  ')}`;
    }).join('\n\n');
    return {
        text: `CSV Data (${rows.length} rows, ${header.length} columns)\n\nColumns: ${header.join(', ')}\n\n${formatted}`,
        format: 'csv',
        metadata: {
            title: path.basename(filePath, '.csv'),
        },
    };
}
// Split one CSV record into trimmed fields, honoring double-quote quoting:
// a comma inside quotes does not end the field, and "" decodes to '"'.
function splitCsvRow(line) {
    const fields = [];
    let current = '';
    let inQuotes = false;
    for (let i = 0; i < line.length; i++) {
        const ch = line[i];
        if (inQuotes) {
            if (ch === '"') {
                if (line[i + 1] === '"') {
                    current += '"'; // escaped quote
                    i++;
                }
                else {
                    inQuotes = false;
                }
            }
            else {
                current += ch;
            }
        }
        else if (ch === '"') {
            inQuotes = true;
        }
        else if (ch === ',') {
            fields.push(current.trim());
            current = '';
        }
        else {
            current += ch;
        }
    }
    fields.push(current.trim());
    return fields;
}
|
|
233
|
+
// ============================================================================
|
|
234
|
+
// HTML Processing
|
|
235
|
+
// ============================================================================
|
|
236
|
+
/**
 * Convert HTML to readable plain text without a DOM: strips script/style
 * (with their contents), maps structural tags to line breaks and bullets,
 * drops remaining tags, decodes the most common character entities, and
 * collapses runs of blank lines. A title is pulled from <title> if present.
 *
 * Fixes the entity-decoding step, whose patterns had been HTML-unescaped
 * into no-ops (e.g. replacing '&' with '&'); the intended named-entity
 * patterns are restored, with '&amp;' decoded LAST so that a literal
 * '&amp;lt;' in the source does not double-decode into '<'.
 */
function processHtml(content) {
    let text = content
        // Remove scripts and styles, including their contents
        .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
        .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
        // Convert common elements to whitespace structure
        .replace(/<br\s*\/?>/gi, '\n')
        .replace(/<\/p>/gi, '\n\n')
        .replace(/<\/div>/gi, '\n')
        .replace(/<\/h[1-6]>/gi, '\n\n')
        .replace(/<li>/gi, '• ')
        .replace(/<\/li>/gi, '\n')
        // Remove remaining tags
        .replace(/<[^>]+>/g, '')
        // Decode common entities ('&amp;' last — see doc comment)
        .replace(/&nbsp;/g, ' ')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'")
        .replace(/&amp;/g, '&')
        // Clean up whitespace
        .replace(/\n{3,}/g, '\n\n')
        .trim();
    // Extract title from <title> tag
    const titleMatch = content.match(/<title[^>]*>([^<]+)<\/title>/i);
    return {
        text,
        format: 'html',
        metadata: titleMatch ? { title: titleMatch[1].trim() } : undefined,
    };
}
|
|
269
|
+
// ============================================================================
|
|
270
|
+
// XML Processing
|
|
271
|
+
// ============================================================================
|
|
272
|
+
/**
 * Reduce an XML document to its text content: CDATA sections are
 * unwrapped, all tags are replaced with spaces, and whitespace is
 * collapsed to single spaces.
 */
function processXml(content) {
    let stripped = content.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1'); // Unwrap CDATA
    stripped = stripped.replace(/<[^>]+>/g, ' ');                        // Remove tags
    stripped = stripped.replace(/\s+/g, ' ').trim();                     // Normalize whitespace
    return {
        text: `XML Document:\n\n${stripped}`,
        format: 'xml',
    };
}
|
|
284
|
+
// ============================================================================
// Main Processing Function
// ============================================================================
/**
 * Process a single file into extracted text plus format metadata.
 *
 * Binary formats (PDF, images) are dispatched by extension before any
 * utf-8 read is attempted; everything else is read as text and routed to
 * the matching format processor. Unknown extensions fall back to content
 * sniffing (JSON / JSONL heuristics) and finally plain text.
 *
 * @param filePath Absolute or relative path to the file.
 * @returns Promise resolving to the processed-document object.
 */
export async function processFile(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    // Binary formats first — these must not be read as utf-8
    if (ext === '.pdf') {
        return processPdf(filePath);
    }
    if (getImageMediaType(ext)) {
        return processImage(filePath);
    }
    // Everything else is treated as text
    const content = await readFile(filePath, 'utf-8');
    if (ext === '.md' || ext === '.markdown') {
        return processMarkdown(content);
    }
    if (ext === '.jsonl') {
        return processJSONL(content);
    }
    if (ext === '.json') {
        return processJSON(content, filePath);
    }
    if (ext === '.txt') {
        return processPlainText(content);
    }
    if (ext === '.csv') {
        return processCsv(content, filePath);
    }
    if (ext === '.html' || ext === '.htm') {
        return processHtml(content);
    }
    if (ext === '.xml' || ext === '.xhtml') {
        return processXml(content);
    }
    // Unknown extension: sniff the content instead
    const trimmed = content.trim();
    if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
        return processJSON(content, filePath);
    }
    if (content.includes('{"')) {
        // Might be JSONL
        return processJSONL(content);
    }
    return processPlainText(content);
}
|
|
329
|
+
// ============================================================================
// Batch Processing
// ============================================================================
/**
 * Process a batch of files sequentially.
 *
 * Files are processed one at a time (in order) so the optional progress
 * callback reflects a stable count. A file whose processing throws is not
 * fatal: it is recorded with format 'error' and the error text in its
 * metadata title, and the batch continues.
 *
 * @param filePaths Array of file paths to process.
 * @param options   Optional: { onProgress?: (done, total) => void }.
 * @returns Promise resolving to a Map of filePath → processed result.
 */
export async function preprocessFiles(filePaths, options = {}) {
    const { onProgress } = options;
    const results = new Map();
    let done = 0;
    for (const filePath of filePaths) {
        try {
            results.set(filePath, await processFile(filePath));
        }
        catch (error) {
            // Record the failure in-band rather than aborting the batch
            results.set(filePath, {
                text: '',
                format: 'error',
                metadata: { title: `Error: ${error}` },
            });
        }
        done += 1;
        onProgress?.(done, filePaths.length);
    }
    return results;
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
 * Ask handlers for the Lore Document Browser TUI
 *
 * Handles AI-powered queries with streaming responses.
 * Supports slash commands and multi-turn conversations.
 *
 * Slash commands:
 *   /project <name> or /p <name> - Set project filter
 *   /type <type> or /t <type>    - Set content type filter
 *   /clear                       - Clear all filters
 *   /new                         - Start new conversation
 *   /help or /?                  - Show available commands
 */
import type { BrowserState, UIComponents } from './browse-types.js';
/**
 * Enter ask mode - show input for query
 */
export declare function enterAskMode(state: BrowserState, ui: UIComponents): void;
/**
 * Exit ask mode - return to list
 */
export declare function exitAskMode(state: BrowserState, ui: UIComponents): void;
/**
 * Prompt for next input after a response
 */
export declare function promptForFollowUp(state: BrowserState, ui: UIComponents): void;
/**
 * Execute the ask query with streaming
 *
 * @param state  Current browser state (filters, conversation).
 * @param ui     UI components to render the streamed response into.
 * @param dbPath Path to the local database to query against.
 * @param query  The user's question (after slash-command handling).
 */
export declare function executeAsk(state: BrowserState, ui: UIComponents, dbPath: string, query: string): Promise<void>;
|