@getlore/cli 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,7 +106,7 @@ export function registerDocsCommand(program, defaultDataDir) {
  .option('-d, --data-dir <dir>', 'Data directory', defaultDataDir)
  .option('--no-push', 'Skip git push')
  .action(async (content, options) => {
- const { handleRetain } = await import('../../mcp/handlers/retain.js');
+ const { handleIngest } = await import('../../mcp/handlers/ingest.js');
  const dataDir = options.dataDir;
  const dbPath = path.join(dataDir, 'lore.lance');
  const validTypes = ['insight', 'decision', 'requirement', 'note'];
@@ -114,21 +114,28 @@ export function registerDocsCommand(program, defaultDataDir) {
  console.error(`Invalid type: ${options.type}. Must be one of: ${validTypes.join(', ')}`);
  process.exit(1);
  }
- const result = await handleRetain(dbPath, dataDir, {
+ // Map CLI type to source_type
+ const sourceTypeMap = {
+ decision: 'notes',
+ requirement: 'notes',
+ insight: 'notes',
+ note: 'notes',
+ };
+ const result = await handleIngest(dbPath, dataDir, {
  content,
  project: options.project,
- type: options.type,
- source_context: options.context,
+ title: `${options.type.charAt(0).toUpperCase() + options.type.slice(1)}: ${content.slice(0, 50)}${content.length > 50 ? '...' : ''}`,
+ source_type: sourceTypeMap[options.type] || 'notes',
  tags: options.tags?.split(',').map((t) => t.trim()),
- }, { autoPush: options.push !== false });
+ }, { autoPush: options.push !== false, hookContext: { mode: 'cli' } });
  if (result.success) {
- console.log(`\n✓ ${result.message}`);
+ console.log(`\n✓ Created ${options.type} for project "${options.project}"`);
  console.log(` ID: ${result.id}`);
  console.log(` Indexed: ${result.indexed ? 'yes' : 'no'}`);
  console.log(` Synced: ${result.synced ? 'yes' : 'no'}`);
  }
  else {
- console.error(`\nFailed to create: ${result.message}`);
+ console.error(`\nFailed to create ${options.type}`);
  process.exit(1);
  }
  });
@@ -19,7 +19,6 @@ import path from 'path';
  export async function initDataRepo(dirPath) {
  await mkdir(dirPath, { recursive: true });
  await mkdir(path.join(dirPath, 'sources'), { recursive: true });
- await mkdir(path.join(dirPath, 'retained'), { recursive: true });
  // Create .gitignore if missing
  const gitignorePath = path.join(dirPath, '.gitignore');
  if (!existsSync(gitignorePath)) {
@@ -34,8 +33,7 @@ Your personal knowledge repository for Lore.

  ## Structure

- - \`sources/\` - Ingested documents
- - \`retained/\` - Explicitly saved insights
+ - \`sources/\` - Ingested content

  Vector embeddings are stored in Supabase (cloud) for multi-machine access.
  `);
@@ -8,7 +8,7 @@
  */
  export type SourceType = string;
  export type SearchMode = 'semantic' | 'keyword' | 'hybrid' | 'regex';
- export type ContentType = 'interview' | 'meeting' | 'conversation' | 'document' | 'note' | 'analysis' | 'survey' | 'research' | 'decision' | 'insight' | 'requirement';
+ export type ContentType = 'interview' | 'meeting' | 'conversation' | 'document' | 'note' | 'analysis' | 'survey' | 'research';
  export interface SourceDocument {
  id: string;
  source_type: SourceType;
@@ -154,13 +154,6 @@ export interface SearchArgs {
  limit?: number;
  mode?: SearchMode;
  }
- export interface RetainArgs {
- content: string;
- project: string;
- type: 'insight' | 'decision' | 'requirement' | 'note';
- source_context?: string;
- tags?: string[];
- }
  export interface ResearchArgs {
  task: string;
  project?: string;
@@ -111,9 +111,7 @@ export async function addSource(_dbPath, source, vector, extras) {
  if (extras?.source_name) {
  record.source_name = extras.source_name;
  }
- const { error } = await client.from('sources').upsert(record, {
- ignoreDuplicates: true,
- });
+ const { error } = await client.from('sources').upsert(record);
  if (error) {
  // Duplicate content_hash for this user — document already exists, skip silently
  if (error.code === '23505') {
@@ -2,13 +2,12 @@
  * Proposal-based write system for extensions
  */
  export interface ProposedChange {
- type: 'create_source' | 'update_source' | 'delete_source' | 'retain_insight' | 'add_tags';
+ type: 'create_source' | 'update_source' | 'delete_source' | 'add_tags';
  title?: string;
  content?: string;
  project?: string;
  sourceId?: string;
  changes?: Record<string, unknown>;
- insight?: string;
  tags?: string[];
  reason: string;
  }
@@ -7,7 +7,6 @@ import { mkdir, readFile, readdir, writeFile } from 'fs/promises';
  import os from 'os';
  import path from 'path';
  import { handleIngest } from '../mcp/handlers/ingest.js';
- import { handleRetain } from '../mcp/handlers/retain.js';
  import { getDatabase, getSourceById } from '../core/vector-store.js';
  export function getPendingDir() {
  return path.join(os.homedir(), '.config', 'lore', 'pending');
@@ -84,18 +83,6 @@ async function applyProposalChange(proposal, dbPath, dataDir) {
  }, { hookContext: { mode: 'cli' } });
  return;
  }
- case 'retain_insight': {
- if (!change.insight) {
- throw new Error('retain_insight requires insight');
- }
- const project = change.project || proposal.extensionName;
- await handleRetain(dbPath, dataDir, {
- content: change.insight,
- project,
- type: 'insight',
- }, {});
- return;
- }
  case 'update_source': {
  if (!change.sourceId || !change.changes) {
  throw new Error('update_source requires sourceId and changes');
@@ -83,7 +83,7 @@ export function createProposeFunction(extensionName, permissions) {
  return async (change) => {
  // Enforce permissions
  const perms = permissions || {};
- if (change.type === 'create_source' || change.type === 'retain_insight') {
+ if (change.type === 'create_source') {
  if (!perms.proposeCreate) {
  throw new Error(`Extension "${extensionName}" does not have permission to propose creating documents. Add permissions.proposeCreate = true to the extension.`);
  }
@@ -7,7 +7,7 @@
  */
  interface IngestArgs {
  content: string;
- title: string;
+ title?: string;
  project: string;
  source_type?: string;
  date?: string;
@@ -123,9 +123,11 @@ function mapContentType(sourceType) {
  }
  }
  export async function handleIngest(dbPath, dataDir, args, options = {}) {
- const { content, title, project, source_type: raw_source_type, date, participants = [], tags = [], source_url, source_name, } = args;
+ const { content, project, source_type: raw_source_type, date, participants = [], tags = [], source_url, source_name, } = args;
  const { autoPush = true, hookContext } = options;
  const source_type = normalizeSourceType(raw_source_type);
+ // Auto-generate title if not provided
+ const title = args.title || `${source_type.charAt(0).toUpperCase() + source_type.slice(1)}: ${content.slice(0, 50)}${content.length > 50 ? '...' : ''}`;
  // Content hash deduplication — skip everything if already ingested
  const contentHash = createHash('sha256').update(content).digest('hex');
  try {
@@ -170,12 +172,17 @@ export async function handleIngest(dbPath, dataDir, args, options = {}) {
  await writeFile(path.join(sourceDir, 'metadata.json'), JSON.stringify(metadata, null, 2));
  // Save content.md
  await writeFile(path.join(sourceDir, 'content.md'), content);
- // Extract insights using LLM
+ // Extract insights using LLM (skip for short content)
  let summary = content.slice(0, 200) + (content.length > 200 ? '...' : '');
  let themes = [];
  let quotes = [];
- try {
- if (content.trim().length > 100) {
+ const isShortContent = content.trim().length <= 500;
+ if (isShortContent) {
+ // Short content fast path — use content as its own summary, skip LLM extraction
+ summary = content;
+ }
+ else {
+ try {
  const insights = await extractInsights(content, title, id, { contentType });
  summary = insights.summary;
  themes = insights.themes.map((t) => ({ name: t.name, quotes: [] }));
@@ -183,10 +190,10 @@ export async function handleIngest(dbPath, dataDir, args, options = {}) {
  // Save insights.json
  await writeFile(path.join(sourceDir, 'insights.json'), JSON.stringify({ summary, themes, quotes }, null, 2));
  }
- }
- catch (error) {
- console.error('Failed to extract insights:', error);
- // Continue with basic summary
+ catch (error) {
+ console.error('Failed to extract insights:', error);
+ // Continue with basic summary
+ }
  }
  // Add to vector store immediately
  try {
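The hunks above change how ingest handles short notes: when no title is supplied, one is derived from the normalized source_type plus the first 50 characters of the content, and content of 500 trimmed characters or fewer skips LLM extraction and becomes its own summary. A minimal sketch that restates the title rule, for illustration only:

```ts
// Restates the auto-title expression from handleIngest above; not part of the package.
function autoTitle(sourceType: string, content: string): string {
  const prefix = sourceType.charAt(0).toUpperCase() + sourceType.slice(1);
  const snippet = content.slice(0, 50) + (content.length > 50 ? '...' : '');
  return `${prefix}: ${snippet}`;
}

// Assuming the CLI's type-to-source_type mapping resolves to 'notes':
// autoTitle('notes', 'We chose JWT for auth') === 'Notes: We chose JWT for auth'
```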
@@ -8,6 +8,7 @@
  * 4. Synthesizes findings into a comprehensive research package
  */
  import type { ResearchPackage } from '../../core/types.js';
+ import type { ProgressCallback } from './research.js';
  interface ResearchAgentArgs {
  task: string;
  project?: string;
@@ -17,5 +18,5 @@ interface ResearchAgentArgs {
  /**
  * Run the agentic research
  */
- export declare function runResearchAgent(dbPath: string, dataDir: string, args: ResearchAgentArgs): Promise<ResearchPackage>;
+ export declare function runResearchAgent(dbPath: string, dataDir: string, args: ResearchAgentArgs, onProgress?: ProgressCallback): Promise<ResearchPackage>;
  export {};
@@ -27,9 +27,9 @@ function createLoreToolsServer(dbPath, dataDir, archivedProjects) {
  tool('search', 'Semantic search across all sources in the knowledge repository. Returns summaries with relevant quotes. Use this to find information related to a topic.', {
  query: z.string().describe('Semantic search query - describe what you\'re looking for'),
  source_type: z
- .enum(['granola', 'claude-code', 'claude-desktop', 'chatgpt', 'markdown', 'document'])
+ .string()
  .optional()
- .describe('Filter by source type (e.g., "granola" for meeting transcripts)'),
+ .describe('Filter by source type (e.g., "meeting", "slack", "document")'),
  content_type: z
  .enum(['interview', 'meeting', 'conversation', 'document', 'note', 'analysis'])
  .optional()
@@ -91,6 +91,8 @@ ${quotes}`;
  .slice(0, 10)
  .map((q) => `- [${q.speaker || 'unknown'}] "${q.text}"`)
  .join('\n');
+ const sourceUrlLine = source.source_url ? `\n**Source URL:** ${source.source_url}` : '';
+ const sourceNameLine = source.source_name ? `\n**Source:** ${source.source_name}` : '';
  return {
  content: [
  {
@@ -99,7 +101,7 @@ ${quotes}`;

  **Type:** ${source.source_type} / ${source.content_type}
  **Created:** ${source.created_at}
- **Projects:** ${source.projects.join(', ') || 'none'}
+ **Projects:** ${source.projects.join(', ') || 'none'}${sourceUrlLine}${sourceNameLine}

  ## Summary
  ${source.summary}
@@ -122,9 +124,9 @@ ${quotes || 'No quotes extracted'}`,
  // List sources - browse available sources
  tool('list_sources', 'List all sources in the repository. Use this to understand what knowledge is available before searching.', {
  source_type: z
- .enum(['granola', 'claude-code', 'claude-desktop', 'chatgpt', 'markdown', 'document'])
+ .string()
  .optional()
- .describe('Filter by source type'),
+ .describe('Filter by source type (e.g., "meeting", "slack", "document")'),
  project: z.string().optional().describe('Filter to specific project'),
  limit: z.number().optional().describe('Max results (default 20)'),
  }, async (args) => {
@@ -232,7 +234,7 @@ Now begin your research. Use the tools iteratively until you have comprehensive
  /**
  * Run the agentic research
  */
- export async function runResearchAgent(dbPath, dataDir, args) {
+ export async function runResearchAgent(dbPath, dataDir, args, onProgress) {
  const { task, project, include_sources = true } = args;
  // Load archived projects to filter (extract just the project names)
  const archivedProjectsData = await loadArchivedProjects(dataDir);
@@ -245,6 +247,8 @@ export async function runResearchAgent(dbPath, dataDir, args) {
  let lastAssistantMessage = '';
  try {
  // Run the agent
+ let turnCount = 0;
+ await onProgress?.(5, undefined, 'Starting research agent...');
  for await (const message of query({
  prompt: `Research task: ${task}${project ? ` (project: ${project})` : ''}`,
  options: {
@@ -261,8 +265,9 @@ export async function runResearchAgent(dbPath, dataDir, args) {
  permissionMode: 'acceptEdits', // Auto-approve tool calls
  },
  })) {
- // Capture assistant messages (intermediate)
+ // Capture assistant messages and extract tool call details
  if (message.type === 'assistant') {
+ turnCount++;
  const msg = message;
  if (msg.message?.content) {
  const content = msg.message.content;
@@ -270,9 +275,30 @@ export async function runResearchAgent(dbPath, dataDir, args) {
  lastAssistantMessage = content;
  }
  else if (Array.isArray(content)) {
- const textBlocks = content.filter((b) => b.type === 'text');
- if (textBlocks.length > 0) {
- lastAssistantMessage = textBlocks.map((b) => b.text).join('\n');
+ // Extract tool_use blocks to report what the agent is doing
+ for (const block of content) {
+ if (block.type === 'tool_use') {
+ const input = block.input;
+ const toolShort = block.name.replace('mcp__lore-tools__', '');
+ if (toolShort === 'search' && input.query) {
+ await onProgress?.(0, undefined, `Searching: "${input.query}"`);
+ }
+ else if (toolShort === 'get_source' && input.source_id) {
+ await onProgress?.(0, undefined, `Reading source: ${input.source_id}`);
+ }
+ else if (toolShort === 'list_sources') {
+ const filter = input.project ? ` (project: ${input.project})` : '';
+ await onProgress?.(0, undefined, `Listing sources${filter}`);
+ }
+ }
+ else if (block.type === 'text' && block.text) {
+ lastAssistantMessage = block.text;
+ // Send a brief snippet of agent reasoning
+ const snippet = block.text.substring(0, 120).replace(/\n/g, ' ');
+ if (snippet.length > 10) {
+ await onProgress?.(0, undefined, `Agent thinking: ${snippet}...`);
+ }
+ }
  }
  }
  }
@@ -282,16 +308,22 @@ export async function runResearchAgent(dbPath, dataDir, args) {
  const msg = message;
  if (msg.subtype === 'success' && msg.result) {
  lastAssistantMessage = msg.result;
+ await onProgress?.(0, undefined, `Research complete (${msg.num_turns} turns)`);
  console.error(`[research-agent] Completed in ${msg.num_turns} turns`);
  }
  else if (msg.subtype?.startsWith('error')) {
  console.error(`[research-agent] Error: ${msg.subtype}`, msg.errors);
  }
  }
- // Log tool usage for debugging
+ // Log tool results via the summary message
  if (message.type === 'tool_use_summary') {
  const msg = message;
- console.error(`[research-agent] Tool: ${msg.tool_name || 'unknown'}`);
+ if (msg.summary) {
+ // The summary often contains "Found X results" or similar
+ const summarySnippet = msg.summary.substring(0, 150).replace(/\n/g, ' ');
+ await onProgress?.(0, undefined, `Result: ${summarySnippet}`);
+ }
+ console.error(`[research-agent] Tool complete (turn ${turnCount})`);
  }
  }
  // Parse the final result from the agent's output
@@ -6,17 +6,36 @@
  * 2. SIMPLE (fallback): Single-pass search + GPT-4o-mini synthesis
  *
  * Set LORE_RESEARCH_MODE=simple to use the fallback mode.
+ *
+ * MCP integration: Research runs asynchronously. The `research` tool returns
+ * immediately with a job_id. Use `research_status` to poll for results.
  */
  import type { ResearchPackage } from '../../core/types.js';
+ /**
+ * Start research asynchronously and return a job ID immediately.
+ */
+ export declare function startResearchJob(dbPath: string, dataDir: string, args: ResearchArgs, options?: {
+ hookContext?: {
+ mode: 'mcp' | 'cli';
+ };
+ onProgress?: ProgressCallback;
+ }): {
+ job_id: string;
+ status: string;
+ message: string;
+ };
+ export declare function getResearchJobStatus(jobId: string): Promise<Record<string, unknown>>;
  interface ResearchArgs {
  task: string;
  project?: string;
  content_type?: string;
  include_sources?: boolean;
  }
+ export type ProgressCallback = (progress: number, total?: number, message?: string) => Promise<void>;
  export declare function handleResearch(dbPath: string, dataDir: string, args: ResearchArgs, options?: {
  hookContext?: {
  mode: 'mcp' | 'cli';
  };
+ onProgress?: ProgressCallback;
  }): Promise<ResearchPackage>;
  export {};
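These declarations define the new asynchronous contract: `startResearchJob` returns a job handle synchronously, and `getResearchJobStatus` long-polls it. A minimal sketch of a caller driving that loop, assuming only the signatures declared above (paths and error handling are illustrative):

```ts
import { startResearchJob, getResearchJobStatus } from './research.js';

// Illustrative driver: start a job, then poll until it leaves the 'running' state.
async function researchAndWait(dbPath: string, dataDir: string, task: string) {
  const { job_id } = startResearchJob(dbPath, dataDir, { task });
  for (;;) {
    // Each call long-polls for up to ~20s, so this loop is not a busy-wait.
    const status = await getResearchJobStatus(job_id);
    if (status.status === 'complete') return status.result;
    if (status.status === 'error') throw new Error(String(status.error));
    if (status.status === 'not_found') throw new Error(`Job ${job_id} expired`);
    // Still 'running': the activity array shows what the agent is doing; keep polling.
  }
}
```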
@@ -6,13 +6,145 @@
  * 2. SIMPLE (fallback): Single-pass search + GPT-4o-mini synthesis
  *
  * Set LORE_RESEARCH_MODE=simple to use the fallback mode.
+ *
+ * MCP integration: Research runs asynchronously. The `research` tool returns
+ * immediately with a job_id. Use `research_status` to poll for results.
  */
  import OpenAI from 'openai';
+ import { randomUUID } from 'crypto';
  import { searchSources } from '../../core/vector-store.js';
  import { generateEmbedding } from '../../core/embedder.js';
  import { loadArchivedProjects } from './archive-project.js';
  import { runResearchAgent } from './research-agent.js';
  import { getExtensionRegistry } from '../../extensions/registry.js';
+ const jobStore = new Map();
+ // Clean up old jobs after 10 minutes
+ const JOB_TTL_MS = 10 * 60 * 1000;
+ function cleanOldJobs() {
+ const now = Date.now();
+ for (const [id, job] of jobStore) {
+ const startTime = new Date(job.startedAt).getTime();
+ if (now - startTime > JOB_TTL_MS) {
+ jobStore.delete(id);
+ }
+ }
+ }
+ /**
+ * Start research asynchronously and return a job ID immediately.
+ */
+ export function startResearchJob(dbPath, dataDir, args, options = {}) {
+ cleanOldJobs();
+ const jobId = randomUUID();
+ const now = new Date().toISOString();
+ const job = {
+ id: jobId,
+ task: args.task,
+ project: args.project,
+ status: 'running',
+ startedAt: now,
+ lastActivityAt: now,
+ activity: ['Starting research...'],
+ };
+ jobStore.set(jobId, job);
+ // Fire and forget — runs in the background
+ handleResearch(dbPath, dataDir, args, {
+ ...options,
+ onProgress: async (_p, _t, message) => {
+ const j = jobStore.get(jobId);
+ if (j && message) {
+ j.activity.push(message);
+ j.lastActivityAt = new Date().toISOString();
+ }
+ },
+ })
+ .then((result) => {
+ const j = jobStore.get(jobId);
+ if (j) {
+ j.status = 'complete';
+ j.completedAt = new Date().toISOString();
+ j.result = result;
+ j.activity.push('Research complete');
+ }
+ })
+ .catch((err) => {
+ const j = jobStore.get(jobId);
+ if (j) {
+ j.status = 'error';
+ j.completedAt = new Date().toISOString();
+ j.error = err instanceof Error ? err.message : String(err);
+ j.activity.push(`Failed: ${j.error}`);
+ }
+ })
+ .catch((err) => {
+ // Final safety net for errors in the handlers above
+ console.error(`[research] Critical error in job ${jobId}:`, err);
+ });
+ return {
+ job_id: jobId,
+ status: 'running',
+ message: `Research started for: "${args.task}". Poll research_status with job_id "${jobId}" every 15-20 seconds. This typically takes 2-8 minutes — do not abandon early.`,
+ };
+ }
+ /**
+ * Check status of a research job.
+ * Long-polls for up to POLL_WAIT_MS, returning early if the job completes.
+ */
+ const POLL_WAIT_MS = 20_000;
+ const POLL_INTERVAL_MS = 1_000;
+ export async function getResearchJobStatus(jobId) {
+ let job = jobStore.get(jobId);
+ if (!job) {
+ return { status: 'not_found', job_id: jobId };
+ }
+ // If already done, return immediately
+ if (job.status !== 'running') {
+ return formatJobResponse(job);
+ }
+ // Long-poll: wait up to POLL_WAIT_MS for completion, checking every second
+ const deadline = Date.now() + POLL_WAIT_MS;
+ while (Date.now() < deadline) {
+ await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
+ // Re-fetch to avoid stale reference if job was cleaned up
+ job = jobStore.get(jobId);
+ if (!job) {
+ return { status: 'not_found', job_id: jobId };
+ }
+ if (job.status !== 'running') {
+ return formatJobResponse(job);
+ }
+ }
+ return formatJobResponse(job);
+ }
+ function formatJobResponse(job) {
+ const elapsed = Math.round((Date.now() - new Date(job.startedAt).getTime()) / 1000);
+ if (job.status === 'complete') {
+ return {
+ status: 'complete',
+ job_id: job.id,
+ task: job.task,
+ elapsed_seconds: elapsed,
+ result: job.result,
+ };
+ }
+ if (job.status === 'error') {
+ return {
+ status: 'error',
+ job_id: job.id,
+ task: job.task,
+ elapsed_seconds: elapsed,
+ error: job.error,
+ };
+ }
+ return {
+ status: 'running',
+ job_id: job.id,
+ task: job.task,
+ elapsed_seconds: elapsed,
+ total_steps: job.activity.length,
+ activity: job.activity,
+ message: `Research is still running (${elapsed}s elapsed, ${job.activity.length} steps completed). This is normal — deep research takes 2-8 minutes. Keep polling.`,
+ };
+ }
  // Lazy initialization for OpenAI (only used in simple mode)
  let openaiClient = null;
  function getOpenAI() {
@@ -109,12 +241,15 @@ Respond with only the JSON object.`;
  }
  export async function handleResearch(dbPath, dataDir, args, options = {}) {
  const { task, project, include_sources = true } = args;
+ const { onProgress } = options;
  // Check if we should use agentic mode (default) or simple mode (fallback)
  const useAgenticMode = process.env.LORE_RESEARCH_MODE !== 'simple';
  if (useAgenticMode) {
  console.error('[research] Using agentic mode (Claude Agent SDK)');
+ await onProgress?.(0, undefined, 'Starting agentic research...');
  try {
- const result = await runResearchAgent(dbPath, dataDir, args);
+ const result = await runResearchAgent(dbPath, dataDir, args, onProgress);
+ await onProgress?.(100, 100, 'Research complete');
  await runResearchCompletedHook(result, {
  mode: options.hookContext?.mode || 'mcp',
  dataDir,
@@ -124,11 +259,14 @@ export async function handleResearch(dbPath, dataDir, args, options = {}) {
  }
  catch (error) {
  console.error('[research] Agentic mode failed, falling back to simple mode:', error);
+ await onProgress?.(0, undefined, 'Agentic mode failed, falling back to simple mode...');
  // Fall through to simple mode
  }
  }
  console.error('[research] Using simple mode (single-pass synthesis)');
- const result = await handleResearchSimple(dbPath, dataDir, args);
+ await onProgress?.(0, undefined, 'Starting simple research...');
+ const result = await handleResearchSimple(dbPath, dataDir, args, onProgress);
+ await onProgress?.(100, 100, 'Research complete');
  await runResearchCompletedHook(result, {
  mode: options.hookContext?.mode || 'mcp',
  dataDir,
@@ -140,7 +278,7 @@ export async function handleResearch(dbPath, dataDir, args, options = {}) {
  * Simple research mode - single pass search + synthesis
  * This is the fallback when agentic mode fails or is disabled
  */
- async function handleResearchSimple(dbPath, dataDir, args) {
+ async function handleResearchSimple(dbPath, dataDir, args, onProgress) {
  const { task, project, include_sources = true } = args;
  // Use sensible defaults for simple mode
  const sourceLimit = 10;
@@ -149,7 +287,9 @@ async function handleResearchSimple(dbPath, dataDir, args) {
  const archivedProjects = await loadArchivedProjects(dataDir);
  const archivedNames = new Set(archivedProjects.map((p) => p.project.toLowerCase()));
  // Step 1: Search for relevant sources (fetch extra to account for archived filtering)
+ await onProgress?.(10, 100, 'Generating embeddings...');
  const queryVector = await generateEmbedding(task);
+ await onProgress?.(30, 100, 'Searching sources...');
  const rawSources = await searchSources(dbPath, queryVector, {
  limit: sourceLimit * 2,
  project,
@@ -172,6 +312,7 @@ async function handleResearchSimple(dbPath, dataDir, args) {
  }
  }
  // Step 3: Synthesize findings with LLM (conflict-aware)
+ await onProgress?.(60, 100, 'Synthesizing findings...');
  // Note: Decisions are now extracted at query time by the agentic research mode
  const synthesis = await synthesizeFindings(task, sources.map((s) => ({
  id: s.id,
@@ -44,5 +44,6 @@ export declare function handleSync(dbPath: string, dataDir: string, args: SyncAr
  hookContext?: {
  mode: 'mcp' | 'cli';
  };
+ onProgress?: (progress: number, total?: number, message?: string) => Promise<void>;
  }): Promise<SyncResult>;
  export {};
@@ -233,8 +233,10 @@ export async function handleSync(dbPath, dataDir, args, options = {}) {
  already_indexed: 0,
  reconciled: 0,
  };
+ const { onProgress } = options;
  // 1. Git pull
  if (doPull) {
+ await onProgress?.(5, undefined, 'Pulling from git...');
  const pullResult = await gitPull(dataDir);
  result.git_pulled = pullResult.success && (pullResult.message?.includes('Pulled') || false);
  if (pullResult.error) {
@@ -248,17 +250,20 @@ export async function handleSync(dbPath, dataDir, args, options = {}) {
  const hasUniversalSources = getEnabledSources(config).length > 0;
  if (hasUniversalSources && !useLegacy) {
  // Use new universal sync
+ await onProgress?.(20, undefined, 'Discovering new files...');
  const { discovery, processing } = await universalSync(dataDir, dryRun, options.hookContext);
  result.discovery = discovery;
  result.processing = processing;
  }
  // Always run legacy disk sync for backward compatibility
  // (picks up sources added via old `lore ingest` command)
+ await onProgress?.(60, undefined, 'Running legacy sync...');
  const legacyResult = await legacyDiskSync(dbPath, dataDir);
  result.sources_found = legacyResult.sources_found;
  result.sources_indexed = legacyResult.sources_indexed;
  result.already_indexed = legacyResult.already_indexed;
  // Reconcile: ensure every Supabase source has local content.md
+ await onProgress?.(80, undefined, 'Reconciling local content...');
  result.reconciled = await reconcileLocalContent(dataDir);
  }
  // 3. Git push
@@ -19,9 +19,8 @@ import { toolDefinitions } from './tools.js';
  import { handleSearch } from './handlers/search.js';
  import { handleGetSource } from './handlers/get-source.js';
  import { handleListSources } from './handlers/list-sources.js';
- import { handleRetain } from './handlers/retain.js';
  import { handleIngest } from './handlers/ingest.js';
- import { handleResearch } from './handlers/research.js';
+ import { startResearchJob, getResearchJobStatus } from './handlers/research.js';
  import { handleListProjects } from './handlers/list-projects.js';
  import { handleSync } from './handlers/sync.js';
  import { handleArchiveProject } from './handlers/archive-project.js';
@@ -136,7 +135,7 @@ async function main() {
  }
  const server = new Server({
  name: 'lore',
- version: '0.1.0',
+ version: '0.8.0',
  }, {
  capabilities: {
  tools: {},
@@ -184,8 +183,25 @@ async function main() {
  return { tools: toolDefinitions };
  });
  // Handle tool calls (core tools only)
- server.setRequestHandler(CallToolRequestSchema, async (request) => {
+ server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
  const { name, arguments: args } = request.params;
+ // Build a progress callback for long-running tools.
+ // If the client sent a progressToken, we send notifications/progress back;
+ // otherwise, onProgress is a no-op.
+ const progressToken = request.params._meta?.progressToken;
+ const onProgress = progressToken
+ ? async (progress, total, message) => {
+ try {
+ await extra.sendNotification({
+ method: 'notifications/progress',
+ params: { progressToken, progress, ...(total != null ? { total } : {}), ...(message ? { message } : {}) },
+ });
+ }
+ catch {
+ // Progress notifications are best-effort
+ }
+ }
+ : undefined;
  try {
  let result;
  switch (name) {
@@ -202,12 +218,6 @@ async function main() {
  case 'list_projects':
  result = await handleListProjects(DB_PATH);
  break;
- // Push-based retention
- case 'retain':
- result = await handleRetain(DB_PATH, LORE_DATA_DIR, args, {
- autoPush: AUTO_GIT_PUSH,
- });
- break;
  // Direct document ingestion
  case 'ingest':
  result = await handleIngest(DB_PATH, LORE_DATA_DIR, args, {
@@ -215,16 +225,22 @@ async function main() {
  hookContext: { mode: 'mcp' },
  });
  break;
- // Agentic research tool (uses Claude Agent SDK internally)
+ // Agentic research tool runs async, returns job_id immediately
  case 'research':
- result = await handleResearch(DB_PATH, LORE_DATA_DIR, args, {
+ result = startResearchJob(DB_PATH, LORE_DATA_DIR, args, {
  hookContext: { mode: 'mcp' },
+ onProgress,
  });
  break;
+ // Poll for research results (long-polls up to 20s)
+ case 'research_status':
+ result = await getResearchJobStatus(args?.job_id);
+ break;
  // Sync tool
  case 'sync':
  result = await handleSync(DB_PATH, LORE_DATA_DIR, args, {
  hookContext: { mode: 'mcp' },
+ onProgress,
  });
  break;
  // Project management
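The handler above forwards `onProgress` calls as MCP progress notifications whenever the caller supplied a `progressToken`. Roughly, the exchange looks like the following sketch; the shapes mirror the params built in the handler and the standard MCP progress notification, while the token and values are invented for illustration:

```ts
// A tools/call request that opts into progress updates (illustrative values).
const callWithProgress = {
  jsonrpc: '2.0',
  id: 1,
  method: 'tools/call',
  params: {
    name: 'sync',
    arguments: {},
    _meta: { progressToken: 'sync-1' },
  },
};

// A notification this server then emits while handleSync reports progress.
const progressNotification = {
  jsonrpc: '2.0',
  method: 'notifications/progress',
  params: { progressToken: 'sync-1', progress: 60, message: 'Running legacy sync...' },
};
```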
package/dist/mcp/tools.js CHANGED
@@ -86,18 +86,6 @@ const ListSourcesSchema = z.object({
  .describe('Filter by source type (matches the source_type passed during ingest, e.g. "meeting", "slack", "github-issue")'),
  limit: z.number().optional().describe('Max results (default 20)'),
  });
- const RetainSchema = z.object({
- content: z.string().describe('The insight, decision, or note to retain'),
- project: z.string().describe('Project this belongs to'),
- type: z
- .enum(['insight', 'decision', 'requirement', 'note'])
- .describe('Type of knowledge being retained'),
- source_context: z
- .string()
- .optional()
- .describe('Where this came from (e.g., "user interview with Sarah")'),
- tags: z.array(z.string()).optional().describe('Optional tags for categorization'),
- });
  // ============================================================================
  // Agentic Research Tool
  // ============================================================================
@@ -116,7 +104,7 @@ const ResearchSchema = z.object({
  // ============================================================================
  const IngestSchema = z.object({
  content: z.string().describe('The document content to ingest'),
- title: z.string().describe('Title for the document'),
+ title: z.string().optional().describe('Title for the document. Auto-generated from content if not provided.'),
  project: z.string().describe('Project this document belongs to'),
  source_type: z
  .string()
@@ -224,24 +212,12 @@ Use this to browse what exists in a project, understand the scope of available k
  properties: {},
  },
  },
- {
- name: 'retain',
- description: `Save a discrete insight, decision, requirement, or note to the knowledge base. These are short, synthesized pieces of knowledge — NOT full documents.
-
- Examples of what to retain:
- - A decision: "We chose JWT over session cookies because of mobile app requirements"
- - An insight: "3 out of 5 users mentioned export speed as their top frustration"
- - A requirement: "Must support SSO for enterprise customers"
-
- USE 'ingest' INSTEAD for full documents, meeting notes, transcripts, or any content longer than a few paragraphs.`,
- inputSchema: zodToJsonSchema(RetainSchema),
- },
  // Agentic tool
  {
  name: 'research',
  description: `Run a comprehensive research query across the knowledge base. An internal agent iteratively searches, reads sources, cross-references findings, and synthesizes a research package with full citations.

- Returns: summary, key findings, supporting quotes with citations, conflicts detected between sources, and suggested follow-up queries.
+ ASYNC: This tool returns immediately with a job_id. You MUST then poll 'research_status' with that job_id to get results. Research typically takes 2-8 minutes depending on the amount of data. Poll every 15-20 seconds. Do NOT assume it is stuck — check the 'activity' array in the status response to see what the agent is doing.

  WHEN TO USE:
  - Questions that span multiple sources ("What do we know about authentication?")
@@ -249,9 +225,23 @@ WHEN TO USE:
  - Building a cited research package for decision-making
  - Open-ended exploration of a topic

- COST: This tool makes multiple LLM calls internally (typically 3-8 search + read cycles). For simple lookups, use 'search' instead — it's 10x cheaper and faster.`,
+ COST: This tool makes multiple LLM calls internally (typically 10-30 search + read cycles). For simple lookups, use 'search' instead — it's 10x cheaper and faster.`,
  inputSchema: zodToJsonSchema(ResearchSchema),
  },
+ // Research status (polling for async results)
+ {
+ name: 'research_status',
+ description: `Check the status of a running research job. Returns the full research package when complete.
+
+ Call this after 'research' returns a job_id. Research typically takes 2-8 minutes. Poll every 15-20 seconds. The response includes an 'activity' array showing exactly what the research agent is doing (searches, sources being read, reasoning). As long as 'total_steps' is increasing or 'elapsed_seconds' is under 8 minutes, the research is progressing normally — do NOT abandon it.`,
+ inputSchema: {
+ type: 'object',
+ properties: {
+ job_id: { type: 'string', description: 'The job_id returned by the research tool' },
+ },
+ required: ['job_id'],
+ },
+ },
  // Ingest tool
  {
  name: 'ingest',
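For orientation, a `research_status` response for a still-running job carries the fields referenced in the description above (see `formatJobResponse` earlier in this diff); the values below are invented for illustration:

```ts
// Illustrative research_status payload while a job is still running.
const runningStatus = {
  status: 'running',
  job_id: '<job_id returned by the research tool>',
  task: 'What do we know about authentication?',
  elapsed_seconds: 95,
  total_steps: 3, // length of the activity array
  activity: [
    'Starting research...',
    'Searching: "authentication decisions"',
    'Reading source: abc-123',
  ],
  message: 'Research is still running (95s elapsed, 3 steps completed). Keep polling.',
};
```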
@@ -262,7 +252,7 @@ IDEMPOTENT: Content is deduplicated by SHA256 hash. Calling ingest with identica
  WHAT HAPPENS:
  1. Content hash checked for deduplication
  2. Document saved to disk
- 3. LLM extracts summary, themes, and key quotes
+ 3. LLM extracts summary, themes, and key quotes (skipped for short content ≤500 chars)
  4. Embedding generated for semantic search
  5. Indexed in Supabase for instant retrieval

@@ -270,7 +260,7 @@ BEST PRACTICES:
  - Always pass source_url when available (enables citation linking back to the original)
  - Use source_name for human-readable origin context (e.g., "Slack #product-team")
  - source_type is a free-form hint — use whatever describes the content (slack, email, notion, github-issue, etc.)
- - Use 'retain' instead for short discrete insights/decisions (not full documents)`,
+ - For short insights, decisions, or notes just pass the content. Title and source_type are optional.`,
  inputSchema: zodToJsonSchema(IngestSchema),
  },
  // Sync tool
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@getlore/cli",
- "version": "0.6.0",
+ "version": "0.8.0",
  "description": "Research knowledge repository with semantic search, citations, and project lineage tracking",
  "type": "module",
  "main": "dist/index.js",
@@ -36,8 +36,7 @@ After setup, Lore works autonomously.
  | `get_source` | Low | Full document retrieval by ID |
  | `list_sources` | Low | Browse what exists in a project |
  | `list_projects` | Low | Discover available knowledge domains |
- | `retain` | Low | Save discrete insights/decisions |
- | `ingest` | Medium | Push full documents into the knowledge base |
+ | `ingest` | Low-Medium | Push content — documents, insights, or decisions |
  | `research` | High | Cross-reference multiple sources, synthesize findings |
  | `sync` | Variable | Refresh from configured source directories |

@@ -50,6 +49,11 @@ Use `ingest` to push content into Lore when:

  Always pass `source_url` (original URL for linking) and `source_name` (human-readable label like "GitHub PR #123") when available. Ingestion is idempotent — safe to call repeatedly with the same content.

+ For short insights, decisions, or notes — title and source_type are optional:
+ ```
+ ingest(content: "We chose JWT for auth", project: "auth-system")
+ ```
+
  ## When to Search

  Before making recommendations or answering questions about past work:
@@ -57,13 +61,6 @@ Before making recommendations or answering questions about past work:
  2. Only use `research` if the question genuinely needs cross-referencing multiple sources
  3. Use `get_source(id, include_content: true)` when you need the full text

- ## When to Retain
-
- Use `retain` for short synthesized knowledge (not full documents):
- - Decisions made during a session
- - Key insights distilled from analysis
- - Requirements extracted from conversations
-
  ## Example: Grounding a Decision

  ```
@@ -73,11 +70,9 @@ search("database migration approach", project: "backend-rewrite")

  # 2. If results are relevant, get full context
  get_source("abc-123", include_content: true)

- # 3. After making a decision, retain it
- retain(
+ # 3. After making a decision, save it
+ ingest(
  content: "Chose pgvector over Pinecone for embeddings — lower latency, simpler ops, sufficient scale",
- project: "backend-rewrite",
- type: "decision",
- source_context: "Architecture review session"
+ project: "backend-rewrite"
  )
@@ -35,8 +35,7 @@ After setup, Lore works autonomously.
  | `get_source` | Low | Full document retrieval by ID |
  | `list_sources` | Low | Browse what exists in a project |
  | `list_projects` | Low | Discover available knowledge domains |
- | `retain` | Low | Save discrete insights/decisions |
- | `ingest` | Medium | Push full documents into the knowledge base |
+ | `ingest` | Low-Medium | Push content — documents, insights, or decisions |
  | `research` | High | Cross-reference multiple sources, synthesize findings |
  | `sync` | Variable | Refresh from configured source directories |

@@ -51,6 +50,4 @@ Before making recommendations or answering questions about past work:
  2. Only use `research` for multi-source synthesis (10x more expensive)
  3. Use `get_source(id, include_content: true)` for full text

- ## When to Retain
-
- Use `retain` for short synthesized knowledge (decisions, insights, requirements) — not full documents.
+ For short insights or decisions, just pass the content — title and source_type are optional and auto-generated from content.
@@ -30,8 +30,7 @@ After setup, Lore works autonomously.
  | `get_source` | Low | Full document retrieval by ID |
  | `list_sources` | Low | Browse what exists in a project |
  | `list_projects` | Low | Discover available knowledge domains |
- | `retain` | Low | Save discrete insights/decisions |
- | `ingest` | Medium | Push full documents into the knowledge base |
+ | `ingest` | Low-Medium | Push content — documents, insights, or decisions |
  | `research` | High | Cross-reference multiple sources, synthesize findings |
  | `sync` | Variable | Refresh from configured source directories |

@@ -46,6 +45,4 @@ Before making recommendations or answering questions about past work:
  2. Only use `research` for multi-source synthesis (10x more expensive)
  3. Use `get_source(id, include_content: true)` for full text

- ## When to Retain
-
- Use `retain` for short synthesized knowledge (decisions, insights, requirements) — not full documents.
+ For short insights or decisions, just pass the content — title and source_type are optional and auto-generated from content.
@@ -26,7 +26,6 @@ After setup, Lore works autonomously.

  - **Sources**: Full documents (meeting notes, interviews, Slack threads, specs, etc.)
  - **Projects**: Organizational grouping for sources
- - **Insights**: Short retained knowledge (decisions, requirements, observations)
  - **Citations**: Every piece of knowledge links back to its original source

  ## Tools Reference
@@ -47,10 +46,19 @@ The primary way to add content. Accepts any document with metadata.
  }
  ```

+ For short insights, decisions, or notes — title and source_type are optional:
+ ```json
+ {
+ "content": "We chose JWT over session cookies because of mobile app requirements",
+ "project": "auth-system"
+ }
+ ```
+
  - **Idempotent**: Duplicate content returns `{deduplicated: true}` with no processing cost.
  - **source_type**: Free-form string. Common values: `meeting`, `interview`, `document`, `notes`, `analysis`, `conversation`, `slack`, `email`, `github-issue`, `notion`.
  - **source_url**: Always pass when available — enables citation linking.
  - **source_name**: Human-readable origin label.
+ - Short content (≤500 chars) skips LLM extraction for speed.

  ### `search` — Find relevant sources
  Fast lookup. Returns summaries with relevance scores.
@@ -79,18 +87,6 @@ List sources filtered by project or type. Sorted by date (newest first).
  ### `list_projects` — Discover projects
  Lists all projects with source counts and activity dates.

- ### `retain` — Save discrete knowledge
- For short insights, decisions, or requirements — not full documents.
-
- ```json
- {
- "content": "Users consistently report export takes >30s for large datasets",
- "project": "my-project",
- "type": "insight",
- "source_context": "User interview synthesis — Jan batch"
- }
- ```
-
  ### `research` — Deep research with citations
  Runs an internal agent that iteratively searches, reads, and synthesizes findings.

@@ -101,7 +97,7 @@ Runs an internal agent that iteratively searches, reads, and synthesizes finding
  }
  ```

- **Cost warning**: Makes 3-8 internal LLM calls. Use `search` for simple lookups.
+ **Async**: Returns a `job_id` immediately. Poll `research_status` for results (typically 2-8 minutes). Makes 10-30 internal LLM calls. Use `search` for simple lookups.

  ### `sync` — Refresh from source directories
  Scans configured directories for new files. Use `ingest` for agent-pushed content instead.
@@ -114,6 +110,6 @@ Excludes from default search. Only use when explicitly requested.
  1. **Search before you answer**: If a question might have documented context, search Lore first.
  2. **Ingest what matters**: After meaningful conversations or when processing external content, ingest it.
  3. **Always pass source_url**: Enables citation linking back to the original.
- 4. **Use retain for synthesis**: After analyzing multiple sources, retain the key insight.
+ 4. **Ingest handles both long and short content**: For short insights, decisions, or notes — just pass the content. Title and source_type are optional.
  5. **Prefer search over research**: `search` is 10x cheaper. Only use `research` for multi-source synthesis.
  6. **Cite your sources**: When presenting Lore results, reference the source title and date.
@@ -37,12 +37,12 @@ Before answering questions about past decisions, user feedback, project history,

  3. **Use `get_source`** with `include_content=true` when you need the full original text of a specific document.

- ## When to Retain Insights
+ ## Short Content

- Use `retain` (not `ingest`) for short, discrete pieces of knowledge:
- - Key decisions: "We chose X because Y"
- - Synthesized insights: "3/5 users mentioned Z as their top issue"
- - Requirements: "Must support SSO for enterprise"
+ For short insights, decisions, or notes title and source_type are optional:
+ ```
+ ingest(content: "We chose X because Y", project: "my-project")
+ ```

  ## Citation Best Practices