agenr 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
  # Changelog

+ ## [0.6.2] - 2026-02-19
+
+ ### Added
+ - feat(extractor): elaborative encoding pre-fetch now runs before each chunk extraction, retrieves top-related memories from the vector index, and injects up to 3 references into the extractor prompt
+ - feat(cli): `--no-pre-fetch` flag added to `agenr extract`, `agenr ingest`, and `agenr watch` to opt out of prompt memory pre-fetch
+ - feat(cli): `--db` flag added to `agenr extract`, `agenr ingest`, and `agenr watch` for database path overrides
+ - feat(recall): exported `fetchRelatedEntries()` thin wrapper for direct ANN vector candidate queries
+
+ ### Changed
+ - tuning(extractor): pre-fetch similarity threshold set to `0.78` for `text-embedding-3-small` (1024 dimensions)
+ - tuning(extractor): fresh-install pre-fetch skip threshold set to 20 non-superseded entries
+ - tuning(extractor): pre-fetch timeout set to 5000ms to avoid chunk extraction stalls on hanging embedding calls
+
+ ### Security
+ - prompt: injected related memories are explicitly reference-only and do not lower the SKIP threshold
+ - runtime: pre-fetch is always best-effort and silently degrades to an empty related-memory context on any error
+
+ ## [0.6.1] - 2026-02-19
+
+ ### Fixed
+ - fix(watch): context file generation failed with CLIENT_CLOSED when a context path was configured
+ - fix(mcp): remove agenr_done tool (was not removed in v0.6.0 as intended)
+
  ## [0.6.0] - 2026-02-18

  ### Added
@@ -22,7 +45,7 @@
  - fix(watch): use real recall score breakdown in generated context variants

  ### Removed
- - agenr_done MCP tool removed (breaking change) -- use agenr_retire instead. agenr_retire accepts an entry ID from agenr_recall output and works on all entry types, not just todos.
+ - `agenr_done` MCP tool removed; use `agenr_retire` instead (supports all entry types, not just todos)

  ## [0.5.3] - 2026-02-18

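For orientation, the new 0.6.2 flags compose like this (a usage sketch; the transcript file and database paths are hypothetical):

agenr extract session.jsonl --db ~/.agenr/agenr.db
agenr extract session.jsonl --no-pre-fetch
agenr ingest ./transcripts --db ~/.agenr/agenr.db --no-pre-fetch
agenr watch --dir ~/sessions --platform claude-code --no-pre-fetch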
@@ -1,5 +1,6 @@
  import { Command } from 'commander';
  import { Model, Api, Context, SimpleStreamOptions, AssistantMessageEvent, AssistantMessage } from '@mariozechner/pi-ai';
+ import { Client } from '@libsql/client';

  declare const KNOWLEDGE_TYPES: readonly ["fact", "decision", "preference", "todo", "relationship", "event", "lesson"];
  declare const EXPIRY_LEVELS: readonly ["core", "permanent", "temporary"];
@@ -130,8 +131,16 @@ interface LlmClient {
  credentials: ResolvedCredentials;
  }

+ declare function readConfig(env?: NodeJS.ProcessEnv): AgenrConfig | null;
+
  declare function deduplicateEntries(entries: KnowledgeEntry[]): KnowledgeEntry[];

+ declare function getDb(dbPath?: string): Client;
+ declare function initDb(client: Client): Promise<void>;
+ declare function closeDb(client: Client): void;
+
+ declare function resolveEmbeddingApiKey(config: AgenrConfig | null | undefined, env?: NodeJS.ProcessEnv): string;
+
  type SimpleAssistantStream = AsyncIterable<AssistantMessageEvent> & {
  result: () => Promise<AssistantMessage>;
  };
@@ -165,6 +174,10 @@ declare function extractKnowledgeFromChunks(params: {
  streamSimpleImpl?: StreamSimpleFn;
  sleepImpl?: (ms: number) => Promise<void>;
  retryDelayMs?: (attempt: number) => number;
+ db?: Client;
+ embeddingApiKey?: string;
+ noPreFetch?: boolean;
+ embedFn?: (texts: string[], apiKey: string) => Promise<number[][]>;
  }): Promise<ExtractChunksResult>;

  interface ResolveLlmClientInput {
@@ -204,8 +217,10 @@ interface ExtractCommandOptions {
  split?: boolean;
  model?: string;
  provider?: string;
+ db?: string;
  verbose?: boolean;
  noDedup?: boolean;
+ noPreFetch?: boolean;
  }
  interface CliDeps {
  expandInputFilesFn: typeof expandInputFiles;
@@ -215,6 +230,11 @@ interface CliDeps {
  extractKnowledgeFromChunksFn: typeof extractKnowledgeFromChunks;
  deduplicateEntriesFn: typeof deduplicateEntries;
  writeOutputFn: typeof writeOutput;
+ readConfigFn: typeof readConfig;
+ resolveEmbeddingApiKeyFn: typeof resolveEmbeddingApiKey;
+ getDbFn: typeof getDb;
+ initDbFn: typeof initDb;
+ closeDbFn: typeof closeDb;
  }
  declare function runExtractCommand(files: string[], options: ExtractCommandOptions, deps?: Partial<CliDeps>): Promise<{
  exitCode: number;
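The newly declared `readConfig`, `resolveEmbeddingApiKey`, `getDb`/`initDb`/`closeDb`, and the extra `extractKnowledgeFromChunks` parameters combine into a programmatic pre-fetch setup. A minimal TypeScript sketch, assuming these declarations are actually exported from the package entry point; the chunk and client setup is elided and the database path is hypothetical:

import { getDb, initDb, closeDb, readConfig, resolveEmbeddingApiKey, extractKnowledgeFromChunks } from "agenr";

async function extractWithPreFetch(file: string, chunks: any[], client: any) {
  const config = readConfig(process.env);
  const db = getDb("/tmp/agenr.db"); // hypothetical override; omit to use the configured path
  await initDb(db);
  try {
    // passing db + embeddingApiKey enables the per-chunk related-memory pre-fetch
    return await extractKnowledgeFromChunks({
      file,
      chunks,
      client,
      db,
      embeddingApiKey: resolveEmbeddingApiKey(config, process.env),
      noPreFetch: false,
    });
  } finally {
    closeDb(db);
  }
}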
package/dist/cli-main.js CHANGED
@@ -3867,6 +3867,9 @@ async function fetchVectorCandidates(db, queryEmbedding, limit, platform, projec
  };
  });
  }
+ async function fetchRelatedEntries(db, queryEmbedding, limit) {
+ return fetchVectorCandidates(db, queryEmbedding, limit);
+ }
  async function fetchSessionCandidates(db, limit, context, platform, project, excludeProject, projectStrict) {
  const normalizedProject = parseProjectList(project);
  const normalizedExclude = parseProjectList(excludeProject);
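Per the changelog, `fetchRelatedEntries()` is exported for direct ANN candidate queries; because it forwards only the first three arguments, the platform/project filters of `fetchVectorCandidates` stay undefined and the lookup is unscoped. A hedged usage sketch, assuming `db` is an initialized @libsql/client connection and `queryVec` comes from the same embedding model the index was built with:

const candidates = await fetchRelatedEntries(db, queryVec, 10);
// each candidate pairs the stored entry with its similarity score
for (const candidate of candidates) {
  if (candidate.vectorSim >= 0.78) {
    console.log(candidate.entry.subject, candidate.vectorSim.toFixed(3));
  }
}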
@@ -9176,11 +9179,18 @@ WHY: Routine execution. No durable knowledge, decisions, or lessons.
  - canonical_key: optional lowercase hyphenated 3-5 word identifier when clear (example: "preferred-package-manager")
  - content: clear declarative statement, not a quote. Min 20 chars.
  - source_context: one sentence, max 20 words.
- - tags: 1-4 lowercase descriptive tags.`;
+ - tags: 1-4 lowercase descriptive tags.
+ When related memories are injected before a chunk, they are reference material only. They do not lower the emission threshold.`;
  var MAX_ATTEMPTS = 5;
  var DEFAULT_INTER_CHUNK_DELAY_MS = 150;
  var DEDUP_BATCH_SIZE = 50;
  var DEDUP_BATCH_TRIGGER = 100;
+ var PREFETCH_SIMILARITY_THRESHOLD = 0.78;
+ var PREFETCH_SIMILARITY_EPSILON = 1e-6;
+ var PREFETCH_CANDIDATE_LIMIT = 10;
+ var MAX_PREFETCH_RESULTS = 3;
+ var PREFETCH_MIN_DB_ENTRIES = 20;
+ var PREFETCH_TIMEOUT_MS = 5e3;
  var DEDUP_SYSTEM_PROMPT = `You are deduplicating a list of extracted knowledge entries.

  Rules:
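These constants encode the tuning values from the changelog: 10 ANN candidates, at most 3 injected references, a 20-entry fresh-install floor, and a 5000ms deadline. The epsilon exists for the threshold comparison used later in this diff (`candidate.vectorSim + PREFETCH_SIMILARITY_EPSILON >= PREFETCH_SIMILARITY_THRESHOLD`); a tiny worked example of the float boundary it guards:

// a score meant to sit exactly at the 0.78 cutoff can round down in float math
console.log(0.7799999999 >= 0.78);        // false: the boundary candidate is dropped
console.log(0.7799999999 + 1e-6 >= 0.78); // true: the epsilon keeps it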
@@ -9244,8 +9254,77 @@ var ParseResponseError = class extends Error {
  function normalize3(value) {
  return value.trim().toLowerCase();
  }
- function buildUserPrompt(chunk) {
+ async function preFetchRelated(chunkText2, db, embeddingApiKey, embedFn = embed, onVerbose) {
+ const run = async () => {
+ try {
+ if (!chunkText2.trim()) {
+ onVerbose?.("[pre-fetch] skipped (empty chunk text)");
+ return [];
+ }
+ const countResult = await db.execute({
+ sql: "SELECT COUNT(*) AS count FROM entries WHERE superseded_by IS NULL",
+ args: []
+ });
+ const count = Number(countResult.rows[0]?.count ?? 0);
+ if (count < PREFETCH_MIN_DB_ENTRIES) {
+ onVerbose?.(`[pre-fetch] skipped (db count ${count} < ${PREFETCH_MIN_DB_ENTRIES})`);
+ return [];
+ }
+ const vectors = await embedFn([chunkText2], embeddingApiKey);
+ const queryVec = vectors[0];
+ if (!queryVec || !Array.isArray(queryVec)) {
+ onVerbose?.("[pre-fetch] skipped: embedding provider returned no query vector");
+ return [];
+ }
+ onVerbose?.(`[pre-fetch] embedded chunk (${queryVec.length} dims)`);
+ const candidates = await fetchRelatedEntries(db, queryVec, PREFETCH_CANDIDATE_LIMIT);
+ onVerbose?.(`[pre-fetch] ${candidates.length} candidates returned`);
+ const above = candidates.filter(
+ (candidate) => candidate.vectorSim + PREFETCH_SIMILARITY_EPSILON >= PREFETCH_SIMILARITY_THRESHOLD
+ );
+ onVerbose?.(`[pre-fetch] ${above.length} above threshold ${PREFETCH_SIMILARITY_THRESHOLD}`);
+ return above.slice(0, MAX_PREFETCH_RESULTS).map((candidate) => candidate.entry);
+ } catch (error) {
+ onVerbose?.(`[pre-fetch] skipped: ${error instanceof Error ? error.message : String(error)}`);
+ return [];
+ }
+ };
+ let timeoutId;
+ const timeout = new Promise((resolve) => {
+ timeoutId = setTimeout(() => {
+ onVerbose?.(`[pre-fetch] skipped: timeout after ${PREFETCH_TIMEOUT_MS}ms`);
+ resolve([]);
+ }, PREFETCH_TIMEOUT_MS);
+ });
+ const result = await Promise.race([run(), timeout]);
+ if (timeoutId) {
+ clearTimeout(timeoutId);
+ }
+ return result;
+ }
+ function buildUserPrompt(chunk, related) {
+ if (related === void 0) {
+ return [
+ "Selectively extract durable knowledge from this conversation transcript.",
+ "",
+ "Transcript:",
+ "---",
+ chunk.text,
+ "---",
+ "",
+ 'Call submit_knowledge once with {"entries": [...]} and use an empty array if nothing qualifies.'
+ ].join("\n");
+ }
+ const memoryBlock = related.length === 0 ? "[none found]" : related.map((entry) => `- [${entry.type}] ${entry.subject}: ${entry.content}`).join("\n");
  return [
+ "Existing related memories (reference only -- your SKIP/emit threshold is unchanged):",
+ memoryBlock,
+ "",
+ "Do not emit entries that express the same fact as any memory listed above, even in different words.",
+ "If this chunk clearly contradicts a memory listed above, emit a fact entry stating the contradiction directly in the content field. Do not use inline citation markers like [1] or [2] in any field -- these become dead references.",
+ "Only emit a cross-reference entry when this chunk extends, contradicts, or updates a specific fact. Do not cross-reference just because entries share the same project or general domain.",
+ "Your SKIP/emit threshold is unchanged. The memories above are reference only.",
+ "",
  "Selectively extract durable knowledge from this conversation transcript.",
  "",
  "Transcript:",
@@ -9826,12 +9905,13 @@ async function sleepMs2(ms) {
  await new Promise((resolve) => setTimeout(resolve, ms));
  }
  async function extractChunkOnce(params) {
+ const prompt = buildUserPrompt(params.chunk, params.related);
  const context = {
  systemPrompt: SYSTEM_PROMPT,
  messages: [
  {
  role: "user",
- content: buildUserPrompt(params.chunk),
+ content: prompt,
  timestamp: Date.now()
  }
  ],
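The preFetchRelated implementation two hunks up races its work against a timer so a hanging embedding call cannot stall chunk extraction. Restated in isolation (a TypeScript sketch of the same pattern, not package API): on timeout the caller proceeds with the fallback while the slow promise keeps running and its eventual result is discarded; clearTimeout merely avoids a dangling timer when the work wins the race.

async function withDeadline<T>(work: () => Promise<T>, ms: number, fallback: T): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<T>((resolve) => {
    timer = setTimeout(() => resolve(fallback), ms);
  });
  const result = await Promise.race([work(), deadline]);
  clearTimeout(timer); // harmless no-op if the deadline already fired
  return result;
}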
@@ -9902,6 +9982,13 @@ async function extractKnowledgeFromChunks(params) {
  let lastError = null;
  let chunkResult = null;
  let streamBuffer = [];
+ const related = params.noPreFetch === true ? void 0 : params.db && params.embeddingApiKey ? await preFetchRelated(
+ chunk.text,
+ params.db,
+ params.embeddingApiKey,
+ params.embedFn,
+ params.verbose ? params.onVerbose : void 0
+ ) : void 0;
  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt += 1) {
  if (params.verbose) {
  params.onVerbose?.(
@@ -9922,7 +10009,8 @@ async function extractKnowledgeFromChunks(params) {
  onStreamDelta: bufferStreamDeltas ? (delta, kind) => {
  streamBuffer.push({ delta, kind });
  } : params.onStreamDelta,
- streamSimpleImpl: params.streamSimpleImpl
+ streamSimpleImpl: params.streamSimpleImpl,
+ related
  });
  warnings.push(...chunkResult.warnings);
  successfulChunks += 1;
@@ -12430,6 +12518,21 @@ async function runIngestCommand(inputPaths, options, deps) {
  let forceDeletedEntrySourceRows = 0;
  let completed = 0;
  let embeddingApiKey = null;
+ if (!options.noPreFetch) {
+ try {
+ embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
+ } catch (error) {
+ embeddingApiKey = null;
+ if (verbose) {
+ clack4.log.warn(
+ formatWarn(
+ `Pre-fetch disabled - embedding API key not available: ${error instanceof Error ? error.message : String(error)}`
+ ),
+ clackOutput
+ );
+ }
+ }
+ }
  let watchStateLoaded = false;
  let watchState = createEmptyWatchState();
  let cursor = 0;
@@ -12604,6 +12707,9 @@ async function runIngestCommand(inputPaths, options, deps) {
  client,
  verbose: false,
  llmConcurrency,
+ db: options.noPreFetch ? void 0 : db,
+ embeddingApiKey: options.noPreFetch ? void 0 : embeddingApiKey ?? void 0,
+ noPreFetch: options.noPreFetch === true,
  onVerbose: verbose ? (line) => {
  clack4.log.info(line, clackOutput);
  } : void 0,
@@ -13202,26 +13308,6 @@ var TOOL_DEFINITIONS = [
  }
  }
  },
- {
- name: "agenr_done",
- description: "Mark a todo as completed and remove it from active recall. Use when you have resolved a task or confirmed something is no longer needed. Fuzzy-matches by subject.",
- inputSchema: {
- type: "object",
- additionalProperties: false,
- required: ["subject"],
- properties: {
- subject: {
- type: "string",
- description: "The subject of the todo to mark as done. Partial/fuzzy match is supported."
- },
- confirm: {
- type: "boolean",
- description: "If true, skip confirmation prompt and mark done immediately. Default false.",
- default: false
- }
- }
- }
- },
  {
  name: "agenr_extract",
  description: "Extract knowledge entries from raw text.",
@@ -13562,30 +13648,6 @@ function formatExtractedText(entries, stored) {
  }
  return lines.join("\n");
  }
- function toActiveTodoRows(rows) {
- return rows.map((row) => ({
- id: typeof row.id === "string" ? row.id : String(row.id ?? ""),
- subject: typeof row.subject === "string" ? row.subject : String(row.subject ?? ""),
- content: typeof row.content === "string" ? row.content : String(row.content ?? "")
- }));
- }
- function findTodoMatches(rows, subject) {
- const normalizedSubject = subject.trim().toLowerCase();
- return rows.filter((row) => {
- const normalizedRow = row.subject.toLowerCase();
- return normalizedRow.includes(normalizedSubject) || normalizedSubject.includes(normalizedRow);
- });
- }
- function formatDoneCandidates(subject, candidates) {
- const lines = [`Multiple active todos match "${subject}":`, ""];
- for (let i = 0; i < candidates.length; i += 1) {
- const candidate = candidates[i];
- lines.push(`${i + 1}. ${candidate.subject}`);
- }
- lines.push("");
- lines.push("Re-run with confirm=true to mark the top match.");
- return lines.join("\n");
- }
  function extractIdForError(raw) {
  if (!isRecord(raw) || !hasOwn(raw, "id")) {
  return null;
@@ -13782,47 +13844,6 @@ function createMcpServer(options = {}, deps = {}) {
  });
  return formatStoreSummary(result);
  }
- async function callDoneTool(args) {
- const subject = typeof args.subject === "string" ? args.subject.trim() : "";
- if (!subject) {
- throw new RpcError(JSON_RPC_INVALID_PARAMS, "subject is required");
- }
- if (args.confirm !== void 0 && typeof args.confirm !== "boolean") {
- throw new RpcError(JSON_RPC_INVALID_PARAMS, "confirm must be a boolean");
- }
- const confirm5 = args.confirm === true;
- const db = await ensureDb();
- const result = await db.execute({
- sql: `
- SELECT id, subject, content
- FROM entries
- WHERE type = 'todo' AND superseded_by IS NULL
- ORDER BY importance DESC, created_at DESC, subject ASC
- `,
- args: []
- });
- const todos = toActiveTodoRows(result.rows);
- const matches = findTodoMatches(todos, subject);
- if (matches.length === 0) {
- return {
- content: [{ type: "text", text: `No active todo matching: ${subject}` }],
- isError: true
- };
- }
- if (matches.length > 1 && !confirm5) {
- return {
- content: [{ type: "text", text: formatDoneCandidates(subject, matches.slice(0, 5)) }]
- };
- }
- const selected = matches[0];
- await db.execute({
- sql: "UPDATE entries SET superseded_by = id, updated_at = datetime('now') WHERE id = ?",
- args: [selected.id]
- });
- return {
- content: [{ type: "text", text: `Marked done: ${selected.subject}` }]
- };
- }
  async function callExtractTool(args) {
  const text2 = typeof args.text === "string" ? args.text : "";
  if (!text2.trim()) {
@@ -13930,9 +13951,6 @@ function createMcpServer(options = {}, deps = {}) {
  content: [{ type: "text", text: await callStoreTool(params.args) }]
  };
  }
- if (params.name === "agenr_done") {
- return callDoneTool(params.args);
- }
  if (params.name === "agenr_extract") {
  return {
  content: [{ type: "text", text: await callExtractTool(params.args) }]
@@ -15149,6 +15167,16 @@ async function runWatcher(options, deps) {
  if (db) {
  await resolvedDeps.initDbFn(db);
  }
+ if (!options.noPreFetch) {
+ try {
+ embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
+ } catch (error) {
+ embeddingApiKey = null;
+ options.onWarn?.(
+ `Pre-fetch disabled - embedding API key not available: ${error instanceof Error ? error.message : String(error)}`
+ );
+ }
+ }
  let dbChain = Promise.resolve();
  const withDbLock = async (fn) => {
  const previous = dbChain;
@@ -15378,6 +15406,9 @@ async function runWatcher(options, deps) {
  chunks: parsed.chunks,
  client,
  verbose: options.verbose,
+ db: options.noPreFetch ? void 0 : db ?? void 0,
+ embeddingApiKey: options.noPreFetch ? void 0 : embeddingApiKey ?? void 0,
+ noPreFetch: options.noPreFetch === true,
  onChunkComplete: async (chunkResult) => {
  await processChunkEntries(chunkResult.entries);
  }
@@ -15836,6 +15867,7 @@ async function runWatchCommand(file, options, deps) {
  const once2 = options.once === true;
  const json = options.json === true;
  const raw = options.raw === true;
+ const contextEnabled = Boolean(options.context);
  const modeConfig = await resolveWatchMode(file, options, resolvedDeps.statFileFn);
  const clackOutput = { output: process.stderr };
  clack10.intro(banner(), clackOutput);
@@ -15876,8 +15908,16 @@ async function runWatchCommand(file, options, deps) {
  );
  clack10.log.info("", clackOutput);
  clack10.log.info("Waiting for changes...", clackOutput);
+ const emitWatchWarning = (message) => {
+ if (message.startsWith("Filtered:")) {
+ clack10.log.info(message, clackOutput);
+ return;
+ }
+ clack10.log.warn(formatWarn(message), clackOutput);
+ };
  let cycleCount = 0;
  let contextChain = Promise.resolve();
+ let contextDb = null;
  const summary = await runWatcher(
  {
  filePath: modeConfig.filePath ?? void 0,
@@ -15892,17 +15932,12 @@ async function runWatchCommand(file, options, deps) {
  raw,
  once: once2,
  onlineDedup: options.onlineDedup !== false,
+ noPreFetch: options.noPreFetch === true,
  model: options.model,
  provider: options.provider,
  dbPath: options.db,
  initialState: state,
- onWarn: (message) => {
- if (message.startsWith("Filtered:")) {
- clack10.log.info(message, clackOutput);
- return;
- }
- clack10.log.warn(formatWarn(message), clackOutput);
- },
+ onWarn: emitWatchWarning,
  onSwitch: (from, to, platform) => {
  const fromLabel = from ? formatSwitchLabel(from) : "(none)";
  const platformLabel = platform ? ` [${platform}]` : "";
@@ -15944,6 +15979,7 @@ async function runWatchCommand(file, options, deps) {
  clackOutput
  );
  if (!dryRun && options.context && result.entriesStored > 0 && ctx.db) {
+ contextDb = ctx.db;
  const contextPath = path29.resolve(options.context.replace(/^~(?=$|\/)/, os19.homedir()));
  contextChain = contextChain.then(async () => {
  await resolvedDeps.generateContextFileFn(
@@ -15954,12 +15990,8 @@ async function runWatchCommand(file, options, deps) {
  );
  await writeContextVariants(ctx.db, contextPath, resolvedDeps.nowFn());
  }).catch((err) => {
- if (verbose) {
- clack10.log.warn(
- `Context refresh failed: ${err instanceof Error ? err.message : String(err)}`,
- clackOutput
- );
- }
+ const msg = `Context refresh failed: ${err instanceof Error ? err.message : String(err)}`;
+ emitWatchWarning(msg);
  });
  }
  }
@@ -15973,7 +16005,8 @@ async function runWatchCommand(file, options, deps) {
  deduplicateEntriesFn: resolvedDeps.deduplicateEntriesFn,
  getDbFn: resolvedDeps.getDbFn,
  initDbFn: resolvedDeps.initDbFn,
- closeDbFn: resolvedDeps.closeDbFn,
+ closeDbFn: contextEnabled ? () => {
+ } : resolvedDeps.closeDbFn,
  storeEntriesFn: resolvedDeps.storeEntriesFn,
  loadWatchStateFn: resolvedDeps.loadWatchStateFn,
  saveWatchStateFn: resolvedDeps.saveWatchStateFn,
@@ -15984,6 +16017,12 @@ async function runWatchCommand(file, options, deps) {
  }
  );
  await contextChain.catch(() => void 0);
+ if (contextEnabled && contextDb) {
+ try {
+ resolvedDeps.closeDbFn(contextDb);
+ } catch {
+ }
+ }
  clack10.log.info(
  `Summary: ${summary.cycles} cycles | ${summary.entriesStored} entries stored | watched for ${formatDuration(summary.durationMs)}`,
  clackOutput
@@ -16334,6 +16373,10 @@ async function runSetup(env = process.env) {
  }

  // src/cli-main.ts
+ function stderrLine2(message) {
+ process.stderr.write(`${message}
+ `);
+ }
  async function assertReadableFile(filePath) {
  const stat = await fs32.stat(filePath);
  if (!stat.isFile()) {
@@ -16376,7 +16419,12 @@ async function runExtractCommand(files, options, deps) {
  createLlmClientFn: deps?.createLlmClientFn ?? createLlmClient,
  extractKnowledgeFromChunksFn: deps?.extractKnowledgeFromChunksFn ?? extractKnowledgeFromChunks,
  deduplicateEntriesFn: deps?.deduplicateEntriesFn ?? deduplicateEntries,
- writeOutputFn: deps?.writeOutputFn ?? writeOutput
+ writeOutputFn: deps?.writeOutputFn ?? writeOutput,
+ readConfigFn: deps?.readConfigFn ?? readConfig,
+ resolveEmbeddingApiKeyFn: deps?.resolveEmbeddingApiKeyFn ?? resolveEmbeddingApiKey,
+ getDbFn: deps?.getDbFn ?? getDb,
+ initDbFn: deps?.initDbFn ?? initDb,
+ closeDbFn: deps?.closeDbFn ?? closeDb
  };
  const expanded = await resolvedDeps.expandInputFilesFn(files);
  if (expanded.length === 0) {
@@ -16390,6 +16438,30 @@ async function runExtractCommand(files, options, deps) {
  model: options.model,
  env: process.env
  });
+ let db;
+ let embeddingApiKey;
+ if (!options.noPreFetch) {
+ const config = resolvedDeps.readConfigFn(process.env);
+ const dbPath = options.db?.trim() || config?.db?.path?.trim();
+ if (dbPath) {
+ let preFetchDb;
+ try {
+ embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
+ preFetchDb = resolvedDeps.getDbFn(dbPath);
+ await resolvedDeps.initDbFn(preFetchDb);
+ db = preFetchDb;
+ } catch (error) {
+ if (preFetchDb) {
+ resolvedDeps.closeDbFn(preFetchDb);
+ }
+ db = void 0;
+ embeddingApiKey = void 0;
+ if (verbose) {
+ stderrLine2(`[pre-fetch] init skipped: ${error instanceof Error ? error.message : String(error)}`);
+ }
+ }
+ }
+ }
  const clackOutput = { output: process.stderr };
  clack12.intro(banner(), clackOutput);
  clack12.log.info(
@@ -16436,114 +16508,129 @@ async function runExtractCommand(files, options, deps) {
  summaryWarnings += 1;
  return warning;
  };
- if (verbose) {
- for (const [index, file] of expanded.entries()) {
- const key = toReportKey(file, keySet);
- clack12.log.info(`${ui.dim(`[${index + 1}/${expanded.length}]`)} ${path31.basename(file)}`, clackOutput);
- try {
- const parsed = await resolvedDeps.parseTranscriptFileFn(file);
- clack12.log.info(
- `[parse] ${key}: messages=${parsed.messages.length}, chunks=${parsed.chunks.length}`,
- clackOutput
- );
- const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
- file: key,
- chunks: parsed.chunks,
- client,
- verbose: true,
- noDedup: options.noDedup === true,
- onVerbose: (line) => clack12.log.info(line, clackOutput),
- onStreamDelta: (delta) => process.stderr.write(delta)
- });
- process.stderr.write("\n");
- const stats = recordSuccess({
- key,
- chunks: parsed.chunks.length,
- extracted,
- parseWarnings: parsed.warnings
- });
- clack12.log.info(ui.success(`${stats.deduped_entries} entries (${stats.chunks} chunks)`), clackOutput);
- } catch (error) {
- const warning = recordFailure(key, error);
- clack12.log.error(warning, clackOutput);
+ try {
+ if (verbose) {
+ for (const [index, file] of expanded.entries()) {
+ const key = toReportKey(file, keySet);
+ clack12.log.info(`${ui.dim(`[${index + 1}/${expanded.length}]`)} ${path31.basename(file)}`, clackOutput);
+ try {
+ const parsed = await resolvedDeps.parseTranscriptFileFn(file);
+ clack12.log.info(
+ `[parse] ${key}: messages=${parsed.messages.length}, chunks=${parsed.chunks.length}`,
+ clackOutput
+ );
+ const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
+ file: key,
+ chunks: parsed.chunks,
+ client,
+ verbose: true,
+ noDedup: options.noDedup === true,
+ db,
+ embeddingApiKey,
+ noPreFetch: options.noPreFetch === true,
+ onVerbose: (line) => clack12.log.info(line, clackOutput),
+ onStreamDelta: (delta) => process.stderr.write(delta)
+ });
+ process.stderr.write("\n");
+ const stats = recordSuccess({
+ key,
+ chunks: parsed.chunks.length,
+ extracted,
+ parseWarnings: parsed.warnings
+ });
+ clack12.log.info(ui.success(`${stats.deduped_entries} entries (${stats.chunks} chunks)`), clackOutput);
+ } catch (error) {
+ const warning = recordFailure(key, error);
+ clack12.log.error(warning, clackOutput);
+ }
  }
- }
- } else {
- await clack12.tasks(
- expanded.map((file, index) => ({
- title: `${ui.dim(`[${index + 1}/${expanded.length}]`)} ${path31.basename(file)}`,
- task: async () => {
- const key = toReportKey(file, keySet);
- try {
- const parsed = await resolvedDeps.parseTranscriptFileFn(file);
- const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
- file: key,
- chunks: parsed.chunks,
- client,
- verbose: false,
- noDedup: options.noDedup === true
- });
- const stats = recordSuccess({
- key,
- chunks: parsed.chunks.length,
- extracted,
- parseWarnings: parsed.warnings
- });
- return `${stats.deduped_entries} entries (${stats.chunks} chunks)`;
- } catch (error) {
- recordFailure(key, error);
- return formatError("processing failed");
+ } else {
+ await clack12.tasks(
+ expanded.map((file, index) => ({
+ title: `${ui.dim(`[${index + 1}/${expanded.length}]`)} ${path31.basename(file)}`,
+ task: async () => {
+ const key = toReportKey(file, keySet);
+ try {
+ const parsed = await resolvedDeps.parseTranscriptFileFn(file);
+ const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
+ file: key,
+ chunks: parsed.chunks,
+ client,
+ verbose: false,
+ noDedup: options.noDedup === true,
+ db,
+ embeddingApiKey,
+ noPreFetch: options.noPreFetch === true
+ });
+ const stats = recordSuccess({
+ key,
+ chunks: parsed.chunks.length,
+ extracted,
+ parseWarnings: parsed.warnings
+ });
+ return `${stats.deduped_entries} entries (${stats.chunks} chunks)`;
+ } catch (error) {
+ recordFailure(key, error);
+ return formatError("processing failed");
+ }
  }
- }
- })),
+ })),
+ clackOutput
+ );
+ }
+ const report = {
+ version: APP_VERSION,
+ extracted_at: (/* @__PURE__ */ new Date()).toISOString(),
+ provider: client.resolvedModel.provider,
+ model: client.resolvedModel.modelId,
+ files: fileMap,
+ summary: {
+ files: Object.keys(fileMap).length,
+ chunks: summaryChunks,
+ successful_chunks: summarySuccessChunks,
+ failed_chunks: summaryFailedChunks,
+ raw_entries: summaryRawEntries,
+ deduped_entries: summaryDedupedEntries,
+ warnings: summaryWarnings
+ }
+ };
+ const writtenPaths = await resolvedDeps.writeOutputFn({
+ report,
+ format: options.format,
+ output: options.output,
+ split: options.split === true
+ });
+ clack12.note(
+ [
+ formatLabel("Files", String(report.summary.files)),
+ formatLabel("Chunks", `${report.summary.successful_chunks}/${report.summary.chunks} successful`),
+ formatLabel(
+ "Entries",
+ `${report.summary.deduped_entries} entries (${report.summary.raw_entries - report.summary.deduped_entries} duplicates removed)`
+ ),
+ report.summary.failed_chunks > 0 ? formatWarn(`${report.summary.failed_chunks} chunks failed`) : null,
+ report.summary.warnings > 0 ? formatWarn(`${report.summary.warnings} warning(s)`) : null
+ ].filter((line) => Boolean(line)).join("\n"),
+ "Extraction Complete",
  clackOutput
  );
- }
- const report = {
- version: APP_VERSION,
- extracted_at: (/* @__PURE__ */ new Date()).toISOString(),
- provider: client.resolvedModel.provider,
- model: client.resolvedModel.modelId,
- files: fileMap,
- summary: {
- files: Object.keys(fileMap).length,
- chunks: summaryChunks,
- successful_chunks: summarySuccessChunks,
- failed_chunks: summaryFailedChunks,
- raw_entries: summaryRawEntries,
- deduped_entries: summaryDedupedEntries,
- warnings: summaryWarnings
+ for (const outPath of writtenPaths) {
+ clack12.log.success("Wrote " + ui.bold(outPath), clackOutput);
+ }
+ clack12.outro(void 0, clackOutput);
+ return {
+ exitCode: report.summary.successful_chunks > 0 ? 0 : 1,
+ report,
+ writtenPaths
+ };
+ } finally {
+ if (db) {
+ try {
+ resolvedDeps.closeDbFn(db);
+ } catch {
+ }
  }
- };
- const writtenPaths = await resolvedDeps.writeOutputFn({
- report,
- format: options.format,
- output: options.output,
- split: options.split === true
- });
- clack12.note(
- [
- formatLabel("Files", String(report.summary.files)),
- formatLabel("Chunks", `${report.summary.successful_chunks}/${report.summary.chunks} successful`),
- formatLabel(
- "Entries",
- `${report.summary.deduped_entries} entries (${report.summary.raw_entries - report.summary.deduped_entries} duplicates removed)`
- ),
- report.summary.failed_chunks > 0 ? formatWarn(`${report.summary.failed_chunks} chunks failed`) : null,
- report.summary.warnings > 0 ? formatWarn(`${report.summary.warnings} warning(s)`) : null
- ].filter((line) => Boolean(line)).join("\n"),
- "Extraction Complete",
- clackOutput
- );
- for (const outPath of writtenPaths) {
- clack12.log.success("Wrote " + ui.bold(outPath), clackOutput);
  }
- clack12.outro(void 0, clackOutput);
- return {
- exitCode: report.summary.successful_chunks > 0 ? 0 : 1,
- report,
- writtenPaths
- };
  }
  function createProgram() {
  const program = new Command();
@@ -16575,15 +16662,17 @@ function createProgram() {
  program.outputHelp();
  clack12.outro(ui.dim("https://agenr.ai"));
  });
- program.command("extract").description("Extract structured knowledge from conversation transcripts").argument("<files...>", "One or more transcript files (.jsonl, .md, .txt)").option("--json", "Output raw KnowledgeEntry[] JSON", false).option("--format <type>", "Output format: json, markdown", "markdown").option("--output <file>", "Write output to file (or directory with --split)").option("--split", "Write one output file per input transcript", false).option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--no-dedup", "Skip post-extraction LLM dedup pass", false).option("--verbose", "Show extraction progress and debug info", false).action(async (files, opts) => {
+ program.command("extract").description("Extract structured knowledge from conversation transcripts").argument("<files...>", "One or more transcript files (.jsonl, .md, .txt)").option("--json", "Output raw KnowledgeEntry[] JSON", false).option("--format <type>", "Output format: json, markdown", "markdown").option("--output <file>", "Write output to file (or directory with --split)").option("--split", "Write one output file per input transcript", false).option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--no-dedup", "Skip post-extraction LLM dedup pass", false).option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--verbose", "Show extraction progress and debug info", false).action(async (files, opts) => {
  const selectedFormat = opts.json ? "json" : opts.format;
  const format = selectedFormat === "json" ? "json" : selectedFormat === "markdown" ? "markdown" : null;
  if (!format) {
  throw new Error("--format must be one of: json, markdown");
  }
+ const noPreFetch = opts.noPreFetch === true;
  const result = await runExtractCommand(files, {
  ...opts,
- format
+ format,
+ noPreFetch
  });
  process.exitCode = result.exitCode;
  });
@@ -16655,16 +16744,22 @@ function createProgram() {
  process.exitCode = result.exitCode;
  }
  );
- program.command("watch").description("Watch a transcript file and auto-extract knowledge as it grows").argument("[file]", "Transcript file to watch (.jsonl, .md, .txt)").option("--dir <path>", "Sessions directory to watch (resolver picks active file)").option("--platform <name>", "Session platform: openclaw, claude-code, codex, mtime").option("--auto", "Deprecated: use --platform <name> instead", false).option("--interval <seconds>", "Polling interval in seconds", parseIntOption, 300).option("--min-chunk <chars>", "Minimum new chars before extraction", parseIntOption, 2e3).option("--context <path>", "Regenerate context file after each cycle").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--verbose", "Show extraction progress", false).option("--dry-run", "Extract without storing", false).option("--once", "Run one cycle and exit", false).option("--json", "Output JSON results", false).action(async (file, opts) => {
- const result = await runWatchCommand(file, opts);
+ program.command("watch").description("Watch a transcript file and auto-extract knowledge as it grows").argument("[file]", "Transcript file to watch (.jsonl, .md, .txt)").option("--dir <path>", "Sessions directory to watch (resolver picks active file)").option("--platform <name>", "Session platform: openclaw, claude-code, codex, mtime").option("--auto", "Deprecated: use --platform <name> instead", false).option("--interval <seconds>", "Polling interval in seconds", parseIntOption, 300).option("--min-chunk <chars>", "Minimum new chars before extraction", parseIntOption, 2e3).option("--context <path>", "Regenerate context file after each cycle").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--verbose", "Show extraction progress", false).option("--dry-run", "Extract without storing", false).option("--once", "Run one cycle and exit", false).option("--json", "Output JSON results", false).action(async (file, opts) => {
+ const result = await runWatchCommand(file, {
+ ...opts,
+ noPreFetch: opts.noPreFetch === true
+ });
  process.exitCode = result.exitCode;
  });
  program.command("todo <subcommand> <subject>").description("Manage todos in the knowledge base").option("--db <path>", "Database path override").action(async (subcommand, subject, opts) => {
  const result = await runTodoCommand(subcommand, subject, { db: opts.db });
  process.exitCode = result.exitCode;
  });
- program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
- const result = await runIngestCommand(paths, opts);
+ program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
+ const result = await runIngestCommand(paths, {
+ ...opts,
+ noPreFetch: opts.noPreFetch === true
+ });
  process.exitCode = result.exitCode;
  });
  program.command("consolidate").description("Consolidate and clean up the knowledge database").option("--rules-only", "Only run rule-based cleanup (no LLM)", false).option("--dry-run", "Show what would happen without making changes", false).option("--forget", "Delete forgetting candidates after consolidation", false).option("--report", "Print pre-run stats report (with --dry-run: report only)", false).option("--platform <name>", "Scope consolidation to platform: openclaw, claude-code, codex").option("--project <name>", "Scope consolidation to project (repeatable)", (val, prev) => [...prev, val], []).option("--exclude-project <name>", "Exclude entries from project (repeatable)", (val, prev) => [...prev, val], []).option(
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "agenr",
- "version": "0.6.0",
+ "version": "0.6.2",
  "description": "AGENt memoRy -- Memory infrastructure for AI agents",
  "type": "module",
  "bin": {