agenr 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -1
- package/dist/cli-main.d.ts +20 -0
- package/dist/cli-main.js +310 -215
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED

@@ -1,5 +1,28 @@
 # Changelog
 
+## [0.6.2] - 2026-02-19
+
+### Added
+- feat(extractor): elaborative encoding pre-fetch now runs before each chunk extraction, retrieves top-related memories from the vector index, and injects up to 3 references into the extractor prompt
+- feat(cli): `--no-pre-fetch` flag added to `agenr extract`, `agenr ingest`, and `agenr watch` to opt out of prompt memory pre-fetch
+- feat(cli): `--db` flag added to `agenr extract`, `agenr ingest`, and `agenr watch` for database path overrides
+- feat(recall): exported `fetchRelatedEntries()` thin wrapper for direct ANN vector candidate queries
+
+### Changed
+- tuning(extractor): pre-fetch similarity threshold set to `0.78` for `text-embedding-3-small` (1024 dimensions)
+- tuning(extractor): fresh-install pre-fetch skip threshold set to 20 non-superseded entries
+- tuning(extractor): pre-fetch timeout set to 5000ms to avoid chunk extraction stalls on hanging embedding calls
+
+### Security
+- prompt: injected related memories are explicitly reference-only and do not lower the SKIP threshold
+- runtime: pre-fetch is always best-effort and silently degrades to empty related-memory context on any error
+
+## [0.6.1] - 2026-02-19
+
+### Fixed
+- fix(watch): context file generation failed with CLIENT_CLOSED when context path is configured
+- fix(mcp): remove agenr_done tool (was not removed in v0.6.0 as intended)
+
 ## [0.6.0] - 2026-02-18
 
 ### Added

@@ -22,7 +45,7 @@
 - fix(watch): use real recall score breakdown in generated context variants
 
 ### Removed
-- agenr_done MCP tool removed
+- `agenr_done` MCP tool removed; use `agenr_retire` instead (supports all entry types, not just todos)
 
 ## [0.5.3] - 2026-02-18
 
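For orientation, a minimal sketch of what the 0.6.2 pre-fetch wiring looks like from calling code. This is illustrative only: it assumes the declarations added to `dist/cli-main.d.ts` below are reachable from the package entry point, and `chunks`/`client` are placeholders for a parsed transcript and a resolved LLM client.

```ts
import { extractKnowledgeFromChunks, readConfig, resolveEmbeddingApiKey, getDb, initDb, closeDb } from "agenr";

declare const chunks: any[]; // parsed transcript chunks (placeholder)
declare const client: any;   // resolved LLM client (placeholder)

const config = readConfig(process.env);
const db = getDb(config?.db?.path); // same override point as the new --db flag
await initDb(db);
try {
  await extractKnowledgeFromChunks({
    file: "session.jsonl",
    chunks,
    client,
    verbose: false,
    // Pre-fetch only runs when both db and embeddingApiKey are provided;
    // omit them or pass noPreFetch: true to mirror --no-pre-fetch.
    db,
    embeddingApiKey: resolveEmbeddingApiKey(config, process.env),
    noPreFetch: false
  });
} finally {
  closeDb(db);
}
```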
package/dist/cli-main.d.ts
CHANGED

@@ -1,5 +1,6 @@
 import { Command } from 'commander';
 import { Model, Api, Context, SimpleStreamOptions, AssistantMessageEvent, AssistantMessage } from '@mariozechner/pi-ai';
+import { Client } from '@libsql/client';
 
 declare const KNOWLEDGE_TYPES: readonly ["fact", "decision", "preference", "todo", "relationship", "event", "lesson"];
 declare const EXPIRY_LEVELS: readonly ["core", "permanent", "temporary"];

@@ -130,8 +131,16 @@ interface LlmClient {
     credentials: ResolvedCredentials;
 }
 
+declare function readConfig(env?: NodeJS.ProcessEnv): AgenrConfig | null;
+
 declare function deduplicateEntries(entries: KnowledgeEntry[]): KnowledgeEntry[];
 
+declare function getDb(dbPath?: string): Client;
+declare function initDb(client: Client): Promise<void>;
+declare function closeDb(client: Client): void;
+
+declare function resolveEmbeddingApiKey(config: AgenrConfig | null | undefined, env?: NodeJS.ProcessEnv): string;
+
 type SimpleAssistantStream = AsyncIterable<AssistantMessageEvent> & {
     result: () => Promise<AssistantMessage>;
 };

@@ -165,6 +174,10 @@ declare function extractKnowledgeFromChunks(params: {
     streamSimpleImpl?: StreamSimpleFn;
     sleepImpl?: (ms: number) => Promise<void>;
     retryDelayMs?: (attempt: number) => number;
+    db?: Client;
+    embeddingApiKey?: string;
+    noPreFetch?: boolean;
+    embedFn?: (texts: string[], apiKey: string) => Promise<number[][]>;
 }): Promise<ExtractChunksResult>;
 
 interface ResolveLlmClientInput {

@@ -204,8 +217,10 @@ interface ExtractCommandOptions {
     split?: boolean;
     model?: string;
     provider?: string;
+    db?: string;
     verbose?: boolean;
     noDedup?: boolean;
+    noPreFetch?: boolean;
 }
 interface CliDeps {
     expandInputFilesFn: typeof expandInputFiles;

@@ -215,6 +230,11 @@ interface CliDeps {
     extractKnowledgeFromChunksFn: typeof extractKnowledgeFromChunks;
     deduplicateEntriesFn: typeof deduplicateEntries;
     writeOutputFn: typeof writeOutput;
+    readConfigFn: typeof readConfig;
+    resolveEmbeddingApiKeyFn: typeof resolveEmbeddingApiKey;
+    getDbFn: typeof getDb;
+    initDbFn: typeof initDb;
+    closeDbFn: typeof closeDb;
 }
 declare function runExtractCommand(files: string[], options: ExtractCommandOptions, deps?: Partial<CliDeps>): Promise<{
     exitCode: number;
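The new `CliDeps` members make the database access in `runExtractCommand` injectable. A sketch of how a test might stub them out; only the `CliDeps` keys and the `Partial<CliDeps>` third argument come from the declarations above, the import path and stub behaviour are assumptions, and a real run would still need LLM credentials (or a stubbed `createLlmClientFn`/`extractKnowledgeFromChunksFn`).

```ts
import { runExtractCommand } from "agenr"; // assumes re-export from the package entry point

// Hypothetical test doubles: keep the pre-fetch path away from a real libsql file.
const result = await runExtractCommand(
  ["fixtures/session.jsonl"],
  { format: "json", noPreFetch: true },
  {
    readConfigFn: () => null, // pretend no agenr config exists
    resolveEmbeddingApiKeyFn: () => { throw new Error("no embedding key in tests"); },
    getDbFn: () => { throw new Error("should not be reached when noPreFetch is set"); }
  }
);
console.log(result.exitCode);
```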
package/dist/cli-main.js
CHANGED

@@ -3867,6 +3867,9 @@ async function fetchVectorCandidates(db, queryEmbedding, limit, platform, projec
     };
   });
 }
+async function fetchRelatedEntries(db, queryEmbedding, limit) {
+  return fetchVectorCandidates(db, queryEmbedding, limit);
+}
 async function fetchSessionCandidates(db, limit, context, platform, project, excludeProject, projectStrict) {
   const normalizedProject = parseProjectList(project);
   const normalizedExclude = parseProjectList(excludeProject);
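The changelog describes `fetchRelatedEntries()` as an exported thin wrapper; the candidates it returns are the same `{ entry, vectorSim }` records that `preFetchRelated` filters further down in this file. A hedged sketch of calling it directly; the import path and the embedding step are assumptions, and any function producing a query vector with the same dimensionality as the stored index will do.

```ts
import { getDb, initDb, closeDb, fetchRelatedEntries } from "agenr"; // assumes these are re-exported

declare function embedQuery(text: string): Promise<number[]>; // placeholder embedding call (1024 dims to match the index)

const db = getDb("/path/to/agenr.db"); // placeholder path
await initDb(db);
try {
  const queryVec = await embedQuery("preferred package manager");
  const candidates = await fetchRelatedEntries(db, queryVec, 10);
  for (const { entry, vectorSim } of candidates) {
    if (vectorSim >= 0.78) {
      console.log(`[${entry.type}] ${entry.subject}: ${entry.content}`);
    }
  }
} finally {
  closeDb(db);
}
```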
@@ -9176,11 +9179,18 @@ WHY: Routine execution. No durable knowledge, decisions, or lessons.
 - canonical_key: optional lowercase hyphenated 3-5 word identifier when clear (example: "preferred-package-manager")
 - content: clear declarative statement, not a quote. Min 20 chars.
 - source_context: one sentence, max 20 words.
-- tags: 1-4 lowercase descriptive tags
+- tags: 1-4 lowercase descriptive tags.
+When related memories are injected before a chunk, they are reference material only. They do not lower the emission threshold.`;
 var MAX_ATTEMPTS = 5;
 var DEFAULT_INTER_CHUNK_DELAY_MS = 150;
 var DEDUP_BATCH_SIZE = 50;
 var DEDUP_BATCH_TRIGGER = 100;
+var PREFETCH_SIMILARITY_THRESHOLD = 0.78;
+var PREFETCH_SIMILARITY_EPSILON = 1e-6;
+var PREFETCH_CANDIDATE_LIMIT = 10;
+var MAX_PREFETCH_RESULTS = 3;
+var PREFETCH_MIN_DB_ENTRIES = 20;
+var PREFETCH_TIMEOUT_MS = 5e3;
 var DEDUP_SYSTEM_PROMPT = `You are deduplicating a list of extracted knowledge entries.
 
 Rules:
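The epsilon exists because the similarity scores are floating-point: a candidate sitting right at the 0.78 cut-off can come back a hair below it and would otherwise be dropped. A small illustration of the comparison `preFetchRelated` applies below:

```ts
const PREFETCH_SIMILARITY_THRESHOLD = 0.78;
const PREFETCH_SIMILARITY_EPSILON = 1e-6;

// Same comparison applied to each candidate's vectorSim.
const passesThreshold = (vectorSim: number): boolean =>
  vectorSim + PREFETCH_SIMILARITY_EPSILON >= PREFETCH_SIMILARITY_THRESHOLD;

passesThreshold(0.78);      // true
passesThreshold(0.7799995); // true  -- within the epsilon of the cut-off
passesThreshold(0.75);      // false
```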
@@ -9244,8 +9254,77 @@ var ParseResponseError = class extends Error {
 function normalize3(value) {
   return value.trim().toLowerCase();
 }
-function
+async function preFetchRelated(chunkText2, db, embeddingApiKey, embedFn = embed, onVerbose) {
+  const run = async () => {
+    try {
+      if (!chunkText2.trim()) {
+        onVerbose?.("[pre-fetch] skipped (empty chunk text)");
+        return [];
+      }
+      const countResult = await db.execute({
+        sql: "SELECT COUNT(*) AS count FROM entries WHERE superseded_by IS NULL",
+        args: []
+      });
+      const count = Number(countResult.rows[0]?.count ?? 0);
+      if (count < PREFETCH_MIN_DB_ENTRIES) {
+        onVerbose?.(`[pre-fetch] skipped (db count ${count} < ${PREFETCH_MIN_DB_ENTRIES})`);
+        return [];
+      }
+      const vectors = await embedFn([chunkText2], embeddingApiKey);
+      const queryVec = vectors[0];
+      if (!queryVec || !Array.isArray(queryVec)) {
+        onVerbose?.("[pre-fetch] skipped: embedding provider returned no query vector");
+        return [];
+      }
+      onVerbose?.(`[pre-fetch] embedded chunk (${queryVec.length} dims)`);
+      const candidates = await fetchRelatedEntries(db, queryVec, PREFETCH_CANDIDATE_LIMIT);
+      onVerbose?.(`[pre-fetch] ${candidates.length} candidates returned`);
+      const above = candidates.filter(
+        (candidate) => candidate.vectorSim + PREFETCH_SIMILARITY_EPSILON >= PREFETCH_SIMILARITY_THRESHOLD
+      );
+      onVerbose?.(`[pre-fetch] ${above.length} above threshold ${PREFETCH_SIMILARITY_THRESHOLD}`);
+      return above.slice(0, MAX_PREFETCH_RESULTS).map((candidate) => candidate.entry);
+    } catch (error) {
+      onVerbose?.(`[pre-fetch] skipped: ${error instanceof Error ? error.message : String(error)}`);
+      return [];
+    }
+  };
+  let timeoutId;
+  const timeout = new Promise((resolve) => {
+    timeoutId = setTimeout(() => {
+      onVerbose?.(`[pre-fetch] skipped: timeout after ${PREFETCH_TIMEOUT_MS}ms`);
+      resolve([]);
+    }, PREFETCH_TIMEOUT_MS);
+  });
+  const result = await Promise.race([run(), timeout]);
+  if (timeoutId) {
+    clearTimeout(timeoutId);
+  }
+  return result;
+}
+function buildUserPrompt(chunk, related) {
+  if (related === void 0) {
+    return [
+      "Selectively extract durable knowledge from this conversation transcript.",
+      "",
+      "Transcript:",
+      "---",
+      chunk.text,
+      "---",
+      "",
+      'Call submit_knowledge once with {"entries": [...]} and use an empty array if nothing qualifies.'
+    ].join("\n");
+  }
+  const memoryBlock = related.length === 0 ? "[none found]" : related.map((entry) => `- [${entry.type}] ${entry.subject}: ${entry.content}`).join("\n");
   return [
+    "Existing related memories (reference only -- your SKIP/emit threshold is unchanged):",
+    memoryBlock,
+    "",
+    "Do not emit entries that express the same fact as any memory listed above, even in different words.",
+    "If this chunk clearly contradicts a memory listed above, emit a fact entry stating the contradiction directly in the content field. Do not use inline citation markers like [1] or [2] in any field -- these become dead references.",
+    "Only emit a cross-reference entry when this chunk extends, contradicts, or updates a specific fact. Do not cross-reference just because entries share the same project or general domain.",
+    "Your SKIP/emit threshold is unchanged. The memories above are reference only.",
+    "",
     "Selectively extract durable knowledge from this conversation transcript.",
     "",
     "Transcript:",
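Both `preFetchRelated` above and the public `extractKnowledgeFromChunks` signature accept an `embedFn` override, which makes the pre-fetch exercisable without a network call. A sketch of a deterministic stub; the helper is hypothetical, and a real override must return vectors with the same dimensionality as the stored index (1024 for `text-embedding-3-small` per the changelog).

```ts
// Deterministic fake embedder for tests: hashes each text into a fixed-size unit vector.
const fakeEmbedFn = async (texts: string[], _apiKey: string): Promise<number[][]> =>
  texts.map((text) => {
    const vec = new Array<number>(1024).fill(0);
    for (let i = 0; i < text.length; i += 1) {
      vec[text.charCodeAt(i) % 1024] += 1;
    }
    const norm = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0)) || 1;
    return vec.map((v) => v / norm); // normalize so cosine-style similarity stays in [0, 1]
  });

// Passed through the embedFn parameter, e.g.:
// await extractKnowledgeFromChunks({ ...params, db, embeddingApiKey: "unused", embedFn: fakeEmbedFn });
```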
@@ -9826,12 +9905,13 @@ async function sleepMs2(ms) {
   await new Promise((resolve) => setTimeout(resolve, ms));
 }
 async function extractChunkOnce(params) {
+  const prompt = buildUserPrompt(params.chunk, params.related);
   const context = {
     systemPrompt: SYSTEM_PROMPT,
     messages: [
       {
         role: "user",
-        content:
+        content: prompt,
         timestamp: Date.now()
       }
     ],

@@ -9902,6 +9982,13 @@ async function extractKnowledgeFromChunks(params) {
     let lastError = null;
     let chunkResult = null;
     let streamBuffer = [];
+    const related = params.noPreFetch === true ? void 0 : params.db && params.embeddingApiKey ? await preFetchRelated(
+      chunk.text,
+      params.db,
+      params.embeddingApiKey,
+      params.embedFn,
+      params.verbose ? params.onVerbose : void 0
+    ) : void 0;
     for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt += 1) {
       if (params.verbose) {
         params.onVerbose?.(

@@ -9922,7 +10009,8 @@ async function extractKnowledgeFromChunks(params) {
         onStreamDelta: bufferStreamDeltas ? (delta, kind) => {
           streamBuffer.push({ delta, kind });
         } : params.onStreamDelta,
-        streamSimpleImpl: params.streamSimpleImpl
+        streamSimpleImpl: params.streamSimpleImpl,
+        related
       });
       warnings.push(...chunkResult.warnings);
       successfulChunks += 1;

@@ -12430,6 +12518,21 @@ async function runIngestCommand(inputPaths, options, deps) {
   let forceDeletedEntrySourceRows = 0;
   let completed = 0;
   let embeddingApiKey = null;
+  if (!options.noPreFetch) {
+    try {
+      embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
+    } catch (error) {
+      embeddingApiKey = null;
+      if (verbose) {
+        clack4.log.warn(
+          formatWarn(
+            `Pre-fetch disabled - embedding API key not available: ${error instanceof Error ? error.message : String(error)}`
+          ),
+          clackOutput
+        );
+      }
+    }
+  }
   let watchStateLoaded = false;
   let watchState = createEmptyWatchState();
   let cursor = 0;

@@ -12604,6 +12707,9 @@ async function runIngestCommand(inputPaths, options, deps) {
       client,
       verbose: false,
       llmConcurrency,
+      db: options.noPreFetch ? void 0 : db,
+      embeddingApiKey: options.noPreFetch ? void 0 : embeddingApiKey ?? void 0,
+      noPreFetch: options.noPreFetch === true,
       onVerbose: verbose ? (line) => {
         clack4.log.info(line, clackOutput);
       } : void 0,

@@ -13202,26 +13308,6 @@ var TOOL_DEFINITIONS = [
       }
     }
   },
-  {
-    name: "agenr_done",
-    description: "Mark a todo as completed and remove it from active recall. Use when you have resolved a task or confirmed something is no longer needed. Fuzzy-matches by subject.",
-    inputSchema: {
-      type: "object",
-      additionalProperties: false,
-      required: ["subject"],
-      properties: {
-        subject: {
-          type: "string",
-          description: "The subject of the todo to mark as done. Partial/fuzzy match is supported."
-        },
-        confirm: {
-          type: "boolean",
-          description: "If true, skip confirmation prompt and mark done immediately. Default false.",
-          default: false
-        }
-      }
-    }
-  },
   {
     name: "agenr_extract",
     description: "Extract knowledge entries from raw text.",

@@ -13562,30 +13648,6 @@ function formatExtractedText(entries, stored) {
   }
   return lines.join("\n");
 }
-function toActiveTodoRows(rows) {
-  return rows.map((row) => ({
-    id: typeof row.id === "string" ? row.id : String(row.id ?? ""),
-    subject: typeof row.subject === "string" ? row.subject : String(row.subject ?? ""),
-    content: typeof row.content === "string" ? row.content : String(row.content ?? "")
-  }));
-}
-function findTodoMatches(rows, subject) {
-  const normalizedSubject = subject.trim().toLowerCase();
-  return rows.filter((row) => {
-    const normalizedRow = row.subject.toLowerCase();
-    return normalizedRow.includes(normalizedSubject) || normalizedSubject.includes(normalizedRow);
-  });
-}
-function formatDoneCandidates(subject, candidates) {
-  const lines = [`Multiple active todos match "${subject}":`, ""];
-  for (let i = 0; i < candidates.length; i += 1) {
-    const candidate = candidates[i];
-    lines.push(`${i + 1}. ${candidate.subject}`);
-  }
-  lines.push("");
-  lines.push("Re-run with confirm=true to mark the top match.");
-  return lines.join("\n");
-}
 function extractIdForError(raw) {
   if (!isRecord(raw) || !hasOwn(raw, "id")) {
     return null;

@@ -13782,47 +13844,6 @@ function createMcpServer(options = {}, deps = {}) {
     });
     return formatStoreSummary(result);
   }
-  async function callDoneTool(args) {
-    const subject = typeof args.subject === "string" ? args.subject.trim() : "";
-    if (!subject) {
-      throw new RpcError(JSON_RPC_INVALID_PARAMS, "subject is required");
-    }
-    if (args.confirm !== void 0 && typeof args.confirm !== "boolean") {
-      throw new RpcError(JSON_RPC_INVALID_PARAMS, "confirm must be a boolean");
-    }
-    const confirm5 = args.confirm === true;
-    const db = await ensureDb();
-    const result = await db.execute({
-      sql: `
-        SELECT id, subject, content
-        FROM entries
-        WHERE type = 'todo' AND superseded_by IS NULL
-        ORDER BY importance DESC, created_at DESC, subject ASC
-      `,
-      args: []
-    });
-    const todos = toActiveTodoRows(result.rows);
-    const matches = findTodoMatches(todos, subject);
-    if (matches.length === 0) {
-      return {
-        content: [{ type: "text", text: `No active todo matching: ${subject}` }],
-        isError: true
-      };
-    }
-    if (matches.length > 1 && !confirm5) {
-      return {
-        content: [{ type: "text", text: formatDoneCandidates(subject, matches.slice(0, 5)) }]
-      };
-    }
-    const selected = matches[0];
-    await db.execute({
-      sql: "UPDATE entries SET superseded_by = id, updated_at = datetime('now') WHERE id = ?",
-      args: [selected.id]
-    });
-    return {
-      content: [{ type: "text", text: `Marked done: ${selected.subject}` }]
-    };
-  }
   async function callExtractTool(args) {
     const text2 = typeof args.text === "string" ? args.text : "";
     if (!text2.trim()) {

@@ -13930,9 +13951,6 @@ function createMcpServer(options = {}, deps = {}) {
         content: [{ type: "text", text: await callStoreTool(params.args) }]
       };
     }
-    if (params.name === "agenr_done") {
-      return callDoneTool(params.args);
-    }
     if (params.name === "agenr_extract") {
       return {
         content: [{ type: "text", text: await callExtractTool(params.args) }]

@@ -15149,6 +15167,16 @@ async function runWatcher(options, deps) {
   if (db) {
     await resolvedDeps.initDbFn(db);
   }
+  if (!options.noPreFetch) {
+    try {
+      embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
+    } catch (error) {
+      embeddingApiKey = null;
+      options.onWarn?.(
+        `Pre-fetch disabled - embedding API key not available: ${error instanceof Error ? error.message : String(error)}`
+      );
+    }
+  }
   let dbChain = Promise.resolve();
   const withDbLock = async (fn) => {
     const previous = dbChain;

@@ -15378,6 +15406,9 @@ async function runWatcher(options, deps) {
       chunks: parsed.chunks,
      client,
       verbose: options.verbose,
+      db: options.noPreFetch ? void 0 : db ?? void 0,
+      embeddingApiKey: options.noPreFetch ? void 0 : embeddingApiKey ?? void 0,
+      noPreFetch: options.noPreFetch === true,
       onChunkComplete: async (chunkResult) => {
         await processChunkEntries(chunkResult.entries);
       }

@@ -15836,6 +15867,7 @@ async function runWatchCommand(file, options, deps) {
   const once2 = options.once === true;
   const json = options.json === true;
   const raw = options.raw === true;
+  const contextEnabled = Boolean(options.context);
   const modeConfig = await resolveWatchMode(file, options, resolvedDeps.statFileFn);
   const clackOutput = { output: process.stderr };
   clack10.intro(banner(), clackOutput);

@@ -15876,8 +15908,16 @@ async function runWatchCommand(file, options, deps) {
   );
   clack10.log.info("", clackOutput);
   clack10.log.info("Waiting for changes...", clackOutput);
+  const emitWatchWarning = (message) => {
+    if (message.startsWith("Filtered:")) {
+      clack10.log.info(message, clackOutput);
+      return;
+    }
+    clack10.log.warn(formatWarn(message), clackOutput);
+  };
   let cycleCount = 0;
   let contextChain = Promise.resolve();
+  let contextDb = null;
   const summary = await runWatcher(
     {
       filePath: modeConfig.filePath ?? void 0,

@@ -15892,17 +15932,12 @@ async function runWatchCommand(file, options, deps) {
       raw,
       once: once2,
       onlineDedup: options.onlineDedup !== false,
+      noPreFetch: options.noPreFetch === true,
       model: options.model,
       provider: options.provider,
       dbPath: options.db,
       initialState: state,
-      onWarn:
-        if (message.startsWith("Filtered:")) {
-          clack10.log.info(message, clackOutput);
-          return;
-        }
-        clack10.log.warn(formatWarn(message), clackOutput);
-      },
+      onWarn: emitWatchWarning,
       onSwitch: (from, to, platform) => {
         const fromLabel = from ? formatSwitchLabel(from) : "(none)";
         const platformLabel = platform ? ` [${platform}]` : "";

@@ -15944,6 +15979,7 @@ async function runWatchCommand(file, options, deps) {
           clackOutput
         );
         if (!dryRun && options.context && result.entriesStored > 0 && ctx.db) {
+          contextDb = ctx.db;
           const contextPath = path29.resolve(options.context.replace(/^~(?=$|\/)/, os19.homedir()));
           contextChain = contextChain.then(async () => {
             await resolvedDeps.generateContextFileFn(

@@ -15954,12 +15990,8 @@ async function runWatchCommand(file, options, deps) {
             );
             await writeContextVariants(ctx.db, contextPath, resolvedDeps.nowFn());
           }).catch((err) => {
-[…]
-[…]
-            `Context refresh failed: ${err instanceof Error ? err.message : String(err)}`,
-            clackOutput
-          );
-        }
+            const msg = `Context refresh failed: ${err instanceof Error ? err.message : String(err)}`;
+            emitWatchWarning(msg);
           });
         }
       }

@@ -15973,7 +16005,8 @@ async function runWatchCommand(file, options, deps) {
       deduplicateEntriesFn: resolvedDeps.deduplicateEntriesFn,
       getDbFn: resolvedDeps.getDbFn,
       initDbFn: resolvedDeps.initDbFn,
-      closeDbFn:
+      closeDbFn: contextEnabled ? () => {
+      } : resolvedDeps.closeDbFn,
       storeEntriesFn: resolvedDeps.storeEntriesFn,
       loadWatchStateFn: resolvedDeps.loadWatchStateFn,
       saveWatchStateFn: resolvedDeps.saveWatchStateFn,

@@ -15984,6 +16017,12 @@ async function runWatchCommand(file, options, deps) {
     }
   );
   await contextChain.catch(() => void 0);
+  if (contextEnabled && contextDb) {
+    try {
+      resolvedDeps.closeDbFn(contextDb);
+    } catch {
+    }
+  }
   clack10.log.info(
     `Summary: ${summary.cycles} cycles | ${summary.entriesStored} entries stored | watched for ${formatDuration(summary.durationMs)}`,
     clackOutput
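The `contextEnabled` branches above amount to one design choice: when `--context` is set, the watcher is handed a no-op `closeDbFn` so the client stays open for the trailing `contextChain` refreshes, and the real close happens only after that chain has settled. Stripped of the agenr specifics, the pattern looks roughly like this (a sketch, not the package's code):

```ts
type Closable = { close(): void };

async function watchWithContextRefresh(
  db: Closable,
  runWatcher: (hooks: { onCycle: () => void; closeDb: () => void }) => Promise<void>,
  refreshContext: () => Promise<void>
): Promise<void> {
  let chain: Promise<void> = Promise.resolve();
  await runWatcher({
    // Each cycle appends a refresh; failures are swallowed so the chain keeps going.
    onCycle: () => {
      chain = chain.then(refreshContext).catch(() => undefined);
    },
    // The watcher must not close the handle itself while refreshes may still run.
    closeDb: () => {},
  });
  await chain;    // let any still-queued refreshes finish
  try {
    db.close();   // the real close happens exactly once, here
  } catch {}
}
```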
@@ -16334,6 +16373,10 @@ async function runSetup(env = process.env) {
 }
 
 // src/cli-main.ts
+function stderrLine2(message) {
+  process.stderr.write(`${message}
+`);
+}
 async function assertReadableFile(filePath) {
   const stat = await fs32.stat(filePath);
   if (!stat.isFile()) {

@@ -16376,7 +16419,12 @@ async function runExtractCommand(files, options, deps) {
     createLlmClientFn: deps?.createLlmClientFn ?? createLlmClient,
     extractKnowledgeFromChunksFn: deps?.extractKnowledgeFromChunksFn ?? extractKnowledgeFromChunks,
     deduplicateEntriesFn: deps?.deduplicateEntriesFn ?? deduplicateEntries,
-    writeOutputFn: deps?.writeOutputFn ?? writeOutput
+    writeOutputFn: deps?.writeOutputFn ?? writeOutput,
+    readConfigFn: deps?.readConfigFn ?? readConfig,
+    resolveEmbeddingApiKeyFn: deps?.resolveEmbeddingApiKeyFn ?? resolveEmbeddingApiKey,
+    getDbFn: deps?.getDbFn ?? getDb,
+    initDbFn: deps?.initDbFn ?? initDb,
+    closeDbFn: deps?.closeDbFn ?? closeDb
   };
   const expanded = await resolvedDeps.expandInputFilesFn(files);
   if (expanded.length === 0) {

@@ -16390,6 +16438,30 @@ async function runExtractCommand(files, options, deps) {
     model: options.model,
     env: process.env
   });
+  let db;
+  let embeddingApiKey;
+  if (!options.noPreFetch) {
+    const config = resolvedDeps.readConfigFn(process.env);
+    const dbPath = options.db?.trim() || config?.db?.path?.trim();
+    if (dbPath) {
+      let preFetchDb;
+      try {
+        embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
+        preFetchDb = resolvedDeps.getDbFn(dbPath);
+        await resolvedDeps.initDbFn(preFetchDb);
+        db = preFetchDb;
+      } catch (error) {
+        if (preFetchDb) {
+          resolvedDeps.closeDbFn(preFetchDb);
+        }
+        db = void 0;
+        embeddingApiKey = void 0;
+        if (verbose) {
+          stderrLine2(`[pre-fetch] init skipped: ${error instanceof Error ? error.message : String(error)}`);
+        }
+      }
+    }
+  }
   const clackOutput = { output: process.stderr };
   clack12.intro(banner(), clackOutput);
   clack12.log.info(

@@ -16436,114 +16508,129 @@ async function runExtractCommand(files, options, deps) {
     summaryWarnings += 1;
     return warning;
   };
-[…]
-[…]
-  const
-[… old lines 16442-16468 are not rendered in the source diff view]
+  try {
+    if (verbose) {
+      for (const [index, file] of expanded.entries()) {
+        const key = toReportKey(file, keySet);
+        clack12.log.info(`${ui.dim(`[${index + 1}/${expanded.length}]`)} ${path31.basename(file)}`, clackOutput);
+        try {
+          const parsed = await resolvedDeps.parseTranscriptFileFn(file);
+          clack12.log.info(
+            `[parse] ${key}: messages=${parsed.messages.length}, chunks=${parsed.chunks.length}`,
+            clackOutput
+          );
+          const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
+            file: key,
+            chunks: parsed.chunks,
+            client,
+            verbose: true,
+            noDedup: options.noDedup === true,
+            db,
+            embeddingApiKey,
+            noPreFetch: options.noPreFetch === true,
+            onVerbose: (line) => clack12.log.info(line, clackOutput),
+            onStreamDelta: (delta) => process.stderr.write(delta)
+          });
+          process.stderr.write("\n");
+          const stats = recordSuccess({
+            key,
+            chunks: parsed.chunks.length,
+            extracted,
+            parseWarnings: parsed.warnings
+          });
+          clack12.log.info(ui.success(`${stats.deduped_entries} entries (${stats.chunks} chunks)`), clackOutput);
+        } catch (error) {
+          const warning = recordFailure(key, error);
+          clack12.log.error(warning, clackOutput);
+        }
       }
-  }
-[… old lines 16471-16495 are not rendered in the source diff view]
+    } else {
+      await clack12.tasks(
+        expanded.map((file, index) => ({
+          title: `${ui.dim(`[${index + 1}/${expanded.length}]`)} ${path31.basename(file)}`,
+          task: async () => {
+            const key = toReportKey(file, keySet);
+            try {
+              const parsed = await resolvedDeps.parseTranscriptFileFn(file);
+              const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
+                file: key,
+                chunks: parsed.chunks,
+                client,
+                verbose: false,
+                noDedup: options.noDedup === true,
+                db,
+                embeddingApiKey,
+                noPreFetch: options.noPreFetch === true
+              });
+              const stats = recordSuccess({
+                key,
+                chunks: parsed.chunks.length,
+                extracted,
+                parseWarnings: parsed.warnings
+              });
+              return `${stats.deduped_entries} entries (${stats.chunks} chunks)`;
+            } catch (error) {
+              recordFailure(key, error);
+              return formatError("processing failed");
+            }
           }
-  }
-[…]
+        })),
+        clackOutput
+      );
+    }
+    const report = {
+      version: APP_VERSION,
+      extracted_at: (/* @__PURE__ */ new Date()).toISOString(),
+      provider: client.resolvedModel.provider,
+      model: client.resolvedModel.modelId,
+      files: fileMap,
+      summary: {
+        files: Object.keys(fileMap).length,
+        chunks: summaryChunks,
+        successful_chunks: summarySuccessChunks,
+        failed_chunks: summaryFailedChunks,
+        raw_entries: summaryRawEntries,
+        deduped_entries: summaryDedupedEntries,
+        warnings: summaryWarnings
+      }
+    };
+    const writtenPaths = await resolvedDeps.writeOutputFn({
+      report,
+      format: options.format,
+      output: options.output,
+      split: options.split === true
+    });
+    clack12.note(
+      [
+        formatLabel("Files", String(report.summary.files)),
+        formatLabel("Chunks", `${report.summary.successful_chunks}/${report.summary.chunks} successful`),
+        formatLabel(
+          "Entries",
+          `${report.summary.deduped_entries} entries (${report.summary.raw_entries - report.summary.deduped_entries} duplicates removed)`
+        ),
+        report.summary.failed_chunks > 0 ? formatWarn(`${report.summary.failed_chunks} chunks failed`) : null,
+        report.summary.warnings > 0 ? formatWarn(`${report.summary.warnings} warning(s)`) : null
+      ].filter((line) => Boolean(line)).join("\n"),
+      "Extraction Complete",
       clackOutput
     );
-[…]
-[… old lines 16502-16515 are not rendered in the source diff view]
+    for (const outPath of writtenPaths) {
+      clack12.log.success("Wrote " + ui.bold(outPath), clackOutput);
+    }
+    clack12.outro(void 0, clackOutput);
+    return {
+      exitCode: report.summary.successful_chunks > 0 ? 0 : 1,
+      report,
+      writtenPaths
+    };
+  } finally {
+    if (db) {
+      try {
+        resolvedDeps.closeDbFn(db);
+      } catch {
+      }
     }
-  };
-  const writtenPaths = await resolvedDeps.writeOutputFn({
-    report,
-    format: options.format,
-    output: options.output,
-    split: options.split === true
-  });
-  clack12.note(
-    [
-      formatLabel("Files", String(report.summary.files)),
-      formatLabel("Chunks", `${report.summary.successful_chunks}/${report.summary.chunks} successful`),
-      formatLabel(
-        "Entries",
-        `${report.summary.deduped_entries} entries (${report.summary.raw_entries - report.summary.deduped_entries} duplicates removed)`
-      ),
-      report.summary.failed_chunks > 0 ? formatWarn(`${report.summary.failed_chunks} chunks failed`) : null,
-      report.summary.warnings > 0 ? formatWarn(`${report.summary.warnings} warning(s)`) : null
-    ].filter((line) => Boolean(line)).join("\n"),
-    "Extraction Complete",
-    clackOutput
-  );
-  for (const outPath of writtenPaths) {
-    clack12.log.success("Wrote " + ui.bold(outPath), clackOutput);
   }
-  clack12.outro(void 0, clackOutput);
-  return {
-    exitCode: report.summary.successful_chunks > 0 ? 0 : 1,
-    report,
-    writtenPaths
-  };
 }
 function createProgram() {
   const program = new Command();

@@ -16575,15 +16662,17 @@ function createProgram() {
     program.outputHelp();
     clack12.outro(ui.dim("https://agenr.ai"));
   });
-  program.command("extract").description("Extract structured knowledge from conversation transcripts").argument("<files...>", "One or more transcript files (.jsonl, .md, .txt)").option("--json", "Output raw KnowledgeEntry[] JSON", false).option("--format <type>", "Output format: json, markdown", "markdown").option("--output <file>", "Write output to file (or directory with --split)").option("--split", "Write one output file per input transcript", false).option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--no-dedup", "Skip post-extraction LLM dedup pass", false).option("--verbose", "Show extraction progress and debug info", false).action(async (files, opts) => {
+  program.command("extract").description("Extract structured knowledge from conversation transcripts").argument("<files...>", "One or more transcript files (.jsonl, .md, .txt)").option("--json", "Output raw KnowledgeEntry[] JSON", false).option("--format <type>", "Output format: json, markdown", "markdown").option("--output <file>", "Write output to file (or directory with --split)").option("--split", "Write one output file per input transcript", false).option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--no-dedup", "Skip post-extraction LLM dedup pass", false).option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--verbose", "Show extraction progress and debug info", false).action(async (files, opts) => {
     const selectedFormat = opts.json ? "json" : opts.format;
     const format = selectedFormat === "json" ? "json" : selectedFormat === "markdown" ? "markdown" : null;
     if (!format) {
       throw new Error("--format must be one of: json, markdown");
     }
+    const noPreFetch = opts.noPreFetch === true;
     const result = await runExtractCommand(files, {
       ...opts,
-      format
+      format,
+      noPreFetch
     });
     process.exitCode = result.exitCode;
   });

@@ -16655,16 +16744,22 @@ function createProgram() {
       process.exitCode = result.exitCode;
     }
   );
-  program.command("watch").description("Watch a transcript file and auto-extract knowledge as it grows").argument("[file]", "Transcript file to watch (.jsonl, .md, .txt)").option("--dir <path>", "Sessions directory to watch (resolver picks active file)").option("--platform <name>", "Session platform: openclaw, claude-code, codex, mtime").option("--auto", "Deprecated: use --platform <name> instead", false).option("--interval <seconds>", "Polling interval in seconds", parseIntOption, 300).option("--min-chunk <chars>", "Minimum new chars before extraction", parseIntOption, 2e3).option("--context <path>", "Regenerate context file after each cycle").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--verbose", "Show extraction progress", false).option("--dry-run", "Extract without storing", false).option("--once", "Run one cycle and exit", false).option("--json", "Output JSON results", false).action(async (file, opts) => {
-    const result = await runWatchCommand(file,
+  program.command("watch").description("Watch a transcript file and auto-extract knowledge as it grows").argument("[file]", "Transcript file to watch (.jsonl, .md, .txt)").option("--dir <path>", "Sessions directory to watch (resolver picks active file)").option("--platform <name>", "Session platform: openclaw, claude-code, codex, mtime").option("--auto", "Deprecated: use --platform <name> instead", false).option("--interval <seconds>", "Polling interval in seconds", parseIntOption, 300).option("--min-chunk <chars>", "Minimum new chars before extraction", parseIntOption, 2e3).option("--context <path>", "Regenerate context file after each cycle").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--verbose", "Show extraction progress", false).option("--dry-run", "Extract without storing", false).option("--once", "Run one cycle and exit", false).option("--json", "Output JSON results", false).action(async (file, opts) => {
+    const result = await runWatchCommand(file, {
+      ...opts,
+      noPreFetch: opts.noPreFetch === true
+    });
     process.exitCode = result.exitCode;
   });
   program.command("todo <subcommand> <subject>").description("Manage todos in the knowledge base").option("--db <path>", "Database path override").action(async (subcommand, subject, opts) => {
     const result = await runTodoCommand(subcommand, subject, { db: opts.db });
     process.exitCode = result.exitCode;
   });
-  program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
-    const result = await runIngestCommand(paths,
+  program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
+    const result = await runIngestCommand(paths, {
+      ...opts,
+      noPreFetch: opts.noPreFetch === true
+    });
     process.exitCode = result.exitCode;
   });
   program.command("consolidate").description("Consolidate and clean up the knowledge database").option("--rules-only", "Only run rule-based cleanup (no LLM)", false).option("--dry-run", "Show what would happen without making changes", false).option("--forget", "Delete forgetting candidates after consolidation", false).option("--report", "Print pre-run stats report (with --dry-run: report only)", false).option("--platform <name>", "Scope consolidation to platform: openclaw, claude-code, codex").option("--project <name>", "Scope consolidation to project (repeatable)", (val, prev) => [...prev, val], []).option("--exclude-project <name>", "Exclude entries from project (repeatable)", (val, prev) => [...prev, val], []).option(
|