@cleocode/core 2026.3.69 → 2026.3.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/retry.d.ts.map +1 -1
- package/dist/agents/retry.js +23 -42
- package/dist/agents/retry.js.map +1 -1
- package/dist/cleo.d.ts +2 -300
- package/dist/cleo.d.ts.map +1 -1
- package/dist/cleo.js +2 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +30 -0
- package/dist/config.js.map +1 -1
- package/dist/hooks/handlers/file-hooks.d.ts +5 -2
- package/dist/hooks/handlers/file-hooks.d.ts.map +1 -1
- package/dist/hooks/handlers/index.d.ts +2 -0
- package/dist/hooks/handlers/index.d.ts.map +1 -1
- package/dist/hooks/handlers/mcp-hooks.d.ts +11 -7
- package/dist/hooks/handlers/mcp-hooks.d.ts.map +1 -1
- package/dist/hooks/handlers/memory-bridge-refresh.d.ts +20 -0
- package/dist/hooks/handlers/memory-bridge-refresh.d.ts.map +1 -0
- package/dist/hooks/handlers/memory-bridge-refresh.js +42 -0
- package/dist/hooks/handlers/memory-bridge-refresh.js.map +1 -0
- package/dist/hooks/handlers/session-hooks.d.ts +10 -0
- package/dist/hooks/handlers/session-hooks.d.ts.map +1 -1
- package/dist/hooks/handlers/session-hooks.js +36 -0
- package/dist/hooks/handlers/session-hooks.js.map +1 -1
- package/dist/hooks/handlers/task-hooks.d.ts +4 -0
- package/dist/hooks/handlers/task-hooks.d.ts.map +1 -1
- package/dist/hooks/handlers/task-hooks.js +7 -0
- package/dist/hooks/handlers/task-hooks.js.map +1 -1
- package/dist/hooks/handlers/work-capture-hooks.d.ts +40 -0
- package/dist/hooks/handlers/work-capture-hooks.d.ts.map +1 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5069 -4678
- package/dist/index.js.map +4 -4
- package/dist/internal.d.ts +10 -2
- package/dist/internal.d.ts.map +1 -1
- package/dist/internal.js +10 -3
- package/dist/internal.js.map +1 -1
- package/dist/memory/auto-extract.d.ts +13 -0
- package/dist/memory/auto-extract.d.ts.map +1 -1
- package/dist/memory/auto-extract.js +34 -0
- package/dist/memory/auto-extract.js.map +1 -1
- package/dist/memory/brain-embedding.d.ts +13 -0
- package/dist/memory/brain-embedding.d.ts.map +1 -1
- package/dist/memory/brain-embedding.js +17 -0
- package/dist/memory/brain-embedding.js.map +1 -1
- package/dist/memory/brain-maintenance.d.ts +110 -0
- package/dist/memory/brain-maintenance.d.ts.map +1 -0
- package/dist/memory/brain-maintenance.js +98 -0
- package/dist/memory/brain-maintenance.js.map +1 -0
- package/dist/memory/brain-retrieval.d.ts +31 -5
- package/dist/memory/brain-retrieval.d.ts.map +1 -1
- package/dist/memory/brain-retrieval.js +53 -6
- package/dist/memory/brain-retrieval.js.map +1 -1
- package/dist/memory/embedding-local.d.ts +55 -0
- package/dist/memory/embedding-local.d.ts.map +1 -0
- package/dist/memory/embedding-local.js +97 -0
- package/dist/memory/embedding-local.js.map +1 -0
- package/dist/memory/embedding-queue.d.ts +90 -0
- package/dist/memory/embedding-queue.d.ts.map +1 -0
- package/dist/memory/embedding-queue.js +271 -0
- package/dist/memory/embedding-queue.js.map +1 -0
- package/dist/memory/embedding-worker.d.ts +19 -0
- package/dist/memory/embedding-worker.d.ts.map +1 -0
- package/dist/memory/embedding-worker.js +58 -0
- package/dist/memory/embedding-worker.js.map +1 -0
- package/dist/memory/memory-bridge.d.ts +21 -1
- package/dist/memory/memory-bridge.d.ts.map +1 -1
- package/dist/memory/memory-bridge.js +83 -2
- package/dist/memory/memory-bridge.js.map +1 -1
- package/dist/memory/session-memory.d.ts +26 -0
- package/dist/memory/session-memory.d.ts.map +1 -1
- package/dist/memory/session-memory.js +105 -0
- package/dist/memory/session-memory.js.map +1 -1
- package/dist/pagination.js +3 -0
- package/dist/pagination.js.map +1 -1
- package/dist/sessions/index.d.ts.map +1 -1
- package/dist/sessions/index.js +2 -6
- package/dist/sessions/index.js.map +1 -1
- package/dist/store/brain-sqlite.js +13 -62
- package/dist/store/brain-sqlite.js.map +1 -1
- package/dist/store/migration-manager.js +151 -0
- package/dist/store/migration-manager.js.map +1 -0
- package/dist/store/sqlite.d.ts.map +1 -1
- package/dist/store/sqlite.js +16 -134
- package/dist/store/sqlite.js.map +1 -1
- package/dist/tasks/add.js +27 -22
- package/dist/tasks/add.js.map +1 -1
- package/dist/tasks/complete.d.ts.map +1 -1
- package/dist/tasks/complete.js +13 -40
- package/dist/tasks/complete.js.map +1 -1
- package/dist/tasks/enforcement.js +12 -15
- package/dist/tasks/enforcement.js.map +1 -1
- package/dist/upgrade.js +246 -3
- package/dist/upgrade.js.map +1 -1
- package/migrations/drizzle-tasks/20260320013731_wave0-schema-hardening/migration.sql +17 -17
- package/package.json +6 -5
- package/src/agents/retry.ts +30 -24
- package/src/cleo.ts +30 -251
- package/src/config.ts +18 -0
- package/src/hooks/handlers/file-hooks.ts +29 -3
- package/src/hooks/handlers/index.ts +2 -0
- package/src/hooks/handlers/mcp-hooks.ts +32 -13
- package/src/hooks/handlers/memory-bridge-refresh.ts +47 -0
- package/src/hooks/handlers/session-hooks.ts +38 -0
- package/src/hooks/handlers/task-hooks.ts +8 -0
- package/src/hooks/handlers/work-capture-hooks.ts +184 -0
- package/src/index.ts +5 -0
- package/src/internal.ts +28 -2
- package/src/memory/__tests__/brain-automation.test.ts +941 -0
- package/src/memory/auto-extract.ts +40 -0
- package/src/memory/brain-embedding.ts +18 -0
- package/src/memory/brain-maintenance.ts +183 -0
- package/src/memory/brain-retrieval.ts +85 -7
- package/src/memory/embedding-local.ts +107 -0
- package/src/memory/embedding-queue.ts +304 -0
- package/src/memory/embedding-worker.ts +79 -0
- package/src/memory/memory-bridge.ts +101 -2
- package/src/memory/session-memory.ts +123 -0
- package/src/sessions/index.ts +2 -6
- package/src/store/__tests__/test-db-helper.js +14 -2
- package/src/store/__tests__/test-db-helper.ts +4 -1
- package/src/store/sqlite.ts +28 -0
- package/src/tasks/__tests__/complete-unblocks.test.ts +4 -1
- package/src/tasks/__tests__/complete.test.ts +18 -6
- package/src/tasks/__tests__/epic-enforcement.test.ts +4 -1
- package/src/tasks/__tests__/update.test.ts +4 -1
- package/src/tasks/complete.ts +8 -8
- package/templates/config.template.json +19 -0
- package/templates/global-config.template.json +19 -0
|
@@ -159,3 +159,43 @@ export async function resolveTaskDetails(projectRoot: string, taskIds: string[])
|
|
|
159
159
|
await accessor.close();
|
|
160
160
|
}
|
|
161
161
|
}
|
|
162
|
+
|
|
163
|
+
/**
 * Case-insensitive matcher for the verbs that mark a transcript line as
 * worth storing (for example "fix", "refactor" or "discovered").
 *
 * Every verb is wrapped in word boundaries, so only the exact listed forms
 * match: "fix" matches, "prefix" does not.
 *
 * NOTE(review): inflected forms such as "fixed" or "implemented" are not
 * matched either — confirm that this is intended.
 */
const ACTION_PATTERNS =
  /\b(implement|fix|add|create|update|remove|refactor|extract|migrate|resolve|complete|found|learned|discovered)\b/i;
|
|
166
|
+
|
|
167
|
+
/**
 * Extract key observations from a provider session transcript and store
 * them as learnings via `storeLearning`.
 *
 * Keeps transcript lines that are longer than 20 characters after trimming
 * and that match {@link ACTION_PATTERNS}, then stores the first 5 of them as
 * non-actionable learnings with 0.6 confidence. Each stored insight is the
 * trimmed line cut to at most 250 characters. No speaker filtering is
 * applied: any line of the transcript may be selected.
 *
 * Always best-effort — never throws. Every learning is written
 * independently, so one failed write does not discard the remaining
 * candidates.
 *
 * @param projectRoot - Absolute path to project root.
 * @param sessionId - The CLEO session ID being processed; recorded in each
 *   learning's `source` as `transcript:<sessionId>`.
 * @param transcript - Plain-text provider transcript (user/assistant turns).
 * @task T144 @epic T134
 */
export async function extractFromTranscript(
  projectRoot: string,
  sessionId: string,
  transcript: string,
): Promise<void> {
  try {
    const lines = transcript.split('\n').filter((l) => l.trim().length > 20);
    const actionLines = lines.filter((l) => ACTION_PATTERNS.test(l)).slice(0, 5);
    if (actionLines.length === 0) return;

    // Loaded lazily so the learnings module is only pulled in when there is
    // something to store.
    const { storeLearning } = await import('./learnings.js');
    for (const line of actionLines) {
      try {
        await storeLearning(projectRoot, {
          insight: line.trim().slice(0, 250),
          source: `transcript:${sessionId}`,
          confidence: 0.6,
          actionable: false,
        });
      } catch {
        // Best-effort: skip this line and keep storing the remaining ones.
      }
    }
  } catch {
    // Best-effort: must never throw
  }
}
|
|
@@ -64,3 +64,21 @@ export async function embedText(text: string): Promise<Float32Array | null> {
|
|
|
64
64
|
export function isEmbeddingAvailable(): boolean {
|
|
65
65
|
return currentProvider?.isAvailable() ?? false;
|
|
66
66
|
}
|
|
67
|
+
|
|
68
|
+
/**
 * Register the bundled local embedding provider as the active provider.
 *
 * The `./embedding-local.js` module is pulled in with a dynamic import so
 * the heavy @xenova/transformers bundle is not loaded unless embedding is
 * actually requested. A new LocalEmbeddingProvider is then handed to
 * setEmbeddingProvider(). Intended to be called once at startup when
 * `brain.embedding.enabled` is true.
 *
 * NOTE(review): LocalEmbeddingProvider.isAvailable() reports false until its
 * first embed call has loaded the model, so callers that gate on
 * isEmbeddingAvailable() right after this function returns will see `false`
 * — confirm something triggers the first embed.
 *
 * @task T136 @epic T134
 */
export async function initDefaultProvider(): Promise<void> {
  const localModule = await import('./embedding-local.js');
  setEmbeddingProvider(new localModule.LocalEmbeddingProvider());
}
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Brain Maintenance Runner
|
|
3
|
+
*
|
|
4
|
+
* Combines temporal decay, memory consolidation, and embedding backfill
|
|
5
|
+
* into a single idempotent maintenance pass. Designed to be run on a
|
|
6
|
+
* schedule or on-demand via `cleo brain maintenance`.
|
|
7
|
+
*
|
|
8
|
+
* Steps run in order:
|
|
9
|
+
* 1. Temporal decay — reduce confidence of stale learnings
|
|
10
|
+
* 2. Consolidation — merge duplicate/similar old observations
|
|
11
|
+
* 3. Embedding backfill — populate vectors for observations without them
|
|
12
|
+
*
|
|
13
|
+
* Each step is individually opt-outable via skip flags, making the
|
|
14
|
+
* operation safe to re-run at any frequency.
|
|
15
|
+
*
|
|
16
|
+
* @task T143
|
|
17
|
+
* @epic T134
|
|
18
|
+
* @why Enable scheduled brain optimization via single command
|
|
19
|
+
* @what Combined maintenance runner with CLI command and progress reporting
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { applyTemporalDecay, consolidateMemories } from './brain-lifecycle.js';
|
|
23
|
+
import { populateEmbeddings } from './brain-retrieval.js';
|
|
24
|
+
|
|
25
|
+
// ============================================================================
|
|
26
|
+
// Types
|
|
27
|
+
// ============================================================================
|
|
28
|
+
|
|
29
|
+
/** Outcome of the temporal decay step as reported by a maintenance run. */
export interface BrainMaintenanceDecayResult {
  /** How many learnings had their confidence value updated. */
  affected: number;
}
|
|
34
|
+
|
|
35
|
+
/** Outcome of the memory consolidation step as reported by a maintenance run. */
export interface BrainMaintenanceConsolidationResult {
  /** How many new summary observations were created. */
  merged: number;
  /** How many original observations were archived. */
  removed: number;
}
|
|
42
|
+
|
|
43
|
+
/** Outcome of the embedding backfill step as reported by a maintenance run. */
export interface BrainMaintenanceEmbeddingsResult {
  /** Observations that were embedded successfully. */
  processed: number;
  /** Observations that were skipped (no provider or no narrative). */
  skipped: number;
  /** Observations whose embedding attempt failed. */
  errors: number;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Combined report produced by one full brain maintenance run.
 *
 * A step that was disabled through its `skip*` option reports zero for
 * every one of its counts.
 */
export interface BrainMaintenanceResult {
  /** Counts reported by the temporal decay step. */
  decay: BrainMaintenanceDecayResult;
  /** Counts reported by the memory consolidation step. */
  consolidation: BrainMaintenanceConsolidationResult;
  /** Counts reported by the embedding backfill step. */
  embeddings: BrainMaintenanceEmbeddingsResult;
  /** Wall-clock time the whole run took, in milliseconds. */
  duration: number;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Settings accepted by {@link runBrainMaintenance}.
 *
 * Every `skip*` flag defaults to `false`, so the complete maintenance pass
 * runs unless individual steps are switched off.
 */
export interface BrainMaintenanceOptions {
  /** Set to `true` to leave out the temporal decay step. Default: false. */
  skipDecay?: boolean;
  /** Set to `true` to leave out the memory consolidation step. Default: false. */
  skipConsolidation?: boolean;
  /** Set to `true` to leave out the embedding backfill step. Default: false. */
  skipEmbeddings?: boolean;
  /**
   * Receives progress updates while the run advances. Decay and
   * consolidation report 0/1 when they start and 1/1 when they finish; the
   * embeddings step reports as each observation is attempted.
   *
   * @param step - Human-readable step name (e.g. "decay", "consolidation", "embeddings")
   * @param current - Items handled so far within the current step (0 before the step starts)
   * @param total - Items expected for the current step (0 if not yet known)
   */
  onProgress?: (step: string, current: number, total: number) => void;
}
|
|
93
|
+
|
|
94
|
+
// ============================================================================
|
|
95
|
+
// Runner
|
|
96
|
+
// ============================================================================
|
|
97
|
+
|
|
98
|
+
/**
 * Run a combined brain maintenance pass: decay, consolidation, and embeddings.
 *
 * The three steps always run in the same order:
 * 1. `applyTemporalDecay` — its `updated` count is reported as `decay.affected`
 * 2. `consolidateMemories` — its `merged` / `archived` counts are reported as
 *    `consolidation.merged` / `consolidation.removed`
 * 3. `populateEmbeddings` — its `processed` / `skipped` / `errors` counts are
 *    reported unchanged
 *
 * Each step is optional via the `skip*` flags; a skipped step reports zero
 * counts and produces no progress callbacks.
 *
 * Progress reporting: every executed step first announces itself with
 * `current = 0`. Decay and consolidation report `0/1` then `1/1`. The
 * embeddings step reports `0/0` up front (the backlog size is not known
 * yet) and then relays the per-observation progress of `populateEmbeddings`.
 *
 * Errors thrown by any step are not caught here and reject the returned
 * promise.
 *
 * @param projectRoot - Absolute path to the project root (used to locate brain.db)
 * @param options - Optional skip flags and progress callback
 * @returns Aggregated counts from each step plus total wall-clock duration
 *
 * @example
 * ```ts
 * const result = await runBrainMaintenance('/my/project', {
 *   onProgress: (step, current, total) => {
 *     console.log(`[${step}] ${current}/${total}`);
 *   },
 * });
 * console.log(`Done in ${result.duration}ms`);
 * ```
 *
 * @task T143
 * @epic T134
 */
export async function runBrainMaintenance(
  projectRoot: string,
  options?: BrainMaintenanceOptions,
): Promise<BrainMaintenanceResult> {
  const {
    skipDecay = false,
    skipConsolidation = false,
    skipEmbeddings = false,
    onProgress,
  } = options ?? {};

  const startTime = Date.now();

  // Default zero values for each step (used when step is skipped).
  const decayResult: BrainMaintenanceDecayResult = { affected: 0 };
  const consolidationResult: BrainMaintenanceConsolidationResult = { merged: 0, removed: 0 };
  const embeddingsResult: BrainMaintenanceEmbeddingsResult = {
    processed: 0,
    skipped: 0,
    errors: 0,
  };

  // Step 1: Temporal decay
  if (!skipDecay) {
    onProgress?.('decay', 0, 1);
    const raw = await applyTemporalDecay(projectRoot);
    decayResult.affected = raw.updated;
    onProgress?.('decay', 1, 1);
  }

  // Step 2: Memory consolidation
  if (!skipConsolidation) {
    onProgress?.('consolidation', 0, 1);
    const raw = await consolidateMemories(projectRoot);
    consolidationResult.merged = raw.merged;
    consolidationResult.removed = raw.archived;
    onProgress?.('consolidation', 1, 1);
  }

  // Step 3: Embedding backfill (with per-item progress relay)
  if (!skipEmbeddings) {
    // Announce the step like the other two. The total is reported as 0
    // because the number of pending observations is only known inside
    // populateEmbeddings, which may also return without reporting anything.
    onProgress?.('embeddings', 0, 0);
    const raw = await populateEmbeddings(projectRoot, {
      onProgress: (current, total) => {
        onProgress?.('embeddings', current, total);
      },
    });
    embeddingsResult.processed = raw.processed;
    embeddingsResult.skipped = raw.skipped;
    embeddingsResult.errors = raw.errors;
  }

  return {
    decay: decayResult,
    consolidation: consolidationResult,
    embeddings: embeddingsResult,
    duration: Date.now() - startTime,
  };
}
|
|
@@ -596,6 +596,14 @@ export async function observeBrain(
|
|
|
596
596
|
});
|
|
597
597
|
}
|
|
598
598
|
|
|
599
|
+
// Auto-link observation to the currently focused task when a session is active. (T141)
|
|
600
|
+
// This is a fire-and-forget side effect — linking failure MUST NOT block the return.
|
|
601
|
+
if (sourceSessionId) {
|
|
602
|
+
autoLinkObservationToTask(projectRoot, row.id, accessor).catch(() => {
|
|
603
|
+
/* Auto-linking is best-effort */
|
|
604
|
+
});
|
|
605
|
+
}
|
|
606
|
+
|
|
599
607
|
return {
|
|
600
608
|
id: row.id,
|
|
601
609
|
type: row.type,
|
|
@@ -603,6 +611,40 @@ export async function observeBrain(
|
|
|
603
611
|
};
|
|
604
612
|
}
|
|
605
613
|
|
|
614
|
+
/**
 * Link a freshly created observation to the task the active session is
 * working on.
 *
 * Looks up the session through sessionStatus() and reads
 * `taskWork.taskId` from it. When there is no session, or the session has no
 * task id, nothing happens. Otherwise a link with memoryType 'observation'
 * and linkType 'produced_by' is added through the accessor.
 *
 * This function does not catch errors itself; a failed import, session
 * lookup, or link insert rejects the returned promise. The call site in
 * observeBrain attaches a `.catch` so the linking stays best-effort.
 *
 * @param projectRoot - Project root directory
 * @param observationId - ID of the newly created observation
 * @param accessor - BrainDataAccessor to use for the link insert
 */
async function autoLinkObservationToTask(
  projectRoot: string,
  observationId: string,
  accessor: Awaited<ReturnType<typeof getBrainAccessor>>,
): Promise<void> {
  // Imported lazily, at call time, rather than at module load.
  const sessions = await import('../sessions/index.js');
  const activeSession = await sessions.sessionStatus(projectRoot);

  // No session, or no task id on it: there is nothing to link to.
  const focusedTaskId = activeSession?.taskWork?.taskId;
  if (!focusedTaskId) return;

  await accessor.addLink({
    memoryType: 'observation',
    memoryId: observationId,
    taskId: focusedTaskId,
    linkType: 'produced_by',
  });
}
|
|
647
|
+
|
|
606
648
|
// ============================================================================
|
|
607
649
|
// Embedding Backfill Pipeline (T5387)
|
|
608
650
|
// ============================================================================
|
|
@@ -611,6 +653,29 @@ export async function observeBrain(
|
|
|
611
653
|
export interface PopulateEmbeddingsResult {
  /** Number of observations counted as processed by the backfill. */
  processed: number;
  /** Number of observations counted as skipped by the backfill. */
  skipped: number;
  /** Number of observations whose embedding attempt threw an error. */
  errors: number;
}
|
|
658
|
+
|
|
659
|
+
/**
 * Settings accepted by the embedding backfill pipeline.
 *
 * @example
 * ```ts
 * await populateEmbeddings(root, {
 *   batchSize: 25,
 *   onProgress: (current, total) => console.log(`${current}/${total}`),
 * });
 * ```
 */
export interface PopulateEmbeddingsOptions {
  /** Upper bound on the items handled in one batch cycle. Defaults to 50. */
  batchSize?: number;
  /**
   * Called once after every observation has been attempted, whether it was
   * embedded, skipped, or failed. `current` is the 1-based number of
   * observations attempted so far; `total` is the number of observations
   * that were found to need embeddings.
   */
  onProgress?: (current: number, total: number) => void;
}
|
|
615
680
|
|
|
616
681
|
/**
|
|
@@ -620,16 +685,22 @@ export interface PopulateEmbeddingsResult {
|
|
|
620
685
|
* generates vectors using the registered embedding provider.
|
|
621
686
|
* Processes in batches to avoid memory pressure.
|
|
622
687
|
*
|
|
688
|
+
* An optional {@link PopulateEmbeddingsOptions.onProgress} callback is called
|
|
689
|
+
* after each observation is attempted, enabling callers to report progress.
|
|
690
|
+
*
|
|
623
691
|
* @param projectRoot - Project root directory
|
|
624
|
-
* @param options - Optional batch size
|
|
625
|
-
* @returns Count of processed and
|
|
692
|
+
* @param options - Optional batch size and progress callback
|
|
693
|
+
* @returns Count of processed, skipped, and errored observations
|
|
694
|
+
*
|
|
695
|
+
* @epic T134
|
|
696
|
+
* @task T142
|
|
626
697
|
*/
|
|
627
698
|
export async function populateEmbeddings(
|
|
628
699
|
projectRoot: string,
|
|
629
|
-
options?:
|
|
700
|
+
options?: PopulateEmbeddingsOptions,
|
|
630
701
|
): Promise<PopulateEmbeddingsResult> {
|
|
631
702
|
if (!isEmbeddingAvailable()) {
|
|
632
|
-
return { processed: 0, skipped: 0 };
|
|
703
|
+
return { processed: 0, skipped: 0, errors: 0 };
|
|
633
704
|
}
|
|
634
705
|
|
|
635
706
|
const { getBrainDb, getBrainNativeDb } = await import('../store/brain-sqlite.js');
|
|
@@ -637,12 +708,14 @@ export async function populateEmbeddings(
|
|
|
637
708
|
const nativeDb = getBrainNativeDb();
|
|
638
709
|
|
|
639
710
|
if (!nativeDb) {
|
|
640
|
-
return { processed: 0, skipped: 0 };
|
|
711
|
+
return { processed: 0, skipped: 0, errors: 0 };
|
|
641
712
|
}
|
|
642
713
|
|
|
643
714
|
const batchSize = options?.batchSize ?? 50;
|
|
715
|
+
const { onProgress } = options ?? {};
|
|
644
716
|
let processed = 0;
|
|
645
717
|
let skipped = 0;
|
|
718
|
+
let errors = 0;
|
|
646
719
|
|
|
647
720
|
// Find observations without embeddings
|
|
648
721
|
const rows = typedAll<BrainNarrativeRow>(
|
|
@@ -655,6 +728,9 @@ export async function populateEmbeddings(
|
|
|
655
728
|
`),
|
|
656
729
|
);
|
|
657
730
|
|
|
731
|
+
const total = rows.length;
|
|
732
|
+
let attempted = 0;
|
|
733
|
+
|
|
658
734
|
for (let i = 0; i < rows.length; i += batchSize) {
|
|
659
735
|
const batch = rows.slice(i, i + batchSize);
|
|
660
736
|
for (const row of batch) {
|
|
@@ -669,10 +745,12 @@ export async function populateEmbeddings(
|
|
|
669
745
|
skipped++;
|
|
670
746
|
}
|
|
671
747
|
} catch {
|
|
672
|
-
|
|
748
|
+
errors++;
|
|
673
749
|
}
|
|
750
|
+
attempted++;
|
|
751
|
+
onProgress?.(attempted, total);
|
|
674
752
|
}
|
|
675
753
|
}
|
|
676
754
|
|
|
677
|
-
return { processed, skipped };
|
|
755
|
+
return { processed, skipped, errors };
|
|
678
756
|
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local embedding provider using @xenova/transformers.
|
|
3
|
+
*
|
|
4
|
+
* Implements the EmbeddingProvider interface for brain memory vector search.
|
|
5
|
+
* Uses all-MiniLM-L6-v2 (22MB, 384 dimensions) — matches the brain_embeddings
|
|
6
|
+
* vec0 table schema. Model downloads on first call and is cached locally by
|
|
7
|
+
* the transformers library.
|
|
8
|
+
*
|
|
9
|
+
* @epic T134
|
|
10
|
+
* @task T136
|
|
11
|
+
* @why Ship vector search out-of-the-box without external API keys
|
|
12
|
+
* @what Local embedding provider using @xenova/transformers all-MiniLM-L6-v2
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { EmbeddingProvider } from './brain-embedding.js';
|
|
16
|
+
import { EMBEDDING_DIMENSIONS } from './brain-embedding.js';
|
|
17
|
+
|
|
18
|
+
/** Model identifier for all-MiniLM-L6-v2 via Xenova hub. */
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2';

/** Pipeline singleton — initialized lazily on first call. */
let _pipeline: import('@xenova/transformers').FeatureExtractionPipeline | null = null;

/** Whether the pipeline has been successfully initialized. */
let _ready = false;

/** Load currently in progress, shared by concurrent callers; null when idle. */
let _loading: Promise<void> | null = null;

/**
 * Load the transformers feature-extraction pipeline lazily.
 *
 * The dynamic import keeps @xenova/transformers from being loaded until an
 * embedding is actually requested. Callers that arrive while a load is
 * already running wait on that same load instead of starting another one,
 * so the pipeline is built at most once at a time.
 *
 * If loading fails, the error is propagated to every waiting caller and the
 * in-flight marker is cleared, so a later call starts a fresh attempt.
 */
async function loadPipeline(): Promise<void> {
  if (_ready) return;

  if (_loading === null) {
    _loading = (async () => {
      // Dynamic import — only resolves when embedding is explicitly enabled
      const { pipeline } = await import('@xenova/transformers');
      _pipeline = await pipeline('feature-extraction', MODEL_NAME);
      _ready = true;
    })().finally(() => {
      // Clear on success and on failure; `_ready` short-circuits later
      // calls after a success, and a failure must stay retryable.
      _loading = null;
    });
  }

  await _loading;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Embedding provider that runs locally on top of @xenova/transformers.
 *
 * Vectors are returned as Float32Array copies of the pipeline output; the
 * advertised dimension count comes from EMBEDDING_DIMENSIONS (described in
 * this module's header as 384, matching the brain_embeddings vec0 table).
 * The pipeline is loaded on the first embed call rather than at
 * construction time.
 *
 * Use {@link initDefaultProvider} (in brain-embedding.ts) to register an
 * instance when brain.embedding.enabled=true and
 * brain.embedding.provider='local'.
 */
export class LocalEmbeddingProvider implements EmbeddingProvider {
  /** Vector length produced — must match the brain_embeddings vec0 table. */
  readonly dimensions = EMBEDDING_DIMENSIONS;

  /**
   * Report whether the pipeline has finished loading.
   *
   * This is `false` until a first embed call has completed the load.
   */
  isAvailable(): boolean {
    return _ready;
  }

  /**
   * Turn one text string into an embedding vector, loading the pipeline
   * first if that has not happened yet.
   *
   * @param text - The text to embed.
   * @returns A Float32Array copy of the mean-pooled, normalized output.
   */
  async embed(text: string): Promise<Float32Array> {
    await loadPipeline();
    const features = await _pipeline!(text, { pooling: 'mean', normalize: true });
    // `data` is typed as a generic DataArray; copy it into a Float32Array of
    // its own so the caller receives a standalone vector.
    const values = features.data as Float32Array;
    return Float32Array.from(values);
  }

  /**
   * Embed several texts one after another with the shared pipeline.
   *
   * @param texts - Array of text strings to embed.
   * @returns One Float32Array per input text, in input order.
   */
  async embedBatch(texts: string[]): Promise<Float32Array[]> {
    await loadPipeline();
    const vectors: Float32Array[] = [];
    for (const entry of texts) {
      const features = await _pipeline!(entry, { pooling: 'mean', normalize: true });
      const values = features.data as Float32Array;
      vectors.push(Float32Array.from(values));
    }
    return vectors;
  }
}
|
|
93
|
+
|
|
94
|
+
/** Lazily created provider shared by all callers of getLocalEmbeddingProvider. */
let _instance: LocalEmbeddingProvider | null = null;

/**
 * Return the shared LocalEmbeddingProvider, creating it on the first call.
 *
 * @returns The shared LocalEmbeddingProvider instance.
 */
export function getLocalEmbeddingProvider(): LocalEmbeddingProvider {
  _instance ??= new LocalEmbeddingProvider();
  return _instance;
}
|