byterover-cli 3.10.1 → 3.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/infra/agent/service-initializer.js +8 -2
- package/dist/agent/infra/llm/agent-llm-service.d.ts +9 -9
- package/dist/agent/infra/llm/agent-llm-service.js +28 -18
- package/dist/agent/infra/llm/generators/ai-sdk-content-generator.d.ts +10 -1
- package/dist/agent/infra/llm/generators/ai-sdk-content-generator.js +21 -4
- package/dist/agent/infra/llm/generators/ai-sdk-message-converter.d.ts +4 -0
- package/dist/agent/infra/llm/generators/ai-sdk-message-converter.js +8 -1
- package/dist/agent/infra/map/abstract-generator.d.ts +29 -0
- package/dist/agent/infra/map/abstract-generator.js +161 -0
- package/dist/agent/infra/map/abstract-queue.d.ts +7 -0
- package/dist/agent/infra/map/abstract-queue.js +100 -26
- package/dist/agent/infra/system-prompt/contributors/file-contributor.js +6 -2
- package/dist/agent/infra/tools/tool-manager.d.ts +10 -1
- package/dist/agent/infra/tools/tool-manager.js +10 -1
- package/dist/server/infra/dream/dream-state-schema.d.ts +35 -0
- package/dist/server/infra/dream/dream-state-schema.js +15 -0
- package/dist/server/infra/dream/dream-state-service.d.ts +22 -0
- package/dist/server/infra/dream/dream-state-service.js +62 -3
- package/dist/server/infra/dream/dream-trigger.js +6 -2
- package/dist/server/infra/executor/curate-executor.d.ts +16 -0
- package/dist/server/infra/executor/curate-executor.js +76 -5
- package/dist/server/infra/executor/dream-executor.d.ts +16 -0
- package/dist/server/infra/executor/dream-executor.js +44 -7
- package/dist/server/infra/transport/handlers/provider-handler.js +20 -3
- package/dist/tui/features/auth/api/get-auth-state.js +6 -3
- package/dist/tui/features/auth/components/auth-initializer.js +4 -2
- package/oclif.manifest.json +413 -413
- package/package.json +1 -1
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
import { appendFileSync } from 'node:fs';
|
|
2
2
|
import { mkdir, writeFile } from 'node:fs/promises';
|
|
3
|
-
import { join } from 'node:path';
|
|
4
|
-
import {
|
|
3
|
+
import { isAbsolute, join } from 'node:path';
|
|
4
|
+
import { generateFileAbstractsBatch } from './abstract-generator.js';
|
|
5
|
+
/**
|
|
6
|
+
* Maximum files combined into a single batched L0/L1 LLM call.
|
|
7
|
+
*
|
|
8
|
+
* Two parallel calls fire per cycle: one L0 batch (~80 tok output × N files +
|
|
9
|
+
* tags), one L1 batch (~1500 tok output × N files + tags). At N=5 the L1
|
|
10
|
+
* output budget caps at ~8K tokens; raising N further risks output truncation
|
|
11
|
+
* on smaller-context models. Lowering N reduces savings without quality gain.
|
|
12
|
+
*/
|
|
13
|
+
const BATCH_SIZE_CAP = 5;
|
|
5
14
|
const QUEUE_TRACE_ENABLED = process.env.BRV_QUEUE_TRACE === '1';
|
|
6
15
|
const LOG_PATH = process.env.BRV_SESSION_LOG;
|
|
7
16
|
function queueLog(message) {
|
|
@@ -26,6 +35,13 @@ function queueLog(message) {
|
|
|
26
35
|
export class AbstractGenerationQueue {
|
|
27
36
|
projectRoot;
|
|
28
37
|
maxAttempts;
|
|
38
|
+
/**
|
|
39
|
+
* When true, scheduleNext fires the next batch even if pending is below
|
|
40
|
+
* BATCH_SIZE_CAP. Set by drain(); reset once the queue is fully idle.
|
|
41
|
+
* Without this, items below the cap would be buffered indefinitely with
|
|
42
|
+
* no flush trigger when a curate writes fewer files than the cap.
|
|
43
|
+
*/
|
|
44
|
+
drainRequested = false;
|
|
29
45
|
drainResolvers = [];
|
|
30
46
|
failed = 0;
|
|
31
47
|
generator;
|
|
@@ -48,7 +64,12 @@ export class AbstractGenerationQueue {
|
|
|
48
64
|
*/
|
|
49
65
|
async drain() {
|
|
50
66
|
queueLog(`drain:start idle=${this.isIdle()} pending=${this.pending.length} retrying=${this.retrying} processing=${this.processing}`);
|
|
67
|
+
// Force any buffered (below-cap) pending items to fire as a final batch.
|
|
68
|
+
// scheduleNext respects drainRequested even when pending < BATCH_SIZE_CAP.
|
|
69
|
+
this.drainRequested = true;
|
|
70
|
+
this.scheduleNext();
|
|
51
71
|
if (this.isIdle()) {
|
|
72
|
+
this.drainRequested = false;
|
|
52
73
|
await this.statusWritePromise.catch(() => { });
|
|
53
74
|
queueLog('drain:resolved-immediate');
|
|
54
75
|
return;
|
|
@@ -63,6 +84,17 @@ export class AbstractGenerationQueue {
|
|
|
63
84
|
* Add a file to the abstract generation queue.
|
|
64
85
|
*/
|
|
65
86
|
enqueue(item) {
|
|
87
|
+
// Background batch writes derive .abstract.md / .overview.md from
|
|
88
|
+
// contextPath via raw `writeFile`. A relative path would resolve under
|
|
89
|
+
// process.cwd() rather than the intended context-tree location, and the
|
|
90
|
+
// failure would be invisible because batch errors are catch-suppressed.
|
|
91
|
+
// Drop misconfigured items at the entry point with a trace breadcrumb
|
|
92
|
+
// rather than failing loudly — callers are internal and treat the queue
|
|
93
|
+
// as fail-open.
|
|
94
|
+
if (!isAbsolute(item.contextPath)) {
|
|
95
|
+
queueLog(`enqueue:dropped non-absolute path=${item.contextPath}`);
|
|
96
|
+
return;
|
|
97
|
+
}
|
|
66
98
|
// Guard against paths that must never trigger abstract generation:
|
|
67
99
|
// - derived artifacts (.abstract.md, .overview.md) — would produce .abstract.abstract.md
|
|
68
100
|
// - summary index files (_index.md) — domain/topic summaries, not knowledge nodes
|
|
@@ -77,7 +109,13 @@ export class AbstractGenerationQueue {
|
|
|
77
109
|
this.pending.push({ attempts: 0, contextPath: item.contextPath, fullContent: item.fullContent });
|
|
78
110
|
queueLog(`enqueue path=${item.contextPath} pending=${this.pending.length} retrying=${this.retrying} processing=${this.processing}`);
|
|
79
111
|
this.queueStatusWrite();
|
|
80
|
-
|
|
112
|
+
// Buffer until cap is reached; drain() will trigger the final flush
|
|
113
|
+
// for partial batches at curate-end. Without this gating, the first
|
|
114
|
+
// enqueue starts a 1-item batch before the curate finishes writing
|
|
115
|
+
// the rest of its files.
|
|
116
|
+
if (this.pending.length >= BATCH_SIZE_CAP || this.drainRequested) {
|
|
117
|
+
this.scheduleNext();
|
|
118
|
+
}
|
|
81
119
|
}
|
|
82
120
|
/**
|
|
83
121
|
* Return current queue status snapshot.
|
|
@@ -110,14 +148,25 @@ export class AbstractGenerationQueue {
|
|
|
110
148
|
return this.pending.length === 0 && !this.processing && this.retrying === 0;
|
|
111
149
|
}
|
|
112
150
|
async processNext() {
|
|
113
|
-
|
|
151
|
+
// Capture the generator in a local const so type narrowing survives the
|
|
152
|
+
// `await` boundary below — TS won't keep `this.generator` narrow across
|
|
153
|
+
// suspensions because another async path could reassign the property.
|
|
154
|
+
const { generator } = this;
|
|
155
|
+
if (!generator || this.processing || this.pending.length === 0) {
|
|
114
156
|
this.resolveDrainersIfIdle();
|
|
115
157
|
return;
|
|
116
158
|
}
|
|
117
159
|
this.processing = true;
|
|
118
160
|
this.queueStatusWrite();
|
|
119
|
-
|
|
120
|
-
|
|
161
|
+
// Drain up to BATCH_SIZE_CAP items into a single batch. Items beyond the
|
|
162
|
+
// cap stay pending for the next cycle. Note: `maxAttempts` counts BATCH
|
|
163
|
+
// attempts for this item, not individual-call attempts — a transient
|
|
164
|
+
// failure on attempt 1 consumes one retry token for every item in the
|
|
165
|
+
// batch, including ones whose content was unrelated to the failure.
|
|
166
|
+
// Acceptable: batches are small (cap=5) and the per-item re-enqueue on
|
|
167
|
+
// batch failure preserves attempts independently across cycles.
|
|
168
|
+
const batch = this.pending.splice(0, BATCH_SIZE_CAP);
|
|
169
|
+
queueLog(`process:start batchSize=${batch.length} remaining=${this.pending.length} retrying=${this.retrying}`);
|
|
121
170
|
try {
|
|
122
171
|
// Refresh credentials before each generation (OAuth tokens may expire)
|
|
123
172
|
try {
|
|
@@ -127,23 +176,40 @@ export class AbstractGenerationQueue {
|
|
|
127
176
|
const msg = error instanceof Error ? error.message : String(error);
|
|
128
177
|
console.debug(`[AbstractQueue] token refresh failed, proceeding with existing generator: ${msg}`);
|
|
129
178
|
}
|
|
130
|
-
const
|
|
131
|
-
//
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
179
|
+
const results = await generateFileAbstractsBatch(batch.map((it) => ({ contextPath: it.contextPath, fullContent: it.fullContent })), generator);
|
|
180
|
+
// Write all batched outputs in parallel. Empty strings are valid (model
|
|
181
|
+
// produced no content for that path) — preserves existing fail-open.
|
|
182
|
+
await Promise.all(results.flatMap((r) => {
|
|
183
|
+
const abstractPath = r.contextPath.replace(/\.md$/, '.abstract.md');
|
|
184
|
+
const overviewPath = r.contextPath.replace(/\.md$/, '.overview.md');
|
|
185
|
+
return [
|
|
186
|
+
writeFile(abstractPath, r.abstractContent, 'utf8'),
|
|
187
|
+
writeFile(overviewPath, r.overviewContent, 'utf8'),
|
|
188
|
+
];
|
|
189
|
+
}));
|
|
190
|
+
this.processed += batch.length;
|
|
191
|
+
queueLog(`process:success batchSize=${batch.length} processed=${this.processed}`);
|
|
140
192
|
}
|
|
141
193
|
catch (error) {
|
|
194
|
+
// Batch-level failure → re-enqueue each item individually with its own
|
|
195
|
+
// attempts counter, mirroring per-item retry semantics. Items past
|
|
196
|
+
// maxAttempts count as failed.
|
|
142
197
|
const msg = error instanceof Error ? error.message : String(error);
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
198
|
+
const failedThisCycle = [];
|
|
199
|
+
const retryThisCycle = [];
|
|
200
|
+
for (const item of batch) {
|
|
201
|
+
item.attempts++;
|
|
202
|
+
if (item.attempts < this.maxAttempts) {
|
|
203
|
+
retryThisCycle.push(item);
|
|
204
|
+
}
|
|
205
|
+
else {
|
|
206
|
+
this.failed++;
|
|
207
|
+
failedThisCycle.push(item);
|
|
208
|
+
queueLog(`process:failed path=${item.contextPath} failed=${this.failed}`);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
console.debug(`[AbstractQueue] batch attempt failed (${msg}); retrying=${retryThisCycle.length}, exhausted=${failedThisCycle.length}`);
|
|
212
|
+
for (const item of retryThisCycle) {
|
|
147
213
|
const delay = 500 * 2 ** (item.attempts - 1);
|
|
148
214
|
this.retrying++;
|
|
149
215
|
this.queueStatusWrite();
|
|
@@ -155,14 +221,10 @@ export class AbstractGenerationQueue {
|
|
|
155
221
|
this.scheduleNext();
|
|
156
222
|
}, delay);
|
|
157
223
|
}
|
|
158
|
-
else {
|
|
159
|
-
this.failed++;
|
|
160
|
-
queueLog(`process:failed path=${item.contextPath} failed=${this.failed}`);
|
|
161
|
-
}
|
|
162
224
|
}
|
|
163
225
|
finally {
|
|
164
226
|
this.processing = false;
|
|
165
|
-
queueLog(`process:finally
|
|
227
|
+
queueLog(`process:finally batchSize=${batch.length} pending=${this.pending.length} retrying=${this.retrying} processed=${this.processed} failed=${this.failed}`);
|
|
166
228
|
this.queueStatusWrite();
|
|
167
229
|
}
|
|
168
230
|
this.scheduleNext();
|
|
@@ -177,6 +239,9 @@ export class AbstractGenerationQueue {
|
|
|
177
239
|
if (!this.isIdle() || this.drainResolvers.length === 0) {
|
|
178
240
|
return;
|
|
179
241
|
}
|
|
242
|
+
// Reset drain state once the queue settles — next curate's enqueue burst
|
|
243
|
+
// should buffer normally up to BATCH_SIZE_CAP again.
|
|
244
|
+
this.drainRequested = false;
|
|
180
245
|
queueLog(`drain:idle pending=${this.pending.length} retrying=${this.retrying} processed=${this.processed} failed=${this.failed}`);
|
|
181
246
|
const resolvers = this.drainResolvers.splice(0);
|
|
182
247
|
const settledStatusWrite = this.statusWritePromise.catch(() => { });
|
|
@@ -185,10 +250,19 @@ export class AbstractGenerationQueue {
|
|
|
185
250
|
}
|
|
186
251
|
}
|
|
187
252
|
scheduleNext() {
|
|
188
|
-
if (!this.generator || this.processing
|
|
253
|
+
if (!this.generator || this.processing) {
|
|
254
|
+
return;
|
|
255
|
+
}
|
|
256
|
+
if (this.pending.length === 0) {
|
|
189
257
|
this.resolveDrainersIfIdle();
|
|
190
258
|
return;
|
|
191
259
|
}
|
|
260
|
+
// Buffer items below the cap unless drain has been requested (curate-end
|
|
261
|
+
// signal). This keeps the queue from firing partial 1-item batches in the
|
|
262
|
+
// middle of a multi-file curate.
|
|
263
|
+
if (this.pending.length < BATCH_SIZE_CAP && !this.drainRequested) {
|
|
264
|
+
return;
|
|
265
|
+
}
|
|
192
266
|
// eslint-disable-next-line no-void
|
|
193
267
|
setImmediate(() => { void this.processNext(); });
|
|
194
268
|
}
|
|
@@ -110,12 +110,16 @@ export class FileContributor {
|
|
|
110
110
|
*/
|
|
111
111
|
renderTemplateVariables(template, context) {
|
|
112
112
|
let result = template;
|
|
113
|
-
// Build variables from context
|
|
113
|
+
// Build variables from context.
|
|
114
|
+
// Note: a `datetime` template variable is intentionally NOT exposed here.
|
|
115
|
+
// Per-call timestamps must never enter the system prompt — they would
|
|
116
|
+
// poison the prefix cache from that byte onward. The current date/time
|
|
117
|
+
// is injected once into the iter-0 user message instead (see
|
|
118
|
+
// agent-llm-service.ts).
|
|
114
119
|
/* eslint-disable camelcase */
|
|
115
120
|
const variables = {
|
|
116
121
|
available_markers: context.availableMarkers ? Object.keys(context.availableMarkers).join(', ') : '',
|
|
117
122
|
available_tools: context.availableTools?.join(', ') ?? '',
|
|
118
|
-
datetime: `<dateTime>Current date and time: ${new Date().toISOString()}</dateTime>`,
|
|
119
123
|
};
|
|
120
124
|
/* eslint-enable camelcase */
|
|
121
125
|
// Replace {{variable}} with values
|
|
@@ -31,12 +31,21 @@ export declare class ToolManager {
|
|
|
31
31
|
/**
|
|
32
32
|
* Tools allowed for curate operations.
|
|
33
33
|
* Uses code_exec only - curate operations available via tools.curate() in sandbox.
|
|
34
|
+
*
|
|
35
|
+
* NOTE: Insertion order is load-bearing for Anthropic prompt caching.
|
|
36
|
+
* `toAiSdkTools` attaches `cacheControl: ephemeral` to the LAST tool in
|
|
37
|
+
* iteration order, which becomes the cache breakpoint. Reordering this
|
|
38
|
+
* list (or the per-call sort in `filterToolsForCommand`) silently shifts
|
|
39
|
+
* the breakpoint and can halve cache hit-rate. Append new tools at the end.
|
|
34
40
|
*/
|
|
35
41
|
private static readonly CURATE_TOOL_NAMES;
|
|
36
42
|
/**
|
|
37
43
|
* Tools allowed for query operations - only code_exec for programmatic search
|
|
38
44
|
* All search operations (searchKnowledge, glob, grep, readFile) are available
|
|
39
|
-
* via tools.* SDK inside the sandbox
|
|
45
|
+
* via tools.* SDK inside the sandbox.
|
|
46
|
+
*
|
|
47
|
+
* Same insertion-order contract as CURATE_TOOL_NAMES (Anthropic cache
|
|
48
|
+
* breakpoint lands on the last tool).
|
|
40
49
|
*/
|
|
41
50
|
private static readonly QUERY_TOOL_NAMES;
|
|
42
51
|
private cacheValid;
|
|
@@ -27,6 +27,12 @@ export class ToolManager {
|
|
|
27
27
|
/**
|
|
28
28
|
* Tools allowed for curate operations.
|
|
29
29
|
* Uses code_exec only - curate operations available via tools.curate() in sandbox.
|
|
30
|
+
*
|
|
31
|
+
* NOTE: Insertion order is load-bearing for Anthropic prompt caching.
|
|
32
|
+
* `toAiSdkTools` attaches `cacheControl: ephemeral` to the LAST tool in
|
|
33
|
+
* iteration order, which becomes the cache breakpoint. Reordering this
|
|
34
|
+
* list (or the per-call sort in `filterToolsForCommand`) silently shifts
|
|
35
|
+
* the breakpoint and can halve cache hit-rate. Append new tools at the end.
|
|
30
36
|
*/
|
|
31
37
|
static CURATE_TOOL_NAMES = [
|
|
32
38
|
'agentic_map',
|
|
@@ -37,7 +43,10 @@ export class ToolManager {
|
|
|
37
43
|
/**
|
|
38
44
|
* Tools allowed for query operations - only code_exec for programmatic search
|
|
39
45
|
* All search operations (searchKnowledge, glob, grep, readFile) are available
|
|
40
|
-
* via tools.* SDK inside the sandbox
|
|
46
|
+
* via tools.* SDK inside the sandbox.
|
|
47
|
+
*
|
|
48
|
+
* Same insertion-order contract as CURATE_TOOL_NAMES (Anthropic cache
|
|
49
|
+
* breakpoint lands on the last tool).
|
|
41
50
|
*/
|
|
42
51
|
static QUERY_TOOL_NAMES = [
|
|
43
52
|
'code_exec',
|
|
@@ -15,6 +15,22 @@ export declare const PendingMergeSchema: z.ZodObject<{
|
|
|
15
15
|
sourceFile: string;
|
|
16
16
|
suggestedByDreamId: string;
|
|
17
17
|
}>;
|
|
18
|
+
/**
|
|
19
|
+
* One entry in the stale-summary queue drained at the next dream cycle.
|
|
20
|
+
* `enqueuedAt` is preserved across dedup'd re-enqueues so future telemetry
|
|
21
|
+
* (e.g., "oldest waiting path") can read meaningful wait times even though
|
|
22
|
+
* no consumer reads it today.
|
|
23
|
+
*/
|
|
24
|
+
export declare const StaleSummaryEntrySchema: z.ZodObject<{
|
|
25
|
+
enqueuedAt: z.ZodNumber;
|
|
26
|
+
path: z.ZodString;
|
|
27
|
+
}, "strip", z.ZodTypeAny, {
|
|
28
|
+
path: string;
|
|
29
|
+
enqueuedAt: number;
|
|
30
|
+
}, {
|
|
31
|
+
path: string;
|
|
32
|
+
enqueuedAt: number;
|
|
33
|
+
}>;
|
|
18
34
|
export declare const DreamStateSchema: z.ZodObject<{
|
|
19
35
|
curationsSinceDream: z.ZodNumber;
|
|
20
36
|
lastDreamAt: z.ZodNullable<z.ZodString>;
|
|
@@ -35,6 +51,16 @@ export declare const DreamStateSchema: z.ZodObject<{
|
|
|
35
51
|
sourceFile: string;
|
|
36
52
|
suggestedByDreamId: string;
|
|
37
53
|
}>, "many">>>;
|
|
54
|
+
staleSummaryPaths: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
55
|
+
enqueuedAt: z.ZodNumber;
|
|
56
|
+
path: z.ZodString;
|
|
57
|
+
}, "strip", z.ZodTypeAny, {
|
|
58
|
+
path: string;
|
|
59
|
+
enqueuedAt: number;
|
|
60
|
+
}, {
|
|
61
|
+
path: string;
|
|
62
|
+
enqueuedAt: number;
|
|
63
|
+
}>, "many">>>;
|
|
38
64
|
totalDreams: z.ZodNumber;
|
|
39
65
|
version: z.ZodLiteral<1>;
|
|
40
66
|
}, "strip", z.ZodTypeAny, {
|
|
@@ -48,6 +74,10 @@ export declare const DreamStateSchema: z.ZodObject<{
|
|
|
48
74
|
sourceFile: string;
|
|
49
75
|
suggestedByDreamId: string;
|
|
50
76
|
}[];
|
|
77
|
+
staleSummaryPaths: {
|
|
78
|
+
path: string;
|
|
79
|
+
enqueuedAt: number;
|
|
80
|
+
}[];
|
|
51
81
|
totalDreams: number;
|
|
52
82
|
}, {
|
|
53
83
|
version: 1;
|
|
@@ -61,7 +91,12 @@ export declare const DreamStateSchema: z.ZodObject<{
|
|
|
61
91
|
sourceFile: string;
|
|
62
92
|
suggestedByDreamId: string;
|
|
63
93
|
}[] | undefined;
|
|
94
|
+
staleSummaryPaths?: {
|
|
95
|
+
path: string;
|
|
96
|
+
enqueuedAt: number;
|
|
97
|
+
}[] | undefined;
|
|
64
98
|
}>;
|
|
65
99
|
export type DreamState = z.infer<typeof DreamStateSchema>;
|
|
66
100
|
export type PendingMerge = z.infer<typeof PendingMergeSchema>;
|
|
101
|
+
export type StaleSummaryEntry = z.infer<typeof StaleSummaryEntrySchema>;
|
|
67
102
|
export declare const EMPTY_DREAM_STATE: DreamState;
|
|
@@ -5,11 +5,25 @@ export const PendingMergeSchema = z.object({
|
|
|
5
5
|
sourceFile: z.string(),
|
|
6
6
|
suggestedByDreamId: z.string(),
|
|
7
7
|
});
|
|
8
|
+
/**
|
|
9
|
+
* One entry in the stale-summary queue drained at the next dream cycle.
|
|
10
|
+
* `enqueuedAt` is preserved across dedup'd re-enqueues so future telemetry
|
|
11
|
+
* (e.g., "oldest waiting path") can read meaningful wait times even though
|
|
12
|
+
* no consumer reads it today.
|
|
13
|
+
*/
|
|
14
|
+
export const StaleSummaryEntrySchema = z.object({
|
|
15
|
+
enqueuedAt: z.number().int().nonnegative(),
|
|
16
|
+
// Empty paths indicate a bug at the call site (a malformed diff entry would
|
|
17
|
+
// resolve to an empty parent dir); reject them at the schema boundary so
|
|
18
|
+
// garbage cannot persist into dream-state.json.
|
|
19
|
+
path: z.string().min(1),
|
|
20
|
+
});
|
|
8
21
|
export const DreamStateSchema = z.object({
|
|
9
22
|
curationsSinceDream: z.number().int().min(0),
|
|
10
23
|
lastDreamAt: z.string().datetime().nullable(),
|
|
11
24
|
lastDreamLogId: z.string().nullable(),
|
|
12
25
|
pendingMerges: z.array(PendingMergeSchema).optional().default([]),
|
|
26
|
+
staleSummaryPaths: z.array(StaleSummaryEntrySchema).optional().default([]),
|
|
13
27
|
totalDreams: z.number().int().min(0),
|
|
14
28
|
version: z.literal(1),
|
|
15
29
|
});
|
|
@@ -18,6 +32,7 @@ export const EMPTY_DREAM_STATE = {
|
|
|
18
32
|
lastDreamAt: null,
|
|
19
33
|
lastDreamLogId: null,
|
|
20
34
|
pendingMerges: [],
|
|
35
|
+
staleSummaryPaths: [],
|
|
21
36
|
totalDreams: 0,
|
|
22
37
|
version: 1,
|
|
23
38
|
};
|
|
@@ -11,6 +11,28 @@ type DreamStateServiceOptions = {
|
|
|
11
11
|
export declare class DreamStateService {
|
|
12
12
|
private readonly stateFilePath;
|
|
13
13
|
constructor(opts: DreamStateServiceOptions);
|
|
14
|
+
/**
|
|
15
|
+
* Atomic drain — reads the current queue and clears it in a single RMW,
|
|
16
|
+
* returning the deduped path list. The caller is responsible for retrying
|
|
17
|
+
* (re-enqueueing the returned snapshot) if the downstream work fails.
|
|
18
|
+
*
|
|
19
|
+
* Atomicity is the load-bearing property: any enqueue that runs after the
|
|
20
|
+
* drain returns sees an empty queue, so it always appends a fresh entry
|
|
21
|
+
* that survives independently of whether the downstream propagation succeeds
|
|
22
|
+
* or fails. Earlier "snapshot + clear-later" approaches lost same-path
|
|
23
|
+
* enqueues: the dedup check on enqueue saw the still-present snapshot entry
|
|
24
|
+
* and skipped, then `clear()` removed it.
|
|
25
|
+
*/
|
|
26
|
+
drainStaleSummaryPaths(): Promise<string[]>;
|
|
27
|
+
/**
|
|
28
|
+
* Append the given file paths to the stale-summary queue, deduping by path.
|
|
29
|
+
* A path already in the queue keeps its original `enqueuedAt` timestamp so
|
|
30
|
+
* "how long has this been waiting?" telemetry stays meaningful.
|
|
31
|
+
*
|
|
32
|
+
* Serialized through {@link update} so concurrent enqueues from parallel
|
|
33
|
+
* curate tasks do not lose entries. Empty input is a no-op (no write).
|
|
34
|
+
*/
|
|
35
|
+
enqueueStaleSummaryPaths(paths: string[]): Promise<void>;
|
|
14
36
|
/**
|
|
15
37
|
* Read-modify-write under a per-file mutex. Serializes concurrent increments
|
|
16
38
|
* from parallel curate tasks within the same agent process so no updates are lost.
|
|
@@ -37,6 +37,57 @@ export class DreamStateService {
|
|
|
37
37
|
constructor(opts) {
|
|
38
38
|
this.stateFilePath = join(opts.baseDir, STATE_FILENAME);
|
|
39
39
|
}
|
|
40
|
+
/**
|
|
41
|
+
* Atomic drain — reads the current queue and clears it in a single RMW,
|
|
42
|
+
* returning the deduped path list. The caller is responsible for retrying
|
|
43
|
+
* (re-enqueueing the returned snapshot) if the downstream work fails.
|
|
44
|
+
*
|
|
45
|
+
* Atomicity is the load-bearing property: any enqueue that runs after the
|
|
46
|
+
* drain returns sees an empty queue, so it always appends a fresh entry
|
|
47
|
+
* that survives independently of whether the downstream propagation succeeds
|
|
48
|
+
* or fails. Earlier "snapshot + clear-later" approaches lost same-path
|
|
49
|
+
* enqueues: the dedup check on enqueue saw the still-present snapshot entry
|
|
50
|
+
* and skipped, then `clear()` removed it.
|
|
51
|
+
*/
|
|
52
|
+
async drainStaleSummaryPaths() {
|
|
53
|
+
let snapshot = [];
|
|
54
|
+
await this.update((state) => {
|
|
55
|
+
snapshot = state.staleSummaryPaths.map((e) => e.path);
|
|
56
|
+
if (snapshot.length === 0)
|
|
57
|
+
return state;
|
|
58
|
+
return { ...state, staleSummaryPaths: [] };
|
|
59
|
+
});
|
|
60
|
+
return snapshot;
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Append the given file paths to the stale-summary queue, deduping by path.
|
|
64
|
+
* A path already in the queue keeps its original `enqueuedAt` timestamp so
|
|
65
|
+
* "how long has this been waiting?" telemetry stays meaningful.
|
|
66
|
+
*
|
|
67
|
+
* Serialized through {@link update} so concurrent enqueues from parallel
|
|
68
|
+
* curate tasks do not lose entries. Empty input is a no-op (no write).
|
|
69
|
+
*/
|
|
70
|
+
async enqueueStaleSummaryPaths(paths) {
|
|
71
|
+
if (paths.length === 0)
|
|
72
|
+
return;
|
|
73
|
+
// Dedup the input itself before checking against the queue — callers may
|
|
74
|
+
// pass non-unique arrays (e.g. multiple changed paths within a single
|
|
75
|
+
// curate that round-trip through the same parent dir).
|
|
76
|
+
const incoming = [...new Set(paths)];
|
|
77
|
+
const enqueuedAt = Date.now();
|
|
78
|
+
await this.update((state) => {
|
|
79
|
+
const existing = new Set(state.staleSummaryPaths.map((e) => e.path));
|
|
80
|
+
const additions = incoming
|
|
81
|
+
.filter((p) => !existing.has(p))
|
|
82
|
+
.map((p) => ({ enqueuedAt, path: p }));
|
|
83
|
+
if (additions.length === 0)
|
|
84
|
+
return state;
|
|
85
|
+
return {
|
|
86
|
+
...state,
|
|
87
|
+
staleSummaryPaths: [...state.staleSummaryPaths, ...additions],
|
|
88
|
+
};
|
|
89
|
+
});
|
|
90
|
+
}
|
|
40
91
|
/**
|
|
41
92
|
* Read-modify-write under a per-file mutex. Serializes concurrent increments
|
|
42
93
|
* from parallel curate tasks within the same agent process so no updates are lost.
|
|
@@ -49,11 +100,11 @@ export class DreamStateService {
|
|
|
49
100
|
const raw = await readFile(this.stateFilePath, 'utf8');
|
|
50
101
|
const parsed = DreamStateSchema.safeParse(JSON.parse(raw));
|
|
51
102
|
if (!parsed.success)
|
|
52
|
-
return { ...EMPTY_DREAM_STATE
|
|
103
|
+
return { ...EMPTY_DREAM_STATE };
|
|
53
104
|
return parsed.data;
|
|
54
105
|
}
|
|
55
106
|
catch {
|
|
56
|
-
return { ...EMPTY_DREAM_STATE
|
|
107
|
+
return { ...EMPTY_DREAM_STATE };
|
|
57
108
|
}
|
|
58
109
|
}
|
|
59
110
|
/**
|
|
@@ -68,7 +119,15 @@ export class DreamStateService {
|
|
|
68
119
|
return mutex.withLock(async () => {
|
|
69
120
|
const state = await this.read();
|
|
70
121
|
const next = updater(state);
|
|
71
|
-
|
|
122
|
+
// Skip the write when the updater returned the same state reference.
|
|
123
|
+
// Existing call sites (drainStaleSummaryPaths on empty queue,
|
|
124
|
+
// enqueueStaleSummaryPaths with all-duplicate input) already follow
|
|
125
|
+
// this convention by returning `state` unchanged — making the no-op
|
|
126
|
+
// contract observable at the disk level avoids a tmpfile + rename on
|
|
127
|
+
// every empty drain.
|
|
128
|
+
if (next !== state) {
|
|
129
|
+
await this.write(next);
|
|
130
|
+
}
|
|
72
131
|
return next;
|
|
73
132
|
});
|
|
74
133
|
}
|
|
@@ -48,8 +48,12 @@ export class DreamTrigger {
|
|
|
48
48
|
return { eligible: false, reason: `Too recent (${hoursSince.toFixed(1)}h < ${minHours}h)` };
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
|
-
// Gate 2: Activity
|
|
52
|
-
|
|
51
|
+
// Gate 2: Activity. Bypassed when the stale-summary queue has deferred
|
|
52
|
+
// work — leaving entries indefinitely strands `_index.md` regeneration
|
|
53
|
+
// in low-activity projects (the very projects ENG-2485 most affects,
|
|
54
|
+
// since 1–2 curates over a 12h window otherwise sit under minCurations
|
|
55
|
+
// forever). Dream is the canonical drain point; if it has work, run.
|
|
56
|
+
if (state.curationsSinceDream < minCurations && state.staleSummaryPaths.length === 0) {
|
|
53
57
|
return {
|
|
54
58
|
eligible: false,
|
|
55
59
|
reason: `Not enough activity (${state.curationsSinceDream} < ${minCurations} curations)`,
|
|
@@ -69,4 +69,20 @@ export declare class CurateExecutor implements ICurateExecutor {
|
|
|
69
69
|
* @throws {FileValidationError} If all files fail validation
|
|
70
70
|
*/
|
|
71
71
|
private processFileReferences;
|
|
72
|
+
/**
|
|
73
|
+
* Phase 4: snapshot diff → enqueue stale paths for dream → rebuild manifest.
|
|
74
|
+
*
|
|
75
|
+
* Summary cascade regeneration (the LLM-driven `propagateStaleness` walk) is
|
|
76
|
+
* deferred to the next dream cycle to keep curate's hot path free of LLM
|
|
77
|
+
* calls. The manifest is rebuilt inline because it is a pure file scan (no
|
|
78
|
+
* LLM) and keeps newly-curated leaf files immediately discoverable via
|
|
79
|
+
* manifest-driven retrieval.
|
|
80
|
+
*
|
|
81
|
+
* Two independent fail-open concerns: (a) enqueue the deferred summary-cascade
|
|
82
|
+
* work to dream's queue; (b) rebuild the search manifest. They share
|
|
83
|
+
* `changedPaths` but otherwise are unrelated — a transient disk error on the
|
|
84
|
+
* dream-state write must not skip the pure-filesystem manifest scan. Each
|
|
85
|
+
* runs in its own try block so one failure cannot mask the other's work.
|
|
86
|
+
*/
|
|
87
|
+
private propagateAndRebuild;
|
|
72
88
|
}
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
+
import { recon as reconHelper } from '../../../agent/infra/sandbox/curation-helpers.js';
|
|
2
3
|
import { BRV_DIR } from '../../constants.js';
|
|
3
4
|
import { FileValidationError } from '../../core/domain/errors/task-error.js';
|
|
4
5
|
import { createFileContentReader, } from '../../utils/file-content-reader.js';
|
|
5
6
|
import { validateFileForCurate } from '../../utils/file-validator.js';
|
|
7
|
+
import { FileContextTreeManifestService } from '../context-tree/file-context-tree-manifest-service.js';
|
|
6
8
|
import { FileContextTreeSnapshotService } from '../context-tree/file-context-tree-snapshot-service.js';
|
|
7
|
-
import {
|
|
9
|
+
import { diffStates } from '../context-tree/snapshot-diff.js';
|
|
8
10
|
import { DreamStateService } from '../dream/dream-state-service.js';
|
|
9
11
|
import { PreCompactionService } from './pre-compaction/pre-compaction-service.js';
|
|
10
12
|
/**
|
|
@@ -94,12 +96,30 @@ export class CurateExecutor {
|
|
|
94
96
|
preview: effectiveContext.slice(0, 500),
|
|
95
97
|
type: 'string',
|
|
96
98
|
};
|
|
97
|
-
//
|
|
99
|
+
// Pre-pipeline the recon step (deterministic helper) so the agent loop
|
|
100
|
+
// doesn't spend its first iteration calling tools.curation.recon. The
|
|
101
|
+
// result is injected as a sandbox variable for code-exec access AND
|
|
102
|
+
// its key findings are surfaced inline in the prompt so the agent's
|
|
103
|
+
// first iteration can proceed directly to extraction. recon is pure
|
|
104
|
+
// JS — no LLM judgment is needed for whether to call it; the answer
|
|
105
|
+
// is always "yes, first thing." Surfacing it as an agent-tool meant
|
|
106
|
+
// paying a full LLM iteration just to invoke a deterministic helper.
|
|
107
|
+
const initialHistory = { entries: [], totalProcessed: 0 };
|
|
108
|
+
// The `metadata` arg is currently unused by `recon` — the helper
|
|
109
|
+
// recomputes char/line/message counts from `effectiveContext`
|
|
110
|
+
// directly. Passed through here to match the helper's existing
|
|
111
|
+
// signature; do NOT assume changing `metadata` will alter
|
|
112
|
+
// `reconResult`.
|
|
113
|
+
const reconResult = reconHelper(effectiveContext, metadata, initialHistory);
|
|
114
|
+
const reconVar = `__recon_result_${taskIdSafe}`;
|
|
115
|
+
// Inject context, metadata, empty history, taskId, and pre-computed
|
|
116
|
+
// recon result into the TASK session's sandbox.
|
|
98
117
|
const taskIdVar = `__taskId_${taskIdSafe}`;
|
|
99
118
|
agent.setSandboxVariableOnSession(taskSessionId, ctxVar, effectiveContext);
|
|
100
|
-
agent.setSandboxVariableOnSession(taskSessionId, histVar,
|
|
119
|
+
agent.setSandboxVariableOnSession(taskSessionId, histVar, initialHistory);
|
|
101
120
|
agent.setSandboxVariableOnSession(taskSessionId, metaVar, metadata);
|
|
102
121
|
agent.setSandboxVariableOnSession(taskSessionId, taskIdVar, taskId);
|
|
122
|
+
agent.setSandboxVariableOnSession(taskSessionId, reconVar, reconResult);
|
|
103
123
|
// Prompt with curation helpers guidance (tools.curation.* replaces manual infrastructure code)
|
|
104
124
|
const prompt = [
|
|
105
125
|
`Curate using RLM approach.`,
|
|
@@ -107,7 +127,8 @@ export class CurateExecutor {
|
|
|
107
127
|
`History variable: ${histVar}`,
|
|
108
128
|
`Metadata variable: ${metaVar}`,
|
|
109
129
|
`Task ID variable: ${taskIdVar} (pass as bare variable, not a string)`,
|
|
110
|
-
`
|
|
130
|
+
`Recon already computed in ${reconVar}: suggestedMode=${reconResult.suggestedMode}, suggestedChunkCount=${reconResult.suggestedChunkCount}, charCount=${reconResult.meta.charCount}, lineCount=${reconResult.meta.lineCount}, messageCount=${reconResult.meta.messageCount}.`,
|
|
131
|
+
`IMPORTANT: Do NOT print raw context. Do NOT call tools.curation.recon — it has been pre-computed. Proceed directly to extraction.`,
|
|
111
132
|
`For chunked extraction use tools.curation.mapExtract(). Pass taskId: ${taskIdVar} (bare variable).`,
|
|
112
133
|
`IMPORTANT: Any code_exec call containing mapExtract MUST use timeout: 300000 on the code_exec tool call itself (not inside mapExtract options).`,
|
|
113
134
|
`Use tools.curation.groupBySubject() and tools.curation.dedup() to organize extractions.`,
|
|
@@ -129,7 +150,7 @@ export class CurateExecutor {
|
|
|
129
150
|
}
|
|
130
151
|
const finalize = async () => {
|
|
131
152
|
try {
|
|
132
|
-
await
|
|
153
|
+
await this.propagateAndRebuild({ baseDir, preState, snapshotService });
|
|
133
154
|
await this.incrementDreamCounter(baseDir);
|
|
134
155
|
await agent.drainBackgroundWork?.();
|
|
135
156
|
}
|
|
@@ -273,4 +294,54 @@ export class CurateExecutor {
|
|
|
273
294
|
// Format with actual content
|
|
274
295
|
return this.formatFileContentsForPrompt(readResults, skippedFiles, projectRoot);
|
|
275
296
|
}
|
|
297
|
+
/**
|
|
298
|
+
* Phase 4: snapshot diff → enqueue stale paths for dream → rebuild manifest.
|
|
299
|
+
*
|
|
300
|
+
* Summary cascade regeneration (the LLM-driven `propagateStaleness` walk) is
|
|
301
|
+
* deferred to the next dream cycle to keep curate's hot path free of LLM
|
|
302
|
+
* calls. The manifest is rebuilt inline because it is a pure file scan (no
|
|
303
|
+
* LLM) and keeps newly-curated leaf files immediately discoverable via
|
|
304
|
+
* manifest-driven retrieval.
|
|
305
|
+
*
|
|
306
|
+
* Two independent fail-open concerns: (a) enqueue the deferred summary-cascade
|
|
307
|
+
* work to dream's queue; (b) rebuild the search manifest. They share
|
|
308
|
+
* `changedPaths` but otherwise are unrelated — a transient disk error on the
|
|
309
|
+
* dream-state write must not skip the pure-filesystem manifest scan. Each
|
|
310
|
+
* runs in its own try block so one failure cannot mask the other's work.
|
|
311
|
+
*/
|
|
312
|
+
async propagateAndRebuild(args) {
|
|
313
|
+
const { baseDir, preState, snapshotService } = args;
|
|
314
|
+
if (!preState)
|
|
315
|
+
return;
|
|
316
|
+
let changedPaths = [];
|
|
317
|
+
try {
|
|
318
|
+
const postState = await snapshotService.getCurrentState(baseDir);
|
|
319
|
+
changedPaths = diffStates(preState, postState);
|
|
320
|
+
}
|
|
321
|
+
catch {
|
|
322
|
+
// Fail-open: snapshot errors leave changedPaths empty → no enqueue,
|
|
323
|
+
// no manifest rebuild. Next curate's snapshot will pick up the diff.
|
|
324
|
+
}
|
|
325
|
+
if (changedPaths.length === 0)
|
|
326
|
+
return;
|
|
327
|
+
try {
|
|
328
|
+
const dreamStateService = new DreamStateService({ baseDir: path.join(baseDir, BRV_DIR) });
|
|
329
|
+
await dreamStateService.enqueueStaleSummaryPaths(changedPaths);
|
|
330
|
+
}
|
|
331
|
+
catch {
|
|
332
|
+
// Fail-open: queue write errors never block curation. If this write
|
|
333
|
+
// fails the changed paths are lost from the deferred queue; they will
|
|
334
|
+
// only be re-captured if the same files are modified in a later curate
|
|
335
|
+
// (diffStates compares a fresh pre/post snapshot pair, not a persistent
|
|
336
|
+
// accumulator) or picked up by dream's own snapshot diff if dream
|
|
337
|
+
// touches them.
|
|
338
|
+
}
|
|
339
|
+
try {
|
|
340
|
+
const manifestService = new FileContextTreeManifestService({ baseDirectory: baseDir });
|
|
341
|
+
await manifestService.buildManifest(baseDir);
|
|
342
|
+
}
|
|
343
|
+
catch {
|
|
344
|
+
// Fail-open: manifest rebuild is best-effort pre-warming.
|
|
345
|
+
}
|
|
346
|
+
}
|
|
276
347
|
}
|