substrate-ai 0.13.1 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { BMAD_BASELINE_TOKENS_FULL, DoltMergeConflict, FileStateStore, STOP_AFTER_VALID_PHASES, STORY_KEY_PATTERN, VALID_PHASES, WorkGraphRepository, __commonJS, __require, __toESM, buildPipelineStatusOutput, createDatabaseAdapter, formatOutput, formatPipelineSummary, formatTokenTelemetry, inspectProcessTree, parseDbTimestampAsUtc, resolveMainRepoRoot, validateStoryKey } from "./health-DswaC1q5.js";
|
|
2
2
|
import { createLogger } from "./logger-KeHncl-f.js";
|
|
3
3
|
import { TypedEventBusImpl, createEventBus, createTuiApp, isTuiCapable, printNonTtyWarning, sleep } from "./helpers-CElYrONe.js";
|
|
4
|
-
import { ADVISORY_NOTES, Categorizer, ConsumerAnalyzer, DEFAULT_GLOBAL_SETTINGS, DispatcherImpl, DoltClient, ESCALATION_DIAGNOSIS, EfficiencyScorer, IngestionServer, LogTurnAnalyzer, OPERATIONAL_FINDING, Recommender, RoutingRecommender, RoutingResolver, RoutingTelemetry, RoutingTokenAccumulator, RoutingTuner, STORY_METRICS, STORY_OUTCOME, SubstrateConfigSchema, TEST_EXPANSION_FINDING, TEST_PLAN, TelemetryNormalizer, TelemetryPipeline, TurnAnalyzer, addTokenUsage, aggregateTokenUsageForRun, aggregateTokenUsageForStory, callLLM, createConfigSystem, createDecision, createPipelineRun, createRequirement, detectInterfaceChanges, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByCategory, getDecisionsByPhase, getDecisionsByPhaseForRun, getPipelineRunById, getRunningPipelineRuns, getStoryMetricsForRun, getTokenUsageSummary, initSchema, loadModelRoutingConfig, registerArtifact, updatePipelineRun, updatePipelineRunConfig, upsertDecision, writeRunMetrics, writeStoryMetrics } from "./dist-CLvAwmT7.js";
|
|
4
|
+
import { ADVISORY_NOTES, Categorizer, ConsumerAnalyzer, DEFAULT_GLOBAL_SETTINGS, DispatcherImpl, DoltClient, ESCALATION_DIAGNOSIS, EfficiencyScorer, IngestionServer, LogTurnAnalyzer, OPERATIONAL_FINDING, Recommender, RoutingRecommender, RoutingResolver, RoutingTelemetry, RoutingTokenAccumulator, RoutingTuner, STORY_METRICS, STORY_OUTCOME, SubstrateConfigSchema, TEST_EXPANSION_FINDING, TEST_PLAN, TelemetryNormalizer, TelemetryPipeline, TurnAnalyzer, addTokenUsage, aggregateTokenUsageForRun, aggregateTokenUsageForStory, callLLM, createConfigSystem, createDatabaseAdapter$1, createDecision, createPipelineRun, createRequirement, detectInterfaceChanges, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByCategory, getDecisionsByPhase, getDecisionsByPhaseForRun, getPipelineRunById, getRunningPipelineRuns, getStoryMetricsForRun, getTokenUsageSummary, initSchema, loadModelRoutingConfig, registerArtifact, updatePipelineRun, updatePipelineRunConfig, upsertDecision, writeRunMetrics, writeStoryMetrics } from "./dist-CLvAwmT7.js";
|
|
5
5
|
import { basename, dirname, extname, join } from "path";
|
|
6
6
|
import { access, readFile, readdir, stat } from "fs/promises";
|
|
7
7
|
import { EventEmitter } from "node:events";
|
|
@@ -22162,6 +22162,10 @@ var RunStateManager = class {
|
|
|
22162
22162
|
*/
|
|
22163
22163
|
function createConvergenceController() {
|
|
22164
22164
|
const outcomes = new Map();
|
|
22165
|
+
/** Returns true only when id is non-empty AND exists in graph.nodes. */
|
|
22166
|
+
function isValidTarget(id, graph) {
|
|
22167
|
+
return id !== "" && graph.nodes.has(id);
|
|
22168
|
+
}
|
|
22165
22169
|
return {
|
|
22166
22170
|
recordOutcome(nodeId, status) {
|
|
22167
22171
|
outcomes.set(nodeId, status);
|
|
@@ -22177,10 +22181,656 @@ function createConvergenceController() {
|
|
|
22177
22181
|
satisfied: failingNodes.length === 0,
|
|
22178
22182
|
failingNodes
|
|
22179
22183
|
};
|
|
22184
|
+
},
|
|
22185
|
+
checkGoalGates(graph, runId, eventBus, options) {
|
|
22186
|
+
const failedGates = [];
|
|
22187
|
+
for (const [id, node] of graph.nodes) {
|
|
22188
|
+
if (!node.goalGate) continue;
|
|
22189
|
+
if (options?.satisfactionThreshold !== void 0 && options?.context !== void 0) {
|
|
22190
|
+
const score = options.context.getNumber("satisfaction_score", 0);
|
|
22191
|
+
const satisfied = score >= options.satisfactionThreshold;
|
|
22192
|
+
eventBus?.emit("graph:goal-gate-checked", {
|
|
22193
|
+
runId,
|
|
22194
|
+
nodeId: id,
|
|
22195
|
+
satisfied,
|
|
22196
|
+
score
|
|
22197
|
+
});
|
|
22198
|
+
if (!satisfied) failedGates.push(id);
|
|
22199
|
+
} else {
|
|
22200
|
+
const status = outcomes.get(id);
|
|
22201
|
+
const satisfied = status === "SUCCESS" || status === "PARTIAL_SUCCESS";
|
|
22202
|
+
eventBus?.emit("graph:goal-gate-checked", {
|
|
22203
|
+
runId,
|
|
22204
|
+
nodeId: id,
|
|
22205
|
+
satisfied
|
|
22206
|
+
});
|
|
22207
|
+
if (!satisfied) failedGates.push(id);
|
|
22208
|
+
}
|
|
22209
|
+
}
|
|
22210
|
+
return {
|
|
22211
|
+
satisfied: failedGates.length === 0,
|
|
22212
|
+
failedGates
|
|
22213
|
+
};
|
|
22214
|
+
},
|
|
22215
|
+
resolveRetryTarget(failedNode, graph) {
|
|
22216
|
+
const candidates = [
|
|
22217
|
+
failedNode.retryTarget,
|
|
22218
|
+
failedNode.fallbackRetryTarget,
|
|
22219
|
+
graph.retryTarget,
|
|
22220
|
+
graph.fallbackRetryTarget
|
|
22221
|
+
];
|
|
22222
|
+
for (const candidate of candidates) if (isValidTarget(candidate, graph)) return candidate;
|
|
22223
|
+
return null;
|
|
22180
22224
|
}
|
|
22181
22225
|
};
|
|
22182
22226
|
}
|
|
22183
22227
|
|
|
22228
|
+
//#endregion
|
|
22229
|
+
//#region packages/factory/dist/convergence/budget.js
|
|
22230
|
+
const DEFAULT_BACKOFF = {
	initialDelay: 200,
	factor: 2,
	maxDelay: 60000,
	jitterFactor: 0.5
};
/**
 * Compute how long (in milliseconds) to wait before the next retry attempt.
 *
 * Exponential growth capped at `maxDelay`:
 *   base   = initialDelay * factor^attemptIndex
 *   capped = min(base, maxDelay)
 *
 * A symmetric random jitter of up to ±(jitterFactor * capped) is then applied,
 * and the result is rounded and clamped at zero. Supplying
 * `{ jitterFactor: 0 }` yields a fully deterministic delay (useful in tests).
 *
 * @param attemptIndex - Zero-based retry index (0 = first retry).
 * @param options - Partial overrides merged over `DEFAULT_BACKOFF`.
 * @returns Non-negative integer delay in milliseconds.
 */
function computeBackoffDelay(attemptIndex, options) {
	const config = {
		...DEFAULT_BACKOFF,
		...options
	};
	const uncapped = config.initialDelay * config.factor ** attemptIndex;
	const capped = Math.min(uncapped, config.maxDelay);
	const noise = (Math.random() * 2 - 1) * config.jitterFactor * capped;
	return Math.max(0, Math.round(capped + noise));
}
|
|
22262
|
+
/**
 * Decide whether the pipeline may dispatch its next node, given the cost
 * accumulated so far and the configured cap.
 *
 * A cap of `0` means "unlimited" (the `FactoryConfigSchema` default for
 * `budget_cap_usd`) and always permits dispatch. Enforcement uses a strict
 * greater-than comparison: spending exactly the cap is still allowed; only
 * exceeding it halts the pipeline, per the PRD's "exceeds the cap" wording.
 *
 * @param accumulatedCost - USD spent so far in this pipeline run.
 * @param cap - Maximum USD allowed; `0` disables enforcement.
 * @returns `{ allowed: true }` to proceed, otherwise
 *          `{ allowed: false, reason }` describing the overrun.
 */
function checkPipelineBudget(accumulatedCost, cap) {
	const enforced = cap !== 0;
	if (enforced && accumulatedCost > cap) {
		return {
			allowed: false,
			reason: `pipeline budget exhausted: $${accumulatedCost.toFixed(2)} > $${cap.toFixed(2)}`
		};
	}
	return { allowed: true };
}
|
|
22287
|
+
/**
 * Accumulates USD cost for one pipeline run and enforces a spending cap by
 * delegating to `checkPipelineBudget`.
 *
 * Lifecycle: one instance per run. The executor (story 45-8) calls
 * `addCost()` after each node dispatch and `checkBudget()` before the next
 * one; `reset()` clears state between runs or between tests.
 */
var PipelineBudgetManager = class {
	totalCost = 0;
	/**
	 * Record the cost of a completed node dispatch.
	 * @param amount - Cost in USD to add to the running total.
	 */
	addCost(amount) {
		this.totalCost += amount;
	}
	/** Current accumulated cost (USD) for this run. */
	getTotalCost() {
		return this.totalCost;
	}
	/** Zero the accumulated cost (test isolation / pipeline reuse). */
	reset() {
		this.totalCost = 0;
	}
	/**
	 * Ask whether the next node may be dispatched under the given cap.
	 * Delegates to `checkPipelineBudget` with the accumulated total.
	 * @param cap - Maximum allowed USD; `0` disables enforcement.
	 */
	checkBudget(cap) {
		return checkPipelineBudget(this.totalCost, cap);
	}
};
|
|
22329
|
+
/**
 * Decide whether a pipeline session may dispatch its next node based on
 * wall-clock time elapsed so far.
 *
 * A cap of `0` ms means "unlimited" (the `FactoryConfigSchema` default for
 * `wall_clock_cap_seconds`) and always permits dispatch. Enforcement uses a
 * strict greater-than comparison: an elapsed time exactly equal to the cap is
 * still allowed — only exceeding it halts the session, per the PRD's
 * "exceeds the cap" wording.
 *
 * @param elapsedMs - Milliseconds since the session started.
 * @param capMs - Maximum allowed elapsed milliseconds; `0` disables
 *                enforcement entirely.
 * @returns `{ allowed: true }` to proceed, otherwise
 *          `{ allowed: false, reason: 'wall clock budget exhausted' }`.
 */
function checkSessionBudget(elapsedMs, capMs) {
	const enforced = capMs !== 0;
	if (enforced && elapsedMs > capMs) {
		return {
			allowed: false,
			reason: "wall clock budget exhausted"
		};
	}
	return { allowed: true };
}
|
|
22357
|
+
/**
 * Measures wall-clock time for one pipeline session and enforces a time cap
 * by delegating to `checkSessionBudget`.
 *
 * Lifecycle: constructed at pipeline launch, one instance per run. The
 * executor (story 45-8) calls `checkBudget()` before each node dispatch as
 * the highest-priority budget check (ahead of `PipelineBudgetManager`);
 * `reset()` restarts the clock between runs or between tests.
 *
 * A `capSeconds` of `0` passed to `checkBudget` disables enforcement and
 * always returns `{ allowed: true }`.
 */
var SessionBudgetManager = class {
	// Epoch-ms timestamp marking the start of the measurement window;
	// set at construction (field initializers run inside the implicit ctor).
	startTime = Date.now();
	/**
	 * Milliseconds elapsed since construction (or the last `reset()`).
	 * Always non-negative.
	 */
	getElapsedMs() {
		return Date.now() - this.startTime;
	}
	/**
	 * Restart the clock from "now"; subsequent `getElapsedMs()` calls measure
	 * from this new baseline. Useful for test isolation and pipeline reuse.
	 */
	reset() {
		this.startTime = Date.now();
	}
	/**
	 * Ask whether the next node may be dispatched, converting the cap from
	 * seconds (as stored in `FactoryConfig.wall_clock_cap_seconds`) to
	 * milliseconds and delegating to `checkSessionBudget`.
	 * @param capSeconds - Maximum allowed elapsed **seconds**; `0` disables
	 *                     enforcement.
	 */
	checkBudget(capSeconds) {
		return checkSessionBudget(this.getElapsedMs(), capSeconds * 1e3);
	}
};
|
|
22403
|
+
|
|
22404
|
+
//#endregion
|
|
22405
|
+
//#region packages/factory/dist/convergence/plateau.js
|
|
22406
|
+
/**
|
|
22407
|
+
* Plateau detection for the convergence loop.
|
|
22408
|
+
* Story 45-6: provides pure plateau detection primitives — no I/O, no side effects.
|
|
22409
|
+
*
|
|
22410
|
+
* Algorithm: Track the last N satisfaction scores (N = `window`, default 3).
|
|
22411
|
+
* If max−min of the window falls strictly below threshold, declare plateau.
|
|
22412
|
+
*
|
|
22413
|
+
* Consumed by:
|
|
22414
|
+
* - Story 45-8 (convergence controller integration)
|
|
22415
|
+
*/
|
|
22416
|
+
const DEFAULT_WINDOW = 3;
|
|
22417
|
+
const DEFAULT_THRESHOLD = .05;
|
|
22418
|
+
/**
|
|
22419
|
+
* Create a new PlateauDetector with the given options.
|
|
22420
|
+
*
|
|
22421
|
+
* **Defaults:** `window=3`, `threshold=0.05` — matching `FactoryConfigSchema.plateau_window`
|
|
22422
|
+
* and `FactoryConfigSchema.plateau_threshold`. Story 45-8 will read these values from
|
|
22423
|
+
* `FactoryConfig` and pass them in.
|
|
22424
|
+
*
|
|
22425
|
+
* **Insufficient-data guard:** `isPlateaued()` always returns `false` when fewer than
|
|
22426
|
+
* `window` scores have been recorded. A plateau can only be declared once the window is full.
|
|
22427
|
+
*
|
|
22428
|
+
* @param options - Optional configuration for window size and threshold.
|
|
22429
|
+
*/
|
|
22430
|
+
function createPlateauDetector(options) {
|
|
22431
|
+
const window = options?.window ?? DEFAULT_WINDOW;
|
|
22432
|
+
const threshold = options?.threshold ?? DEFAULT_THRESHOLD;
|
|
22433
|
+
let scores = [];
|
|
22434
|
+
return {
|
|
22435
|
+
recordScore(_iteration, score) {
|
|
22436
|
+
scores.push(score);
|
|
22437
|
+
scores = scores.slice(-window);
|
|
22438
|
+
},
|
|
22439
|
+
isPlateaued() {
|
|
22440
|
+
if (scores.length < window) return false;
|
|
22441
|
+
const delta = Math.max(...scores) - Math.min(...scores);
|
|
22442
|
+
return delta < threshold;
|
|
22443
|
+
},
|
|
22444
|
+
getWindow() {
|
|
22445
|
+
return window;
|
|
22446
|
+
},
|
|
22447
|
+
getScores() {
|
|
22448
|
+
return [...scores];
|
|
22449
|
+
}
|
|
22450
|
+
};
|
|
22451
|
+
}
|
|
22452
|
+
/**
 * Check a detector for plateau and, when plateaued, emit
 * `convergence:plateau-detected` on the supplied event bus.
 *
 * Mirrors the `checkGoalGates()` split of concerns:
 * - pure detection lives in the `PlateauDetector`;
 * - event emission lives only here;
 * - omitting `eventBus` makes this a pure check (nothing is emitted).
 *
 * @param detector - A `PlateauDetector` instance.
 * @param context - `{ runId, nodeId, eventBus? }`.
 * @returns `{ plateaued, scores }`; the event fires only when `plateaued`
 *          is true and a bus was provided.
 */
function checkPlateauAndEmit(detector, context) {
	const scores = detector.getScores();
	const plateaued = detector.isPlateaued();
	if (plateaued) {
		context.eventBus?.emit("convergence:plateau-detected", {
			runId: context.runId,
			nodeId: context.nodeId,
			scores,
			window: detector.getWindow()
		});
	}
	return {
		plateaued,
		scores
	};
}
|
|
22486
|
+
|
|
22487
|
+
//#endregion
|
|
22488
|
+
//#region packages/factory/dist/convergence/remediation.js
|
|
22489
|
+
/**
|
|
22490
|
+
* Remediation context injection for the convergence loop.
|
|
22491
|
+
* Story 45-7: builds structured remediation context from failure data and
|
|
22492
|
+
* injects it into a retried node's IGraphContext.
|
|
22493
|
+
*
|
|
22494
|
+
* Architecture reference: Section 6.5 — Remediation Context fields
|
|
22495
|
+
*
|
|
22496
|
+
* Pure functions (`formatScenarioDiff`, `deriveFixScope`, `buildRemediationContext`)
|
|
22497
|
+
* have no I/O and no side effects.
|
|
22498
|
+
* Only `injectRemediationContext` mutates state (the IGraphContext).
|
|
22499
|
+
*
|
|
22500
|
+
* Consumed by:
|
|
22501
|
+
* - Story 45-8 (convergence controller integration with executor)
|
|
22502
|
+
* - CodergenBackend handlers (via `getRemediationContext`)
|
|
22503
|
+
*/
|
|
22504
|
+
/**
|
|
22505
|
+
* The agreed key under which remediation context is stored in `IGraphContext`.
|
|
22506
|
+
* Namespaced under `convergence.` to avoid collision with user-defined context keys.
|
|
22507
|
+
* Story 45-8 writes this key; CodergenBackend handlers read it via `getRemediationContext()`.
|
|
22508
|
+
*/
|
|
22509
|
+
const REMEDIATION_CONTEXT_KEY = "convergence.remediation";
|
|
22510
|
+
/**
 * Render the failed scenarios of a ScenarioRunResult as a human-readable list.
 *
 * Pure formatting, no side effects. Each failed scenario yields one line
 * `"- {name}: {output}"`, where output prefers stderr (most useful for
 * debugging), then stdout (some tools report errors there), then the literal
 * `'(no output)'`.
 *
 * @returns The joined failure lines, or `"All scenarios passed"` when no
 *          scenario has status `"fail"`.
 */
function formatScenarioDiff(results) {
	const failures = results.scenarios.filter((scenario) => scenario.status === "fail");
	if (failures.length === 0) return "All scenarios passed";
	return failures.map((scenario) => `- ${scenario.name}: ${scenario.stderr || scenario.stdout || "(no output)"}`).join("\n");
}
|
|
22529
|
+
/**
 * Build a focused fix instruction for the retried agent from failed scenarios.
 *
 * Produces `"Fix {n} failing scenario{s}: {name1}, {name2}, ..."` — singular
 * "scenario" when exactly one failure, plural otherwise — or the empty string
 * when every scenario passed. Pure, no side effects.
 */
function deriveFixScope(results) {
	const failures = results.scenarios.filter((scenario) => scenario.status === "fail");
	if (failures.length === 0) return "";
	const noun = failures.length === 1 ? "scenario" : "scenarios";
	const nameList = failures.map((scenario) => scenario.name).join(", ");
	return `Fix ${failures.length} failing ${noun}: ${nameList}`;
}
|
|
22546
|
+
/**
 * Assemble a complete `RemediationContext` from failure data.
 *
 * `scenarioResults` is optional — first-iteration retries may not have run
 * scenarios yet. Without it, `scenarioDiff` is the literal
 * `"No scenario results available"` and `fixScope` is `""`.
 *
 * The score history is stored as a defensive copy so later mutation of the
 * caller's array cannot corrupt the context. Pure, no side effects.
 */
function buildRemediationContext(params) {
	const { scenarioResults } = params;
	return {
		previousFailureReason: params.previousFailureReason,
		scenarioDiff: scenarioResults ? formatScenarioDiff(scenarioResults) : "No scenario results available",
		iterationCount: params.iterationCount,
		satisfactionScoreHistory: params.satisfactionScoreHistory.slice(),
		fixScope: scenarioResults ? deriveFixScope(scenarioResults) : ""
	};
}
|
|
22567
|
+
/**
 * Store a `RemediationContext` on an `IGraphContext` under
 * `REMEDIATION_CONTEXT_KEY`, overwriting any previous value.
 *
 * Invoked by the executor's retry loop (wired in story 45-8) just before the
 * retried node is dispatched; handlers read it back via
 * `getRemediationContext`. This is the only function in this region that
 * mutates state.
 */
function injectRemediationContext(context, remediation) {
	context.set(REMEDIATION_CONTEXT_KEY, remediation);
}
|
|
22576
|
+
|
|
22577
|
+
//#endregion
|
|
22578
|
+
//#region packages/factory/dist/convergence/dual-signal.js
|
|
22579
|
+
/**
|
|
22580
|
+
* Dual-signal coordinator — evaluates code review and scenario signals together.
|
|
22581
|
+
*
|
|
22582
|
+
* Code review is the authoritative Phase 2 decision-maker.
|
|
22583
|
+
* Scenario score is a parallel signal used for monitoring and agreement tracking.
|
|
22584
|
+
*
|
|
22585
|
+
* Story 46-5.
|
|
22586
|
+
*/
|
|
22587
|
+
/**
|
|
22588
|
+
* Context key under which code review handlers store their verdict.
|
|
22589
|
+
* Used by the executor to read the verdict when emitting advisory events.
|
|
22590
|
+
* Story 46-6.
|
|
22591
|
+
*/
|
|
22592
|
+
const CONTEXT_KEY_CODE_REVIEW_VERDICT = "factory.codeReviewVerdict";
|
|
22593
|
+
/**
 * Evaluate the code-review and scenario signals together, with no side effects.
 *
 * A verdict of `SHIP_IT` or `LGTM_WITH_NOTES` counts as a code-review pass;
 * the scenario signal passes when `score >= threshold`. The two signals
 * `AGREE` when their pass/fail outcomes match, otherwise `DISAGREE`. The
 * code-review verdict is always returned as the authoritative decision.
 */
function evaluateDualSignal(verdict, score, threshold) {
	const codeReviewPassed = ["SHIP_IT", "LGTM_WITH_NOTES"].includes(verdict);
	const scenarioPassed = score >= threshold;
	return {
		codeReviewPassed,
		scenarioPassed,
		agreement: codeReviewPassed === scenarioPassed ? "AGREE" : "DISAGREE",
		authoritativeDecision: verdict,
		score,
		threshold
	};
}
|
|
22612
|
+
/**
 * Build a DualSignalCoordinator whose `evaluate()` scores both signals and
 * emits a `scenario:score-computed` event on every call.
 *
 * When `options.qualityMode === 'scenario-primary'` it additionally emits
 * `scenario:advisory-computed`, carrying the code-review verdict as advisory
 * information. Story 46-6.
 */
function createDualSignalCoordinator(options) {
	return {
		evaluate(verdict, score, runId) {
			const result = evaluateDualSignal(verdict, score, options.threshold);
			options.eventBus.emit("scenario:score-computed", {
				runId,
				score: result.score,
				threshold: result.threshold,
				passes: result.scenarioPassed,
				agreement: result.agreement,
				codeReviewPassed: result.codeReviewPassed,
				scenarioPassed: result.scenarioPassed,
				authoritativeDecision: result.authoritativeDecision
			});
			if (options.qualityMode === "scenario-primary") {
				options.eventBus.emit("scenario:advisory-computed", {
					runId,
					verdict: result.authoritativeDecision,
					codeReviewPassed: result.codeReviewPassed,
					score: result.score,
					threshold: result.threshold,
					agreement: result.agreement
				});
			}
			return result;
		}
	};
}
|
|
22644
|
+
|
|
22645
|
+
//#endregion
|
|
22646
|
+
//#region packages/factory/dist/scenarios/scorer.js
|
|
22647
|
+
/**
 * Derive a satisfaction score from an aggregated ScenarioRunResult.
 *
 * Every scenario carries equal weight (1.0). The overall score is
 * `summary.passed / summary.total` (0 when there are no scenarios), and the
 * per-scenario `breakdown` (added in Epic 46, story 46-1) records each
 * scenario's pass state and its `1/total` contribution when it passed.
 *
 * @param result - Aggregated scenario run result (`scenarios` + `summary`).
 * @param threshold - Minimum passing score; defaults to 0.8.
 * @returns `{ score, passes, threshold, breakdown }`.
 */
function computeSatisfactionScore(result, threshold = 0.8) {
	const count = result.scenarios.length;
	const breakdown = result.scenarios.map((scenario) => {
		const didPass = scenario.status === "pass";
		return {
			name: scenario.name,
			passed: didPass,
			weight: 1,
			// A passing scenario contributes its equal share; failures (and the
			// degenerate empty case) contribute nothing.
			contribution: didPass && count > 0 ? 1 / count : 0
		};
	});
	const { total, passed } = result.summary;
	const score = total === 0 ? 0 : passed / total;
	return {
		score,
		passes: score >= threshold,
		threshold,
		breakdown
	};
}
|
|
22679
|
+
|
|
22680
|
+
//#endregion
|
|
22681
|
+
//#region packages/factory/dist/persistence/factory-queries.js
|
|
22682
|
+
/**
|
|
22683
|
+
* Factory persistence query functions for graph runs, node results, and scenario results.
|
|
22684
|
+
*
|
|
22685
|
+
* All functions accept a DatabaseAdapter as first argument, follow the established
|
|
22686
|
+
* pattern from @substrate-ai/core's metrics queries, and use portable SQL patterns
|
|
22687
|
+
* (select-then-delete-then-insert in transactions) compatible with both
|
|
22688
|
+
* InMemoryDatabaseAdapter and DoltDatabaseAdapter.
|
|
22689
|
+
*
|
|
22690
|
+
* Story 46-3: Score Persistence to Database.
|
|
22691
|
+
*/
|
|
22692
|
+
/**
 * Insert or replace one `graph_runs` row.
 *
 * Uses the portable select-then-delete-then-insert pattern inside a single
 * transaction rather than the SQLite-specific `INSERT OR REPLACE`, so it
 * works against both InMemoryDatabaseAdapter and DoltDatabaseAdapter.
 *
 * - First call (status `'running'`): no prior row, plain insert.
 * - Later call (status `'completed'`/`'failed'`): the stale row is deleted
 *   and the refreshed row (with completion details) inserted.
 *
 * Optional fields default to NULL; `total_cost_usd` and `node_count`
 * default to 0.
 */
async function upsertGraphRun(adapter, input) {
	await adapter.transaction(async (tx) => {
		const prior = await tx.query("SELECT id FROM graph_runs WHERE id = ?", [input.id]);
		if (prior.length > 0) {
			await tx.query("DELETE FROM graph_runs WHERE id = ?", [input.id]);
		}
		await tx.query(`INSERT INTO graph_runs (
			id, graph_file, graph_goal, status, started_at, completed_at,
			total_cost_usd, node_count, final_outcome, checkpoint_path
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
			input.id,
			input.graph_file,
			input.graph_goal ?? null,
			input.status,
			input.started_at,
			input.completed_at ?? null,
			input.total_cost_usd ?? 0,
			input.node_count ?? 0,
			input.final_outcome ?? null,
			input.checkpoint_path ?? null
		]);
	});
}
|
|
22723
|
+
/**
 * Append one `graph_node_results` row recording a single node execution
 * attempt.
 *
 * Plain insert — no upsert — because (run_id, node_id, attempt) already
 * uniquely identifies each record. Optional fields default to NULL, except
 * `cost_usd` which defaults to 0.
 */
async function insertGraphNodeResult(adapter, input) {
	await adapter.transaction(async (tx) => {
		await tx.query(`INSERT INTO graph_node_results (
			run_id, node_id, attempt, status, started_at, completed_at,
			duration_ms, cost_usd, failure_reason, context_snapshot
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
			input.run_id,
			input.node_id,
			input.attempt,
			input.status,
			input.started_at,
			input.completed_at ?? null,
			input.duration_ms ?? null,
			input.cost_usd ?? 0,
			input.failure_reason ?? null,
			input.context_snapshot ?? null
		]);
	});
}
|
|
22748
|
+
/**
 * Append one `scenario_results` row for a single scenario run iteration.
 *
 * `details` (the score breakdown) is expected to already be a JSON string;
 * it is stored as-is, NULL when absent. The boolean `passes` is persisted as
 * 1/0, and `executed_at` falls back to the current ISO timestamp when not
 * supplied.
 */
async function insertScenarioResult(adapter, input) {
	await adapter.transaction(async (tx) => {
		await tx.query(`INSERT INTO scenario_results (
			run_id, node_id, iteration, total_scenarios, passed, failed,
			satisfaction_score, threshold, passes, details, executed_at
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
			input.run_id,
			input.node_id,
			input.iteration,
			input.total_scenarios,
			input.passed,
			input.failed,
			input.satisfaction_score,
			input.threshold,
			input.passes ? 1 : 0,
			input.details ?? null,
			input.executed_at ?? new Date().toISOString()
		]);
	});
}
|
|
22774
|
+
/**
 * Fetch every scenario result for a run, ordered by iteration ascending.
 *
 * The stored 1/0 `passes` column is converted back to a boolean on the way
 * out; all other columns pass through unchanged.
 *
 * @returns ScenarioResultRow[] ordered by iteration; empty array when the
 *          run has no scenario results.
 */
async function getScenarioResultsForRun(adapter, runId) {
	const rows = await adapter.query("SELECT * FROM scenario_results WHERE run_id = ? ORDER BY iteration ASC", [runId]);
	return rows.map((row) => Object.assign({}, row, { passes: Boolean(row.passes) }));
}
|
|
22786
|
+
/**
|
|
22787
|
+
* List graph run records in descending started_at order.
|
|
22788
|
+
*
|
|
22789
|
+
* @param limit - Maximum number of rows to return (default 20).
|
|
22790
|
+
*/
|
|
22791
|
+
async function listGraphRuns(adapter, limit = 20) {
|
|
22792
|
+
return adapter.query("SELECT * FROM graph_runs ORDER BY started_at DESC LIMIT ?", [limit]);
|
|
22793
|
+
}
|
|
22794
|
+
/**
|
|
22795
|
+
* Retrieve a summarized list of factory graph runs, enriched with per-run
|
|
22796
|
+
* iteration counts and latest satisfaction scores from `scenario_results`.
|
|
22797
|
+
*
|
|
22798
|
+
* Uses two queries (not a JOIN) for portability across adapters:
|
|
22799
|
+
* 1. SELECT from graph_runs ordered by started_at DESC
|
|
22800
|
+
* 2. GROUP BY aggregation on scenario_results for iteration counts and scores
|
|
22801
|
+
*
|
|
22802
|
+
* @param adapter - Database adapter (already opened)
|
|
22803
|
+
* @param limit - Maximum number of graph runs to return (default 20)
|
|
22804
|
+
* @returns Array of FactoryRunSummary ordered by started_at DESC; empty array on error
|
|
22805
|
+
*/
|
|
22806
|
+
async function getFactoryRunSummaries(adapter, limit = 20) {
|
|
22807
|
+
const runs = await adapter.query("SELECT id, started_at, completed_at, total_cost_usd, final_outcome FROM graph_runs ORDER BY started_at DESC LIMIT ?", [limit]);
|
|
22808
|
+
if (runs.length === 0) return [];
|
|
22809
|
+
const scenarioAgg = await adapter.query("SELECT run_id, COUNT(*) as iterations, MAX(satisfaction_score) as satisfaction_score FROM scenario_results GROUP BY run_id", []);
|
|
22810
|
+
const latestPassesRows = await adapter.query("SELECT s.run_id, s.passes FROM scenario_results s INNER JOIN (SELECT run_id, MAX(iteration) AS max_iter FROM scenario_results GROUP BY run_id) latest ON s.run_id = latest.run_id AND s.iteration = latest.max_iter", []);
|
|
22811
|
+
const scenarioMap = new Map();
|
|
22812
|
+
for (const row of scenarioAgg) scenarioMap.set(row.run_id, {
|
|
22813
|
+
iterations: row.iterations,
|
|
22814
|
+
satisfaction_score: row.satisfaction_score
|
|
22815
|
+
});
|
|
22816
|
+
const latestPassesMap = new Map();
|
|
22817
|
+
for (const row of latestPassesRows) latestPassesMap.set(row.run_id, row.passes !== 0);
|
|
22818
|
+
return runs.map((run) => {
|
|
22819
|
+
const agg = scenarioMap.get(run.id);
|
|
22820
|
+
return {
|
|
22821
|
+
run_id: run.id,
|
|
22822
|
+
satisfaction_score: agg !== void 0 ? agg.satisfaction_score : null,
|
|
22823
|
+
iterations: agg !== void 0 ? agg.iterations : 0,
|
|
22824
|
+
convergence_status: run.final_outcome,
|
|
22825
|
+
started_at: run.started_at,
|
|
22826
|
+
completed_at: run.completed_at,
|
|
22827
|
+
total_cost_usd: run.total_cost_usd,
|
|
22828
|
+
type: "factory",
|
|
22829
|
+
passes: latestPassesMap.has(run.id) ? latestPassesMap.get(run.id) ?? null : null
|
|
22830
|
+
};
|
|
22831
|
+
});
|
|
22832
|
+
}
|
|
22833
|
+
|
|
22184
22834
|
//#endregion
|
|
22185
22835
|
//#region packages/factory/dist/graph/executor.js
|
|
22186
22836
|
/**
|
|
@@ -22220,17 +22870,6 @@ function normalizeOutcomeStatus(raw) {
|
|
|
22220
22870
|
};
|
|
22221
22871
|
}
|
|
22222
22872
|
/**
|
|
22223
|
-
* Compute exponential backoff delay with ±50% jitter.
|
|
22224
|
-
*
|
|
22225
|
-
* @param attempt - Zero-indexed attempt number (0 = first retry, 1 = second, etc.)
|
|
22226
|
-
* @returns Delay in milliseconds, floored at 0 and capped at 60,000ms
|
|
22227
|
-
*/
|
|
22228
|
-
function computeBackoffDelay(attempt) {
|
|
22229
|
-
const rawDelay = Math.min(200 * Math.pow(2, attempt), 6e4);
|
|
22230
|
-
const jitter = rawDelay * .5 * (2 * Math.random() - 1);
|
|
22231
|
-
return Math.max(0, rawDelay + jitter);
|
|
22232
|
-
}
|
|
22233
|
-
/**
|
|
22234
22873
|
* Dispatch a node handler with exponential backoff retry on FAIL outcomes.
|
|
22235
22874
|
*
|
|
22236
22875
|
* Emits `graph:node-retried` before each retry attempt.
|
|
@@ -22289,6 +22928,31 @@ function createGraphExecutor() {
|
|
|
22289
22928
|
const checkpointManager = new CheckpointManager();
|
|
22290
22929
|
const checkpointFilePath = path.join(config.logsRoot, "checkpoint.json");
|
|
22291
22930
|
const controller = createConvergenceController();
|
|
22931
|
+
const sessionManager = new SessionBudgetManager();
|
|
22932
|
+
const pipelineManager = new PipelineBudgetManager();
|
|
22933
|
+
const plateauDetector = createPlateauDetector({
|
|
22934
|
+
...config.plateauWindow !== void 0 ? { window: config.plateauWindow } : {},
|
|
22935
|
+
...config.plateauThreshold !== void 0 ? { threshold: config.plateauThreshold } : {}
|
|
22936
|
+
});
|
|
22937
|
+
let convergenceIteration = 0;
|
|
22938
|
+
const runStartedAt = new Date().toISOString();
|
|
22939
|
+
let lastScenarioNodeId = "";
|
|
22940
|
+
const persistExit = async (finalStatus, finalOutcome) => {
|
|
22941
|
+
if (!config.adapter) return;
|
|
22942
|
+
try {
|
|
22943
|
+
await upsertGraphRun(config.adapter, {
|
|
22944
|
+
id: config.runId,
|
|
22945
|
+
graph_file: graph.id || config.runId,
|
|
22946
|
+
...graph.goal ? { graph_goal: graph.goal } : {},
|
|
22947
|
+
status: finalStatus,
|
|
22948
|
+
started_at: runStartedAt,
|
|
22949
|
+
completed_at: new Date().toISOString(),
|
|
22950
|
+
final_outcome: finalOutcome,
|
|
22951
|
+
total_cost_usd: pipelineManager.getTotalCost(),
|
|
22952
|
+
node_count: graph.nodes.size
|
|
22953
|
+
});
|
|
22954
|
+
} catch {}
|
|
22955
|
+
};
|
|
22292
22956
|
let completedNodes = [];
|
|
22293
22957
|
let nodeRetries = {};
|
|
22294
22958
|
let context = new GraphContext();
|
|
@@ -22302,6 +22966,40 @@ function createGraphExecutor() {
|
|
|
22302
22966
|
if (config.scenarioStore) scenarioManifest = await config.scenarioStore.discover();
|
|
22303
22967
|
const runStateManager = config.dotSource ? new RunStateManager({ runDir: config.logsRoot }) : null;
|
|
22304
22968
|
if (runStateManager) await runStateManager.initRun(config.dotSource);
|
|
22969
|
+
if (config.adapter && config.eventBus) {
|
|
22970
|
+
const persistAdapter = config.adapter;
|
|
22971
|
+
const scenarioHandler = (payload) => {
|
|
22972
|
+
const threshold = config.satisfactionThreshold ?? .8;
|
|
22973
|
+
const scored = computeSatisfactionScore(payload.results, threshold);
|
|
22974
|
+
insertScenarioResult(persistAdapter, {
|
|
22975
|
+
run_id: config.runId,
|
|
22976
|
+
node_id: lastScenarioNodeId || "unknown",
|
|
22977
|
+
iteration: payload.iteration,
|
|
22978
|
+
total_scenarios: payload.results.summary.total,
|
|
22979
|
+
passed: payload.results.summary.passed,
|
|
22980
|
+
failed: payload.results.summary.failed,
|
|
22981
|
+
satisfaction_score: scored.score,
|
|
22982
|
+
threshold: scored.threshold,
|
|
22983
|
+
passes: scored.passes,
|
|
22984
|
+
details: JSON.stringify(scored.breakdown),
|
|
22985
|
+
executed_at: new Date().toISOString()
|
|
22986
|
+
}).catch((err) => {
|
|
22987
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
22988
|
+
console.debug(`[executor] scenario:completed persistence failed for run ${config.runId}: ${msg}`);
|
|
22989
|
+
});
|
|
22990
|
+
};
|
|
22991
|
+
config.eventBus.on("scenario:completed", scenarioHandler);
|
|
22992
|
+
}
|
|
22993
|
+
if (config.adapter) try {
|
|
22994
|
+
await upsertGraphRun(config.adapter, {
|
|
22995
|
+
id: config.runId,
|
|
22996
|
+
graph_file: graph.id || config.runId,
|
|
22997
|
+
...graph.goal ? { graph_goal: graph.goal } : {},
|
|
22998
|
+
status: "running",
|
|
22999
|
+
started_at: runStartedAt,
|
|
23000
|
+
node_count: graph.nodes.size
|
|
23001
|
+
});
|
|
23002
|
+
} catch {}
|
|
22305
23003
|
let currentNode;
|
|
22306
23004
|
if (config.checkpointPath) {
|
|
22307
23005
|
const checkpoint = await checkpointManager.load(config.checkpointPath);
|
|
@@ -22328,32 +23026,102 @@ function createGraphExecutor() {
|
|
|
22328
23026
|
}
|
|
22329
23027
|
} else currentNode = graph.startNode();
|
|
22330
23028
|
while (true) {
|
|
23029
|
+
const sessionResult = sessionManager.checkBudget((config.wallClockCapMs ?? 0) / 1e3);
|
|
23030
|
+
if (!sessionResult.allowed) {
|
|
23031
|
+
config.eventBus?.emit("convergence:budget-exhausted", {
|
|
23032
|
+
runId: config.runId,
|
|
23033
|
+
level: "session",
|
|
23034
|
+
reason: sessionResult.reason
|
|
23035
|
+
});
|
|
23036
|
+
await persistExit("failed", `Session budget exceeded: ${sessionResult.reason}`);
|
|
23037
|
+
return {
|
|
23038
|
+
status: "FAIL",
|
|
23039
|
+
failureReason: `Session budget exceeded: ${sessionResult.reason}`
|
|
23040
|
+
};
|
|
23041
|
+
}
|
|
23042
|
+
const pipelineResult = pipelineManager.checkBudget(config.pipelineBudgetCapUsd ?? 0);
|
|
23043
|
+
if (!pipelineResult.allowed) {
|
|
23044
|
+
config.eventBus?.emit("convergence:budget-exhausted", {
|
|
23045
|
+
runId: config.runId,
|
|
23046
|
+
level: "pipeline",
|
|
23047
|
+
reason: pipelineResult.reason
|
|
23048
|
+
});
|
|
23049
|
+
await persistExit("failed", `Pipeline budget exceeded: ${pipelineResult.reason}`);
|
|
23050
|
+
return {
|
|
23051
|
+
status: "FAIL",
|
|
23052
|
+
failureReason: `Pipeline budget exceeded: ${pipelineResult.reason}`
|
|
23053
|
+
};
|
|
23054
|
+
}
|
|
22331
23055
|
const exitNode = graph.exitNode();
|
|
22332
23056
|
if (currentNode.id === exitNode.id) {
|
|
22333
|
-
const
|
|
23057
|
+
const useScenarioPrimary = config.qualityMode === "scenario-primary";
|
|
23058
|
+
const gateResult = controller.checkGoalGates(graph, config.runId, config.eventBus, useScenarioPrimary || config.satisfactionThreshold !== void 0 ? {
|
|
23059
|
+
context,
|
|
23060
|
+
satisfactionThreshold: config.satisfactionThreshold ?? .8
|
|
23061
|
+
} : void 0);
|
|
23062
|
+
if (useScenarioPrimary && config.eventBus) {
|
|
23063
|
+
const rawVerdict = context.getString(CONTEXT_KEY_CODE_REVIEW_VERDICT, "");
|
|
23064
|
+
if (rawVerdict !== "") {
|
|
23065
|
+
const codeReviewVerdict = rawVerdict;
|
|
23066
|
+
const coordinator = createDualSignalCoordinator({
|
|
23067
|
+
eventBus: config.eventBus,
|
|
23068
|
+
threshold: config.satisfactionThreshold ?? .8,
|
|
23069
|
+
qualityMode: "scenario-primary"
|
|
23070
|
+
});
|
|
23071
|
+
const score = context.getNumber("satisfaction_score", 0);
|
|
23072
|
+
coordinator.evaluate(codeReviewVerdict, score, config.runId);
|
|
23073
|
+
}
|
|
23074
|
+
}
|
|
22334
23075
|
if (!gateResult.satisfied) {
|
|
22335
|
-
const failingNodeId = gateResult.
|
|
23076
|
+
const failingNodeId = gateResult.failedGates[0];
|
|
22336
23077
|
const failingGateNode = graph.nodes.get(failingNodeId);
|
|
22337
|
-
const
|
|
22338
|
-
if (
|
|
22339
|
-
|
|
22340
|
-
|
|
22341
|
-
|
|
22342
|
-
|
|
23078
|
+
const retryTargetId = failingGateNode ? controller.resolveRetryTarget(failingGateNode, graph) : null;
|
|
23079
|
+
if (!retryTargetId) {
|
|
23080
|
+
await persistExit("failed", "Goal gate failed: no retry target");
|
|
23081
|
+
return {
|
|
23082
|
+
status: "FAIL",
|
|
23083
|
+
failureReason: "Goal gate failed: no retry target"
|
|
23084
|
+
};
|
|
22343
23085
|
}
|
|
22344
|
-
|
|
22345
|
-
|
|
22346
|
-
|
|
22347
|
-
|
|
23086
|
+
const retryNode = graph.nodes.get(retryTargetId);
|
|
23087
|
+
if (!retryNode) throw new Error(`Retry target node "${retryTargetId}" not found in graph`);
|
|
23088
|
+
convergenceIteration++;
|
|
23089
|
+
const satisfactionScore = context.getNumber("satisfaction_score", 0);
|
|
23090
|
+
plateauDetector.recordScore(convergenceIteration, satisfactionScore);
|
|
23091
|
+
const plateauResult = checkPlateauAndEmit(plateauDetector, {
|
|
23092
|
+
runId: config.runId,
|
|
23093
|
+
nodeId: retryTargetId,
|
|
23094
|
+
...config.eventBus ? { eventBus: config.eventBus } : {}
|
|
23095
|
+
});
|
|
23096
|
+
if (plateauResult.plateaued) {
|
|
23097
|
+
await persistExit("failed", `Convergence plateau detected after ${convergenceIteration} iteration(s)`);
|
|
23098
|
+
return {
|
|
23099
|
+
status: "FAIL",
|
|
23100
|
+
failureReason: `Convergence plateau detected after ${convergenceIteration} iteration(s): scores plateaued at [${plateauResult.scores.join(", ")}]`
|
|
23101
|
+
};
|
|
23102
|
+
}
|
|
23103
|
+
const remediation = buildRemediationContext({
|
|
23104
|
+
previousFailureReason: `Goal gate unsatisfied: ${gateResult.failedGates.join(", ")}`,
|
|
23105
|
+
iterationCount: convergenceIteration,
|
|
23106
|
+
satisfactionScoreHistory: plateauResult.scores
|
|
23107
|
+
});
|
|
23108
|
+
injectRemediationContext(context, remediation);
|
|
23109
|
+
skipCycleCheck = true;
|
|
23110
|
+
currentNode = retryNode;
|
|
23111
|
+
continue;
|
|
22348
23112
|
}
|
|
23113
|
+
await persistExit("completed", "SUCCESS");
|
|
22349
23114
|
return { status: "SUCCESS" };
|
|
22350
23115
|
}
|
|
22351
23116
|
if (resumeCompletedSet?.has(currentNode.id)) {
|
|
22352
23117
|
const skipEdge = selectEdge(currentNode, { status: "SUCCESS" }, context, graph);
|
|
22353
|
-
if (!skipEdge)
|
|
22354
|
-
|
|
22355
|
-
|
|
22356
|
-
|
|
23118
|
+
if (!skipEdge) {
|
|
23119
|
+
await persistExit("failed", `No outgoing edge from node ${currentNode.id}`);
|
|
23120
|
+
return {
|
|
23121
|
+
status: "FAIL",
|
|
23122
|
+
failureReason: `No outgoing edge from node ${currentNode.id}`
|
|
23123
|
+
};
|
|
23124
|
+
}
|
|
22357
23125
|
config.eventBus?.emit("graph:edge-selected", {
|
|
22358
23126
|
runId: config.runId,
|
|
22359
23127
|
fromNode: currentNode.id,
|
|
@@ -22381,6 +23149,7 @@ function createGraphExecutor() {
|
|
|
22381
23149
|
nodeId: currentNode.id,
|
|
22382
23150
|
tampered: integrityResult.tampered
|
|
22383
23151
|
});
|
|
23152
|
+
await persistExit("failed", `Scenario integrity violation before node "${currentNode.id}"`);
|
|
22384
23153
|
return {
|
|
22385
23154
|
status: "FAIL",
|
|
22386
23155
|
failureReason: `Scenario integrity violation detected before node "${currentNode.id}": tampered files: ${integrityResult.tampered.join(", ")}`
|
|
@@ -22392,6 +23161,7 @@ function createGraphExecutor() {
|
|
|
22392
23161
|
scenarioCount: scenarioManifest.scenarios.length
|
|
22393
23162
|
});
|
|
22394
23163
|
}
|
|
23164
|
+
lastScenarioNodeId = currentNode.id;
|
|
22395
23165
|
config.eventBus?.emit("graph:node-started", {
|
|
22396
23166
|
runId: config.runId,
|
|
22397
23167
|
nodeId: currentNode.id,
|
|
@@ -22441,6 +23211,24 @@ function createGraphExecutor() {
|
|
|
22441
23211
|
controller.recordOutcome(nodeToDispatch.id, controllerStatus);
|
|
22442
23212
|
}
|
|
22443
23213
|
if (outcome.contextUpdates) for (const [key, value] of Object.entries(outcome.contextUpdates)) context.set(key, value);
|
|
23214
|
+
const nodeCost = context.getNumber("factory.lastNodeCostUsd", 0);
|
|
23215
|
+
if (nodeCost > 0) pipelineManager.addCost(nodeCost);
|
|
23216
|
+
if (config.adapter) {
|
|
23217
|
+
const nodeCompletedAt = Date.now();
|
|
23218
|
+
try {
|
|
23219
|
+
await insertGraphNodeResult(config.adapter, {
|
|
23220
|
+
run_id: config.runId,
|
|
23221
|
+
node_id: nodeToDispatch.id,
|
|
23222
|
+
attempt: (nodeRetries[nodeToDispatch.id] ?? 0) + 1,
|
|
23223
|
+
status: outcome.status,
|
|
23224
|
+
started_at: new Date(startedAt).toISOString(),
|
|
23225
|
+
completed_at: new Date(nodeCompletedAt).toISOString(),
|
|
23226
|
+
duration_ms: nodeCompletedAt - startedAt,
|
|
23227
|
+
cost_usd: nodeCost,
|
|
23228
|
+
...outcome.failureReason !== void 0 ? { failure_reason: outcome.failureReason } : {}
|
|
23229
|
+
});
|
|
23230
|
+
} catch {}
|
|
23231
|
+
}
|
|
22444
23232
|
if (!skipCompletedPush) completedNodes.push(currentNode.id);
|
|
22445
23233
|
skipCompletedPush = false;
|
|
22446
23234
|
await checkpointManager.save(config.logsRoot, {
|
|
@@ -22455,7 +23243,7 @@ function createGraphExecutor() {
|
|
|
22455
23243
|
checkpointPath: checkpointFilePath
|
|
22456
23244
|
});
|
|
22457
23245
|
if (outcome.status === "FAIL") {
|
|
22458
|
-
const retryTarget =
|
|
23246
|
+
const retryTarget = controller.resolveRetryTarget(currentNode, graph);
|
|
22459
23247
|
if (retryTarget) {
|
|
22460
23248
|
const retryNode = graph.nodes.get(retryTarget);
|
|
22461
23249
|
if (!retryNode) throw new Error(`Retry target node "${retryTarget}" not found in graph`);
|
|
@@ -22463,16 +23251,20 @@ function createGraphExecutor() {
|
|
|
22463
23251
|
currentNode = retryNode;
|
|
22464
23252
|
continue;
|
|
22465
23253
|
}
|
|
23254
|
+
await persistExit("failed", outcome.failureReason ?? "FAIL");
|
|
22466
23255
|
return {
|
|
22467
23256
|
status: "FAIL",
|
|
22468
23257
|
...outcome.failureReason !== void 0 && { failureReason: outcome.failureReason }
|
|
22469
23258
|
};
|
|
22470
23259
|
}
|
|
22471
23260
|
const edge = selectEdge(currentNode, outcome, context, graph);
|
|
22472
|
-
if (!edge)
|
|
22473
|
-
|
|
22474
|
-
|
|
22475
|
-
|
|
23261
|
+
if (!edge) {
|
|
23262
|
+
await persistExit("failed", `No outgoing edge from node ${currentNode.id}`);
|
|
23263
|
+
return {
|
|
23264
|
+
status: "FAIL",
|
|
23265
|
+
failureReason: `No outgoing edge from node ${currentNode.id}`
|
|
23266
|
+
};
|
|
23267
|
+
}
|
|
22476
23268
|
config.eventBus?.emit("graph:edge-selected", {
|
|
22477
23269
|
runId: config.runId,
|
|
22478
23270
|
fromNode: currentNode.id,
|
|
@@ -22706,33 +23498,6 @@ function createCodergenHandler(options) {
|
|
|
22706
23498
|
};
|
|
22707
23499
|
}
|
|
22708
23500
|
|
|
22709
|
-
//#endregion
|
|
22710
|
-
//#region packages/factory/dist/scenarios/scorer.js
|
|
22711
|
-
/**
|
|
22712
|
-
* SatisfactionScorer — computes a satisfaction score from ScenarioRunResult.
|
|
22713
|
-
*
|
|
22714
|
-
* Score = passed / total (0.0 when total === 0).
|
|
22715
|
-
* Passes = score >= threshold (default 0.8).
|
|
22716
|
-
*
|
|
22717
|
-
* Story 44-5.
|
|
22718
|
-
*/
|
|
22719
|
-
/**
|
|
22720
|
-
* Compute a satisfaction score from a ScenarioRunResult.
|
|
22721
|
-
*
|
|
22722
|
-
* @param result - The aggregated scenario run result.
|
|
22723
|
-
* @param threshold - Minimum score to consider passing (default 0.8).
|
|
22724
|
-
* @returns A SatisfactionScore with score, passes, and threshold.
|
|
22725
|
-
*/
|
|
22726
|
-
function computeSatisfactionScore(result, threshold = .8) {
|
|
22727
|
-
const { total, passed } = result.summary;
|
|
22728
|
-
const score = total === 0 ? 0 : passed / total;
|
|
22729
|
-
return {
|
|
22730
|
-
score,
|
|
22731
|
-
passes: score >= threshold,
|
|
22732
|
-
threshold
|
|
22733
|
-
};
|
|
22734
|
-
}
|
|
22735
|
-
|
|
22736
23501
|
//#endregion
|
|
22737
23502
|
//#region packages/factory/dist/handlers/tool.js
|
|
22738
23503
|
/**
|
|
@@ -22783,7 +23548,7 @@ function createToolHandler(options) {
|
|
|
22783
23548
|
parsed = JSON.parse(stdoutBuf.trim());
|
|
22784
23549
|
} catch {}
|
|
22785
23550
|
if (isScenarioRunResult(parsed)) {
|
|
22786
|
-
const scored = computeSatisfactionScore(parsed);
|
|
23551
|
+
const scored = computeSatisfactionScore(parsed, options?.satisfactionThreshold);
|
|
22787
23552
|
resolve$2({
|
|
22788
23553
|
status: "SUCCESS",
|
|
22789
23554
|
contextUpdates: { satisfaction_score: scored.score }
|
|
@@ -27738,7 +28503,13 @@ const FactoryConfigSchema = z.object({
|
|
|
27738
28503
|
wall_clock_cap_seconds: z.number().min(0).default(0),
|
|
27739
28504
|
plateau_window: z.number().int().min(2).default(3),
|
|
27740
28505
|
plateau_threshold: z.number().min(0).max(1).default(.05),
|
|
27741
|
-
backend: z.enum(["cli", "direct"]).default("cli")
|
|
28506
|
+
backend: z.enum(["cli", "direct"]).default("cli"),
|
|
28507
|
+
quality_mode: z.enum([
|
|
28508
|
+
"code-review",
|
|
28509
|
+
"dual-signal",
|
|
28510
|
+
"scenario-primary",
|
|
28511
|
+
"scenario-only"
|
|
28512
|
+
]).default("dual-signal")
|
|
27742
28513
|
}).strict();
|
|
27743
28514
|
/**
|
|
27744
28515
|
* Extends SubstrateConfigSchema with an optional `factory:` section.
|
|
@@ -27776,6 +28547,71 @@ async function loadFactoryConfig(projectDir, explicitConfigPath) {
|
|
|
27776
28547
|
});
|
|
27777
28548
|
}
|
|
27778
28549
|
|
|
28550
|
+
//#endregion
|
|
28551
|
+
//#region packages/factory/dist/persistence/factory-schema.js
|
|
28552
|
+
/**
|
|
28553
|
+
* Factory schema DDL for graph execution and scenario validation tables.
|
|
28554
|
+
* Companion to `@substrate-ai/core`'s `initSchema` — call both during factory initialization.
|
|
28555
|
+
*/
|
|
28556
|
+
/**
|
|
28557
|
+
* Initialize all factory-specific persistence tables on the given adapter.
|
|
28558
|
+
* Idempotent — safe to call multiple times.
|
|
28559
|
+
*
|
|
28560
|
+
* Creates:
|
|
28561
|
+
* - graph_runs: top-level graph execution run records
|
|
28562
|
+
* - graph_node_results: per-node execution results within a run
|
|
28563
|
+
* - scenario_results: scenario validation outcomes within a run
|
|
28564
|
+
*/
|
|
28565
|
+
async function factorySchema(adapter) {
|
|
28566
|
+
await adapter.exec(`
|
|
28567
|
+
CREATE TABLE IF NOT EXISTS graph_runs (
|
|
28568
|
+
id VARCHAR(255) PRIMARY KEY,
|
|
28569
|
+
graph_file TEXT NOT NULL,
|
|
28570
|
+
graph_goal TEXT,
|
|
28571
|
+
status VARCHAR(32) NOT NULL DEFAULT 'running',
|
|
28572
|
+
started_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
28573
|
+
completed_at DATETIME,
|
|
28574
|
+
total_cost_usd DOUBLE NOT NULL DEFAULT 0.0,
|
|
28575
|
+
node_count INTEGER NOT NULL DEFAULT 0,
|
|
28576
|
+
final_outcome VARCHAR(32),
|
|
28577
|
+
checkpoint_path TEXT
|
|
28578
|
+
)
|
|
28579
|
+
`);
|
|
28580
|
+
await adapter.exec(`
|
|
28581
|
+
CREATE TABLE IF NOT EXISTS graph_node_results (
|
|
28582
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
28583
|
+
run_id VARCHAR(255) NOT NULL REFERENCES graph_runs(id),
|
|
28584
|
+
node_id VARCHAR(255) NOT NULL,
|
|
28585
|
+
attempt INTEGER NOT NULL DEFAULT 1,
|
|
28586
|
+
status VARCHAR(32) NOT NULL,
|
|
28587
|
+
started_at DATETIME NOT NULL,
|
|
28588
|
+
completed_at DATETIME,
|
|
28589
|
+
duration_ms INTEGER,
|
|
28590
|
+
cost_usd DOUBLE NOT NULL DEFAULT 0.0,
|
|
28591
|
+
failure_reason TEXT,
|
|
28592
|
+
context_snapshot TEXT
|
|
28593
|
+
)
|
|
28594
|
+
`);
|
|
28595
|
+
await adapter.exec("CREATE INDEX IF NOT EXISTS idx_graph_node_results_run ON graph_node_results(run_id)");
|
|
28596
|
+
await adapter.exec(`
|
|
28597
|
+
CREATE TABLE IF NOT EXISTS scenario_results (
|
|
28598
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
28599
|
+
run_id VARCHAR(255) NOT NULL REFERENCES graph_runs(id),
|
|
28600
|
+
node_id VARCHAR(255) NOT NULL,
|
|
28601
|
+
iteration INTEGER NOT NULL DEFAULT 1,
|
|
28602
|
+
total_scenarios INTEGER NOT NULL,
|
|
28603
|
+
passed INTEGER NOT NULL,
|
|
28604
|
+
failed INTEGER NOT NULL,
|
|
28605
|
+
satisfaction_score DOUBLE NOT NULL,
|
|
28606
|
+
threshold DOUBLE NOT NULL DEFAULT 0.8,
|
|
28607
|
+
passes BOOLEAN NOT NULL,
|
|
28608
|
+
details TEXT,
|
|
28609
|
+
executed_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
28610
|
+
)
|
|
28611
|
+
`);
|
|
28612
|
+
await adapter.exec("CREATE INDEX IF NOT EXISTS idx_scenario_results_run ON scenario_results(run_id)");
|
|
28613
|
+
}
|
|
28614
|
+
|
|
27779
28615
|
//#endregion
|
|
27780
28616
|
//#region packages/factory/dist/factory-command.js
|
|
27781
28617
|
/**
|
|
@@ -27805,10 +28641,17 @@ async function resolveGraphPath(opts, projectDir) {
|
|
|
27805
28641
|
return null;
|
|
27806
28642
|
}
|
|
27807
28643
|
/**
|
|
28644
|
+
* Total number of validation rules loaded by `createValidator()`.
|
|
28645
|
+
* 8 error rules + 5 warning rules = 13 total (stories 42-4, 42-5).
|
|
28646
|
+
* This is a fixed constant — the GraphValidator interface does not expose a rule count.
|
|
28647
|
+
*/
|
|
28648
|
+
const TOTAL_RULE_COUNT = 13;
|
|
28649
|
+
/**
|
|
27808
28650
|
* Register the `factory` command group on the provided Commander program.
|
|
27809
28651
|
*
|
|
27810
28652
|
* Story 44-8: registers the `scenarios` subcommand.
|
|
27811
28653
|
* Story 44-9: registers the `run` subcommand.
|
|
28654
|
+
* Story 46-7: registers the `validate` subcommand.
|
|
27812
28655
|
*/
|
|
27813
28656
|
function registerFactoryCommand(program) {
|
|
27814
28657
|
const factoryCmd = program.command("factory").description("Factory pipeline and scenario management commands");
|
|
@@ -27851,13 +28694,27 @@ function registerFactoryCommand(program) {
|
|
|
27851
28694
|
const logsRoot = path.join(projectDir, ".substrate", "runs", runId);
|
|
27852
28695
|
const stateManager = new RunStateManager({ runDir: logsRoot });
|
|
27853
28696
|
await stateManager.initRun(dotSource);
|
|
28697
|
+
/** wallClockCapMs: FactoryConfig.wall_clock_cap_seconds × 1000 (story 45-10) */
|
|
28698
|
+
const factoryConfig = await loadFactoryConfig(projectDir, opts.config);
|
|
28699
|
+
const adapter = createDatabaseAdapter$1({
|
|
28700
|
+
backend: "auto",
|
|
28701
|
+
basePath: projectDir
|
|
28702
|
+
});
|
|
28703
|
+
await factorySchema(adapter);
|
|
27854
28704
|
const executor = createGraphExecutor();
|
|
27855
28705
|
await executor.run(graph, {
|
|
27856
28706
|
runId,
|
|
27857
28707
|
logsRoot,
|
|
27858
28708
|
handlerRegistry: createDefaultRegistry(),
|
|
27859
28709
|
eventBus,
|
|
27860
|
-
dotSource
|
|
28710
|
+
dotSource,
|
|
28711
|
+
adapter,
|
|
28712
|
+
wallClockCapMs: (factoryConfig.factory?.wall_clock_cap_seconds ?? 0) * 1e3,
|
|
28713
|
+
pipelineBudgetCapUsd: factoryConfig.factory?.budget_cap_usd ?? 0,
|
|
28714
|
+
plateauWindow: factoryConfig.factory?.plateau_window ?? 3,
|
|
28715
|
+
plateauThreshold: factoryConfig.factory?.plateau_threshold ?? .05,
|
|
28716
|
+
satisfactionThreshold: factoryConfig.factory?.satisfaction_threshold ?? .8,
|
|
28717
|
+
qualityMode: factoryConfig.factory?.quality_mode ?? "dual-signal"
|
|
27861
28718
|
});
|
|
27862
28719
|
} catch (err) {
|
|
27863
28720
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -27865,6 +28722,53 @@ function registerFactoryCommand(program) {
|
|
|
27865
28722
|
process.exit(1);
|
|
27866
28723
|
}
|
|
27867
28724
|
});
|
|
28725
|
+
factoryCmd.command("validate <graph-file>").description("Parse and lint a DOT graph against all 13 validation rules").option("--output-format <format>", "Output format: json | text", "text").action(async (graphFile, opts) => {
|
|
28726
|
+
let source;
|
|
28727
|
+
try {
|
|
28728
|
+
source = await readFile$1(graphFile, "utf-8");
|
|
28729
|
+
} catch (err) {
|
|
28730
|
+
const isEnoent = err instanceof Error && err.code === "ENOENT";
|
|
28731
|
+
if (isEnoent) process.stderr.write(`Error: file not found: ${graphFile}\n`);
|
|
28732
|
+
else {
|
|
28733
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
28734
|
+
process.stderr.write(`Error: file not found: ${graphFile} (${msg})\n`);
|
|
28735
|
+
}
|
|
28736
|
+
process.exit(2);
|
|
28737
|
+
return;
|
|
28738
|
+
}
|
|
28739
|
+
let graph;
|
|
28740
|
+
try {
|
|
28741
|
+
graph = parseGraph(source);
|
|
28742
|
+
} catch (err) {
|
|
28743
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
28744
|
+
process.stderr.write(`Error: failed to parse graph: ${msg}\n`);
|
|
28745
|
+
process.exit(2);
|
|
28746
|
+
return;
|
|
28747
|
+
}
|
|
28748
|
+
const diagnostics = createValidator().validate(graph);
|
|
28749
|
+
const errors = diagnostics.filter((d) => d.severity === "error");
|
|
28750
|
+
const warnings = diagnostics.filter((d) => d.severity === "warning");
|
|
28751
|
+
const firedRuleIds = new Set(diagnostics.map((d) => d.ruleId));
|
|
28752
|
+
const passedCount = TOTAL_RULE_COUNT - firedRuleIds.size;
|
|
28753
|
+
if (opts.outputFormat === "json") {
|
|
28754
|
+
process.stdout.write(JSON.stringify(diagnostics, null, 2) + "\n");
|
|
28755
|
+
if (errors.length > 0) process.exit(1);
|
|
28756
|
+
return;
|
|
28757
|
+
}
|
|
28758
|
+
if (diagnostics.length > 0) {
|
|
28759
|
+
for (const d of diagnostics) {
|
|
28760
|
+
const nodeStr = d.nodeId ? ` [node: ${d.nodeId}]` : "";
|
|
28761
|
+
const edgeStr = d.edgeIndex !== void 0 ? ` [edge: ${d.edgeIndex}]` : "";
|
|
28762
|
+
process.stdout.write(` ${d.severity.padEnd(7)} ${d.ruleId.padEnd(24)} ${d.message}${nodeStr}${edgeStr}\n`);
|
|
28763
|
+
}
|
|
28764
|
+
process.stdout.write("\n");
|
|
28765
|
+
}
|
|
28766
|
+
const errLabel = errors.length !== 1 ? "errors" : "error";
|
|
28767
|
+
const warnLabel = warnings.length !== 1 ? "warnings" : "warning";
|
|
28768
|
+
if (diagnostics.length === 0) process.stdout.write(`✓ ${TOTAL_RULE_COUNT}/${TOTAL_RULE_COUNT} rules passed, 0 errors, 0 warnings\n`);
|
|
28769
|
+
else process.stdout.write(`✗ ${passedCount}/${TOTAL_RULE_COUNT} rules passed, ${errors.length} ${errLabel}, ${warnings.length} ${warnLabel}\n`);
|
|
28770
|
+
if (errors.length > 0) process.exit(1);
|
|
28771
|
+
});
|
|
27868
28772
|
}
|
|
27869
28773
|
|
|
27870
28774
|
//#endregion
|
|
@@ -29339,5 +30243,5 @@ function registerRunCommand(program, _version = "0.0.0", projectRoot = process.c
|
|
|
29339
30243
|
}
|
|
29340
30244
|
|
|
29341
30245
|
//#endregion
|
|
29342
|
-
export { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, normalizeGraphSummaryToStatus, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runRunAction, runSolutioningPhase, validateStopAfterFromConflict };
|
|
29343
|
-
//# sourceMappingURL=run-
|
|
30246
|
+
export { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, getFactoryRunSummaries, getScenarioResultsForRun, listGraphRuns, normalizeGraphSummaryToStatus, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runRunAction, runSolutioningPhase, validateStopAfterFromConflict };
|
|
30247
|
+
//# sourceMappingURL=run-bhGoAbu9.js.map
|