substrate-ai 0.5.10 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { AdapterTelemetryPersistence, AppError, DEFAULT_CONFIG, DEFAULT_ROUTING_POLICY, DoltClient, DoltNotInstalled, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, FileStateStore, GitClient, GrammarLoader, IngestionServer, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SUBSTRATE_OWNED_SETTINGS_KEYS, SymbolParser, VALID_PHASES, WorkGraphRepository, buildPipelineStatusOutput, checkDoltInstalled, createConfigSystem, createContextCompiler, createDatabaseAdapter, createDispatcher, createDoltClient, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStateStore, createStopAfterGate, createTelemetryAdvisor, detectCycles, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, initSchema, initializeDolt, isSyncAdapter, parseDbTimestampAsUtc, registerHealthCommand, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-
|
|
2
|
+
import { AdapterTelemetryPersistence, AppError, DEFAULT_CONFIG, DEFAULT_ROUTING_POLICY, DoltClient, DoltNotInstalled, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, FileStateStore, GitClient, GrammarLoader, IngestionServer, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SUBSTRATE_OWNED_SETTINGS_KEYS, SymbolParser, VALID_PHASES, WorkGraphRepository, buildPipelineStatusOutput, checkDoltInstalled, createConfigSystem, createContextCompiler, createDatabaseAdapter, createDispatcher, createDoltClient, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStateStore, createStopAfterGate, createTelemetryAdvisor, detectCycles, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, initSchema, initializeDolt, isSyncAdapter, parseDbTimestampAsUtc, registerHealthCommand, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-B1WEe6SY.js";
|
|
3
3
|
import { createLogger } from "../logger-D2fS2ccL.js";
|
|
4
4
|
import { AdapterRegistry } from "../adapter-registry-D2zdMwVu.js";
|
|
5
5
|
import { CURRENT_CONFIG_FORMAT_VERSION, CURRENT_TASK_GRAPH_VERSION, PartialSubstrateConfigSchema } from "../config-migrator-DtZW1maj.js";
|
|
@@ -2969,7 +2969,7 @@ async function runSupervisorAction(options, deps = {}) {
|
|
|
2969
2969
|
await initSchema(expAdapter);
|
|
2970
2970
|
const { runRunAction: runPipeline } = await import(
|
|
2971
2971
|
/* @vite-ignore */
|
|
2972
|
-
"../run-
|
|
2972
|
+
"../run-IU38JGTV.js"
|
|
2973
2973
|
);
|
|
2974
2974
|
const runStoryFn = async (opts) => {
|
|
2975
2975
|
const exitCode = await runPipeline({
|
|
@@ -3430,7 +3430,8 @@ async function runMetricsAction(options) {
|
|
|
3430
3430
|
compositeScore: scoreB.compositeScore - scoreA.compositeScore,
|
|
3431
3431
|
cacheHitSubScore: scoreB.cacheHitSubScore - scoreA.cacheHitSubScore,
|
|
3432
3432
|
ioRatioSubScore: scoreB.ioRatioSubScore - scoreA.ioRatioSubScore,
|
|
3433
|
-
contextManagementSubScore: scoreB.contextManagementSubScore - scoreA.contextManagementSubScore
|
|
3433
|
+
contextManagementSubScore: scoreB.contextManagementSubScore - scoreA.contextManagementSubScore,
|
|
3434
|
+
tokenDensitySubScore: (scoreB.tokenDensitySubScore ?? 0) - (scoreA.tokenDensitySubScore ?? 0)
|
|
3434
3435
|
};
|
|
3435
3436
|
if (outputFormat === "json") process.stdout.write(formatOutput({
|
|
3436
3437
|
storyA: scoreA,
|
|
@@ -3447,6 +3448,7 @@ async function runMetricsAction(options) {
|
|
|
3447
3448
|
process.stdout.write(` ${"Cache Hit Sub-Score".padEnd(30)} ${scoreA.cacheHitSubScore.toFixed(1).padStart(12)} ${scoreB.cacheHitSubScore.toFixed(1).padStart(12)} ${`${sign(delta.cacheHitSubScore)}${delta.cacheHitSubScore.toFixed(1)}`.padStart(10)}\n`);
|
|
3448
3449
|
process.stdout.write(` ${"I/O Ratio Sub-Score".padEnd(30)} ${scoreA.ioRatioSubScore.toFixed(1).padStart(12)} ${scoreB.ioRatioSubScore.toFixed(1).padStart(12)} ${`${sign(delta.ioRatioSubScore)}${delta.ioRatioSubScore.toFixed(1)}`.padStart(10)}\n`);
|
|
3449
3450
|
process.stdout.write(` ${"Context Mgmt Sub-Score".padEnd(30)} ${scoreA.contextManagementSubScore.toFixed(1).padStart(12)} ${scoreB.contextManagementSubScore.toFixed(1).padStart(12)} ${`${sign(delta.contextManagementSubScore)}${delta.contextManagementSubScore.toFixed(1)}`.padStart(10)}\n`);
|
|
3451
|
+
process.stdout.write(` ${"Token Density Sub-Score".padEnd(30)} ${(scoreA.tokenDensitySubScore ?? 0).toFixed(1).padStart(12)} ${(scoreB.tokenDensitySubScore ?? 0).toFixed(1).padStart(12)} ${`${sign(delta.tokenDensitySubScore)}${delta.tokenDensitySubScore.toFixed(1)}`.padStart(10)}\n`);
|
|
3450
3452
|
}
|
|
3451
3453
|
return 0;
|
|
3452
3454
|
}
|
|
@@ -12194,10 +12194,12 @@ const EfficiencyScoreSchema = z.object({
|
|
|
12194
12194
|
cacheHitSubScore: z.number().min(0).max(100),
|
|
12195
12195
|
ioRatioSubScore: z.number().min(0).max(100),
|
|
12196
12196
|
contextManagementSubScore: z.number().min(0).max(100),
|
|
12197
|
+
tokenDensitySubScore: z.number().min(0).max(100).default(0),
|
|
12197
12198
|
avgCacheHitRate: z.number(),
|
|
12198
12199
|
avgIoRatio: z.number(),
|
|
12199
12200
|
contextSpikeCount: z.number().int().nonnegative(),
|
|
12200
12201
|
totalTurns: z.number().int().nonnegative(),
|
|
12202
|
+
coldStartTurnsExcluded: z.number().int().nonnegative().default(0),
|
|
12201
12203
|
perModelBreakdown: z.array(ModelEfficiencySchema),
|
|
12202
12204
|
perSourceBreakdown: z.array(SourceEfficiencySchema),
|
|
12203
12205
|
dispatchId: z.string().optional(),
|
|
@@ -12297,6 +12299,8 @@ var AdapterTelemetryPersistence = class {
|
|
|
12297
12299
|
total_turns INTEGER NOT NULL DEFAULT 0,
|
|
12298
12300
|
per_model_json TEXT NOT NULL DEFAULT '[]',
|
|
12299
12301
|
per_source_json TEXT NOT NULL DEFAULT '[]',
|
|
12302
|
+
token_density_sub_score DOUBLE NOT NULL DEFAULT 0,
|
|
12303
|
+
cold_start_turns_excluded INTEGER NOT NULL DEFAULT 0,
|
|
12300
12304
|
dispatch_id TEXT,
|
|
12301
12305
|
task_type TEXT,
|
|
12302
12306
|
phase TEXT,
|
|
@@ -12307,6 +12311,12 @@ var AdapterTelemetryPersistence = class {
|
|
|
12307
12311
|
CREATE INDEX IF NOT EXISTS idx_efficiency_story
|
|
12308
12312
|
ON efficiency_scores (story_key, timestamp DESC)
|
|
12309
12313
|
`);
|
|
12314
|
+
try {
|
|
12315
|
+
await this._adapter.exec(`ALTER TABLE efficiency_scores ADD COLUMN token_density_sub_score DOUBLE NOT NULL DEFAULT 0`);
|
|
12316
|
+
} catch {}
|
|
12317
|
+
try {
|
|
12318
|
+
await this._adapter.exec(`ALTER TABLE efficiency_scores ADD COLUMN cold_start_turns_excluded INTEGER NOT NULL DEFAULT 0`);
|
|
12319
|
+
} catch {}
|
|
12310
12320
|
await this._adapter.exec(`
|
|
12311
12321
|
CREATE TABLE IF NOT EXISTS recommendations (
|
|
12312
12322
|
id VARCHAR(16) NOT NULL,
|
|
@@ -12443,13 +12453,17 @@ var AdapterTelemetryPersistence = class {
|
|
|
12443
12453
|
await this._adapter.query(`INSERT INTO efficiency_scores (
|
|
12444
12454
|
story_key, timestamp, composite_score,
|
|
12445
12455
|
cache_hit_sub_score, io_ratio_sub_score, context_management_sub_score,
|
|
12456
|
+
token_density_sub_score,
|
|
12446
12457
|
avg_cache_hit_rate, avg_io_ratio, context_spike_count, total_turns,
|
|
12458
|
+
cold_start_turns_excluded,
|
|
12447
12459
|
per_model_json, per_source_json,
|
|
12448
12460
|
dispatch_id, task_type, phase
|
|
12449
12461
|
) VALUES (
|
|
12450
12462
|
?, ?, ?,
|
|
12451
12463
|
?, ?, ?,
|
|
12464
|
+
?,
|
|
12452
12465
|
?, ?, ?, ?,
|
|
12466
|
+
?,
|
|
12453
12467
|
?, ?,
|
|
12454
12468
|
?, ?, ?
|
|
12455
12469
|
)`, [
|
|
@@ -12459,10 +12473,12 @@ var AdapterTelemetryPersistence = class {
|
|
|
12459
12473
|
score.cacheHitSubScore,
|
|
12460
12474
|
score.ioRatioSubScore,
|
|
12461
12475
|
score.contextManagementSubScore,
|
|
12476
|
+
score.tokenDensitySubScore ?? 0,
|
|
12462
12477
|
score.avgCacheHitRate,
|
|
12463
12478
|
score.avgIoRatio,
|
|
12464
12479
|
score.contextSpikeCount,
|
|
12465
12480
|
score.totalTurns,
|
|
12481
|
+
score.coldStartTurnsExcluded ?? 0,
|
|
12466
12482
|
JSON.stringify(score.perModelBreakdown),
|
|
12467
12483
|
JSON.stringify(score.perSourceBreakdown),
|
|
12468
12484
|
score.dispatchId ?? null,
|
|
@@ -12482,10 +12498,12 @@ var AdapterTelemetryPersistence = class {
|
|
|
12482
12498
|
cacheHitSubScore: row.cache_hit_sub_score,
|
|
12483
12499
|
ioRatioSubScore: row.io_ratio_sub_score,
|
|
12484
12500
|
contextManagementSubScore: row.context_management_sub_score,
|
|
12501
|
+
tokenDensitySubScore: row.token_density_sub_score ?? 0,
|
|
12485
12502
|
avgCacheHitRate: row.avg_cache_hit_rate,
|
|
12486
12503
|
avgIoRatio: row.avg_io_ratio,
|
|
12487
12504
|
contextSpikeCount: row.context_spike_count,
|
|
12488
12505
|
totalTurns: row.total_turns,
|
|
12506
|
+
coldStartTurnsExcluded: row.cold_start_turns_excluded ?? 0,
|
|
12489
12507
|
perModelBreakdown: JSON.parse(row.per_model_json),
|
|
12490
12508
|
perSourceBreakdown: JSON.parse(row.per_source_json),
|
|
12491
12509
|
...row.dispatch_id != null && { dispatchId: row.dispatch_id },
|
|
@@ -13119,8 +13137,53 @@ var IngestionServer = class {
|
|
|
13119
13137
|
}
|
|
13120
13138
|
};
|
|
13121
13139
|
|
|
13140
|
+
//#endregion
|
|
13141
|
+
//#region src/modules/telemetry/task-baselines.ts
|
|
13142
|
+
/**
 * Per-task-type calibration values consumed by the efficiency scorer.
 *
 * - expectedOutputPerTurn: divisor for the token-density sub-score
 *   (average outputTokens per turn is compared against this value).
 * - targetIoRatio: the output/fresh-input ratio at which the I/O ratio
 *   sub-score reaches 100.
 */
const TASK_BASELINES = {
	"dev-story": {
		expectedOutputPerTurn: 550,
		targetIoRatio: 100
	},
	"create-story": {
		expectedOutputPerTurn: 1500,
		targetIoRatio: 100
	},
	"code-review": {
		expectedOutputPerTurn: 3900,
		targetIoRatio: 50
	},
	"minor-fixes": {
		expectedOutputPerTurn: 700,
		targetIoRatio: 100
	},
	"test-plan": {
		expectedOutputPerTurn: 1600,
		targetIoRatio: 30
	},
	"test-expansion": {
		expectedOutputPerTurn: 1950,
		targetIoRatio: 15
	}
};
/** Baseline used whenever no task-specific entry applies. */
const DEFAULT_BASELINE = {
	expectedOutputPerTurn: 800,
	targetIoRatio: 100
};
/**
 * Get the baseline for a task type, falling back to DEFAULT_BASELINE
 * when taskType is undefined, null, empty, or unknown.
 *
 * @param {string | undefined | null} taskType - task type key, e.g. "dev-story"
 * @returns {{ expectedOutputPerTurn: number, targetIoRatio: number }}
 *   the matching TASK_BASELINES entry, or DEFAULT_BASELINE
 */
function getBaseline(taskType) {
	if (taskType == null || taskType === "") return DEFAULT_BASELINE;
	// Own-property check: a plain `TASK_BASELINES[taskType]` lookup would also
	// resolve inherited Object.prototype members ("constructor", "toString",
	// "__proto__", ...) and hand back a non-baseline value.
	const isKnown = Object.prototype.hasOwnProperty.call(TASK_BASELINES, taskType);
	return isKnown ? TASK_BASELINES[taskType] : DEFAULT_BASELINE;
}
|
|
13180
|
+
|
|
13122
13181
|
//#endregion
|
|
13123
13182
|
//#region src/modules/telemetry/efficiency-scorer.ts
|
|
13183
|
+
// Composite-score weights, one per sub-score. All four are equal and add up
// to 1, so a weighted sum of sub-scores in [0, 100] stays in [0, 100].
const W_CACHE = 0.25;
const W_IO_RATIO = 0.25;
const W_CONTEXT = 0.25;
const W_TOKEN_DENSITY = 0.25;
|
|
13124
13187
|
var EfficiencyScorer = class {
|
|
13125
13188
|
_logger;
|
|
13126
13189
|
constructor(logger$27) {
|
|
@@ -13142,27 +13205,36 @@ var EfficiencyScorer = class {
|
|
|
13142
13205
|
cacheHitSubScore: 0,
|
|
13143
13206
|
ioRatioSubScore: 0,
|
|
13144
13207
|
contextManagementSubScore: 0,
|
|
13208
|
+
tokenDensitySubScore: 0,
|
|
13145
13209
|
avgCacheHitRate: 0,
|
|
13146
13210
|
avgIoRatio: 0,
|
|
13147
13211
|
contextSpikeCount: 0,
|
|
13148
13212
|
totalTurns: 0,
|
|
13213
|
+
coldStartTurnsExcluded: 0,
|
|
13149
13214
|
perModelBreakdown: [],
|
|
13150
13215
|
perSourceBreakdown: []
|
|
13151
13216
|
};
|
|
13152
|
-
const
|
|
13153
|
-
const
|
|
13217
|
+
const taskType = this._inferTaskType(turns);
|
|
13218
|
+
const baseline = getBaseline(taskType);
|
|
13219
|
+
const coldStartIds = this._identifyColdStartTurns(turns);
|
|
13220
|
+
let scoringTurns = turns.filter((t) => !coldStartIds.has(t.spanId));
|
|
13221
|
+
if (scoringTurns.length === 0) scoringTurns = turns;
|
|
13222
|
+
const avgCacheHitRate = this._computeAvgCacheHitRate(scoringTurns);
|
|
13223
|
+
const avgIoRatio = this._computeAvgIoRatio(scoringTurns);
|
|
13154
13224
|
const contextSpikeCount = turns.filter((t) => t.isContextSpike).length;
|
|
13155
13225
|
const totalTurns = turns.length;
|
|
13156
|
-
const cacheHitSubScore = this._computeCacheHitSubScore(
|
|
13157
|
-
const ioRatioSubScore = this._computeIoRatioSubScore(
|
|
13158
|
-
const contextManagementSubScore = this._computeContextManagementSubScore(
|
|
13159
|
-
const
|
|
13160
|
-
const
|
|
13161
|
-
const
|
|
13226
|
+
const cacheHitSubScore = this._computeCacheHitSubScore(scoringTurns);
|
|
13227
|
+
const ioRatioSubScore = this._computeIoRatioSubScore(scoringTurns, baseline.targetIoRatio);
|
|
13228
|
+
const contextManagementSubScore = this._computeContextManagementSubScore(scoringTurns);
|
|
13229
|
+
const tokenDensitySubScore = this._computeTokenDensitySubScore(scoringTurns, baseline.expectedOutputPerTurn);
|
|
13230
|
+
const compositeScore = Math.round(cacheHitSubScore * W_CACHE + ioRatioSubScore * W_IO_RATIO + contextManagementSubScore * W_CONTEXT + tokenDensitySubScore * W_TOKEN_DENSITY);
|
|
13231
|
+
const perModelBreakdown = this._buildPerModelBreakdown(scoringTurns);
|
|
13232
|
+
const perSourceBreakdown = this._buildPerSourceBreakdown(scoringTurns, baseline.targetIoRatio, baseline.expectedOutputPerTurn);
|
|
13162
13233
|
this._logger.info({
|
|
13163
13234
|
storyKey,
|
|
13164
13235
|
compositeScore,
|
|
13165
|
-
contextSpikeCount
|
|
13236
|
+
contextSpikeCount,
|
|
13237
|
+
coldStartTurnsExcluded: coldStartIds.size
|
|
13166
13238
|
}, "Computed efficiency score");
|
|
13167
13239
|
return {
|
|
13168
13240
|
storyKey,
|
|
@@ -13171,15 +13243,41 @@ var EfficiencyScorer = class {
|
|
|
13171
13243
|
cacheHitSubScore,
|
|
13172
13244
|
ioRatioSubScore,
|
|
13173
13245
|
contextManagementSubScore,
|
|
13246
|
+
tokenDensitySubScore,
|
|
13174
13247
|
avgCacheHitRate,
|
|
13175
13248
|
avgIoRatio,
|
|
13176
13249
|
contextSpikeCount,
|
|
13177
13250
|
totalTurns,
|
|
13251
|
+
coldStartTurnsExcluded: coldStartIds.size,
|
|
13178
13252
|
perModelBreakdown,
|
|
13179
13253
|
perSourceBreakdown
|
|
13180
13254
|
};
|
|
13181
13255
|
}
|
|
13182
13256
|
/**
|
|
13257
|
+
* Identify cold-start turns: the first turn per dispatchId.
|
|
13258
|
+
* Returns a set of spanIds that should be excluded from scoring.
|
|
13259
|
+
* Only considers turns with a non-empty dispatchId.
|
|
13260
|
+
*/
|
|
13261
|
+
_identifyColdStartTurns(turns) {
|
|
13262
|
+
const coldStarts = new Set();
|
|
13263
|
+
const seenDispatches = new Set();
|
|
13264
|
+
for (const turn of turns) if (turn.dispatchId !== void 0 && turn.dispatchId !== "" && !seenDispatches.has(turn.dispatchId)) {
|
|
13265
|
+
seenDispatches.add(turn.dispatchId);
|
|
13266
|
+
coldStarts.add(turn.spanId);
|
|
13267
|
+
}
|
|
13268
|
+
return coldStarts;
|
|
13269
|
+
}
|
|
13270
|
+
/**
|
|
13271
|
+
* Infer the task type from turns. Returns the task type only when all turns
|
|
13272
|
+
* with a taskType agree (unanimous). For mixed task types (story-level
|
|
13273
|
+
* scoring across dispatches), returns undefined → default baseline.
|
|
13274
|
+
*/
|
|
13275
|
+
_inferTaskType(turns) {
|
|
13276
|
+
const types$1 = new Set();
|
|
13277
|
+
for (const turn of turns) if (turn.taskType !== void 0 && turn.taskType !== "") types$1.add(turn.taskType);
|
|
13278
|
+
return types$1.size === 1 ? [...types$1][0] : void 0;
|
|
13279
|
+
}
|
|
13280
|
+
/**
|
|
13183
13281
|
* Average cache hit rate across all turns, clamped to [0, 100].
|
|
13184
13282
|
* Formula: clamp(avgCacheHitRate × 100, 0, 100)
|
|
13185
13283
|
*/
|
|
@@ -13188,23 +13286,26 @@ var EfficiencyScorer = class {
|
|
|
13188
13286
|
return this._clamp(avg * 100, 0, 100);
|
|
13189
13287
|
}
|
|
13190
13288
|
/**
|
|
13191
|
-
* I/O ratio sub-score:
|
|
13289
|
+
* I/O ratio sub-score: logarithmic output/freshInput productivity curve (Story 35-1).
|
|
13192
13290
|
*
|
|
13193
|
-
*
|
|
13194
|
-
*
|
|
13195
|
-
*
|
|
13196
|
-
* - outputTokens / max(freshInputTokens, 1)
|
|
13197
|
-
* -
|
|
13198
|
-
*
|
|
13199
|
-
*
|
|
13291
|
+
* Replaces the old binary threshold (>=1 → 100) with a logarithmic curve
|
|
13292
|
+
* that provides gradient across the observed range:
|
|
13293
|
+
* - score = clamp(log10(ratio) / log10(targetRatio) * 100, 0, 100)
|
|
13294
|
+
* - ratio = avg(outputTokens / max(freshInputTokens, 1)) across turns
|
|
13295
|
+
* - targetRatio is calibrated per task type (Story 35-2)
|
|
13296
|
+
*
|
|
13297
|
+
* Examples (TARGET=100): ratio 1→0, 10→50, 50→85, 100→100, 200→100(clamped)
|
|
13200
13298
|
*/
|
|
13201
|
-
_computeIoRatioSubScore(turns) {
|
|
13299
|
+
_computeIoRatioSubScore(turns, targetRatio) {
|
|
13202
13300
|
if (turns.length === 0) return 0;
|
|
13203
13301
|
const avg = turns.reduce((acc, t) => {
|
|
13204
13302
|
const freshInput = Math.max(t.inputTokens, 1);
|
|
13205
13303
|
return acc + t.outputTokens / freshInput;
|
|
13206
13304
|
}, 0) / turns.length;
|
|
13207
|
-
|
|
13305
|
+
if (avg <= 0) return 0;
|
|
13306
|
+
const logTarget = Math.log10(Math.max(targetRatio, 2));
|
|
13307
|
+
const score = Math.log10(avg) / logTarget * 100;
|
|
13308
|
+
return this._clamp(score, 0, 100);
|
|
13208
13309
|
}
|
|
13209
13310
|
/**
|
|
13210
13311
|
* Context management sub-score: penalizes context spike frequency.
|
|
@@ -13217,6 +13318,22 @@ var EfficiencyScorer = class {
|
|
|
13217
13318
|
const spikeRatio = spikeCount / totalTurns;
|
|
13218
13319
|
return this._clamp(100 - spikeRatio * 100, 0, 100);
|
|
13219
13320
|
}
|
|
13321
|
+
/**
|
|
13322
|
+
* Token density sub-score: output tokens per turn vs task-type baseline (Story 35-4).
|
|
13323
|
+
*
|
|
13324
|
+
* Measures whether the agent is producing useful output or spinning:
|
|
13325
|
+
* - score = clamp(avgOutputPerTurn / expectedOutputPerTurn * 100, 0, 100)
|
|
13326
|
+
* - expectedOutputPerTurn is calibrated per task type (Story 35-2)
|
|
13327
|
+
*
|
|
13328
|
+
* Below-baseline dispatches get proportionally lower scores.
|
|
13329
|
+
* At-or-above-baseline dispatches score 100.
|
|
13330
|
+
*/
|
|
13331
|
+
_computeTokenDensitySubScore(turns, expectedOutputPerTurn) {
|
|
13332
|
+
if (turns.length === 0) return 0;
|
|
13333
|
+
const avgOutput = turns.reduce((acc, t) => acc + t.outputTokens, 0) / turns.length;
|
|
13334
|
+
const ratio = avgOutput / Math.max(expectedOutputPerTurn, 1);
|
|
13335
|
+
return this._clamp(ratio * 100, 0, 100);
|
|
13336
|
+
}
|
|
13220
13337
|
_computeAvgCacheHitRate(turns) {
|
|
13221
13338
|
if (turns.length === 0) return 0;
|
|
13222
13339
|
const sum = turns.reduce((acc, t) => acc + t.cacheHitRate, 0);
|
|
@@ -13269,7 +13386,7 @@ var EfficiencyScorer = class {
|
|
|
13269
13386
|
* Group turns by source, computing a per-group composite score using the
|
|
13270
13387
|
* same formula as the overall score. Sources with zero turns are excluded.
|
|
13271
13388
|
*/
|
|
13272
|
-
_buildPerSourceBreakdown(turns) {
|
|
13389
|
+
_buildPerSourceBreakdown(turns, targetIoRatio, expectedOutputPerTurn) {
|
|
13273
13390
|
const groups = new Map();
|
|
13274
13391
|
for (const turn of turns) {
|
|
13275
13392
|
const key = turn.source;
|
|
@@ -13280,10 +13397,11 @@ var EfficiencyScorer = class {
|
|
|
13280
13397
|
const result = [];
|
|
13281
13398
|
for (const [source, groupTurns] of groups) {
|
|
13282
13399
|
if (groupTurns.length === 0) continue;
|
|
13283
|
-
const cacheHitSub = this.
|
|
13284
|
-
const ioRatioSub = this.
|
|
13285
|
-
const contextSub = this.
|
|
13286
|
-
const
|
|
13400
|
+
const cacheHitSub = this._computeCacheHitSubScore(groupTurns);
|
|
13401
|
+
const ioRatioSub = this._computeIoRatioSubScore(groupTurns, targetIoRatio);
|
|
13402
|
+
const contextSub = this._computeContextManagementSubScore(groupTurns);
|
|
13403
|
+
const tokenDensitySub = this._computeTokenDensitySubScore(groupTurns, expectedOutputPerTurn);
|
|
13404
|
+
const compositeScore = Math.round(cacheHitSub * W_CACHE + ioRatioSub * W_IO_RATIO + contextSub * W_CONTEXT + tokenDensitySub * W_TOKEN_DENSITY);
|
|
13287
13405
|
result.push({
|
|
13288
13406
|
source,
|
|
13289
13407
|
compositeScore,
|
|
@@ -13292,25 +13410,6 @@ var EfficiencyScorer = class {
|
|
|
13292
13410
|
}
|
|
13293
13411
|
return result;
|
|
13294
13412
|
}
|
|
13295
|
-
_computeCacheHitSubScoreForGroup(turns) {
|
|
13296
|
-
if (turns.length === 0) return 0;
|
|
13297
|
-
const avg = turns.reduce((acc, t) => acc + t.cacheHitRate, 0) / turns.length;
|
|
13298
|
-
return this._clamp(avg * 100, 0, 100);
|
|
13299
|
-
}
|
|
13300
|
-
_computeIoRatioSubScoreForGroup(turns) {
|
|
13301
|
-
if (turns.length === 0) return 0;
|
|
13302
|
-
const avg = turns.reduce((acc, t) => {
|
|
13303
|
-
const freshInput = Math.max(t.inputTokens, 1);
|
|
13304
|
-
return acc + t.outputTokens / freshInput;
|
|
13305
|
-
}, 0) / turns.length;
|
|
13306
|
-
return this._clamp(avg >= 1 ? 100 : avg * 100, 0, 100);
|
|
13307
|
-
}
|
|
13308
|
-
_computeContextManagementSubScoreForGroup(turns) {
|
|
13309
|
-
if (turns.length === 0) return 0;
|
|
13310
|
-
const spikeCount = turns.filter((t) => t.isContextSpike).length;
|
|
13311
|
-
const spikeRatio = spikeCount / turns.length;
|
|
13312
|
-
return this._clamp(100 - spikeRatio * 100, 0, 100);
|
|
13313
|
-
}
|
|
13314
13413
|
_clamp(value, min, max) {
|
|
13315
13414
|
return Math.max(min, Math.min(max, value));
|
|
13316
13415
|
}
|
|
@@ -13963,13 +14062,20 @@ var Recommender = class Recommender {
|
|
|
13963
14062
|
});
|
|
13964
14063
|
}
|
|
13965
14064
|
/**
|
|
14065
|
+
* Categories whose growth is inherent to normal agentic work and not actionable.
|
|
14066
|
+
* tool_outputs grows as the agent reads files, runs commands, etc. — expected behavior.
|
|
14067
|
+
* conversation_history grows naturally as conversations progress.
|
|
14068
|
+
*/
|
|
14069
|
+
static EXPECTED_GROWTH_CATEGORIES = new Set(["tool_outputs", "conversation_history"]);
|
|
14070
|
+
/**
|
|
13966
14071
|
* Flag semantic categories with trend === 'growing'.
|
|
13967
14072
|
* Severity is 'info' by default; 'warning' if percentage > 25%.
|
|
14073
|
+
* Skips categories whose growth is inherent to normal agentic work.
|
|
13968
14074
|
*/
|
|
13969
14075
|
_runGrowingCategories(ctx) {
|
|
13970
14076
|
const { categories, storyKey, sprintId, generatedAt } = ctx;
|
|
13971
14077
|
if (categories.length === 0) return [];
|
|
13972
|
-
const growing = categories.filter((c) => c.trend === "growing");
|
|
14078
|
+
const growing = categories.filter((c) => c.trend === "growing" && !Recommender.EXPECTED_GROWTH_CATEGORIES.has(c.category));
|
|
13973
14079
|
return growing.map((cat, index) => {
|
|
13974
14080
|
const severity = cat.percentage > 25 ? "warning" : "info";
|
|
13975
14081
|
const actionTarget = cat.category;
|
|
@@ -14029,8 +14135,15 @@ var Recommender = class Recommender {
|
|
|
14029
14135
|
}];
|
|
14030
14136
|
}
|
|
14031
14137
|
/**
|
|
14138
|
+
* Models known to be intentionally routed for lightweight tasks.
|
|
14139
|
+
* These should not be flagged as "underperforming" — the orchestrator chose them
|
|
14140
|
+
* deliberately for cost/speed reasons, not as a performance optimization target.
|
|
14141
|
+
*/
|
|
14142
|
+
static INTENTIONAL_LIGHTWEIGHT_MODELS = new Set(["claude-haiku-4-5-20251001", "claude-haiku-4-5"]);
|
|
14143
|
+
/**
|
|
14032
14144
|
* If more than one model is present, flag the underperforming model.
|
|
14033
14145
|
* Severity is 'info' by default; 'warning' if cache efficiency gap > 20pp.
|
|
14146
|
+
* Skips flagging models that are intentionally routed for lightweight tasks.
|
|
14034
14147
|
*/
|
|
14035
14148
|
_runModelComparison(ctx) {
|
|
14036
14149
|
const { efficiencyScore, storyKey, sprintId, generatedAt } = ctx;
|
|
@@ -14039,6 +14152,7 @@ var Recommender = class Recommender {
|
|
|
14039
14152
|
const sorted = [...models].sort((a, b) => b.cacheHitRate - a.cacheHitRate);
|
|
14040
14153
|
const best = sorted[0];
|
|
14041
14154
|
const worst = sorted[sorted.length - 1];
|
|
14155
|
+
if (Recommender.INTENTIONAL_LIGHTWEIGHT_MODELS.has(worst.model)) return [];
|
|
14042
14156
|
if (best.model === worst.model) return [];
|
|
14043
14157
|
const gapPP = (best.cacheHitRate - worst.cacheHitRate) * 100;
|
|
14044
14158
|
const severity = gapPP > 20 ? "warning" : "info";
|
|
@@ -15150,8 +15264,10 @@ var TelemetryAdvisor = class {
|
|
|
15150
15264
|
cacheHitSubScore: score.cacheHitSubScore,
|
|
15151
15265
|
ioRatioSubScore: score.ioRatioSubScore,
|
|
15152
15266
|
contextManagementSubScore: score.contextManagementSubScore,
|
|
15267
|
+
tokenDensitySubScore: score.tokenDensitySubScore ?? 0,
|
|
15153
15268
|
totalTurns: score.totalTurns,
|
|
15154
|
-
contextSpikeCount: score.contextSpikeCount
|
|
15269
|
+
contextSpikeCount: score.contextSpikeCount,
|
|
15270
|
+
coldStartTurnsExcluded: score.coldStartTurnsExcluded ?? 0
|
|
15155
15271
|
};
|
|
15156
15272
|
} catch (err) {
|
|
15157
15273
|
logger$6.warn({
|
|
@@ -16626,7 +16742,7 @@ function createImplementationOrchestrator(deps) {
|
|
|
16626
16742
|
const fixTemplate = await pack.getPrompt("fix-story");
|
|
16627
16743
|
const storyContent = await readFile$1(storyFilePath ?? "", "utf-8");
|
|
16628
16744
|
const complexity = computeStoryComplexity(storyContent);
|
|
16629
|
-
autoApproveMaxTurns = resolveFixStoryMaxTurns(complexity.complexityScore);
|
|
16745
|
+
autoApproveMaxTurns = Math.max(15, Math.floor(resolveFixStoryMaxTurns(complexity.complexityScore) / 2));
|
|
16630
16746
|
logComplexityResult(storyKey, complexity, autoApproveMaxTurns);
|
|
16631
16747
|
let reviewFeedback;
|
|
16632
16748
|
if (issueList.length === 0) reviewFeedback = `Verdict: ${verdict}\nIssues: Minor issues flagged but no specifics provided. Review the story ACs and fix any remaining gaps.`;
|
|
@@ -16730,7 +16846,8 @@ function createImplementationOrchestrator(deps) {
|
|
|
16730
16846
|
const storyContent = await readFile$1(storyFilePath ?? "", "utf-8");
|
|
16731
16847
|
{
|
|
16732
16848
|
const complexity = computeStoryComplexity(storyContent);
|
|
16733
|
-
|
|
16849
|
+
const fullBudget = resolveFixStoryMaxTurns(complexity.complexityScore);
|
|
16850
|
+
fixMaxTurns = taskType === "minor-fixes" ? Math.max(15, Math.floor(fullBudget / 2)) : fullBudget;
|
|
16734
16851
|
logComplexityResult(storyKey, complexity, fixMaxTurns);
|
|
16735
16852
|
}
|
|
16736
16853
|
let reviewFeedback;
|
|
@@ -22675,4 +22792,4 @@ function registerRunCommand(program, _version = "0.0.0", projectRoot = process.c
|
|
|
22675
22792
|
|
|
22676
22793
|
//#endregion
|
|
22677
22794
|
export { AdapterTelemetryPersistence, AppError, DEFAULT_CONFIG, DEFAULT_ROUTING_POLICY, DoltClient, DoltNotInstalled, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, FileStateStore, GitClient, GrammarLoader, IngestionServer, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SUBSTRATE_OWNED_SETTINGS_KEYS, SymbolParser, VALID_PHASES, WorkGraphRepository, buildPipelineStatusOutput, checkDoltInstalled, createConfigSystem, createContextCompiler, createDatabaseAdapter, createDispatcher, createDoltClient, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStateStore, createStopAfterGate, createTelemetryAdvisor, detectCycles, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, initSchema, initializeDolt, isSyncAdapter, parseDbTimestampAsUtc, registerHealthCommand, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runRunAction, runSolutioningPhase, validateStopAfterFromConflict };
|
|
22678
|
-
//# sourceMappingURL=run-
|
|
22795
|
+
//# sourceMappingURL=run-B1WEe6SY.js.map
|