substrate-ai 0.5.11 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { AdapterTelemetryPersistence, AppError, DEFAULT_CONFIG, DEFAULT_ROUTING_POLICY, DoltClient, DoltNotInstalled, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, FileStateStore, GitClient, GrammarLoader, IngestionServer, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SUBSTRATE_OWNED_SETTINGS_KEYS, SymbolParser, VALID_PHASES, WorkGraphRepository, buildPipelineStatusOutput, checkDoltInstalled, createConfigSystem, createContextCompiler, createDatabaseAdapter, createDispatcher, createDoltClient, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStateStore, createStopAfterGate, createTelemetryAdvisor, detectCycles, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, initSchema, initializeDolt, isSyncAdapter, parseDbTimestampAsUtc, registerHealthCommand, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-
|
|
2
|
+
import { AdapterTelemetryPersistence, AppError, DEFAULT_CONFIG, DEFAULT_ROUTING_POLICY, DoltClient, DoltNotInstalled, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, FileStateStore, GitClient, GrammarLoader, IngestionServer, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SUBSTRATE_OWNED_SETTINGS_KEYS, SymbolParser, VALID_PHASES, WorkGraphRepository, buildPipelineStatusOutput, checkDoltInstalled, createConfigSystem, createContextCompiler, createDatabaseAdapter, createDispatcher, createDoltClient, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStateStore, createStopAfterGate, createTelemetryAdvisor, detectCycles, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, initSchema, initializeDolt, isSyncAdapter, parseDbTimestampAsUtc, registerHealthCommand, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-B1WEe6SY.js";
|
|
3
3
|
import { createLogger } from "../logger-D2fS2ccL.js";
|
|
4
4
|
import { AdapterRegistry } from "../adapter-registry-D2zdMwVu.js";
|
|
5
5
|
import { CURRENT_CONFIG_FORMAT_VERSION, CURRENT_TASK_GRAPH_VERSION, PartialSubstrateConfigSchema } from "../config-migrator-DtZW1maj.js";
|
|
@@ -2969,7 +2969,7 @@ async function runSupervisorAction(options, deps = {}) {
|
|
|
2969
2969
|
await initSchema(expAdapter);
|
|
2970
2970
|
const { runRunAction: runPipeline } = await import(
|
|
2971
2971
|
/* @vite-ignore */
|
|
2972
|
-
"../run-
|
|
2972
|
+
"../run-IU38JGTV.js"
|
|
2973
2973
|
);
|
|
2974
2974
|
const runStoryFn = async (opts) => {
|
|
2975
2975
|
const exitCode = await runPipeline({
|
|
@@ -3430,7 +3430,8 @@ async function runMetricsAction(options) {
|
|
|
3430
3430
|
compositeScore: scoreB.compositeScore - scoreA.compositeScore,
|
|
3431
3431
|
cacheHitSubScore: scoreB.cacheHitSubScore - scoreA.cacheHitSubScore,
|
|
3432
3432
|
ioRatioSubScore: scoreB.ioRatioSubScore - scoreA.ioRatioSubScore,
|
|
3433
|
-
contextManagementSubScore: scoreB.contextManagementSubScore - scoreA.contextManagementSubScore
|
|
3433
|
+
contextManagementSubScore: scoreB.contextManagementSubScore - scoreA.contextManagementSubScore,
|
|
3434
|
+
tokenDensitySubScore: (scoreB.tokenDensitySubScore ?? 0) - (scoreA.tokenDensitySubScore ?? 0)
|
|
3434
3435
|
};
|
|
3435
3436
|
if (outputFormat === "json") process.stdout.write(formatOutput({
|
|
3436
3437
|
storyA: scoreA,
|
|
@@ -3447,6 +3448,7 @@ async function runMetricsAction(options) {
|
|
|
3447
3448
|
process.stdout.write(` ${"Cache Hit Sub-Score".padEnd(30)} ${scoreA.cacheHitSubScore.toFixed(1).padStart(12)} ${scoreB.cacheHitSubScore.toFixed(1).padStart(12)} ${`${sign(delta.cacheHitSubScore)}${delta.cacheHitSubScore.toFixed(1)}`.padStart(10)}\n`);
|
|
3448
3449
|
process.stdout.write(` ${"I/O Ratio Sub-Score".padEnd(30)} ${scoreA.ioRatioSubScore.toFixed(1).padStart(12)} ${scoreB.ioRatioSubScore.toFixed(1).padStart(12)} ${`${sign(delta.ioRatioSubScore)}${delta.ioRatioSubScore.toFixed(1)}`.padStart(10)}\n`);
|
|
3449
3450
|
process.stdout.write(` ${"Context Mgmt Sub-Score".padEnd(30)} ${scoreA.contextManagementSubScore.toFixed(1).padStart(12)} ${scoreB.contextManagementSubScore.toFixed(1).padStart(12)} ${`${sign(delta.contextManagementSubScore)}${delta.contextManagementSubScore.toFixed(1)}`.padStart(10)}\n`);
|
|
3451
|
+
process.stdout.write(` ${"Token Density Sub-Score".padEnd(30)} ${(scoreA.tokenDensitySubScore ?? 0).toFixed(1).padStart(12)} ${(scoreB.tokenDensitySubScore ?? 0).toFixed(1).padStart(12)} ${`${sign(delta.tokenDensitySubScore)}${delta.tokenDensitySubScore.toFixed(1)}`.padStart(10)}\n`);
|
|
3450
3452
|
}
|
|
3451
3453
|
return 0;
|
|
3452
3454
|
}
|
|
@@ -12194,10 +12194,12 @@ const EfficiencyScoreSchema = z.object({
|
|
|
12194
12194
|
cacheHitSubScore: z.number().min(0).max(100),
|
|
12195
12195
|
ioRatioSubScore: z.number().min(0).max(100),
|
|
12196
12196
|
contextManagementSubScore: z.number().min(0).max(100),
|
|
12197
|
+
tokenDensitySubScore: z.number().min(0).max(100).default(0),
|
|
12197
12198
|
avgCacheHitRate: z.number(),
|
|
12198
12199
|
avgIoRatio: z.number(),
|
|
12199
12200
|
contextSpikeCount: z.number().int().nonnegative(),
|
|
12200
12201
|
totalTurns: z.number().int().nonnegative(),
|
|
12202
|
+
coldStartTurnsExcluded: z.number().int().nonnegative().default(0),
|
|
12201
12203
|
perModelBreakdown: z.array(ModelEfficiencySchema),
|
|
12202
12204
|
perSourceBreakdown: z.array(SourceEfficiencySchema),
|
|
12203
12205
|
dispatchId: z.string().optional(),
|
|
@@ -12297,6 +12299,8 @@ var AdapterTelemetryPersistence = class {
|
|
|
12297
12299
|
total_turns INTEGER NOT NULL DEFAULT 0,
|
|
12298
12300
|
per_model_json TEXT NOT NULL DEFAULT '[]',
|
|
12299
12301
|
per_source_json TEXT NOT NULL DEFAULT '[]',
|
|
12302
|
+
token_density_sub_score DOUBLE NOT NULL DEFAULT 0,
|
|
12303
|
+
cold_start_turns_excluded INTEGER NOT NULL DEFAULT 0,
|
|
12300
12304
|
dispatch_id TEXT,
|
|
12301
12305
|
task_type TEXT,
|
|
12302
12306
|
phase TEXT,
|
|
@@ -12307,6 +12311,12 @@ var AdapterTelemetryPersistence = class {
|
|
|
12307
12311
|
CREATE INDEX IF NOT EXISTS idx_efficiency_story
|
|
12308
12312
|
ON efficiency_scores (story_key, timestamp DESC)
|
|
12309
12313
|
`);
|
|
12314
|
+
try {
|
|
12315
|
+
await this._adapter.exec(`ALTER TABLE efficiency_scores ADD COLUMN token_density_sub_score DOUBLE NOT NULL DEFAULT 0`);
|
|
12316
|
+
} catch {}
|
|
12317
|
+
try {
|
|
12318
|
+
await this._adapter.exec(`ALTER TABLE efficiency_scores ADD COLUMN cold_start_turns_excluded INTEGER NOT NULL DEFAULT 0`);
|
|
12319
|
+
} catch {}
|
|
12310
12320
|
await this._adapter.exec(`
|
|
12311
12321
|
CREATE TABLE IF NOT EXISTS recommendations (
|
|
12312
12322
|
id VARCHAR(16) NOT NULL,
|
|
@@ -12443,13 +12453,17 @@ var AdapterTelemetryPersistence = class {
|
|
|
12443
12453
|
await this._adapter.query(`INSERT INTO efficiency_scores (
|
|
12444
12454
|
story_key, timestamp, composite_score,
|
|
12445
12455
|
cache_hit_sub_score, io_ratio_sub_score, context_management_sub_score,
|
|
12456
|
+
token_density_sub_score,
|
|
12446
12457
|
avg_cache_hit_rate, avg_io_ratio, context_spike_count, total_turns,
|
|
12458
|
+
cold_start_turns_excluded,
|
|
12447
12459
|
per_model_json, per_source_json,
|
|
12448
12460
|
dispatch_id, task_type, phase
|
|
12449
12461
|
) VALUES (
|
|
12450
12462
|
?, ?, ?,
|
|
12451
12463
|
?, ?, ?,
|
|
12464
|
+
?,
|
|
12452
12465
|
?, ?, ?, ?,
|
|
12466
|
+
?,
|
|
12453
12467
|
?, ?,
|
|
12454
12468
|
?, ?, ?
|
|
12455
12469
|
)`, [
|
|
@@ -12459,10 +12473,12 @@ var AdapterTelemetryPersistence = class {
|
|
|
12459
12473
|
score.cacheHitSubScore,
|
|
12460
12474
|
score.ioRatioSubScore,
|
|
12461
12475
|
score.contextManagementSubScore,
|
|
12476
|
+
score.tokenDensitySubScore ?? 0,
|
|
12462
12477
|
score.avgCacheHitRate,
|
|
12463
12478
|
score.avgIoRatio,
|
|
12464
12479
|
score.contextSpikeCount,
|
|
12465
12480
|
score.totalTurns,
|
|
12481
|
+
score.coldStartTurnsExcluded ?? 0,
|
|
12466
12482
|
JSON.stringify(score.perModelBreakdown),
|
|
12467
12483
|
JSON.stringify(score.perSourceBreakdown),
|
|
12468
12484
|
score.dispatchId ?? null,
|
|
@@ -12482,10 +12498,12 @@ var AdapterTelemetryPersistence = class {
|
|
|
12482
12498
|
cacheHitSubScore: row.cache_hit_sub_score,
|
|
12483
12499
|
ioRatioSubScore: row.io_ratio_sub_score,
|
|
12484
12500
|
contextManagementSubScore: row.context_management_sub_score,
|
|
12501
|
+
tokenDensitySubScore: row.token_density_sub_score ?? 0,
|
|
12485
12502
|
avgCacheHitRate: row.avg_cache_hit_rate,
|
|
12486
12503
|
avgIoRatio: row.avg_io_ratio,
|
|
12487
12504
|
contextSpikeCount: row.context_spike_count,
|
|
12488
12505
|
totalTurns: row.total_turns,
|
|
12506
|
+
coldStartTurnsExcluded: row.cold_start_turns_excluded ?? 0,
|
|
12489
12507
|
perModelBreakdown: JSON.parse(row.per_model_json),
|
|
12490
12508
|
perSourceBreakdown: JSON.parse(row.per_source_json),
|
|
12491
12509
|
...row.dispatch_id != null && { dispatchId: row.dispatch_id },
|
|
@@ -13119,8 +13137,53 @@ var IngestionServer = class {
|
|
|
13119
13137
|
}
|
|
13120
13138
|
};
|
|
13121
13139
|
|
|
13140
|
+
//#endregion
|
|
13141
|
+
//#region src/modules/telemetry/task-baselines.ts
|
|
13142
|
+
const TASK_BASELINES = {
|
|
13143
|
+
"dev-story": {
|
|
13144
|
+
expectedOutputPerTurn: 550,
|
|
13145
|
+
targetIoRatio: 100
|
|
13146
|
+
},
|
|
13147
|
+
"create-story": {
|
|
13148
|
+
expectedOutputPerTurn: 1500,
|
|
13149
|
+
targetIoRatio: 100
|
|
13150
|
+
},
|
|
13151
|
+
"code-review": {
|
|
13152
|
+
expectedOutputPerTurn: 3900,
|
|
13153
|
+
targetIoRatio: 50
|
|
13154
|
+
},
|
|
13155
|
+
"minor-fixes": {
|
|
13156
|
+
expectedOutputPerTurn: 700,
|
|
13157
|
+
targetIoRatio: 100
|
|
13158
|
+
},
|
|
13159
|
+
"test-plan": {
|
|
13160
|
+
expectedOutputPerTurn: 1600,
|
|
13161
|
+
targetIoRatio: 30
|
|
13162
|
+
},
|
|
13163
|
+
"test-expansion": {
|
|
13164
|
+
expectedOutputPerTurn: 1950,
|
|
13165
|
+
targetIoRatio: 15
|
|
13166
|
+
}
|
|
13167
|
+
};
|
|
13168
|
+
const DEFAULT_BASELINE = {
|
|
13169
|
+
expectedOutputPerTurn: 800,
|
|
13170
|
+
targetIoRatio: 100
|
|
13171
|
+
};
|
|
13172
|
+
/**
|
|
13173
|
+
* Get the baseline for a task type, falling back to DEFAULT_BASELINE
|
|
13174
|
+
* when taskType is undefined, empty, or unknown.
|
|
13175
|
+
*/
|
|
13176
|
+
function getBaseline(taskType) {
|
|
13177
|
+
if (taskType === void 0 || taskType === "") return DEFAULT_BASELINE;
|
|
13178
|
+
return TASK_BASELINES[taskType] ?? DEFAULT_BASELINE;
|
|
13179
|
+
}
|
|
13180
|
+
|
|
13122
13181
|
//#endregion
|
|
13123
13182
|
//#region src/modules/telemetry/efficiency-scorer.ts
|
|
13183
|
+
const W_CACHE = .25;
|
|
13184
|
+
const W_IO_RATIO = .25;
|
|
13185
|
+
const W_CONTEXT = .25;
|
|
13186
|
+
const W_TOKEN_DENSITY = .25;
|
|
13124
13187
|
var EfficiencyScorer = class {
|
|
13125
13188
|
_logger;
|
|
13126
13189
|
constructor(logger$27) {
|
|
@@ -13142,27 +13205,36 @@ var EfficiencyScorer = class {
|
|
|
13142
13205
|
cacheHitSubScore: 0,
|
|
13143
13206
|
ioRatioSubScore: 0,
|
|
13144
13207
|
contextManagementSubScore: 0,
|
|
13208
|
+
tokenDensitySubScore: 0,
|
|
13145
13209
|
avgCacheHitRate: 0,
|
|
13146
13210
|
avgIoRatio: 0,
|
|
13147
13211
|
contextSpikeCount: 0,
|
|
13148
13212
|
totalTurns: 0,
|
|
13213
|
+
coldStartTurnsExcluded: 0,
|
|
13149
13214
|
perModelBreakdown: [],
|
|
13150
13215
|
perSourceBreakdown: []
|
|
13151
13216
|
};
|
|
13152
|
-
const
|
|
13153
|
-
const
|
|
13217
|
+
const taskType = this._inferTaskType(turns);
|
|
13218
|
+
const baseline = getBaseline(taskType);
|
|
13219
|
+
const coldStartIds = this._identifyColdStartTurns(turns);
|
|
13220
|
+
let scoringTurns = turns.filter((t) => !coldStartIds.has(t.spanId));
|
|
13221
|
+
if (scoringTurns.length === 0) scoringTurns = turns;
|
|
13222
|
+
const avgCacheHitRate = this._computeAvgCacheHitRate(scoringTurns);
|
|
13223
|
+
const avgIoRatio = this._computeAvgIoRatio(scoringTurns);
|
|
13154
13224
|
const contextSpikeCount = turns.filter((t) => t.isContextSpike).length;
|
|
13155
13225
|
const totalTurns = turns.length;
|
|
13156
|
-
const cacheHitSubScore = this._computeCacheHitSubScore(
|
|
13157
|
-
const ioRatioSubScore = this._computeIoRatioSubScore(
|
|
13158
|
-
const contextManagementSubScore = this._computeContextManagementSubScore(
|
|
13159
|
-
const
|
|
13160
|
-
const
|
|
13161
|
-
const
|
|
13226
|
+
const cacheHitSubScore = this._computeCacheHitSubScore(scoringTurns);
|
|
13227
|
+
const ioRatioSubScore = this._computeIoRatioSubScore(scoringTurns, baseline.targetIoRatio);
|
|
13228
|
+
const contextManagementSubScore = this._computeContextManagementSubScore(scoringTurns);
|
|
13229
|
+
const tokenDensitySubScore = this._computeTokenDensitySubScore(scoringTurns, baseline.expectedOutputPerTurn);
|
|
13230
|
+
const compositeScore = Math.round(cacheHitSubScore * W_CACHE + ioRatioSubScore * W_IO_RATIO + contextManagementSubScore * W_CONTEXT + tokenDensitySubScore * W_TOKEN_DENSITY);
|
|
13231
|
+
const perModelBreakdown = this._buildPerModelBreakdown(scoringTurns);
|
|
13232
|
+
const perSourceBreakdown = this._buildPerSourceBreakdown(scoringTurns, baseline.targetIoRatio, baseline.expectedOutputPerTurn);
|
|
13162
13233
|
this._logger.info({
|
|
13163
13234
|
storyKey,
|
|
13164
13235
|
compositeScore,
|
|
13165
|
-
contextSpikeCount
|
|
13236
|
+
contextSpikeCount,
|
|
13237
|
+
coldStartTurnsExcluded: coldStartIds.size
|
|
13166
13238
|
}, "Computed efficiency score");
|
|
13167
13239
|
return {
|
|
13168
13240
|
storyKey,
|
|
@@ -13171,15 +13243,41 @@ var EfficiencyScorer = class {
|
|
|
13171
13243
|
cacheHitSubScore,
|
|
13172
13244
|
ioRatioSubScore,
|
|
13173
13245
|
contextManagementSubScore,
|
|
13246
|
+
tokenDensitySubScore,
|
|
13174
13247
|
avgCacheHitRate,
|
|
13175
13248
|
avgIoRatio,
|
|
13176
13249
|
contextSpikeCount,
|
|
13177
13250
|
totalTurns,
|
|
13251
|
+
coldStartTurnsExcluded: coldStartIds.size,
|
|
13178
13252
|
perModelBreakdown,
|
|
13179
13253
|
perSourceBreakdown
|
|
13180
13254
|
};
|
|
13181
13255
|
}
|
|
13182
13256
|
/**
|
|
13257
|
+
* Identify cold-start turns: the first turn per dispatchId.
|
|
13258
|
+
* Returns a set of spanIds that should be excluded from scoring.
|
|
13259
|
+
* Only considers turns with a non-empty dispatchId.
|
|
13260
|
+
*/
|
|
13261
|
+
_identifyColdStartTurns(turns) {
|
|
13262
|
+
const coldStarts = new Set();
|
|
13263
|
+
const seenDispatches = new Set();
|
|
13264
|
+
for (const turn of turns) if (turn.dispatchId !== void 0 && turn.dispatchId !== "" && !seenDispatches.has(turn.dispatchId)) {
|
|
13265
|
+
seenDispatches.add(turn.dispatchId);
|
|
13266
|
+
coldStarts.add(turn.spanId);
|
|
13267
|
+
}
|
|
13268
|
+
return coldStarts;
|
|
13269
|
+
}
|
|
13270
|
+
/**
|
|
13271
|
+
* Infer the task type from turns. Returns the task type only when all turns
|
|
13272
|
+
* with a taskType agree (unanimous). For mixed task types (story-level
|
|
13273
|
+
* scoring across dispatches), returns undefined → default baseline.
|
|
13274
|
+
*/
|
|
13275
|
+
_inferTaskType(turns) {
|
|
13276
|
+
const types$1 = new Set();
|
|
13277
|
+
for (const turn of turns) if (turn.taskType !== void 0 && turn.taskType !== "") types$1.add(turn.taskType);
|
|
13278
|
+
return types$1.size === 1 ? [...types$1][0] : void 0;
|
|
13279
|
+
}
|
|
13280
|
+
/**
|
|
13183
13281
|
* Average cache hit rate across all turns, clamped to [0, 100].
|
|
13184
13282
|
* Formula: clamp(avgCacheHitRate × 100, 0, 100)
|
|
13185
13283
|
*/
|
|
@@ -13188,23 +13286,26 @@ var EfficiencyScorer = class {
|
|
|
13188
13286
|
return this._clamp(avg * 100, 0, 100);
|
|
13189
13287
|
}
|
|
13190
13288
|
/**
|
|
13191
|
-
* I/O ratio sub-score:
|
|
13289
|
+
* I/O ratio sub-score: logarithmic output/freshInput productivity curve (Story 35-1).
|
|
13290
|
+
*
|
|
13291
|
+
* Replaces the old binary threshold (>=1 → 100) with a logarithmic curve
|
|
13292
|
+
* that provides gradient across the observed range:
|
|
13293
|
+
* - score = clamp(log10(ratio) / log10(targetRatio) * 100, 0, 100)
|
|
13294
|
+
* - ratio = avg(outputTokens / max(freshInputTokens, 1)) across turns
|
|
13295
|
+
* - targetRatio is calibrated per task type (Story 35-2)
|
|
13192
13296
|
*
|
|
13193
|
-
*
|
|
13194
|
-
* desirable (agent reads large cached context, produces substantial code).
|
|
13195
|
-
* The old formula penalized this. New formula uses output-to-fresh-input ratio:
|
|
13196
|
-
* - outputTokens / max(freshInputTokens, 1) per turn
|
|
13197
|
-
* - Ratio > 1 means productive (more output than fresh input) → score 100
|
|
13198
|
-
* - Ratio < 1 → scaled linearly: ratio * 100
|
|
13199
|
-
* - Averaged across turns
|
|
13297
|
+
* Examples (TARGET=100): ratio 1→0, 10→50, 50→85, 100→100, 200→100(clamped)
|
|
13200
13298
|
*/
|
|
13201
|
-
_computeIoRatioSubScore(turns) {
|
|
13299
|
+
_computeIoRatioSubScore(turns, targetRatio) {
|
|
13202
13300
|
if (turns.length === 0) return 0;
|
|
13203
13301
|
const avg = turns.reduce((acc, t) => {
|
|
13204
13302
|
const freshInput = Math.max(t.inputTokens, 1);
|
|
13205
13303
|
return acc + t.outputTokens / freshInput;
|
|
13206
13304
|
}, 0) / turns.length;
|
|
13207
|
-
|
|
13305
|
+
if (avg <= 0) return 0;
|
|
13306
|
+
const logTarget = Math.log10(Math.max(targetRatio, 2));
|
|
13307
|
+
const score = Math.log10(avg) / logTarget * 100;
|
|
13308
|
+
return this._clamp(score, 0, 100);
|
|
13208
13309
|
}
|
|
13209
13310
|
/**
|
|
13210
13311
|
* Context management sub-score: penalizes context spike frequency.
|
|
@@ -13217,6 +13318,22 @@ var EfficiencyScorer = class {
|
|
|
13217
13318
|
const spikeRatio = spikeCount / totalTurns;
|
|
13218
13319
|
return this._clamp(100 - spikeRatio * 100, 0, 100);
|
|
13219
13320
|
}
|
|
13321
|
+
/**
|
|
13322
|
+
* Token density sub-score: output tokens per turn vs task-type baseline (Story 35-4).
|
|
13323
|
+
*
|
|
13324
|
+
* Measures whether the agent is producing useful output or spinning:
|
|
13325
|
+
* - score = clamp(avgOutputPerTurn / expectedOutputPerTurn * 100, 0, 100)
|
|
13326
|
+
* - expectedOutputPerTurn is calibrated per task type (Story 35-2)
|
|
13327
|
+
*
|
|
13328
|
+
* Below-baseline dispatches get proportionally lower scores.
|
|
13329
|
+
* At-or-above-baseline dispatches score 100.
|
|
13330
|
+
*/
|
|
13331
|
+
_computeTokenDensitySubScore(turns, expectedOutputPerTurn) {
|
|
13332
|
+
if (turns.length === 0) return 0;
|
|
13333
|
+
const avgOutput = turns.reduce((acc, t) => acc + t.outputTokens, 0) / turns.length;
|
|
13334
|
+
const ratio = avgOutput / Math.max(expectedOutputPerTurn, 1);
|
|
13335
|
+
return this._clamp(ratio * 100, 0, 100);
|
|
13336
|
+
}
|
|
13220
13337
|
_computeAvgCacheHitRate(turns) {
|
|
13221
13338
|
if (turns.length === 0) return 0;
|
|
13222
13339
|
const sum = turns.reduce((acc, t) => acc + t.cacheHitRate, 0);
|
|
@@ -13269,7 +13386,7 @@ var EfficiencyScorer = class {
|
|
|
13269
13386
|
* Group turns by source, computing a per-group composite score using the
|
|
13270
13387
|
* same formula as the overall score. Sources with zero turns are excluded.
|
|
13271
13388
|
*/
|
|
13272
|
-
_buildPerSourceBreakdown(turns) {
|
|
13389
|
+
_buildPerSourceBreakdown(turns, targetIoRatio, expectedOutputPerTurn) {
|
|
13273
13390
|
const groups = new Map();
|
|
13274
13391
|
for (const turn of turns) {
|
|
13275
13392
|
const key = turn.source;
|
|
@@ -13280,10 +13397,11 @@ var EfficiencyScorer = class {
|
|
|
13280
13397
|
const result = [];
|
|
13281
13398
|
for (const [source, groupTurns] of groups) {
|
|
13282
13399
|
if (groupTurns.length === 0) continue;
|
|
13283
|
-
const cacheHitSub = this.
|
|
13284
|
-
const ioRatioSub = this.
|
|
13285
|
-
const contextSub = this.
|
|
13286
|
-
const
|
|
13400
|
+
const cacheHitSub = this._computeCacheHitSubScore(groupTurns);
|
|
13401
|
+
const ioRatioSub = this._computeIoRatioSubScore(groupTurns, targetIoRatio);
|
|
13402
|
+
const contextSub = this._computeContextManagementSubScore(groupTurns);
|
|
13403
|
+
const tokenDensitySub = this._computeTokenDensitySubScore(groupTurns, expectedOutputPerTurn);
|
|
13404
|
+
const compositeScore = Math.round(cacheHitSub * W_CACHE + ioRatioSub * W_IO_RATIO + contextSub * W_CONTEXT + tokenDensitySub * W_TOKEN_DENSITY);
|
|
13287
13405
|
result.push({
|
|
13288
13406
|
source,
|
|
13289
13407
|
compositeScore,
|
|
@@ -13292,25 +13410,6 @@ var EfficiencyScorer = class {
|
|
|
13292
13410
|
}
|
|
13293
13411
|
return result;
|
|
13294
13412
|
}
|
|
13295
|
-
_computeCacheHitSubScoreForGroup(turns) {
|
|
13296
|
-
if (turns.length === 0) return 0;
|
|
13297
|
-
const avg = turns.reduce((acc, t) => acc + t.cacheHitRate, 0) / turns.length;
|
|
13298
|
-
return this._clamp(avg * 100, 0, 100);
|
|
13299
|
-
}
|
|
13300
|
-
_computeIoRatioSubScoreForGroup(turns) {
|
|
13301
|
-
if (turns.length === 0) return 0;
|
|
13302
|
-
const avg = turns.reduce((acc, t) => {
|
|
13303
|
-
const freshInput = Math.max(t.inputTokens, 1);
|
|
13304
|
-
return acc + t.outputTokens / freshInput;
|
|
13305
|
-
}, 0) / turns.length;
|
|
13306
|
-
return this._clamp(avg >= 1 ? 100 : avg * 100, 0, 100);
|
|
13307
|
-
}
|
|
13308
|
-
_computeContextManagementSubScoreForGroup(turns) {
|
|
13309
|
-
if (turns.length === 0) return 0;
|
|
13310
|
-
const spikeCount = turns.filter((t) => t.isContextSpike).length;
|
|
13311
|
-
const spikeRatio = spikeCount / turns.length;
|
|
13312
|
-
return this._clamp(100 - spikeRatio * 100, 0, 100);
|
|
13313
|
-
}
|
|
13314
13413
|
_clamp(value, min, max) {
|
|
13315
13414
|
return Math.max(min, Math.min(max, value));
|
|
13316
13415
|
}
|
|
@@ -15165,8 +15264,10 @@ var TelemetryAdvisor = class {
|
|
|
15165
15264
|
cacheHitSubScore: score.cacheHitSubScore,
|
|
15166
15265
|
ioRatioSubScore: score.ioRatioSubScore,
|
|
15167
15266
|
contextManagementSubScore: score.contextManagementSubScore,
|
|
15267
|
+
tokenDensitySubScore: score.tokenDensitySubScore ?? 0,
|
|
15168
15268
|
totalTurns: score.totalTurns,
|
|
15169
|
-
contextSpikeCount: score.contextSpikeCount
|
|
15269
|
+
contextSpikeCount: score.contextSpikeCount,
|
|
15270
|
+
coldStartTurnsExcluded: score.coldStartTurnsExcluded ?? 0
|
|
15170
15271
|
};
|
|
15171
15272
|
} catch (err) {
|
|
15172
15273
|
logger$6.warn({
|
|
@@ -22691,4 +22792,4 @@ function registerRunCommand(program, _version = "0.0.0", projectRoot = process.c
|
|
|
22691
22792
|
|
|
22692
22793
|
//#endregion
|
|
22693
22794
|
export { AdapterTelemetryPersistence, AppError, DEFAULT_CONFIG, DEFAULT_ROUTING_POLICY, DoltClient, DoltNotInstalled, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, FileStateStore, GitClient, GrammarLoader, IngestionServer, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SUBSTRATE_OWNED_SETTINGS_KEYS, SymbolParser, VALID_PHASES, WorkGraphRepository, buildPipelineStatusOutput, checkDoltInstalled, createConfigSystem, createContextCompiler, createDatabaseAdapter, createDispatcher, createDoltClient, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStateStore, createStopAfterGate, createTelemetryAdvisor, detectCycles, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, initSchema, initializeDolt, isSyncAdapter, parseDbTimestampAsUtc, registerHealthCommand, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runRunAction, runSolutioningPhase, validateStopAfterFromConflict };
|
|
22694
|
-
//# sourceMappingURL=run-
|
|
22795
|
+
//# sourceMappingURL=run-B1WEe6SY.js.map
|