substrate-ai 0.4.11 → 0.5.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
- package/dist/adapter-registry-BRQXdPnB.js +3 -0
- package/dist/{adapter-registry-Cd-7lG5v.js → adapter-registry-BkUvZSKJ.js} +2 -2
- package/dist/cli/index.js +571 -572
- package/dist/{decisions-D7Ao_KcL.js → decisions-BxYj_a1X.js} +1 -1
- package/dist/{decisions-Db8GTbH2.js → decisions-C6MF2Cax.js} +113 -88
- package/dist/{experimenter-CvxtqzXz.js → experimenter-CoR0k66d.js} +10 -10
- package/dist/index.js +1 -1
- package/dist/{operational-C0_y8DAs.js → operational-CidppHy-.js} +104 -89
- package/dist/run-C-yCMYlt.js +9 -0
- package/dist/{run-B_08vO01.js → run-GqmIa5YW.js} +1603 -1487
- package/package.json +2 -4
- package/dist/adapter-registry-X5X81xdJ.js +0 -3
- package/dist/run-DbCq_Ynr.js +0 -9
|
@@ -2,68 +2,75 @@
|
|
|
2
2
|
/**
|
|
3
3
|
* Write or update run-level metrics.
|
|
4
4
|
*
|
|
5
|
-
* Uses
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
5
|
+
* Uses a portable delete-then-insert pattern inside a transaction to work on
|
|
6
|
+
* both SQLite/WASM and Dolt/MySQL. When a row already exists, the `restarts`
|
|
7
|
+
* and `is_baseline` values are preserved from the existing row (so any
|
|
8
|
+
* `incrementRunRestarts()` calls made by the supervisor between the caller's
|
|
9
|
+
* read and this write are not silently overwritten).
|
|
9
10
|
*/
|
|
10
|
-
function writeRunMetrics(
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
11
|
+
async function writeRunMetrics(adapter, input) {
|
|
12
|
+
await adapter.transaction(async (tx) => {
|
|
13
|
+
const existing = await tx.query("SELECT restarts, is_baseline FROM run_metrics WHERE run_id = ?", [input.run_id]);
|
|
14
|
+
if (existing.length > 0) await tx.query("DELETE FROM run_metrics WHERE run_id = ?", [input.run_id]);
|
|
15
|
+
const restarts = existing[0]?.restarts ?? input.restarts ?? 0;
|
|
16
|
+
const isBaseline = existing[0]?.is_baseline ?? input.is_baseline ?? 0;
|
|
17
|
+
await tx.query(`INSERT INTO run_metrics (
|
|
18
|
+
run_id, methodology, status, started_at, completed_at,
|
|
19
|
+
wall_clock_seconds, total_input_tokens, total_output_tokens, total_cost_usd,
|
|
20
|
+
stories_attempted, stories_succeeded, stories_failed, stories_escalated,
|
|
21
|
+
total_review_cycles, total_dispatches, concurrency_setting, max_concurrent_actual, restarts,
|
|
22
|
+
is_baseline
|
|
23
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
|
|
24
|
+
input.run_id,
|
|
25
|
+
input.methodology,
|
|
26
|
+
input.status,
|
|
27
|
+
input.started_at,
|
|
28
|
+
input.completed_at ?? null,
|
|
29
|
+
input.wall_clock_seconds ?? 0,
|
|
30
|
+
input.total_input_tokens ?? 0,
|
|
31
|
+
input.total_output_tokens ?? 0,
|
|
32
|
+
input.total_cost_usd ?? 0,
|
|
33
|
+
input.stories_attempted ?? 0,
|
|
34
|
+
input.stories_succeeded ?? 0,
|
|
35
|
+
input.stories_failed ?? 0,
|
|
36
|
+
input.stories_escalated ?? 0,
|
|
37
|
+
input.total_review_cycles ?? 0,
|
|
38
|
+
input.total_dispatches ?? 0,
|
|
39
|
+
input.concurrency_setting ?? 1,
|
|
40
|
+
input.max_concurrent_actual ?? 1,
|
|
41
|
+
restarts,
|
|
42
|
+
isBaseline
|
|
43
|
+
]);
|
|
44
|
+
});
|
|
40
45
|
}
|
|
41
46
|
/**
|
|
42
47
|
* Get run metrics for a specific run.
|
|
43
48
|
*/
|
|
44
|
-
function getRunMetrics(
|
|
45
|
-
|
|
49
|
+
async function getRunMetrics(adapter, runId) {
|
|
50
|
+
const rows = await adapter.query("SELECT * FROM run_metrics WHERE run_id = ?", [runId]);
|
|
51
|
+
return rows[0];
|
|
46
52
|
}
|
|
47
53
|
/**
|
|
48
54
|
* List the most recent N run metrics rows, newest first.
|
|
49
55
|
*/
|
|
50
|
-
function listRunMetrics(
|
|
51
|
-
return
|
|
56
|
+
async function listRunMetrics(adapter, limit = 10) {
|
|
57
|
+
return adapter.query("SELECT * FROM run_metrics ORDER BY started_at DESC LIMIT ?", [limit]);
|
|
52
58
|
}
|
|
53
59
|
/**
|
|
54
60
|
* Tag a run as the baseline (clears any existing baseline first).
|
|
55
61
|
*/
|
|
56
|
-
function tagRunAsBaseline(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
})
|
|
62
|
+
async function tagRunAsBaseline(adapter, runId) {
|
|
63
|
+
await adapter.transaction(async (tx) => {
|
|
64
|
+
await tx.query("UPDATE run_metrics SET is_baseline = 0");
|
|
65
|
+
await tx.query("UPDATE run_metrics SET is_baseline = 1 WHERE run_id = ?", [runId]);
|
|
66
|
+
});
|
|
61
67
|
}
|
|
62
68
|
/**
|
|
63
69
|
* Get the current baseline run metrics (if any).
|
|
64
70
|
*/
|
|
65
|
-
function getBaselineRunMetrics(
|
|
66
|
-
|
|
71
|
+
async function getBaselineRunMetrics(adapter) {
|
|
72
|
+
const rows = await adapter.query("SELECT * FROM run_metrics WHERE is_baseline = 1 LIMIT 1");
|
|
73
|
+
return rows[0];
|
|
67
74
|
}
|
|
68
75
|
/**
|
|
69
76
|
* Increment the restart count for a run by 1.
|
|
@@ -71,52 +78,64 @@ function getBaselineRunMetrics(db) {
|
|
|
71
78
|
* If the run_id does not yet exist in run_metrics, a placeholder row is
|
|
72
79
|
* inserted so the restart count is not lost — writeRunMetrics will overwrite
|
|
73
80
|
* all other fields when the run reaches a terminal state.
|
|
81
|
+
*
|
|
82
|
+
* Uses a portable select-then-update/insert pattern inside a transaction to
|
|
83
|
+
* work on both SQLite/WASM and Dolt/MySQL.
|
|
74
84
|
*/
|
|
75
|
-
function incrementRunRestarts(
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
85
|
+
async function incrementRunRestarts(adapter, runId) {
|
|
86
|
+
await adapter.transaction(async (tx) => {
|
|
87
|
+
const existing = await tx.query("SELECT restarts FROM run_metrics WHERE run_id = ?", [runId]);
|
|
88
|
+
if (existing.length > 0) await tx.query("UPDATE run_metrics SET restarts = ? WHERE run_id = ?", [existing[0].restarts + 1, runId]);
|
|
89
|
+
else await tx.query(`INSERT INTO run_metrics (run_id, methodology, status, started_at, restarts)
|
|
90
|
+
VALUES (?, 'unknown', 'running', ?, 1)`, [runId, new Date().toISOString()]);
|
|
91
|
+
});
|
|
81
92
|
}
|
|
82
93
|
/**
|
|
83
94
|
* Write or update story-level metrics.
|
|
95
|
+
*
|
|
96
|
+
* Uses a portable delete-then-insert pattern inside a transaction to work on
|
|
97
|
+
* both SQLite/WASM and Dolt/MySQL. When a row already exists, the `started_at`
|
|
98
|
+
* value is preserved from the existing row if the new value is null.
|
|
84
99
|
*/
|
|
85
|
-
function writeStoryMetrics(
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
100
|
+
async function writeStoryMetrics(adapter, input) {
|
|
101
|
+
await adapter.transaction(async (tx) => {
|
|
102
|
+
const existing = await tx.query("SELECT started_at FROM story_metrics WHERE run_id = ? AND story_key = ?", [input.run_id, input.story_key]);
|
|
103
|
+
if (existing.length > 0) await tx.query("DELETE FROM story_metrics WHERE run_id = ? AND story_key = ?", [input.run_id, input.story_key]);
|
|
104
|
+
const startedAt = input.started_at ?? existing[0]?.started_at ?? null;
|
|
105
|
+
await tx.query(`INSERT INTO story_metrics (
|
|
106
|
+
run_id, story_key, result, phase_durations_json, started_at, completed_at,
|
|
107
|
+
wall_clock_seconds, input_tokens, output_tokens, cost_usd,
|
|
108
|
+
review_cycles, dispatches
|
|
109
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
|
|
110
|
+
input.run_id,
|
|
111
|
+
input.story_key,
|
|
112
|
+
input.result,
|
|
113
|
+
input.phase_durations_json ?? null,
|
|
114
|
+
startedAt,
|
|
115
|
+
input.completed_at ?? null,
|
|
116
|
+
input.wall_clock_seconds ?? 0,
|
|
117
|
+
input.input_tokens ?? 0,
|
|
118
|
+
input.output_tokens ?? 0,
|
|
119
|
+
input.cost_usd ?? 0,
|
|
120
|
+
input.review_cycles ?? 0,
|
|
121
|
+
input.dispatches ?? 0
|
|
122
|
+
]);
|
|
123
|
+
});
|
|
105
124
|
}
|
|
106
125
|
/**
|
|
107
126
|
* Get all story metrics for a given run.
|
|
108
127
|
*/
|
|
109
|
-
function getStoryMetricsForRun(
|
|
110
|
-
return
|
|
128
|
+
async function getStoryMetricsForRun(adapter, runId) {
|
|
129
|
+
return adapter.query("SELECT * FROM story_metrics WHERE run_id = ? ORDER BY id ASC", [runId]);
|
|
111
130
|
}
|
|
112
131
|
/**
|
|
113
132
|
* Compare two runs and return percentage deltas for key numeric fields.
|
|
114
133
|
* Positive deltas mean run B is larger/longer than run A.
|
|
115
134
|
* Returns null if either run does not exist.
|
|
116
135
|
*/
|
|
117
|
-
function compareRunMetrics(
|
|
118
|
-
const a = getRunMetrics(
|
|
119
|
-
const b = getRunMetrics(
|
|
136
|
+
async function compareRunMetrics(adapter, runIdA, runIdB) {
|
|
137
|
+
const a = await getRunMetrics(adapter, runIdA);
|
|
138
|
+
const b = await getRunMetrics(adapter, runIdB);
|
|
120
139
|
if (!a || !b) return null;
|
|
121
140
|
const pct = (base, diff) => base === 0 ? null : Math.round(diff / base * 100 * 10) / 10;
|
|
122
141
|
const inputDelta = b.total_input_tokens - a.total_input_tokens;
|
|
@@ -142,16 +161,14 @@ function compareRunMetrics(db, runIdA, runIdB) {
|
|
|
142
161
|
/**
|
|
143
162
|
* Aggregate token usage from the token_usage table for a pipeline run.
|
|
144
163
|
*/
|
|
145
|
-
function aggregateTokenUsageForRun(
|
|
146
|
-
const
|
|
147
|
-
SELECT
|
|
164
|
+
async function aggregateTokenUsageForRun(adapter, runId) {
|
|
165
|
+
const rows = await adapter.query(`SELECT
|
|
148
166
|
COALESCE(SUM(input_tokens), 0) as input,
|
|
149
167
|
COALESCE(SUM(output_tokens), 0) as output,
|
|
150
168
|
COALESCE(SUM(cost_usd), 0) as cost
|
|
151
169
|
FROM token_usage
|
|
152
|
-
WHERE pipeline_run_id =
|
|
153
|
-
|
|
154
|
-
return row ?? {
|
|
170
|
+
WHERE pipeline_run_id = ?`, [runId]);
|
|
171
|
+
return rows[0] ?? {
|
|
155
172
|
input: 0,
|
|
156
173
|
output: 0,
|
|
157
174
|
cost: 0
|
|
@@ -161,18 +178,16 @@ function aggregateTokenUsageForRun(db, runId) {
|
|
|
161
178
|
* Aggregate token usage for a specific story within a pipeline run.
|
|
162
179
|
* Matches rows where the metadata JSON contains the given storyKey.
|
|
163
180
|
*/
|
|
164
|
-
function aggregateTokenUsageForStory(
|
|
165
|
-
const
|
|
166
|
-
SELECT
|
|
181
|
+
async function aggregateTokenUsageForStory(adapter, runId, storyKey) {
|
|
182
|
+
const rows = await adapter.query(`SELECT
|
|
167
183
|
COALESCE(SUM(input_tokens), 0) as input,
|
|
168
184
|
COALESCE(SUM(output_tokens), 0) as output,
|
|
169
185
|
COALESCE(SUM(cost_usd), 0) as cost
|
|
170
186
|
FROM token_usage
|
|
171
187
|
WHERE pipeline_run_id = ?
|
|
172
188
|
AND metadata IS NOT NULL
|
|
173
|
-
AND json_extract(metadata, '$.storyKey') =
|
|
174
|
-
|
|
175
|
-
return row ?? {
|
|
189
|
+
AND json_extract(metadata, '$.storyKey') = ?`, [runId, storyKey]);
|
|
190
|
+
return rows[0] ?? {
|
|
176
191
|
input: 0,
|
|
177
192
|
output: 0,
|
|
178
193
|
cost: 0
|
|
@@ -356,4 +371,4 @@ const ADVISORY_NOTES = "advisory-notes";
|
|
|
356
371
|
|
|
357
372
|
//#endregion
|
|
358
373
|
export { ADVISORY_NOTES, ESCALATION_DIAGNOSIS, EXPERIMENT_RESULT, OPERATIONAL_FINDING, STORY_METRICS, STORY_OUTCOME, TEST_EXPANSION_FINDING, TEST_PLAN, aggregateTokenUsageForRun, aggregateTokenUsageForStory, compareRunMetrics, getBaselineRunMetrics, getRunMetrics, getStoryMetricsForRun, incrementRunRestarts, listRunMetrics, tagRunAsBaseline, writeRunMetrics, writeStoryMetrics };
|
|
359
|
-
//# sourceMappingURL=operational-
|
|
374
|
+
//# sourceMappingURL=operational-CidppHy-.js.map
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { registerRunCommand, runRunAction } from "./run-GqmIa5YW.js";
|
|
2
|
+
import "./logger-D2fS2ccL.js";
|
|
3
|
+
import "./config-migrator-DtZW1maj.js";
|
|
4
|
+
import "./helpers-BihqWgVe.js";
|
|
5
|
+
import "./routing-BUE9pIxW.js";
|
|
6
|
+
import "./decisions-C6MF2Cax.js";
|
|
7
|
+
import "./operational-CidppHy-.js";
|
|
8
|
+
|
|
9
|
+
export { runRunAction };
|