agent-scenario-loop 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -167,6 +167,7 @@ function parseKeyValueProfileSessionEntry(payload) {
|
|
|
167
167
|
return null;
|
|
168
168
|
}
|
|
169
169
|
const timestamp = coerceNumber(entry.timestamp);
|
|
170
|
+
const startedAt = coerceNumber(entry.startedAt);
|
|
170
171
|
const atMs = coerceNumber(entry.atMs);
|
|
171
172
|
const sequence = coerceNumber(entry.sequence);
|
|
172
173
|
const waitMs = coerceNumber(entry.waitMs);
|
|
@@ -177,6 +178,9 @@ function parseKeyValueProfileSessionEntry(payload) {
|
|
|
177
178
|
if (timestamp !== null) {
|
|
178
179
|
entry.timestamp = timestamp;
|
|
179
180
|
}
|
|
181
|
+
if (startedAt !== null) {
|
|
182
|
+
entry.startedAt = startedAt;
|
|
183
|
+
}
|
|
180
184
|
if (sequence !== null) {
|
|
181
185
|
entry.sequence = sequence;
|
|
182
186
|
}
|
|
@@ -473,10 +477,15 @@ function buildMetricsFromProfileEvents({ scenario, runId, events, expectedIterat
|
|
|
473
477
|
(typeof record.milestoneCount === 'number' &&
|
|
474
478
|
record.milestoneCount >= requiredMilestoneEventsPerIteration)) &&
|
|
475
479
|
record.milestoneAt >= 0;
|
|
476
|
-
if (hasMilestoneDuration) {
|
|
480
|
+
if (hasMilestoneDuration && expectedIterations === 1) {
|
|
477
481
|
durationsMs.push(roundMs(record.milestoneAt));
|
|
478
482
|
openDurationsMs.push(roundMs(record.milestoneAt));
|
|
479
483
|
}
|
|
484
|
+
else if (hasMilestoneDuration) {
|
|
485
|
+
// Repeated completion-only milestones prove that cycles finished, but
|
|
486
|
+
// their atMs values are positions on the session timeline, not per-cycle
|
|
487
|
+
// latency samples. Repeated latency budgets need explicit interval anchors.
|
|
488
|
+
}
|
|
480
489
|
else if (hasCycleDuration) {
|
|
481
490
|
durationsMs.push(roundMs(record.dismissedAt - record.presentRequestedAt));
|
|
482
491
|
}
|
|
@@ -126,6 +126,17 @@ type ProviderCommandFailure = {
|
|
|
126
126
|
providerId: string;
|
|
127
127
|
rawPath?: string;
|
|
128
128
|
};
|
|
129
|
+
type ProfileSessionSeed = {
|
|
130
|
+
runId: string;
|
|
131
|
+
scenario: string;
|
|
132
|
+
startedAt: number;
|
|
133
|
+
};
|
|
134
|
+
type ProfileSessionFreshness = {
|
|
135
|
+
appStartedAt?: number;
|
|
136
|
+
reason?: string;
|
|
137
|
+
seed: ProfileSessionSeed;
|
|
138
|
+
status: 'fresh' | 'missing-app-session' | 'stale';
|
|
139
|
+
};
|
|
129
140
|
/**
|
|
130
141
|
* Prints CLI usage to stderr.
|
|
131
142
|
*
|
|
@@ -173,10 +184,10 @@ declare function resolveAttachedEvidence({ args, layout, providerInputs, }: {
|
|
|
173
184
|
/**
|
|
174
185
|
* Builds scenario health from profile metrics.
|
|
175
186
|
*
|
|
176
|
-
* @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[]}} options
|
|
187
|
+
* @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[], sessionFreshness?: ProfileSessionFreshness | null}} options
|
|
177
188
|
* @returns {Record<string, unknown>}
|
|
178
189
|
*/
|
|
179
|
-
declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries, }: {
|
|
190
|
+
declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries, sessionFreshness, }: {
|
|
180
191
|
scenario: Record<string, any>;
|
|
181
192
|
runId: string;
|
|
182
193
|
metrics: Record<string, any>;
|
|
@@ -185,6 +196,7 @@ declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, pro
|
|
|
185
196
|
profileSessionEntryCount?: number;
|
|
186
197
|
commandTransport?: string;
|
|
187
198
|
sessionEntries?: Record<string, any>[];
|
|
199
|
+
sessionFreshness?: ProfileSessionFreshness | null;
|
|
188
200
|
}): Record<string, unknown>;
|
|
189
201
|
/**
|
|
190
202
|
* Builds failed scenario health from evidence-provider command failures.
|
|
@@ -1204,10 +1204,10 @@ function buildRequiredDiagnosticHealthChecks(diagnostics = []) {
|
|
|
1204
1204
|
/**
|
|
1205
1205
|
* Builds scenario health from profile metrics.
|
|
1206
1206
|
*
|
|
1207
|
-
* @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[]}} options
|
|
1207
|
+
* @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[], sessionFreshness?: ProfileSessionFreshness | null}} options
|
|
1208
1208
|
* @returns {Record<string, unknown>}
|
|
1209
1209
|
*/
|
|
1210
|
-
function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries = [], }) {
|
|
1210
|
+
function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries = [], sessionFreshness = null, }) {
|
|
1211
1211
|
const passed = metrics.status === 'passed';
|
|
1212
1212
|
const metadata = {
|
|
1213
1213
|
failures: typeof metrics.failures === 'number' ? metrics.failures : null,
|
|
@@ -1270,7 +1270,39 @@ function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profil
|
|
|
1270
1270
|
const commandChecksPassed = commandChecks.every((check) => check.status === 'passed');
|
|
1271
1271
|
const diagnosticChecks = buildRequiredDiagnosticHealthChecks(diagnostics);
|
|
1272
1272
|
const diagnosticChecksPassed = diagnosticChecks.every((check) => check.status === 'passed');
|
|
1273
|
-
const healthPassed = passed && commandChecksPassed && diagnosticChecksPassed;
|
|
1273
|
+
const sessionFreshnessChecks = sessionFreshness
|
|
1274
|
+
? [
|
|
1275
|
+
{
|
|
1276
|
+
name: 'profile_session_freshness',
|
|
1277
|
+
status: sessionFreshness.status === 'fresh'
|
|
1278
|
+
? 'passed'
|
|
1279
|
+
: sessionFreshness.status === 'missing-app-session'
|
|
1280
|
+
? 'warning'
|
|
1281
|
+
: 'failed',
|
|
1282
|
+
source: 'runner',
|
|
1283
|
+
code: sessionFreshness.status === 'fresh'
|
|
1284
|
+
? 'profile_session_fresh'
|
|
1285
|
+
: sessionFreshness.status === 'missing-app-session'
|
|
1286
|
+
? 'profile_session_start_missing'
|
|
1287
|
+
: 'profile_session_stale',
|
|
1288
|
+
message: sessionFreshness.status === 'fresh'
|
|
1289
|
+
? 'App-side profile-session start matched the runner-written session seed.'
|
|
1290
|
+
: sessionFreshness.reason ?? 'App-side profile-session evidence did not match the runner-written session seed.',
|
|
1291
|
+
metadata: {
|
|
1292
|
+
appStartedAt: sessionFreshness.appStartedAt ?? null,
|
|
1293
|
+
nextAction: sessionFreshness.status === 'fresh'
|
|
1294
|
+
? 'No action required.'
|
|
1295
|
+
: 'Clear stale app/session state, reload the expected app bundle, and rerun before treating profile events or metrics as product evidence.',
|
|
1296
|
+
nextActionCode: sessionFreshness.status === 'fresh'
|
|
1297
|
+
? 'none'
|
|
1298
|
+
: 'rerun_with_fresh_profile_session',
|
|
1299
|
+
seedStartedAt: sessionFreshness.seed.startedAt,
|
|
1300
|
+
},
|
|
1301
|
+
},
|
|
1302
|
+
]
|
|
1303
|
+
: [];
|
|
1304
|
+
const sessionFreshnessChecksPassed = sessionFreshnessChecks.every((check) => check.status !== 'failed');
|
|
1305
|
+
const healthPassed = passed && commandChecksPassed && diagnosticChecksPassed && sessionFreshnessChecksPassed;
|
|
1274
1306
|
return assertValidJson({
|
|
1275
1307
|
schemaVersion: '1.0.0',
|
|
1276
1308
|
scenarioId: scenario.name,
|
|
@@ -1288,6 +1320,7 @@ function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profil
|
|
|
1288
1320
|
: 'Profile events did not complete every expected iteration.',
|
|
1289
1321
|
metadata,
|
|
1290
1322
|
},
|
|
1323
|
+
...sessionFreshnessChecks,
|
|
1291
1324
|
...commandChecks,
|
|
1292
1325
|
...diagnosticChecks,
|
|
1293
1326
|
],
|
|
@@ -1541,6 +1574,141 @@ function resolveProfileSessionEntriesPath({ args, platform }) {
|
|
|
1541
1574
|
}
|
|
1542
1575
|
return null;
|
|
1543
1576
|
}
|
|
1577
|
+
/**
|
|
1578
|
+
* Reads one JSON object candidate from raw command text.
|
|
1579
|
+
*
|
|
1580
|
+
* @param {string} text
|
|
1581
|
+
* @returns {Record<string, unknown>[]}
|
|
1582
|
+
*/
|
|
1583
|
+
function parseJsonObjectsFromText(text) {
|
|
1584
|
+
const matches = text.match(/\{[^{}\n]*\}/gu) ?? [];
|
|
1585
|
+
const objects = [];
|
|
1586
|
+
for (const match of matches) {
|
|
1587
|
+
try {
|
|
1588
|
+
const parsed = JSON.parse(match);
|
|
1589
|
+
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
|
1590
|
+
objects.push(parsed);
|
|
1591
|
+
}
|
|
1592
|
+
}
|
|
1593
|
+
catch {
|
|
1594
|
+
// Raw command files can contain shell syntax around JSON payloads.
|
|
1595
|
+
}
|
|
1596
|
+
}
|
|
1597
|
+
return objects;
|
|
1598
|
+
}
|
|
1599
|
+
/**
|
|
1600
|
+
* Reads an Android profile-session seed from adb AsyncStorage raw artifacts.
|
|
1601
|
+
*
|
|
1602
|
+
* @param {{sidecarRoot: string, runId: string, scenarioName: string}} options
|
|
1603
|
+
* @returns {ProfileSessionSeed | null}
|
|
1604
|
+
*/
|
|
1605
|
+
function readAndroidProfileSessionSeed({ runId, scenarioName, sidecarRoot, }) {
|
|
1606
|
+
const rawDir = path.resolve(sidecarRoot, 'raw');
|
|
1607
|
+
if (!fs.existsSync(rawDir)) {
|
|
1608
|
+
return null;
|
|
1609
|
+
}
|
|
1610
|
+
for (const fileName of fs.readdirSync(rawDir).filter((entry) => /^adb-async-storage-write-\d+\.txt$/u.test(entry)).sort()) {
|
|
1611
|
+
const rawText = fs.readFileSync(path.join(rawDir, fileName), 'utf8');
|
|
1612
|
+
for (const candidate of parseJsonObjectsFromText(rawText)) {
|
|
1613
|
+
if (candidate.runId === runId &&
|
|
1614
|
+
candidate.scenario === scenarioName &&
|
|
1615
|
+
typeof candidate.startedAt === 'number' &&
|
|
1616
|
+
Number.isFinite(candidate.startedAt)) {
|
|
1617
|
+
return {
|
|
1618
|
+
runId,
|
|
1619
|
+
scenario: scenarioName,
|
|
1620
|
+
startedAt: candidate.startedAt,
|
|
1621
|
+
};
|
|
1622
|
+
}
|
|
1623
|
+
}
|
|
1624
|
+
}
|
|
1625
|
+
return null;
|
|
1626
|
+
}
|
|
1627
|
+
/**
|
|
1628
|
+
* Reads an iOS profile-session seed from simctl storage artifacts.
|
|
1629
|
+
*
|
|
1630
|
+
* @param {{sidecarRoot: string, runId: string, scenarioName: string}} options
|
|
1631
|
+
* @returns {ProfileSessionSeed | null}
|
|
1632
|
+
*/
|
|
1633
|
+
function readIosProfileSessionSeed({ runId, scenarioName, sidecarRoot, }) {
|
|
1634
|
+
const seedPath = path.resolve(sidecarRoot, 'raw', 'ios-profile-session-seed.json');
|
|
1635
|
+
const seed = readOptionalJsonObject(seedPath);
|
|
1636
|
+
const session = seed?.session;
|
|
1637
|
+
if (!session || typeof session !== 'object' || Array.isArray(session)) {
|
|
1638
|
+
return null;
|
|
1639
|
+
}
|
|
1640
|
+
const record = session;
|
|
1641
|
+
if (record.runId === runId &&
|
|
1642
|
+
record.scenario === scenarioName &&
|
|
1643
|
+
typeof record.startedAt === 'number' &&
|
|
1644
|
+
Number.isFinite(record.startedAt)) {
|
|
1645
|
+
return {
|
|
1646
|
+
runId,
|
|
1647
|
+
scenario: scenarioName,
|
|
1648
|
+
startedAt: record.startedAt,
|
|
1649
|
+
};
|
|
1650
|
+
}
|
|
1651
|
+
return null;
|
|
1652
|
+
}
|
|
1653
|
+
/**
|
|
1654
|
+
* Reads the profile-session seed written by a platform sidecar, when present.
|
|
1655
|
+
*
|
|
1656
|
+
* @param {{args: CliArgs, platform: ProfilePlatform, runId: string, scenarioName: string}} options
|
|
1657
|
+
* @returns {ProfileSessionSeed | null}
|
|
1658
|
+
*/
|
|
1659
|
+
function resolveProfileSessionSeed({ args, platform, runId, scenarioName, }) {
|
|
1660
|
+
if (platform === 'android' && typeof args['adb-artifacts'] === 'string') {
|
|
1661
|
+
return readAndroidProfileSessionSeed({
|
|
1662
|
+
runId,
|
|
1663
|
+
scenarioName,
|
|
1664
|
+
sidecarRoot: path.resolve(args['adb-artifacts']),
|
|
1665
|
+
});
|
|
1666
|
+
}
|
|
1667
|
+
if (platform === 'ios' && typeof args['simctl-artifacts'] === 'string') {
|
|
1668
|
+
return readIosProfileSessionSeed({
|
|
1669
|
+
runId,
|
|
1670
|
+
scenarioName,
|
|
1671
|
+
sidecarRoot: path.resolve(args['simctl-artifacts']),
|
|
1672
|
+
});
|
|
1673
|
+
}
|
|
1674
|
+
return null;
|
|
1675
|
+
}
|
|
1676
|
+
/**
|
|
1677
|
+
* Compares the sidecar-written profile session to the app-emitted session.
|
|
1678
|
+
*
|
|
1679
|
+
* @param {{seed: ProfileSessionSeed | null, sessionEntries: Record<string, unknown>[]}} options
|
|
1680
|
+
* @returns {ProfileSessionFreshness | null}
|
|
1681
|
+
*/
|
|
1682
|
+
function resolveProfileSessionFreshness({ seed, sessionEntries, }) {
|
|
1683
|
+
if (!seed) {
|
|
1684
|
+
return null;
|
|
1685
|
+
}
|
|
1686
|
+
const appStart = sessionEntries.find((entry) => (entry?.kind === 'start' &&
|
|
1687
|
+
entry.runId === seed.runId &&
|
|
1688
|
+
entry.scenario === seed.scenario &&
|
|
1689
|
+
typeof entry.startedAt === 'number' &&
|
|
1690
|
+
Number.isFinite(entry.startedAt)));
|
|
1691
|
+
if (!appStart || typeof appStart.startedAt !== 'number') {
|
|
1692
|
+
return {
|
|
1693
|
+
seed,
|
|
1694
|
+
status: 'missing-app-session',
|
|
1695
|
+
reason: 'The runner wrote a profile-session seed, but no matching app-side start entry was observed.',
|
|
1696
|
+
};
|
|
1697
|
+
}
|
|
1698
|
+
if (appStart.startedAt !== seed.startedAt) {
|
|
1699
|
+
return {
|
|
1700
|
+
appStartedAt: appStart.startedAt,
|
|
1701
|
+
seed,
|
|
1702
|
+
status: 'stale',
|
|
1703
|
+
reason: 'The app-side profile-session start did not match the runner-written seed.',
|
|
1704
|
+
};
|
|
1705
|
+
}
|
|
1706
|
+
return {
|
|
1707
|
+
appStartedAt: appStart.startedAt,
|
|
1708
|
+
seed,
|
|
1709
|
+
status: 'fresh',
|
|
1710
|
+
};
|
|
1711
|
+
}
|
|
1544
1712
|
/**
|
|
1545
1713
|
* Resolves the run id used by rehydrated sidecar evidence.
|
|
1546
1714
|
*
|
|
@@ -2414,6 +2582,16 @@ async function runProfileMobile(args, options) {
|
|
|
2414
2582
|
})
|
|
2415
2583
|
: []),
|
|
2416
2584
|
];
|
|
2585
|
+
const profileSessionSeed = resolveProfileSessionSeed({
|
|
2586
|
+
args,
|
|
2587
|
+
platform: options.platform,
|
|
2588
|
+
runId: evidenceFilterRunId,
|
|
2589
|
+
scenarioName,
|
|
2590
|
+
});
|
|
2591
|
+
const sessionFreshness = resolveProfileSessionFreshness({
|
|
2592
|
+
seed: profileSessionSeed,
|
|
2593
|
+
sessionEntries,
|
|
2594
|
+
});
|
|
2417
2595
|
const runtimeTarget = resolveRuntimeTarget({ args, platform: options.platform });
|
|
2418
2596
|
const metrics = buildMetricsFromProfileEvents({
|
|
2419
2597
|
scenario: scenarioName,
|
|
@@ -2542,6 +2720,7 @@ async function runProfileMobile(args, options) {
|
|
|
2542
2720
|
profileSessionEntryCount: sessionEntries.length,
|
|
2543
2721
|
commandTransport,
|
|
2544
2722
|
sessionEntries,
|
|
2723
|
+
sessionFreshness,
|
|
2545
2724
|
});
|
|
2546
2725
|
const verdict = buildProfileVerdict({ scenario: profileScenario, runId, health, metrics });
|
|
2547
2726
|
const agentSummary = buildAgentSummaryMarkdown({ health, verdict, manifest });
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-scenario-loop",
|
|
3
|
-
"version": "0.1.6",
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Scenario orchestration and evidence collection for agent-driven software development. Bring your own runner. Keep your scenarios. Keep your evidence.",
|
|
6
6
|
"license": "MIT",
|