agent-scenario-loop 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -167,6 +167,7 @@ function parseKeyValueProfileSessionEntry(payload) {
167
167
  return null;
168
168
  }
169
169
  const timestamp = coerceNumber(entry.timestamp);
170
+ const startedAt = coerceNumber(entry.startedAt);
170
171
  const atMs = coerceNumber(entry.atMs);
171
172
  const sequence = coerceNumber(entry.sequence);
172
173
  const waitMs = coerceNumber(entry.waitMs);
@@ -177,6 +178,9 @@ function parseKeyValueProfileSessionEntry(payload) {
177
178
  if (timestamp !== null) {
178
179
  entry.timestamp = timestamp;
179
180
  }
181
+ if (startedAt !== null) {
182
+ entry.startedAt = startedAt;
183
+ }
180
184
  if (sequence !== null) {
181
185
  entry.sequence = sequence;
182
186
  }
@@ -473,10 +477,15 @@ function buildMetricsFromProfileEvents({ scenario, runId, events, expectedIterat
473
477
  (typeof record.milestoneCount === 'number' &&
474
478
  record.milestoneCount >= requiredMilestoneEventsPerIteration)) &&
475
479
  record.milestoneAt >= 0;
476
- if (hasMilestoneDuration) {
480
+ if (hasMilestoneDuration && expectedIterations === 1) {
477
481
  durationsMs.push(roundMs(record.milestoneAt));
478
482
  openDurationsMs.push(roundMs(record.milestoneAt));
479
483
  }
484
+ else if (hasMilestoneDuration) {
485
+ // Repeated completion-only milestones prove that cycles finished, but
486
+ // their atMs values are positions on the session timeline, not per-cycle
487
+ // latency samples. Repeated latency budgets need explicit interval anchors.
488
+ }
480
489
  else if (hasCycleDuration) {
481
490
  durationsMs.push(roundMs(record.dismissedAt - record.presentRequestedAt));
482
491
  }
@@ -126,6 +126,17 @@ type ProviderCommandFailure = {
126
126
  providerId: string;
127
127
  rawPath?: string;
128
128
  };
/**
 * Profile-session seed that the runner writes through a platform sidecar.
 *
 * The three fields together identify one seeded session; `startedAt` is the
 * value later compared for equality against the app-emitted start entry.
 * NOTE(review): `startedAt` is presumably an epoch timestamp; the unit is not
 * visible here, so confirm against the sidecar writer.
 */
type ProfileSessionSeed = {
    /** Numeric start marker recorded in the seed. */
    startedAt: number;
    /** Scenario name the seed was written for. */
    scenario: string;
    /** Run id the seed was written for. */
    runId: string;
};
/**
 * Outcome of comparing the runner-written seed with the app-emitted session.
 *
 * - `fresh`: a matching app-side start entry carried the same `startedAt` as
 *   the seed.
 * - `stale`: a matching app-side start entry was found, but its `startedAt`
 *   differed from the seed.
 * - `missing-app-session`: no matching app-side start entry was observed.
 */
type ProfileSessionFreshness = {
    /** Comparison verdict. */
    status: 'fresh' | 'missing-app-session' | 'stale';
    /** Seed the app-side session was compared against. */
    seed: ProfileSessionSeed;
    /** `startedAt` reported by the app-side start entry, when one was found. */
    appStartedAt?: number;
    /** Human-readable explanation, supplied for non-fresh outcomes. */
    reason?: string;
};
129
140
  /**
130
141
  * Prints CLI usage to stderr.
131
142
  *
@@ -173,10 +184,10 @@ declare function resolveAttachedEvidence({ args, layout, providerInputs, }: {
173
184
  /**
174
185
  * Builds scenario health from profile metrics.
175
186
  *
176
- * @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[]}} options
187
+ * @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[], sessionFreshness?: ProfileSessionFreshness | null}} options
177
188
  * @returns {Record<string, unknown>}
178
189
  */
179
- declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries, }: {
190
+ declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries, sessionFreshness, }: {
180
191
  scenario: Record<string, any>;
181
192
  runId: string;
182
193
  metrics: Record<string, any>;
@@ -185,6 +196,7 @@ declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, pro
185
196
  profileSessionEntryCount?: number;
186
197
  commandTransport?: string;
187
198
  sessionEntries?: Record<string, any>[];
199
+ sessionFreshness?: ProfileSessionFreshness | null;
188
200
  }): Record<string, unknown>;
189
201
  /**
190
202
  * Builds failed scenario health from evidence-provider command failures.
@@ -1204,10 +1204,10 @@ function buildRequiredDiagnosticHealthChecks(diagnostics = []) {
1204
1204
  /**
1205
1205
  * Builds scenario health from profile metrics.
1206
1206
  *
1207
- * @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[]}} options
1207
+ * @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[], sessionFreshness?: ProfileSessionFreshness | null}} options
1208
1208
  * @returns {Record<string, unknown>}
1209
1209
  */
1210
- function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries = [], }) {
1210
+ function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries = [], sessionFreshness = null, }) {
1211
1211
  const passed = metrics.status === 'passed';
1212
1212
  const metadata = {
1213
1213
  failures: typeof metrics.failures === 'number' ? metrics.failures : null,
@@ -1270,7 +1270,39 @@ function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profil
1270
1270
  const commandChecksPassed = commandChecks.every((check) => check.status === 'passed');
1271
1271
  const diagnosticChecks = buildRequiredDiagnosticHealthChecks(diagnostics);
1272
1272
  const diagnosticChecksPassed = diagnosticChecks.every((check) => check.status === 'passed');
1273
- const healthPassed = passed && commandChecksPassed && diagnosticChecksPassed;
1273
+ const sessionFreshnessChecks = sessionFreshness
1274
+ ? [
1275
+ {
1276
+ name: 'profile_session_freshness',
1277
+ status: sessionFreshness.status === 'fresh'
1278
+ ? 'passed'
1279
+ : sessionFreshness.status === 'missing-app-session'
1280
+ ? 'warning'
1281
+ : 'failed',
1282
+ source: 'runner',
1283
+ code: sessionFreshness.status === 'fresh'
1284
+ ? 'profile_session_fresh'
1285
+ : sessionFreshness.status === 'missing-app-session'
1286
+ ? 'profile_session_start_missing'
1287
+ : 'profile_session_stale',
1288
+ message: sessionFreshness.status === 'fresh'
1289
+ ? 'App-side profile-session start matched the runner-written session seed.'
1290
+ : sessionFreshness.reason ?? 'App-side profile-session evidence did not match the runner-written session seed.',
1291
+ metadata: {
1292
+ appStartedAt: sessionFreshness.appStartedAt ?? null,
1293
+ nextAction: sessionFreshness.status === 'fresh'
1294
+ ? 'No action required.'
1295
+ : 'Clear stale app/session state, reload the expected app bundle, and rerun before treating profile events or metrics as product evidence.',
1296
+ nextActionCode: sessionFreshness.status === 'fresh'
1297
+ ? 'none'
1298
+ : 'rerun_with_fresh_profile_session',
1299
+ seedStartedAt: sessionFreshness.seed.startedAt,
1300
+ },
1301
+ },
1302
+ ]
1303
+ : [];
1304
+ const sessionFreshnessChecksPassed = sessionFreshnessChecks.every((check) => check.status !== 'failed');
1305
+ const healthPassed = passed && commandChecksPassed && diagnosticChecksPassed && sessionFreshnessChecksPassed;
1274
1306
  return assertValidJson({
1275
1307
  schemaVersion: '1.0.0',
1276
1308
  scenarioId: scenario.name,
@@ -1288,6 +1320,7 @@ function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profil
1288
1320
  : 'Profile events did not complete every expected iteration.',
1289
1321
  metadata,
1290
1322
  },
1323
+ ...sessionFreshnessChecks,
1291
1324
  ...commandChecks,
1292
1325
  ...diagnosticChecks,
1293
1326
  ],
@@ -1541,6 +1574,141 @@ function resolveProfileSessionEntriesPath({ args, platform }) {
1541
1574
  }
1542
1575
  return null;
1543
1576
  }
1577
+ /**
1578
+ * Reads one JSON object candidate from raw command text.
1579
+ *
1580
+ * @param {string} text
1581
+ * @returns {Record<string, unknown>[]}
1582
+ */
1583
+ function parseJsonObjectsFromText(text) {
1584
+ const matches = text.match(/\{[^{}\n]*\}/gu) ?? [];
1585
+ const objects = [];
1586
+ for (const match of matches) {
1587
+ try {
1588
+ const parsed = JSON.parse(match);
1589
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
1590
+ objects.push(parsed);
1591
+ }
1592
+ }
1593
+ catch {
1594
+ // Raw command files can contain shell syntax around JSON payloads.
1595
+ }
1596
+ }
1597
+ return objects;
1598
+ }
/**
 * Reads an Android profile-session seed from adb AsyncStorage raw artifacts.
 *
 * Scans `<sidecarRoot>/raw/adb-async-storage-write-<n>.txt` files in ascending
 * numeric order of `<n>` and returns the first embedded JSON object whose
 * `runId` and `scenario` match the request and whose `startedAt` is a finite
 * number.
 *
 * @param {{sidecarRoot: string, runId: string, scenarioName: string}} options
 * @returns {ProfileSessionSeed | null} The matching seed, or null when the raw
 *   directory is absent or no artifact contains a matching seed.
 */
function readAndroidProfileSessionSeed({ runId, scenarioName, sidecarRoot, }) {
    const rawDir = path.resolve(sidecarRoot, 'raw');
    if (!fs.existsSync(rawDir)) {
        return null;
    }
    const writeFilePattern = /^adb-async-storage-write-(\d+)\.txt$/u;
    const writeFiles = [];
    for (const fileName of fs.readdirSync(rawDir)) {
        const match = writeFilePattern.exec(fileName);
        if (match) {
            writeFiles.push({ fileName, sequence: Number(match[1]) });
        }
    }
    // Order by the numeric suffix: a plain string sort would visit
    // "write-10" before "write-2" and scan the artifacts out of write order.
    writeFiles.sort((left, right) => {
        if (left.sequence !== right.sequence) {
            return left.sequence - right.sequence;
        }
        // Same numeric value (e.g. "01" vs "1"): fall back to name order so
        // the scan stays deterministic.
        if (left.fileName < right.fileName) {
            return -1;
        }
        return left.fileName > right.fileName ? 1 : 0;
    });
    for (const { fileName } of writeFiles) {
        const rawText = fs.readFileSync(path.join(rawDir, fileName), 'utf8');
        for (const candidate of parseJsonObjectsFromText(rawText)) {
            if (candidate.runId === runId &&
                candidate.scenario === scenarioName &&
                typeof candidate.startedAt === 'number' &&
                Number.isFinite(candidate.startedAt)) {
                return {
                    runId,
                    scenario: scenarioName,
                    startedAt: candidate.startedAt,
                };
            }
        }
    }
    return null;
}
/**
 * Reads an iOS profile-session seed from simctl storage artifacts.
 *
 * Looks at the `session` property of
 * `<sidecarRoot>/raw/ios-profile-session-seed.json` and accepts it only when
 * it is a plain object whose `runId` and `scenario` match the request and
 * whose `startedAt` is a finite number.
 *
 * @param {{sidecarRoot: string, runId: string, scenarioName: string}} options
 * @returns {ProfileSessionSeed | null} The matching seed, or null otherwise.
 */
function readIosProfileSessionSeed({ runId, scenarioName, sidecarRoot, }) {
    const seedFile = path.resolve(sidecarRoot, 'raw', 'ios-profile-session-seed.json');
    const session = readOptionalJsonObject(seedFile)?.session;
    const isPlainObject = Boolean(session) && typeof session === 'object' && !Array.isArray(session);
    if (!isPlainObject) {
        return null;
    }
    const { runId: seedRunId, scenario: seedScenario, startedAt } = session;
    const matchesRequest = seedRunId === runId && seedScenario === scenarioName;
    const hasFiniteStart = typeof startedAt === 'number' && Number.isFinite(startedAt);
    return matchesRequest && hasFiniteStart
        ? { runId, scenario: scenarioName, startedAt }
        : null;
}
/**
 * Reads the profile-session seed written by a platform sidecar, when present.
 *
 * Android reads from the `adb-artifacts` argument and iOS from the
 * `simctl-artifacts` argument; any other platform, or a missing or non-string
 * artifact argument, produces null.
 *
 * @param {{args: CliArgs, platform: ProfilePlatform, runId: string, scenarioName: string}} options
 * @returns {ProfileSessionSeed | null}
 */
function resolveProfileSessionSeed({ args, platform, runId, scenarioName, }) {
    switch (platform) {
        case 'android': {
            const adbArtifacts = args['adb-artifacts'];
            if (typeof adbArtifacts === 'string') {
                const sidecarRoot = path.resolve(adbArtifacts);
                return readAndroidProfileSessionSeed({ runId, scenarioName, sidecarRoot });
            }
            break;
        }
        case 'ios': {
            const simctlArtifacts = args['simctl-artifacts'];
            if (typeof simctlArtifacts === 'string') {
                const sidecarRoot = path.resolve(simctlArtifacts);
                return readIosProfileSessionSeed({ runId, scenarioName, sidecarRoot });
            }
            break;
        }
        default:
            break;
    }
    return null;
}
1676
+ /**
1677
+ * Compares the sidecar-written profile session to the app-emitted session.
1678
+ *
1679
+ * @param {{seed: ProfileSessionSeed | null, sessionEntries: Record<string, unknown>[]}} options
1680
+ * @returns {ProfileSessionFreshness | null}
1681
+ */
1682
+ function resolveProfileSessionFreshness({ seed, sessionEntries, }) {
1683
+ if (!seed) {
1684
+ return null;
1685
+ }
1686
+ const appStart = sessionEntries.find((entry) => (entry?.kind === 'start' &&
1687
+ entry.runId === seed.runId &&
1688
+ entry.scenario === seed.scenario &&
1689
+ typeof entry.startedAt === 'number' &&
1690
+ Number.isFinite(entry.startedAt)));
1691
+ if (!appStart || typeof appStart.startedAt !== 'number') {
1692
+ return {
1693
+ seed,
1694
+ status: 'missing-app-session',
1695
+ reason: 'The runner wrote a profile-session seed, but no matching app-side start entry was observed.',
1696
+ };
1697
+ }
1698
+ if (appStart.startedAt !== seed.startedAt) {
1699
+ return {
1700
+ appStartedAt: appStart.startedAt,
1701
+ seed,
1702
+ status: 'stale',
1703
+ reason: 'The app-side profile-session start did not match the runner-written seed.',
1704
+ };
1705
+ }
1706
+ return {
1707
+ appStartedAt: appStart.startedAt,
1708
+ seed,
1709
+ status: 'fresh',
1710
+ };
1711
+ }
1544
1712
  /**
1545
1713
  * Resolves the run id used by rehydrated sidecar evidence.
1546
1714
  *
@@ -2414,6 +2582,16 @@ async function runProfileMobile(args, options) {
2414
2582
  })
2415
2583
  : []),
2416
2584
  ];
2585
+ const profileSessionSeed = resolveProfileSessionSeed({
2586
+ args,
2587
+ platform: options.platform,
2588
+ runId: evidenceFilterRunId,
2589
+ scenarioName,
2590
+ });
2591
+ const sessionFreshness = resolveProfileSessionFreshness({
2592
+ seed: profileSessionSeed,
2593
+ sessionEntries,
2594
+ });
2417
2595
  const runtimeTarget = resolveRuntimeTarget({ args, platform: options.platform });
2418
2596
  const metrics = buildMetricsFromProfileEvents({
2419
2597
  scenario: scenarioName,
@@ -2542,6 +2720,7 @@ async function runProfileMobile(args, options) {
2542
2720
  profileSessionEntryCount: sessionEntries.length,
2543
2721
  commandTransport,
2544
2722
  sessionEntries,
2723
+ sessionFreshness,
2545
2724
  });
2546
2725
  const verdict = buildProfileVerdict({ scenario: profileScenario, runId, health, metrics });
2547
2726
  const agentSummary = buildAgentSummaryMarkdown({ health, verdict, manifest });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-scenario-loop",
3
- "version": "0.1.6",
3
+ "version": "0.1.7",
4
4
  "private": false,
5
5
  "description": "Scenario orchestration and evidence collection for agent-driven software development. Bring your own runner. Keep your scenarios. Keep your evidence.",
6
6
  "license": "MIT",