npm - agent-scenario-loop - Versions diffs - 0.1.5 → 0.1.7 - Mend

agent-scenario-loop 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/core/artifact-contract.js +10 -1
package/dist/runner/profile-mobile.d.ts +14 -2
package/dist/runner/profile-mobile.js +290 -16
package/package.json +1 -1

package/dist/core/artifact-contract.js CHANGED Viewed

@@ -167,6 +167,7 @@ function parseKeyValueProfileSessionEntry(payload) {
         return null;
     }
     const timestamp = coerceNumber(entry.timestamp);
+    const startedAt = coerceNumber(entry.startedAt);
     const atMs = coerceNumber(entry.atMs);
     const sequence = coerceNumber(entry.sequence);
     const waitMs = coerceNumber(entry.waitMs);
@@ -177,6 +178,9 @@ function parseKeyValueProfileSessionEntry(payload) {
     if (timestamp !== null) {
         entry.timestamp = timestamp;
     }
+    if (startedAt !== null) {
+        entry.startedAt = startedAt;
+    }
     if (sequence !== null) {
         entry.sequence = sequence;
     }
@@ -473,10 +477,15 @@ function buildMetricsFromProfileEvents({ scenario, runId, events, expectedIterat
                 (typeof record.milestoneCount === 'number' &&
                     record.milestoneCount >= requiredMilestoneEventsPerIteration)) &&
             record.milestoneAt >= 0;
-        if (hasMilestoneDuration) {
+        if (hasMilestoneDuration && expectedIterations === 1) {
             durationsMs.push(roundMs(record.milestoneAt));
             openDurationsMs.push(roundMs(record.milestoneAt));
         }
+        else if (hasMilestoneDuration) {
+            // Repeated completion-only milestones prove that cycles finished, but
+            // their atMs values are positions on the session timeline, not per-cycle
+            // latency samples. Repeated latency budgets need explicit interval anchors.
+        }
         else if (hasCycleDuration) {
             durationsMs.push(roundMs(record.dismissedAt - record.presentRequestedAt));
         }

package/dist/runner/profile-mobile.d.ts CHANGED Viewed

@@ -126,6 +126,17 @@ type ProviderCommandFailure = {
     providerId: string;
     rawPath?: string;
 };
+type ProfileSessionSeed = {
+    runId: string;
+    scenario: string;
+    startedAt: number;
+};
+type ProfileSessionFreshness = {
+    appStartedAt?: number;
+    reason?: string;
+    seed: ProfileSessionSeed;
+    status: 'fresh' | 'missing-app-session' | 'stale';
+};
 /**
  * Prints CLI usage to stderr.
  *
@@ -173,10 +184,10 @@ declare function resolveAttachedEvidence({ args, layout, providerInputs, }: {
 /**
  * Builds scenario health from profile metrics.
  *
- * @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[]}} options
+ * @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[], sessionFreshness?: ProfileSessionFreshness | null}} options
  * @returns {Record<string, unknown>}
  */
-declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries, }: {
+declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries, sessionFreshness, }: {
     scenario: Record<string, any>;
     runId: string;
     metrics: Record<string, any>;
@@ -185,6 +196,7 @@ declare function buildProfileHealth({ scenario, runId, metrics, diagnostics, pro
     profileSessionEntryCount?: number;
     commandTransport?: string;
     sessionEntries?: Record<string, any>[];
+    sessionFreshness?: ProfileSessionFreshness | null;
 }): Record<string, unknown>;
 /**
  * Builds failed scenario health from evidence-provider command failures.

package/dist/runner/profile-mobile.js CHANGED Viewed

@@ -20,7 +20,7 @@ exports.runProfileCli = runProfileCli;
 exports.runProfileMobile = runProfileMobile;
 exports.hashScenarioContract = hashScenarioContract;
 exports.usage = usage;
-const { execFile } = require('node:child_process');
+const { spawn } = require('node:child_process');
 const fs = require('node:fs');
 const fsp = require('node:fs/promises');
 const path = require('node:path');
@@ -35,6 +35,7 @@ const { writeUsage } = require('./cli');
 const CAPTURE_EVIDENCE_KINDS = new Set(['screenshot', 'uiTree', 'video']);
 const PROVIDER_EVIDENCE_KINDS = new Set(['accessibility', 'logs', 'profiler']);
 const SIGNAL_EVIDENCE_KINDS = new Set(['js', 'memory', 'network']);
+const DEFAULT_PROVIDER_COMMAND_TIMEOUT_MS = 180_000;
 /**
  * Prints CLI usage to stderr.
  *
@@ -188,23 +189,83 @@ async function hashFileSha256(filePath) {
     return crypto.createHash('sha256').update(content).digest('hex');
 }
 /**
- * Runs one provider command without a shell and captures its output.
+ * Resolves the timeout applied to provider commands.
+ *
+ * Passes the ASL_PROVIDER_COMMAND_TIMEOUT_MS environment variable to
+ * readPositiveInteger with DEFAULT_PROVIDER_COMMAND_TIMEOUT_MS as the fallback.
+ * NOTE(review): readPositiveInteger is defined outside this hunk; it presumably
+ * returns the fallback when the variable is unset or not a positive integer -
+ * confirm.
  *
- * @param {{command: string, args: string[], cwd?: string, env?: Record<string, string>}} options
+ * @returns {number} Timeout in milliseconds.
+ */
+function resolveProviderCommandTimeoutMs() {
+    return readPositiveInteger(process.env.ASL_PROVIDER_COMMAND_TIMEOUT_MS, DEFAULT_PROVIDER_COMMAND_TIMEOUT_MS);
+}
+/**
+ * Runs one provider command without a shell, streaming output to raw files.
+ *
+ * @param {{command: string, args: string[], cwd?: string, env?: Record<string, string>, stderrPath: string, stdoutPath: string, timeoutMs: number}} options
  * @returns {Promise<ProviderCommandResult>}
  */
-function execProviderCommand({ args, command, cwd, env, }) {
+function execProviderCommand({ args, command, cwd, env, stderrPath, stdoutPath, timeoutMs, }) {
     return new Promise((resolve) => {
-        execFile(command, args, {
+        const child = spawn(command, args, {
             ...(cwd ? { cwd } : {}),
             env: env ? { ...process.env, ...env } : process.env,
-        }, (error, stdout, stderr) => {
+            shell: false,
+            stdio: ['ignore', 'pipe', 'pipe'],
+        });
+        const stdoutChunks = [];
+        const stderrChunks = [];
+        let timedOut = false;
+        let settled = false;
+        const timeout = setTimeout(() => {
+            timedOut = true;
+            child.kill('SIGTERM');
+            setTimeout(() => {
+                if (!settled) {
+                    child.kill('SIGKILL');
+                }
+            }, 1000).unref();
+        }, timeoutMs);
+        timeout.unref();
+        child.stdout.on('data', (chunk) => {
+            stdoutChunks.push(chunk);
+            fs.appendFileSync(stdoutPath, chunk);
+        });
+        child.stderr.on('data', (chunk) => {
+            stderrChunks.push(chunk);
+            fs.appendFileSync(stderrPath, chunk);
+        });
+        child.on('error', (error) => {
+            if (settled) {
+                return;
+            }
+            clearTimeout(timeout);
+            settled = true;
+            const stderr = error.message;
+            fs.appendFileSync(stderrPath, `${stderr}\n`, 'utf8');
             resolve({
                 args,
                 command,
-                exitCode: error && typeof error.code === 'number' ? error.code : error ? 1 : 0,
+                exitCode: 1,
+                signal: null,
+                stderr,
+                stdout: Buffer.concat(stdoutChunks).toString('utf8'),
+                timedOut,
+            });
+        });
+        child.on('close', (exitCode, signal) => {
+            if (settled) {
+                return;
+            }
+            clearTimeout(timeout);
+            settled = true;
+            const stdout = Buffer.concat(stdoutChunks).toString('utf8');
+            const stderr = Buffer.concat(stderrChunks).toString('utf8');
+            resolve({
+                args,
+                command,
+                exitCode: typeof exitCode === 'number' ? exitCode : timedOut ? 124 : 1,
+                signal,
                 stderr,
                 stdout,
+                timedOut,
             });
         });
     });
@@ -396,32 +457,66 @@ async function executeProviderCommands({ args, layout, platform, runDir, runId,
                 ? resolveProviderPath({ context, manifestDir, value: providerCommand.cwd })
                 : manifestDir;
             const resolvedEnv = Object.fromEntries(Object.entries(providerCommand.env ?? {}).map(([key, value]) => [key, applyProviderPlaceholders(value, context)]));
+            const commandRecordFileName = `${providerId}-${providerCommand.id}.json`;
+            const stdoutFileName = `${providerId}-${providerCommand.id}.stdout.txt`;
+            const stderrFileName = `${providerId}-${providerCommand.id}.stderr.txt`;
+            const commandRecordPath = path.join(commandRecordDir, commandRecordFileName);
+            const stdoutPath = path.join(commandRecordDir, stdoutFileName);
+            const stderrPath = path.join(commandRecordDir, stderrFileName);
+            const timeoutMs = resolveProviderCommandTimeoutMs();
+            const startedAt = new Date().toISOString();
+            await fsp.writeFile(stdoutPath, '', 'utf8');
+            await fsp.writeFile(stderrPath, '', 'utf8');
+            await fsp.writeFile(commandRecordPath, `${JSON.stringify({
+                args: resolvedArgs,
+                command: resolvedCommand,
+                phase: providerCommand.phase,
+                providerId,
+                startedAt,
+                status: 'started',
+                stderrPath: `raw/provider-commands/${stderrFileName}`,
+                stdoutPath: `raw/provider-commands/${stdoutFileName}`,
+                timeoutMs,
+            }, null, 2)}\n`, 'utf8');
             const commandResult = await execProviderCommand({
                 args: resolvedArgs,
                 command: resolvedCommand,
                 cwd: resolvedCwd,
                 env: resolvedEnv,
+                stderrPath,
+                stdoutPath,
+                timeoutMs,
             });
-            const commandRecordFileName = `${providerId}-${providerCommand.id}.json`;
-            const commandRecordPath = path.join(commandRecordDir, commandRecordFileName);
             await fsp.writeFile(commandRecordPath, `${JSON.stringify({
                 args: commandResult.args,
                 command: commandResult.command,
+                endedAt: new Date().toISOString(),
                 exitCode: commandResult.exitCode,
                 phase: providerCommand.phase,
                 providerId,
+                signal: commandResult.signal,
                 stderr: commandResult.stderr,
+                stderrPath: `raw/provider-commands/${stderrFileName}`,
+                status: commandResult.timedOut ? 'timed_out' : commandResult.exitCode === 0 ? 'completed' : 'failed',
                 stdout: commandResult.stdout,
+                stdoutPath: `raw/provider-commands/${stdoutFileName}`,
+                timedOut: commandResult.timedOut,
+                timeoutMs,
             }, null, 2)}\n`, 'utf8');
             if (commandResult.exitCode !== 0) {
+                const timedOut = commandResult.timedOut;
                 failures.push({
                     commandId: providerCommand.id,
-                    code: 'provider_command_failed',
+                    code: timedOut ? 'provider_liveness_timeout' : 'provider_command_failed',
                     exitCode: commandResult.exitCode,
-                    message: `Evidence provider command ${providerId}/${providerCommand.id} failed with exit code ${commandResult.exitCode}.`,
+                    message: timedOut
+                        ? `Evidence provider command ${providerId}/${providerCommand.id} did not finish before the ${timeoutMs}ms timeout.`
+                        : `Evidence provider command ${providerId}/${providerCommand.id} failed with exit code ${commandResult.exitCode}.`,
                     name: 'evidence_provider_command_completed',
-                    nextAction: `Inspect raw/provider-commands/${commandRecordFileName}, fix the provider command or its environment, then rerun the profile.`,
-                    nextActionCode: 'fix_provider_command',
+                    nextAction: timedOut
+                        ? `Inspect raw/provider-commands/${commandRecordFileName}, raw/provider-commands/${stdoutFileName}, and raw/provider-commands/${stderrFileName}; fix the provider liveness issue or increase ASL_PROVIDER_COMMAND_TIMEOUT_MS only if the provider is making progress.`
+                        : `Inspect raw/provider-commands/${commandRecordFileName}, fix the provider command or its environment, then rerun the profile.`,
+                    nextActionCode: timedOut ? 'fix_provider_liveness' : 'fix_provider_command',
                     phase: providerCommand.phase,
                     providerId,
                     rawPath: `raw/provider-commands/${commandRecordFileName}`,
@@ -1109,10 +1204,10 @@ function buildRequiredDiagnosticHealthChecks(diagnostics = []) {
 /**
  * Builds scenario health from profile metrics.
  *
- * @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[]}} options
+ * @param {{scenario: Record<string, unknown>, runId: string, metrics: Record<string, unknown>, diagnostics?: DiagnosticInventoryEntry[], profileEventCount?: number, profileSessionEntryCount?: number, commandTransport?: string, sessionEntries?: Record<string, unknown>[], sessionFreshness?: ProfileSessionFreshness | null}} options
  * @returns {Record<string, unknown>}
  */
-function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries = [], }) {
+function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profileEventCount, profileSessionEntryCount, commandTransport, sessionEntries = [], sessionFreshness = null, }) {
     const passed = metrics.status === 'passed';
     const metadata = {
         failures: typeof metrics.failures === 'number' ? metrics.failures : null,
@@ -1175,7 +1270,39 @@ function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profil
     const commandChecksPassed = commandChecks.every((check) => check.status === 'passed');
     const diagnosticChecks = buildRequiredDiagnosticHealthChecks(diagnostics);
     const diagnosticChecksPassed = diagnosticChecks.every((check) => check.status === 'passed');
-    const healthPassed = passed && commandChecksPassed && diagnosticChecksPassed;
+    const sessionFreshnessChecks = sessionFreshness
+        ? [
+            {
+                name: 'profile_session_freshness',
+                status: sessionFreshness.status === 'fresh'
+                    ? 'passed'
+                    : sessionFreshness.status === 'missing-app-session'
+                        ? 'warning'
+                        : 'failed',
+                source: 'runner',
+                code: sessionFreshness.status === 'fresh'
+                    ? 'profile_session_fresh'
+                    : sessionFreshness.status === 'missing-app-session'
+                        ? 'profile_session_start_missing'
+                        : 'profile_session_stale',
+                message: sessionFreshness.status === 'fresh'
+                    ? 'App-side profile-session start matched the runner-written session seed.'
+                    : sessionFreshness.reason ?? 'App-side profile-session evidence did not match the runner-written session seed.',
+                metadata: {
+                    appStartedAt: sessionFreshness.appStartedAt ?? null,
+                    nextAction: sessionFreshness.status === 'fresh'
+                        ? 'No action required.'
+                        : 'Clear stale app/session state, reload the expected app bundle, and rerun before treating profile events or metrics as product evidence.',
+                    nextActionCode: sessionFreshness.status === 'fresh'
+                        ? 'none'
+                        : 'rerun_with_fresh_profile_session',
+                    seedStartedAt: sessionFreshness.seed.startedAt,
+                },
+            },
+        ]
+        : [];
+    const sessionFreshnessChecksPassed = sessionFreshnessChecks.every((check) => check.status !== 'failed');
+    const healthPassed = passed && commandChecksPassed && diagnosticChecksPassed && sessionFreshnessChecksPassed;
     return assertValidJson({
         schemaVersion: '1.0.0',
         scenarioId: scenario.name,
@@ -1193,6 +1320,7 @@ function buildProfileHealth({ scenario, runId, metrics, diagnostics = [], profil
                     : 'Profile events did not complete every expected iteration.',
                 metadata,
             },
+            ...sessionFreshnessChecks,
             ...commandChecks,
             ...diagnosticChecks,
         ],
@@ -1446,6 +1574,141 @@ function resolveProfileSessionEntriesPath({ args, platform }) {
     }
     return null;
 }
+/**
+ * Extracts the flat JSON objects embedded in raw command text.
+ *
+ * Candidates are brace-delimited spans that contain no nested braces and no
+ * newline, so nested or multi-line payloads are never matched. Candidates that
+ * fail JSON.parse are skipped rather than reported.
+ *
+ * @param {string} text Raw text that may wrap JSON payloads in other syntax.
+ * @returns {Record<string, unknown>[]} Parsed objects in match order; empty when none parse.
+ */
+function parseJsonObjectsFromText(text) {
+    const matches = text.match(/\{[^{}\n]*\}/gu) ?? [];
+    const objects = [];
+    for (const match of matches) {
+        try {
+            const parsed = JSON.parse(match);
+            if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+                objects.push(parsed);
+            }
+        }
+        catch {
+            // Raw command files can contain shell syntax around JSON payloads.
+        }
+    }
+    return objects;
+}
+/**
+ * Reads an Android profile-session seed from adb AsyncStorage raw artifacts.
+ *
+ * Scans the `raw` directory under sidecarRoot for files named
+ * `adb-async-storage-write-<digits>.txt` and returns the first embedded JSON
+ * object whose runId and scenario match and whose startedAt is a finite number.
+ * Files are visited in default Array.prototype.sort() order, which is
+ * lexicographic: `...-write-10.txt` is read before `...-write-2.txt`.
+ *
+ * @param {{sidecarRoot: string, runId: string, scenarioName: string}} options
+ * @returns {ProfileSessionSeed | null} The matching seed, or null when the raw
+ *   directory does not exist or no candidate matches.
+ */
+function readAndroidProfileSessionSeed({ runId, scenarioName, sidecarRoot, }) {
+    const rawDir = path.resolve(sidecarRoot, 'raw');
+    if (!fs.existsSync(rawDir)) {
+        return null;
+    }
+    for (const fileName of fs.readdirSync(rawDir).filter((entry) => /^adb-async-storage-write-\d+\.txt$/u.test(entry)).sort()) {
+        const rawText = fs.readFileSync(path.join(rawDir, fileName), 'utf8');
+        for (const candidate of parseJsonObjectsFromText(rawText)) {
+            if (candidate.runId === runId &&
+                candidate.scenario === scenarioName &&
+                typeof candidate.startedAt === 'number' &&
+                Number.isFinite(candidate.startedAt)) {
+                return {
+                    runId,
+                    scenario: scenarioName,
+                    startedAt: candidate.startedAt,
+                };
+            }
+        }
+    }
+    return null;
+}
+/**
+ * Reads an iOS profile-session seed from simctl storage artifacts.
+ *
+ * Loads `raw/ios-profile-session-seed.json` under sidecarRoot through
+ * readOptionalJsonObject and inspects its `session` property. The seed is
+ * accepted only when `session` is a non-array object whose runId and scenario
+ * match and whose startedAt is a finite number.
+ * NOTE(review): readOptionalJsonObject is defined outside this hunk; it
+ * presumably yields a nullish value for a missing or invalid file - confirm.
+ *
+ * @param {{sidecarRoot: string, runId: string, scenarioName: string}} options
+ * @returns {ProfileSessionSeed | null} The matching seed, or null otherwise.
+ */
+function readIosProfileSessionSeed({ runId, scenarioName, sidecarRoot, }) {
+    const seedPath = path.resolve(sidecarRoot, 'raw', 'ios-profile-session-seed.json');
+    const seed = readOptionalJsonObject(seedPath);
+    const session = seed?.session;
+    if (!session || typeof session !== 'object' || Array.isArray(session)) {
+        return null;
+    }
+    const record = session;
+    if (record.runId === runId &&
+        record.scenario === scenarioName &&
+        typeof record.startedAt === 'number' &&
+        Number.isFinite(record.startedAt)) {
+        return {
+            runId,
+            scenario: scenarioName,
+            startedAt: record.startedAt,
+        };
+    }
+    return null;
+}
+/**
+ * Reads the profile-session seed written by a platform sidecar, when present.
+ *
+ * Android runs read from the `adb-artifacts` argument and iOS runs read from
+ * the `simctl-artifacts` argument; each is used only when it is a string and is
+ * resolved to an absolute path first. Any other platform, or a missing
+ * artifact argument, yields null.
+ *
+ * @param {{args: CliArgs, platform: ProfilePlatform, runId: string, scenarioName: string}} options
+ * @returns {ProfileSessionSeed | null} The seed found by the platform reader, or null.
+ */
+function resolveProfileSessionSeed({ args, platform, runId, scenarioName, }) {
+    if (platform === 'android' && typeof args['adb-artifacts'] === 'string') {
+        return readAndroidProfileSessionSeed({
+            runId,
+            scenarioName,
+            sidecarRoot: path.resolve(args['adb-artifacts']),
+        });
+    }
+    if (platform === 'ios' && typeof args['simctl-artifacts'] === 'string') {
+        return readIosProfileSessionSeed({
+            runId,
+            scenarioName,
+            sidecarRoot: path.resolve(args['simctl-artifacts']),
+        });
+    }
+    return null;
+}
+/**
+ * Compares the sidecar-written profile session to the app-emitted session.
+ *
+ * Considers only the first session entry whose kind is 'start', whose runId and
+ * scenario equal the seed's, and whose startedAt is a finite number:
+ * - no such entry: status 'missing-app-session';
+ * - startedAt differs from the seed (strict equality): status 'stale';
+ * - startedAt equals the seed: status 'fresh'.
+ * The non-fresh results carry a human-readable reason, and appStartedAt is set
+ * whenever a matching entry was found.
+ *
+ * @param {{seed: ProfileSessionSeed | null, sessionEntries: Record<string, unknown>[]}} options
+ * @returns {ProfileSessionFreshness | null} Null when no seed was provided.
+ */
+function resolveProfileSessionFreshness({ seed, sessionEntries, }) {
+    if (!seed) {
+        return null;
+    }
+    const appStart = sessionEntries.find((entry) => (entry?.kind === 'start' &&
+        entry.runId === seed.runId &&
+        entry.scenario === seed.scenario &&
+        typeof entry.startedAt === 'number' &&
+        Number.isFinite(entry.startedAt)));
+    if (!appStart || typeof appStart.startedAt !== 'number') {
+        return {
+            seed,
+            status: 'missing-app-session',
+            reason: 'The runner wrote a profile-session seed, but no matching app-side start entry was observed.',
+        };
+    }
+    if (appStart.startedAt !== seed.startedAt) {
+        return {
+            appStartedAt: appStart.startedAt,
+            seed,
+            status: 'stale',
+            reason: 'The app-side profile-session start did not match the runner-written seed.',
+        };
+    }
+    return {
+        appStartedAt: appStart.startedAt,
+        seed,
+        status: 'fresh',
+    };
+}
 /**
  * Resolves the run id used by rehydrated sidecar evidence.
  *
@@ -2319,6 +2582,16 @@ async function runProfileMobile(args, options) {
             })
             : []),
     ];
+    const profileSessionSeed = resolveProfileSessionSeed({
+        args,
+        platform: options.platform,
+        runId: evidenceFilterRunId,
+        scenarioName,
+    });
+    const sessionFreshness = resolveProfileSessionFreshness({
+        seed: profileSessionSeed,
+        sessionEntries,
+    });
     const runtimeTarget = resolveRuntimeTarget({ args, platform: options.platform });
     const metrics = buildMetricsFromProfileEvents({
         scenario: scenarioName,
@@ -2447,6 +2720,7 @@ async function runProfileMobile(args, options) {
         profileSessionEntryCount: sessionEntries.length,
         commandTransport,
         sessionEntries,
+        sessionFreshness,
     });
     const verdict = buildProfileVerdict({ scenario: profileScenario, runId, health, metrics });
     const agentSummary = buildAgentSummaryMarkdown({ health, verdict, manifest });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-scenario-loop",
-  "version": "0.1.5",
+  "version": "0.1.7",
   "private": false,
   "description": "Scenario orchestration and evidence collection for agent-driven software development. Bring your own runner. Keep your scenarios. Keep your evidence.",
   "license": "MIT",