agent-scenario-loop 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/app/profile-session.ts +98 -4
- package/dist/core/agent-summary.d.ts +3 -2
- package/dist/core/agent-summary.js +44 -2
- package/dist/core/artifact-contract.d.ts +22 -4
- package/dist/core/artifact-contract.js +512 -11
- package/dist/core/comparison.d.ts +57 -3
- package/dist/core/comparison.js +113 -1
- package/dist/core/planner.d.ts +32 -1
- package/dist/core/planner.js +144 -0
- package/dist/core/run-index.d.ts +4 -0
- package/dist/core/run-index.js +55 -1
- package/dist/core/schema-validator.d.ts +1 -0
- package/dist/core/schema-validator.js +1 -0
- package/dist/runner/compare-latest.d.ts +8 -4
- package/dist/runner/compare-latest.js +24 -5
- package/dist/runner/example-android-live.d.ts +10 -1
- package/dist/runner/example-android-live.js +55 -0
- package/dist/runner/example-ios-live.d.ts +10 -1
- package/dist/runner/example-ios-live.js +55 -0
- package/dist/runner/ios-simctl.d.ts +5 -0
- package/dist/runner/ios-simctl.js +6 -0
- package/dist/runner/live-comparison.d.ts +2 -2
- package/dist/runner/live-comparison.js +2 -1
- package/dist/runner/live-proof-summary.d.ts +5 -4
- package/dist/runner/live-proof-summary.js +12 -2
- package/dist/runner/live-proof.d.ts +3 -2
- package/dist/runner/live-proof.js +9 -2
- package/dist/runner/profile-android.d.ts +5 -0
- package/dist/runner/profile-android.js +148 -24
- package/dist/runner/profile-ios.d.ts +11 -1
- package/dist/runner/profile-ios.js +128 -9
- package/dist/runner/profile-mobile.d.ts +8 -0
- package/dist/runner/profile-mobile.js +267 -28
- package/docs/adapters.md +4 -0
- package/docs/architecture.md +90 -0
- package/docs/authoring.md +5 -1
- package/docs/concepts.md +3 -24
- package/docs/consumer-rehearsal.md +4 -0
- package/docs/contracts.md +30 -100
- package/docs/external-adapter-protocol.md +219 -0
- package/docs/live-proofs.md +83 -2
- package/docs/principles.md +9 -15
- package/examples/mobile-app/README.md +12 -0
- package/examples/mobile-app/runner-manifests/primary-runner.json +1 -0
- package/examples/runners/README.md +1 -0
- package/examples/runners/adb-android.json +1 -0
- package/examples/runners/agent-device-android.json +1 -0
- package/examples/runners/agent-device-ios.json +1 -0
- package/examples/runners/argent-android.json +1 -0
- package/examples/runners/argent-ios.json +1 -0
- package/examples/runners/xcodebuildmcp-ios.json +1 -0
- package/package.json +2 -1
- package/schemas/causal-run.schema.json +85 -2
- package/schemas/comparison.schema.json +130 -2
- package/schemas/external-adapter-message.schema.json +693 -0
- package/schemas/health.schema.json +72 -0
- package/schemas/live-proof-set.schema.json +1 -1
- package/schemas/live-proof.schema.json +14 -6
- package/schemas/manifest.schema.json +442 -1
- package/schemas/runner-capabilities.schema.json +20 -0
- package/schemas/scenario.schema.json +16 -0
- package/templates/primary-runner.json +1 -0
|
@@ -26,7 +26,7 @@ const crypto = require('node:crypto');
|
|
|
26
26
|
const { buildAgentSummaryMarkdown } = require('../core/agent-summary');
|
|
27
27
|
const { createArtifactLayout } = require('../core/artifact-layout');
|
|
28
28
|
const { writeJsonArtifact, writeTextArtifact } = require('../core/artifact-writer');
|
|
29
|
-
const { buildBudgetVerdict, buildCausalRun, buildCausalTimeline, buildManifest, buildMetricsFromProfileEvents, buildSummaryMarkdown, extractProfileEvents, } = require('../core/artifact-contract');
|
|
29
|
+
const { buildBudgetVerdict, buildCausalRun, buildCausalTimeline, buildManifest, buildMetricsFromProfileEvents, buildSummaryMarkdown, extractProfileEvents, extractProfileSessionEntries, } = require('../core/artifact-contract');
|
|
30
30
|
const { SCHEMAS, assertValidJson } = require('../core/schema-validator');
|
|
31
31
|
const { writeUsage } = require('./cli');
|
|
32
32
|
const CAPTURE_EVIDENCE_KINDS = new Set(['screenshot', 'uiTree', 'video']);
|
|
@@ -62,6 +62,7 @@ function usage({ binaryName, output = process.stderr, platform, }) {
|
|
|
62
62
|
}
|
|
63
63
|
lines.push('Use --agent-device-capture to execute scenario-declared portable driver actions through agent-device and attach its captures.');
|
|
64
64
|
lines.push('Use --agent-device-session-mode bind when a named agent-device session should still receive the configured Android serial or iOS UDID.');
|
|
65
|
+
lines.push('Use --lifecycle-phase <phase> when the runner can explicitly assert a non-cold lifecycle precondition such as warm-launch or resume.');
|
|
65
66
|
writeUsage(lines, output);
|
|
66
67
|
}
|
|
67
68
|
/**
|
|
@@ -300,9 +301,10 @@ function assertUniqueProviderCommandIds({ providerCommands = [], providerId, })
|
|
|
300
301
|
async function executeProviderCommands({ args, layout, platform, runDir, runId, scenarioId, }) {
|
|
301
302
|
const failures = [];
|
|
302
303
|
const inputs = [];
|
|
304
|
+
const providers = [];
|
|
303
305
|
const providerManifestPaths = readRepeatableArgValues(args, 'provider');
|
|
304
306
|
if (providerManifestPaths.length === 0) {
|
|
305
|
-
return { failures, inputs };
|
|
307
|
+
return { failures, inputs, providers };
|
|
306
308
|
}
|
|
307
309
|
const commandRecordDir = path.join(layout.raw, 'provider-commands');
|
|
308
310
|
await ensureDir(commandRecordDir);
|
|
@@ -314,6 +316,10 @@ async function executeProviderCommands({ args, layout, platform, runDir, runId,
|
|
|
314
316
|
throw new Error(`Provider manifest must use kind "evidenceProvider": ${absoluteManifestPath}`);
|
|
315
317
|
}
|
|
316
318
|
const providerId = safeProviderSegment(String(provider.runnerId ?? path.basename(absoluteManifestPath, '.json')));
|
|
319
|
+
providers.push({
|
|
320
|
+
name: providerId,
|
|
321
|
+
...(typeof provider.version === 'string' ? { version: provider.version } : {}),
|
|
322
|
+
});
|
|
317
323
|
if (Array.isArray(provider.platforms) && !provider.platforms.includes(platform)) {
|
|
318
324
|
failures.push({
|
|
319
325
|
commandId: 'platform-compatibility',
|
|
@@ -392,7 +398,7 @@ async function executeProviderCommands({ args, layout, platform, runDir, runId,
|
|
|
392
398
|
}
|
|
393
399
|
}
|
|
394
400
|
}
|
|
395
|
-
return { failures, inputs };
|
|
401
|
+
return { failures, inputs, providers };
|
|
396
402
|
}
|
|
397
403
|
/**
|
|
398
404
|
* Converts internal attachment copy plans into manifest-safe metadata.
|
|
@@ -403,11 +409,15 @@ async function executeProviderCommands({ args, layout, platform, runDir, runId,
|
|
|
403
409
|
function buildEvidenceAttachmentManifest(attachments) {
    // Keep only the manifest-facing fields of each copy plan; the plan's
    // manifestPath is published under the manifest key `path`.
    return attachments.map((plan) => {
        const entry = {
            channel: plan.channel,
            completenessStatus: plan.completenessStatus,
            corruptionStatus: plan.corruptionStatus,
            kind: plan.kind,
            path: plan.manifestPath,
            redactionStatus: plan.redactionStatus,
            sha256: plan.sha256,
            sizeBytes: plan.sizeBytes,
            sourceFileName: plan.sourceFileName,
            transformations: plan.transformations,
        };
        return entry;
    });
}
|
|
413
423
|
/**
|
|
@@ -443,13 +453,17 @@ async function resolveAttachedEvidence({ args, layout, providerInputs = [], }) {
|
|
|
443
453
|
destinationPaths.add(destinationPath);
|
|
444
454
|
const attachment = {
|
|
445
455
|
channel,
|
|
456
|
+
completenessStatus: 'complete',
|
|
457
|
+
corruptionStatus: 'valid',
|
|
446
458
|
destinationPath,
|
|
447
459
|
kind,
|
|
448
460
|
manifestPath,
|
|
461
|
+
redactionStatus: 'not-redacted',
|
|
449
462
|
sha256: await hashFileSha256(sourcePath),
|
|
450
463
|
sourceFileName: path.basename(sourcePath),
|
|
451
464
|
sourcePath,
|
|
452
465
|
sizeBytes: stat.size,
|
|
466
|
+
transformations: ['copied'],
|
|
453
467
|
};
|
|
454
468
|
attached.attachments.push(attachment);
|
|
455
469
|
attached.copies.push(attachment);
|
|
@@ -595,6 +609,76 @@ function buildProfileHealth({ scenario, runId, metrics, }) {
|
|
|
595
609
|
],
|
|
596
610
|
}, SCHEMAS.health, 'Health artifact');
|
|
597
611
|
}
|
|
612
|
+
/**
 * Maps profile metrics onto the terminal state of a single artifact attempt.
 *
 * A passed status always wins; otherwise a positive numeric timeout count
 * yields 'timeout', and anything else is reported as 'failed'.
 *
 * @param {Record<string, unknown>} metrics
 * @returns {string}
 */
function buildAttemptTerminalState(metrics) {
    if (metrics.status === 'passed') {
        return 'passed';
    }
    const timeoutCount = metrics.timeouts;
    const timedOut = typeof timeoutCount === 'number' && timeoutCount > 0;
    return timedOut ? 'timeout' : 'failed';
}
|
|
627
|
+
/**
 * Produces a product-neutral failure classification for one profile attempt.
 *
 * Passed runs are classified as 'none'. Runs with a positive numeric timeout
 * count are classified as 'timeout'; every other run is classified as an
 * 'evidence' failure. Both failure classifications are marked retryable.
 *
 * @param {Record<string, unknown>} metrics
 * @returns {Record<string, unknown>}
 */
function buildAttemptClassification(metrics) {
    if (metrics.status === 'passed') {
        return { category: 'none' };
    }
    const sawTimeout = typeof metrics.timeouts === 'number' && metrics.timeouts > 0;
    if (!sawTimeout) {
        return {
            category: 'evidence',
            code: 'profile_truth_events_incomplete',
            message: 'Profile run did not capture every expected truth event.',
            retryable: true,
        };
    }
    return {
        category: 'timeout',
        code: 'profile_truth_event_timeout',
        message: `Profile run recorded ${metrics.timeouts} timeout(s) before all expected truth events completed.`,
        retryable: true,
    };
}
|
|
654
|
+
/**
 * Describes whether the written artifacts count as a valid partial set.
 *
 * Passed runs report `valid: false` because their artifact set is complete
 * rather than partial. Any other run reports `valid: true` together with the
 * run-relative paths that are present as non-empty strings.
 *
 * @param {{artifacts: Record<string, unknown>, metrics: Record<string, unknown>}} options
 * @returns {Record<string, unknown>}
 */
function buildAttemptPartialArtifacts({ artifacts, metrics, }) {
    const runPassed = metrics.status === 'passed';
    if (runPassed) {
        return {
            valid: false,
            reason: 'complete successful run artifacts are present',
        };
    }
    // health.json is listed by its literal name; the other entries come from
    // the artifact map supplied by the caller.
    const candidatePaths = [
        artifacts.manifest,
        'health.json',
        artifacts.metrics,
        artifacts.causalRun,
        artifacts.summary,
        artifacts.raw?.interactionLog,
        artifacts.raw?.deviceLog,
    ];
    const paths = candidatePaths.filter((candidate) => typeof candidate === 'string' && candidate !== '');
    return {
        valid: true,
        reason: 'failed profile run artifacts are preserved for diagnosis and are not a product proof until scenario health passes',
        paths,
    };
}
|
|
598
682
|
/**
|
|
599
683
|
* Builds failed scenario health from evidence-provider command failures.
|
|
600
684
|
*
|
|
@@ -760,6 +844,19 @@ function resolveEventLogPath({ args, platform }) {
|
|
|
760
844
|
}
|
|
761
845
|
return null;
|
|
762
846
|
}
|
|
847
|
+
/**
 * Locates the stored profile-session entries artifact, when one exists.
 *
 * Only iOS runs given a string `--simctl-artifacts` directory are considered;
 * the file is looked up at `raw/ios-profile-session-entries.json` inside it.
 *
 * @param {{args: CliArgs, platform: ProfilePlatform}} options
 * @returns {string | null}
 */
function resolveProfileSessionEntriesPath({ args, platform }) {
    const simctlArtifactsDir = args['simctl-artifacts'];
    const isEligible = platform === 'ios' && typeof simctlArtifactsDir === 'string';
    if (!isEligible) {
        return null;
    }
    const candidatePath = path.resolve(simctlArtifactsDir, 'raw', 'ios-profile-session-entries.json');
    if (fs.existsSync(candidatePath)) {
        return candidatePath;
    }
    return null;
}
|
|
763
860
|
/**
|
|
764
861
|
* Reads a JSON artifact if it exists and contains an object.
|
|
765
862
|
*
|
|
@@ -1071,6 +1168,105 @@ function resolveProfileBudgets(scenario) {
|
|
|
1071
1168
|
}
|
|
1072
1169
|
: null;
|
|
1073
1170
|
}
|
|
1171
|
+
/**
 * Looks up this package's version string for run provenance.
 *
 * Reads the package.json two directories above this module. Any failure while
 * reading, or a version that is not a string, yields 'unknown' instead of
 * throwing.
 *
 * @returns {string}
 */
function readAslPackageVersion() {
    let version;
    try {
        const manifestLocation = path.resolve(__dirname, '..', '..', 'package.json');
        version = readJson(manifestLocation).version;
    }
    catch {
        // Best-effort provenance: a missing or unreadable manifest is not fatal.
        return 'unknown';
    }
    if (typeof version === 'string') {
        return version;
    }
    return 'unknown';
}
|
|
1186
|
+
/**
 * Picks the command transport label recorded for this run.
 *
 * Precedence, first match wins: a non-empty `options.commandTransport`, a
 * string `--events` fixture log, a string `--ios-profile-session-transport`,
 * either profile-session storage flag, `--profile-session`, a string
 * `--adb-artifacts`, a string `--simctl-artifacts`, and finally the
 * interaction driver name.
 *
 * @param {{args: CliArgs, interactionDriver: string, options: ProfileMobileOptions}} options
 * @returns {string}
 */
function resolveCommandTransport({ args, interactionDriver, options, }) {
    const explicitTransport = options.commandTransport;
    if (typeof explicitTransport === 'string' && explicitTransport.length > 0) {
        return explicitTransport;
    }
    const isStringArg = (flag) => typeof args[flag] === 'string';
    if (isStringArg('events')) {
        return 'fixture-log-ingest';
    }
    if (isStringArg('ios-profile-session-transport')) {
        return `profile-session-${args['ios-profile-session-transport']}`;
    }
    const usesSessionStorage = args['android-profile-session-storage'] || args['ios-profile-session-storage'];
    if (usesSessionStorage) {
        return 'profile-session-storage';
    }
    if (args['profile-session']) {
        return 'profile-session-deeplink';
    }
    if (isStringArg('adb-artifacts')) {
        return 'adb-artifacts';
    }
    if (isStringArg('simctl-artifacts')) {
        return 'simctl-artifacts';
    }
    return interactionDriver;
}
|
|
1216
|
+
/**
 * Assembles the provenance cohort recorded in the run manifest.
 *
 * Runner-derived defaults are computed first; any keys present on
 * `options.provenanceCohort` are spread last and therefore override them.
 *
 * @param {{args: CliArgs, appId: string, interactionDriver: string, options: ProfileMobileOptions, providerExecution: ProviderCommandExecution}} options
 * @returns {Record<string, unknown>}
 */
function buildProfileProvenanceCohort({ appId, args, interactionDriver, options, providerExecution, }) {
    const commandTransport = resolveCommandTransport({ args, interactionDriver, options });
    const runnerDefaults = {
        appId,
        commandTransport,
        platform: options.platform,
        providers: providerExecution.providers,
        runnerName: interactionDriver,
        runnerVersion: readAslPackageVersion(),
    };
    return { ...runnerDefaults, ...options.provenanceCohort };
}
|
|
1233
|
+
/**
 * Creates one environment assertion record for manifest pre/postconditions.
 *
 * `evidence` defaults to 'asserted'. The `artifact` key is only present on the
 * result when a truthy artifact reference is supplied.
 *
 * @param {{artifact?: string, evidence?: string, source: string, value: unknown}} options
 * @returns {Record<string, unknown>}
 */
function environmentAssertion({ artifact, evidence = 'asserted', source, value, }) {
    const assertion = { value, evidence, source };
    if (artifact) {
        assertion.artifact = artifact;
    }
    return assertion;
}
|
|
1247
|
+
/**
 * Builds the postconditions the profile runner asserts once artifacts are written.
 *
 * `artifactState` is 'complete' for passed runs and 'partial' otherwise;
 * `cleanupState` is always 'not-required'. Keys on
 * `options.environmentPostconditions` are spread last and override both.
 *
 * @param {{metrics: Record<string, unknown>, options: ProfileMobileOptions}} options
 * @returns {Record<string, unknown>}
 */
function buildProfileEnvironmentPostconditions({ metrics, options, }) {
    const artifactStateValue = metrics.status === 'passed' ? 'complete' : 'partial';
    const artifactState = environmentAssertion({
        value: artifactStateValue,
        evidence: 'asserted',
        source: 'asl-profile-runner',
        artifact: 'manifest.json',
    });
    const cleanupState = environmentAssertion({
        value: 'not-required',
        evidence: 'asserted',
        source: 'asl-profile-runner',
    });
    return {
        artifactState,
        cleanupState,
        ...options.environmentPostconditions,
    };
}
|
|
1074
1270
|
/**
|
|
1075
1271
|
* Runs the mobile log-ingest profile artifact pipeline.
|
|
1076
1272
|
*
|
|
@@ -1100,6 +1296,7 @@ async function runProfileMobile(args, options) {
|
|
|
1100
1296
|
const capturesDir = layout.captures;
|
|
1101
1297
|
const startedAt = new Date().toISOString();
|
|
1102
1298
|
const eventLogPath = resolveEventLogPath({ args, platform: options.platform });
|
|
1299
|
+
const profileSessionEntriesPath = resolveProfileSessionEntriesPath({ args, platform: options.platform });
|
|
1103
1300
|
const interactionDriver = resolveInteractionDriver({ config, options, scenario });
|
|
1104
1301
|
const comparisonLane = resolveComparisonLane({ args, options, scenario });
|
|
1105
1302
|
await ensureDir(rawDir);
|
|
@@ -1151,6 +1348,26 @@ async function runProfileMobile(args, options) {
|
|
|
1151
1348
|
scenario: scenarioName,
|
|
1152
1349
|
runId,
|
|
1153
1350
|
});
|
|
1351
|
+
const logSessionEntries = extractProfileSessionEntries(eventLogText, {
|
|
1352
|
+
scenario: scenarioName,
|
|
1353
|
+
runId,
|
|
1354
|
+
});
|
|
1355
|
+
const storedSessionEntries = profileSessionEntriesPath
|
|
1356
|
+
? JSON.parse(await fsp.readFile(profileSessionEntriesPath, 'utf8'))
|
|
1357
|
+
: [];
|
|
1358
|
+
const sessionEntries = [
|
|
1359
|
+
...logSessionEntries,
|
|
1360
|
+
...(Array.isArray(storedSessionEntries)
|
|
1361
|
+
? storedSessionEntries.filter((entry) => {
|
|
1362
|
+
if (!entry || typeof entry !== 'object' || Array.isArray(entry)) {
|
|
1363
|
+
return false;
|
|
1364
|
+
}
|
|
1365
|
+
const record = entry;
|
|
1366
|
+
return ((!('scenario' in record) || record.scenario === scenarioName) &&
|
|
1367
|
+
(!('runId' in record) || record.runId === runId));
|
|
1368
|
+
})
|
|
1369
|
+
: []),
|
|
1370
|
+
];
|
|
1154
1371
|
const runtimeTarget = resolveRuntimeTarget({ args, platform: options.platform });
|
|
1155
1372
|
const metrics = buildMetricsFromProfileEvents({
|
|
1156
1373
|
scenario: scenarioName,
|
|
@@ -1164,48 +1381,67 @@ async function runProfileMobile(args, options) {
|
|
|
1164
1381
|
signals: attachedEvidence.signals,
|
|
1165
1382
|
},
|
|
1166
1383
|
});
|
|
1384
|
+
const manifestArtifacts = {
|
|
1385
|
+
causalRun: 'causal-run.json',
|
|
1386
|
+
budgetVerdict: 'budget-verdict.json',
|
|
1387
|
+
manifest: 'manifest.json',
|
|
1388
|
+
metrics: 'metrics.json',
|
|
1389
|
+
summary: 'summary.md',
|
|
1390
|
+
scenario: toPortablePathReference(scenarioPath),
|
|
1391
|
+
raw: {
|
|
1392
|
+
interactionLog: eventLogPath ? `raw/${path.basename(eventLogPath)}` : 'raw/interaction.log',
|
|
1393
|
+
deviceLog: 'raw/device.log',
|
|
1394
|
+
},
|
|
1395
|
+
captures: {
|
|
1396
|
+
screenshots: attachedEvidence.captures.screenshots,
|
|
1397
|
+
video: attachedEvidence.captures.video ?? 'captures/run.mp4',
|
|
1398
|
+
uiTree: attachedEvidence.captures.uiTree ?? 'captures/ui-tree.json',
|
|
1399
|
+
},
|
|
1400
|
+
signals: {
|
|
1401
|
+
js: attachedEvidence.signals.js,
|
|
1402
|
+
memory: attachedEvidence.signals.memory,
|
|
1403
|
+
network: attachedEvidence.signals.network,
|
|
1404
|
+
},
|
|
1405
|
+
evidenceAttachments: buildEvidenceAttachmentManifest(attachedEvidence.attachments),
|
|
1406
|
+
};
|
|
1407
|
+
const appId = resolveAppId({ config, platform: options.platform });
|
|
1408
|
+
const provenanceCohort = buildProfileProvenanceCohort({
|
|
1409
|
+
appId,
|
|
1410
|
+
args,
|
|
1411
|
+
interactionDriver,
|
|
1412
|
+
options,
|
|
1413
|
+
providerExecution,
|
|
1414
|
+
});
|
|
1167
1415
|
const manifest = buildManifest({
|
|
1168
1416
|
scenario: scenarioName,
|
|
1169
1417
|
scenarioHash,
|
|
1170
1418
|
runId,
|
|
1171
1419
|
platform: options.platform,
|
|
1172
1420
|
status: metrics.status,
|
|
1421
|
+
terminalState: buildAttemptTerminalState(metrics),
|
|
1173
1422
|
endedAt: new Date().toISOString(),
|
|
1174
1423
|
interactionDriver,
|
|
1175
1424
|
comparisonLane,
|
|
1425
|
+
classification: buildAttemptClassification(metrics),
|
|
1426
|
+
cleanup: {
|
|
1427
|
+
status: 'not-required',
|
|
1428
|
+
},
|
|
1429
|
+
partialArtifacts: buildAttemptPartialArtifacts({ artifacts: manifestArtifacts, metrics }),
|
|
1430
|
+
preconditions: options.environmentPreconditions,
|
|
1431
|
+
postconditions: buildProfileEnvironmentPostconditions({ metrics, options }),
|
|
1176
1432
|
startedAt,
|
|
1177
1433
|
simulator: runtimeTarget,
|
|
1178
|
-
bundleId:
|
|
1434
|
+
bundleId: appId,
|
|
1179
1435
|
gitSha: 'unknown',
|
|
1180
1436
|
toolVersions: {
|
|
1181
1437
|
node: process.version,
|
|
1182
1438
|
},
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
budgetVerdict: 'budget-verdict.json',
|
|
1186
|
-
manifest: 'manifest.json',
|
|
1187
|
-
metrics: 'metrics.json',
|
|
1188
|
-
summary: 'summary.md',
|
|
1189
|
-
scenario: toPortablePathReference(scenarioPath),
|
|
1190
|
-
raw: {
|
|
1191
|
-
interactionLog: eventLogPath ? `raw/${path.basename(eventLogPath)}` : 'raw/interaction.log',
|
|
1192
|
-
deviceLog: 'raw/device.log',
|
|
1193
|
-
},
|
|
1194
|
-
captures: {
|
|
1195
|
-
screenshots: attachedEvidence.captures.screenshots,
|
|
1196
|
-
video: attachedEvidence.captures.video ?? 'captures/run.mp4',
|
|
1197
|
-
uiTree: attachedEvidence.captures.uiTree ?? 'captures/ui-tree.json',
|
|
1198
|
-
},
|
|
1199
|
-
signals: {
|
|
1200
|
-
js: attachedEvidence.signals.js,
|
|
1201
|
-
memory: attachedEvidence.signals.memory,
|
|
1202
|
-
network: attachedEvidence.signals.network,
|
|
1203
|
-
},
|
|
1204
|
-
evidenceAttachments: buildEvidenceAttachmentManifest(attachedEvidence.attachments),
|
|
1205
|
-
},
|
|
1439
|
+
cohort: provenanceCohort,
|
|
1440
|
+
artifacts: manifestArtifacts,
|
|
1206
1441
|
});
|
|
1207
1442
|
const timeline = buildCausalTimeline({
|
|
1208
1443
|
events,
|
|
1444
|
+
sessionEntries,
|
|
1209
1445
|
startedAt,
|
|
1210
1446
|
phaseMap: scenario.timelinePhases ?? null,
|
|
1211
1447
|
owner: scenario.flowId ?? scenarioName,
|
|
@@ -1231,7 +1467,7 @@ async function runProfileMobile(args, options) {
|
|
|
1231
1467
|
});
|
|
1232
1468
|
const health = buildProfileHealth({ scenario: profileScenario, runId, metrics });
|
|
1233
1469
|
const verdict = buildProfileVerdict({ scenario: profileScenario, runId, health, metrics });
|
|
1234
|
-
const agentSummary = buildAgentSummaryMarkdown({ health, verdict });
|
|
1470
|
+
const agentSummary = buildAgentSummaryMarkdown({ health, verdict, manifest });
|
|
1235
1471
|
const summary = buildSummaryMarkdown({ manifest, metrics });
|
|
1236
1472
|
await writeJsonArtifact({
|
|
1237
1473
|
filePath: layout.health,
|
|
@@ -1282,6 +1518,9 @@ async function runProfileMobile(args, options) {
|
|
|
1282
1518
|
if (eventLogPath) {
|
|
1283
1519
|
await fsp.copyFile(eventLogPath, path.join(rawDir, path.basename(eventLogPath)));
|
|
1284
1520
|
}
|
|
1521
|
+
if (profileSessionEntriesPath) {
|
|
1522
|
+
await fsp.copyFile(profileSessionEntriesPath, path.join(rawDir, path.basename(profileSessionEntriesPath)));
|
|
1523
|
+
}
|
|
1285
1524
|
await copyAttachedEvidence(attachedEvidence.copies);
|
|
1286
1525
|
return {
|
|
1287
1526
|
runDir,
|
package/docs/adapters.md
CHANGED
|
@@ -143,3 +143,7 @@ If the provider should run during profiling, declare `providerCommands` in its m
|
|
|
143
143
|
- Passed runs write the standard artifact set.
|
|
144
144
|
- Attached evidence is inventoried with stable run-relative paths.
|
|
145
145
|
- Package docs describe whether the adapter is bundled, a fixture target, or a project-local integration.
|
|
146
|
+
|
|
147
|
+
## Read next
|
|
148
|
+
|
|
149
|
+
- [Consumer App Rehearsal](consumer-rehearsal.md) for adopting the package inside an existing app
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
ASL is implemented in TypeScript, but its contracts are language-neutral.
|
|
4
|
+
|
|
5
|
+
The TypeScript package is the reference implementation for planning, execution,
|
|
6
|
+
schema validation, artifact writing, health/verdict/comparison interpretation,
|
|
7
|
+
run indexing, CLIs, the TypeScript runner SDK, and the React Native/Expo helper.
|
|
8
|
+
Those implementation modules are not the interoperability contract.
|
|
9
|
+
|
|
10
|
+
## Contract Boundary
|
|
11
|
+
|
|
12
|
+
JSON Schema and normative documentation are the source of truth for external
|
|
13
|
+
participants. TypeScript interfaces should reflect those contracts, not replace
|
|
14
|
+
them.
|
|
15
|
+
|
|
16
|
+
Language-neutral contracts include:
|
|
17
|
+
|
|
18
|
+
- scenario schemas;
|
|
19
|
+
- runner and evidence-provider manifests;
|
|
20
|
+
- capability definitions;
|
|
21
|
+
- truth-event envelopes;
|
|
22
|
+
- command and result envelopes;
|
|
23
|
+
- lifecycle states;
|
|
24
|
+
- error taxonomy;
|
|
25
|
+
- artifact schemas;
|
|
26
|
+
- cancellation and timeout semantics;
|
|
27
|
+
- protocol-version negotiation.
|
|
28
|
+
|
|
29
|
+
External adapters must be able to participate out of process. A valid adapter
|
|
30
|
+
may be an executable written in Swift, Kotlin, Python, Rust, shell, TypeScript,
|
|
31
|
+
or another language, provided it satisfies the documented schemas and protocol.
|
|
32
|
+
The minimal executable protocol is described in
|
|
33
|
+
[External Adapter Protocol](external-adapter-protocol.md).
|
|
34
|
+
|
|
35
|
+
## Reference Environments
|
|
36
|
+
|
|
37
|
+
React Native and Expo remain the primary active battle-testing environment.
|
|
38
|
+
They provide real Android and iOS pressure across lifecycle behavior, command
|
|
39
|
+
transport, native boundaries, instrumentation, packaging, evidence provenance,
|
|
40
|
+
and agent-facing summaries.
|
|
41
|
+
|
|
42
|
+
The React Native helper is a reference transport. It does not define the truth
|
|
43
|
+
event contract by itself. Native apps must be able to emit ASL truth events
|
|
44
|
+
without embedding a JavaScript runtime.
|
|
45
|
+
|
|
46
|
+
## Public Interoperability Rules
|
|
47
|
+
|
|
48
|
+
- Scenario files must remain structured data, not arbitrary JavaScript.
|
|
49
|
+
- Runners and providers must not be required to subclass TypeScript classes.
|
|
50
|
+
- Large evidence should be passed by file reference, not embedded as base64.
|
|
51
|
+
- External adapters should use structured protocol messages over stdio.
|
|
52
|
+
- Messages should carry run ids, attempt ids, sequence numbers, operation ids,
|
|
53
|
+
deadlines, platform, clock-domain metadata, adapter identity, and artifact
|
|
54
|
+
references where applicable.
|
|
55
|
+
- Failed operations should return structured failure data with stable codes,
|
|
56
|
+
classes, retryability, and next-action hints.
|
|
57
|
+
|
|
58
|
+
## Audit Snapshot
|
|
59
|
+
|
|
60
|
+
Current public contracts are JSON schemas and JSON manifests. Scenario fixtures
|
|
61
|
+
are structured JSON and do not require callbacks or closures. Runner/provider
|
|
62
|
+
manifests describe capabilities and commands as data.
|
|
63
|
+
|
|
64
|
+
Known implementation-specific surfaces are intentionally reference paths:
|
|
65
|
+
|
|
66
|
+
- npm package scripts and Node CLIs are the TypeScript distribution channel.
|
|
67
|
+
- built-in adb, simctl, Argent, and agent-device runners are TypeScript
|
|
68
|
+
adapters.
|
|
69
|
+
- React Native profile-session logging and AsyncStorage are reference truth
|
|
70
|
+
event transports.
|
|
71
|
+
- provider command examples use Node scripts, but provider manifests can point
|
|
72
|
+
at any executable.
|
|
73
|
+
|
|
74
|
+
These are acceptable as reference implementation details. They should not become
|
|
75
|
+
requirements in scenario schemas, artifact schemas, or external-adapter protocol
|
|
76
|
+
messages.
|
|
77
|
+
|
|
78
|
+
## Future Design Test
|
|
79
|
+
|
|
80
|
+
Could a Swift, Kotlin, Python, or Rust implementation satisfy this contract
|
|
81
|
+
using only the schemas, protocol documentation, executable interface, and
|
|
82
|
+
conformance fixtures?
|
|
83
|
+
|
|
84
|
+
If yes, TypeScript remains a productive reference implementation. If no, the
|
|
85
|
+
implementation-specific assumption should be identified and removed from the
|
|
86
|
+
contract surface.
|
|
87
|
+
|
|
88
|
+
## Read next
|
|
89
|
+
|
|
90
|
+
- [External Adapter Protocol](external-adapter-protocol.md) for the out-of-process adapter envelope, operations, failures, and conformance fixture
|
package/docs/authoring.md
CHANGED
|
@@ -159,7 +159,7 @@ asl-profile-android \
|
|
|
159
159
|
--capture uiTree:artifacts/provider/ui-tree.json
|
|
160
160
|
```
|
|
161
161
|
|
|
162
|
-
Signals are copied into `signals/js`, `signals/memory`, or `signals/network` and listed in `manifest.json`. Captures are copied into `captures`; screenshots are listed in `artifacts.captures.screenshots`, while video and UI tree captures replace the matching named capture path in the manifest. Every attached file is also listed in `artifacts.evidenceAttachments` with kind, run-relative path, source filename, byte size, and
|
|
162
|
+
Signals are copied into `signals/js`, `signals/memory`, or `signals/network` and listed in `manifest.json`. Captures are copied into `captures`; screenshots are listed in `artifacts.captures.screenshots`, while video and UI tree captures replace the matching named capture path in the manifest. Every attached file is also listed in `artifacts.evidenceAttachments` with kind, run-relative path, source filename, byte size, sha256 hash, completeness status, corruption status, redaction status, and transformation list. Attached provider evidence is preserved as proof, but timing verdicts still come from app-owned truth events and budgets.
|
|
163
163
|
|
|
164
164
|
Provider manifests can also declare `providerCommands`. Profile runners execute those commands when passed with `--provider <manifest>`, but only when the provider manifest includes the selected platform. A provider with `platforms: ["ios"]` passed to an Android profile writes failed `health.json` with `provider_platform_unsupported` and does not run the command. Commands run without a shell, can use placeholders such as `{providerDir}`, `{runDir}`, `{runId}`, `{scenarioId}`, and `{platform}`, and must declare their output files. Provider-channel outputs are copied or preserved under `raw/providers/<provider-id>/` and inventoried in `artifacts.evidenceAttachments`; signal and capture outputs can still map into the standard `signals/*` or `captures/` folders. Command stdout, stderr, exit code, phase, and argv are preserved under `raw/provider-commands/`. When a provider command exits nonzero, the runner writes failed `health.json`, inconclusive `verdict.json`, and `agent-summary.md` with a next-action hint instead of making timing claims.
|
|
165
165
|
|
|
@@ -196,3 +196,7 @@ Run the release gate before publishing package changes:
|
|
|
196
196
|
```bash
|
|
197
197
|
pnpm release:check
|
|
198
198
|
```
|
|
199
|
+
|
|
200
|
+
## Read next
|
|
201
|
+
|
|
202
|
+
- [Adapter Onboarding](adapters.md) for runner and provider integration
|
package/docs/concepts.md
CHANGED
|
@@ -104,33 +104,12 @@ The tooling may change. The runners may change. The agents may change. The scena
|
|
|
104
104
|
|
|
105
105
|
That is a different philosophy from frameworks that primarily evaluate agents. Agent Scenario Loop is built to evaluate the evolution of software.
|
|
106
106
|
|
|
107
|
-
##
|
|
107
|
+
## Boundary
|
|
108
108
|
|
|
109
|
-
Agent Scenario Loop
|
|
109
|
+
Agent Scenario Loop is not a replacement for testing frameworks, automation tools, mobile drivers, profilers, or agent evaluation systems. Those tools can still execute or observe work.
|
|
110
110
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
> Did the application behave correctly?
|
|
114
|
-
|
|
115
|
-
Agent Scenario Loop optimizes for:
|
|
116
|
-
|
|
117
|
-
> What did we learn from running this scenario?
|
|
118
|
-
|
|
119
|
-
Both questions matter. Agent Scenario Loop focuses on the second question by preserving health, verdicts, metrics, logs, traces, comparisons, and other run evidence in a stable artifact shape.
|
|
120
|
-
|
|
121
|
-
## How it differs from agent evaluation
|
|
122
|
-
|
|
123
|
-
Agent Scenario Loop is not primarily evaluating agents.
|
|
124
|
-
|
|
125
|
-
An agent may execute part of a run. A runner may drive a device. A profiler may collect signals. None of those is the center of the model.
|
|
126
|
-
|
|
127
|
-
The scenario is.
|
|
128
|
-
|
|
129
|
-
The feed, livestream, upload flow, checkout flow, or conversation thread is the thing being studied over time.
|
|
111
|
+
The canonical boundary list lives in [What It Is Not](../README.md#what-it-is-not).
|
|
130
112
|
|
|
131
113
|
## Read next
|
|
132
114
|
|
|
133
115
|
- [Principles](principles.md) for the project doctrine
|
|
134
|
-
- [Contracts](contracts.md) for the current artifact and package surface
|
|
135
|
-
- [Live Proofs](live-proofs.md) for fixture, Android, iOS, and comparison runs
|
|
136
|
-
- [Runner docs](../runner/README.md) for the host execution boundary
|
|
@@ -113,3 +113,7 @@ Before expanding beyond the first journey, confirm:
|
|
|
113
113
|
- at least one platform has a passed live proof
|
|
114
114
|
|
|
115
115
|
Only then add more scenarios, providers, or runner adapters.
|
|
116
|
+
|
|
117
|
+
## Read next
|
|
118
|
+
|
|
119
|
+
- [Live Proofs](live-proofs.md) for fixture, Android, iOS, comparison, and release-proof commands
|