agent-scenario-loop 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/README.md +15 -9
  2. package/app/profile-session.ts +98 -4
  3. package/dist/core/agent-summary.d.ts +3 -2
  4. package/dist/core/agent-summary.js +44 -2
  5. package/dist/core/artifact-contract.d.ts +22 -4
  6. package/dist/core/artifact-contract.js +512 -11
  7. package/dist/core/comparison.d.ts +57 -3
  8. package/dist/core/comparison.js +113 -1
  9. package/dist/core/planner.d.ts +32 -1
  10. package/dist/core/planner.js +144 -0
  11. package/dist/core/run-index.d.ts +4 -0
  12. package/dist/core/run-index.js +55 -1
  13. package/dist/core/schema-validator.d.ts +1 -0
  14. package/dist/core/schema-validator.js +1 -0
  15. package/dist/runner/compare-latest.d.ts +8 -4
  16. package/dist/runner/compare-latest.js +24 -5
  17. package/dist/runner/example-android-live.d.ts +10 -1
  18. package/dist/runner/example-android-live.js +55 -0
  19. package/dist/runner/example-ios-live.d.ts +10 -1
  20. package/dist/runner/example-ios-live.js +55 -0
  21. package/dist/runner/init-project.d.ts +4 -1
  22. package/dist/runner/init-project.js +26 -4
  23. package/dist/runner/ios-simctl.d.ts +5 -0
  24. package/dist/runner/ios-simctl.js +6 -0
  25. package/dist/runner/live-comparison.d.ts +2 -2
  26. package/dist/runner/live-comparison.js +2 -1
  27. package/dist/runner/live-proof-summary.d.ts +5 -4
  28. package/dist/runner/live-proof-summary.js +12 -2
  29. package/dist/runner/live-proof.d.ts +3 -2
  30. package/dist/runner/live-proof.js +9 -2
  31. package/dist/runner/profile-android.d.ts +5 -0
  32. package/dist/runner/profile-android.js +148 -24
  33. package/dist/runner/profile-ios.d.ts +11 -1
  34. package/dist/runner/profile-ios.js +128 -9
  35. package/dist/runner/profile-mobile.d.ts +8 -0
  36. package/dist/runner/profile-mobile.js +267 -28
  37. package/docs/adapters.md +4 -0
  38. package/docs/api.md +1 -1
  39. package/docs/architecture.md +90 -0
  40. package/docs/authoring.md +7 -1
  41. package/docs/concepts.md +3 -24
  42. package/docs/consumer-rehearsal.md +4 -0
  43. package/docs/contracts.md +30 -100
  44. package/docs/external-adapter-protocol.md +219 -0
  45. package/docs/live-proofs.md +83 -2
  46. package/docs/principles.md +9 -15
  47. package/examples/mobile-app/README.md +12 -0
  48. package/examples/mobile-app/runner-manifests/primary-runner.json +1 -0
  49. package/examples/runners/README.md +1 -0
  50. package/examples/runners/adb-android.json +1 -0
  51. package/examples/runners/agent-device-android.json +1 -0
  52. package/examples/runners/agent-device-ios.json +1 -0
  53. package/examples/runners/argent-android.json +1 -0
  54. package/examples/runners/argent-ios.json +1 -0
  55. package/examples/runners/xcodebuildmcp-ios.json +1 -0
  56. package/package.json +2 -1
  57. package/schemas/causal-run.schema.json +85 -2
  58. package/schemas/comparison.schema.json +130 -2
  59. package/schemas/external-adapter-message.schema.json +693 -0
  60. package/schemas/health.schema.json +72 -0
  61. package/schemas/live-proof-set.schema.json +1 -1
  62. package/schemas/live-proof.schema.json +14 -6
  63. package/schemas/manifest.schema.json +442 -1
  64. package/schemas/runner-capabilities.schema.json +20 -0
  65. package/schemas/scenario.schema.json +16 -0
  66. package/templates/primary-runner.json +1 -0
  67. package/templates/skills/agent-scenario-loop/SKILL.md +93 -0
  68. package/templates/skills/agent-scenario-loop/references/adoption-checklist.md +17 -0
  69. package/templates/skills/agent-scenario-loop/references/artifact-interpretation.md +26 -0
package/README.md CHANGED
@@ -14,12 +14,14 @@ Execution tools can change. The scenario and evidence contract should not.
14
14
  | --- | --- |
15
15
  | Understand the idea in plain language | [Concepts](docs/concepts.md) |
16
16
  | Understand the project doctrine | [Principles](docs/principles.md) |
17
- | Write your first scenario | [Scenario Authoring](docs/authoring.md) |
18
- | Rehearse adoption in an existing app | [Consumer App Rehearsal](docs/consumer-rehearsal.md) |
17
+ | Understand why ASL is a protocol, not a TypeScript-only library | [Architecture](docs/architecture.md) |
18
+ | Implement or evaluate an out-of-process adapter in any language | [External Adapter Protocol](docs/external-adapter-protocol.md) |
19
19
  | Inspect artifacts, schemas, and supported surfaces | [Contracts](docs/contracts.md) |
20
- | Use the package from code | [Public API](docs/api.md) |
20
+ | Write your first scenario | [Scenario Authoring](docs/authoring.md) |
21
21
  | Add a runner or evidence provider | [Adapter Onboarding](docs/adapters.md) |
22
+ | Rehearse adoption in an existing app | [Consumer App Rehearsal](docs/consumer-rehearsal.md) |
22
23
  | Run fixture, Android, or iOS proofs | [Live Proofs](docs/live-proofs.md) |
24
+ | Use the package from code | [Public API](docs/api.md) |
23
25
  | Inspect runner behavior and limits | [Runner docs](runner/README.md) |
24
26
  | Explore the neutral dogfood app | [examples/mobile-app](examples/mobile-app/README.md) |
25
27
  | See runner and provider fixtures | [examples/runners](examples/runners/README.md) |
@@ -43,6 +45,12 @@ Install or use the package, then scaffold a first scenario inside an app:
43
45
  asl-init --out . --scenario first-journey
44
46
  ```
45
47
 
48
+ Add the optional repository-scoped agent skill when you want Codex to load ASL operating guidance from the consuming app:
49
+
50
+ ```bash
51
+ asl-init --out . --scenario first-journey --with-agent-skill
52
+ ```
53
+
46
54
  Wire the generated app helper, emit truth events around one real journey, merge the generated `asl:*` scripts intentionally, then validate the project:
47
55
 
48
56
  ```bash
@@ -67,12 +75,6 @@ No simulator or device available yet? Run the fixture loop:
67
75
  pnpm demo:loop -- --out artifacts/demo-loop
68
76
  ```
69
77
 
70
- Read next:
71
-
72
- - [Scenario Authoring](docs/authoring.md) for scenario shape and truth events
73
- - [Consumer App Rehearsal](docs/consumer-rehearsal.md) for adoption in an existing app
74
- - [Live Proofs](docs/live-proofs.md) for Android, iOS, comparison, and release-proof paths
75
-
76
78
  ## Package Surface
77
79
 
78
80
  The root package exports stable core contracts:
@@ -117,3 +119,7 @@ pnpm release:check
117
119
  ```
118
120
 
119
121
  The package should remain product-neutral. Product-specific selectors, routes, auth assumptions, accounts, and scenario data belong in the consuming app, not in this repository.
122
+
123
+ ## Read next
124
+
125
+ - [Concepts](docs/concepts.md) for the plain-language model
package/app/profile-session.ts CHANGED
@@ -12,11 +12,16 @@ export type ProfileSessionState = {
12
12
 
13
13
  export type ProfileSessionCommand = {
14
14
  id: string;
15
+ commandId?: string;
15
16
  scenario?: string;
16
17
  runId?: string;
17
18
  command: string;
19
+ queueId?: string;
20
+ sequence?: number;
18
21
  source?: 'deeplink' | 'storage';
19
22
  timestamp: number;
23
+ waitForMilestone?: string;
24
+ waitTimeoutMs?: number;
20
25
  };
21
26
 
22
27
  export type ProfileSignalKind = 'js' | 'memory' | 'network';
@@ -65,7 +70,16 @@ type StoredProfileSessionEntry = {
65
70
  startedAt?: number;
66
71
  stoppedAt?: number;
67
72
  command?: string;
73
+ commandId?: string;
68
74
  id?: string;
75
+ queueId?: string;
76
+ reason?: string;
77
+ result?: string;
78
+ sequence?: number;
79
+ source?: 'deeplink' | 'storage';
80
+ status?: 'received' | 'queued' | 'delivered' | 'completed' | 'skipped';
81
+ waitForMilestone?: string;
82
+ waitTimeoutMs?: number;
69
83
  };
70
84
 
71
85
  type StoredProfileSignals = Record<ProfileSignalKind, Record<string, unknown>>;
@@ -316,6 +330,41 @@ function logProfileSession(kind: 'start' | 'stop' | 'command', payload: Record<s
316
330
  if (typeof payload.id === 'string') {
317
331
  entry.id = payload.id;
318
332
  }
333
+ if (typeof payload.commandId === 'string') {
334
+ entry.commandId = payload.commandId;
335
+ } else if (typeof payload.id === 'string') {
336
+ entry.commandId = payload.id;
337
+ }
338
+ if (typeof payload.queueId === 'string') {
339
+ entry.queueId = payload.queueId;
340
+ }
341
+ if (typeof payload.sequence === 'number') {
342
+ entry.sequence = payload.sequence;
343
+ }
344
+ if (payload.source === 'deeplink' || payload.source === 'storage') {
345
+ entry.source = payload.source;
346
+ }
347
+ if (
348
+ payload.status === 'received' ||
349
+ payload.status === 'queued' ||
350
+ payload.status === 'delivered' ||
351
+ payload.status === 'completed' ||
352
+ payload.status === 'skipped'
353
+ ) {
354
+ entry.status = payload.status;
355
+ }
356
+ if (typeof payload.reason === 'string') {
357
+ entry.reason = payload.reason;
358
+ }
359
+ if (typeof payload.result === 'string') {
360
+ entry.result = payload.result;
361
+ }
362
+ if (typeof payload.waitForMilestone === 'string') {
363
+ entry.waitForMilestone = payload.waitForMilestone;
364
+ }
365
+ if (typeof payload.waitTimeoutMs === 'number') {
366
+ entry.waitTimeoutMs = payload.waitTimeoutMs;
367
+ }
319
368
  }
320
369
 
321
370
  appendStoredProfileSessionEntry(entry);
@@ -326,6 +375,11 @@ function getProfileSessionRoute(url: string): {
326
375
  scenario?: string;
327
376
  runId?: string;
328
377
  command?: string;
378
+ commandId?: string;
379
+ queueId?: string;
380
+ sequence?: number;
381
+ waitForMilestone?: string;
382
+ waitTimeoutMs?: number;
329
383
  } | null {
330
384
  const parsed = ExpoLinking.parse(url);
331
385
  const segments = [parsed.hostname, parsed.path]
@@ -347,8 +401,22 @@ function getProfileSessionRoute(url: string): {
347
401
  typeof parsed.queryParams?.runId === 'string' ? parsed.queryParams.runId : undefined;
348
402
  const command =
349
403
  typeof parsed.queryParams?.command === 'string' ? parsed.queryParams.command : undefined;
350
-
351
- return { action, scenario, runId, command };
404
+ const commandId =
405
+ typeof parsed.queryParams?.commandId === 'string' ? parsed.queryParams.commandId : undefined;
406
+ const sequence =
407
+ typeof parsed.queryParams?.sequence === 'string' && Number.isInteger(Number(parsed.queryParams.sequence))
408
+ ? Number(parsed.queryParams.sequence)
409
+ : undefined;
410
+ const queueId =
411
+ typeof parsed.queryParams?.queueId === 'string' ? parsed.queryParams.queueId : undefined;
412
+ const waitForMilestone =
413
+ typeof parsed.queryParams?.waitForMilestone === 'string' ? parsed.queryParams.waitForMilestone : undefined;
414
+ const waitTimeoutMs =
415
+ typeof parsed.queryParams?.waitTimeoutMs === 'string' && Number.isInteger(Number(parsed.queryParams.waitTimeoutMs))
416
+ ? Number(parsed.queryParams.waitTimeoutMs)
417
+ : undefined;
418
+
419
+ return { action, scenario, runId, command, commandId, queueId, sequence, waitForMilestone, waitTimeoutMs };
352
420
  }
353
421
 
354
422
  function queuePendingProfileCommand(command: ProfileSessionCommand) {
@@ -456,17 +524,32 @@ function notifyProfileCommandListeners(command: ProfileSessionCommand) {
456
524
 
457
525
  const targetDispatched = dispatchProfileCommandTarget(command);
458
526
  if (targetDispatched) {
527
+ logProfileSession('command', {
528
+ ...command,
529
+ status: 'completed',
530
+ result: 'target-dispatched',
531
+ });
459
532
  return;
460
533
  }
461
534
 
462
535
  if (profileCommandListeners.size === 0) {
463
536
  queuePendingProfileCommand(command);
537
+ logProfileSession('command', {
538
+ ...command,
539
+ status: 'queued',
540
+ reason: 'no-command-listener',
541
+ });
464
542
  return;
465
543
  }
466
544
 
467
545
  for (const listener of profileCommandListeners) {
468
546
  listener(command);
469
547
  }
548
+ logProfileSession('command', {
549
+ ...command,
550
+ status: 'delivered',
551
+ result: 'listener-notified',
552
+ });
470
553
  }
471
554
 
472
555
  function flushPendingProfileCommands(listener: (command: ProfileSessionCommand) => void) {
@@ -560,10 +643,18 @@ export function applyProfileSessionUrl(url: string | null | undefined): boolean
560
643
  scenario: route.scenario,
561
644
  runId: route.runId,
562
645
  command: route.command,
646
+ ...(route.commandId ? { commandId: route.commandId } : {}),
647
+ ...(route.queueId ? { queueId: route.queueId } : {}),
648
+ ...(typeof route.sequence === 'number' ? { sequence: route.sequence } : {}),
563
649
  source: 'deeplink' as const,
564
650
  timestamp,
651
+ ...(route.waitForMilestone ? { waitForMilestone: route.waitForMilestone } : {}),
652
+ ...(typeof route.waitTimeoutMs === 'number' ? { waitTimeoutMs: route.waitTimeoutMs } : {}),
565
653
  };
566
- logProfileSession('command', command);
654
+ logProfileSession('command', {
655
+ ...command,
656
+ status: 'received',
657
+ });
567
658
  notifyProfileCommandListeners(command);
568
659
  return true;
569
660
  }
@@ -776,7 +867,10 @@ export function useProfileSessionBootstrap(): void {
776
867
  }
777
868
 
778
869
  markProfileCommandIdProcessed(storageCommand);
779
- logProfileSession('command', storageCommand);
870
+ logProfileSession('command', {
871
+ ...storageCommand,
872
+ status: 'received',
873
+ });
780
874
  notifyProfileCommandListeners(storageCommand);
781
875
  }
782
876
  };
package/dist/core/agent-summary.d.ts CHANGED
@@ -1,10 +1,10 @@
1
1
  /**
2
2
  * Builds the minimum agent-facing markdown summary for a run.
3
3
  *
4
- * @param {{health: Record<string, unknown>, verdict: Record<string, unknown>, comparison?: Record<string, unknown> | null}} options
4
+ * @param {{health: Record<string, unknown>, verdict: Record<string, unknown>, comparison?: Record<string, unknown> | null, manifest?: Record<string, unknown> | null}} options
5
5
  * @returns {string}
6
6
  */
7
- declare function buildAgentSummaryMarkdown({ health, verdict, comparison }: AgentSummaryInput): string;
7
+ declare function buildAgentSummaryMarkdown({ health, verdict, comparison, manifest }: AgentSummaryInput): string;
8
8
  export { buildAgentSummaryMarkdown, };
9
9
  export type { AgentSummaryInput, SummaryRecord, };
10
10
  type SummaryRecord = Record<string, unknown>;
@@ -12,4 +12,5 @@ type AgentSummaryInput = {
12
12
  health: SummaryRecord;
13
13
  verdict: SummaryRecord;
14
14
  comparison?: SummaryRecord | null;
15
+ manifest?: SummaryRecord | null;
15
16
  };
package/dist/core/agent-summary.js CHANGED
@@ -122,13 +122,54 @@ function formatComparisonBasis(comparison) {
122
122
  }
123
123
  return lines;
124
124
  }
125
+ /**
126
+ * Formats attempt terminal semantics for agent-readable summaries.
127
+ *
128
+ * @param {SummaryRecord | null | undefined} manifest
129
+ * @returns {string[]}
130
+ */
131
+ function formatAttempt(manifest) {
132
+ const attempt = manifest?.attempt;
133
+ if (!attempt || typeof attempt !== 'object' || Array.isArray(attempt)) {
134
+ return [];
135
+ }
136
+ const attemptRecord = attempt;
137
+ const classification = attemptRecord.classification && typeof attemptRecord.classification === 'object' && !Array.isArray(attemptRecord.classification)
138
+ ? attemptRecord.classification
139
+ : {};
140
+ const cleanup = attemptRecord.cleanup && typeof attemptRecord.cleanup === 'object' && !Array.isArray(attemptRecord.cleanup)
141
+ ? attemptRecord.cleanup
142
+ : {};
143
+ const partialArtifacts = attemptRecord.partialArtifacts && typeof attemptRecord.partialArtifacts === 'object' && !Array.isArray(attemptRecord.partialArtifacts)
144
+ ? attemptRecord.partialArtifacts
145
+ : {};
146
+ const retryOfAttemptId = firstString([attemptRecord.retryOfAttemptId], '');
147
+ const retryReason = firstString([attemptRecord.retryReason], '');
148
+ const lines = [
149
+ '',
150
+ '## attempt',
151
+ '',
152
+ `- Attempt: ${code(firstString([attemptRecord.attemptId], 'unknown-attempt'))} (${attemptRecord.attemptNumber ?? 'unknown'}/${attemptRecord.maxAttempts ?? 'unknown'})`,
153
+ `- Terminal state: ${code(firstString([attemptRecord.terminalState], 'unknown'))}`,
154
+ `- Classification: ${code(firstString([classification.category], 'unknown'))}${classification.code ? ` ${code(classification.code)}` : ''}`,
155
+ `- Cleanup: ${code(firstString([cleanup.status], 'unknown'))}`,
156
+ `- Partial artifacts valid: ${partialArtifacts.valid === true ? 'true' : 'false'} - ${firstString([partialArtifacts.reason], 'no reason recorded')}`,
157
+ ];
158
+ if (retryOfAttemptId || retryReason) {
159
+ lines.push(`- Retry lineage: previous=${code(retryOfAttemptId || 'unknown')} reason=${retryReason || 'not recorded'}`);
160
+ }
161
+ if (Array.isArray(partialArtifacts.paths) && partialArtifacts.paths.length > 0) {
162
+ lines.push(`- Partial artifact paths: ${partialArtifacts.paths.map((item) => code(item)).join(', ')}`);
163
+ }
164
+ return lines;
165
+ }
125
166
  /**
126
167
  * Builds the minimum agent-facing markdown summary for a run.
127
168
  *
128
- * @param {{health: Record<string, unknown>, verdict: Record<string, unknown>, comparison?: Record<string, unknown> | null}} options
169
+ * @param {{health: Record<string, unknown>, verdict: Record<string, unknown>, comparison?: Record<string, unknown> | null, manifest?: Record<string, unknown> | null}} options
129
170
  * @returns {string}
130
171
  */
131
- function buildAgentSummaryMarkdown({ health, verdict, comparison = null }) {
172
+ function buildAgentSummaryMarkdown({ health, verdict, comparison = null, manifest = null }) {
132
173
  const scenarioId = firstString([health?.scenarioId, verdict?.scenarioId], 'unknown-scenario');
133
174
  const runId = firstString([health?.runId, verdict?.runId], 'unknown-run');
134
175
  const healthStatus = firstString([health?.healthStatus], 'failed');
@@ -169,6 +210,7 @@ function buildAgentSummaryMarkdown({ health, verdict, comparison = null }) {
169
210
  if (failedBudgets.length > 0) {
170
211
  lines.push('', '## failed budgets', '', ...failedBudgets);
171
212
  }
213
+ lines.push(...formatAttempt(manifest));
172
214
  if (comparison) {
173
215
  lines.push('', '## comparison', '', firstString([comparison.summary], 'No comparison summary provided.'));
174
216
  lines.push(...formatComparisonBasis(comparison));
package/dist/core/artifact-contract.d.ts CHANGED
@@ -8,6 +8,12 @@ type ProfileEvent = ArtifactRecord & {
8
8
  atMs?: number;
9
9
  timestamp?: number | string;
10
10
  };
11
+ type ProfileSessionEntry = ArtifactRecord & {
12
+ kind?: string;
13
+ scenario?: string;
14
+ runId?: string;
15
+ timestamp?: number | string;
16
+ };
11
17
  type BudgetCheck = {
12
18
  name: string;
13
19
  actual: unknown;
@@ -36,6 +42,17 @@ declare function extractProfileEvents(logText: string, filters?: {
36
42
  runId?: string;
37
43
  scenario?: string;
38
44
  }): ProfileEvent[];
45
+ /**
46
+ * Extracts structured profile-session entries from device logs.
47
+ *
48
+ * @param {string} logText
49
+ * @param {{runId?: string, scenario?: string}} [filters]
50
+ * @returns {Record<string, unknown>[]}
51
+ */
52
+ declare function extractProfileSessionEntries(logText: string, filters?: {
53
+ runId?: string;
54
+ scenario?: string;
55
+ }): ProfileSessionEntry[];
39
56
  /**
40
57
  * Builds timing metrics from app-emitted profile events.
41
58
  *
@@ -75,8 +92,9 @@ declare function sortValue(value: any): any;
75
92
  * @param {{events: Record<string, unknown>[], startedAt?: string, phaseMap?: Record<string, string> | null, owner?: string | null}} options
76
93
  * @returns {Record<string, unknown>[]}
77
94
  */
78
- declare function buildCausalTimeline({ events, startedAt, phaseMap, owner, }: {
95
+ declare function buildCausalTimeline({ events, sessionEntries, startedAt, phaseMap, owner, }: {
79
96
  events: ProfileEvent[];
97
+ sessionEntries?: ProfileSessionEntry[];
80
98
  startedAt?: string;
81
99
  phaseMap?: ArtifactRecord | null;
82
100
  owner?: string | null;
@@ -107,7 +125,7 @@ declare function buildCausalRun({ scenario, flowId, runId, platform, buildFlavor
107
125
  * @param {Record<string, unknown>} options
108
126
  * @returns {Record<string, unknown>}
109
127
  */
110
- declare function buildManifest({ scenario, scenarioHash, runId, platform, status, startedAt, endedAt, interactionDriver, comparisonLane, simulator, bundleId, gitSha, toolVersions, artifacts, failureReason, }: ArtifactRecord): ArtifactRecord;
128
+ declare function buildManifest({ scenario, scenarioHash, runId, attemptId, attemptNumber, maxAttempts, retryOfAttemptId, retryReason, platform, status, terminalState, startedAt, endedAt, interactionDriver, comparisonLane, classification, cleanup, partialArtifacts, preconditions, postconditions, simulator, bundleId, gitSha, toolVersions, cohort, artifacts, failureReason, }: ArtifactRecord): ArtifactRecord;
111
129
  /**
112
130
  * Builds the human-readable profile summary.
113
131
  *
@@ -147,5 +165,5 @@ declare function evaluateUiContract({ rawDescription, requiredIdentifierPatterns
147
165
  checks: ArtifactRecord[];
148
166
  missingPatterns: string[];
149
167
  };
150
- export { PROFILE_EVENT_PREFIX, buildBudgetVerdict, buildCausalRun, buildCausalTimeline, buildManifest, buildMetricsFromProfileEvents, buildSummaryMarkdown, evaluateUiContract, evaluateProfileBudgets, extractCandidateIdentifiers, extractProfileEvents, findMatchingIdentifier, percentile, sortValue, };
151
- export type { ArtifactRecord, BudgetCheck, ProfileEvent, };
168
+ export { PROFILE_EVENT_PREFIX, buildBudgetVerdict, buildCausalRun, buildCausalTimeline, buildManifest, buildMetricsFromProfileEvents, buildSummaryMarkdown, evaluateUiContract, evaluateProfileBudgets, extractCandidateIdentifiers, extractProfileEvents, extractProfileSessionEntries, findMatchingIdentifier, percentile, sortValue, };
169
+ export type { ArtifactRecord, BudgetCheck, ProfileEvent, ProfileSessionEntry, };