archal 0.9.19 → 0.9.20

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +9 -1
  2. package/agents/github-octokit/.archal.json +8 -0
  3. package/agents/github-octokit/Dockerfile +8 -0
  4. package/agents/github-octokit/README.md +113 -0
  5. package/agents/github-octokit/agent.mjs +54 -0
  6. package/agents/github-octokit/package.json +9 -0
  7. package/agents/github-octokit/scenarios/test-repo-access.md +27 -0
  8. package/agents/google-workspace-local-tools/Dockerfile +6 -0
  9. package/agents/google-workspace-local-tools/README.md +58 -0
  10. package/agents/google-workspace-local-tools/agent.mjs +196 -0
  11. package/agents/google-workspace-local-tools/archal-harness.json +7 -0
  12. package/agents/google-workspace-local-tools/run-input.yaml +16 -0
  13. package/agents/google-workspace-local-tools/scenario.md +29 -0
  14. package/agents/hermes/.archal.json +8 -0
  15. package/agents/hermes/Dockerfile +46 -0
  16. package/agents/hermes/README.md +87 -0
  17. package/agents/hermes/SOUL.md +27 -0
  18. package/agents/hermes/config.yaml +34 -0
  19. package/agents/hermes/drive.mjs +113 -0
  20. package/agents/hermes/scenarios/stripe-customers-read-only.md +32 -0
  21. package/agents/openclaw/.archal.json +8 -0
  22. package/agents/openclaw/Dockerfile +96 -0
  23. package/agents/openclaw/README.md +120 -0
  24. package/agents/openclaw/drive.mjs +311 -0
  25. package/agents/openclaw/package.json +9 -0
  26. package/agents/openclaw/scenarios/github-issue-triage-read-only.md +44 -0
  27. package/agents/openclaw/workspace/AGENTS.md +23 -0
  28. package/agents/openclaw/workspace/IDENTITY.md +8 -0
  29. package/agents/openclaw/workspace/SOUL.md +14 -0
  30. package/agents/openclaw/workspace/TOOLS.md +35 -0
  31. package/agents/pagination-test/README.md +24 -0
  32. package/agents/pagination-test/scenario.md +24 -0
  33. package/agents/replay-capsule-harness/README.md +29 -0
  34. package/agents/replay-capsule-harness/observability-install-offline-e2e.mts +1517 -0
  35. package/agents/replay-capsule-harness/replay-capsule-e2e.mjs +104 -0
  36. package/clone-assets/apify/tools.json +256 -22
  37. package/clone-assets/calcom/tools.json +510 -0
  38. package/clone-assets/clickup/tools.json +1258 -0
  39. package/clone-assets/customerio/tools.json +386 -0
  40. package/clone-assets/datadog/tools.json +734 -0
  41. package/clone-assets/github/tools.json +306 -25
  42. package/clone-assets/gitlab/tools.json +999 -0
  43. package/clone-assets/google-workspace/tools.json +18 -6
  44. package/clone-assets/hubspot/tools.json +1406 -0
  45. package/clone-assets/jira/fidelity.json +1 -1
  46. package/clone-assets/jira/tools.json +266 -543
  47. package/clone-assets/linear/tools.json +238 -40
  48. package/clone-assets/ownerrez/tools.json +548 -0
  49. package/clone-assets/pricelabs/tools.json +343 -0
  50. package/clone-assets/sentry/tools.json +745 -0
  51. package/clone-assets/slack/tools.json +1 -2
  52. package/clone-assets/stripe/tools.json +185 -46
  53. package/clone-assets/supabase/tools.json +437 -0
  54. package/clone-assets/unipile/tools.json +408 -0
  55. package/clone-assets/webflow/tools.json +415 -0
  56. package/dist/autoloop-worker-types-BEb_E44z.d.cts +196 -0
  57. package/dist/cli.cjs +150299 -87430
  58. package/dist/commands/autoloop-hosted-worker.cjs +43942 -0
  59. package/dist/commands/autoloop-hosted-worker.d.cts +143 -0
  60. package/dist/commands/autoloop-pr-verification.cjs +4227 -0
  61. package/dist/commands/autoloop-pr-verification.d.cts +17 -0
  62. package/dist/{vitest/chunk-L36NXAU6.js → commands/autoloop-result-parser.cjs} +16445 -18852
  63. package/dist/commands/autoloop-result-parser.d.cts +39 -0
  64. package/dist/commands/autoloop-worker.cjs +36163 -0
  65. package/dist/commands/autoloop-worker.d.cts +97 -0
  66. package/dist/harness.cjs +1 -0
  67. package/dist/index.cjs +1 -1
  68. package/dist/replay.cjs +49624 -0
  69. package/dist/replay.d.cts +4625 -0
  70. package/dist/scenarios.cjs +80343 -0
  71. package/dist/scenarios.d.cts +562 -0
  72. package/dist/vitest/chunk-6CBYFCFK.js +4667 -0
  73. package/dist/vitest/chunk-ARVS45PP.js +2764 -0
  74. package/dist/vitest/index.cjs +6011 -75261
  75. package/dist/vitest/index.d.ts +7 -6
  76. package/dist/vitest/index.js +8 -8
  77. package/dist/vitest/runtime/hosted-session-reaper.cjs +792 -34359
  78. package/dist/vitest/runtime/hosted-session-reaper.js +1 -1
  79. package/dist/vitest/runtime/setup-files.js +2 -2
  80. package/package.json +8 -3
  81. package/skills/archal-agent/SKILL.md +87 -0
  82. package/skills/{attach → autoloop}/SKILL.md +94 -120
  83. package/skills/autoloop/references/hosted-sources.md +62 -0
  84. package/skills/autoloop/references/trace-schema-mapping.md +73 -0
  85. package/skills/eval/SKILL.md +35 -1
  86. package/skills/install-agent/SKILL.md +221 -0
  87. package/skills/onboard/SKILL.md +73 -5
  88. package/skills/scenario/SKILL.md +19 -4
  89. package/skills/seed/SKILL.md +237 -0
  90. package/dist/seed/dynamic-generator.cjs +0 -45687
  91. package/dist/seed/dynamic-generator.d.cts +0 -106
  92. package/dist/vitest/chunk-WZ7SA4CK.js +0 -47369
@@ -4,7 +4,7 @@ import {
4
4
  createHostedAuthLease,
5
5
  parsePositiveInteger,
6
6
  runHostedSessionReaper
7
- } from "../chunk-WZ7SA4CK.js";
7
+ } from "../chunk-ARVS45PP.js";
8
8
 
9
9
  // src/runtime/hosted-session-reaper.ts
10
10
  var VITEST_AUTH_LEASE_OPTIONS = {
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  bootstrapArchalVitestRouting
3
- } from "../chunk-L36NXAU6.js";
4
- import "../chunk-WZ7SA4CK.js";
3
+ } from "../chunk-6CBYFCFK.js";
4
+ import "../chunk-ARVS45PP.js";
5
5
 
6
6
  // src/runtime/setup-files.ts
7
7
  import { existsSync, rmSync } from "fs";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "archal",
3
- "version": "0.9.19",
3
+ "version": "0.9.20",
4
4
  "description": "Test your agents & integrations against service clones",
5
5
  "type": "module",
6
6
  "main": "dist/index.cjs",
@@ -17,6 +17,10 @@
17
17
  "types": "./dist/harness.d.cts",
18
18
  "default": "./dist/harness.cjs"
19
19
  },
20
+ "./scenarios": {
21
+ "types": "./dist/scenarios.d.cts",
22
+ "default": "./dist/scenarios.cjs"
23
+ },
20
24
  "./vitest": {
21
25
  "types": "./dist/vitest/index.d.ts",
22
26
  "import": "./dist/vitest/index.js",
@@ -47,6 +51,7 @@
47
51
  "dist",
48
52
  "skills",
49
53
  "clone-assets",
54
+ "agents",
50
55
  "LICENSE"
51
56
  ],
52
57
  "peerDependencies": {
@@ -58,8 +63,8 @@
58
63
  }
59
64
  },
60
65
  "dependencies": {
61
- "@aws-sdk/client-secrets-manager": "^3.1001.0",
62
- "e2b": "^2.19.5",
66
+ "@aws-sdk/client-secrets-manager": "^3.1065.0",
67
+ "e2b": "^2.28.2",
63
68
  "picomatch": "^4.0.4"
64
69
  },
65
70
  "scripts": {
@@ -0,0 +1,87 @@
1
+ ---
2
+ name: archal-agent
3
+ description: The front door for using Archal to test, debug, and fix an AI agent. START HERE, then route to the right sub-skill instead of guessing. Use when the user says "use Archal", "test my agent", "set up Archal", "my agent is failing", "reproduce this production failure", "grade my traces", or otherwise wants to evaluate, debug, or auto-fix an agent against service clones. Pick this whenever the request is Archal-shaped but the specific workflow is unclear.
4
+ user-invocable: true
5
+ argument-hint: "[what you want to do with your agent]"
6
+ ---
7
+
8
+ # Archal
9
+
10
+ You are the entry point for Archal. Archal is the QA layer for AI agents: it
11
+ runs an agent against stateful behavioral clones of real services (GitHub,
12
+ Slack, Stripe, Linear, Jira, Supabase, and more), scores how well it satisfies
13
+ each scenario, and turns failures into reproductions and PR fixes. Your job here
14
+ is to orient the operator and route to the sub-skill that owns their workflow.
15
+ Do not inline those flows; hand off by exact name and let the sub-skill drive.
16
+
17
+ ## Product mental model
18
+
19
+ Archal tests AI agents against service clones instead of real services, so every
20
+ run is deterministic and replayable. You describe a task and success criteria,
21
+ the agent runs against clones, and an evaluator scores satisfaction (a
22
+ probability, not pass/fail). When a real production trace shows a failure, Archal
23
+ reproduces it on clones from trace evidence and ships the fix as a GitHub PR.
24
+ Everything aims at the same thing: deterministic, replayable evals you can trust.
25
+
26
+ ## Decision guide
27
+
28
+ | I want to... | Route to |
29
+ |--------------|----------|
30
+ | Set up Archal in a repo from scratch (install, auth, detect clones) | `onboard` |
31
+ | Write or edit a scenario test file | `scenario` |
32
+ | Load explicit JSON/SQL/catalog state into a clone (deterministic, no LLM) | `seed` |
33
+ | Run scenarios or tasks and interpret satisfaction scores and failures | `eval` |
34
+ | Wire clones into an existing Vitest suite | `vitest` |
35
+ | Connect a repo's production observability so traces get captured | `install-agent` |
36
+ | Run the autoloop over real trace sources (ingest -> grade -> find-failed -> reproduce-on-clones) and, optionally, ship the fix as a PR (autofix) | `autoloop` |
37
+ | Turn autofix or autoloop on/off for an agent | `autoloop`; the copilot can toggle either |
38
+
39
+ If the user is brand new and has none of this set up, start with `onboard`; it
40
+ detects clones and routes onward from there.
41
+
42
+ ## The sub-skills
43
+
44
+ Each lives in `packages/archal/skills/` and owns its own commands, contracts,
45
+ and mental model. Route by exact name:
46
+
47
+ - `onboard` — set up Archal in a repo from scratch: install the CLI, handle auth,
48
+ detect which clones the agent needs, and hand off to the right workflow.
49
+ - `scenario` — author and edit scenario test files (Setup / Prompt / Expected
50
+ Behavior / Success Criteria) that `archal run` executes against clones.
51
+ - `seed` — load explicit JSON, SQL, or catalog state into a clone deterministically,
52
+ with no LLM in the loop, so runs start from a known fixture state.
53
+ - `eval` — run scenarios or inline tasks against clones and interpret the results:
54
+ satisfaction scores, `[D]` vs `[P]` criteria, trace inspection, failure diagnosis.
55
+ - `vitest` — wire clones into an existing Vitest suite using the right composition
56
+ pattern, so the agent's own tests route through clones.
57
+ - `install-agent` — connect a repo's production observability (OTLP, Langfuse,
58
+ Braintrust, database trace tables) so real agent traces are captured for Archal.
59
+ - `autoloop` — the loop over real trace sources: ingest a trace, grade it for a
60
+ real failure, find the failed trace, and reproduce it on clones. Autofix (the
61
+ fix/PR step) is a separate toggle on top of this: when turned on, autoloop
62
+ reproduces a failure and ships the fix as a PR.
63
+
64
+ ## Autoloop and autofix toggles
65
+
66
+ Autoloop (ingest -> grade -> find-failed -> reproduce-on-clones) and autofix (the
67
+ fix/PR step) are **separate per-agent toggles**. Autofix is opt-in: it is not part
68
+ of autoloop until it is turned on. Either can be switched on or off per agent from
69
+ the agents tab, the CLI (`--execution-policy reproduce` is autoloop with autofix
70
+ off; `fix` turns autofix on), or by asking the Archal copilot in chat — the
71
+ copilot can toggle either for an agent. When the user asks to turn autofix or
72
+ autoloop on/off for an agent, handle the toggle, then route to `autoloop` for the
73
+ deeper flow.
74
+
75
+ ## Provider-switchable remediation
76
+
77
+ The Archal copilot is not locked to one model. When autofix reproduces a failure
78
+ and writes a fix, the user can drive that remediation with their own agent —
79
+ `archal preprod` exposes `--remediation-agent auto|codex|claude|cursor` so the
80
+ fix is written by their Claude Code, Cursor, or Codex — or let Archal's managed
81
+ agent do it. Mention this when the user asks who writes the fix or wants to use
82
+ their own coding agent, then route to `autoloop`.
83
+
84
+ ## Docs
85
+
86
+ - Quickstart: https://docs.archal.ai/quickstart
87
+ - Full docs: https://docs.archal.ai
@@ -1,22 +1,27 @@
1
1
  ---
2
- name: attach
3
- description: Connect Archal Attach to a repo and trace source, validate prerequisites, configure trace schema mapping, and diagnose import, grade, reproduction, and PR-fix runs.
2
+ name: autoloop
3
+ description: Wire Archal Autoloop to a repo plus a real agent-trace source, then drive the import -> grade -> reproduce -> PR-fix loop. USE THIS whenever the user wants to turn production agent traces into reproducible failures and fixes: "I have prod agent traces and want to reproduce a failure", "import my Langfuse / Braintrust / OTel / Supabase traces", "connect a trace source", "grade my prod traces", "turn a failed trace into a PR", "set up / configure the autoloop", or any mention of replaying, grading, or auto-fixing real traces. Also fires when diagnosing a stuck import, grade, reproduction, or PR-fix run, or configuring trace schema mapping.
4
4
  user-invocable: true
5
5
  argument-hint: "[repo, trace source, or failure description]"
6
6
  ---
7
7
 
8
- # Archal Attach
8
+ # Archal Autoloop
9
9
 
10
10
  You help users connect real agent traces to Archal. Your job is to wire the repo,
11
11
  trace source, harness contract, scenario contract, and GitHub PR path without
12
12
  guessing or leaking secrets.
13
13
 
14
- Attach is not a replacement for `archal run`. It uses the same harness and clone
14
+ Autoloop is not a replacement for `archal run`. It uses the same harness and clone
15
15
  routing ideas, but the trigger is a trace that already happened.
16
16
 
17
+ Autoloop is also not arbitrary production trace replay. It can reproduce a
18
+ failure only when the trace, scenario contract, and seed templates contain
19
+ enough evidence to reconstruct the service state that matters. If the evidence
20
+ is thin, block and name the missing data instead of claiming reproduction.
21
+
17
22
  ## Product mental model
18
23
 
19
- Attach does this loop:
24
+ Autoloop does this loop:
20
25
 
21
26
  1. Import a trace and its child spans from a read-only source.
22
27
  2. Grade whether the trace contains a real failure.
@@ -25,17 +30,25 @@ Attach does this loop:
25
30
  4. Run the reproduction against service clones through the customer harness.
26
31
  5. If reproduced, patch the repo and open a GitHub issue or PR.
27
32
 
28
- Default policy is `fix`. Narrower policies stop earlier:
33
+ Steps 1-4 are **autoloop**: ingest -> grade -> find the failed trace ->
34
+ reproduce on clones. Step 5 (writing the fix and opening the PR) is **autofix**,
35
+ a separate opt-in step that is *not* part of autoloop until it is turned on.
36
+ Both are per-agent toggles, switchable from the agents tab, the CLI, or by asking
37
+ the Archal copilot in chat.
38
+
39
+ The CLI maps these toggles to `--execution-policy`: `reproduce` runs autoloop
40
+ only with autofix off, while `fix` turns autofix on (autoloop plus the fix/PR
41
+ step). Narrower policies stop earlier:
29
42
 
30
- | Policy | Stops after |
31
- |--------|-------------|
32
- | `observe` | import |
33
- | `grade` | grading |
34
- | `reproduce` | reproduction |
35
- | `fix` | PR or blocked fix status |
43
+ | Policy | Stops after | Autofix |
44
+ |--------|-------------|---------|
45
+ | `observe` | import | off |
46
+ | `grade` | grading | off |
47
+ | `reproduce` | reproduction | off |
48
+ | `fix` | PR or blocked fix status | on |
36
49
 
37
50
  Do not invent or promote separate top-level judge, reproduce, fix, or
38
- trace-replay commands. The public command is `archal attach`. Local stop
51
+ trace-replay commands. The public command is `archal autoloop`. The local stop
39
52
  command is `archal detach` for file-backed trace directories.
40
53
 
41
54
  ## Discover first
@@ -67,6 +80,8 @@ Before changing anything, inspect the repo:
67
80
  Hosted sources and `--execution-policy fix` need a GitHub remote.
68
81
  5. Trace source shape, if available:
69
82
  - provider: local files, Postgres, Supabase
83
+ - or local/client-ingested sources normalized through `archal trace-source`,
84
+ such as file, HTTP/OTel, Langfuse, Braintrust, S3/GCS, or custom JSON
70
85
  - trace table and span table names
71
86
  - id columns
72
87
  - parent span column
@@ -78,7 +93,7 @@ only the env var name or secret reference.
78
93
 
79
94
  ## Preconditions
80
95
 
81
- You need these before a full hosted Attach setup:
96
+ You need these before a full hosted Autoloop setup:
82
97
 
83
98
  - Archal CLI installed in the repo or reachable with `npx archal`
84
99
  - authenticated user (`archal login`) or `ARCHAL_TOKEN=archal_ws_...`
@@ -132,7 +147,7 @@ The command must be headless and repeatable. It should run the real agent path,
132
147
  not a hand-authored mock. During reproduction, Archal invokes this command
133
148
  through `archal run`, so the agent should read the task from `AGENT_TASK` and
134
149
  print its final answer to stdout. If the project already has `.archal.json` for
135
- `archal run`, align the Attach harness with that command.
150
+ `archal run`, align the Autoloop harness with that command.
136
151
 
137
152
  ### 3. Add or verify `archal/scenario.md`
138
153
 
@@ -174,127 +189,75 @@ archal/seeds/
174
189
  jira-escalations.json
175
190
  ```
176
191
 
177
- Seed templates should contain stable service state for the task family. Attach
192
+ Seed templates should contain stable service state for the task family. Autoloop
178
193
  can then fill in trace-specific identifiers. This is much safer than expecting
179
194
  weak traces to reconstruct full service state.
180
195
 
181
196
  ## Hosted database source
182
197
 
183
- Use this when traces live in Postgres or Supabase.
184
-
185
- First, create or request a read-only database user. Then keep the URL in an env
186
- var:
187
-
188
- ```bash
189
- export TRACE_DATABASE_URL='postgres://readonly:...'
190
- ```
191
-
192
- Run a check:
193
-
194
- ```bash
195
- npx archal attach \
196
- --repo . \
197
- --source supabase \
198
- --database-url-env TRACE_DATABASE_URL \
199
- --source-id prod-agent-traces \
200
- --check
201
- ```
198
+ Use this when traces live in Postgres or Supabase. Create a read-only DB user,
199
+ keep the URL in `TRACE_DATABASE_URL` (or a secret ref in hosted production), then
200
+ `--check` the source and re-run without `--check` to register it. Registration
201
+ posts the source config to Archal and returns; hosted workers own polling after
202
+ that, so local `archal detach` does not disable it.
202
203
 
203
- Register:
204
+ See `references/hosted-sources.md` for the full check, register, and
205
+ `--database-url-secret-ref` flag blocks.
204
206
 
205
- ```bash
206
- npx archal attach \
207
- --repo . \
208
- --source supabase \
209
- --database-url-env TRACE_DATABASE_URL \
210
- --source-id prod-agent-traces
211
- ```
207
+ ## Client-side trace ingestion
212
208
 
213
- This posts the source config to Archal and returns. It does not start a local
214
- watcher and does not write local source state.
209
+ Use `archal trace-source` when traces are not already in a hosted Postgres or
210
+ Supabase table. This command normalizes source-specific payloads into Archal
211
+ trace upload envelopes, writes them to a trace directory, and can upload them to
212
+ hosted Autoloop when workspace auth is available.
215
213
 
216
- Use a secret reference when the customer already has one:
214
+ Common paths:
217
215
 
218
216
  ```bash
219
- npx archal attach \
220
- --repo . \
221
- --source postgres \
222
- --database-url-secret-ref aws-secretsmanager://customer/prod-agent-traces
217
+ npx archal trace-source import ./exports --preview --json
218
+ npx archal trace-source import ./exports --upload --repository owner/repo
219
+
220
+ npx archal trace-source connect langfuse \
221
+ --base-url https://cloud.langfuse.com \
222
+ --api-key-env LANGFUSE_READ_KEY \
223
+ --out .archal/traces/inbox
224
+ npx archal trace-source test langfuse
225
+ npx archal trace-source sync langfuse --upload --repository owner/repo
226
+ npx archal trace-source watch langfuse --upload --repository owner/repo
227
+
228
+ npx archal trace-source connect custom --name "prod exporter" --out .archal/traces/inbox
229
+ npx archal trace-source serve "prod exporter" --port 4319
223
230
  ```
224
231
 
225
- `--database-url-secret-ref` must not contain a plaintext credential.
232
+ Use `archal trace-source status [source]` to inspect registry validation,
233
+ cursor, and last-sync state. `watch` is for pull-style sources; push sources
234
+ stay continuous through `serve`.
226
235
 
227
236
  ## Trace schema mapping
228
237
 
229
- Defaults:
238
+ Hosted sources default to `ai_traces` / `ai_spans` with `id` / `trace_id`
239
+ columns and `updated_at_id` cursor mode. When the customer's tables differ, pass
240
+ mapping flags to override table names, id columns, parent-span column, and
241
+ cursor columns; switch to `created_at_id` cursor mode for append-only sources;
242
+ and use `--source-*` filters to scope noisy sources by workspace, agent, status,
243
+ trace group, or limit.
230
244
 
231
- | Concept | Default |
232
- |---------|---------|
233
- | trace table | `ai_traces` |
234
- | span table | `ai_spans` |
235
- | trace id | `id` |
236
- | span id | `id` |
237
- | span trace id | `trace_id` |
238
- | trace updated cursor | `updated_at` |
239
- | span updated cursor | `updated_at` |
240
- | cursor mode | `updated_at_id` |
241
-
242
- For a custom schema, pass mapping flags:
243
-
244
- ```bash
245
- npx archal attach \
246
- --repo . \
247
- --source postgres \
248
- --database-url-env TRACE_DATABASE_URL \
249
- --trace-table public.agent_traces \
250
- --span-table public.agent_spans \
251
- --trace-id-column trace_id \
252
- --span-id-column span_id \
253
- --span-trace-id-column trace_id \
254
- --parent-span-id-column parent_span_id \
255
- --trace-updated-at-column updated_at \
256
- --span-updated-at-column updated_at
257
- ```
258
-
259
- For append-only sources:
260
-
261
- ```bash
262
- npx archal attach \
263
- --repo . \
264
- --source supabase \
265
- --database-url-env TRACE_DATABASE_URL \
266
- --cursor-mode created_at_id \
267
- --trace-created-at-column created_at \
268
- --span-created-at-column created_at
269
- ```
270
-
271
- Use filters for noisy sources:
272
-
273
- ```bash
274
- npx archal attach \
275
- --repo . \
276
- --source supabase \
277
- --database-url-env TRACE_DATABASE_URL \
278
- --source-workspace-id workspace_123 \
279
- --source-agent-id support-agent \
280
- --source-status failed error \
281
- --source-trace-group billing-support \
282
- --source-limit 250
283
- ```
245
+ See `references/trace-schema-mapping.md` for the full defaults table plus the
246
+ custom-schema, append-only, and filter flag blocks.
284
247
 
285
248
  ## Local trace directory
286
249
 
287
250
  Use this for a local pilot or exported trace files:
288
251
 
289
252
  ```bash
290
- npx archal attach ./prod-traces --repo . --execution-policy reproduce
253
+ npx archal autoloop ./prod-traces --repo . --execution-policy reproduce
291
254
  ```
292
255
 
293
256
  Artifacts are written under:
294
257
 
295
258
  ```text
296
- .archal/attach/
297
- attachments.json
259
+ .archal/autoloop/
260
+ autoloops.json
298
261
  runs.jsonl
299
262
  raw/
300
263
  grades/
@@ -313,17 +276,25 @@ npx archal detach ./prod-traces --repo .
313
276
 
314
277
  Do not describe `archal detach` as a hosted source disable command.
315
278
 
316
- ## Dashboard expectations
279
+ ## CLI-first operation
317
280
 
318
- The workspace dashboard has three Attach pages:
281
+ Prefer CLI and artifact evidence for handoffs unless the user explicitly asks
282
+ for a workspace page.
319
283
 
320
- - Imported traces: source status, imported rows, grade verdict, blocked reason.
321
- - Reproduced failures: seed evidence, reproduction status, clone parity, run
322
- artifacts.
323
- - Opened issues/PRs: GitHub issue, PR, branch, checks, and fix status.
284
+ - Local file-backed loops: `archal autoloop <trace-dir> --repo ...` starts the
285
+ watcher, `archal detach <trace-dir> --repo ...` stops it, `archal
286
+ autoloop-status --repo ...` summarizes trace jobs, and `archal
287
+ autoloop-reprocess --repo ... <trace-id>` retries terminal jobs after a
288
+ blocker is fixed.
289
+ - Hosted database sources: `archal autoloop --source postgres|supabase ...`
290
+ registers the source and returns. Local `archal detach` does not disable a
291
+ hosted source because hosted workers own polling after registration.
292
+ - Safe resume means re-running the same `archal autoloop` registration or
293
+ reprocessing a terminal local trace only after the missing evidence,
294
+ credential, mapping, harness, or GitHub blocker is corrected.
324
295
 
325
- Do not route users to an old single Attach page. The user-facing pages are the
326
- three workspace dashboard pages above.
296
+ Report exact artifact paths and statuses. Do not make dashboard pages the only
297
+ place a user can understand what happened.
327
298
 
328
299
  ## How to diagnose failures
329
300
 
@@ -331,6 +302,8 @@ Classify failures precisely:
331
302
 
332
303
  - Trace import failure: database/source auth, mapping, cursor, filters, bad
333
304
  trace shape.
305
+ - Trace ingestion failure: `trace-source` adapter mismatch, rejected hosted
306
+ upload, missing workspace auth, bad idempotency key, or receiver auth failure.
334
307
  - Grade failure: judge could not determine expected outcome, missing evaluator
335
308
  contract, trace lacks task context.
336
309
  - Missing evidence: trace does not contain enough state to seed. Add spans,
@@ -351,7 +324,7 @@ claim reproduction succeeded.
351
324
 
352
325
  ## Artifact reading guide
353
326
 
354
- Local file-backed Attach uses repo-local artifacts. Hosted Attach exposes the
327
+ Local file-backed Autoloop uses repo-local artifacts. Hosted Autoloop exposes the
355
328
  same phase information in the dashboard.
356
329
 
357
330
  | Artifact | What to inspect |
@@ -387,7 +360,7 @@ After setup or debugging, give the user:
387
360
  - source provider and source id
388
361
  - repo full name
389
362
  - execution policy
390
- - dashboard pages to inspect
363
+ - CLI status command to run next
391
364
  - artifacts produced, if local
392
365
  - whether import, grade, seed, reproduce, and fix phases are ready
393
366
  - exact blocker if any
@@ -395,8 +368,9 @@ After setup or debugging, give the user:
395
368
 
396
369
  ## Docs
397
370
 
398
- - Attach production traces: https://docs.archal.ai/guides/attach-production-traces
399
- - CLI reference: https://docs.archal.ai/cli/attach
371
+ - Autoloop production traces: https://docs.archal.ai/guides/autoloop-production-traces
372
+ - Autonomous loops: https://docs.archal.ai/guides/autoloop-production-traces
373
+ - CLI reference: https://docs.archal.ai/cli/autoloop
400
374
  - Running with an agent: https://docs.archal.ai/guides/run-with-agent
401
375
  - Writing scenarios: https://docs.archal.ai/guides/writing-scenarios
402
376
  - Seeds: https://docs.archal.ai/guides/seeds
@@ -0,0 +1,62 @@
1
+ # Hosted database source
2
+
3
+ Full flag reference for registering a hosted Postgres or Supabase trace source.
4
+ Use this when traces live in Postgres or Supabase. Registration posts the source
5
+ config to Archal and returns; hosted workers own polling after that.
6
+
7
+ ## Contents
8
+
9
+ - Read-only credential
10
+ - Check the source
11
+ - Register the source
12
+ - Secret reference (hosted production)
13
+
14
+ ## Read-only credential
15
+
16
+ First, create or request a read-only database user. Then keep the URL in an env
17
+ var:
18
+
19
+ ```bash
20
+ export TRACE_DATABASE_URL='postgres://readonly:...'
21
+ ```
22
+
23
+ ## Check the source
24
+
25
+ Run a check:
26
+
27
+ ```bash
28
+ npx archal autoloop \
29
+ --repo . \
30
+ --source supabase \
31
+ --database-url-env TRACE_DATABASE_URL \
32
+ --source-id prod-agent-traces \
33
+ --check
34
+ ```
35
+
36
+ ## Register the source
37
+
38
+ Register:
39
+
40
+ ```bash
41
+ npx archal autoloop \
42
+ --repo . \
43
+ --source supabase \
44
+ --database-url-env TRACE_DATABASE_URL \
45
+ --source-id prod-agent-traces
46
+ ```
47
+
48
+ This posts the source config to Archal and returns. It does not start a local
49
+ watcher and does not write local source state.
50
+
51
+ ## Secret reference (hosted production)
52
+
53
+ Use a secret reference when the customer already has one:
54
+
55
+ ```bash
56
+ npx archal autoloop \
57
+ --repo . \
58
+ --source postgres \
59
+ --database-url-secret-ref aws-secretsmanager://customer/prod-agent-traces
60
+ ```
61
+
62
+ `--database-url-secret-ref` must not contain a plaintext credential.
@@ -0,0 +1,73 @@
1
+ # Trace schema mapping
2
+
3
+ Full mapping-flag reference for hosted Postgres/Supabase trace sources. Use this
4
+ when the customer's trace and span tables do not match the defaults below.
5
+
6
+ ## Contents
7
+
8
+ - Defaults
9
+ - Custom schema flags
10
+ - Append-only sources
11
+ - Filters for noisy sources
12
+
13
+ ## Defaults
14
+
15
+ | Concept | Default |
16
+ |---------|---------|
17
+ | trace table | `ai_traces` |
18
+ | span table | `ai_spans` |
19
+ | trace id | `id` |
20
+ | span id | `id` |
21
+ | span trace id | `trace_id` |
22
+ | trace updated cursor | `updated_at` |
23
+ | span updated cursor | `updated_at` |
24
+ | cursor mode | `updated_at_id` |
25
+
26
+ ## Custom schema flags
27
+
28
+ For a custom schema, pass mapping flags:
29
+
30
+ ```bash
31
+ npx archal autoloop \
32
+ --repo . \
33
+ --source postgres \
34
+ --database-url-env TRACE_DATABASE_URL \
35
+ --trace-table public.agent_traces \
36
+ --span-table public.agent_spans \
37
+ --trace-id-column trace_id \
38
+ --span-id-column span_id \
39
+ --span-trace-id-column trace_id \
40
+ --parent-span-id-column parent_span_id \
41
+ --trace-updated-at-column updated_at \
42
+ --span-updated-at-column updated_at
43
+ ```
44
+
45
+ ## Append-only sources
46
+
47
+ For append-only sources:
48
+
49
+ ```bash
50
+ npx archal autoloop \
51
+ --repo . \
52
+ --source supabase \
53
+ --database-url-env TRACE_DATABASE_URL \
54
+ --cursor-mode created_at_id \
55
+ --trace-created-at-column created_at \
56
+ --span-created-at-column created_at
57
+ ```
58
+
59
+ ## Filters for noisy sources
60
+
61
+ Use filters for noisy sources:
62
+
63
+ ```bash
64
+ npx archal autoloop \
65
+ --repo . \
66
+ --source supabase \
67
+ --database-url-env TRACE_DATABASE_URL \
68
+ --source-workspace-id workspace_123 \
69
+ --source-agent-id support-agent \
70
+ --source-status failed error \
71
+ --source-trace-group billing-support \
72
+ --source-limit 250
73
+ ```
@@ -90,6 +90,40 @@ Exit codes: `0` pass, `1` fail or score < threshold, `2` validation error. For G
90
90
 
91
91
  Workspace API keys are runtime and CI credentials bound to one workspace. They can run clones, upload and read traces, and read usage for that workspace. They cannot manage audit events or workspace API keys. Use an owner/admin user credential, either `archal login` or a dashboard-issued user API key, for workspace administration.
92
92
 
93
+ ## Pre-production autonomous loop
94
+
95
+ Use `archal preprod start` when the user wants a coding agent to run a bounded
96
+ pack of scenarios before shipping, remediate failures, rerun, validate, and
97
+ open a draft PR. This is different from post-production `archal autoloop`: it
98
+ starts from repo scenarios and clone runs, not imported production traces.
99
+
100
+ First do a safe dry run:
101
+
102
+ ```bash
103
+ archal preprod start --scenario-count 20 --dry-run --artifacts .archal/preprod
104
+ ```
105
+
106
+ Then, only after the dry-run artifacts look like real agent/scenario failures,
107
+ allow the managed remediation path:
108
+
109
+ ```bash
110
+ archal preprod start \
111
+ --scenario-count 20 \
112
+ --allow-external-execution \
113
+ --remediation-agent codex \
114
+ --validation-command 'pnpm test' \
115
+ --open-pr \
116
+ --pr-command 'gh pr create --draft --fill' \
117
+ --artifacts .archal/preprod
118
+ ```
119
+
120
+ Read `.archal/preprod/preprod-result.json`,
121
+ `.archal/preprod/preprod-failures.json`, and the remediation context before
122
+ summarizing results. Treat runs without validation evidence as local
123
+ remediation passes, not release proof. If a run stops after `initial-runs`,
124
+ `fix`, or `validation`, resume with `archal preprod start --resume
125
+ .archal/preprod --artifacts .archal/preprod`.
126
+
93
127
  ## Artifacts + dashboard
94
128
 
95
129
  - **Local (always written):** `.archal/cache/last-run.json` (summary), `.archal/cache/runs/*.json` (full redacted trace).
@@ -108,6 +142,6 @@ Don't tell users they need `-o json` to save artifacts locally - that's only for
108
142
  ## Docs
109
143
 
110
144
  - Running with an agent: https://docs.archal.ai/guides/run-with-agent
111
- - Existing repo playbook: https://docs.archal.ai/guides/existing-agent-repo
145
+ - Existing repo playbook: https://docs.archal.ai/guides/run-with-agent
112
146
  - Scenario authoring: hand off to the `scenario` skill
113
147
  - Clone sessions: https://docs.archal.ai/guides/clone-sessions