npm - archal - Versions diffs - 0.9.13 → 0.9.15 - Mend

archal 0.9.13 → 0.9.15

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (53)

package/README.md +81 -73
package/bin/archal.cjs +1 -1
package/clone-assets/apify/tools.json +668 -0
package/{twin-assets → clone-assets}/discord/fidelity.json +1 -1
package/{twin-assets → clone-assets}/discord/tools.json +510 -510
package/clone-assets/github/fidelity.json +31 -0
package/{twin-assets → clone-assets}/github/tools.json +113 -3
package/{twin-assets → clone-assets}/google-workspace/fidelity.json +2 -2
package/{twin-assets → clone-assets}/google-workspace/tools.json +10 -10
package/{twin-assets → clone-assets}/jira/fidelity.json +44 -4
package/{twin-assets → clone-assets}/jira/tools.json +1 -1
package/clone-assets/linear/fidelity.json +36 -0
package/{twin-assets → clone-assets}/linear/tools.json +1 -1
package/{twin-assets → clone-assets}/ramp/fidelity.json +1 -1
package/{twin-assets → clone-assets}/ramp/tools.json +1 -1
package/clone-assets/slack/fidelity.json +38 -0
package/{twin-assets → clone-assets}/slack/tools.json +1 -1
package/clone-assets/stripe/fidelity.json +67 -0
package/{twin-assets → clone-assets}/stripe/tools.json +42 -11
package/clone-assets/supabase/fidelity.json +31 -0
package/{twin-assets → clone-assets}/supabase/tools.json +1 -1
package/clone-assets/tavily/tools.json +115 -0
package/dist/cli.cjs +97917 -0
package/dist/cli.d.cts +1 -0
package/dist/harness.cjs +62 -0
package/dist/harness.d.cts +20 -0
package/dist/index.cjs +5 -87878
package/dist/index.d.cts +3 -1
package/dist/seed/dynamic-generator.cjs +8796 -9201
package/dist/seed/dynamic-generator.d.cts +39 -0
package/dist/vitest/chunk-2GY4SFKE.js +29279 -0
package/dist/vitest/{chunk-KTMNDJFB.js → chunk-WVRVNHAX.js} +45255 -44440
package/dist/vitest/index.cjs +56408 -31519
package/dist/vitest/index.d.ts +61 -27
package/dist/vitest/index.js +145 -1807
package/dist/vitest/runtime/hosted-session-reaper.cjs +34766 -28922
package/dist/vitest/runtime/hosted-session-reaper.js +1 -2
package/dist/vitest/runtime/setup-files.js +2 -3
package/package.json +19 -10
package/skills/eval/SKILL.md +113 -0
package/skills/onboard/SKILL.md +67 -36
package/skills/scenario/SKILL.md +22 -20
package/skills/vitest/SKILL.md +25 -24
package/dist/vitest/chunk-L6HSMJ3F.js +0 -2216
package/dist/vitest/chunk-YJICENME.js +0 -1230
package/dist/vitest/src-JGHX6UKK.js +0 -94
package/skills/audit/SKILL.md +0 -55
package/skills/test/SKILL.md +0 -109
package/twin-assets/github/fidelity.json +0 -13
package/twin-assets/linear/fidelity.json +0 -18
package/twin-assets/slack/fidelity.json +0 -20
package/twin-assets/stripe/fidelity.json +0 -22
package/twin-assets/supabase/fidelity.json +0 -13

package/dist/vitest/runtime/hosted-session-reaper.js CHANGED Viewed

@@ -4,8 +4,7 @@ import {
   createHostedAuthLease,
   parsePositiveInteger,
   runHostedSessionReaper
-} from "../chunk-KTMNDJFB.js";
-import "../chunk-YJICENME.js";
+} from "../chunk-WVRVNHAX.js";
 // src/runtime/hosted-session-reaper.ts
 var VITEST_AUTH_LEASE_OPTIONS = {

package/dist/vitest/runtime/setup-files.js CHANGED Viewed

@@ -1,8 +1,7 @@
 import {
   bootstrapArchalVitestRouting
-} from "../chunk-L6HSMJ3F.js";
-import "../chunk-KTMNDJFB.js";
-import "../chunk-YJICENME.js";
+} from "../chunk-2GY4SFKE.js";
+import "../chunk-WVRVNHAX.js";
 // src/runtime/setup-files.ts
 import { existsSync, rmSync } from "fs";

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "archal",
-  "version": "0.9.13",
-  "description": "Test your agents & integrations against digital twins",
+  "version": "0.9.15",
+  "description": "Test your agents & integrations against service clones",
   "type": "module",
   "main": "dist/index.cjs",
   "types": "dist/index.d.cts",
@@ -13,6 +13,10 @@
       "types": "./dist/index.d.cts",
       "default": "./dist/index.cjs"
     },
+    "./harness": {
+      "types": "./dist/harness.d.cts",
+      "default": "./dist/harness.cjs"
+    },
     "./vitest": {
       "types": "./dist/vitest/index.d.ts",
       "import": "./dist/vitest/index.js",
@@ -31,20 +35,27 @@
     "agent",
     "testing",
     "mcp",
-    "digital-twin",
+    "service-clone",
     "archal",
     "vitest"
   ],
   "engines": {
-    "node": ">=20"
+    "node": ">=22"
   },
   "files": [
     "bin",
     "dist",
     "skills",
-    "twin-assets",
+    "clone-assets",
     "LICENSE"
   ],
+  "scripts": {
+    "verify:artifacts": "node scripts/assert-artifacts.mjs",
+    "prepack": "pnpm run verify:artifacts",
+    "prepare": "node scripts/prepare.cjs",
+    "typecheck:raw": "node --check bin/archal.cjs && node --check scripts/assert-artifacts.mjs && node --check scripts/prepare.cjs",
+    "typecheck": "pnpm run typecheck:raw"
+  },
   "peerDependencies": {
     "vitest": ">=2.1.0"
   },
@@ -53,9 +64,7 @@
       "optional": true
     }
   },
-  "scripts": {
-    "verify:artifacts": "node scripts/assert-artifacts.mjs",
-    "typecheck:raw": "node --check bin/archal.cjs && node --check scripts/assert-artifacts.mjs && node --check scripts/prepare.cjs",
-    "typecheck": "pnpm run typecheck:raw"
+  "dependencies": {
+    "picomatch": "^4.0.4"
   }
-}
+}

package/skills/eval/SKILL.md ADDED Viewed

@@ -0,0 +1,113 @@
+---
+name: eval
+description: Run Archal scenarios or inline tasks against hosted clones, diagnose failed runs, and interpret satisfaction scores. Triggers on "run my scenario", "evaluate my agent", "archal run X", "debug this failing run", "what does this satisfaction score mean".
+user-invocable: true
+argument-hint: "[scenario.md or task description]"
+---
+# Archal Eval Runner
+You run Archal scenarios and inline tasks, then help the user interpret the results. For setting up the agent path or `.archal.json` in a fresh repo, hand off to the `onboard` skill.
+## What only you know (product mental model)
+- `archal run` spawns the user's agent as a child process. The agent needs:
+  - A **runnable agent path**. Two ways to supply it: explicit `--harness <path>` (e.g. `./.archal/harness.mjs` from `archal init`), or `.archal.json` with an `agent` command. Repo-local auto-discovery also walks up from cwd for a top-level `harness.{ts,js,mjs,cjs}`.
+  - A **headless boundary** - no UI, no browser OAuth. The process is spawned without a shell, so interactive auth hangs forever.
+  - Env vars - auto-injected. `AGENT_TASK` is the prompt; service clones are reached through normal service URLs in a controlled runtime.
+- Every `archal run` writes local artifacts under `.archal/cache/last-run.json` and `.archal/cache/runs/*.json` **regardless** of `--output`. `--output json` is only for machine-readable stdout; it's not needed for local persistence.
+- **Satisfaction score** = (runs passing all criteria) / (total runs). `[D]` criteria are deterministic state checks; `[P]` criteria are LLM-judged from trace + final state.
+## Preflight the harness before a run
+When the agent path is uncertain, or after any change to the harness file, smoke-test the harness directly before `archal run`:
+```bash
+AGENT_TASK="Reply with OK and do not use tools." node ./.archal/harness.mjs
+```
+A wired harness that exits cleanly with no service calls is ready. If it says the starter harness is still a stub, edit `.archal/harness.mjs` to call the user's Cursor, Codex, Claude Code, or custom agent first. Other failures this smoke test catches: no runnable entrypoint, UI-boot assumptions, missing provider keys, and service bridge misconfiguration.
+If a local harness returns text but records zero clone-observed calls, check for real-service auth signals such as `api.github.com` plus `401`/`Bad credentials`. That means the harness did not reach the clone. Fix by running the SDK under sandbox/Docker routing, or by wiring the SDK to the clone REST URL pattern shown by `archal clone start <service>` / `archal clone status` and authenticating with `ARCHAL_TOKEN`.
+## Running
+Scenario from a file:
+```bash
+archal run scenario.md
+archal run scenario.md --runs 5 --seed enterprise-repo   # N runs -> satisfaction score
+```
+Inline task (no scenario file):
+```bash
+archal run --task "Create an issue titled hello" --harness ./.archal/harness.mjs --clone github
+```
+`--task` only replaces the scenario file - it still needs a runnable agent path. `--clone` is required with `--task`; repeat or comma-separate for multiple clones.
+When `.archal.json` exists in cwd, bare `archal run` uses it. If the user doesn't have one yet, that's setup - hand off to the `onboard` skill, which owns harness creation and `.archal.json` scaffolding.
+## Interpret results
+Score breakdown:
+- `100%` = every run passed every criterion
+- `80%` = 4/5 runs passed
+- `0%` = none passed
+Criterion types:
+- `[D]` - deterministic state check. A failure is real; never a model variance artifact.
+- `[P]` - LLM judge reads trace + final state. A single failure can be variance; re-run with `--runs 3+` to confirm before acting on it.
+## Diagnose failures
+Re-run with `-v` for the full trace, then classify with these signals:
+- **Agent bug** - wrong tool called, wrong arguments, stopped early.
+  *Signals:* trace shows the correct tool was available but the agent chose another; or arguments are malformed.
+  *Fix:* agent prompt, tool wiring, or underlying model.
+- **Scenario bug** - criteria are too strict, ambiguous, or contradict the Setup.
+  *Signals:* agent clearly did the right thing but a `[D]` criterion expects an exact count the Setup didn't guarantee; or two criteria contradict each other.
+  *Fix:* make Setup more specific, or relax the criterion. Use the `scenario` skill.
+- **Seed mismatch** - clone state doesn't match what Setup describes.
+  *Signals:* agent's first introspection tool call returns unexpected state (e.g. Setup says "4 stale issues" but the seed has 3).
+  *Fix:* different seed, or adjust Setup to match. `archal seed list <clone>` to browse.
+- **Harness bug** - agent process never started, crashed immediately, or hung.
+  *Signals:* no tool calls in the trace, stderr shows a boot error, or the run times out at the configured `--timeout`.
+  *Fix:* smoke-test the harness directly with `AGENT_TASK="Reply with OK." node ./.archal/harness.mjs`, then look for an untouched starter stub, UI-only imports, missing provider keys, or interactive auth.
+## CI mode
+```bash
+archal run scenario.md --runs 3 --pass-threshold 80 -o json -q
+```
+Exit codes: `0` pass, `1` fail or score < threshold, `2` validation error. For GitHub Actions, inject `ARCHAL_TOKEN` as a secret. Use a workspace API key (`archal_ws_...`) for CI, not a personal token.
+Workspace API keys are runtime and CI credentials bound to one workspace. They can run clones, upload and read traces, and read usage for that workspace. They cannot manage audit events or workspace API keys. Use an owner/admin user credential, either `archal login` or a dashboard-issued user API key, for workspace administration.
+## Artifacts + dashboard
+- **Local (always written):** `.archal/cache/last-run.json` (summary), `.archal/cache/runs/*.json` (full redacted trace).
+- **Hosted:** every run also uploads to https://www.archal.ai/dashboard - useful for sharing a failing trace with a colleague or comparing across agent model versions.
+Don't tell users they need `-o json` to save artifacts locally - that's only for stdout.
+## Anti-patterns
+- Don't re-document the `archal run` flag list here. `archal run --help` and https://docs.archal.ai/cli/run own that - they'll drift if duplicated.
+- Don't guess the agent path. If the user doesn't have `--harness`, a repo-local harness, or `.archal.json`, hand off to `onboard` - it owns setup.
+- Don't promote local proxy or Archal-owned route env as normal service simulation. Scored runs use a controlled runtime with transparent TLS interception against real service domains; uncontainerized proxy routing is low-fidelity debug only.
+- Don't classify a single `[P]` failure as an agent bug without re-running. Probabilistic criteria need sample size.
+- Don't treat a `[D]` failure as model variance. Deterministic failures are real bugs.
+## Docs
+- Running with an agent: https://docs.archal.ai/guides/run-with-agent
+- Existing repo playbook: https://docs.archal.ai/guides/existing-agent-repo
+- Scenario authoring: hand off to the `scenario` skill
+- Clone sessions: https://docs.archal.ai/guides/clone-sessions

package/skills/onboard/SKILL.md CHANGED Viewed

@@ -6,35 +6,40 @@ user-invocable: true
 # Archal Onboard
-You are setting up Archal in this project. Archal tests AI agents against digital twins of real services (GitHub, Slack, Stripe, etc.). Handle installation and auth yourself; delegate the workflow-specific setup to the matching sub-skill.
+You are setting up Archal in this project. Archal tests AI agents against service clones of real services (GitHub, Slack, Stripe, etc.). Handle installation and auth yourself; delegate the workflow-specific setup to the matching sub-skill.
 ## If this is a cold-start
 The user may have landed here without running `npx archal init` first. If the
 CLI is missing (see "Install + auth" below) AND no `.archal-manifest.json`
-exists in `.claude/skills/`, the canonical first command is:
+exists in any skill directory (`.claude/skills/`, `.codex/skills/`,
+`.cursor/skills/`, `.windsurf/skills/`), the canonical first command is:
 ```bash
 npx archal init
 ```
-That adds `archal` as a devDependency and reinstalls these skills at the
-right version. Re-invoke the onboard skill after it completes.
+That adds `archal` as a devDependency, installs skills for every detected
+agent platform (Claude Code, Codex, Cursor, Windsurf), and creates a starter
+`.archal.json`, `.archal/harness.mjs`, and `scenarios/first-run.md`. Re-invoke
+the onboard skill after it completes.
 ## Discover first
 Before asking anything, read the repo:
-1. `package.json` deps → infer likely twins:
-   - `@octokit/rest`, `octokit` → `github`
-   - `stripe` → `stripe`
-   - `@slack/web-api`, `@slack/bolt` → `slack`
-   - `@linear/sdk` → `linear`
-   - `@supabase/supabase-js` → `supabase`
-   - `googleapis`, `@google-cloud/*` → `google-workspace`
-   - `jira-client`, `jira.js` → `jira`
+1. `package.json` deps -> infer likely clones:
+   - `@octokit/rest`, `octokit` -> `github`
+   - `stripe` -> `stripe`
+   - `@slack/web-api`, `@slack/bolt` -> `slack`
+   - `@linear/sdk` -> `linear`
+   - `@supabase/supabase-js` -> `supabase`
+   - `googleapis`, `@google-cloud/*` -> `google-workspace`
+   - `jira-client`, `jira.js` -> `jira`
+   - Apify SDK or `api.apify.com` usage -> `apify`
+   - Tavily SDK or `api.tavily.com` usage -> `tavily`
 2. Existing vitest config? Existing scenarios? Existing `.archal.json`? Those change which workflow makes sense.
-3. If no `package.json` or no matching deps: ask "Which services does your agent interact with?" and show the full list: `github`, `slack`, `stripe`, `linear`, `jira`, `supabase`, `google-workspace`, `ramp`.
+3. If no `package.json` or no matching deps: ask "Which services does your agent interact with?" and point them to the clone catalog (`archal clone --json`) rather than maintaining a separate list here.
 ## Install + auth
@@ -46,10 +51,21 @@ archal login                # OAuth browser flow, or: archal login --token <toke
 archal usage                # verify auth + plan
 ```
-In CI, set `ARCHAL_TOKEN` instead of running `archal login`.
+In CI, set `ARCHAL_TOKEN` to a **workspace API key** (`archal_ws_...`)
+instead of running `archal login`. Workspace keys are bound to one workspace,
+do not expire when a team member leaves, and are the recommended auth for CI.
+Create one with `archal workspace api-key create <label> --scope sessions:write`
+(requires owner or admin role) or from the dashboard under Settings > API Keys.
+Personal tokens (`arc_...`) are fine for local dev but should not be used in CI.
+Treat workspace API keys as runtime and CI credentials, not governance
+credentials. They can run clones, upload and read traces, and read usage for
+their bound workspace. They cannot manage workspace API keys or audit events.
+Use an owner/admin user credential, either `archal login` or a dashboard-issued
+user API key, for workspace administration.
 If something feels wrong (missing CLI, stale skills), these are the
-recovery commands — don't run them otherwise:
+recovery commands - don't run them otherwise:
 ```bash
 npx archal --version           # CLI reachable? prints e.g. 0.9.12
@@ -58,54 +74,69 @@ npx archal init --skills-only  # re-stage skills if they drifted
 ## Pick a workflow
-Confirm detected twins, then ask which of these the user wants. Each delegates to a sub-skill where appropriate — don't inline those flows.
+Confirm detected clones, then ask which of these the user wants. Each delegates to a sub-skill where appropriate - don't inline those flows.
+### The `agent` command (Options A and B both need this)
+`archal run` spawns the agent as a child process, headlessly - no UI, no browser auth. The `agent` field in `.archal.json` is the command that invokes it (spawned directly, without a shell). Typical shapes:
+- `"agent": { "command": "node", "args": ["./.archal/harness.mjs"] }` - scaffolded by `archal init`
+- `"agent": { "command": "npx", "args": ["tsx", "./.archal/harness.ts"] }` - custom TS entrypoint
+- `"agent": { "command": "node", "args": ["./agent.js"] }` - plain Node script
+- `"agent": { "command": "python", "args": ["agent.py"] }` - Python agent
-### Option A — Test an agent with scenarios
+If the user doesn't have a harness yet, prefer `npx archal init`; it creates `./.archal/harness.mjs`, points `.archal.json` at it, and adds a starter scenario without overwriting existing files. The generated harness is a guarded stub: Archal refuses to score it until the user edits it to call their Cursor, Codex, Claude Code, or custom agent. A custom harness should read `AGENT_TASK` from env, call the agent runtime, print `{ "text": "..." }` to stdout, and call `reportAgentMetrics()` from `archal/harness` with accumulated `{ inputTokens, outputTokens, llmCallCount }` before exit. Service clients need one explicit routing mode: use sandbox/Docker routing when the harness calls normal service URLs such as `https://api.github.com`, or configure SDK base URLs to the clone REST URL pattern shown by `archal clone start <service>` / `archal clone status` and authenticate those requests with `ARCHAL_TOKEN`. Alternative: skip `agent` in `.archal.json` and pass `--harness <path>` per-run.
-Write markdown scenario files that describe setup, prompt, and success criteria; `archal run` executes them against twins.
+### Option A - Evaluate an agent with scenarios
+Write markdown scenario files that describe setup, prompt, and success criteria; `archal run` executes them against clones.
 1. Create `.archal.json`:
    ```json
-   { "agent": "<agent command>", "twins": ["<detected twins>"] }
+   { "agent": { "command": "<agent command>", "args": ["<arg1>", "..."] }, "clones": ["<detected clones>"], "scenarios": ["scenarios/first-run.md"] }
    ```
-2. **Delegate to the `scenario` skill** to author a starter scenario. Don't paste a canned example here — the skill knows the markdown format and success-criteria syntax.
-3. Run: `archal run scenarios/<first>.md`.
+2. **Delegate to the `scenario` skill** to author a starter scenario. Don't paste a canned example here - the skill knows the markdown format and success-criteria syntax.
+3. Run: `archal run scenarios/<first>.md`. **Hand off to the `eval` skill** for result interpretation and failure diagnosis.
+### Option B - Run quick inline tasks
-### Option B — Run quick inline tasks
+Same `.archal.json` as Option A (inline `--task` still needs an agent). Use this when the user wants ad-hoc runs before committing to scenario files.
-1. `.archal.json` with just twins:
+1. `.archal.json`:
    ```json
-   { "twins": ["<detected twins>"] }
+   {
+     "agent": { "command": "node", "args": ["./.archal/harness.mjs"] },
+     "clones": ["<detected clones>"]
+   }
    ```
-2. Demo: `archal run --task "Create an issue titled hello" --twin github`.
-No sub-skill needed — this is a one-shot.
+2. Demo: `archal run --task "Create an issue titled hello" --clone github`.
+3. For the generated first-run project, use bare `archal run` after wiring `.archal/harness.mjs`.
-### Option C — Twins in a Vitest suite
+### Option C - Clones in a Vitest suite
-**Delegate to the `vitest` skill.** It handles reading the existing vitest config, identifying which tests should route, picking the right composition pattern, and seeding the twins.
+**Delegate to the `vitest` skill.** It handles reading the existing vitest config, identifying which tests should route, picking the right composition pattern, and seeding the clones.
 Do not paste a sample config here. The right shape depends on what's already in the repo.
-### Option D — Persistent twins to develop against
+### Option D - Persistent clones to develop against
-Run: `archal twin start <detected twins>` — gives live twin URLs the user's SDK clients can point at.
+Run: `archal clone start <detected clones>` - gives live clone URLs the user's SDK clients can point at. `archal clone status` shows the active session; `archal clone stop` tears down.
 ## Verify
-Run the first test or task and show the result.
+Run the first scenario or task. For Options A and B, hand off to the `eval` skill to interpret the satisfaction score and diagnose failures - that skill owns the runtime mental model (`[D]` vs `[P]` criteria, trace inspection, harness execution diagnostics).
 ## `.archal.json` schema
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `agent` | string or `{ command, args }` | yes (for scenarios) | | Shell command to run the agent |
+| `agent` | `{ command, args?, env? }` | yes (for scenarios) | | Agent command as an object (not a plain string) |
 | `title` | string | no | | Display name for reports |
-| `twins` | string[] | no | inferred | Which twins to provision |
+| `clones` | string[] | no | inferred | Which clones to provision |
 | `scenarios` | string[] | no | | Scenario file paths relative to config |
-| `seeds` | `Record<string, string>` | no | | Per-twin seed overrides |
+| `seeds` | `Record<string, string>` | no | | Per-clone seed overrides |
 | `agentModel` | string | no | | LLM model the agent uses |
-| `model` | string | no | `gemini-2.5-pro` | Evaluator model |
+| `evaluatorModel` | string | no | Archal LLM judge | Evaluator/judge model; set this only when bringing your own judge key |
 | `runs` | number | no | `1` | Runs per scenario |
 | `timeout` | number | no | `180` | Timeout per run in seconds |

package/skills/scenario/SKILL.md CHANGED Viewed

@@ -7,7 +7,7 @@ argument-hint: "[scenario description or file path]"
 # Archal Scenario Writer
-You write and edit Archal scenario files. Scenarios are markdown files that define a test for an AI agent running against digital twins.
+You write and edit Archal scenario files. Scenarios are markdown files that define a test for an AI agent running against service clones.
 ## Scenario format
@@ -24,11 +24,11 @@ The task instruction given to the agent.
 Answer key for the evaluator. Never shown to the agent.
 ## Success Criteria
-- [D] Deterministic criterion checked against twin state
+- [D] Deterministic criterion checked against clone state
 - [P] Probabilistic criterion judged by LLM
 ## Config
-twins: github
+clones: github
 timeout: 90
 runs: 3
 ```
@@ -49,7 +49,7 @@ runs: 3
 Each criterion is a bullet point. Tag with `[D]` or `[P]`:
-- `[D]` = **Deterministic**. Checked against twin state programmatically. Use for counts, existence checks, state assertions. No LLM cost.
+- `[D]` = **Deterministic**. Checked against clone state programmatically. Use for counts, existence checks, state assertions. No LLM cost.
 - `[P]` = **Probabilistic**. Judged by LLM evaluator from the trace and final state. Use for tone, quality, correctness, reasoning.
 If no tag is provided, Archal infers the type:
@@ -78,19 +78,23 @@ If no tag is provided, Archal infers the type:
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| `twins` | comma-separated | inferred from content | Which twins to use |
+| `clones` | comma-separated | inferred from content | Which clones to use |
 | `seed` | string | | Named seed to load |
 | `timeout` | integer | `180` | Seconds per run |
 | `runs` | integer | `1` | Number of runs |
-| `evaluator-model` | string | `gemini-2.5-pro` | LLM for `[P]` criteria |
+| `evaluator-model` | string | Archal LLM judge | LLM for `[P]` criteria; set this only when bringing your own judge key |
 | `tags` | comma-separated | | Scenario tags |
 Aliases for `evaluator-model`: `evaluator`, `evaluatormodel`, `model`.
-## Available twins and general-purpose seeds
+## Available clones and general-purpose seeds
-| Twin | Seeds |
+The full clone and seed surface is manifest-backed. Prefer `archal clone --json`
+and `archal seed list` over maintaining a separate list in this skill.
+| Clone | Seeds |
 |------|-------|
+| `apify` | `empty` |
 | `github` | `empty`, `small-project`, `enterprise-repo`, `ci-cd-pipeline`, `stale-issues`, `large-backlog` |
 | `slack` | `empty`, `engineering-team`, `busy-workspace`, `incident-active` |
 | `stripe` | `empty`, `small-business`, `checkout-flow`, `subscription-lifecycle`, `subscription-heavy` |
@@ -98,13 +102,13 @@ Aliases for `evaluator-model`: `evaluator`, `evaluatormodel`, `model`.
 | `linear` | `empty`, `small-team`, `engineering-org`, `multi-team`, `busy-backlog` |
 | `supabase` | `empty`, `small-project`, `saas-starter`, `ecommerce` |
 | `google-workspace` | `empty`, `assistant-baseline`, `gmail-busy-inbox`, `calendar-packed-week` |
+| `tavily` | `empty` |
 | `ramp` | `empty`, `default` |
 | `discord` | `empty`, `small-server`, `harvested` |
-| `telegram` | `empty`, `harvested` |
-## Twin auto-detection from content
+## Clone auto-detection from content
-If no `twins:` config is set, Archal infers twins from keywords in Setup, Expected Behavior, and Prompt:
+If no `clones:` config is set, Archal infers clones from keywords in Setup, Expected Behavior, and Prompt:
 - `github`, `repository`, `pull request`, `create_issue` -> `github`
 - `slack`, `slack channel`, `send_message` -> `slack`
@@ -115,21 +119,19 @@ If no `twins:` config is set, Archal infers twins from keywords in Setup, Expect
 - `google workspace`, `gmail`, `calendar event`, `inbox` -> `google-workspace`
 - `discord`, `guild`, `text channel` -> `discord`
-Not every twin has auto-detect keywords — `telegram` in particular has
-none. If your scenario uses `telegram`, set `twins: telegram` in the
-Config block or in `.archal.json`. `ramp` auto-detects on `ramp`,
+Not every clone has auto-detect keywords. `ramp` auto-detects on `ramp`,
 `bill`, `expense`, `reimbursement`, `fund`, `card spend`.
 ## Multi-service scenarios
-Use multiple twins by listing them in config:
+Use multiple clones by listing them in config:
 ```markdown
 ## Config
-twins: github, slack
+clones: github, slack
 ```
-The Setup section can describe state across both services. Each twin gets its own seed.
+The Setup section can describe state across both services. Each clone gets its own seed.
 ## Validation
@@ -137,18 +139,18 @@ Run `archal scenario list` to verify scenarios parse correctly. A valid scenario
 - A title (H1 heading)
 - A Prompt section
 - At least one success criterion
-- At least one referenced twin (explicit or inferred)
+- At least one referenced clone (explicit or inferred)
 - Positive timeout and runs values
 ## Common mistakes to avoid
 1. Writing `[D]` criteria that require subjective judgment
 2. Writing `[P]` criteria that could be checked deterministically
-3. Forgetting to specify which twin the scenario uses
+3. Forgetting to specify which clone the scenario uses
 4. Writing Setup descriptions that are too vague for seed generation
 5. Using seed names that don't exist (check the seed table above)
 ## Documentation
 - Writing scenarios: https://docs.archal.ai/guides/writing-scenarios
-- Twins and seeds: https://docs.archal.ai/twins/overview
+- Clones and seeds: https://docs.archal.ai/clones/overview

package/skills/vitest/SKILL.md CHANGED Viewed

@@ -1,27 +1,27 @@
 ---
 name: vitest
-description: Wire `archal/vitest` into a user's existing Vitest suite so integration tests hit hosted twins instead of real SaaS. Use when the user asks to "add archal to vitest", "wire up vitest with twins", "test against twins in vitest", or when invoked from `archal-onboard` Option C.
+description: Wire `archal/vitest` into a user's existing Vitest suite so integration tests hit hosted clones instead of real SaaS. Use when the user asks to "add archal to vitest", "wire up vitest with clones", "test against clones in vitest", or when invoked from `archal-onboard` Option C.
 user-invocable: true
 ---
 # Archal Vitest Integration
-Wire `archal/vitest` into the user's existing Vitest suite. Don't paste a canned config — inspect what's already there, surface the right choices, and compose on top of it.
+Wire `archal/vitest` into the user's existing Vitest suite. Don't paste a canned config - inspect what's already there, surface the right choices, and compose on top of it.
 ## What only you know
 Claude already knows what Vitest is and how a fetch interceptor works. These are the Archal-specific facts that determine your choices:
 - `archal/vitest` is a **subpath export of the `archal` npm package**. Users do `pnpm add -D archal`, not `@archal/vitest`.
-- Route mode installs a setup file that rewrites `fetch()` calls to hosted twins. **Test code stays unchanged** — same SDKs, same URLs.
-- Twins are hosted on **ECS Fargate** in Archal's AWS. First run = ~30s cold start. Subsequent runs within the 30-min idle TTL = ~2s. Tell the user; they'll think it's hung otherwise.
+- Route mode installs a setup file that rewrites `fetch()` calls to hosted clones. **Test code stays unchanged** - same SDKs, same URLs.
+- Clones are hosted on **ECS Fargate** in Archal's AWS. First run = ~30s cold start. Subsequent runs within the 30-min idle TTL = ~2s. Tell the user; they'll think it's hung otherwise.
 - Session cache key = `(projectName, services, seeds)` hash. Change any of those and the cache misses.
-- **Seeds = starting state.** Omit to get the twin's default. Named seeds give fixtures (e.g. `small-project` for GitHub, `small-business` for Stripe). Never ask "what seed?" open-ended — the user doesn't know the catalog.
-- Route-mode twins available: `github`, `slack`, `stripe`, `jira`, `supabase`, `google-workspace`. Not yet: `linear`, `ramp`.
+- **Seeds = starting state.** Omit to get the clone's default. Named seeds give fixtures (e.g. `small-project` for GitHub, `small-business` for Stripe). Never ask "what seed?" open-ended - the user doesn't know the catalog.
+- Route-mode clone availability is defined by `SHARED_ROUTE_MANIFESTS` in `packages/route-runtime-core/src/manifests.ts`; use `archal clone --json` / `archal seed list` before naming supported services.
 ## Discover before you ask
-1. `package.json` deps → infer likely twins (`@octokit/rest` → github, `stripe` → stripe, `@slack/web-api` → slack, `@supabase/supabase-js` → supabase, `googleapis` → google-workspace, `jira.js` → jira).
+1. `package.json` deps -> infer likely clones (`@octokit/rest` -> github, `stripe` -> stripe, `@slack/web-api` -> slack, `@supabase/supabase-js` -> supabase, `googleapis` -> google-workspace, `jira.js` -> jira).
 2. Read any existing `vitest.config.ts` / `vitest.config.js` / `vitest.workspace.ts`. Note `setupFiles`, `include`/`exclude`, `reporters`, `projects`.
 3. Grep test files (`__tests__/`, `tests/`, `*.test.ts`) for outbound calls: `fetch(`, `Octokit`, `new Stripe`, `WebClient`, `createClient`. These are the routing candidates.
 4. Auth: `archal usage` tells you if they're logged in. `archal login` or `ARCHAL_TOKEN` in CI.
@@ -31,19 +31,19 @@ Claude already knows what Vitest is and how a fetch interceptor works. These are
 Offer your inferred answer as the default.
 1. **Scope.** "I found these N test files making outbound HTTP calls: [list]. All of them? Or a specific subset (by folder, glob, or file list)?"
-2. **Twin set.** "From deps I see `[github, stripe]`. Complete, or am I missing/over-including?"
-3. **Seeds (per twin, with inline catalog).** For each twin, present three choices:
-   > "For `github`: (a) default empty twin, (b) `small-project` seed (one repo, few issues/PRs — good starting point), (c) custom seed name. Which?"
+2. **Clone set.** "From deps I see `[github, stripe]`. Complete, or am I missing/over-including?"
+3. **Seeds (per clone, with inline catalog).** For each clone, present three choices:
+   > "For `github`: (a) default empty clone, (b) `small-project` seed (one repo, few issues/PRs - good starting point), (c) custom seed name. Which?"
 ## Pick a config pattern
 Three patterns. The right one depends on what you saw in discovery.
-### Pattern A — wrap existing `vitest.config.ts` with `withArchal` (all tests hit twins)
+### Pattern A - wrap existing `vitest.config.ts` with `withArchal` (all tests hit clones)
 For dedicated integration-test packages where every test should route. `withArchal` is a merge helper: it preserves everything in the existing `test` block (`coverage`, `alias`, `globalSetup`, `poolOptions`, custom reporters, etc.) and additively composes Archal's setup file, reporter, and session env on top.
-Edit their existing file in place — the change is one line on the `test:` value:
+Edit their existing file in place - the change is one line on the `test:` value:
 ```ts
 import { defineConfig } from 'vitest/config';
@@ -71,9 +71,9 @@ Merge behavior: `setupFiles` and `reporters` are concatenated, `env` is merged (
 If the user is starting from scratch (no existing `test` block), pass `{}` as the first argument: `withArchal({}, { services })`.
-### Pattern B — workspace with a separate Archal project (subset of tests hit twins)
+### Pattern B - workspace with a separate Archal project (subset of tests hit clones)
-Most common shape. Unit tests stay fast; only the routed subset provisions twins.
+Most common shape. Unit tests stay fast; only the routed subset provisions clones.
 ```ts
 import { archalVitestProject } from 'archal/vitest';
@@ -82,7 +82,7 @@ export default [
   './vitest.config.ts', // their existing unit project untouched
   archalVitestProject(
     {
-      name: 'hosted-twins',
+      name: 'hosted-clones',
       services: {
         github: { mode: 'route', seed: 'small-project' },
         stripe: { mode: 'route' },
@@ -93,11 +93,11 @@ export default [
 ];
 ```
-### Pattern C — separate config + npm script (strict isolation)
+### Pattern C - separate config + npm script (strict isolation)
 Create `vitest.integration.config.ts` using Pattern A, and add the npm script `"test:integration": "vitest -c vitest.integration.config.ts"`. Use this when `pnpm test` must stay unit-only.
-## Apply → verify
+## Apply -> verify
 1. Install `archal` if missing.
 2. Write/edit the config.
@@ -105,6 +105,7 @@ export default [
 4. Run one routed test: `pnpm vitest run <path>`.
 To confirm that routing is live from inside a test:
 ```ts
 import { getInstalledArchalVitestSession } from 'archal/vitest';
 console.log(getInstalledArchalVitestSession()?.resolvedRuntime.resolvedServices);
@@ -112,18 +113,18 @@ console.log(getInstalledArchalVitestSession()?.resolvedRuntime.resolvedServices)
 ## Failure modes
-- **Real API response instead of twin response** — test file isn't in the routed project's `include` glob.
-- **401/auth at setup** — `ARCHAL_TOKEN` unset or `archal login` not run.
-- **First run takes 30+ seconds** — ECS cold-start, expected. Warn the user up front.
-- **Seed state unexpected** — inspect via `getInstalledArchalVitestSession()`; confirm resolved seed matches intent.
-- **`resetArchalTwins()` not restoring** — call in `beforeEach`, not `beforeAll`.
-- **CI credential race** (parallel jobs corrupting `~/.archal/credentials.json`) — export `ARCHAL_TOKEN` directly; don't rely on the credential file.
+- **Real API response instead of clone response** - test file isn't in the routed project's `include` glob.
+- **401/auth at setup** - `ARCHAL_TOKEN` unset or `archal login` not run.
+- **First run takes 30+ seconds** - ECS cold-start, expected. Warn the user up front.
+- **Seed state unexpected** - inspect via `getInstalledArchalVitestSession()`; confirm resolved seed matches intent.
+- **`resetArchalClones()` not restoring** - call in `beforeEach`, not `beforeAll`.
+- **CI credential race** (parallel jobs corrupting `~/.archal/credentials.json`) - export `ARCHAL_TOKEN` directly; don't rely on the credential file.
 ## Anti-patterns
 - Don't route `localhost` or the user's own backend. Route mode is for external SaaS.
 - Don't set `testIsolation: 'serial'` preemptively. Only when you've observed cross-test state leaks.
-- Don't add route mode to tests that don't make outbound HTTP calls — the interceptor install has overhead.
+- Don't add route mode to tests that don't make outbound HTTP calls - the interceptor install has overhead.
 - Don't drive vitest through `.archal.json`. That file is for the CLI `archal run` flow; the vitest integration is self-contained.
 - Don't paste a canonical config without reading what's already in the repo.