amatelier 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. amatelier-0.3.0/.cursor/rules/amatelier.mdc +96 -0
  2. amatelier-0.3.0/.depth/backend-delegation_gate.md +26 -0
  3. amatelier-0.3.0/.depth/current_task.txt +1 -0
  4. amatelier-0.3.0/.depth/manual-ci-local_gate.md +14 -0
  5. amatelier-0.3.0/.depth/openmode-fixes-v1_gate.md +34 -0
  6. amatelier-0.3.0/.depth/security-mitigations-v1_gate.md +33 -0
  7. amatelier-0.3.0/.depth/steward-tool-use_gate.md +27 -0
  8. amatelier-0.3.0/.depth/wire-judge-max-effort_gate.md +15 -0
  9. amatelier-0.3.0/.devcontainer/devcontainer.json +22 -0
  10. amatelier-0.3.0/.env.example +17 -0
  11. amatelier-0.3.0/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  12. amatelier-0.3.0/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
  13. amatelier-0.3.0/.github/copilot-instructions.md +92 -0
  14. amatelier-0.3.0/.github/dependabot.yml +7 -0
  15. amatelier-0.3.0/.github/workflows/ci.yml +56 -0
  16. amatelier-0.3.0/.github/workflows/docs.yml +70 -0
  17. amatelier-0.3.0/.github/workflows/publish.yml +48 -0
  18. amatelier-0.3.0/.github/workflows/release.yml +35 -0
  19. amatelier-0.3.0/.github/workflows/wheel-smoke.yml +241 -0
  20. amatelier-0.3.0/.gitignore +67 -0
  21. amatelier-0.3.0/AGENTS.md +69 -0
  22. amatelier-0.3.0/CHANGELOG.md +104 -0
  23. amatelier-0.3.0/CLAUDE.md +90 -0
  24. amatelier-0.3.0/CODE_OF_CONDUCT.md +48 -0
  25. amatelier-0.3.0/CONTRIBUTING.md +103 -0
  26. amatelier-0.3.0/Dockerfile +37 -0
  27. amatelier-0.3.0/LICENSE +21 -0
  28. amatelier-0.3.0/Makefile +35 -0
  29. amatelier-0.3.0/PKG-INFO +323 -0
  30. amatelier-0.3.0/README.md +266 -0
  31. amatelier-0.3.0/SECURITY.md +99 -0
  32. amatelier-0.3.0/SKILL.md +67 -0
  33. amatelier-0.3.0/benchmarks/.gitkeep +0 -0
  34. amatelier-0.3.0/docker-compose.yml +24 -0
  35. amatelier-0.3.0/docs/explanation/architecture.md +441 -0
  36. amatelier-0.3.0/docs/explanation/steward-design.md +198 -0
  37. amatelier-0.3.0/docs/guides/configure-backend.md +200 -0
  38. amatelier-0.3.0/docs/guides/install.md +111 -0
  39. amatelier-0.3.0/docs/guides/troubleshooting.md +217 -0
  40. amatelier-0.3.0/docs/index.md +28 -0
  41. amatelier-0.3.0/docs/reference/cli.md +243 -0
  42. amatelier-0.3.0/docs/reference/config.md +266 -0
  43. amatelier-0.3.0/docs/reference/protocols/competition.md +106 -0
  44. amatelier-0.3.0/docs/reference/protocols/debrief.md +86 -0
  45. amatelier-0.3.0/docs/reference/protocols/distillation.md +87 -0
  46. amatelier-0.3.0/docs/reference/protocols/gemini-bridge.md +84 -0
  47. amatelier-0.3.0/docs/reference/protocols/learning.md +78 -0
  48. amatelier-0.3.0/docs/reference/protocols/memory-tiers.md +87 -0
  49. amatelier-0.3.0/docs/reference/protocols/research.md +96 -0
  50. amatelier-0.3.0/docs/reference/protocols/roundtable.md +86 -0
  51. amatelier-0.3.0/docs/reference/protocols/sparc-phases.md +86 -0
  52. amatelier-0.3.0/docs/reference/protocols/spark-economy.md +230 -0
  53. amatelier-0.3.0/docs/reference/protocols/verification.md +80 -0
  54. amatelier-0.3.0/docs/tutorials/first-run.md +193 -0
  55. amatelier-0.3.0/examples/README.md +68 -0
  56. amatelier-0.3.0/examples/briefings/full-demo.md +47 -0
  57. amatelier-0.3.0/examples/briefings/hello-world.md +42 -0
  58. amatelier-0.3.0/examples/briefings/self-host-vs-api.md +37 -0
  59. amatelier-0.3.0/examples/briefings/single-worker.md +33 -0
  60. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/README.md +126 -0
  61. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/briefing.md +37 -0
  62. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/digest.json +680 -0
  63. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/latest-result.md +8 -0
  64. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/screenshots/01-header-and-opening.svg +462 -0
  65. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/screenshots/02-gate.svg +362 -0
  66. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/screenshots/03-round-transition.svg +333 -0
  67. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/screenshots/04-session-summary.svg +129 -0
  68. amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/transcript.md +618 -0
  69. amatelier-0.3.0/llm/API.md +1986 -0
  70. amatelier-0.3.0/llm/EXAMPLES.md +364 -0
  71. amatelier-0.3.0/llm/SCHEMA.md +8 -0
  72. amatelier-0.3.0/llm/SPEC.md +471 -0
  73. amatelier-0.3.0/llm/WORKFLOWS.md +450 -0
  74. amatelier-0.3.0/llms-full.txt +3304 -0
  75. amatelier-0.3.0/llms.txt +20 -0
  76. amatelier-0.3.0/mkdocs.yml +81 -0
  77. amatelier-0.3.0/pyproject.toml +107 -0
  78. amatelier-0.3.0/scripts/ci_local.py +76 -0
  79. amatelier-0.3.0/scripts/regen_full.py +53 -0
  80. amatelier-0.3.0/scripts/regen_llm.py +121 -0
  81. amatelier-0.3.0/scripts/regen_sitemap.py +98 -0
  82. amatelier-0.3.0/scripts/regen_tool_rules.py +80 -0
  83. amatelier-0.3.0/scripts/render_session.py +452 -0
  84. amatelier-0.3.0/scripts/run-roundtable.sh +12 -0
  85. amatelier-0.3.0/scripts/run_integration.sh +85 -0
  86. amatelier-0.3.0/src/amatelier/__init__.py +70 -0
  87. amatelier-0.3.0/src/amatelier/agents/clare/CLAUDE.md +146 -0
  88. amatelier-0.3.0/src/amatelier/agents/clare/IDENTITY.md +7 -0
  89. amatelier-0.3.0/src/amatelier/agents/clare/sessions/.gitkeep +0 -0
  90. amatelier-0.3.0/src/amatelier/agents/elena/CLAUDE.md +148 -0
  91. amatelier-0.3.0/src/amatelier/agents/elena/IDENTITY.md +7 -0
  92. amatelier-0.3.0/src/amatelier/agents/elena/sessions/.gitkeep +0 -0
  93. amatelier-0.3.0/src/amatelier/agents/haiku-assistant/CLAUDE.md +86 -0
  94. amatelier-0.3.0/src/amatelier/agents/haiku-assistant/IDENTITY.md +9 -0
  95. amatelier-0.3.0/src/amatelier/agents/haiku-assistant/sessions/.gitkeep +0 -0
  96. amatelier-0.3.0/src/amatelier/agents/judge/CLAUDE.md +136 -0
  97. amatelier-0.3.0/src/amatelier/agents/judge/IDENTITY.md +9 -0
  98. amatelier-0.3.0/src/amatelier/agents/judge/sessions/.gitkeep +0 -0
  99. amatelier-0.3.0/src/amatelier/agents/judge/trace/.gitkeep +0 -0
  100. amatelier-0.3.0/src/amatelier/agents/marcus/CLAUDE.md +146 -0
  101. amatelier-0.3.0/src/amatelier/agents/marcus/IDENTITY.md +7 -0
  102. amatelier-0.3.0/src/amatelier/agents/marcus/sessions/.gitkeep +0 -0
  103. amatelier-0.3.0/src/amatelier/agents/naomi/CLAUDE.md +148 -0
  104. amatelier-0.3.0/src/amatelier/agents/naomi/IDENTITY.md +7 -0
  105. amatelier-0.3.0/src/amatelier/agents/naomi/sessions/.gitkeep +0 -0
  106. amatelier-0.3.0/src/amatelier/agents/opus-admin/CLAUDE.md +267 -0
  107. amatelier-0.3.0/src/amatelier/agents/opus-admin/IDENTITY.md +9 -0
  108. amatelier-0.3.0/src/amatelier/agents/opus-admin/sessions/.gitkeep +0 -0
  109. amatelier-0.3.0/src/amatelier/agents/opus-therapist/CLAUDE.md +139 -0
  110. amatelier-0.3.0/src/amatelier/agents/opus-therapist/IDENTITY.md +8 -0
  111. amatelier-0.3.0/src/amatelier/agents/opus-therapist/sessions/.gitkeep +0 -0
  112. amatelier-0.3.0/src/amatelier/agents/simon/CLAUDE.md +140 -0
  113. amatelier-0.3.0/src/amatelier/agents/simon/IDENTITY.md +7 -0
  114. amatelier-0.3.0/src/amatelier/agents/simon/sessions/.gitkeep +0 -0
  115. amatelier-0.3.0/src/amatelier/agents/therapist/CLAUDE.md +244 -0
  116. amatelier-0.3.0/src/amatelier/agents/therapist/case_notes/.gitkeep +0 -0
  117. amatelier-0.3.0/src/amatelier/cli.py +366 -0
  118. amatelier-0.3.0/src/amatelier/config.json +123 -0
  119. amatelier-0.3.0/src/amatelier/engine/agent_memory.py +877 -0
  120. amatelier-0.3.0/src/amatelier/engine/analytics.py +793 -0
  121. amatelier-0.3.0/src/amatelier/engine/backfill_distill.py +296 -0
  122. amatelier-0.3.0/src/amatelier/engine/classify_concepts.py +202 -0
  123. amatelier-0.3.0/src/amatelier/engine/claude_agent.py +486 -0
  124. amatelier-0.3.0/src/amatelier/engine/db.py +259 -0
  125. amatelier-0.3.0/src/amatelier/engine/distiller.py +257 -0
  126. amatelier-0.3.0/src/amatelier/engine/evolver.py +495 -0
  127. amatelier-0.3.0/src/amatelier/engine/gemini_agent.py +278 -0
  128. amatelier-0.3.0/src/amatelier/engine/gemini_client.py +189 -0
  129. amatelier-0.3.0/src/amatelier/engine/judge_scorer.py +545 -0
  130. amatelier-0.3.0/src/amatelier/engine/migrations/001_initial.sql +32 -0
  131. amatelier-0.3.0/src/amatelier/engine/migrations/002_scores_table.sql +22 -0
  132. amatelier-0.3.0/src/amatelier/engine/migrations/003_spark_ledger.sql +16 -0
  133. amatelier-0.3.0/src/amatelier/engine/migrations/004_byzantine_flags.sql +3 -0
  134. amatelier-0.3.0/src/amatelier/engine/roundtable_runner.py +1715 -0
  135. amatelier-0.3.0/src/amatelier/engine/scorer.py +590 -0
  136. amatelier-0.3.0/src/amatelier/engine/steward_dispatch.py +561 -0
  137. amatelier-0.3.0/src/amatelier/engine/steward_tools.py +391 -0
  138. amatelier-0.3.0/src/amatelier/engine/store.py +841 -0
  139. amatelier-0.3.0/src/amatelier/engine/therapist.py +1419 -0
  140. amatelier-0.3.0/src/amatelier/llm_backend.py +656 -0
  141. amatelier-0.3.0/src/amatelier/paths.py +320 -0
  142. amatelier-0.3.0/src/amatelier/roundtable-server/BRIEFING_TEMPLATE.md +183 -0
  143. amatelier-0.3.0/src/amatelier/roundtable-server/db_client.py +351 -0
  144. amatelier-0.3.0/src/amatelier/roundtable-server/logs/.gitkeep +0 -0
  145. amatelier-0.3.0/src/amatelier/roundtable-server/server.py +380 -0
  146. amatelier-0.3.0/src/amatelier/roundtable-server/test_roundtable.py +339 -0
  147. amatelier-0.3.0/src/amatelier/shared-skills/index.json +1 -0
  148. amatelier-0.3.0/src/amatelier/store/catalog.json +212 -0
  149. amatelier-0.3.0/src/amatelier/store/ledger.json +1 -0
  150. amatelier-0.3.0/src/amatelier/store/skill_templates.py +291 -0
  151. amatelier-0.3.0/src/amatelier/tools/watch_roundtable.py +543 -0
  152. amatelier-0.3.0/tests/fixtures/sample_roundtable.sql +64 -0
  153. amatelier-0.3.0/tests/test_db_integration.py +365 -0
  154. amatelier-0.3.0/tests/test_integration.py +546 -0
  155. amatelier-0.3.0/tests/test_refresh_seeds.py +127 -0
  156. amatelier-0.3.0/tests/test_smoke.py +158 -0
  157. amatelier-0.3.0/tests/test_watcher.py +108 -0
@@ -0,0 +1,96 @@
1
+ ---
2
+ description: amatelier repository rules (Amatayo Standard)
3
+ globs: ["**/*"]
4
+ alwaysApply: true
5
+ ---
6
+
7
+ # Amatelier — Claude Code Instructions
8
+
9
+ Instructions for working on this repository inside Claude Code.
10
+
11
+ ## What this project is
12
+
13
+ A self-evolving multi-model AI team skill for Claude Code.
14
+
15
+ ## Repo layout
16
+
17
+ - `src/amatelier/` — shipped package (the canonical code)
18
+ - `tests/` — mirrors `src/` structure
19
+ - `examples/first_run/` — zero-config runnable demo
20
+ - `docs/` — human documentation (MkDocs, Diataxis tiers)
21
+ - `llm/` — LLM-facing documentation (flat, exhaustive, machine-readable)
22
+ - `scripts/` — shell and one-off utility scripts
23
+ - `.github/workflows/` — CI, publish, release, docs workflows
24
+
25
+ ## Rules
26
+
27
+ 1. **Amatayo Standard.** This repo follows the Amatayo Standard. Structure is enforced by CI.
28
+ 2. **Dual-docs invariant.** Every change that adds a public symbol, CLI flag, or config key must update `llm/SPEC.md` and the relevant `docs/reference/*` file. The `llm/API.md` and `llm/SCHEMA.md` files are generated — don't hand-edit them.
29
+ 3. **`llm/` is flat.** Never create subdirectories in `llm/`. Flat is the invariant.
30
+ 4. **Tests required.** New code requires new tests. `make test` must pass before PR.
31
+ 5. **Conventional commits.** `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`. Releases are driven by commit history.
32
+ 6. **No secrets.** `.env.example` documents variables; real values never committed.
33
+
34
+ ## Two-layer paths (critical)
35
+
36
+ Amatelier is pip-installable; the bundled package must stay read-only at runtime. Two layers exist:
37
+
38
+ - **Bundled layer** — `src/amatelier/` and any files shipped inside the wheel. This is the canonical source. Runtime code MUST NOT write here.
39
+ - **User data layer** — everything returned by `amatelier.paths.user_data_dir()` and its siblings (`user_agent_dir`, `user_db_path`, `user_digest_dir`, `user_briefing_dir`, `user_store_ledger`, `user_novel_concepts`, `user_shared_skills_index`, `user_config_override`). All mutable state goes here.
40
+
41
+ Rules for AI agents editing this repo:
42
+
43
+ - Any code that persists state, writes logs, updates ledgers, or mutates agent memory must route through a `paths.user_*()` helper. Do not hard-code paths under `src/amatelier/` for writes.
44
+ - Persona seed files (per-agent `CLAUDE.md`, `IDENTITY.md` under `src/amatelier/agents/<name>/`) are bundled. Edits to seeds only affect a user's environment after `amatelier refresh-seeds` or a fresh install.
45
+ - Generated files must not be hand-edited: `llm/API.md`, `llm/SCHEMA.md`, `llms.txt`, `llms-full.txt`, `.cursor/rules/*.mdc`, `.github/copilot-instructions.md`. CI regenerates them.
46
+
47
+ ## Three LLM backend modes
48
+
49
+ All LLM calls must go through `amatelier.llm_backend.get_backend()`. The backend abstraction resolves to one of three modes at runtime:
50
+
51
+ | Mode | Selected when | Backend class |
52
+ |---|---|---|
53
+ | `claude-code` | Running inside Claude Code, `claude` binary on PATH | `ClaudeCLIBackend` |
54
+ | `anthropic-sdk` | `ANTHROPIC_API_KEY` present, no Claude Code session | `AnthropicSDKBackend` |
55
+ | `openai-compat` | `OPENAI_API_KEY`, `OPENROUTER_API_KEY`, or local Ollama | `OpenAICompatBackend` |
56
+
57
+ Override with `AMATELIER_MODE=claude-code|anthropic-sdk|openai-compat`.
58
+
59
+ When introducing new LLM calls:
60
+
61
+ - Call `get_backend()` and use the returned object's interface. Do not shell out to the `claude` CLI directly and do not `import anthropic` at the call site.
62
+ - Any new backend capability must be added to the `LLMBackend` Protocol in `src/amatelier/llm_backend.py` and implemented by all three concrete backends.
63
+ - Surface new provider env vars in `describe_environment()` so `amatelier config` reports them.
64
+
65
+ ## Tests
66
+
67
+ - `tests/test_smoke.py` — pytest suite, import/CLI smoke checks, runs in CI
68
+ - `tests/test_refresh_seeds.py` — pytest suite, verifies seed materialization, runs in CI
69
+ - `tests/test_integration.py` — **standalone script**, exercises live LLM backends, NOT pytest and NOT run in CI. Execute manually when verifying backend changes.
70
+
71
+ Run the CI-equivalent suites locally:
72
+
73
+ ```bash
74
+ pytest tests/test_smoke.py -v
75
+ pytest tests/test_refresh_seeds.py -v
76
+ ```
77
+
78
+ ## Common commands
79
+
80
+ ```bash
81
+ make setup # install package + dev deps
82
+ make test # run test suite
83
+ make lint # ruff + mypy
84
+ make demo # run examples/first_run/
85
+ make docs # build docs site locally
86
+ amatelier config # show active mode, credentials, paths
87
+ amatelier refresh-seeds # rematerialize per-agent seeds in user data dir
88
+ ```
89
+
90
+ ## When editing docs
91
+
92
+ Use the `dual-docs-architect` skill. It classifies every write (tutorial / guide / reference / explanation × human / LLM / both) and routes to the correct file.
93
+
94
+ ## When scaffolding new repos
95
+
96
+ Use the `repo-architect` skill. Don't copy this file by hand — let the skill render it.
@@ -0,0 +1,26 @@
1
+ TASK: delegate remaining subprocess claude calls to llm_backend
2
+ SCOPE: non-trivial
3
+ FILES: src/amatelier/engine/classify_concepts.py, src/amatelier/engine/backfill_distill.py, src/amatelier/engine/roundtable_runner.py, src/amatelier/engine/therapist.py, src/amatelier/engine/steward_dispatch.py
4
+ REPLACES: 6 direct subprocess.run(["claude", ...]) call sites that hardcode the Claude CLI — broken for users without `claude` on PATH even when ANTHROPIC_API_KEY is set
5
+ MIGRATION: none — each site keeps its existing subprocess call as the claude-code-mode fallback; only adds a backend-first check for open-mode users
6
+ CALLERS:
7
+ - classify_concepts._call_sonnet_classifier() — batch concept classification
8
+ - backfill_distill._distill_skills_sonnet() — retroactive skill extraction from old digests
9
+ - roundtable_runner._summarize_round_haiku() — per-round summaries via haiku
10
+ - roundtable_runner._distill_skills() — post-RT skill extraction via sonnet (called from run_roundtable)
11
+ - therapist._call_llm(prompt, model) — shared helper for _call_therapist, _call_gemini (gemini via its own path)
12
+ - steward_dispatch.spawn_steward_subagent() — tool-using research agent (cannot delegate — requires agent spawning)
13
+ USER_PATH: amatelier roundtable → roundtable_runner.run_roundtable() → [Round N] → _summarize_round_haiku() | → [post-RT] _distill_skills() | → classify_concepts() | → therapist() | → steward tagged requests → each site calls backend.complete() when backend.name != "claude-code", else falls through to existing subprocess.run(["claude", ...])
14
+ RED_STATE: 6 sites in engine/ directly call subprocess.run(["claude", "-p", "--model", ...]). User in anthropic-sdk or openai-compat mode hits FileNotFoundError('claude') at every site after Judge scoring succeeds (since Judge is the only site already delegating).
15
+ RED_TYPE: USER-OBSERVABLE
16
+ GREEN_CONDITION:
17
+ - When AMATELIER_MODE=anthropic-sdk (or auto-detected via ANTHROPIC_API_KEY with no claude binary): all 5 simple sites (classify_concepts, backfill_distill, 2 roundtable_runner sites, therapist) succeed via Anthropic SDK. Steward returns a degradation message explaining claude-code requirement.
18
+ - When claude-code mode: all sites continue using their existing subprocess.run() path with zero observable difference (same flags, same timeouts, same error handling).
19
+ - pytest tests/test_smoke.py passes (13/13)
20
+ - pytest tests/test_db_integration.py passes (11/11)
21
+ - ruff check src/ passes on the edited files
22
+ OMISSIONS:
23
+ - steward_dispatch in non-claude-code mode returns {"status": "unavailable", "result": "Steward requires claude-code mode..."} instead of spawning a tool-using agent. Proper tool-use delegation (Anthropic SDK messages API with tools param) is out of scope — multi-hour refactor.
24
+ - No new tests added; existing smoke + integration tests cover the non-delegation path. Live verification of open-mode requires ANTHROPIC_API_KEY in CI, which is a separate secrets/keys task.
25
+ - gemini_client and naomi (Naomi worker) unchanged — already use their own google-genai path
26
+ - engine/claude_agent.py line 264 — NOT in scope (this is the legacy shim; newer sites should use llm_backend.call_claude instead, but claude_agent.py still works for back-compat and is out of scope)
@@ -0,0 +1 @@
1
+ openmode-fixes-v1
@@ -0,0 +1,14 @@
1
+ TASK: manual CI — replace auto-trigger with local script
2
+ SCOPE: routine
3
+ FILES: scripts/ci_local.py (new), Makefile (ci target), .github/workflows/ci.yml, .github/workflows/wheel-smoke.yml, .github/workflows/docs.yml
4
+ REPLACES: auto-triggered workflows that burn GitHub Actions minutes on every push/PR — replaced by local script + workflow_dispatch manual-only triggers
5
+ MIGRATION: none — existing v* tag triggers preserved on all workflows (release/publish paths unchanged)
6
+ CALLERS: developer runs `python scripts/ci_local.py` or `make ci` before pushing. CI workflows callable from Actions UI via workflow_dispatch when needed.
7
+ USER_PATH: developer makes code change → runs `python scripts/ci_local.py` locally → script runs ruff + pytest smoke + mkdocs + wheel build + DB integration test sequentially → exits 0 on success, 1 with failure list on failure → developer pushes only if green
8
+ RED_STATE: ci.yml/wheel-smoke.yml/docs.yml all had `on: push: branches: [main]` and `pull_request: branches: [main]` — every push/PR triggered ~7min of CI runs per commit. No cross-platform local CI script existed; Makefile targets unusable on Windows without `make`.
9
+ RED_TYPE: INFRASTRUCTURE
10
+ GREEN_CONDITION: `python scripts/ci_local.py` runs all 5 checks (ruff, pytest smoke, mkdocs, wheel build, pytest integration) on any OS with Python, reports pass/fail per check, exits non-zero on any failure. Pushing to main does NOT fire ci.yml/wheel-smoke.yml/docs.yml. Pushing a `v*` tag DOES fire all of them plus publish.yml + release.yml.
11
+ OMISSIONS:
12
+ - No Docker / `act` integration — users wanting true CI parity can still run `docker compose run --rm integration` as before
13
+ - scripts/ci_local.py does not support parallel execution — runs checks sequentially (acceptable for <10s per check)
14
+ - Makefile `ci` target delegates to python script; mac/linux users could bypass this but it's simpler to maintain one implementation
@@ -0,0 +1,34 @@
1
+ TASK: apply Open-mode RT fixes (text accumulation, exception guard, response_format)
2
+ SCOPE: non-trivial
3
+ FILES: src/amatelier/llm_backend.py, src/amatelier/engine/judge_scorer.py, src/amatelier/engine/classify_concepts.py, src/amatelier/engine/backfill_distill.py, src/amatelier/engine/roundtable_runner.py
4
+ REPLACES:
5
+ 1. AnthropicSDKBackend.complete_with_tools at llm_backend.py:319-321 `final_text = "".join(text_chunks)` — only captures the LAST iteration's text. All intermediate-turn narration is discarded when the model stops calling tools.
6
+ 2. AnthropicSDKBackend.complete_with_tools at llm_backend.py:298 `msg = client.messages.create(...)` — no try/except. SDK RateLimitError / APIError / network failures crash the loop with the partial `messages` state inaccessible to callers for retry.
7
+ 3. OpenAICompatBackend.complete at llm_backend.py:~305 `client.chat.completions.create(...)` — no `response_format`. GPT-4o and most OpenAI-compat models often emit markdown-fenced JSON or conversational filler when engine prompts require strict JSON (judge scoring, skill classification, skill distillation). Causes JSON parse crash for first openai-compat user.
8
+ MIGRATION: None — all three fixes are additive or strictly-more-robust behavior. Existing claude-code and anthropic-sdk callers see no behavior change. openai-compat callers now get valid JSON where engine prompts request it; callers that pass text prompts continue to work (json_mode defaults to False).
9
+ CALLERS:
10
+ - complete_with_tools: only called from steward_dispatch.spawn_steward_subagent() — this is the Steward tool-use path in anthropic-sdk mode.
11
+ - complete (with new json_mode kwarg): called from 5 engine sites currently, 4 of which request JSON-shaped output (judge_scorer, classify_concepts, backfill_distill, roundtable_runner._distill_skills). Haiku summarizer + therapist call it for text, no json_mode change needed.
12
+ USER_PATH:
13
+ Fix 1: user in anthropic-sdk mode runs amatelier roundtable with [[request: ...]] tags → Steward invokes complete_with_tools → model narrates "Let me check X" in iteration 1, calls read_file, synthesizes "Based on X, the answer is Y" in iteration 2 → BEFORE: only "Based on X..." returned. AFTER: both iterations' text concatenated.
14
+ Fix 2: user in anthropic-sdk mode hits a transient 429 rate-limit during iteration 3 of a Steward loop → BEFORE: RateLimitError propagates unhandled, partial message state lost, Steward returns status=error. AFTER: exception caught, accumulated messages visible in log, tool_use_id round-trip preserved, Steward returns status=error with full diagnostic context.
15
+ Fix 3: user with OPENAI_API_KEY or OPENROUTER_API_KEY set, runs first roundtable → judge_scorer._call_sonnet() calls backend.complete(..., json_mode=True) → OpenAICompatBackend adds response_format={"type":"json_object"} → GPT-4o returns clean JSON → engine parses successfully. BEFORE: GPT-4o returns `` ```json\n{...}\n``` ``, parser crashes with JSONDecodeError.
16
+ RED_STATE:
17
+ - llm_backend.py:320 `final_text = "".join(text_chunks)` — text_chunks is the current iteration's blocks only; prior iterations' text already discarded at the top of each loop iteration
18
+ - llm_backend.py:298-305 client.messages.create is not inside a try/except; only the tool_executor call later in the loop is protected
19
+ - llm_backend.py:~305 OpenAICompatBackend.complete client.chat.completions.create has no response_format param and no json_mode detection
20
+ - judge_scorer.py:~155 backend.complete(system=..., prompt=..., model="sonnet", max_tokens=8000, timeout=360, effort=effort) — passes no json_mode; JSON-requiring prompt hits openai-compat without response_format
21
+ - classify_concepts.py, backfill_distill.py, roundtable_runner.py:_distill_skills: same pattern — backend.complete without json_mode, all 3 request JSON output
22
+ RED_TYPE: USER-OBSERVABLE
23
+ GREEN_CONDITION:
24
+ 1. complete_with_tools accumulates text from all iterations (final_text = existing_text + "".join(current_chunks)) — unit test: mock 3 iterations, assert iteration-2 text in returned Completion.text
25
+ 2. complete_with_tools wraps the `msg = client.messages.create(...)` call in try/except; on exception, returns a Completion with text=accumulated_text_so_far, model, backend, latency_ms, and logs a warning with the accumulated messages length
26
+ 3. LLMBackend.complete accepts optional `json_mode: bool = False`. OpenAICompatBackend translates json_mode=True → response_format={"type":"json_object"}. ClaudeCLIBackend and AnthropicSDKBackend accept and ignore (Claude handles JSON without hint).
27
+ 4. Four engine call sites pass json_mode=True where they expect JSON: judge_scorer._call_sonnet, classify_concepts._call_sonnet_classifier, backfill_distill.distill_one, roundtable_runner._distill_skills
28
+ 5. Local CI passes: ruff, pytest smoke 13/13, pytest integration 11/11, mkdocs build
29
+ OMISSIONS:
30
+ - Marcus's 5 mock tests are NOT added in this commit. They're good engineering but not ship-blockers; add in a follow-up commit (tests/test_llm_backend.py).
31
+ - Extended-thinking cost/quality assertion remains uncovered (requires live key; documented as known gap in tests/README.md follow-up).
32
+ - roundtable_runner._summarize_round_haiku and therapist._call_llm pass text prompts — no json_mode change, intentionally left at default False.
33
+ - gemini_agent uses its own path, unchanged.
34
+ - The RT infrastructure collapse (14 worker timeouts in rounds 2-3) is not addressed by these code fixes — it's a concurrency/rate-limit product behavior to document separately, not a code bug.
@@ -0,0 +1,33 @@
1
+ TASK: apply Security RT v2 mandatory mitigations (#2, #3, #4)
2
+ SCOPE: critical
3
+ FILES: src/amatelier/engine/steward_tools.py, src/amatelier/engine/steward_dispatch.py, src/amatelier/cli.py, src/amatelier/paths.py
4
+ REPLACES: three security holes confirmed by Security RT digest-afd96c74180e and Elena's Grand Insight ("path containment and sensitive-file access are orthogonal concerns, and only the first is defended"):
5
+ 1. steward_tools.read_file() has no credential denylist — agents can request `.env`, `.git/config`, `~/.aws/credentials` and they pass _safe_resolve() because they're inside WORKSPACE_ROOT
6
+ 2. steward_dispatch.format_result() returns full result text and runner persists it to digest + steward-log JSON — credentials read once exfiltrate to durable artifacts
7
+ 3. spawn_steward_subagent() executes on first dispatch with no user consent moment — GDPR Article 13 requires disclosure before the processing event, not at install time
8
+ MIGRATION: Existing CI/automation that runs amatelier roundtables must set AMATELIER_STEWARD_CONSENT=1 to skip the runtime prompt; documented in CHANGELOG and .env.example.
9
+ CALLERS:
10
+ - steward_tools.read_file() — called from dispatch_tool() during anthropic-sdk Steward tool-use loop
11
+ - steward_dispatch.format_result() — called from runner research-window phase
12
+ - steward_dispatch.spawn_steward_subagent() — called from runner research-window + per-round dispatch
13
+ - cli.py existing roundtable subcommand — gains pre-flight consent check
14
+ USER_PATH: developer runs `amatelier roundtable` → CLI checks AMATELIER_STEWARD_CONSENT env or prior accept → if neither, prints disclosure + prompts for y/n → on consent, sets process env var for child processes → runner enters research window → agents emit `[[request: read .env]]` → steward dispatch resolves to read_file('.env') → _is_secret_path(p) returns True → returns "Error: blocked secret-path .env (Steward denylist)" → result truncated to 4KB at format_result + persisted truncated to digest → no credential ever transits to Anthropic API or persists to disk artifact
15
+ RED_STATE:
16
+ - steward_tools.py:140-152 read_file() opens any path that passes _safe_resolve(). No filename or extension check.
17
+ - steward_dispatch.py:419-422 format_result() returns the full result['result'] string for runner injection.
18
+ - roundtable_runner.py around line 590 db_cmd("speak", "runner", inject_msg) writes full text to messages table → digest persistence.
19
+ - StewardLog.record() at steward_dispatch.py around line 440 writes full result to steward-log JSON.
20
+ - cli.py roundtable command spawns runner immediately on invocation. No consent moment.
21
+ RED_TYPE: USER-OBSERVABLE (privacy + security harm to end users)
22
+ GREEN_CONDITION:
23
+ - steward_tools._is_secret_path(p) blocks: `.env`, `.env.*`, `*.pem`, `*.key`, `*.p12`, `*.pfx`, `id_rsa`, `id_ed25519`, `credentials`, `.git/config`, `.aws/credentials`, `.netrc`, `.npmrc`, `.pypirc`, anything ending in `_token` or `_secret` or `_key` (case-insensitive). read_file() and grep() return "Error: blocked secret-path..." string instead of content.
24
+ - format_result() truncates the result text at 4096 chars before injection, prepending a `[truncated to 4KB]` marker if needed.
25
+ - StewardLog.record() truncates the persisted result text at 4096 chars.
26
+ - First spawn_steward_subagent() call per amatelier process checks env AMATELIER_STEWARD_CONSENT in {"1","yes","true"}; if missing, raises StewardConsentRequired with a documented message; cli.py catches the exception, prompts the user with a clear disclosure (sends file content excerpts to claude/anthropic API), and sets AMATELIER_STEWARD_CONSENT=1 for the current process if the user accepts.
27
+ - All four checks have unit-test-style verification by directly invoking _is_secret_path() with a fixture list, calling format_result() with an oversize string, and calling spawn_steward_subagent() with the env var unset.
28
+ OMISSIONS:
29
+ - The denylist is filename-pattern based, not content-scanned. A user-renamed credential file (e.g. mysecret.txt) is not blocked — documented as known limitation.
30
+ - The truncation length is hardcoded 4096; not yet exposed in config.json. A future RT can tune.
31
+ - Consent is per-process not persistent — restarting amatelier re-prompts. Persistent consent (a checkbox in user_data_dir) deferred.
32
+ - Steward in claude-code mode (subprocess CLI) gets the truncation but not the read_file denylist — denylist runs in the SDK tool-use path only. CLI mode uses the actual claude binary's Read tool which has its own surface. This is documented as deferred — claude-code Steward limitation.
33
+ - openai-compat backend already returns "unavailable" for Steward; mitigations don't apply there.
@@ -0,0 +1,27 @@
1
+ TASK: implement Steward tool use in anthropic-sdk mode
2
+ SCOPE: non-trivial
3
+ FILES: src/amatelier/engine/steward_tools.py (new), src/amatelier/llm_backend.py, src/amatelier/engine/steward_dispatch.py
4
+ REPLACES: steward_dispatch.py returns {"status": "unavailable"} in open mode — degrades Steward empirical grounding. Implement actual Anthropic SDK tool-use loop (messages API with tools= param) backed by local read_file/grep/glob functions sandboxed to WORKSPACE_ROOT.
5
+ MIGRATION: none — claude-code mode path unchanged; anthropic-sdk now succeeds with real lookups instead of returning degradation message; openai-compat still degrades (tool schemas differ across OAI-compat providers, out of scope)
6
+ CALLERS:
7
+ - steward_dispatch.spawn_steward_subagent() at line ~253 — called from roundtable_runner when agents emit [[request: ...]] tags
8
+ - AnthropicSDKBackend.complete_with_tools() (new method) — called only from steward_dispatch, not wired into LLMBackend Protocol (avoids forcing all backends to implement)
9
+ - steward_tools.dispatch_tool(name, input) — internal router called by the tool-use loop
10
+ USER_PATH: amatelier roundtable (ANTHROPIC_API_KEY set, no claude CLI) → worker emits "[[request: show schema of messages table]]" → runner detects request → steward_dispatch.spawn_steward_subagent() → backend.name == "anthropic-sdk" → call complete_with_tools(system=STEWARD_SYSTEM_PROMPT, user=request, tools=STEWARD_TOOL_SPECS) → Anthropic returns tool_use block for grep/read_file → steward_tools.dispatch_tool() executes locally with path validation → result appended as tool_result → loop until model returns text → return {"status": "success", "result": text} to runner
11
+ RED_STATE: steward_dispatch.py:291-310 returns {"status": "unavailable"} when backend.name != "claude-code". In anthropic-sdk mode, [[request]] tags produce degradation messages instead of real data.
12
+ RED_TYPE: USER-OBSERVABLE
13
+ GREEN_CONDITION:
14
+ - steward_tools.py exports STEWARD_TOOL_SPECS (3 tools: read_file, grep, glob) and dispatch_tool(name, input) -> str
15
+ - steward_tools._safe_resolve() rejects path-traversal attempts (absolute paths outside WORKSPACE_ROOT, ../../etc)
16
+ - AnthropicSDKBackend has a new method complete_with_tools(system, user, tools, max_iterations=10) that loops tool_use → tool_result until final text
17
+ - steward_dispatch in anthropic-sdk mode returns real data, same structure as claude-code: {"status": "success", "result": str, "elapsed_s": float}
18
+ - Existing claude-code path unchanged (byte-identical flags + subprocess call)
19
+ - openai-compat continues to return {"status": "unavailable"} — documented in OMISSIONS
20
+ - pytest tests/test_smoke.py passes (13/13), pytest tests/test_db_integration.py passes (11/11)
21
+ - ruff clean on new files
22
+ OMISSIONS:
23
+ - OpenAI-compat tool use NOT implemented. OpenAI and OpenRouter support tools but schemas differ (OpenAI functions vs Anthropic tools). A cross-provider abstraction would double the code for marginal benefit since OAI-compat Steward usage is niche. Users who need Steward in OAI-compat must set AMATELIER_MODE=anthropic-sdk with ANTHROPIC_API_KEY.
24
+ - Tool sandbox is path-based only — no syscall sandboxing. Steward can read any file under WORKSPACE_ROOT including .env, secrets, etc. Same security posture as claude-code mode (Read tool with --dangerously-skip-permissions).
25
+ - No timeout per tool call (only overall complete_with_tools timeout). A single grep on a huge directory could block.
26
+ - No token accounting across tool-use iterations — follows existing judge_scorer pattern.
27
+ - No new tests; existing smoke + integration tests cover non-steward paths. Live verification requires ANTHROPIC_API_KEY which is a separate concern.
@@ -0,0 +1,15 @@
1
+ TASK: wire judge max-effort via extended thinking
2
+ SCOPE: non-trivial
3
+ FILES: src/amatelier/llm_backend.py, src/amatelier/engine/judge_scorer.py
4
+ REPLACES: AnthropicSDKBackend.complete() at llm_backend.py:207 currently ignores thinking; _call_sonnet() at judge_scorer.py:144 passes max_tokens=8000 with no thinking budget; config.json has "effort": "max" under judge but code ignores it
5
+ MIGRATION: none — new optional `effort` param on LLMBackend.complete() defaults to None, preserves all existing call sites
6
+ CALLERS: judge_scorer._call_sonnet() will pass effort=<config.judge.effort> to backend.complete(). All other callers (call_claude shim at llm_backend.py:427, other engine sites) continue passing no effort and get identical behavior to today.
7
+ USER_PATH: amatelier roundtable → engine/roundtable_runner.py triggers scoring phase → engine/judge_scorer.py:score_contributions() → _call_sonnet(prompt) → reads config.judge.effort via _get_judge_effort() → backend.complete(effort="max") → AnthropicSDKBackend.complete() adds thinking={"type":"enabled","budget_tokens":16000} to client.messages.create() → Anthropic API returns with extended reasoning → higher-quality scoring
8
+ RED_STATE: llm_backend.py:219 `client.messages.create(model=, max_tokens=, system=, messages=, timeout=)` — no thinking kwarg. judge_scorer.py:156-159 `backend.complete(system="", prompt=prompt, model="sonnet", max_tokens=8000, timeout=360)` — no effort param exists. config.json line 37 `"effort": "max"` reads but never flows to API call.
9
+ RED_TYPE: INFRASTRUCTURE
10
+ GREEN_CONDITION: When config.judge.effort == "max" AND backend.name == "anthropic-sdk", the Anthropic messages.create() call receives thinking={"type": "enabled", "budget_tokens": 16000} and max_tokens is bumped to ≥20000. Other backends (claude-code, openai-compat) accept the effort kwarg without erroring and ignore it (log debug). judge_scorer continues to work when config has no effort field (effort=None, no thinking block).
11
+ OMISSIONS:
12
+ - OpenAICompatBackend.complete() at llm_backend.py:284 has no extended-thinking equivalent; OpenRouter/OpenAI users ignore effort (platform-level limitation)
13
+ - Only judge reads effort from config; agent LLM calls at engine/roundtable_runner.py do not propagate effort (out of scope)
14
+ - budget_tokens is hardcoded to 16000 for effort="max"; not exposed as config.judge.budget_tokens (follow-up)
15
+ - No new tests added; tests/test_smoke.py already covers backend.complete() without effort and will continue to pass
@@ -0,0 +1,22 @@
1
+ {
2
+ "name": "amatelier dev",
3
+ "image": "mcr.microsoft.com/devcontainers/python:1-3.10",
4
+ "features": {
5
+ "ghcr.io/devcontainers/features/github-cli:1": {},
6
+ "ghcr.io/devcontainers/features/common-utils:2": {}
7
+ },
8
+ "postCreateCommand": "pip install -e \".[dev]\"",
9
+ "customizations": {
10
+ "vscode": {
11
+ "extensions": [
12
+ "ms-python.python",
13
+ "charliermarsh.ruff",
14
+ "ms-python.mypy-type-checker"
15
+ ],
16
+ "settings": {
17
+ "python.testing.pytestEnabled": true,
18
+ "python.testing.pytestArgs": ["tests"]
19
+ }
20
+ }
21
+ }
22
+ }
@@ -0,0 +1,17 @@
1
+ # Required for the Gemini Flash agent (Naomi)
2
+ # Get your key at: https://aistudio.google.com/apikey
3
+ GEMINI_API_KEY=your-gemini-api-key-here
4
+
5
+ # Optional — override workspace root when the skill is not installed at
6
+ # the default .claude/skills/claude-suite layout. Rarely needed.
7
+ # AMATELIER_WORKSPACE=/absolute/path/to/project
8
+
9
+ # Optional — enables proposal queueing into an external evolution harness
10
+ # (therapist.py). Leave unset to run standalone.
11
+ # CLAUDE_EVOLUTION_HARNESS=/absolute/path/to/harness/repo
12
+
13
+ # Required for non-interactive / CI use of `amatelier roundtable`.
14
+ # Confirms you understand the Steward subagent reads files from
15
+ # AMATELIER_WORKSPACE and sends excerpts to the configured LLM provider.
16
+ # Interactive users will be prompted on first dispatch instead.
17
+ # AMATELIER_STEWARD_CONSENT=1
@@ -0,0 +1,38 @@
1
+ ---
2
+ name: Bug report
3
+ about: Something isn't working as expected
4
+ title: '[BUG] '
5
+ labels: bug
6
+ assignees: ''
7
+ ---
8
+
9
+ ## Describe the bug
10
+
11
+ A clear, concise description.
12
+
13
+ ## To reproduce
14
+
15
+ 1. Step one
16
+ 2. Step two
17
+ 3. See error
18
+
19
+ ## Expected behavior
20
+
21
+ What should have happened.
22
+
23
+ ## Environment
24
+
25
+ - OS:
26
+ - Version of this package:
27
+ - Python/Node version:
28
+ - How installed (pip / npm / source):
29
+
30
+ ## Logs / traceback
31
+
32
+ ```
33
+ paste here
34
+ ```
35
+
36
+ ## Additional context
37
+
38
+ Anything else relevant.
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest a new capability or improvement
4
+ title: '[FEATURE] '
5
+ labels: enhancement
6
+ assignees: ''
7
+ ---
8
+
9
+ ## Problem
10
+
11
+ What problem does this solve? Who has this problem?
12
+
13
+ ## Proposed solution
14
+
15
+ Describe what you'd like to happen.
16
+
17
+ ## Alternatives considered
18
+
19
+ Other approaches you've thought about.
20
+
21
+ ## Additional context
22
+
23
+ Screenshots, links to similar features elsewhere, etc.
@@ -0,0 +1,92 @@
1
+ # GitHub Copilot Instructions — amatelier
2
+
3
+ _This file is generated from CLAUDE.md. Do not hand-edit._
4
+
5
+ Instructions for working on this repository inside Claude Code.
6
+
7
+ ## What this project is
8
+
9
+ A self-evolving multi-model AI team skill for Claude Code
10
+
11
+ ## Repo layout
12
+
13
+ - `src/amatelier/` — shipped package (the canonical code)
14
+ - `tests/` — mirrors `src/` structure
15
+ - `examples/first_run/` — zero-config runnable demo
16
+ - `docs/` — human documentation (MkDocs, Diataxis tiers)
17
+ - `llm/` — LLM-facing documentation (flat, exhaustive, machine-readable)
18
+ - `scripts/` — shell and one-off utility scripts
19
+ - `.github/workflows/` — CI, publish, release, docs workflows
20
+
21
+ ## Rules
22
+
23
+ 1. **Amatayo Standard.** This repo follows the Amatayo Standard. Structure is enforced by CI.
24
+ 2. **Dual-docs invariant.** Every change that adds a public symbol, CLI flag, or config key must update `llm/SPEC.md` and the relevant `docs/reference/*` file. The `llm/API.md` and `llm/SCHEMA.md` files are generated — don't hand-edit them.
25
+ 3. **`llm/` is flat.** Never create subdirectories in `llm/`. Flat is the invariant.
26
+ 4. **Tests required.** New code requires new tests. `make test` must pass before PR.
27
+ 5. **Conventional commits.** `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`. Releases are driven by commit history.
28
+ 6. **No secrets.** `.env.example` documents variables; real values never committed.
29
+
30
+ ## Two-layer paths (critical)
31
+
32
+ Amatelier is pip-installable; the bundled package must stay read-only at runtime. Two layers exist:
33
+
34
+ - **Bundled layer** — `src/amatelier/` and any files shipped inside the wheel. This is the canonical source. Runtime code MUST NOT write here.
35
+ - **User data layer** — everything returned by `amatelier.paths.user_data_dir()` and its siblings (`user_agent_dir`, `user_db_path`, `user_digest_dir`, `user_briefing_dir`, `user_store_ledger`, `user_novel_concepts`, `user_shared_skills_index`, `user_config_override`). All mutable state goes here.
36
+
37
+ Rules for AI agents editing this repo:
38
+
39
+ - Any code that persists state, writes logs, updates ledgers, or mutates agent memory must route through a `paths.user_*()` helper. Do not hard-code paths under `src/amatelier/` for writes.
40
+ - Persona seed files (per-agent `CLAUDE.md`, `IDENTITY.md` under `src/amatelier/agents/<name>/`) are bundled. Edits to seeds only affect a user's environment after `amatelier refresh-seeds` or a fresh install.
41
+ - Generated files must not be hand-edited: `llm/API.md`, `llm/SCHEMA.md`, `llms.txt`, `llms-full.txt`, `.cursor/rules/*.mdc`, `.github/copilot-instructions.md`. CI regenerates them.
42
+
43
+ ## Three LLM backend modes
44
+
45
+ All LLM calls must go through `amatelier.llm_backend.get_backend()`. The backend abstraction resolves to one of three modes at runtime:
46
+
47
+ | Mode | Selected when | Backend class |
48
+ |---|---|---|
49
+ | `claude-code` | Running inside Claude Code, `claude` binary on PATH | `ClaudeCLIBackend` |
50
+ | `anthropic-sdk` | `ANTHROPIC_API_KEY` present, no Claude Code session | `AnthropicSDKBackend` |
51
+ | `openai-compat` | `OPENAI_API_KEY`, `OPENROUTER_API_KEY`, or local Ollama | `OpenAICompatBackend` |
52
+
53
+ Override with `AMATELIER_MODE=claude-code|anthropic-sdk|openai-compat`.
54
+
55
+ When introducing new LLM calls:
56
+
57
+ - Call `get_backend()` and use the returned object's interface. Do not shell out to the `claude` CLI directly and do not `import anthropic` at the call site.
58
+ - Any new backend capability must be added to the `LLMBackend` Protocol in `src/amatelier/llm_backend.py` and implemented by all three concrete backends.
59
+ - Surface new provider env vars in `describe_environment()` so `amatelier config` reports them.
60
+
61
+ ## Tests
62
+
63
+ - `tests/test_smoke.py` — pytest suite, import/CLI smoke checks, runs in CI
64
+ - `tests/test_refresh_seeds.py` — pytest suite, verifies seed materialization, runs in CI
65
+ - `tests/test_integration.py` — **standalone script**, exercises live LLM backends, NOT pytest and NOT run in CI. Execute manually when verifying backend changes.
66
+
67
+ Run the CI-equivalent suites locally:
68
+
69
+ ```bash
70
+ pytest tests/test_smoke.py -v
71
+ pytest tests/test_refresh_seeds.py -v
72
+ ```
73
+
74
+ ## Common commands
75
+
76
+ ```bash
77
+ make setup # install package + dev deps
78
+ make test # run test suite
79
+ make lint # ruff + mypy
80
+ make demo # run examples/first_run/
81
+ make docs # build docs site locally
82
+ amatelier config # show active mode, credentials, paths
83
+ amatelier refresh-seeds # rematerialize per-agent seeds in user data dir
84
+ ```
85
+
86
+ ## When editing docs
87
+
88
+ Use the `dual-docs-architect` skill. It classifies every write (tutorial / guide / reference / explanation x human / LLM / both) and routes to the correct file.
89
+
90
+ ## When scaffolding new repos
91
+
92
+ Use the `repo-architect` skill. Don't copy this file by hand — let the skill render it.
@@ -0,0 +1,7 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ open-pull-requests-limit: 5
@@ -0,0 +1,56 @@
1
+ name: CI
2
+
3
+ # Matrix policy: manual (workflow_dispatch) runs use the fast path (ubuntu + Python 3.12 only).
4
+ # Full matrix (3 OSes x 4 Pythons) runs on v* tags so release validation stays
5
+ # thorough without burning minutes on every commit.
6
+
7
+ on:
8
+ # Manual-only: run `make ci` locally before pushing. Fires on v* tags
9
+ # for release validation, and can be triggered via the Actions UI.
10
+ workflow_dispatch:
11
+ push:
12
+ tags: ['v*']
13
+
14
+ permissions:
15
+ contents: read
16
+
17
+ jobs:
18
+ test:
19
+ runs-on: ${{ matrix.os }}
20
+ strategy:
21
+ fail-fast: false
22
+ matrix:
23
+ # Full matrix on release tags; fast path otherwise.
24
+ os: ${{ startsWith(github.ref, 'refs/tags/v') && fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') || fromJSON('["ubuntu-latest"]') }}
25
+ python-version: ${{ startsWith(github.ref, 'refs/tags/v') && fromJSON('["3.10", "3.11", "3.12", "3.13"]') || fromJSON('["3.12"]') }}
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: actions/setup-python@v5
29
+ with:
30
+ python-version: ${{ matrix.python-version }}
31
+ cache: pip
32
+ - name: Install
33
+ run: pip install -e ".[dev]"
34
+ - name: Lint
35
+ run: ruff check src tests
36
+ - name: Type check (Linux/macOS only — Windows mypy trips on system dirs)
37
+ if: runner.os != 'Windows'
38
+ run: |
39
+ mypy --no-strict-optional \
40
+ src/amatelier/__init__.py \
41
+ src/amatelier/cli.py \
42
+ src/amatelier/paths.py \
43
+ src/amatelier/llm_backend.py || true
44
+ - name: Test (pytest smoke suite — no live APIs)
45
+ run: pytest tests/test_smoke.py -v
46
+
47
+ build-check:
48
+ runs-on: ubuntu-latest
49
+ steps:
50
+ - uses: actions/checkout@v4
51
+ - uses: actions/setup-python@v5
52
+ with:
53
+ python-version: "3.12"
54
+ - run: pip install build
55
+ - run: python -m build
56
+ - run: pip install dist/*.whl
@@ -0,0 +1,70 @@
1
+ # Builds the human docs site (MkDocs Material) and regenerates LLM-facing
2
+ # derivatives. Triggered only manually (workflow_dispatch) or by v* tags — not
3
+ # on pushes to main or PRs, so the PR-only and main-only steps below never run.
4
+
5
+ name: Docs
6
+
7
+ on:
8
+ # Manual-only: run `mkdocs build` locally to verify. Trigger via Actions UI
9
+ # when you want to publish the docs site. Fires on v* tags for releases.
10
+ workflow_dispatch:
11
+ push:
12
+ tags: ['v*']
13
+
14
+ permissions:
15
+ contents: read
16
+
17
+ jobs:
18
+ build:
19
+ runs-on: ubuntu-latest
20
+ permissions:
21
+ contents: write
22
+ pages: write
23
+ id-token: write
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ with:
27
+ fetch-depth: 0
28
+
29
+ - name: Set up Python
30
+ uses: actions/setup-python@v5
31
+ with:
32
+ python-version: "3.12"
33
+ cache: pip
34
+
35
+ - name: Install MkDocs + plugins
36
+ run: |
37
+ pip install \
38
+ mkdocs-material \
39
+ mkdocs-awesome-pages-plugin \
40
+ pymdown-extensions
41
+
42
+ - name: Build MkDocs site
43
+ run: mkdocs build --strict
44
+
45
+ # dual-docs-architect provides a regeneration script at scripts/regen_llm.py.
46
+ # It reads src/, docs/, examples/ and writes llm/API.md, llm/SCHEMA.md,
47
+ # llms.txt, llms-full.txt, .cursor/rules/*, .github/copilot-instructions.md.
48
+ # If the script doesn't exist yet, this step is a no-op.
49
+ - name: Regenerate LLM surface
50
+ run: |
51
+ if [ -f scripts/regen_llm.py ]; then
52
+ python scripts/regen_llm.py
53
+ else
54
+ echo "scripts/regen_llm.py not present yet — skipping regeneration"
55
+ fi
56
+
57
+ - name: Check for unsynced derivatives
58
+ if: github.event_name == 'pull_request'
59
+ run: |
60
+ if ! git diff --exit-code; then
61
+ echo "::error::Generated files are out of sync. Run 'make docs' locally and commit."
62
+ exit 1
63
+ fi
64
+
65
+ - name: Deploy to GitHub Pages
66
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
67
+ uses: peaceiris/actions-gh-pages@v4
68
+ with:
69
+ github_token: ${{ secrets.GITHUB_TOKEN }}
70
+ publish_dir: ./site