amatelier 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amatelier-0.3.0/.cursor/rules/amatelier.mdc +96 -0
- amatelier-0.3.0/.depth/backend-delegation_gate.md +26 -0
- amatelier-0.3.0/.depth/current_task.txt +1 -0
- amatelier-0.3.0/.depth/manual-ci-local_gate.md +14 -0
- amatelier-0.3.0/.depth/openmode-fixes-v1_gate.md +34 -0
- amatelier-0.3.0/.depth/security-mitigations-v1_gate.md +33 -0
- amatelier-0.3.0/.depth/steward-tool-use_gate.md +27 -0
- amatelier-0.3.0/.depth/wire-judge-max-effort_gate.md +15 -0
- amatelier-0.3.0/.devcontainer/devcontainer.json +22 -0
- amatelier-0.3.0/.env.example +17 -0
- amatelier-0.3.0/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- amatelier-0.3.0/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
- amatelier-0.3.0/.github/copilot-instructions.md +92 -0
- amatelier-0.3.0/.github/dependabot.yml +7 -0
- amatelier-0.3.0/.github/workflows/ci.yml +56 -0
- amatelier-0.3.0/.github/workflows/docs.yml +70 -0
- amatelier-0.3.0/.github/workflows/publish.yml +48 -0
- amatelier-0.3.0/.github/workflows/release.yml +35 -0
- amatelier-0.3.0/.github/workflows/wheel-smoke.yml +241 -0
- amatelier-0.3.0/.gitignore +67 -0
- amatelier-0.3.0/AGENTS.md +69 -0
- amatelier-0.3.0/CHANGELOG.md +104 -0
- amatelier-0.3.0/CLAUDE.md +90 -0
- amatelier-0.3.0/CODE_OF_CONDUCT.md +48 -0
- amatelier-0.3.0/CONTRIBUTING.md +103 -0
- amatelier-0.3.0/Dockerfile +37 -0
- amatelier-0.3.0/LICENSE +21 -0
- amatelier-0.3.0/Makefile +35 -0
- amatelier-0.3.0/PKG-INFO +323 -0
- amatelier-0.3.0/README.md +266 -0
- amatelier-0.3.0/SECURITY.md +99 -0
- amatelier-0.3.0/SKILL.md +67 -0
- amatelier-0.3.0/benchmarks/.gitkeep +0 -0
- amatelier-0.3.0/docker-compose.yml +24 -0
- amatelier-0.3.0/docs/explanation/architecture.md +441 -0
- amatelier-0.3.0/docs/explanation/steward-design.md +198 -0
- amatelier-0.3.0/docs/guides/configure-backend.md +200 -0
- amatelier-0.3.0/docs/guides/install.md +111 -0
- amatelier-0.3.0/docs/guides/troubleshooting.md +217 -0
- amatelier-0.3.0/docs/index.md +28 -0
- amatelier-0.3.0/docs/reference/cli.md +243 -0
- amatelier-0.3.0/docs/reference/config.md +266 -0
- amatelier-0.3.0/docs/reference/protocols/competition.md +106 -0
- amatelier-0.3.0/docs/reference/protocols/debrief.md +86 -0
- amatelier-0.3.0/docs/reference/protocols/distillation.md +87 -0
- amatelier-0.3.0/docs/reference/protocols/gemini-bridge.md +84 -0
- amatelier-0.3.0/docs/reference/protocols/learning.md +78 -0
- amatelier-0.3.0/docs/reference/protocols/memory-tiers.md +87 -0
- amatelier-0.3.0/docs/reference/protocols/research.md +96 -0
- amatelier-0.3.0/docs/reference/protocols/roundtable.md +86 -0
- amatelier-0.3.0/docs/reference/protocols/sparc-phases.md +86 -0
- amatelier-0.3.0/docs/reference/protocols/spark-economy.md +230 -0
- amatelier-0.3.0/docs/reference/protocols/verification.md +80 -0
- amatelier-0.3.0/docs/tutorials/first-run.md +193 -0
- amatelier-0.3.0/examples/README.md +68 -0
- amatelier-0.3.0/examples/briefings/full-demo.md +47 -0
- amatelier-0.3.0/examples/briefings/hello-world.md +42 -0
- amatelier-0.3.0/examples/briefings/self-host-vs-api.md +37 -0
- amatelier-0.3.0/examples/briefings/single-worker.md +33 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/README.md +126 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/briefing.md +37 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/digest.json +680 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/latest-result.md +8 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/screenshots/01-header-and-opening.svg +462 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/screenshots/02-gate.svg +362 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/screenshots/03-round-transition.svg +333 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/screenshots/04-session-summary.svg +129 -0
- amatelier-0.3.0/examples/sessions/2026-04-18-self-host-vs-api/transcript.md +618 -0
- amatelier-0.3.0/llm/API.md +1986 -0
- amatelier-0.3.0/llm/EXAMPLES.md +364 -0
- amatelier-0.3.0/llm/SCHEMA.md +8 -0
- amatelier-0.3.0/llm/SPEC.md +471 -0
- amatelier-0.3.0/llm/WORKFLOWS.md +450 -0
- amatelier-0.3.0/llms-full.txt +3304 -0
- amatelier-0.3.0/llms.txt +20 -0
- amatelier-0.3.0/mkdocs.yml +81 -0
- amatelier-0.3.0/pyproject.toml +107 -0
- amatelier-0.3.0/scripts/ci_local.py +76 -0
- amatelier-0.3.0/scripts/regen_full.py +53 -0
- amatelier-0.3.0/scripts/regen_llm.py +121 -0
- amatelier-0.3.0/scripts/regen_sitemap.py +98 -0
- amatelier-0.3.0/scripts/regen_tool_rules.py +80 -0
- amatelier-0.3.0/scripts/render_session.py +452 -0
- amatelier-0.3.0/scripts/run-roundtable.sh +12 -0
- amatelier-0.3.0/scripts/run_integration.sh +85 -0
- amatelier-0.3.0/src/amatelier/__init__.py +70 -0
- amatelier-0.3.0/src/amatelier/agents/clare/CLAUDE.md +146 -0
- amatelier-0.3.0/src/amatelier/agents/clare/IDENTITY.md +7 -0
- amatelier-0.3.0/src/amatelier/agents/clare/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/elena/CLAUDE.md +148 -0
- amatelier-0.3.0/src/amatelier/agents/elena/IDENTITY.md +7 -0
- amatelier-0.3.0/src/amatelier/agents/elena/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/haiku-assistant/CLAUDE.md +86 -0
- amatelier-0.3.0/src/amatelier/agents/haiku-assistant/IDENTITY.md +9 -0
- amatelier-0.3.0/src/amatelier/agents/haiku-assistant/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/judge/CLAUDE.md +136 -0
- amatelier-0.3.0/src/amatelier/agents/judge/IDENTITY.md +9 -0
- amatelier-0.3.0/src/amatelier/agents/judge/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/judge/trace/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/marcus/CLAUDE.md +146 -0
- amatelier-0.3.0/src/amatelier/agents/marcus/IDENTITY.md +7 -0
- amatelier-0.3.0/src/amatelier/agents/marcus/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/naomi/CLAUDE.md +148 -0
- amatelier-0.3.0/src/amatelier/agents/naomi/IDENTITY.md +7 -0
- amatelier-0.3.0/src/amatelier/agents/naomi/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/opus-admin/CLAUDE.md +267 -0
- amatelier-0.3.0/src/amatelier/agents/opus-admin/IDENTITY.md +9 -0
- amatelier-0.3.0/src/amatelier/agents/opus-admin/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/opus-therapist/CLAUDE.md +139 -0
- amatelier-0.3.0/src/amatelier/agents/opus-therapist/IDENTITY.md +8 -0
- amatelier-0.3.0/src/amatelier/agents/opus-therapist/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/simon/CLAUDE.md +140 -0
- amatelier-0.3.0/src/amatelier/agents/simon/IDENTITY.md +7 -0
- amatelier-0.3.0/src/amatelier/agents/simon/sessions/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/agents/therapist/CLAUDE.md +244 -0
- amatelier-0.3.0/src/amatelier/agents/therapist/case_notes/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/cli.py +366 -0
- amatelier-0.3.0/src/amatelier/config.json +123 -0
- amatelier-0.3.0/src/amatelier/engine/agent_memory.py +877 -0
- amatelier-0.3.0/src/amatelier/engine/analytics.py +793 -0
- amatelier-0.3.0/src/amatelier/engine/backfill_distill.py +296 -0
- amatelier-0.3.0/src/amatelier/engine/classify_concepts.py +202 -0
- amatelier-0.3.0/src/amatelier/engine/claude_agent.py +486 -0
- amatelier-0.3.0/src/amatelier/engine/db.py +259 -0
- amatelier-0.3.0/src/amatelier/engine/distiller.py +257 -0
- amatelier-0.3.0/src/amatelier/engine/evolver.py +495 -0
- amatelier-0.3.0/src/amatelier/engine/gemini_agent.py +278 -0
- amatelier-0.3.0/src/amatelier/engine/gemini_client.py +189 -0
- amatelier-0.3.0/src/amatelier/engine/judge_scorer.py +545 -0
- amatelier-0.3.0/src/amatelier/engine/migrations/001_initial.sql +32 -0
- amatelier-0.3.0/src/amatelier/engine/migrations/002_scores_table.sql +22 -0
- amatelier-0.3.0/src/amatelier/engine/migrations/003_spark_ledger.sql +16 -0
- amatelier-0.3.0/src/amatelier/engine/migrations/004_byzantine_flags.sql +3 -0
- amatelier-0.3.0/src/amatelier/engine/roundtable_runner.py +1715 -0
- amatelier-0.3.0/src/amatelier/engine/scorer.py +590 -0
- amatelier-0.3.0/src/amatelier/engine/steward_dispatch.py +561 -0
- amatelier-0.3.0/src/amatelier/engine/steward_tools.py +391 -0
- amatelier-0.3.0/src/amatelier/engine/store.py +841 -0
- amatelier-0.3.0/src/amatelier/engine/therapist.py +1419 -0
- amatelier-0.3.0/src/amatelier/llm_backend.py +656 -0
- amatelier-0.3.0/src/amatelier/paths.py +320 -0
- amatelier-0.3.0/src/amatelier/roundtable-server/BRIEFING_TEMPLATE.md +183 -0
- amatelier-0.3.0/src/amatelier/roundtable-server/db_client.py +351 -0
- amatelier-0.3.0/src/amatelier/roundtable-server/logs/.gitkeep +0 -0
- amatelier-0.3.0/src/amatelier/roundtable-server/server.py +380 -0
- amatelier-0.3.0/src/amatelier/roundtable-server/test_roundtable.py +339 -0
- amatelier-0.3.0/src/amatelier/shared-skills/index.json +1 -0
- amatelier-0.3.0/src/amatelier/store/catalog.json +212 -0
- amatelier-0.3.0/src/amatelier/store/ledger.json +1 -0
- amatelier-0.3.0/src/amatelier/store/skill_templates.py +291 -0
- amatelier-0.3.0/src/amatelier/tools/watch_roundtable.py +543 -0
- amatelier-0.3.0/tests/fixtures/sample_roundtable.sql +64 -0
- amatelier-0.3.0/tests/test_db_integration.py +365 -0
- amatelier-0.3.0/tests/test_integration.py +546 -0
- amatelier-0.3.0/tests/test_refresh_seeds.py +127 -0
- amatelier-0.3.0/tests/test_smoke.py +158 -0
- amatelier-0.3.0/tests/test_watcher.py +108 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: amatelier repository rules (Amatayo Standard)
|
|
3
|
+
globs: ["**/*"]
|
|
4
|
+
alwaysApply: true
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Amatelier — Claude Code Instructions
|
|
8
|
+
|
|
9
|
+
Instructions for working on this repository inside Claude Code.
|
|
10
|
+
|
|
11
|
+
## What this project is
|
|
12
|
+
|
|
13
|
+
A self-evolving multi-model AI team skill for Claude Code.
|
|
14
|
+
|
|
15
|
+
## Repo layout
|
|
16
|
+
|
|
17
|
+
- `src/amatelier/` — shipped package (the canonical code)
|
|
18
|
+
- `tests/` — mirrors `src/` structure
|
|
19
|
+
- `examples/first_run/` — zero-config runnable demo
|
|
20
|
+
- `docs/` — human documentation (MkDocs, Diataxis tiers)
|
|
21
|
+
- `llm/` — LLM-facing documentation (flat, exhaustive, machine-readable)
|
|
22
|
+
- `scripts/` — shell and one-off utility scripts
|
|
23
|
+
- `.github/workflows/` — CI, publish, release, docs workflows
|
|
24
|
+
|
|
25
|
+
## Rules
|
|
26
|
+
|
|
27
|
+
1. **Amatayo Standard.** This repo follows the Amatayo Standard. Structure is enforced by CI.
|
|
28
|
+
2. **Dual-docs invariant.** Every change that adds a public symbol, CLI flag, or config key must update `llm/SPEC.md` and the relevant `docs/reference/*` file. The `llm/API.md` and `llm/SCHEMA.md` files are generated — don't hand-edit them.
|
|
29
|
+
3. **`llm/` is flat.** Never create subdirectories in `llm/`. Flat is the invariant.
|
|
30
|
+
4. **Tests required.** New code requires new tests. `make test` must pass before PR.
|
|
31
|
+
5. **Conventional commits.** `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`. Releases are driven by commit history.
|
|
32
|
+
6. **No secrets.** `.env.example` documents variables; real values never committed.
|
|
33
|
+
|
|
34
|
+
## Two-layer paths (critical)
|
|
35
|
+
|
|
36
|
+
Amatelier is pip-installable; the bundled package must stay read-only at runtime. Two layers exist:
|
|
37
|
+
|
|
38
|
+
- **Bundled layer** — `src/amatelier/` and any files shipped inside the wheel. This is the canonical source. Runtime code MUST NOT write here.
|
|
39
|
+
- **User data layer** — everything returned by `amatelier.paths.user_data_dir()` and its siblings (`user_agent_dir`, `user_db_path`, `user_digest_dir`, `user_briefing_dir`, `user_store_ledger`, `user_novel_concepts`, `user_shared_skills_index`, `user_config_override`). All mutable state goes here.
|
|
40
|
+
|
|
41
|
+
Rules for AI agents editing this repo:
|
|
42
|
+
|
|
43
|
+
- Any code that persists state, writes logs, updates ledgers, or mutates agent memory must route through a `paths.user_*()` helper. Do not hard-code paths under `src/amatelier/` for writes.
|
|
44
|
+
- Persona seed files (per-agent `CLAUDE.md`, `IDENTITY.md` under `src/amatelier/agents/<name>/`) are bundled. Edits to seeds only affect a user's environment after `amatelier refresh-seeds` or a fresh install.
|
|
45
|
+
- Generated files must not be hand-edited: `llm/API.md`, `llm/SCHEMA.md`, `llms.txt`, `llms-full.txt`, `.cursor/rules/*.mdc`, `.github/copilot-instructions.md`. CI regenerates them.
|
|
46
|
+
|
|
47
|
+
## Three LLM backend modes
|
|
48
|
+
|
|
49
|
+
All LLM calls must go through `amatelier.llm_backend.get_backend()`. The backend abstraction resolves to one of three modes at runtime:
|
|
50
|
+
|
|
51
|
+
| Mode | Selected when | Backend class |
|
|
52
|
+
|---|---|---|
|
|
53
|
+
| `claude-code` | Running inside Claude Code, `claude` binary on PATH | `ClaudeCLIBackend` |
|
|
54
|
+
| `anthropic-sdk` | `ANTHROPIC_API_KEY` present, no Claude Code session | `AnthropicSDKBackend` |
|
|
55
|
+
| `openai-compat` | `OPENAI_API_KEY`, `OPENROUTER_API_KEY`, or local Ollama | `OpenAICompatBackend` |
|
|
56
|
+
|
|
57
|
+
Override with `AMATELIER_MODE=claude-code|anthropic-sdk|openai-compat`.
|
|
58
|
+
|
|
59
|
+
When introducing new LLM calls:
|
|
60
|
+
|
|
61
|
+
- Call `get_backend()` and use the returned object's interface. Do not shell out to the `claude` CLI directly and do not `import anthropic` at the call site.
|
|
62
|
+
- Any new backend capability must be added to the `LLMBackend` Protocol in `src/amatelier/llm_backend.py` and implemented by all three concrete backends.
|
|
63
|
+
- Surface new provider env vars in `describe_environment()` so `amatelier config` reports them.
|
|
64
|
+
|
|
65
|
+
## Tests
|
|
66
|
+
|
|
67
|
+
- `tests/test_smoke.py` — pytest suite, import/CLI smoke checks, runs in CI
|
|
68
|
+
- `tests/test_refresh_seeds.py` — pytest suite, verifies seed materialization, runs in CI
|
|
69
|
+
- `tests/test_integration.py` — **standalone script**, exercises live LLM backends, NOT pytest and NOT run in CI. Execute manually when verifying backend changes.
|
|
70
|
+
|
|
71
|
+
Run the CI-equivalent suites locally:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pytest tests/test_smoke.py -v
|
|
75
|
+
pytest tests/test_refresh_seeds.py -v
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Common commands
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
make setup # install package + dev deps
|
|
82
|
+
make test # run test suite
|
|
83
|
+
make lint # ruff + mypy
|
|
84
|
+
make demo # run examples/first_run/
|
|
85
|
+
make docs # build docs site locally
|
|
86
|
+
amatelier config # show active mode, credentials, paths
|
|
87
|
+
amatelier refresh-seeds # rematerialize per-agent seeds in user data dir
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## When editing docs
|
|
91
|
+
|
|
92
|
+
Use the `dual-docs-architect` skill. It classifies every write (tutorial / guide / reference / explanation x human / LLM / both) and routes to the correct file.
|
|
93
|
+
|
|
94
|
+
## When scaffolding new repos
|
|
95
|
+
|
|
96
|
+
Use the `repo-architect` skill. Don't copy this file by hand — let the skill render it.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
TASK: delegate remaining subprocess claude calls to llm_backend
|
|
2
|
+
SCOPE: non-trivial
|
|
3
|
+
FILES: src/amatelier/engine/classify_concepts.py, src/amatelier/engine/backfill_distill.py, src/amatelier/engine/roundtable_runner.py, src/amatelier/engine/therapist.py, src/amatelier/engine/steward_dispatch.py
|
|
4
|
+
REPLACES: 6 direct subprocess.run(["claude", ...]) call sites that hardcode the Claude CLI — broken for users without `claude` on PATH even when ANTHROPIC_API_KEY is set
|
|
5
|
+
MIGRATION: none — each site keeps its existing subprocess call as the claude-code-mode fallback; only adds a backend-first check for open-mode users
|
|
6
|
+
CALLERS:
|
|
7
|
+
- classify_concepts._call_sonnet_classifier() — batch concept classification
|
|
8
|
+
- backfill_distill._distill_skills_sonnet() — retroactive skill extraction from old digests
|
|
9
|
+
- roundtable_runner._summarize_round_haiku() — per-round summaries via haiku
|
|
10
|
+
- roundtable_runner._distill_skills() — post-RT skill extraction via sonnet (called from run_roundtable)
|
|
11
|
+
- therapist._call_llm(prompt, model) — shared helper for _call_therapist, _call_gemini (gemini via its own path)
|
|
12
|
+
- steward_dispatch.run_steward_subagent() — tool-using research agent (cannot delegate — requires agent spawning)
|
|
13
|
+
USER_PATH: amatelier roundtable → roundtable_runner.run_roundtable() → [Round N] → _summarize_round_haiku() | → [post-RT] _distill_skills() | → classify_concepts() | → therapist() | → steward tagged requests → each site calls backend.complete() when backend.name != "claude-code", else falls through to existing subprocess.run(["claude", ...])
|
|
14
|
+
RED_STATE: 6 sites in engine/ directly call subprocess.run(["claude", "-p", "--model", ...]). User in anthropic-sdk or openai-compat mode hits FileNotFoundError('claude') at every site after Judge scoring succeeds (since Judge is the only site already delegating).
|
|
15
|
+
RED_TYPE: USER-OBSERVABLE
|
|
16
|
+
GREEN_CONDITION:
|
|
17
|
+
- When AMATELIER_MODE=anthropic-sdk (or auto-detected via ANTHROPIC_API_KEY with no claude binary): all 5 simple sites (classify_concepts, backfill_distill, 2 roundtable_runner sites, therapist) succeed via Anthropic SDK. Steward returns a degradation message explaining claude-code requirement.
|
|
18
|
+
- When in claude-code mode: all sites continue using their existing subprocess.run() path with zero observable difference (same flags, same timeouts, same error handling).
|
|
19
|
+
- pytest tests/test_smoke.py passes (13/13)
|
|
20
|
+
- pytest tests/test_db_integration.py passes (11/11)
|
|
21
|
+
- ruff check src/ passes on the edited files
|
|
22
|
+
OMISSIONS:
|
|
23
|
+
- steward_dispatch in non-claude-code mode returns {"status": "unavailable", "result": "Steward requires claude-code mode..."} instead of spawning a tool-using agent. Proper tool-use delegation (Anthropic SDK messages API with tools param) is out of scope — multi-hour refactor.
|
|
24
|
+
- No new tests added; existing smoke + integration tests cover the non-delegation path. Live verification of open-mode requires ANTHROPIC_API_KEY in CI, which is a separate secrets/keys task.
|
|
25
|
+
- gemini_client and naomi (Naomi worker) unchanged — already use their own google-genai path
|
|
26
|
+
- engine/claude_agent.py line 264 — NOT in scope (this is the legacy shim; newer sites should use llm_backend.call_claude instead, but claude_agent.py still works for back-compat and is out of scope)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
openmode-fixes-v1
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
TASK: manual CI — replace auto-trigger with local script
|
|
2
|
+
SCOPE: routine
|
|
3
|
+
FILES: scripts/ci_local.py (new), Makefile (ci target), .github/workflows/ci.yml, .github/workflows/wheel-smoke.yml, .github/workflows/docs.yml
|
|
4
|
+
REPLACES: auto-triggered workflows that burn GitHub Actions minutes on every push/PR — replaced by local script + workflow_dispatch manual-only triggers
|
|
5
|
+
MIGRATION: none — existing v* tag triggers preserved on all workflows (release/publish paths unchanged)
|
|
6
|
+
CALLERS: developer runs `python scripts/ci_local.py` or `make ci` before pushing. CI workflows callable from Actions UI via workflow_dispatch when needed.
|
|
7
|
+
USER_PATH: developer makes code change → runs `python scripts/ci_local.py` locally → script runs ruff + pytest smoke + mkdocs + wheel build + DB integration test sequentially → exits 0 on success, 1 with failure list on failure → developer pushes only if green
|
|
8
|
+
RED_STATE: ci.yml/wheel-smoke.yml/docs.yml all had `on: push: branches: [main]` and `pull_request: branches: [main]` — every push/PR triggered ~7min of CI runs per commit. No cross-platform local CI script existed; Makefile targets unusable on Windows without `make`.
|
|
9
|
+
RED_TYPE: INFRASTRUCTURE
|
|
10
|
+
GREEN_CONDITION: `python scripts/ci_local.py` runs all 5 checks (ruff, pytest smoke, mkdocs, wheel build, pytest integration) on any OS with Python, reports pass/fail per check, exits non-zero on any failure. Pushing to main does NOT fire ci.yml/wheel-smoke.yml/docs.yml. Pushing a `v*` tag DOES fire all of them plus publish.yml + release.yml.
|
|
11
|
+
OMISSIONS:
|
|
12
|
+
- No Docker / `act` integration — users wanting true CI parity can still run `docker compose run --rm integration` as before
|
|
13
|
+
- scripts/ci_local.py does not support parallel execution — runs checks sequentially (acceptable for <10s per check)
|
|
14
|
+
- Makefile `ci` target delegates to python script; mac/linux users could bypass this but it's simpler to maintain one implementation
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
TASK: apply Open-mode RT fixes (text accumulation, exception guard, response_format)
|
|
2
|
+
SCOPE: non-trivial
|
|
3
|
+
FILES: src/amatelier/llm_backend.py, src/amatelier/engine/judge_scorer.py, src/amatelier/engine/classify_concepts.py, src/amatelier/engine/backfill_distill.py, src/amatelier/engine/roundtable_runner.py
|
|
4
|
+
REPLACES:
|
|
5
|
+
1. AnthropicSDKBackend.complete_with_tools at llm_backend.py:319-321 `final_text = "".join(text_chunks)` — only captures the LAST iteration's text. All intermediate-turn narration is discarded when the model stops calling tools.
|
|
6
|
+
2. AnthropicSDKBackend.complete_with_tools at llm_backend.py:298 `msg = client.messages.create(...)` — no try/except. SDK RateLimitError / APIError / network failures crash the loop with the partial `messages` state inaccessible to callers for retry.
|
|
7
|
+
3. OpenAICompatBackend.complete at llm_backend.py:~305 `client.chat.completions.create(...)` — no `response_format`. GPT-4o and most OpenAI-compat models often emit markdown-fenced JSON or conversational filler when engine prompts require strict JSON (judge scoring, skill classification, skill distillation). Causes JSON parse crash for first openai-compat user.
|
|
8
|
+
MIGRATION: None — all three fixes are additive or strictly-more-robust behavior. Existing claude-code and anthropic-sdk callers see no behavior change. openai-compat callers now get valid JSON where engine prompts request it; callers that pass text prompts continue to work (json_mode defaults to False).
|
|
9
|
+
CALLERS:
|
|
10
|
+
- complete_with_tools: only called from steward_dispatch.spawn_steward_subagent() — this is the Steward tool-use path in anthropic-sdk mode.
|
|
11
|
+
- complete (with new json_mode kwarg): currently called from 6 engine call sites, 4 of which request JSON-shaped output (judge_scorer, classify_concepts, backfill_distill, roundtable_runner._distill_skills). The other 2 (Haiku summarizer + therapist) call it for text, so no json_mode change is needed there.
|
|
12
|
+
USER_PATH:
|
|
13
|
+
Fix 1: user in anthropic-sdk mode runs amatelier roundtable with [[request: ...]] tags → Steward invokes complete_with_tools → model narrates "Let me check X" in iteration 1, calls read_file, synthesizes "Based on X, the answer is Y" in iteration 2 → BEFORE: only "Based on X..." returned. AFTER: both iterations' text concatenated.
|
|
14
|
+
Fix 2: user in anthropic-sdk mode hits a transient 429 rate-limit during iteration 3 of a Steward loop → BEFORE: RateLimitError propagates unhandled, partial message state lost, Steward returns status=error. AFTER: exception caught, accumulated messages visible in log, tool_use_id round-trip preserved, Steward returns status=error with full diagnostic context.
|
|
15
|
+
Fix 3: user with OPENAI_API_KEY or OPENROUTER_API_KEY set, runs first roundtable → judge_scorer._call_sonnet() calls backend.complete(..., json_mode=True) → OpenAICompatBackend adds response_format={"type":"json_object"} → GPT-4o returns clean JSON → engine parses successfully. BEFORE: GPT-4o returns `` ```json\n{...}\n``` ``, parser crashes with JSONDecodeError.
|
|
16
|
+
RED_STATE:
|
|
17
|
+
- llm_backend.py:320 `final_text = "".join(text_chunks)` — text_chunks is the current iteration's blocks only; prior iterations' text already discarded at the top of each loop iteration
|
|
18
|
+
- llm_backend.py:298-305 client.messages.create is not inside a try/except; only the tool_executor call later in the loop is protected
|
|
19
|
+
- llm_backend.py:~305 OpenAICompatBackend.complete client.chat.completions.create has no response_format param and no json_mode detection
|
|
20
|
+
- judge_scorer.py:~155 backend.complete(system=..., prompt=..., model="sonnet", max_tokens=8000, timeout=360, effort=effort) — passes no json_mode; JSON-requiring prompt hits openai-compat without response_format
|
|
21
|
+
- classify_concepts.py, backfill_distill.py, roundtable_runner.py:_distill_skills: same pattern — backend.complete without json_mode, all 3 request JSON output
|
|
22
|
+
RED_TYPE: USER-OBSERVABLE
|
|
23
|
+
GREEN_CONDITION:
|
|
24
|
+
1. complete_with_tools accumulates text from all iterations (final_text = existing_text + "".join(current_chunks)) — unit test: mock 3 iterations, assert iteration-2 text in returned Completion.text
|
|
25
|
+
2. complete_with_tools wraps the `msg = client.messages.create(...)` call in try/except; on exception, returns a Completion with text=accumulated_text_so_far, model, backend, latency_ms, and logs a warning with the accumulated messages length
|
|
26
|
+
3. LLMBackend.complete accepts optional `json_mode: bool = False`. OpenAICompatBackend translates json_mode=True → response_format={"type":"json_object"}. ClaudeCLIBackend and AnthropicSDKBackend accept and ignore (Claude handles JSON without hint).
|
|
27
|
+
4. Four engine call sites pass json_mode=True where they expect JSON: judge_scorer._call_sonnet, classify_concepts._call_sonnet_classifier, backfill_distill.distill_one, roundtable_runner._distill_skills
|
|
28
|
+
5. Local CI passes: ruff, pytest smoke 13/13, pytest integration 11/11, mkdocs build
|
|
29
|
+
OMISSIONS:
|
|
30
|
+
- Marcus's 5 mock tests are NOT added in this commit. They're good engineering but not ship-blockers; add in a follow-up commit (tests/test_llm_backend.py).
|
|
31
|
+
- Extended-thinking cost/quality assertion remains uncovered (requires live key; documented as known gap in tests/README.md follow-up).
|
|
32
|
+
- roundtable_runner._summarize_round_haiku and therapist._call_llm pass text prompts — no json_mode change, intentionally left at default False.
|
|
33
|
+
- gemini_agent uses its own path, unchanged.
|
|
34
|
+
- The RT infrastructure collapse (14 worker timeouts in rounds 2-3) is not addressed by these code fixes — it's a concurrency/rate-limit product behavior to document separately, not a code bug.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
TASK: apply Security RT v2 mandatory mitigations (#2, #3, #4)
|
|
2
|
+
SCOPE: critical
|
|
3
|
+
FILES: src/amatelier/engine/steward_tools.py, src/amatelier/engine/steward_dispatch.py, src/amatelier/cli.py, src/amatelier/paths.py
|
|
4
|
+
REPLACES: three security holes confirmed by Security RT digest-afd96c74180e and Elena's Grand Insight ("path containment and sensitive-file access are orthogonal concerns, and only the first is defended"):
|
|
5
|
+
1. steward_tools.read_file() has no credential denylist — agents can request `.env`, `.git/config`, `~/.aws/credentials` and they pass _safe_resolve() because they're inside WORKSPACE_ROOT
|
|
6
|
+
2. steward_dispatch.format_result() returns full result text and runner persists it to digest + steward-log JSON — credentials read once exfiltrate to durable artifacts
|
|
7
|
+
3. spawn_steward_subagent() executes on first dispatch with no user consent moment — GDPR Article 13 requires disclosure before the processing event, not at install time
|
|
8
|
+
MIGRATION: Existing CI/automation that runs amatelier roundtables must set AMATELIER_STEWARD_CONSENT=1 to skip the runtime prompt; documented in CHANGELOG and .env.example.
|
|
9
|
+
CALLERS:
|
|
10
|
+
- steward_tools.read_file() — called from dispatch_tool() during anthropic-sdk Steward tool-use loop
|
|
11
|
+
- steward_dispatch.format_result() — called from runner research-window phase
|
|
12
|
+
- steward_dispatch.spawn_steward_subagent() — called from runner research-window + per-round dispatch
|
|
13
|
+
- cli.py existing roundtable subcommand — gains pre-flight consent check
|
|
14
|
+
USER_PATH: developer runs `amatelier roundtable` → CLI checks AMATELIER_STEWARD_CONSENT env or prior accept → if neither, prints disclosure + prompts for y/n → on consent, sets process env var for child processes → runner enters research window → agents emit `[[request: read .env]]` → steward dispatch resolves to read_file('.env') → _is_secret_path(p) returns True → returns "Error: blocked secret-path .env (Steward denylist)" → result truncated to 4KB at format_result and persisted to the digest in truncated form → no credential ever transits to the Anthropic API or persists to a disk artifact
|
|
15
|
+
RED_STATE:
|
|
16
|
+
- steward_tools.py:140-152 read_file() opens any path that passes _safe_resolve(). No filename or extension check.
|
|
17
|
+
- steward_dispatch.py:419-422 format_result() returns the full result['result'] string for runner injection.
|
|
18
|
+
- roundtable_runner.py around line 590 db_cmd("speak", "runner", inject_msg) writes full text to messages table → digest persistence.
|
|
19
|
+
- StewardLog.record() at steward_dispatch.py around line 440 writes full result to steward-log JSON.
|
|
20
|
+
- cli.py roundtable command spawns runner immediately on invocation. No consent moment.
|
|
21
|
+
RED_TYPE: USER-OBSERVABLE (privacy + security harm to end users)
|
|
22
|
+
GREEN_CONDITION:
|
|
23
|
+
- steward_tools._is_secret_path(p) blocks: `.env`, `.env.*`, `*.pem`, `*.key`, `*.p12`, `*.pfx`, `id_rsa`, `id_ed25519`, `credentials`, `.git/config`, `.aws/credentials`, `.netrc`, `.npmrc`, `.pypirc`, anything ending in `_token` or `_secret` or `_key` (case-insensitive). read_file() and grep() return "Error: blocked secret-path..." string instead of content.
|
|
24
|
+
- format_result() truncates the result text at 4096 chars before injection, prepending a `[truncated to 4KB]` marker if needed.
|
|
25
|
+
- StewardLog.record() truncates the persisted result text at 4096 chars.
|
|
26
|
+
- First spawn_steward_subagent() call per amatelier process checks env AMATELIER_STEWARD_CONSENT in {"1","yes","true"}; if missing, raises StewardConsentRequired with a documented message; cli.py catches the exception, prompts the user with a clear disclosure (sends file content excerpts to claude/anthropic API), and sets AMATELIER_STEWARD_CONSENT=1 for the current process if user accepts.
|
|
27
|
+
- All four checks have unit-test-style verification by directly invoking _is_secret_path() with a fixture list, calling format_result() with an oversize string, and calling spawn_steward_subagent() with the env var unset.
|
|
28
|
+
OMISSIONS:
|
|
29
|
+
- The denylist is filename-pattern based, not content-scanned. A user-renamed credential file (e.g. mysecret.txt) is not blocked — documented as known limitation.
|
|
30
|
+
- The truncation length is hardcoded 4096; not yet exposed in config.json. A future RT can tune.
|
|
31
|
+
- Consent is per-process, not persistent — restarting amatelier re-prompts. Persistent consent (a checkbox in user_data_dir) deferred.
|
|
32
|
+
- Steward in claude-code mode (subprocess CLI) gets the truncation but not the read_file denylist — denylist runs in the SDK tool-use path only. CLI mode uses the actual claude binary's Read tool which has its own surface. This is documented as deferred — claude-code Steward limitation.
|
|
33
|
+
- openai-compat backend already returns "unavailable" for Steward; mitigations don't apply there.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
TASK: implement Steward tool use in anthropic-sdk mode
|
|
2
|
+
SCOPE: non-trivial
|
|
3
|
+
FILES: src/amatelier/engine/steward_tools.py (new), src/amatelier/llm_backend.py, src/amatelier/engine/steward_dispatch.py
|
|
4
|
+
REPLACES: steward_dispatch.py returns {"status": "unavailable"} in open mode — degrades Steward empirical grounding. Implement actual Anthropic SDK tool-use loop (messages API with tools= param) backed by local read_file/grep/glob functions sandboxed to WORKSPACE_ROOT.
|
|
5
|
+
MIGRATION: none — claude-code mode path unchanged; anthropic-sdk now succeeds with real lookups instead of returning degradation message; openai-compat still degrades (tool schemas differ across OAI-compat providers, out of scope)
|
|
6
|
+
CALLERS:
|
|
7
|
+
- steward_dispatch.spawn_steward_subagent() at line ~253 — called from roundtable_runner when agents emit [[request: ...]] tags
|
|
8
|
+
- AnthropicSDKBackend.complete_with_tools() (new method) — called only from steward_dispatch, not wired into LLMBackend Protocol (avoids forcing all backends to implement)
|
|
9
|
+
- steward_tools.dispatch_tool(name, input) — internal router called by the tool-use loop
|
|
10
|
+
USER_PATH: amatelier roundtable (ANTHROPIC_API_KEY set, no claude CLI) → worker emits "[[request: show schema of messages table]]" → runner detects request → steward_dispatch.spawn_steward_subagent() → backend.name == "anthropic-sdk" → call complete_with_tools(system=STEWARD_SYSTEM_PROMPT, user=request, tools=STEWARD_TOOL_SPECS) → Anthropic returns tool_use block for grep/read_file → steward_tools.dispatch_tool() executes locally with path validation → result appended as tool_result → loop until model returns text → return {"status": "success", "result": text} to runner
|
|
11
|
+
RED_STATE: steward_dispatch.py:291-310 returns {"status": "unavailable"} when backend.name != "claude-code". In anthropic-sdk mode, [[request]] tags produce degradation messages instead of real data.
|
|
12
|
+
RED_TYPE: USER-OBSERVABLE
|
|
13
|
+
GREEN_CONDITION:
|
|
14
|
+
- steward_tools.py exports STEWARD_TOOL_SPECS (3 tools: read_file, grep, glob) and dispatch_tool(name, input) -> str
|
|
15
|
+
- steward_tools._safe_resolve() rejects path-traversal attempts (absolute paths outside WORKSPACE_ROOT, ../../etc)
|
|
16
|
+
- AnthropicSDKBackend has a new method complete_with_tools(system, user, tools, max_iterations=10) that loops tool_use → tool_result until final text
|
|
17
|
+
- steward_dispatch in anthropic-sdk mode returns real data, same structure as claude-code: {"status": "success", "result": str, "elapsed_s": float}
|
|
18
|
+
- Existing claude-code path unchanged (byte-identical flags + subprocess call)
|
|
19
|
+
- openai-compat continues to return {"status": "unavailable"} — documented in OMISSIONS
|
|
20
|
+
- pytest tests/test_smoke.py passes (13/13), pytest tests/test_db_integration.py passes (11/11)
|
|
21
|
+
- ruff clean on new files
|
|
22
|
+
OMISSIONS:
|
|
23
|
+
- OpenAI-compat tool use NOT implemented. OpenAI and OpenRouter support tools but schemas differ (OpenAI functions vs Anthropic tools). A cross-provider abstraction would double the code for marginal benefit since OAI-compat Steward usage is niche. Users who need Steward in OAI-compat must set AMATELIER_MODE=anthropic-sdk with ANTHROPIC_API_KEY.
|
|
24
|
+
- Tool sandbox is path-based only — no syscall sandboxing. Steward can read any file under WORKSPACE_ROOT including .env, secrets, etc. Same security posture as claude-code mode (Read tool with --dangerously-skip-permissions).
|
|
25
|
+
- No timeout per tool call (only overall complete_with_tools timeout). A single grep on a huge directory could block.
|
|
26
|
+
- No token accounting across tool-use iterations — follows existing judge_scorer pattern.
|
|
27
|
+
- No new tests; existing smoke + integration tests cover non-steward paths. Live verification requires ANTHROPIC_API_KEY which is a separate concern.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
TASK: wire judge max-effort via extended thinking
|
|
2
|
+
SCOPE: non-trivial
|
|
3
|
+
FILES: src/amatelier/llm_backend.py, src/amatelier/engine/judge_scorer.py
|
|
4
|
+
REPLACES: AnthropicSDKBackend.complete() at llm_backend.py:207 currently ignores thinking; _call_sonnet() at judge_scorer.py:144 passes max_tokens=8000 with no thinking budget; config.json has "effort": "max" under judge but code ignores it
|
|
5
|
+
MIGRATION: none — new optional `effort` param on LLMBackend.complete() defaults to None, preserves all existing call sites
|
|
6
|
+
CALLERS: judge_scorer._call_sonnet() will pass effort=<config.judge.effort> to backend.complete(). All other callers (call_claude shim at llm_backend.py:427, other engine sites) continue passing no effort and get identical behavior to today.
|
|
7
|
+
USER_PATH: amatelier roundtable → engine/roundtable_runner.py triggers scoring phase → engine/judge_scorer.py:score_contributions() → _call_sonnet(prompt) → reads config.judge.effort via _get_judge_effort() → backend.complete(effort="max") → AnthropicSDKBackend.complete() adds thinking={"type":"enabled","budget_tokens":16000} to client.messages.create() → Anthropic API returns with extended reasoning → higher-quality scoring
|
|
8
|
+
RED_STATE: llm_backend.py:219 `client.messages.create(model=, max_tokens=, system=, messages=, timeout=)` — no thinking kwarg. judge_scorer.py:156-159 `backend.complete(system="", prompt=prompt, model="sonnet", max_tokens=8000, timeout=360)` — no effort param exists. config.json line 37 `"effort": "max"` reads but never flows to API call.
|
|
9
|
+
RED_TYPE: INFRASTRUCTURE
|
|
10
|
+
GREEN_CONDITION: When config.judge.effort == "max" AND backend.name == "anthropic-sdk", the Anthropic messages.create() call receives thinking={"type": "enabled", "budget_tokens": 16000} and max_tokens is bumped to ≥20000. Other backends (claude-code, openai-compat) accept the effort kwarg without erroring and ignore it (log debug). judge_scorer continues to work when config has no effort field (effort=None, no thinking block).
|
|
11
|
+
OMISSIONS:
|
|
12
|
+
- OpenAICompatBackend.complete() at llm_backend.py:284 has no extended-thinking equivalent; OpenRouter/OpenAI users ignore effort (platform-level limitation)
|
|
13
|
+
- Only judge reads effort from config; agent LLM calls at engine/roundtable_runner.py do not propagate effort (out of scope)
|
|
14
|
+
- budget_tokens is hardcoded to 16000 for effort="max"; not exposed as config.judge.budget_tokens (follow-up)
|
|
15
|
+
- No new tests added; tests/test_smoke.py already covers backend.complete() without effort and will continue to pass
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "amatelier dev",
|
|
3
|
+
"image": "mcr.microsoft.com/devcontainers/python:1-3.10",
|
|
4
|
+
"features": {
|
|
5
|
+
"ghcr.io/devcontainers/features/github-cli:1": {},
|
|
6
|
+
"ghcr.io/devcontainers/features/common-utils:2": {}
|
|
7
|
+
},
|
|
8
|
+
"postCreateCommand": "pip install -e \".[dev]\"",
|
|
9
|
+
"customizations": {
|
|
10
|
+
"vscode": {
|
|
11
|
+
"extensions": [
|
|
12
|
+
"ms-python.python",
|
|
13
|
+
"charliermarsh.ruff",
|
|
14
|
+
"ms-python.mypy-type-checker"
|
|
15
|
+
],
|
|
16
|
+
"settings": {
|
|
17
|
+
"python.testing.pytestEnabled": true,
|
|
18
|
+
"python.testing.pytestArgs": ["tests"]
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Required for the Gemini Flash agent (Naomi)
|
|
2
|
+
# Get your key at: https://aistudio.google.com/apikey
|
|
3
|
+
GEMINI_API_KEY=your-gemini-api-key-here
|
|
4
|
+
|
|
5
|
+
# Optional — override workspace root when the skill is not installed at
|
|
6
|
+
# the default .claude/skills/claude-suite layout. Rarely needed.
|
|
7
|
+
# AMATELIER_WORKSPACE=/absolute/path/to/project
|
|
8
|
+
|
|
9
|
+
# Optional — enables proposal queueing into an external evolution harness
|
|
10
|
+
# (therapist.py). Leave unset to run standalone.
|
|
11
|
+
# CLAUDE_EVOLUTION_HARNESS=/absolute/path/to/harness/repo
|
|
12
|
+
|
|
13
|
+
# Required for non-interactive / CI use of `amatelier roundtable`.
|
|
14
|
+
# Confirms you understand the Steward subagent reads files from
|
|
15
|
+
# AMATELIER_WORKSPACE and sends excerpts to the configured LLM provider.
|
|
16
|
+
# Interactive users will be prompted on first dispatch instead.
|
|
17
|
+
# AMATELIER_STEWARD_CONSENT=1
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Something isn't working as expected
|
|
4
|
+
title: '[BUG] '
|
|
5
|
+
labels: bug
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Describe the bug
|
|
10
|
+
|
|
11
|
+
A clear, concise description.
|
|
12
|
+
|
|
13
|
+
## To reproduce
|
|
14
|
+
|
|
15
|
+
1. Step one
|
|
16
|
+
2. Step two
|
|
17
|
+
3. See error
|
|
18
|
+
|
|
19
|
+
## Expected behavior
|
|
20
|
+
|
|
21
|
+
What should have happened.
|
|
22
|
+
|
|
23
|
+
## Environment
|
|
24
|
+
|
|
25
|
+
- OS:
|
|
26
|
+
- Version of this package:
|
|
27
|
+
- Python/Node version:
|
|
28
|
+
- How installed (pip / npm / source):
|
|
29
|
+
|
|
30
|
+
## Logs / traceback
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
paste here
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Additional context
|
|
37
|
+
|
|
38
|
+
Anything else relevant.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature request
|
|
3
|
+
about: Suggest a new capability or improvement
|
|
4
|
+
title: '[FEATURE] '
|
|
5
|
+
labels: enhancement
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Problem
|
|
10
|
+
|
|
11
|
+
What problem does this solve? Who has this problem?
|
|
12
|
+
|
|
13
|
+
## Proposed solution
|
|
14
|
+
|
|
15
|
+
Describe what you'd like to happen.
|
|
16
|
+
|
|
17
|
+
## Alternatives considered
|
|
18
|
+
|
|
19
|
+
Other approaches you've thought about.
|
|
20
|
+
|
|
21
|
+
## Additional context
|
|
22
|
+
|
|
23
|
+
Screenshots, links to similar features elsewhere, etc.
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# GitHub Copilot Instructions — amatelier
|
|
2
|
+
|
|
3
|
+
_This file is generated from CLAUDE.md. Do not hand-edit._
|
|
4
|
+
|
|
5
|
+
Instructions for working on this repository inside Claude Code.
|
|
6
|
+
|
|
7
|
+
## What this project is
|
|
8
|
+
|
|
9
|
+
A self-evolving multi-model AI team skill for Claude Code
|
|
10
|
+
|
|
11
|
+
## Repo layout
|
|
12
|
+
|
|
13
|
+
- `src/amatelier/` — shipped package (the canonical code)
|
|
14
|
+
- `tests/` — mirrors `src/` structure
|
|
15
|
+
- `examples/first_run/` — zero-config runnable demo
|
|
16
|
+
- `docs/` — human documentation (MkDocs, Diataxis tiers)
|
|
17
|
+
- `llm/` — LLM-facing documentation (flat, exhaustive, machine-readable)
|
|
18
|
+
- `scripts/` — shell and one-off utility scripts
|
|
19
|
+
- `.github/workflows/` — CI, publish, release, docs workflows
|
|
20
|
+
|
|
21
|
+
## Rules
|
|
22
|
+
|
|
23
|
+
1. **Amatayo Standard.** This repo follows the Amatayo Standard. Structure is enforced by CI.
|
|
24
|
+
2. **Dual-docs invariant.** Every change that adds a public symbol, CLI flag, or config key must update `llm/SPEC.md` and the relevant `docs/reference/*` file. The `llm/API.md` and `llm/SCHEMA.md` files are generated — don't hand-edit them.
|
|
25
|
+
3. **`llm/` is flat.** Never create subdirectories in `llm/`. Flat is the invariant.
|
|
26
|
+
4. **Tests required.** New code requires new tests. `make test` must pass before PR.
|
|
27
|
+
5. **Conventional commits.** `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`. Releases are driven by commit history.
|
|
28
|
+
6. **No secrets.** `.env.example` documents variables; real values never committed.
|
|
29
|
+
|
|
30
|
+
## Two-layer paths (critical)
|
|
31
|
+
|
|
32
|
+
Amatelier is pip-installable; the bundled package must stay read-only at runtime. Two layers exist:
|
|
33
|
+
|
|
34
|
+
- **Bundled layer** — `src/amatelier/` and any files shipped inside the wheel. This is the canonical source. Runtime code MUST NOT write here.
|
|
35
|
+
- **User data layer** — everything returned by `amatelier.paths.user_data_dir()` and its siblings (`user_agent_dir`, `user_db_path`, `user_digest_dir`, `user_briefing_dir`, `user_store_ledger`, `user_novel_concepts`, `user_shared_skills_index`, `user_config_override`). All mutable state goes here.
|
|
36
|
+
|
|
37
|
+
Rules for AI agents editing this repo:
|
|
38
|
+
|
|
39
|
+
- Any code that persists state, writes logs, updates ledgers, or mutates agent memory must route through a `paths.user_*()` helper. Do not hard-code paths under `src/amatelier/` for writes.
|
|
40
|
+
- Persona seed files (per-agent `CLAUDE.md`, `IDENTITY.md` under `src/amatelier/agents/<name>/`) are bundled. Edits to seeds only affect a user's environment after `amatelier refresh-seeds` or a fresh install.
|
|
41
|
+
- Generated files must not be hand-edited: `llm/API.md`, `llm/SCHEMA.md`, `llms.txt`, `llms-full.txt`, `.cursor/rules/*.mdc`, `.github/copilot-instructions.md`. CI regenerates them.
|
|
42
|
+
|
|
43
|
+
## Three LLM backend modes
|
|
44
|
+
|
|
45
|
+
All LLM calls must go through `amatelier.llm_backend.get_backend()`. The backend abstraction resolves to one of three modes at runtime:
|
|
46
|
+
|
|
47
|
+
| Mode | Selected when | Backend class |
|
|
48
|
+
|---|---|---|
|
|
49
|
+
| `claude-code` | Running inside Claude Code, `claude` binary on PATH | `ClaudeCLIBackend` |
|
|
50
|
+
| `anthropic-sdk` | `ANTHROPIC_API_KEY` present, no Claude Code session | `AnthropicSDKBackend` |
|
|
51
|
+
| `openai-compat` | `OPENAI_API_KEY`, `OPENROUTER_API_KEY`, or local Ollama | `OpenAICompatBackend` |
|
|
52
|
+
|
|
53
|
+
Override with `AMATELIER_MODE=claude-code|anthropic-sdk|openai-compat`.
|
|
54
|
+
|
|
55
|
+
When introducing new LLM calls:
|
|
56
|
+
|
|
57
|
+
- Call `get_backend()` and use the returned object's interface. Do not shell out to the `claude` CLI directly and do not `import anthropic` at the call site.
|
|
58
|
+
- Any new backend capability must be added to the `LLMBackend` Protocol in `src/amatelier/llm_backend.py` and implemented by all three concrete backends.
|
|
59
|
+
- Surface new provider env vars in `describe_environment()` so `amatelier config` reports them.
|
|
60
|
+
|
|
61
|
+
## Tests
|
|
62
|
+
|
|
63
|
+
- `tests/test_smoke.py` — pytest suite, import/CLI smoke checks, runs in CI
|
|
64
|
+
- `tests/test_refresh_seeds.py` — pytest suite, verifies seed materialization, runs in CI
|
|
65
|
+
- `tests/test_integration.py` — **standalone script**, exercises live LLM backends, NOT pytest and NOT run in CI. Execute manually when verifying backend changes.
|
|
66
|
+
|
|
67
|
+
Run the CI-equivalent suites locally:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pytest tests/test_smoke.py -v
|
|
71
|
+
pytest tests/test_refresh_seeds.py -v
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Common commands
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
make setup # install package + dev deps
|
|
78
|
+
make test # run test suite
|
|
79
|
+
make lint # ruff + mypy
|
|
80
|
+
make demo # run examples/first_run/
|
|
81
|
+
make docs # build docs site locally
|
|
82
|
+
amatelier config # show active mode, credentials, paths
|
|
83
|
+
amatelier refresh-seeds # rematerialize per-agent seeds in user data dir
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## When editing docs
|
|
87
|
+
|
|
88
|
+
Use the `dual-docs-architect` skill. It classifies every write (tutorial / guide / reference / explanation x human / LLM / both) and routes to the correct file.
|
|
89
|
+
|
|
90
|
+
## When scaffolding new repos
|
|
91
|
+
|
|
92
|
+
Use the `repo-architect` skill. Don't copy this file by hand — let the skill render it.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
# Matrix policy: manually dispatched runs use the fast path (ubuntu + Python 3.12 only).
|
|
4
|
+
# Full matrix (3 OSes x 4 Pythons) runs on v* tags so release validation stays
|
|
5
|
+
# thorough without burning minutes on every commit.
|
|
6
|
+
|
|
7
|
+
on:
|
|
8
|
+
# Manual-only: run `make ci` locally before pushing. Fires on v* tags
|
|
9
|
+
# for release validation, and can be triggered via the Actions UI.
|
|
10
|
+
workflow_dispatch:
|
|
11
|
+
push:
|
|
12
|
+
tags: ['v*']
|
|
13
|
+
|
|
14
|
+
permissions:
|
|
15
|
+
contents: read
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
test:
|
|
19
|
+
runs-on: ${{ matrix.os }}
|
|
20
|
+
strategy:
|
|
21
|
+
fail-fast: false
|
|
22
|
+
matrix:
|
|
23
|
+
# Full matrix on release tags; fast path otherwise.
|
|
24
|
+
os: ${{ startsWith(github.ref, 'refs/tags/v') && fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') || fromJSON('["ubuntu-latest"]') }}
|
|
25
|
+
python-version: ${{ startsWith(github.ref, 'refs/tags/v') && fromJSON('["3.10", "3.11", "3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
- uses: actions/setup-python@v5
|
|
29
|
+
with:
|
|
30
|
+
python-version: ${{ matrix.python-version }}
|
|
31
|
+
cache: pip
|
|
32
|
+
- name: Install
|
|
33
|
+
run: pip install -e ".[dev]"
|
|
34
|
+
- name: Lint
|
|
35
|
+
run: ruff check src tests
|
|
36
|
+
- name: Type check (Linux/macOS only — Windows mypy trips on system dirs)
|
|
37
|
+
if: runner.os != 'Windows'
|
|
38
|
+
run: |
|
|
39
|
+
mypy --no-strict-optional \
|
|
40
|
+
src/amatelier/__init__.py \
|
|
41
|
+
src/amatelier/cli.py \
|
|
42
|
+
src/amatelier/paths.py \
|
|
43
|
+
src/amatelier/llm_backend.py || true
|
|
44
|
+
- name: Test (pytest smoke suite — no live APIs)
|
|
45
|
+
run: pytest tests/test_smoke.py -v
|
|
46
|
+
|
|
47
|
+
build-check:
|
|
48
|
+
runs-on: ubuntu-latest
|
|
49
|
+
steps:
|
|
50
|
+
- uses: actions/checkout@v4
|
|
51
|
+
- uses: actions/setup-python@v5
|
|
52
|
+
with:
|
|
53
|
+
python-version: "3.12"
|
|
54
|
+
- run: pip install build
|
|
55
|
+
- run: python -m build
|
|
56
|
+
- run: pip install dist/*.whl
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Builds the human docs site (MkDocs Material) and regenerates LLM-facing
|
|
2
|
+
# derivatives. Runs only on manual dispatch (Actions UI) and on v* tag pushes;
|
|
3
|
+
# it is not triggered by pushes to main or by pull requests.
|
|
4
|
+
|
|
5
|
+
name: Docs
|
|
6
|
+
|
|
7
|
+
on:
|
|
8
|
+
# Manual-only: run `mkdocs build` locally to verify. Trigger via Actions UI
|
|
9
|
+
# when you want to publish the docs site. Fires on v* tags for releases.
|
|
10
|
+
workflow_dispatch:
|
|
11
|
+
push:
|
|
12
|
+
tags: ['v*']
|
|
13
|
+
|
|
14
|
+
permissions:
|
|
15
|
+
contents: read
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
build:
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
permissions:
|
|
21
|
+
contents: write
|
|
22
|
+
pages: write
|
|
23
|
+
id-token: write
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
with:
|
|
27
|
+
fetch-depth: 0
|
|
28
|
+
|
|
29
|
+
- name: Set up Python
|
|
30
|
+
uses: actions/setup-python@v5
|
|
31
|
+
with:
|
|
32
|
+
python-version: "3.12"
|
|
33
|
+
cache: pip
|
|
34
|
+
|
|
35
|
+
- name: Install MkDocs + plugins
|
|
36
|
+
run: |
|
|
37
|
+
pip install \
|
|
38
|
+
mkdocs-material \
|
|
39
|
+
mkdocs-awesome-pages-plugin \
|
|
40
|
+
pymdown-extensions
|
|
41
|
+
|
|
42
|
+
- name: Build MkDocs site
|
|
43
|
+
run: mkdocs build --strict
|
|
44
|
+
|
|
45
|
+
# dual-docs-architect provides a regenerate script at scripts/regen_llm.py
|
|
46
|
+
# It reads src/, docs/, examples/ and writes llm/API.md, llm/SCHEMA.md,
|
|
47
|
+
# llms.txt, llms-full.txt, .cursor/rules/*, .github/copilot-instructions.md.
|
|
48
|
+
# If the script doesn't exist yet, this step is a no-op.
|
|
49
|
+
- name: Regenerate LLM surface
|
|
50
|
+
run: |
|
|
51
|
+
if [ -f scripts/regen_llm.py ]; then
|
|
52
|
+
python scripts/regen_llm.py
|
|
53
|
+
else
|
|
54
|
+
echo "scripts/regen_llm.py not present yet — skipping regeneration"
|
|
55
|
+
fi
|
|
56
|
+
|
|
57
|
+
- name: Check for unsynced derivatives
|
|
58
|
+
if: github.event_name == 'pull_request'
|
|
59
|
+
run: |
|
|
60
|
+
if ! git diff --exit-code; then
|
|
61
|
+
echo "::error::Generated files are out of sync. Run 'make docs' locally and commit."
|
|
62
|
+
exit 1
|
|
63
|
+
fi
|
|
64
|
+
|
|
65
|
+
- name: Deploy to GitHub Pages
|
|
66
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
67
|
+
uses: peaceiris/actions-gh-pages@v4
|
|
68
|
+
with:
|
|
69
|
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
70
|
+
publish_dir: ./site
|