planner-auto 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. planner_auto-0.5.0/PKG-INFO +363 -0
  2. planner_auto-0.5.0/README.md +332 -0
  3. planner_auto-0.5.0/planner_auto/__init__.py +19 -0
  4. planner_auto-0.5.0/planner_auto/agents.py +247 -0
  5. planner_auto-0.5.0/planner_auto/cli.py +1131 -0
  6. planner_auto-0.5.0/planner_auto/db.py +772 -0
  7. planner_auto-0.5.0/planner_auto/errors.py +96 -0
  8. planner_auto-0.5.0/planner_auto/export.py +340 -0
  9. planner_auto-0.5.0/planner_auto/git_utils.py +50 -0
  10. planner_auto-0.5.0/planner_auto/inspect.py +262 -0
  11. planner_auto-0.5.0/planner_auto/logging.py +107 -0
  12. planner_auto-0.5.0/planner_auto/loop/__init__.py +18 -0
  13. planner_auto-0.5.0/planner_auto/loop/convergence.py +127 -0
  14. planner_auto-0.5.0/planner_auto/loop/engine.py +745 -0
  15. planner_auto-0.5.0/planner_auto/loop/feedback.py +219 -0
  16. planner_auto-0.5.0/planner_auto/loop/history.py +201 -0
  17. planner_auto-0.5.0/planner_auto/prompts.py +57 -0
  18. planner_auto-0.5.0/planner_auto/review_workflow.py +349 -0
  19. planner_auto-0.5.0/planner_auto/reviewer/__init__.py +21 -0
  20. planner_auto-0.5.0/planner_auto/reviewer/contract.py +174 -0
  21. planner_auto-0.5.0/planner_auto/reviewer/direct_api.py +245 -0
  22. planner_auto-0.5.0/planner_auto/reviewer/parser.py +384 -0
  23. planner_auto-0.5.0/planner_auto/reviewer/prompts.py +171 -0
  24. planner_auto-0.5.0/planner_auto/sdk_wrapper.py +434 -0
  25. planner_auto-0.5.0/planner_auto/session.py +192 -0
  26. planner_auto-0.5.0/planner_auto/state.py +49 -0
  27. planner_auto-0.5.0/planner_auto/tui/__init__.py +18 -0
  28. planner_auto-0.5.0/planner_auto/tui/adapter.py +129 -0
  29. planner_auto-0.5.0/planner_auto/tui/bindings.py +16 -0
  30. planner_auto-0.5.0/planner_auto/tui/messages.py +135 -0
  31. planner_auto-0.5.0/planner_auto/tui/review_app.py +671 -0
  32. planner_auto-0.5.0/planner_auto/tui/screens/__init__.py +1 -0
  33. planner_auto-0.5.0/planner_auto/tui/screens/disposition_screen.py +137 -0
  34. planner_auto-0.5.0/planner_auto/tui/screens/help_screen.py +83 -0
  35. planner_auto-0.5.0/planner_auto/tui/screens/plan_screen.py +93 -0
  36. planner_auto-0.5.0/planner_auto/tui/screens/raw_response_screen.py +96 -0
  37. planner_auto-0.5.0/planner_auto/tui/styles/theme.tcss +235 -0
  38. planner_auto-0.5.0/planner_auto/tui/widgets/__init__.py +1 -0
  39. planner_auto-0.5.0/planner_auto/tui/widgets/convergence_panel.py +93 -0
  40. planner_auto-0.5.0/planner_auto/tui/widgets/current_round.py +157 -0
  41. planner_auto-0.5.0/planner_auto/tui/widgets/log_panel.py +106 -0
  42. planner_auto-0.5.0/planner_auto/tui/widgets/plan_panel.py +75 -0
  43. planner_auto-0.5.0/planner_auto/tui/widgets/round_detail.py +167 -0
  44. planner_auto-0.5.0/planner_auto/tui/widgets/round_list.py +115 -0
  45. planner_auto-0.5.0/planner_auto/tui/widgets/session_panel.py +73 -0
  46. planner_auto-0.5.0/planner_auto/validation.py +92 -0
  47. planner_auto-0.5.0/planner_auto.egg-info/PKG-INFO +363 -0
  48. planner_auto-0.5.0/planner_auto.egg-info/SOURCES.txt +85 -0
  49. planner_auto-0.5.0/planner_auto.egg-info/dependency_links.txt +1 -0
  50. planner_auto-0.5.0/planner_auto.egg-info/entry_points.txt +2 -0
  51. planner_auto-0.5.0/planner_auto.egg-info/requires.txt +14 -0
  52. planner_auto-0.5.0/planner_auto.egg-info/top_level.txt +1 -0
  53. planner_auto-0.5.0/pyproject.toml +56 -0
  54. planner_auto-0.5.0/setup.cfg +4 -0
  55. planner_auto-0.5.0/tests/test_agents.py +211 -0
  56. planner_auto-0.5.0/tests/test_backend_wiring.py +291 -0
  57. planner_auto-0.5.0/tests/test_check.py +275 -0
  58. planner_auto-0.5.0/tests/test_cli.py +203 -0
  59. planner_auto-0.5.0/tests/test_context.py +174 -0
  60. planner_auto-0.5.0/tests/test_contract.py +281 -0
  61. planner_auto-0.5.0/tests/test_convergence.py +145 -0
  62. planner_auto-0.5.0/tests/test_db.py +260 -0
  63. planner_auto-0.5.0/tests/test_db_v2.py +447 -0
  64. planner_auto-0.5.0/tests/test_direct_api.py +391 -0
  65. planner_auto-0.5.0/tests/test_direct_backend.py +381 -0
  66. planner_auto-0.5.0/tests/test_engine.py +869 -0
  67. planner_auto-0.5.0/tests/test_error_diagnostics.py +295 -0
  68. planner_auto-0.5.0/tests/test_export.py +272 -0
  69. planner_auto-0.5.0/tests/test_git_utils.py +158 -0
  70. planner_auto-0.5.0/tests/test_init.py +60 -0
  71. planner_auto-0.5.0/tests/test_inspect.py +297 -0
  72. planner_auto-0.5.0/tests/test_logging.py +212 -0
  73. planner_auto-0.5.0/tests/test_loop_output.py +274 -0
  74. planner_auto-0.5.0/tests/test_parser.py +380 -0
  75. planner_auto-0.5.0/tests/test_review_cli.py +387 -0
  76. planner_auto-0.5.0/tests/test_review_export.py +270 -0
  77. planner_auto-0.5.0/tests/test_review_workflow.py +176 -0
  78. planner_auto-0.5.0/tests/test_sdk_wrapper.py +126 -0
  79. planner_auto-0.5.0/tests/test_sdk_wrapper_v2.py +173 -0
  80. planner_auto-0.5.0/tests/test_session.py +193 -0
  81. planner_auto-0.5.0/tests/test_structured_logging.py +251 -0
  82. planner_auto-0.5.0/tests/test_tui_adapter.py +137 -0
  83. planner_auto-0.5.0/tests/test_tui_keybindings.py +231 -0
  84. planner_auto-0.5.0/tests/test_tui_live.py +365 -0
  85. planner_auto-0.5.0/tests/test_tui_messages.py +144 -0
  86. planner_auto-0.5.0/tests/test_tui_screens.py +231 -0
  87. planner_auto-0.5.0/tests/test_validation.py +206 -0
@@ -0,0 +1,363 @@
1
+ Metadata-Version: 2.4
2
+ Name: planner-auto
3
+ Version: 0.5.0
4
+ Summary: Interactive planning session manager with SQLite persistence and artifact export
5
+ Author-email: Danny Almaden <dan@ailab.ph>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ailabph/claude-ai-workflows
8
+ Project-URL: Repository, https://github.com/ailabph/claude-ai-workflows
9
+ Project-URL: Changelog, https://github.com/ailabph/claude-ai-workflows/blob/main/planner-auto/CHANGELOG.md
10
+ Keywords: planning,claude,ai,orchestration,milestone,gpt,review
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.10
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: click>=8.0
20
+ Requires-Dist: claude-agent-sdk<0.2.0,>=0.1.50
21
+ Requires-Dist: anthropic>=0.40.0
22
+ Requires-Dist: prompt_toolkit>=3.0
23
+ Requires-Dist: openai>=2.0
24
+ Requires-Dist: python-dotenv>=1.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
28
+ Requires-Dist: textual[dev]>=0.80.0; extra == "dev"
29
+ Provides-Extra: tui
30
+ Requires-Dist: textual>=0.80.0; extra == "tui"
31
+
32
+ # planner-auto
33
+
34
+ Automated planning session manager that produces milestone plans through interactive conversation with Claude and has them reviewed by GPT for quality. Plans are persisted in SQLite and exported as markdown artifacts ready for [orchestrator-auto](../orchestrator-auto/) to implement.
35
+
36
+ ## How It Works
37
+
38
+ ```
39
+ You describe a feature
40
+ → Claude asks clarifying questions
41
+ → Claude generates a milestone plan
42
+ → GPT reviews and critiques
43
+ → Claude revises based on feedback
44
+ → Repeat until GPT says GO
45
+ → Plan exported for orchestrator-auto
46
+ ```
47
+
48
+ Each session follows a strict lifecycle: **SETUP → CONTEXT → DISCUSSION → PLANNING → REVIEW → COMPLETE**. All state lives in SQLite. File artifacts (plans, reviews, chat logs) are exported views, not the source of truth.
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ cd planner-auto/
54
+ pip install -e . # Production
55
+ pip install -e ".[dev]" # With pytest
56
+ pip install -e ".[tui]" # With TUI dashboard (textual)
57
+
58
+ # Required
59
+ export ANTHROPIC_API_KEY="your-key" # For Claude (planner)
60
+ # OR
61
+ export CLAUDE_CODE_OAUTH_TOKEN="token" # Claude Pro/Max subscription
62
+
63
+ # Required for Plan 2 (reviewer)
64
+ export OPENAI_API_KEY="your-key" # For GPT-5.4 (reviewer)
65
+ ```
66
+
67
+ ## Quick Start
68
+
69
+ ```bash
70
+ # Start a session
71
+ planner-auto start --project my-api
72
+
73
+ # Add context files
74
+ planner-auto add-context <session-id> --file src/app.py
75
+ planner-auto add-context <session-id> --file src/models.py
76
+ planner-auto add-context <session-id> --note "Uses PostgreSQL, deployed on AWS"
77
+
78
+ # Discuss the feature (interactive mode)
79
+ planner-auto discuss <session-id> --interactive
80
+ # Type your feature description, Claude asks questions
81
+ # Type /done when ready to move to planning
82
+
83
+ # Or one-shot discuss with auto-advance
84
+ planner-auto discuss <session-id> "Add user registration with email validation" --done
85
+
86
+ # Generate the plan
87
+ planner-auto generate <session-id>
88
+ planner-auto generate <session-id> --model claude-opus-4-6 # Override model
89
+
90
+ # Export artifacts
91
+ planner-auto export <session-id>
92
+ planner-auto export <session-id> --output-dir ./my-plans/
93
+
94
+ # Complete the session
95
+ planner-auto complete <session-id>
96
+ ```
97
+
98
+ ## CLI Reference
99
+
100
+ ### Session Commands (Plan 1)
101
+
102
+ | Command | Description |
103
+ |---------|-------------|
104
+ | `start --project <name>` | Create a new planning session |
105
+ | `add-context <id> --file <path>` | Add a file to session context |
106
+ | `add-context <id> --note "text"` | Add a text note to session context |
107
+ | `discuss <id> "message"` | Send a single discussion message |
108
+ | `discuss <id> --interactive` | Enter interactive discussion mode (type `/done` to advance) |
109
+ | `discuss <id> "message" --done` | Send message and advance to PLANNING |
110
+ | `generate <id>` | Generate milestone plan from context + conversation |
111
+ | `generate <id> --model <model>` | Generate with a specific Claude model |
112
+ | `list` | List all sessions |
113
+ | `list --status active` | Filter sessions by status |
114
+ | `status <id>` | Show session details (phase, counts, blockers) |
115
+ | `resume <id>` | Resume a paused session (answer open blockers) |
116
+ | `export <id>` | Export session artifacts to disk |
117
+ | `export <id> --output-dir <path>` | Export to custom directory |
118
+ | `complete <id>` | Complete session (checks blockers, auto-exports) |
119
+
120
+ ### Global Flags
121
+
122
+ | Flag | Description |
123
+ |------|-------------|
124
+ | `--db-path <path>` | Override database path (default: `~/.planner-auto/planner.db`) |
125
+ | `--verbose` | Print detailed output |
126
+ | `--debug` | Print debug-level output + stack traces |
127
+
128
+ ### Review Commands (Plan 2)
129
+
130
+ | Command | Description |
131
+ |---------|-------------|
132
+ | `review <id>` | Run automated GPT review loop on current plan |
133
+ | `review <id> --fast` | Fast mode: 4 rounds, no history, basic prompt |
134
+ | `review <id> --max-rounds <n>` | Override round cap |
135
+ | `review <id> --no-review-history` | Disable review history context |
136
+ | `review <id> --reviewer-model <model>` | Override GPT model (default: gpt-5.4) |
137
+ | `review <id> --reviewer-reasoning <level>` | Reasoning effort (default: high) |
138
+ | `review <id> --complexity standard\|complex` | Override complexity detection |
139
+ | `review <id> --repo-root <path>` | Override repo root for .kafra handoff |
140
+ | `review <id> --tui` | Launch live TUI dashboard (requires `pip install planner-auto[tui]`) |
141
+
142
+ **Review loop features:**
143
+ - GPT-5.4 reviews with `resolution_guidance` + `target_section` per issue
144
+ - `keep/trim` sections: GPT tells Claude what to preserve and what to simplify
145
+ - `validate feedback`: Claude assesses each issue as ACCEPT / DEFER / REJECT
146
+ - Severity filtering: only `critical` + `major` issues reach Claude
147
+ - Review history: GPT sees previous plan + cumulative DEFER decisions across all rounds
148
+ - Complexity detection: auto-adjusts round cap (standard=8, complex=12)
149
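+ - Convergence: the plan is accepted when GPT returns GO, or when the round cap is reached with zero critical issues
150
+ - If the round cap is reached with critical issues remaining, the session is paused for human review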
151
+ - Final plan copied to `<repo>/.kafra/a-01-plans/`
152
+ - Review metadata persisted: model, cost, tokens, raw response per round
153
+ - **TUI mode** (`--tui`): Live dashboard with round progress, convergence sparkline, disposition drill-down. Requires `pip install planner-auto[tui]`.
154
+
155
+ ## Session Lifecycle
156
+
157
+ ```
158
+ SETUP ──► CONTEXT ──► DISCUSSION ──► PLANNING ──► REVIEW ──► COMPLETE
159
+ │ │ ▲
160
+ │ └──────────┘
161
+ │ (revise & re-review)
162
+ └───────────────────────┘
163
+ (skip review — direct complete)
164
+
165
+ Any phase can transition to PAUSED via blockers.
166
+ PAUSED only allows: resume, status, export.
167
+ ```
168
+
169
+ | Phase | What Happens | Allowed Commands |
170
+ |-------|-------------|-----------------|
171
+ | SETUP | Session created, config saved | start, add-context, status, export |
172
+ | CONTEXT | Files and notes loaded | add-context, status, export |
173
+ | DISCUSSION | User describes feature, Claude asks questions | discuss, status, export |
174
+ | PLANNING | Context synthesized, plan generated | generate, review, complete, status, export |
175
+ | REVIEW | GPT review loop running | review, complete, status, export |
176
+ | COMPLETE | Session finished, artifacts exported | status, export |
177
+
178
+ ## Database Schema
179
+
180
+ All state lives in SQLite at `~/.planner-auto/planner.db`. 7 tables:
181
+
182
+ | Table | Purpose |
183
+ |-------|---------|
184
+ | `sessions` | Session metadata: project, phase, status, timestamps |
185
+ | `messages` | Append-only conversation log (user + assistant turns) |
186
+ | `context_entries` | Loaded files, notes, and synthesized context |
187
+ | `plan_drafts` | Versioned plan content with draft number |
188
+ | `reviews` | Reviewer responses with verdict and issues (Plan 2) |
189
+ | `blockers` | Pause/resume lifecycle with source, question, answer |
190
+ | `session_config` | Config snapshot per session (models, prompt hashes) |
191
+
192
+ ### Transaction Contract
193
+
194
+ CRUD functions do NOT auto-commit. Callers manage transactions:
195
+
196
+ ```python
197
+ from planner_auto.db import transaction, add_message
198
+
199
+ # Single operation — explicit commit
200
+ add_message(conn, session_id, "user", "hello")
201
+ conn.commit()
202
+
203
+ # Atomic multi-operation — transaction context manager
204
+ with transaction(conn):
205
+     add_message(conn, session_id, "user", user_input)
206
+     add_message(conn, session_id, "assistant", response)
207
+ # Both committed together, or both rolled back on error
208
+ ```
209
+
210
+ ## Artifact Export
211
+
212
+ Artifacts are generated from the DB on demand. They are NOT read back by the tool.
213
+
214
+ ```
215
+ ~/.planner-auto/sessions/<session-id>/
216
+ ├── chat.csv # Full conversation (id, timestamp, role, content)
217
+ ├── context-summary.md # Context entries grouped by type
218
+ ├── plan-draft-1.md # First plan draft
219
+ ├── plan-draft-2.md # Revised draft (after review, Plan 2)
220
+ ├── ...
221
+ └── plan-draft-N.md # Latest draft
222
+ ```
223
+
224
+ With Plan 2 (reviewer), additional files are exported:
225
+ ```
226
+ ├── a-01-plan.md # Draft 1
227
+ ├── a-02-review.md # Review 1
228
+ ├── a-03-plan.md # Draft 2 (revised)
229
+ ├── a-04-review.md # Review 2
230
+ ├── ...
231
+ └── a-<N>-plan-final.md # GPT-approved final plan
232
+ ```
233
+
234
+ ## Architecture (For Agents & Devs)
235
+
236
+ ```
237
+ planner_auto/
238
+ ├── cli.py # Click CLI — all user-facing commands
239
+ ├── db.py # SQLite schema (v2), CRUD, transaction(), schema migration
240
+ ├── session.py # SessionManager — phase transitions, pause/resume
241
+ ├── state.py # Phase/Status enums, transition rules, command permissions
242
+ ├── agents.py # discuss(), synthesize_context(), generate_plan()
243
+ ├── sdk_wrapper.py # Claude SDK wrapper — retry, timeout, effort/thinking
244
+ ├── review_workflow.py # Shared review orchestration (prepare/run/finalize)
245
+ ├── prompts.py # System prompts with version hashing
246
+ ├── export.py # Artifact export — plans, reviews, .kafra handoff
247
+ ├── validation.py # Plan format validation (milestone headers, checkboxes)
248
+ ├── errors.py # Custom exceptions (SDK, reviewer, session errors)
249
+ ├── git_utils.py # Repo root discovery (git rev-parse + --repo-root)
250
+ ├── logging.py # Session-scoped log file setup
251
+ ├── inspect.py # DB inspection queries for debugging
252
+ ├── reviewer/
253
+ │ ├── contract.py # ReviewerContract ABC, ReviewerResponse, ReviewIssue
254
+ │ ├── direct_api.py # DirectAPIAdapter — GPT-5.4 via OpenAI SDK
255
+ │ ├── parser.py # Response parser (JSON/XML/free-form fallback)
256
+ │ └── prompts.py # Reviewer system prompts (basic, guidance, keep_trim)
257
+ ├── loop/
258
+ │ ├── engine.py # ReviewLoopEngine — review → revise → repeat
259
+ │ ├── feedback.py # Validate feedback (ACCEPT/DEFER/REJECT per issue)
260
+ │ ├── history.py # Review context builder (cumulative deferred)
261
+ │ └── convergence.py # Complexity detection, caps, fast mode
262
+ └── tui/ # TUI Review Dashboard (optional: pip install planner-auto[tui])
263
+ ├── review_app.py # ReviewTUI — main Textual app
264
+ ├── adapter.py # Thread-safe engine → TUI bridge
265
+ ├── messages.py # 8 Textual message types
266
+ ├── bindings.py # Keybinding definitions
267
+ ├── widgets/ # SessionPanel, ConvergencePanel, RoundList, etc.
268
+ ├── screens/ # DispositionScreen, PlanScreen, RawResponseScreen, HelpScreen
269
+ └── styles/ # theme.tcss — dark theme, 3 responsive breakpoints
270
+ ```
271
+
272
+ ### Key Design Decisions
273
+
274
+ | Decision | Rationale |
275
+ |----------|-----------|
276
+ | SQLite as canonical state | Files drift from tool state. The DB is authoritative; files are exports. |
277
+ | Callers manage commits | Enables atomic multi-operation transactions without CRUD-level coupling. |
278
+ | Phase-gated commands | Prevents out-of-order operations (e.g., generate before discuss). |
279
+ | PLANNING→COMPLETE or PLANNING→REVIEW→COMPLETE | Direct complete skips review; `review` command runs the GPT loop. |
280
+ | `asyncio.wait_for` on SDK calls | Prevents hung SDK subprocess from blocking forever. |
281
+ | Prompt version hashing | Config snapshot per session enables reproducibility and regression detection. |
282
+
283
+ ### For Agents
284
+
285
+ When working with planner-auto code:
286
+ - All DB access goes through `db.py` functions — never raw SQL in other modules
287
+ - All phase transitions go through `SessionManager` — never direct DB updates
288
+ - SDK calls go through `sdk_wrapper.py` — handles retry, timeout, error mapping
289
+ - Tests use in-memory SQLite (`:memory:`) with explicit commits
290
+ - Tests mock all SDK calls — no real API calls in test suite
291
+
292
+ ## Development
293
+
294
+ ```bash
295
+ # Setup
296
+ cd planner-auto/
297
+ pip install -e ".[dev]"
298
+
299
+ # Run tests
300
+ pytest tests/ -v # All tests
301
+ pytest tests/test_db.py -v # Single file
302
+ pytest tests/test_session.py::TestCheckCommand -v # Single class
303
+ pytest -k "complete" -v # Filter by name
304
+
305
+ # Current test count: 464 passing
306
+ ```
307
+
308
+ ## Config Versioning
309
+
310
+ Every session captures its configuration at creation time in `session_config`:
311
+
312
+ ```json
313
+ {
314
+ "project": "my-api",
315
+ "model_default": "claude-sonnet-4-6",
316
+ "prompt_hashes": {
317
+ "planner": "sha256:abc123...",
318
+ "synthesis": "sha256:def456..."
319
+ }
320
+ }
321
+ ```
322
+
323
+ Plan 2 extends this with reviewer settings:
324
+
325
+ ```json
326
+ {
327
+ "project": "my-api",
328
+ "model_default": "claude-opus-4-6",
329
+ "repo_root": "/Users/me/my-api",
330
+ "reviewer_model": "gpt-5.4",
331
+ "reasoning_effort": "high",
332
+ "prompt_mode": "keep_trim",
333
+ "review_history": true,
334
+ "validate_feedback": true,
335
+ "filter_severity": ["critical", "major"],
336
+ "fast_mode": false,
337
+ "complexity": "standard",
338
+ "max_rounds": 8
339
+ }
340
+ ```
341
+
342
+ ## Known Issues
343
+
344
+ ### Claude Agent SDK Subprocess (when using `--claude-backend sdk`)
345
+
346
+ The SDK backend (`--claude-backend sdk`) spawns a `claude` CLI subprocess which shares rate-limit quota with active Claude Code sessions. This is **not an issue with the default `direct` backend**.
347
+
348
+ If you need to use the SDK backend (e.g., OAuth-only auth), be aware:
349
+ - Rate limiting when other Claude Code sessions are active
350
+ - anyio cancel scope tracebacks on error paths (cosmetic, not harmful)
351
+ - Opus + thinking can consume turns on tool calls, returning empty results
352
+
353
+ **Recommendation:** Use the default `direct` backend with `ANTHROPIC_API_KEY` whenever possible.
354
+
355
+ ## Roadmap
356
+
357
+ - [x] **Plan 1: Session Core** — CLI, DB, lifecycle, context, plan generation, export
358
+ - [x] **Plan 2: Reviewer Adapter** — GPT review loop, convergence, .kafra handoff
359
+ - [x] **Direct API backend** — Bypass SDK subprocess, works alongside Claude Code sessions
360
+ - [x] **Stress testing (Level 2)** — First end-to-end success: 3-round convergence, $0.12, GPT GO
361
+ - [x] **TUI mode** — Review dashboard with live round progress, convergence sparkline, drill-down
362
+ - [ ] **Telegram notifications** — Notify on plan approval or blocker
363
+ - [ ] **Homebrew formula** — `brew install planner-auto`
@@ -0,0 +1,332 @@
1
+ # planner-auto
2
+
3
+ Automated planning session manager that produces milestone plans through interactive conversation with Claude and has them reviewed by GPT for quality. Plans are persisted in SQLite and exported as markdown artifacts ready for [orchestrator-auto](../orchestrator-auto/) to implement.
4
+
5
+ ## How It Works
6
+
7
+ ```
8
+ You describe a feature
9
+ → Claude asks clarifying questions
10
+ → Claude generates a milestone plan
11
+ → GPT reviews and critiques
12
+ → Claude revises based on feedback
13
+ → Repeat until GPT says GO
14
+ → Plan exported for orchestrator-auto
15
+ ```
16
+
17
+ Each session follows a strict lifecycle: **SETUP → CONTEXT → DISCUSSION → PLANNING → REVIEW → COMPLETE**. All state lives in SQLite. File artifacts (plans, reviews, chat logs) are exported views, not the source of truth.
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ cd planner-auto/
23
+ pip install -e . # Production
24
+ pip install -e ".[dev]" # With pytest
25
+ pip install -e ".[tui]" # With TUI dashboard (textual)
26
+
27
+ # Required
28
+ export ANTHROPIC_API_KEY="your-key" # For Claude (planner)
29
+ # OR
30
+ export CLAUDE_CODE_OAUTH_TOKEN="token" # Claude Pro/Max subscription
31
+
32
+ # Required for Plan 2 (reviewer)
33
+ export OPENAI_API_KEY="your-key" # For GPT-5.4 (reviewer)
34
+ ```
35
+
36
+ ## Quick Start
37
+
38
+ ```bash
39
+ # Start a session
40
+ planner-auto start --project my-api
41
+
42
+ # Add context files
43
+ planner-auto add-context <session-id> --file src/app.py
44
+ planner-auto add-context <session-id> --file src/models.py
45
+ planner-auto add-context <session-id> --note "Uses PostgreSQL, deployed on AWS"
46
+
47
+ # Discuss the feature (interactive mode)
48
+ planner-auto discuss <session-id> --interactive
49
+ # Type your feature description, Claude asks questions
50
+ # Type /done when ready to move to planning
51
+
52
+ # Or one-shot discuss with auto-advance
53
+ planner-auto discuss <session-id> "Add user registration with email validation" --done
54
+
55
+ # Generate the plan
56
+ planner-auto generate <session-id>
57
+ planner-auto generate <session-id> --model claude-opus-4-6 # Override model
58
+
59
+ # Export artifacts
60
+ planner-auto export <session-id>
61
+ planner-auto export <session-id> --output-dir ./my-plans/
62
+
63
+ # Complete the session
64
+ planner-auto complete <session-id>
65
+ ```
66
+
67
+ ## CLI Reference
68
+
69
+ ### Session Commands (Plan 1)
70
+
71
+ | Command | Description |
72
+ |---------|-------------|
73
+ | `start --project <name>` | Create a new planning session |
74
+ | `add-context <id> --file <path>` | Add a file to session context |
75
+ | `add-context <id> --note "text"` | Add a text note to session context |
76
+ | `discuss <id> "message"` | Send a single discussion message |
77
+ | `discuss <id> --interactive` | Enter interactive discussion mode (type `/done` to advance) |
78
+ | `discuss <id> "message" --done` | Send message and advance to PLANNING |
79
+ | `generate <id>` | Generate milestone plan from context + conversation |
80
+ | `generate <id> --model <model>` | Generate with a specific Claude model |
81
+ | `list` | List all sessions |
82
+ | `list --status active` | Filter sessions by status |
83
+ | `status <id>` | Show session details (phase, counts, blockers) |
84
+ | `resume <id>` | Resume a paused session (answer open blockers) |
85
+ | `export <id>` | Export session artifacts to disk |
86
+ | `export <id> --output-dir <path>` | Export to custom directory |
87
+ | `complete <id>` | Complete session (checks blockers, auto-exports) |
88
+
89
+ ### Global Flags
90
+
91
+ | Flag | Description |
92
+ |------|-------------|
93
+ | `--db-path <path>` | Override database path (default: `~/.planner-auto/planner.db`) |
94
+ | `--verbose` | Print detailed output |
95
+ | `--debug` | Print debug-level output + stack traces |
96
+
97
+ ### Review Commands (Plan 2)
98
+
99
+ | Command | Description |
100
+ |---------|-------------|
101
+ | `review <id>` | Run automated GPT review loop on current plan |
102
+ | `review <id> --fast` | Fast mode: 4 rounds, no history, basic prompt |
103
+ | `review <id> --max-rounds <n>` | Override round cap |
104
+ | `review <id> --no-review-history` | Disable review history context |
105
+ | `review <id> --reviewer-model <model>` | Override GPT model (default: gpt-5.4) |
106
+ | `review <id> --reviewer-reasoning <level>` | Reasoning effort (default: high) |
107
+ | `review <id> --complexity standard\|complex` | Override complexity detection |
108
+ | `review <id> --repo-root <path>` | Override repo root for .kafra handoff |
109
+ | `review <id> --tui` | Launch live TUI dashboard (requires `pip install planner-auto[tui]`) |
110
+
111
+ **Review loop features:**
112
+ - GPT-5.4 reviews with `resolution_guidance` + `target_section` per issue
113
+ - `keep/trim` sections: GPT tells Claude what to preserve and what to simplify
114
+ - `validate feedback`: Claude assesses each issue as ACCEPT / DEFER / REJECT
115
+ - Severity filtering: only `critical` + `major` issues reach Claude
116
+ - Review history: GPT sees previous plan + cumulative DEFER decisions across all rounds
117
+ - Complexity detection: auto-adjusts round cap (standard=8, complex=12)
118
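+ - Convergence: the plan is accepted when GPT returns GO, or when the round cap is reached with zero critical issues
119
+ - If the round cap is reached with critical issues remaining, the session is paused for human review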
120
+ - Final plan copied to `<repo>/.kafra/a-01-plans/`
121
+ - Review metadata persisted: model, cost, tokens, raw response per round
122
+ - **TUI mode** (`--tui`): Live dashboard with round progress, convergence sparkline, disposition drill-down. Requires `pip install planner-auto[tui]`.
123
+
124
+ ## Session Lifecycle
125
+
126
+ ```
127
+ SETUP ──► CONTEXT ──► DISCUSSION ──► PLANNING ──► REVIEW ──► COMPLETE
128
+ │ │ ▲
129
+ │ └──────────┘
130
+ │ (revise & re-review)
131
+ └───────────────────────┘
132
+ (skip review — direct complete)
133
+
134
+ Any phase can transition to PAUSED via blockers.
135
+ PAUSED only allows: resume, status, export.
136
+ ```
137
+
138
+ | Phase | What Happens | Allowed Commands |
139
+ |-------|-------------|-----------------|
140
+ | SETUP | Session created, config saved | start, add-context, status, export |
141
+ | CONTEXT | Files and notes loaded | add-context, status, export |
142
+ | DISCUSSION | User describes feature, Claude asks questions | discuss, status, export |
143
+ | PLANNING | Context synthesized, plan generated | generate, review, complete, status, export |
144
+ | REVIEW | GPT review loop running | review, complete, status, export |
145
+ | COMPLETE | Session finished, artifacts exported | status, export |
146
+
147
+ ## Database Schema
148
+
149
+ All state lives in SQLite at `~/.planner-auto/planner.db`. 7 tables:
150
+
151
+ | Table | Purpose |
152
+ |-------|---------|
153
+ | `sessions` | Session metadata: project, phase, status, timestamps |
154
+ | `messages` | Append-only conversation log (user + assistant turns) |
155
+ | `context_entries` | Loaded files, notes, and synthesized context |
156
+ | `plan_drafts` | Versioned plan content with draft number |
157
+ | `reviews` | Reviewer responses with verdict and issues (Plan 2) |
158
+ | `blockers` | Pause/resume lifecycle with source, question, answer |
159
+ | `session_config` | Config snapshot per session (models, prompt hashes) |
160
+
161
+ ### Transaction Contract
162
+
163
+ CRUD functions do NOT auto-commit. Callers manage transactions:
164
+
165
+ ```python
166
+ from planner_auto.db import transaction, add_message
167
+
168
+ # Single operation — explicit commit
169
+ add_message(conn, session_id, "user", "hello")
170
+ conn.commit()
171
+
172
+ # Atomic multi-operation — transaction context manager
173
+ with transaction(conn):
174
+     add_message(conn, session_id, "user", user_input)
175
+     add_message(conn, session_id, "assistant", response)
176
+ # Both committed together, or both rolled back on error
177
+ ```
178
+
179
+ ## Artifact Export
180
+
181
+ Artifacts are generated from the DB on demand. They are NOT read back by the tool.
182
+
183
+ ```
184
+ ~/.planner-auto/sessions/<session-id>/
185
+ ├── chat.csv # Full conversation (id, timestamp, role, content)
186
+ ├── context-summary.md # Context entries grouped by type
187
+ ├── plan-draft-1.md # First plan draft
188
+ ├── plan-draft-2.md # Revised draft (after review, Plan 2)
189
+ ├── ...
190
+ └── plan-draft-N.md # Latest draft
191
+ ```
192
+
193
+ With Plan 2 (reviewer), additional files are exported:
194
+ ```
195
+ ├── a-01-plan.md # Draft 1
196
+ ├── a-02-review.md # Review 1
197
+ ├── a-03-plan.md # Draft 2 (revised)
198
+ ├── a-04-review.md # Review 2
199
+ ├── ...
200
+ └── a-<N>-plan-final.md # GPT-approved final plan
201
+ ```
202
+
203
+ ## Architecture (For Agents & Devs)
204
+
205
+ ```
206
+ planner_auto/
207
+ ├── cli.py # Click CLI — all user-facing commands
208
+ ├── db.py # SQLite schema (v2), CRUD, transaction(), schema migration
209
+ ├── session.py # SessionManager — phase transitions, pause/resume
210
+ ├── state.py # Phase/Status enums, transition rules, command permissions
211
+ ├── agents.py # discuss(), synthesize_context(), generate_plan()
212
+ ├── sdk_wrapper.py # Claude SDK wrapper — retry, timeout, effort/thinking
213
+ ├── review_workflow.py # Shared review orchestration (prepare/run/finalize)
214
+ ├── prompts.py # System prompts with version hashing
215
+ ├── export.py # Artifact export — plans, reviews, .kafra handoff
216
+ ├── validation.py # Plan format validation (milestone headers, checkboxes)
217
+ ├── errors.py # Custom exceptions (SDK, reviewer, session errors)
218
+ ├── git_utils.py # Repo root discovery (git rev-parse + --repo-root)
219
+ ├── logging.py # Session-scoped log file setup
220
+ ├── inspect.py # DB inspection queries for debugging
221
+ ├── reviewer/
222
+ │ ├── contract.py # ReviewerContract ABC, ReviewerResponse, ReviewIssue
223
+ │ ├── direct_api.py # DirectAPIAdapter — GPT-5.4 via OpenAI SDK
224
+ │ ├── parser.py # Response parser (JSON/XML/free-form fallback)
225
+ │ └── prompts.py # Reviewer system prompts (basic, guidance, keep_trim)
226
+ ├── loop/
227
+ │ ├── engine.py # ReviewLoopEngine — review → revise → repeat
228
+ │ ├── feedback.py # Validate feedback (ACCEPT/DEFER/REJECT per issue)
229
+ │ ├── history.py # Review context builder (cumulative deferred)
230
+ │ └── convergence.py # Complexity detection, caps, fast mode
231
+ └── tui/ # TUI Review Dashboard (optional: pip install planner-auto[tui])
232
+ ├── review_app.py # ReviewTUI — main Textual app
233
+ ├── adapter.py # Thread-safe engine → TUI bridge
234
+ ├── messages.py # 8 Textual message types
235
+ ├── bindings.py # Keybinding definitions
236
+ ├── widgets/ # SessionPanel, ConvergencePanel, RoundList, etc.
237
+ ├── screens/ # DispositionScreen, PlanScreen, RawResponseScreen, HelpScreen
238
+ └── styles/ # theme.tcss — dark theme, 3 responsive breakpoints
239
+ ```
240
+
241
+ ### Key Design Decisions
242
+
243
+ | Decision | Rationale |
244
+ |----------|-----------|
245
+ | SQLite as canonical state | Files drift from tool state. DB is authoritative, files are exports. |
246
+ | Callers manage commits | Enables atomic multi-operation transactions without CRUD-level coupling. |
247
+ | Phase-gated commands | Prevents out-of-order operations (e.g., generate before discuss). |
248
+ | PLANNING→COMPLETE or PLANNING→REVIEW→COMPLETE | Direct complete skips review; `review` command runs the GPT loop. |
249
+ | `asyncio.wait_for` on SDK calls | Prevents a hung SDK subprocess from blocking forever. |
250
+ | Prompt version hashing | Config snapshot per session enables reproducibility and regression detection. |
251
+
252
+ ### For Agents
253
+
254
+ When working with planner-auto code:
255
+ - All DB access goes through `db.py` functions — never raw SQL in other modules
256
+ - All phase transitions go through `SessionManager` — never direct DB updates
257
+ - SDK calls go through `sdk_wrapper.py` — handles retry, timeout, error mapping
258
+ - Tests use in-memory SQLite (`:memory:`) with explicit commits
259
+ - Tests mock all SDK calls — no real API calls in the test suite
260
+
261
+ ## Development
262
+
263
+ ```bash
264
+ # Setup
265
+ cd planner-auto/
266
+ pip install -e ".[dev]"
267
+
268
+ # Run tests
269
+ pytest tests/ -v # All tests
270
+ pytest tests/test_db.py -v # Single file
271
+ pytest tests/test_session.py::TestCheckCommand -v # Single class
272
+ pytest -k "complete" -v # Filter by name
273
+
274
+ # Current test count: 464 passing
275
+ ```
276
+
277
+ ## Config Versioning
278
+
279
+ Every session captures its configuration at creation time in `session_config`:
280
+
281
+ ```json
282
+ {
283
+ "project": "my-api",
284
+ "model_default": "claude-sonnet-4-6",
285
+ "prompt_hashes": {
286
+ "planner": "sha256:abc123...",
287
+ "synthesis": "sha256:def456..."
288
+ }
289
+ }
290
+ ```
291
+
292
+ Plan 2 extends this with reviewer settings:
293
+
294
+ ```json
295
+ {
296
+ "project": "my-api",
297
+ "model_default": "claude-opus-4-6",
298
+ "repo_root": "/Users/me/my-api",
299
+ "reviewer_model": "gpt-5.4",
300
+ "reasoning_effort": "high",
301
+ "prompt_mode": "keep_trim",
302
+ "review_history": true,
303
+ "validate_feedback": true,
304
+ "filter_severity": ["critical", "major"],
305
+ "fast_mode": false,
306
+ "complexity": "standard",
307
+ "max_rounds": 8
308
+ }
309
+ ```
310
+
311
+ ## Known Issues
312
+
313
+ ### Claude Agent SDK Subprocess (when using `--claude-backend sdk`)
314
+
315
+ The SDK backend (`--claude-backend sdk`) spawns a `claude` CLI subprocess which shares rate-limit quota with active Claude Code sessions. This is **not an issue with the default `direct` backend**.
316
+
317
+ If you need to use the SDK backend (e.g., OAuth-only auth), be aware of:
318
+ - Rate limiting when other Claude Code sessions are active
319
+ - anyio cancel scope tracebacks on error paths (cosmetic, not harmful)
320
+ - Opus + thinking can consume turns on tool calls, returning empty results
321
+
322
+ **Recommendation:** Use the default `direct` backend with `ANTHROPIC_API_KEY` whenever possible.
323
+
324
+ ## Roadmap
325
+
326
+ - [x] **Plan 1: Session Core** — CLI, DB, lifecycle, context, plan generation, export
327
+ - [x] **Plan 2: Reviewer Adapter** — GPT review loop, convergence, .kafra handoff
328
+ - [x] **Direct API backend** — Bypass SDK subprocess, works alongside Claude Code sessions
329
+ - [x] **Stress testing (Level 2)** — First end-to-end success: 3-round convergence, $0.12, GPT GO
330
+ - [x] **TUI mode** — Review dashboard with live round progress, convergence sparkline, drill-down
331
+ - [ ] **Telegram notifications** — Notify on plan approval or blocker
332
+ - [ ] **Homebrew formula** — `brew install planner-auto`