productteam 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. productteam-2.3.0/.coverage +0 -0
  2. productteam-2.3.0/.github/workflows/publish.yml +23 -0
  3. productteam-2.3.0/.github/workflows/test.yml +18 -0
  4. productteam-2.3.0/.gitignore +38 -0
  5. productteam-2.3.0/CHANGELOG.md +152 -0
  6. productteam-2.3.0/LICENSE +21 -0
  7. productteam-2.3.0/Makefile +16 -0
  8. productteam-2.3.0/PKG-INFO +481 -0
  9. productteam-2.3.0/README.md +429 -0
  10. productteam-2.3.0/docs/architecture.svg +254 -0
  11. productteam-2.3.0/docs/index.html +892 -0
  12. productteam-2.3.0/docs/terms.html +133 -0
  13. productteam-2.3.0/pyproject.toml +61 -0
  14. productteam-2.3.0/skills/builder/SKILL.md +107 -0
  15. productteam-2.3.0/skills/doc-writer/SKILL.md +252 -0
  16. productteam-2.3.0/skills/evaluator/SKILL.md +131 -0
  17. productteam-2.3.0/skills/evaluator-design/SKILL.md +149 -0
  18. productteam-2.3.0/skills/orchestrator/SKILL.md +310 -0
  19. productteam-2.3.0/skills/planner/SKILL.md +248 -0
  20. productteam-2.3.0/skills/prd-writer/SKILL.md +203 -0
  21. productteam-2.3.0/skills/ui-builder/SKILL.md +191 -0
  22. productteam-2.3.0/src/productteam/__init__.py +3 -0
  23. productteam-2.3.0/src/productteam/cli.py +936 -0
  24. productteam-2.3.0/src/productteam/config.py +92 -0
  25. productteam-2.3.0/src/productteam/doctor.py +177 -0
  26. productteam-2.3.0/src/productteam/errors.py +5 -0
  27. productteam-2.3.0/src/productteam/forge/__init__.py +1 -0
  28. productteam-2.3.0/src/productteam/forge/daemon.py +144 -0
  29. productteam-2.3.0/src/productteam/forge/dashboard.py +263 -0
  30. productteam-2.3.0/src/productteam/forge/queue.py +201 -0
  31. productteam-2.3.0/src/productteam/models.py +48 -0
  32. productteam-2.3.0/src/productteam/providers/__init__.py +6 -0
  33. productteam-2.3.0/src/productteam/providers/anthropic.py +83 -0
  34. productteam-2.3.0/src/productteam/providers/base.py +46 -0
  35. productteam-2.3.0/src/productteam/providers/factory.py +49 -0
  36. productteam-2.3.0/src/productteam/providers/gemini.py +151 -0
  37. productteam-2.3.0/src/productteam/providers/ollama.py +113 -0
  38. productteam-2.3.0/src/productteam/providers/openai.py +128 -0
  39. productteam-2.3.0/src/productteam/scaffold.py +259 -0
  40. productteam-2.3.0/src/productteam/supervisor.py +816 -0
  41. productteam-2.3.0/src/productteam/tool_loop.py +423 -0
  42. productteam-2.3.0/templates/evaluation-report.yaml +48 -0
  43. productteam-2.3.0/templates/handoff-artifact.yaml +37 -0
  44. productteam-2.3.0/templates/sprint-contract.yaml +40 -0
  45. productteam-2.3.0/tests/__init__.py +0 -0
  46. productteam-2.3.0/tests/conftest.py +50 -0
  47. productteam-2.3.0/tests/test_cli.py +484 -0
  48. productteam-2.3.0/tests/test_config.py +230 -0
  49. productteam-2.3.0/tests/test_dashboard.py +390 -0
  50. productteam-2.3.0/tests/test_docs_quality.py +49 -0
  51. productteam-2.3.0/tests/test_doctor.py +133 -0
  52. productteam-2.3.0/tests/test_forge_daemon.py +220 -0
  53. productteam-2.3.0/tests/test_forge_queue.py +201 -0
  54. productteam-2.3.0/tests/test_live.py +190 -0
  55. productteam-2.3.0/tests/test_providers.py +436 -0
  56. productteam-2.3.0/tests/test_scaffold.py +247 -0
  57. productteam-2.3.0/tests/test_supervisor.py +1254 -0
  58. productteam-2.3.0/tests/test_tool_loop.py +445 -0
Binary file
@@ -0,0 +1,23 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*.*.*"
7
+
8
+ jobs:
9
+ publish:
10
+ runs-on: ubuntu-latest
11
+ environment: pypi
12
+ permissions:
13
+ id-token: write
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.11"
19
+ - run: pip install -e ".[dev]"
20
+ - run: pytest -m "not live"
21
+ - run: pip install build
22
+ - run: python -m build
23
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,18 @@
1
+ name: Tests
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+ runs-on: ${{ matrix.os }}
8
+ strategy:
9
+ matrix:
10
+ os: [ubuntu-latest, windows-latest]
11
+ python-version: ["3.11", "3.12", "3.13"]
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: ${{ matrix.python-version }}
17
+ - run: pip install -e ".[dev]"
18
+ - run: pytest -m "not live" --cov=productteam --cov-report=term-missing --cov-fail-under=80
@@ -0,0 +1,38 @@
1
+ # Environment and secrets
2
+ .env
3
+ .env.local
4
+ .env.*.local
5
+ secrets/
6
+ *.key
7
+ *.pem
8
+
9
+ # Python
10
+ __pycache__/
11
+ *.py[cod]
12
+ *$py.class
13
+ *.so
14
+ *.egg-info/
15
+ dist/
16
+ build/
17
+ .eggs/
18
+
19
+ # Node
20
+ node_modules/
21
+
22
+ # ProductTeam runtime artifacts
23
+ .productteam/
24
+
25
+ # OS files
26
+ .DS_Store
27
+ Thumbs.db
28
+ Desktop.ini
29
+
30
+ # IDE
31
+ .vscode/
32
+ .idea/
33
+ *.swp
34
+ *.swo
35
+ *~
36
+
37
+ # Temporary commit files
38
+ .commit_msg.txt
@@ -0,0 +1,152 @@
1
+ # Changelog
2
+
3
+ ## [2.3.0] - 2026-03-26
4
+
5
+ ### Added
6
+ - **Forge daemon stage visibility** — `Supervisor.run()` accepts a `stage_callback` parameter. The Forge daemon passes a callback that updates `current_stage` in the queue at the start of each stage, so the dashboard shows real-time pipeline progress instead of `"-"` throughout the run.
7
+ - **Configurable skills directory** — `skills_dir` field in `[pipeline]` config (default: `.claude/skills`). Users who move their skills directory or use non-standard layouts can set this in `productteam.toml`. Error messages now suggest checking `skills_dir` when a skill is not found.
8
+ - **Design evaluator verdict disk fallback** — when the design evaluator's text response has no parseable verdict, the supervisor checks `eval-*-design.yaml` files on disk. Same pattern as the build evaluator fallback added in v2.2.0. Fixes pipelines reporting "stuck" when the design evaluation actually passed.
9
+
10
+ ### Fixed
11
+ - **`run_bash` WinError production handling** — `_execute_tool` now catches `OSError` separately from generic exceptions, returning a structured JSON error with a descriptive message instead of an opaque crash on Windows when subprocess handles are invalid.
12
+ - **Windows credential filter gaps** — `_validate_command` now blocks PowerShell (`$env:`, `Get-ChildItem Env:`) and .NET (`[System.Environment]::GetEnvironmentVariable`) environment access patterns, matching the existing Unix credential filters.
13
+ - **`run_bash` tests on Windows** — `test_execute_run_bash` and `test_execute_run_bash_timeout` now use `python -c` on Windows instead of `echo`/`sleep` which depend on Unix shell builtins. Tests pass on all platforms.
14
+ - **Doc Writer termination validated** — the prompt-based termination instruction ("stop after writing all files") was confirmed working under live conditions. The Doc Writer exits naturally within the stage timeout. No `max_tool_calls` cap was needed.
15
+
16
+ ### Infrastructure
17
+ - 270 unit tests passing on Windows and Linux (up from 239)
18
+ - Coverage restored to 80% (was 75.8%): mocked provider `complete_with_tools` tests for Gemini/Ollama/OpenAI, supervisor error path and artifact tests, credential filter tests
19
+ - CI matrix expanded: Windows (`windows-latest`) added alongside Ubuntu for all Python versions
20
+ - Windows credential filter tests added for PowerShell and .NET environment access patterns
21
+ - Full pipeline validated end-to-end with fresh `productteam init` + `productteam run` on the bmark reference project
22
+
23
+ ## [2.2.0] - 2026-03-26
24
+
25
+ ### Added
26
+ - **`productteam recover` command** — reads state.json, identifies stuck/running stages, resets them to pending, re-enters pipeline at the stuck stage. Supports `--yes` for non-interactive use. Replaces manual state.json editing after timeouts.
27
+ - **Planner sprint sizing examples** — SKILL.md now includes 2 concrete examples of correctly-sized sprints (small + medium) and an anti-pattern example. Establishes 5-8 deliverable floor/ceiling per sprint.
28
+ - **Verdict parsing disk fallback** — when the Evaluator's text response has no parseable verdict, the supervisor checks `.productteam/evaluations/*.yaml` files written by the Evaluator via `write_file`. Prevents every sprint from returning `needs_work` when the Evaluator writes structured YAML to disk but its text summary lacks the verdict key. **Note:** This bug has existed since the Evaluator became a doer stage in v2.0.2. Any evaluation verdicts from live runs prior to this fix are unreliable — the Evaluator may have written PASS to disk while the supervisor recorded `needs_work`.
29
+ - **Builder tool budget guidance** — SKILL.md now includes explicit budget: write all files first, test once, then fix. Prevents the Builder from spending all tool calls on exploration.
30
+
31
+ ### Changed
32
+ - `builder_max_tool_calls` default raised from 50 to 75 — 50 was too tight for real sprints with test-fix cycles
33
+ - Planner YAML size limit tightened from 10KB to 6KB
34
+ - Planner deliverable definition tightened: "one file with one purpose" — not a subsystem or feature area
35
+ - `run_bash` tool description updated: tells the model Python/pip are available on PATH
36
+
37
+ ### Fixed
38
+ - **`run_bash` Python PATH injection** — the Python executable's directory and the project's `.venv/Scripts` (or `.venv/bin` on Linux/macOS) are now added to the subprocess PATH. Fixes Windows environments where bash shells (MSYS2/Git Bash) can't find Python. Cross-platform safe.
39
+ - Inline `import sys` in tool_loop.py moved to module level
40
+
41
+ ### Infrastructure
42
+ - 231 unit tests + 6 live integration tests passing
43
+ - Verdict disk fallback covered by `test_build_evaluate_disk_fallback_finds_pass`
44
+ - Recover command covered by 5 tests (no state, no concept, no stuck, identifies stuck, resets with --yes)
45
+
46
+ ## [2.1.0] - 2026-03-26
47
+
48
+ ### Changed
49
+ - **Planner reclassified as doer** — now uses the tool loop to write sprint YAML files directly to `.productteam/sprints/` via `write_file`. Previously a thinker that produced correct YAML as text but couldn't write to disk, causing the build loop to silently skip.
50
+ - **PRD Writer runs headlessly** — detects automated context and skips clarifying questions and review phases. Applies sensible defaults instead of asking 7 questions nobody will answer.
51
+ - **Planner runs headlessly** — proceeds without asking for human confirmation in auto-approve mode.
52
+ - **Sprint scoping tightened** — "large" scope banned, only small (1-3 files) and medium (4-8 files) allowed. Sprints must be completable in 30-40 tool calls. 6KB YAML size limit.
53
+ - **Design Evaluator** — single-pass evaluation with clear escalation instead of fake retry loop that couldn't route back to Doc Writer.
54
+ - Timeout defaults bumped: stage 120→300s, builder 300→600s, new `planner_timeout_seconds` (600s)
55
+ - `_run_tool_loop_stage` accepts optional `timeout_seconds` override
56
+ - Thinker/doer classification updated across README, landing page, and architecture SVG
57
+
58
+ ### Added
59
+ - `max_sprints` config field (default 8) — bounds the number of sprint contracts the Planner produces, making timeout predictable
60
+ - `planner_timeout_seconds` config field (default 600) — separate timeout for the Planner's multi-file tool loop
61
+ - Loud failure when no sprint YAML files found after plan completes (was a silent skip)
62
+ - Doc Writer guard — skips when no sprints have passed evaluation
63
+ - `require_evaluator` config field now wired — when false, build loop auto-passes without evaluation
64
+ - GatesConfig fields wired — `prd_approval`, `sprint_approval`, `ship_approval` individually control their respective gates
65
+ - `_read_artifact` warns on missing artifact path or file
66
+ - Schema version validated on `state.json` load
67
+ - `handoffs/` directory created by `init_project`
68
+ - PRD Writer Rule 10: no invented product names — uses placeholder `[PRODUCT NAME]` when concept doesn't specify one
69
+
70
+ ### Fixed
71
+ - `os.system()` replaced with `asyncio.create_subprocess_exec` in `_gate`
72
+ - `run_bash` credential filtering hardened — blocks `env | grep`, `/proc/environ`, `echo $SECRET`; fixes `poetry env use` false positive; constants moved to module level
73
+ - `read_file` 100KB size cap with truncation notice
74
+ - Doer stages now use `builder_timeout_seconds` (was incorrectly using `stage_timeout_seconds`)
75
+ - Doc Writer stuck status was silently ignored — now gates pipeline
76
+ - Context summarizer includes MEDIUM findings (was CRITICAL/HIGH only)
77
+ - Evaluator SKILL.md paths fixed: `.claude/` → `.productteam/`
78
+
79
+ ### Infrastructure
80
+ - Publish workflow runs tests before build (pytest gate)
81
+ - Test workflow adds `--cov --cov-fail-under=80` and Python 3.13
82
+ - 231 unit tests + 6 live integration tests passing
83
+
84
+ ### Known Issues
85
+ - Planner sprint sizing needs calibration — produces 9-15KB sprints with 20-31 deliverables; target is 5-8 deliverables under 6KB. Tracked for next session.
86
+
87
+ ## [2.0.2] - 2026-03-26
88
+
89
+ ### Changed
90
+ - **Evaluator reclassified as doer** — now runs via tool loop with file access and test execution, instead of receiving only the Builder's text summary
91
+ - **Doc Writer reclassified as doer** — now reads actual source files before writing documentation, instead of hallucinating from the concept string alone
92
+ - Thinker/doer table updated in README, landing page, and architecture SVG to reflect new classifications
93
+ - Architecture SVG reorganized: thinker section (3 agents), doer section (4 agents) with shared tool sandbox
94
+
95
+ ### Added
96
+ - `productteam test` command — runs offline unit tests by default
97
+ - `productteam test --live` — runs live integration tests against real APIs with safety warnings (masked API key display, cost warning panel, cheapest-model default)
98
+ - `_run_tool_loop_stage()` method in Supervisor for dispatching doer stages
99
+ - 6 live integration tests (provider smoke, thinker stage, tool loop read/write, build-evaluate round-trip)
100
+ - 19 dashboard endpoint tests (`/api/submit` happy path, empty concept, oversized body, malformed header, approve/reject, job listing)
101
+ - 5 full pipeline integration tests (multi-sprint end-to-end, fail-stops-pipeline, resume-skips-completed, sprint sequencing)
102
+ - `tests/conftest.py` with shared `live_provider` and `live_project` fixtures
103
+ - `live` pytest marker for API-calling tests
104
+
105
+ ### Fixed
106
+ - XSS vulnerability in dashboard — all user-supplied values now escaped via `escapeHtml()` before innerHTML insertion
107
+ - Content-Length cap (4KB) and validation on `/api/submit` — malformed headers return 400, oversized bodies return 413
108
+ - Missing `import os` in supervisor.py — `_gate()` edit mode no longer crashes with NameError
109
+ - Operator precedence bug in `_run_single_step` — `sprint` arg no longer silently ignored when sprint list is empty
110
+ - Sprint path mismatch — Builder skill now uses `.productteam/sprints/` matching the Supervisor
111
+ - `builder_timeout_seconds` now wired into `run_tool_loop()` via `stage_timeout_seconds`
112
+ - Design evaluation stage now invoked in `Supervisor.run()` when `require_design_review` is enabled
113
+ - LAN IP detection uses UDP socket method instead of unreliable `socket.gethostbyname()`
114
+ - Removed unused `import time` from supervisor.py
115
+
116
+ ### Meta
117
+ - Test count: 207 unit tests + 6 live integration tests (up from ~150)
118
+ - Version synced across pyproject.toml, `__init__.py`, docs/index.html, docs/architecture.svg
119
+
120
+ ## [2.0.1] - 2026-03-26
121
+
122
+ ### Added
123
+ - Dashboard submit form — submit Forge ideas from any device on your LAN via `http://<your-ip>:7654`
124
+ - Dashboard binds to `0.0.0.0` by default (configurable via `[forge] status_host`)
125
+ - `/api/submit` endpoint on Forge dashboard
126
+ - Technical architecture requirement added to doc-writer skill — all products now produce an architecture SVG and component descriptions
127
+
128
+ ### Changed
129
+ - Version bumped to 2.0.1 across all files
130
+
131
+ ## [2.0.0] - 2026-03-26
132
+
133
+ ### Added
134
+ - Multi-provider LLM abstraction layer (Anthropic, OpenAI, Ollama, Gemini)
135
+ - Supervisor agent — real pipeline orchestration with `productteam run`
136
+ - Thinker/doer architecture — single API calls for planning stages, tool-use loop for builder stages
137
+ - Tool-use loop (`tool_loop.py`) — 4 tools (read_file, write_file, run_bash, list_dir) with security constraints
138
+ - Forge — phone-to-product pipeline with file queue, daemon, dashboard, and notifications
139
+ - `productteam doctor` — 11-check diagnostic command with `--json` and `--no-network` flags
140
+ - Incremental rebuilds — skip passed sprints, `--rebuild` flag for force rebuild
141
+ - State persistence via `state.json` with resume capability
142
+ - Stuck detection — timeout, loop detection, max tool call limits
143
+ - Approval gates with interactive y/N/edit prompt
144
+ - Architecture SVG diagram (`docs/architecture.svg`)
145
+ - Landing page with CLI commands, Forge section, Design Evaluator panel, provider badges
146
+ - GitHub Actions workflows for testing and PyPI publishing
147
+ - Comprehensive test suite (10 test files)
148
+
149
+ ### Changed
150
+ - `productteam run` now executes the full pipeline (previously printed manual instructions)
151
+ - README rewritten with full CLI reference, thinker/doer documentation, and architecture section
152
+ - Doc-writer skill updated to require technical architecture section for all products
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Scott Converse
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,16 @@
1
+ .PHONY: build publish-test publish lint test
2
+
3
+ test:
4
+ pytest tests/ -v
5
+
6
+ lint:
7
+ ruff check src/ tests/
8
+
9
+ build:
10
+ python -m build
11
+
12
+ publish-test:
13
+ python -m twine upload --repository testpypi dist/*
14
+
15
+ publish:
16
+ python -m twine upload dist/*