sembl-stack 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/.gitignore +14 -14
  2. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/PKG-INFO +31 -9
  3. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/README.md +29 -8
  4. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/pyproject.toml +18 -1
  5. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/__init__.py +1 -1
  6. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/_redact.py +19 -19
  7. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/base.py +185 -179
  8. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/codegraph_cbm.py +95 -95
  9. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/deploy_vercel.py +215 -215
  10. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/execute_aider.py +115 -115
  11. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/execute_claude.py +114 -114
  12. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/execute_opencode.py +114 -114
  13. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/merge_git.py +107 -107
  14. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/postdeploy_http.py +82 -82
  15. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/review_coderabbit.py +215 -215
  16. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/review_mock.py +42 -42
  17. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/sandbox_worktree.py +79 -79
  18. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/spec_sembl.py +91 -91
  19. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/verify_sembl.py +77 -77
  20. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/artifacts.py +207 -207
  21. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/cli.py +776 -807
  22. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/config.py +87 -87
  23. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/doctor.py +144 -144
  24. sembl_stack-0.2.0/sembl_stack/guide.py +907 -0
  25. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/loop.py +380 -380
  26. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/onboarding.py +272 -272
  27. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/presets.py +132 -132
  28. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/profile.py +193 -193
  29. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/reconciliation.py +138 -138
  30. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/registry.py +91 -91
  31. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/runner.py +27 -2
  32. sembl_stack-0.2.0/sembl_stack/scaffold.py +64 -0
  33. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/session.py +86 -86
  34. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/specgraph.py +146 -146
  35. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/store.py +112 -112
  36. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/tui.py +86 -86
  37. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/views.py +74 -74
  38. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/wizard.py +233 -233
  39. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_artifacts_store.py +71 -71
  40. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_codegraph_cbm.py +104 -104
  41. sembl_stack-0.2.0/tests/test_guide.py +611 -0
  42. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_loop_manifest.py +105 -105
  43. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_loop_smoke.py +92 -92
  44. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_merge_git.py +136 -136
  45. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_onboarding_logic.py +51 -51
  46. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_profile.py +219 -219
  47. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_reconciliation.py +64 -64
  48. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_review.py +356 -356
  49. sembl_stack-0.2.0/tests/test_run_executor.py +44 -0
  50. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_runner.py +25 -0
  51. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_session.py +80 -80
  52. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_spec_sembl.py +18 -18
  53. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_specgraph.py +38 -38
  54. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_verdict_binding.py +22 -0
  55. sembl_stack-0.1.1/.github/workflows/release.yml +0 -72
  56. sembl_stack-0.1.1/.github/workflows/tests.yml +0 -29
  57. sembl_stack-0.1.1/docs/LAUNCH-PREP-JULY1.md +0 -116
  58. sembl_stack-0.1.1/docs/PROCESS-ACTION-PLAN.md +0 -374
  59. sembl_stack-0.1.1/docs/SPEC-coderabbit-prep.md +0 -454
  60. sembl_stack-0.1.1/docs/SPEC-gate-0.2.0.md +0 -90
  61. sembl_stack-0.1.1/docs/SPEC-l8-rollback.md +0 -270
  62. sembl_stack-0.1.1/docs/SPEC-merge-stage.md +0 -269
  63. sembl_stack-0.1.1/docs/SPEC-reconcile-live.md +0 -325
  64. sembl_stack-0.1.1/docs/SPEC-review-standby.md +0 -73
  65. sembl_stack-0.1.1/docs/SPEC-tui-phase0.md +0 -69
  66. sembl_stack-0.1.1/docs/SPEC-tui-phase1-onboarding.md +0 -111
  67. sembl_stack-0.1.1/docs/eval-metric-O3.md +0 -99
  68. sembl_stack-0.1.1/docs/memory-plane-hypothesis.md +0 -141
  69. sembl_stack-0.1.1/docs/process-self-improvement.md +0 -77
  70. sembl_stack-0.1.1/eval/README.md +0 -32
  71. sembl_stack-0.1.1/eval/SPEC-through-deploy.md +0 -119
  72. sembl_stack-0.1.1/eval/build_corpus.py +0 -138
  73. sembl_stack-0.1.1/eval/corpus/01-greenfield-snake/case.json +0 -29
  74. sembl_stack-0.1.1/eval/corpus/02-feature-inscope/case.json +0 -27
  75. sembl_stack-0.1.1/eval/corpus/03-refactor-inscope/case.json +0 -25
  76. sembl_stack-0.1.1/eval/corpus/04-docs-tolerance/case.json +0 -26
  77. sembl_stack-0.1.1/eval/corpus/05-out-of-scope-infra/case.json +0 -25
  78. sembl_stack-0.1.1/eval/corpus/06-forbidden-workflow/case.json +0 -26
  79. sembl_stack-0.1.1/eval/corpus/07-fabricated-file/case.json +0 -26
  80. sembl_stack-0.1.1/eval/corpus/08-feature-with-stray/case.json +0 -26
  81. sembl_stack-0.1.1/eval/corpus/09-clean-diff-fabricated-report/case.json +0 -26
  82. sembl_stack-0.1.1/eval/corpus/10-rogue-combined/case.json +0 -27
  83. sembl_stack-0.1.1/eval/corpus/11-unevidenced-tests/case.json +0 -24
  84. sembl_stack-0.1.1/eval/corpus/12-over-churn/case.json +0 -28
  85. sembl_stack-0.1.1/eval/corpus/13-runtime-break-passes-gate/case.json +0 -21
  86. sembl_stack-0.1.1/eval/corpus/14-quality-defect-passes-gate/case.json +0 -20
  87. sembl_stack-0.1.1/eval/harness.py +0 -121
  88. sembl_stack-0.1.1/eval/through_deploy.py +0 -109
  89. sembl_stack-0.1.1/eval/two_axis.py +0 -126
  90. sembl_stack-0.1.1/examples/flagship-feedback-board/.env.example +0 -6
  91. sembl_stack-0.1.1/examples/flagship-feedback-board/.gitignore +0 -12
  92. sembl_stack-0.1.1/examples/flagship-feedback-board/README.md +0 -76
  93. sembl_stack-0.1.1/examples/flagship-feedback-board/bounds.json +0 -27
  94. sembl_stack-0.1.1/examples/flagship-feedback-board/next-env.d.ts +0 -6
  95. sembl_stack-0.1.1/examples/flagship-feedback-board/next.config.ts +0 -11
  96. sembl_stack-0.1.1/examples/flagship-feedback-board/package-lock.json +0 -1106
  97. sembl_stack-0.1.1/examples/flagship-feedback-board/package.json +0 -36
  98. sembl_stack-0.1.1/examples/flagship-feedback-board/postcss.config.mjs +0 -3
  99. sembl_stack-0.1.1/examples/flagship-feedback-board/scripts/check-deploy-readiness.mjs +0 -127
  100. sembl_stack-0.1.1/examples/flagship-feedback-board/scripts/postdeploy-health.mjs +0 -92
  101. sembl_stack-0.1.1/examples/flagship-feedback-board/sembl.stack.yaml +0 -28
  102. sembl_stack-0.1.1/examples/flagship-feedback-board/specs/001-feedback-board/bounds.json +0 -27
  103. sembl_stack-0.1.1/examples/flagship-feedback-board/specs/001-feedback-board/requirements.md +0 -8
  104. sembl_stack-0.1.1/examples/flagship-feedback-board/specs/001-feedback-board/tasks.md +0 -9
  105. sembl_stack-0.1.1/examples/flagship-feedback-board/src/app/actions.ts +0 -105
  106. sembl_stack-0.1.1/examples/flagship-feedback-board/src/app/api/health/route.ts +0 -9
  107. sembl_stack-0.1.1/examples/flagship-feedback-board/src/app/auth/callback/route.ts +0 -28
  108. sembl_stack-0.1.1/examples/flagship-feedback-board/src/app/globals.css +0 -453
  109. sembl_stack-0.1.1/examples/flagship-feedback-board/src/app/layout.tsx +0 -20
  110. sembl_stack-0.1.1/examples/flagship-feedback-board/src/app/page.tsx +0 -125
  111. sembl_stack-0.1.1/examples/flagship-feedback-board/src/components/auth-panel.tsx +0 -96
  112. sembl_stack-0.1.1/examples/flagship-feedback-board/src/components/feedback-board.tsx +0 -119
  113. sembl_stack-0.1.1/examples/flagship-feedback-board/src/components/feedback-form.tsx +0 -79
  114. sembl_stack-0.1.1/examples/flagship-feedback-board/src/lib/env.ts +0 -18
  115. sembl_stack-0.1.1/examples/flagship-feedback-board/src/lib/feedback.ts +0 -57
  116. sembl_stack-0.1.1/examples/flagship-feedback-board/src/lib/supabase/client.ts +0 -20
  117. sembl_stack-0.1.1/examples/flagship-feedback-board/src/lib/supabase/server.ts +0 -31
  118. sembl_stack-0.1.1/examples/flagship-feedback-board/src/middleware.ts +0 -42
  119. sembl_stack-0.1.1/examples/flagship-feedback-board/supabase/.gitignore +0 -8
  120. sembl_stack-0.1.1/examples/flagship-feedback-board/supabase/config.toml +0 -408
  121. sembl_stack-0.1.1/examples/flagship-feedback-board/supabase/migrations/202606200001_feedback_board.sql +0 -42
  122. sembl_stack-0.1.1/examples/flagship-feedback-board/supabase/migrations/20260621070532_harden_feedback_item_grants.sql +0 -4
  123. sembl_stack-0.1.1/examples/flagship-feedback-board/task.yaml +0 -3
  124. sembl_stack-0.1.1/examples/flagship-feedback-board/tsconfig.json +0 -43
  125. sembl_stack-0.1.1/examples/flagship-feedback-board/vercel.json +0 -4
  126. sembl_stack-0.1.1/examples/tasks/login-redirect/README.md +0 -54
  127. sembl_stack-0.1.1/examples/tasks/login-redirect/repo/infra/deploy.yaml +0 -1
  128. sembl_stack-0.1.1/examples/tasks/login-redirect/repo/specs/001-feature/bounds.json +0 -5
  129. sembl_stack-0.1.1/examples/tasks/login-redirect/repo/specs/001-feature/tasks.md +0 -3
  130. sembl_stack-0.1.1/examples/tasks/login-redirect/repo/src/app/__init__.py +0 -1
  131. sembl_stack-0.1.1/examples/tasks/login-redirect/task.yaml +0 -4
  132. sembl_stack-0.1.1/sembl.stack.yaml +0 -20
  133. sembl_stack-0.1.1/uv.lock +0 -2370
  134. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/LICENSE +0 -0
  135. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/SECURITY.md +0 -0
  136. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/__init__.py +0 -0
  137. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/execute_mock.py +0 -0
  138. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/adapters/review_llm.py +0 -0
  139. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/contextgraph.py +0 -0
  140. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/rsi.py +0 -0
  141. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/tracing.py +0 -0
  142. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/transport/__init__.py +0 -0
  143. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/sembl_stack/transport/mcp_client.py +0 -0
  144. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_contextgraph.py +0 -0
  145. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_deploy_postdeploy.py +0 -0
  146. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_init_stranger.py +0 -0
  147. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_opencode_adapter.py +0 -0
  148. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_rsi.py +0 -0
  149. {sembl_stack-0.1.1 → sembl_stack-0.2.0}/tests/test_scrub_secrets.py +0 -0
@@ -1,16 +1,16 @@
1
- # Example target repos are git-inited locally for demos; don't nest their .git
2
- examples/**/repo/.git/
3
- # Local-only tests (kept off the repo by request)
4
- tests/local/
5
- # Run artifacts (the run store) are local-only
6
- .sembl/
7
- examples/**/repo/.sembl/
8
- __pycache__/
9
- *.egg-info/
10
- .pytest_cache/
11
- .test-tmp/
12
- build/
13
- dist/
14
- .venv/
1
+ # Example target repos are git-inited locally for demos; don't nest their .git
2
+ examples/**/repo/.git/
3
+ # Local-only tests (kept off the repo by request)
4
+ tests/local/
5
+ # Run artifacts (the run store) are local-only
6
+ .sembl/
7
+ examples/**/repo/.sembl/
8
+ __pycache__/
9
+ *.egg-info/
10
+ .pytest_cache/
11
+ .test-tmp/
12
+ build/
13
+ dist/
14
+ .venv/
15
15
  eval/.checkpoint-*.json
16
16
  eval/.result-*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sembl-stack
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: A swappable, spec-driven software factory: task to bounds, an agent writes in a sandbox, the Sembl gate judges the diff, PASS merges, deploys, and a post-deploy gate confirms or rolls back. Every stage an adapter behind one typed contract.
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -9,6 +9,7 @@ Requires-Dist: click>=8.1
9
9
  Requires-Dist: langgraph>=0.2
10
10
  Requires-Dist: mcp>=1.0
11
11
  Requires-Dist: pyyaml>=6.0
12
+ Requires-Dist: questionary>=2.0
12
13
  Requires-Dist: sembl>=0.1.20
13
14
  Provides-Extra: all
14
15
  Requires-Dist: langfuse>=2.0; extra == 'all'
@@ -57,8 +58,30 @@ task ─▶ bounds ─▶ execute ─▶ sandbox ─▶ SEMBL GATE ─▶ merge
57
58
  ## Quickstart
58
59
 
59
60
  ```bash
60
- pip install sembl-stack sembl # the stack + the gate it runs at its core
61
- sembl-stack init # scaffold sembl.stack.yaml + task.yaml from a preset
61
+ pip install sembl-stack
62
+ cd your-repo
63
+ sembl-stack
64
+ ```
65
+
66
+ That's the whole quickstart. Bare `sembl-stack` launches the guided run — an
67
+ OpenCode-style TUI that walks you through, one screen at a time:
68
+
69
+ 1. **repo** — confirms the repo it detected (a fresh non-git directory is offered a
70
+ safe demo scaffold instead)
71
+ 2. **agent & keys** — every way to run AI work, with **live status**: is Claude Code
72
+ installed and logged in, which API-key env vars are set, is OpenCode on PATH — and
73
+ exactly what to do for any option you're missing (mock always works, zero keys)
74
+ 3. **task** — describe the change in plain English and pick which paths the agent may
75
+ touch (suggested from your repo); the tool writes `task.yaml`/`bounds.json`, you
76
+ never do
77
+ 4. **run** — the live stage rail (BLOCK → feedback → retry → PASS), then the verdict
78
+ with its receipt and the one next command
79
+
80
+ Everything the guide does is also a scriptable command (same machinery, byte-identical
81
+ runs — that's the point):
82
+
83
+ ```bash
84
+ sembl-stack init # scaffold config + starter task + demo repo
62
85
  sembl-stack doctor # config-aware preflight
63
86
  sembl-stack loop task.yaml # plan → execute → gate → retry-on-BLOCK
64
87
  sembl-stack runs [<id>] # list / inspect runs
@@ -127,13 +150,12 @@ Every run leaves a complete paper trail in `.sembl/runs/<id>/`:
127
150
  └─ trace.json # the timeline
128
151
  ```
129
152
 
130
- ## The guided TUI (optional)
153
+ ## The guided TUI
131
154
 
132
- `pip install "sembl-stack[tui]"` adds a Textual wizard. Run bare `sembl-stack` and
133
- press `r`: the stage rail runs the real loop under your configured profile,
134
- streaming per-stage status (pending/running/pass/fail) live and showing the final
135
- verdict byte-identical to a headless `sembl-stack loop`, because it drives the
136
- same adapters.
155
+ The guide is the default surface (see Quickstart) and a core dependency — no extra
156
+ install. It streams the real loop live under your configured profile and is
157
+ byte-identical to a headless `sembl-stack loop`, because it drives the same adapters.
158
+ Re-run the agent & keys step anytime with `sembl-stack --reconfigure`.
137
159
 
138
160
  ## The full picture
139
161
 
@@ -27,8 +27,30 @@ task ─▶ bounds ─▶ execute ─▶ sandbox ─▶ SEMBL GATE ─▶ merge
27
27
  ## Quickstart
28
28
 
29
29
  ```bash
30
- pip install sembl-stack sembl # the stack + the gate it runs at its core
31
- sembl-stack init # scaffold sembl.stack.yaml + task.yaml from a preset
30
+ pip install sembl-stack
31
+ cd your-repo
32
+ sembl-stack
33
+ ```
34
+
35
+ That's the whole quickstart. Bare `sembl-stack` launches the guided run — an
36
+ OpenCode-style TUI that walks you through, one screen at a time:
37
+
38
+ 1. **repo** — confirms the repo it detected (a fresh non-git directory is offered a
39
+ safe demo scaffold instead)
40
+ 2. **agent & keys** — every way to run AI work, with **live status**: is Claude Code
41
+ installed and logged in, which API-key env vars are set, is OpenCode on PATH — and
42
+ exactly what to do for any option you're missing (mock always works, zero keys)
43
+ 3. **task** — describe the change in plain English and pick which paths the agent may
44
+ touch (suggested from your repo); the tool writes `task.yaml`/`bounds.json`, you
45
+ never do
46
+ 4. **run** — the live stage rail (BLOCK → feedback → retry → PASS), then the verdict
47
+ with its receipt and the one next command
48
+
49
+ Everything the guide does is also a scriptable command (same machinery, byte-identical
50
+ runs — that's the point):
51
+
52
+ ```bash
53
+ sembl-stack init # scaffold config + starter task + demo repo
32
54
  sembl-stack doctor # config-aware preflight
33
55
  sembl-stack loop task.yaml # plan → execute → gate → retry-on-BLOCK
34
56
  sembl-stack runs [<id>] # list / inspect runs
@@ -97,13 +119,12 @@ Every run leaves a complete paper trail in `.sembl/runs/<id>/`:
97
119
  └─ trace.json # the timeline
98
120
  ```
99
121
 
100
- ## The guided TUI (optional)
122
+ ## The guided TUI
101
123
 
102
- `pip install "sembl-stack[tui]"` adds a Textual wizard. Run bare `sembl-stack` and
103
- press `r`: the stage rail runs the real loop under your configured profile,
104
- streaming per-stage status (pending/running/pass/fail) live and showing the final
105
- verdict byte-identical to a headless `sembl-stack loop`, because it drives the
106
- same adapters.
124
+ The guide is the default surface (see Quickstart) and a core dependency — no extra
125
+ install. It streams the real loop live under your configured profile and is
126
+ byte-identical to a headless `sembl-stack loop`, because it drives the same adapters.
127
+ Re-run the agent & keys step anytime with `sembl-stack --reconfigure`.
107
128
 
108
129
  ## The full picture
109
130
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sembl-stack"
7
- version = "0.1.1"
7
+ version = "0.2.0"
8
8
  description = "A swappable, spec-driven software factory: task to bounds, an agent writes in a sandbox, the Sembl gate judges the diff, PASS merges, deploys, and a post-deploy gate confirms or rolls back. Every stage an adapter behind one typed contract."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -17,6 +17,9 @@ dependencies = [
17
17
  # default install runs it, not the built-in fallback runner.
18
18
  "sembl>=0.1.20", # L5 gate + L2 spec engine. Core because the DEFAULT config gates
19
19
  "mcp>=1.0", # with sembl over MCP — a bare install must run the default stack.
20
+ "questionary>=2.0", # the guided run IS the product surface: bare `sembl-stack` must
21
+ # always launch the inline step-by-step guide (owner UX decision
22
+ # 2026-07-04: Claude Code-style inline prompts, NOT a full-screen app).
20
23
  ]
21
24
 
22
25
  [project.optional-dependencies]
@@ -34,3 +37,17 @@ sembl-stack = "sembl_stack.cli:main"
34
37
 
35
38
  [tool.hatch.build.targets.wheel]
36
39
  packages = ["sembl_stack"]
40
+
41
+ [tool.hatch.build.targets.sdist]
42
+ # Explicit allowlist, not "everything minus .gitignore": hatchling's default sdist
43
+ # packed the whole working tree, including examples/flagship-feedback-board's
44
+ # node_modules/.next build output/.vercel/.env.local (a real Next.js app checked out
45
+ # for local dev, not part of this package) — a 61MB sdist. Only what installs/builds
46
+ # the CLI belongs here.
47
+ include = [
48
+ "/sembl_stack",
49
+ "/tests",
50
+ "/README.md",
51
+ "/LICENSE",
52
+ "/SECURITY.md",
53
+ ]
@@ -1,3 +1,3 @@
1
1
  """sembl-stack — an open, swappable spec-driven coding factory."""
2
2
 
3
- __version__ = "0.1.1"
3
+ __version__ = "0.2.0"
@@ -1,19 +1,19 @@
1
- """Redaction helper for adapter artifacts.
2
-
3
- Third-party process output (HTTP health bodies, CLI stdout/stderr, reviewer output) can carry
4
- debug pages, stack traces, env-shaped values, diff snippets, or auth errors. Persisting it raw
5
- into `.sembl/runs/<id>/` would violate the no-secrets-in-artifacts invariant. We keep only a
6
- non-reversible fingerprint: byte count + sha256. That preserves "output existed / did it change"
7
- signal without ever serializing the content.
8
- """
9
- from __future__ import annotations
10
-
11
- import hashlib
12
-
13
-
14
- def summarize(text) -> dict:
15
- """Reduce arbitrary third-party text to {bytes, sha256} — never the content itself."""
16
- if text is None:
17
- return {"bytes": 0, "sha256": None}
18
- raw = text if isinstance(text, bytes) else str(text).encode("utf-8", "replace")
19
- return {"bytes": len(raw), "sha256": hashlib.sha256(raw).hexdigest()}
1
+ """Redaction helper for adapter artifacts.
2
+
3
+ Third-party process output (HTTP health bodies, CLI stdout/stderr, reviewer output) can carry
4
+ debug pages, stack traces, env-shaped values, diff snippets, or auth errors. Persisting it raw
5
+ into `.sembl/runs/<id>/` would violate the no-secrets-in-artifacts invariant. We keep only a
6
+ non-reversible fingerprint: byte count + sha256. That preserves "output existed / did it change"
7
+ signal without ever serializing the content.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+
13
+
14
+ def summarize(text) -> dict:
15
+ """Reduce arbitrary third-party text to {bytes, sha256} — never the content itself."""
16
+ if text is None:
17
+ return {"bytes": 0, "sha256": None}
18
+ raw = text if isinstance(text, bytes) else str(text).encode("utf-8", "replace")
19
+ return {"bytes": len(raw), "sha256": hashlib.sha256(raw).hexdigest()}
@@ -1,179 +1,185 @@
1
- """The platform contract.
2
-
3
- The data types are the canonical artifacts (see `sembl_stack/artifacts.py`); the
4
- Protocols below are what an adapter must satisfy to be swappable into a layer. Re-exported
5
- here so adapters import everything they need from one place.
6
- """
7
- from __future__ import annotations
8
-
9
- import os
10
- import re
11
- import subprocess
12
- from typing import Protocol, runtime_checkable
13
-
14
- from ..artifacts import ( # noqa: F401 (re-exported for adapters)
15
- Bounds,
16
- Change,
17
- Context,
18
- Delivery,
19
- ExecutionResult,
20
- MergeRecord,
21
- ReconciliationReport,
22
- ReviewReport,
23
- SpecGraph,
24
- Task,
25
- Trace,
26
- Verdict,
27
- )
28
-
29
-
30
- # --- Shared adapter helpers ---------------------------------------------------
31
-
32
- def changed_files_from_diff(diff: str) -> list[str]:
33
- """Files touched by a unified git diff, order-preserved and de-duplicated.
34
-
35
- Reads BOTH the `diff --git a/… b/…` headers and the `+++ b/…` markers, unioned:
36
- * the `diff --git` header names a file even when it has no `+++` hunk — e.g. an
37
- EMPTY new file an errored agent created. A `+++`-only parser silently drops it,
38
- and the gate then flags a spurious "unreported change";
39
- * the `+++ b/` marker is the fallback for a diff fragment that arrives without a
40
- full header.
41
- `/dev/null` (the add/delete sentinel) is skipped. Every executor adapter uses this
42
- one parser so Claude/OpenCode/Aider report changed files consistently.
43
- """
44
- seen: set[str] = set()
45
- out: list[str] = []
46
-
47
- def add(path: str) -> None:
48
- path = path.strip()
49
- if path and path != "/dev/null" and path not in seen:
50
- seen.add(path)
51
- out.append(path)
52
-
53
- for line in diff.splitlines():
54
- if line.startswith("diff --git "):
55
- _, _, tail = line.partition(" b/")
56
- if tail:
57
- add(tail)
58
- elif line.startswith("+++ "):
59
- marker = line[4:]
60
- if marker.startswith("b/"):
61
- marker = marker[2:]
62
- add(marker.split("\t", 1)[0]) # drop a trailing tab-timestamp if present
63
- return out
64
-
65
-
66
- # Env-var names whose values are credentials; a secret only ever lives in the
67
- # environment, so an executor CLI echoing one (e.g. in an auth error) is the one
68
- # path it could reach a persisted run artifact. Scrubbed by value below.
69
- _SECRET_ENV_NAME = re.compile(r"(API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL)S?$", re.IGNORECASE)
70
- # Generic provider-key shapes (sk-ant-…, sk-proj-…, sk-or-v1-…) as a second net.
71
- _SECRET_TOKEN = re.compile(r"sk-[A-Za-z0-9_\-]{8,}")
72
-
73
-
74
- def scrub_secrets(text: str) -> str:
75
- """Redact anything secret-shaped before it reaches a run artifact.
76
-
77
- Executor stdout/stderr is persisted into `.sembl/runs/<id>/change.json` for
78
- debuggability; the security invariant (no key value ever stored) must hold even
79
- when a CLI misbehaves and echoes a credential. Env values are compared in memory
80
- only — nothing read here is ever written anywhere except as its redaction marker.
81
- """
82
- if not text:
83
- return text
84
- for name, value in os.environ.items():
85
- if len(value) >= 8 and _SECRET_ENV_NAME.search(name):
86
- text = text.replace(value, f"[redacted:{name}]")
87
- return _SECRET_TOKEN.sub("[redacted:key]", text)
88
-
89
-
90
- def run_executor(cmd: list[str], cwd: str, timeout: int, **run_kwargs):
91
- """Run an executor subprocess, turning a timeout into a structured signal.
92
-
93
- Returns ``(returncode, stdout, stderr, timed_out)``. A `subprocess.TimeoutExpired`
94
- is caught here (its partial stdout/stderr preserved) instead of being allowed to
95
- propagate and abort the whole loop — the caller records `timed_out` in the report so
96
- the gate stage can convert it to a BLOCK rather than a crash.
97
- """
98
- try:
99
- # encoding/errors explicit: agents emit UTF-8 (box-drawing, emoji, etc.). The default
100
- # text=True decodes with the locale codec (cp1252 on Windows), which crashes the
101
- # stdout reader thread mid-run and silently loses the output. Decode as UTF-8 and
102
- # replace undecodable bytes so capture never aborts the loop.
103
- proc = subprocess.run(
104
- cmd, cwd=cwd, capture_output=True, text=True, timeout=timeout,
105
- encoding="utf-8", errors="replace", **run_kwargs)
106
- return proc.returncode, proc.stdout or "", proc.stderr or "", False
107
- except subprocess.TimeoutExpired as exc:
108
- out, err = exc.stdout or "", exc.stderr or ""
109
- if isinstance(out, bytes):
110
- out = out.decode("utf-8", "replace")
111
- if isinstance(err, bytes):
112
- err = err.decode("utf-8", "replace")
113
- return -1, out, err, True
114
-
115
-
116
- # --- Layer interfaces (Protocols) ---------------------------------------------
117
-
118
- class Sandbox(Protocol): # an open sandbox handle (from L4)
119
- workdir: str
120
- def diff(self) -> str: ...
121
- def close(self) -> None: ...
122
-
123
-
124
- @runtime_checkable
125
- class SpecAdapter(Protocol): # L2: Task -> Bounds
126
- def plan(self, task: Task) -> Bounds: ...
127
-
128
-
129
- @runtime_checkable
130
- class SandboxAdapter(Protocol): # L4: Change -> Change (contained)
131
- def open(self, repo: str) -> Sandbox: ...
132
-
133
-
134
- @runtime_checkable
135
- class ExecuteAdapter(Protocol): # L3: Task+Bounds(+Context) -> Change
136
- def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
137
- feedback: str | None) -> ExecutionResult: ...
138
-
139
-
140
- @runtime_checkable
141
- class VerifyAdapter(Protocol): # L5: Change+Bounds -> Verdict
142
- def verify(self, bounds: Bounds, result: ExecutionResult,
143
- strict: bool) -> Verdict: ...
144
-
145
-
146
- @runtime_checkable
147
- class ReconcileAdapter(Protocol): # L5.5: SpecGraph+CodeGraph -> report
148
- def reconcile(self, spec_graph: SpecGraph, code_graph: dict) -> ReconciliationReport:
149
- ...
150
-
151
-
152
- @runtime_checkable
153
- class MergeAdapter(Protocol): # L6.5: Verdict(PASS) -> MergeRecord
154
- def merge(self, repo: str, *, into: str = "main", source: str = "HEAD",
155
- no_ff: bool = True, message: str | None = None) -> MergeRecord:
156
- ...
157
-
158
-
159
- @runtime_checkable
160
- class DeployAdapter(Protocol): # L7: Verdict(PASS) -> Delivery; rollback reverts it
161
- def deploy(self, repo: str, *, production: bool = False,
162
- prebuilt: bool = False) -> Delivery:
163
- ...
164
-
165
- def rollback(self, repo: str, *, to: str | None = None) -> Delivery:
166
- ...
167
-
168
-
169
- @runtime_checkable
170
- class PostDeployAdapter(Protocol): # L8: Delivery -> Verdict
171
- def verify(self, delivery: Delivery, *, health_path: str = "/",
172
- timeout_s: float = 10.0) -> Verdict:
173
- ...
174
-
175
-
176
- @runtime_checkable
177
- class ReviewAdapter(Protocol): # L5.5 quality: a diff -> ReviewReport (advisory)
178
- def review(self, diff: str, *, reviewer_hint: str = "") -> ReviewReport:
179
- ...
1
+ """The platform contract.
2
+
3
+ The data types are the canonical artifacts (see `sembl_stack/artifacts.py`); the
4
+ Protocols below are what an adapter must satisfy to be swappable into a layer. Re-exported
5
+ here so adapters import everything they need from one place.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import re
11
+ import subprocess
12
+ from typing import Protocol, runtime_checkable
13
+
14
+ from ..artifacts import ( # noqa: F401 (re-exported for adapters)
15
+ Bounds,
16
+ Change,
17
+ Context,
18
+ Delivery,
19
+ ExecutionResult,
20
+ MergeRecord,
21
+ ReconciliationReport,
22
+ ReviewReport,
23
+ SpecGraph,
24
+ Task,
25
+ Trace,
26
+ Verdict,
27
+ )
28
+
29
+
30
+ # --- Shared adapter helpers ---------------------------------------------------
31
+
32
+ def changed_files_from_diff(diff: str) -> list[str]:
33
+ """Files touched by a unified git diff, order-preserved and de-duplicated.
34
+
35
+ Reads BOTH the `diff --git a/… b/…` headers and the `+++ b/…` markers, unioned:
36
+ * the `diff --git` header names a file even when it has no `+++` hunk — e.g. an
37
+ EMPTY new file an errored agent created. A `+++`-only parser silently drops it,
38
+ and the gate then flags a spurious "unreported change";
39
+ * the `+++ b/` marker is the fallback for a diff fragment that arrives without a
40
+ full header.
41
+ `/dev/null` (the add/delete sentinel) is skipped. Every executor adapter uses this
42
+ one parser so Claude/OpenCode/Aider report changed files consistently.
43
+ """
44
+ seen: set[str] = set()
45
+ out: list[str] = []
46
+
47
+ def add(path: str) -> None:
48
+ path = path.strip()
49
+ if path and path != "/dev/null" and path not in seen:
50
+ seen.add(path)
51
+ out.append(path)
52
+
53
+ for line in diff.splitlines():
54
+ if line.startswith("diff --git "):
55
+ _, _, tail = line.partition(" b/")
56
+ if tail:
57
+ add(tail)
58
+ elif line.startswith("+++ "):
59
+ marker = line[4:]
60
+ if marker.startswith("b/"):
61
+ marker = marker[2:]
62
+ add(marker.split("\t", 1)[0]) # drop a trailing tab-timestamp if present
63
+ return out
64
+
65
+
66
# Env-var names whose values are credentials; a secret only ever lives in the
# environment, so an executor CLI echoing one (e.g. in an auth error) is the one
# path it could reach a persisted run artifact. Scrubbed by value below.
# NOTE(review): the pattern is anchored at the END of the name, so a name such as
# AWS_SECRET_ACCESS_KEY does not match — confirm whether that is intended.
_SECRET_ENV_NAME = re.compile(r"(API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL)S?$", re.IGNORECASE)
# Generic provider-key shapes (sk-ant-…, sk-proj-…, sk-or-v1-…) as a second net.
_SECRET_TOKEN = re.compile(r"sk-[A-Za-z0-9_\-]{8,}")


def scrub_secrets(text: str) -> str:
    """Redact anything secret-shaped before it reaches a run artifact.

    Executor stdout/stderr is persisted into `.sembl/runs/<id>/change.json` for
    debuggability; the security invariant (no key value ever stored) must hold even
    when a CLI misbehaves and echoes a credential. Env values are compared in memory
    only — nothing read here is ever written anywhere except as its redaction marker.

    Two passes are applied:
      1. every environment value of at least 8 characters whose variable NAME looks
         like a credential is replaced by `[redacted:<NAME>]`;
      2. anything matching the generic `sk-…` key shape is replaced by
         `[redacted:key]`.

    Args:
        text: Captured executor output; empty/falsy input is returned unchanged.

    Returns:
        The text with matching secret values replaced by redaction markers.
    """
    if not text:
        return text
    secrets = [
        (name, value)
        for name, value in os.environ.items()
        if len(value) >= 8 and _SECRET_ENV_NAME.search(name)
    ]
    # Longest value first: if one secret is a prefix/substring of another, replacing
    # the shorter one first would split the longer one and leave its remainder in
    # the output unredacted.
    secrets.sort(key=lambda item: len(item[1]), reverse=True)
    for name, value in secrets:
        text = text.replace(value, f"[redacted:{name}]")
    return _SECRET_TOKEN.sub("[redacted:key]", text)
88
+
89
+
90
def run_executor(cmd: list[str], cwd: str, timeout: int, **run_kwargs):
    """Run an executor subprocess and report a timeout as data instead of raising.

    Returns ``(returncode, stdout, stderr, timed_out)``. When the process exceeds
    `timeout`, the `subprocess.TimeoutExpired` is swallowed here, whatever output was
    captured so far is kept, and the result is ``(-1, stdout, stderr, True)`` — the
    caller records `timed_out` in the report so the gate stage can convert it to a
    BLOCK rather than a crash.

    stdin defaults to DEVNULL: a headless executor must never sit waiting on
    interactive input (e.g. a CLI's first-run provider setup), because its output is
    captured into pipes nobody is watching. With stdin cut off the CLI reads EOF at
    once and fails fast with a visible error. Callers may override `stdin` (or pass
    any other `subprocess.run` option) through `run_kwargs`.
    """

    def _as_text(captured) -> str:
        # A TimeoutExpired may carry None, bytes, or str for a captured stream.
        if not captured:
            return ""
        if isinstance(captured, bytes):
            return captured.decode("utf-8", "replace")
        return captured

    run_kwargs.setdefault("stdin", subprocess.DEVNULL)
    try:
        # Decode explicitly as UTF-8 with replacement: agents emit UTF-8 (box-drawing
        # characters, emoji, …) while plain text=True would use the locale codec
        # (cp1252 on Windows), which can crash the stdout reader thread mid-run and
        # silently lose the output.
        completed = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=timeout,
            encoding="utf-8",
            errors="replace",
            **run_kwargs,
        )
    except subprocess.TimeoutExpired as expired:
        return -1, _as_text(expired.stdout), _as_text(expired.stderr), True
    return completed.returncode, completed.stdout or "", completed.stderr or "", False
120
+
121
+
122
+ # --- Layer interfaces (Protocols) ---------------------------------------------
123
+
124
+ class Sandbox(Protocol): # an open sandbox handle (from L4)
125
+ workdir: str
126
+ def diff(self) -> str: ...
127
+ def close(self) -> None: ...
128
+
129
+
130
@runtime_checkable
class SpecAdapter(Protocol):
    """L2 layer interface: Task -> Bounds."""

    def plan(self, task: Task) -> Bounds:
        """Produce the Bounds for `task`."""
133
+
134
+
135
@runtime_checkable
class SandboxAdapter(Protocol):
    """L4 layer interface: Change -> Change (contained)."""

    def open(self, repo: str) -> Sandbox:
        """Open a sandbox for `repo` and return its handle."""
138
+
139
+
140
@runtime_checkable
class ExecuteAdapter(Protocol):
    """L3 layer interface: Task + Bounds (+ Context) -> Change."""

    def run(
        self,
        task: Task,
        bounds: Bounds,
        sandbox: Sandbox,
        feedback: str | None,
    ) -> ExecutionResult:
        """Execute `task` within `bounds` inside `sandbox`; `feedback` may be None."""
144
+
145
+
146
@runtime_checkable
class VerifyAdapter(Protocol):
    """L5 layer interface: Change + Bounds -> Verdict."""

    def verify(
        self,
        bounds: Bounds,
        result: ExecutionResult,
        strict: bool,
    ) -> Verdict:
        """Judge `result` against `bounds` and return a Verdict."""
150
+
151
+
152
@runtime_checkable
class ReconcileAdapter(Protocol):
    """L5.5 layer interface: SpecGraph + CodeGraph -> report."""

    def reconcile(
        self,
        spec_graph: SpecGraph,
        code_graph: dict,
    ) -> ReconciliationReport:
        """Compare `spec_graph` with `code_graph` and return the report."""
156
+
157
+
158
@runtime_checkable
class MergeAdapter(Protocol):
    """L6.5 layer interface: Verdict(PASS) -> MergeRecord."""

    def merge(
        self,
        repo: str,
        *,
        into: str = "main",
        source: str = "HEAD",
        no_ff: bool = True,
        message: str | None = None,
    ) -> MergeRecord:
        """Merge `source` into `into` in `repo` and return the MergeRecord."""
163
+
164
+
165
@runtime_checkable
class DeployAdapter(Protocol):
    """L7 layer interface: Verdict(PASS) -> Delivery; rollback reverts it."""

    def deploy(
        self,
        repo: str,
        *,
        production: bool = False,
        prebuilt: bool = False,
    ) -> Delivery:
        """Deploy `repo` and return the resulting Delivery."""

    def rollback(
        self,
        repo: str,
        *,
        to: str | None = None,
    ) -> Delivery:
        """Revert a deployment of `repo`; `to` optionally names the target."""
173
+
174
+
175
@runtime_checkable
class PostDeployAdapter(Protocol):
    """L8 layer interface: Delivery -> Verdict."""

    def verify(
        self,
        delivery: Delivery,
        *,
        health_path: str = "/",
        timeout_s: float = 10.0,
    ) -> Verdict:
        """Check `delivery` after deployment and return a Verdict."""
180
+
181
+
182
@runtime_checkable
class ReviewAdapter(Protocol):
    """L5.5 quality layer interface: a diff -> ReviewReport (advisory)."""

    def review(
        self,
        diff: str,
        *,
        reviewer_hint: str = "",
    ) -> ReviewReport:
        """Review `diff` and return a ReviewReport."""