sembl-stack 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/.gitignore +16 -14
  2. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/PKG-INFO +31 -9
  3. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/README.md +29 -8
  4. sembl_stack-0.2.0/SECURITY.md +28 -0
  5. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/pyproject.toml +18 -1
  6. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/__init__.py +1 -1
  7. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/_redact.py +19 -19
  8. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/base.py +185 -179
  9. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/codegraph_cbm.py +95 -95
  10. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/deploy_vercel.py +215 -215
  11. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/execute_aider.py +115 -115
  12. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/execute_claude.py +114 -114
  13. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/execute_opencode.py +114 -114
  14. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/merge_git.py +107 -107
  15. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/postdeploy_http.py +82 -82
  16. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/review_coderabbit.py +215 -215
  17. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/review_mock.py +42 -42
  18. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/sandbox_worktree.py +79 -79
  19. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/spec_sembl.py +91 -91
  20. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/verify_sembl.py +77 -77
  21. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/artifacts.py +207 -207
  22. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/cli.py +776 -759
  23. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/config.py +87 -87
  24. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/doctor.py +144 -111
  25. sembl_stack-0.2.0/sembl_stack/guide.py +907 -0
  26. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/loop.py +380 -380
  27. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/onboarding.py +272 -272
  28. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/presets.py +132 -114
  29. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/profile.py +193 -193
  30. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/reconciliation.py +138 -138
  31. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/registry.py +91 -91
  32. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/runner.py +27 -2
  33. sembl_stack-0.2.0/sembl_stack/scaffold.py +64 -0
  34. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/session.py +86 -86
  35. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/specgraph.py +146 -146
  36. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/store.py +112 -112
  37. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/tui.py +86 -86
  38. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/views.py +74 -74
  39. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/wizard.py +233 -233
  40. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_artifacts_store.py +71 -71
  41. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_codegraph_cbm.py +104 -104
  42. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_deploy_postdeploy.py +4 -0
  43. sembl_stack-0.2.0/tests/test_guide.py +611 -0
  44. sembl_stack-0.2.0/tests/test_init_stranger.py +101 -0
  45. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_loop_manifest.py +105 -105
  46. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_loop_smoke.py +92 -92
  47. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_merge_git.py +136 -136
  48. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_onboarding_logic.py +51 -51
  49. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_profile.py +219 -219
  50. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_reconciliation.py +64 -64
  51. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_review.py +356 -356
  52. sembl_stack-0.2.0/tests/test_run_executor.py +44 -0
  53. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_runner.py +25 -0
  54. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_session.py +80 -80
  55. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_spec_sembl.py +18 -18
  56. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_specgraph.py +38 -38
  57. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_verdict_binding.py +22 -0
  58. sembl_stack-0.1.0/.github/workflows/release.yml +0 -67
  59. sembl_stack-0.1.0/docs/LAUNCH-PREP-JULY1.md +0 -116
  60. sembl_stack-0.1.0/docs/PROCESS-ACTION-PLAN.md +0 -369
  61. sembl_stack-0.1.0/docs/SPEC-coderabbit-prep.md +0 -454
  62. sembl_stack-0.1.0/docs/SPEC-gate-0.2.0.md +0 -90
  63. sembl_stack-0.1.0/docs/SPEC-l8-rollback.md +0 -270
  64. sembl_stack-0.1.0/docs/SPEC-merge-stage.md +0 -269
  65. sembl_stack-0.1.0/docs/SPEC-reconcile-live.md +0 -325
  66. sembl_stack-0.1.0/docs/SPEC-review-standby.md +0 -73
  67. sembl_stack-0.1.0/docs/SPEC-tui-phase0.md +0 -69
  68. sembl_stack-0.1.0/docs/SPEC-tui-phase1-onboarding.md +0 -111
  69. sembl_stack-0.1.0/docs/eval-metric-O3.md +0 -99
  70. sembl_stack-0.1.0/docs/memory-plane-hypothesis.md +0 -141
  71. sembl_stack-0.1.0/docs/process-self-improvement.md +0 -77
  72. sembl_stack-0.1.0/eval/README.md +0 -32
  73. sembl_stack-0.1.0/eval/SPEC-through-deploy.md +0 -119
  74. sembl_stack-0.1.0/eval/build_corpus.py +0 -138
  75. sembl_stack-0.1.0/eval/corpus/01-greenfield-snake/case.json +0 -29
  76. sembl_stack-0.1.0/eval/corpus/02-feature-inscope/case.json +0 -27
  77. sembl_stack-0.1.0/eval/corpus/03-refactor-inscope/case.json +0 -25
  78. sembl_stack-0.1.0/eval/corpus/04-docs-tolerance/case.json +0 -26
  79. sembl_stack-0.1.0/eval/corpus/05-out-of-scope-infra/case.json +0 -25
  80. sembl_stack-0.1.0/eval/corpus/06-forbidden-workflow/case.json +0 -26
  81. sembl_stack-0.1.0/eval/corpus/07-fabricated-file/case.json +0 -26
  82. sembl_stack-0.1.0/eval/corpus/08-feature-with-stray/case.json +0 -26
  83. sembl_stack-0.1.0/eval/corpus/09-clean-diff-fabricated-report/case.json +0 -26
  84. sembl_stack-0.1.0/eval/corpus/10-rogue-combined/case.json +0 -27
  85. sembl_stack-0.1.0/eval/corpus/11-unevidenced-tests/case.json +0 -24
  86. sembl_stack-0.1.0/eval/corpus/12-over-churn/case.json +0 -28
  87. sembl_stack-0.1.0/eval/corpus/13-runtime-break-passes-gate/case.json +0 -21
  88. sembl_stack-0.1.0/eval/corpus/14-quality-defect-passes-gate/case.json +0 -20
  89. sembl_stack-0.1.0/eval/harness.py +0 -121
  90. sembl_stack-0.1.0/eval/through_deploy.py +0 -109
  91. sembl_stack-0.1.0/eval/two_axis.py +0 -103
  92. sembl_stack-0.1.0/examples/flagship-feedback-board/.env.example +0 -6
  93. sembl_stack-0.1.0/examples/flagship-feedback-board/.gitignore +0 -12
  94. sembl_stack-0.1.0/examples/flagship-feedback-board/README.md +0 -76
  95. sembl_stack-0.1.0/examples/flagship-feedback-board/bounds.json +0 -27
  96. sembl_stack-0.1.0/examples/flagship-feedback-board/next-env.d.ts +0 -6
  97. sembl_stack-0.1.0/examples/flagship-feedback-board/next.config.ts +0 -11
  98. sembl_stack-0.1.0/examples/flagship-feedback-board/package-lock.json +0 -1106
  99. sembl_stack-0.1.0/examples/flagship-feedback-board/package.json +0 -36
  100. sembl_stack-0.1.0/examples/flagship-feedback-board/postcss.config.mjs +0 -3
  101. sembl_stack-0.1.0/examples/flagship-feedback-board/scripts/check-deploy-readiness.mjs +0 -127
  102. sembl_stack-0.1.0/examples/flagship-feedback-board/scripts/postdeploy-health.mjs +0 -92
  103. sembl_stack-0.1.0/examples/flagship-feedback-board/sembl.stack.yaml +0 -28
  104. sembl_stack-0.1.0/examples/flagship-feedback-board/specs/001-feedback-board/bounds.json +0 -27
  105. sembl_stack-0.1.0/examples/flagship-feedback-board/specs/001-feedback-board/requirements.md +0 -8
  106. sembl_stack-0.1.0/examples/flagship-feedback-board/specs/001-feedback-board/tasks.md +0 -9
  107. sembl_stack-0.1.0/examples/flagship-feedback-board/src/app/actions.ts +0 -105
  108. sembl_stack-0.1.0/examples/flagship-feedback-board/src/app/api/health/route.ts +0 -9
  109. sembl_stack-0.1.0/examples/flagship-feedback-board/src/app/auth/callback/route.ts +0 -28
  110. sembl_stack-0.1.0/examples/flagship-feedback-board/src/app/globals.css +0 -453
  111. sembl_stack-0.1.0/examples/flagship-feedback-board/src/app/layout.tsx +0 -20
  112. sembl_stack-0.1.0/examples/flagship-feedback-board/src/app/page.tsx +0 -125
  113. sembl_stack-0.1.0/examples/flagship-feedback-board/src/components/auth-panel.tsx +0 -96
  114. sembl_stack-0.1.0/examples/flagship-feedback-board/src/components/feedback-board.tsx +0 -119
  115. sembl_stack-0.1.0/examples/flagship-feedback-board/src/components/feedback-form.tsx +0 -79
  116. sembl_stack-0.1.0/examples/flagship-feedback-board/src/lib/env.ts +0 -18
  117. sembl_stack-0.1.0/examples/flagship-feedback-board/src/lib/feedback.ts +0 -57
  118. sembl_stack-0.1.0/examples/flagship-feedback-board/src/lib/supabase/client.ts +0 -20
  119. sembl_stack-0.1.0/examples/flagship-feedback-board/src/lib/supabase/server.ts +0 -31
  120. sembl_stack-0.1.0/examples/flagship-feedback-board/src/middleware.ts +0 -42
  121. sembl_stack-0.1.0/examples/flagship-feedback-board/supabase/.gitignore +0 -8
  122. sembl_stack-0.1.0/examples/flagship-feedback-board/supabase/config.toml +0 -408
  123. sembl_stack-0.1.0/examples/flagship-feedback-board/supabase/migrations/202606200001_feedback_board.sql +0 -42
  124. sembl_stack-0.1.0/examples/flagship-feedback-board/supabase/migrations/20260621070532_harden_feedback_item_grants.sql +0 -4
  125. sembl_stack-0.1.0/examples/flagship-feedback-board/task.yaml +0 -3
  126. sembl_stack-0.1.0/examples/flagship-feedback-board/tsconfig.json +0 -43
  127. sembl_stack-0.1.0/examples/flagship-feedback-board/vercel.json +0 -4
  128. sembl_stack-0.1.0/examples/tasks/login-redirect/README.md +0 -54
  129. sembl_stack-0.1.0/examples/tasks/login-redirect/repo/infra/deploy.yaml +0 -1
  130. sembl_stack-0.1.0/examples/tasks/login-redirect/repo/specs/001-feature/bounds.json +0 -5
  131. sembl_stack-0.1.0/examples/tasks/login-redirect/repo/specs/001-feature/tasks.md +0 -3
  132. sembl_stack-0.1.0/examples/tasks/login-redirect/repo/src/app/__init__.py +0 -1
  133. sembl_stack-0.1.0/examples/tasks/login-redirect/task.yaml +0 -4
  134. sembl_stack-0.1.0/sembl.stack.yaml +0 -20
  135. sembl_stack-0.1.0/uv.lock +0 -2370
  136. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/LICENSE +0 -0
  137. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/__init__.py +0 -0
  138. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/execute_mock.py +0 -0
  139. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/adapters/review_llm.py +0 -0
  140. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/contextgraph.py +0 -0
  141. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/rsi.py +0 -0
  142. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/tracing.py +0 -0
  143. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/transport/__init__.py +0 -0
  144. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/sembl_stack/transport/mcp_client.py +0 -0
  145. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_contextgraph.py +0 -0
  146. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_opencode_adapter.py +0 -0
  147. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_rsi.py +0 -0
  148. {sembl_stack-0.1.0 → sembl_stack-0.2.0}/tests/test_scrub_secrets.py +0 -0
@@ -1,14 +1,16 @@
1
- # Example target repos are git-inited locally for demos; don't nest their .git
2
- examples/**/repo/.git/
3
- # Local-only tests (kept off the repo by request)
4
- tests/local/
5
- # Run artifacts (the run store) are local-only
6
- .sembl/
7
- examples/**/repo/.sembl/
8
- __pycache__/
9
- *.egg-info/
10
- .pytest_cache/
11
- .test-tmp/
12
- build/
13
- dist/
14
- .venv/
1
+ # Example target repos are git-inited locally for demos; don't nest their .git
2
+ examples/**/repo/.git/
3
+ # Local-only tests (kept off the repo by request)
4
+ tests/local/
5
+ # Run artifacts (the run store) are local-only
6
+ .sembl/
7
+ examples/**/repo/.sembl/
8
+ __pycache__/
9
+ *.egg-info/
10
+ .pytest_cache/
11
+ .test-tmp/
12
+ build/
13
+ dist/
14
+ .venv/
15
+ eval/.checkpoint-*.json
16
+ eval/.result-*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sembl-stack
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: A swappable, spec-driven software factory: task to bounds, an agent writes in a sandbox, the Sembl gate judges the diff, PASS merges, deploys, and a post-deploy gate confirms or rolls back. Every stage an adapter behind one typed contract.
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -9,6 +9,7 @@ Requires-Dist: click>=8.1
9
9
  Requires-Dist: langgraph>=0.2
10
10
  Requires-Dist: mcp>=1.0
11
11
  Requires-Dist: pyyaml>=6.0
12
+ Requires-Dist: questionary>=2.0
12
13
  Requires-Dist: sembl>=0.1.20
13
14
  Provides-Extra: all
14
15
  Requires-Dist: langfuse>=2.0; extra == 'all'
@@ -57,8 +58,30 @@ task ─▶ bounds ─▶ execute ─▶ sandbox ─▶ SEMBL GATE ─▶ merge
57
58
  ## Quickstart
58
59
 
59
60
  ```bash
60
- pip install sembl-stack sembl # the stack + the gate it runs at its core
61
- sembl-stack init # scaffold sembl.stack.yaml + task.yaml from a preset
61
+ pip install sembl-stack
62
+ cd your-repo
63
+ sembl-stack
64
+ ```
65
+
66
+ That's the whole quickstart. Bare `sembl-stack` launches the guided run — an
67
+ OpenCode-style TUI that walks you through, one screen at a time:
68
+
69
+ 1. **repo** — confirms the repo it detected (a fresh non-git directory is offered a
70
+ safe demo scaffold instead)
71
+ 2. **agent & keys** — every way to run AI work, with **live status**: is Claude Code
72
+ installed and logged in, which API-key env vars are set, is OpenCode on PATH — and
73
+ exactly what to do for any option you're missing (mock always works, zero keys)
74
+ 3. **task** — describe the change in plain English and pick which paths the agent may
75
+ touch (suggested from your repo); the tool writes `task.yaml`/`bounds.json`, you
76
+ never do
77
+ 4. **run** — the live stage rail (BLOCK → feedback → retry → PASS), then the verdict
78
+ with its receipt and the one next command
79
+
80
+ Everything the guide does is also a scriptable command (same machinery, byte-identical
81
+ runs — that's the point):
82
+
83
+ ```bash
84
+ sembl-stack init # scaffold config + starter task + demo repo
62
85
  sembl-stack doctor # config-aware preflight
63
86
  sembl-stack loop task.yaml # plan → execute → gate → retry-on-BLOCK
64
87
  sembl-stack runs [<id>] # list / inspect runs
@@ -127,13 +150,12 @@ Every run leaves a complete paper trail in `.sembl/runs/<id>/`:
127
150
  └─ trace.json # the timeline
128
151
  ```
129
152
 
130
- ## The guided TUI (optional)
153
+ ## The guided TUI
131
154
 
132
- `pip install "sembl-stack[tui]"` adds a Textual wizard. Run bare `sembl-stack` and
133
- press `r`: the stage rail runs the real loop under your configured profile,
134
- streaming per-stage status (pending/running/pass/fail) live and showing the final
135
- verdict byte-identical to a headless `sembl-stack loop`, because it drives the
136
- same adapters.
155
+ The guide is the default surface (see Quickstart) and a core dependency — no extra
156
+ install. It streams the real loop live under your configured profile and is
157
+ byte-identical to a headless `sembl-stack loop`, because it drives the same adapters.
158
+ Re-run the agent & keys step anytime with `sembl-stack --reconfigure`.
137
159
 
138
160
  ## The full picture
139
161
 
@@ -27,8 +27,30 @@ task ─▶ bounds ─▶ execute ─▶ sandbox ─▶ SEMBL GATE ─▶ merge
27
27
  ## Quickstart
28
28
 
29
29
  ```bash
30
- pip install sembl-stack sembl # the stack + the gate it runs at its core
31
- sembl-stack init # scaffold sembl.stack.yaml + task.yaml from a preset
30
+ pip install sembl-stack
31
+ cd your-repo
32
+ sembl-stack
33
+ ```
34
+
35
+ That's the whole quickstart. Bare `sembl-stack` launches the guided run — an
36
+ OpenCode-style TUI that walks you through, one screen at a time:
37
+
38
+ 1. **repo** — confirms the repo it detected (a fresh non-git directory is offered a
39
+ safe demo scaffold instead)
40
+ 2. **agent & keys** — every way to run AI work, with **live status**: is Claude Code
41
+ installed and logged in, which API-key env vars are set, is OpenCode on PATH — and
42
+ exactly what to do for any option you're missing (mock always works, zero keys)
43
+ 3. **task** — describe the change in plain English and pick which paths the agent may
44
+ touch (suggested from your repo); the tool writes `task.yaml`/`bounds.json`, you
45
+ never do
46
+ 4. **run** — the live stage rail (BLOCK → feedback → retry → PASS), then the verdict
47
+ with its receipt and the one next command
48
+
49
+ Everything the guide does is also a scriptable command (same machinery, byte-identical
50
+ runs — that's the point):
51
+
52
+ ```bash
53
+ sembl-stack init # scaffold config + starter task + demo repo
32
54
  sembl-stack doctor # config-aware preflight
33
55
  sembl-stack loop task.yaml # plan → execute → gate → retry-on-BLOCK
34
56
  sembl-stack runs [<id>] # list / inspect runs
@@ -97,13 +119,12 @@ Every run leaves a complete paper trail in `.sembl/runs/<id>/`:
97
119
  └─ trace.json # the timeline
98
120
  ```
99
121
 
100
- ## The guided TUI (optional)
122
+ ## The guided TUI
101
123
 
102
- `pip install "sembl-stack[tui]"` adds a Textual wizard. Run bare `sembl-stack` and
103
- press `r`: the stage rail runs the real loop under your configured profile,
104
- streaming per-stage status (pending/running/pass/fail) live and showing the final
105
- verdict byte-identical to a headless `sembl-stack loop`, because it drives the
106
- same adapters.
124
+ The guide is the default surface (see Quickstart) and a core dependency — no extra
125
+ install. It streams the real loop live under your configured profile and is
126
+ byte-identical to a headless `sembl-stack loop`, because it drives the same adapters.
127
+ Re-run the agent & keys step anytime with `sembl-stack --reconfigure`.
107
128
 
108
129
  ## The full picture
109
130
 
@@ -0,0 +1,28 @@
1
+ # Security Policy
2
+
3
+ ## Reporting a vulnerability
4
+
5
+ Please report suspected vulnerabilities privately via
6
+ [GitHub Security Advisories](https://github.com/speedvibecode/sembl-stack/security/advisories/new)
7
+ (preferred) or by email to totlasiddharth@gmail.com. Do not open a public issue for
8
+ security reports.
9
+
10
+ You can expect an acknowledgement within a few days. Please include a reproduction
11
+ if you can.
12
+
13
+ ## Scope notes
14
+
15
+ sembl-stack is a **local-first CLI**: it runs on the operator's machine, with the
16
+ operator's own credentials, against repositories the operator already controls. There
17
+ is no hosted service and no server-side data. The security surfaces we care most
18
+ about:
19
+
20
+ - **Credential handling** — profiles store only pointers (`env:VAR` / `keyring`),
21
+ never key material; executor output is secret-scrubbed; third-party process output
22
+ is persisted as fingerprints (byte count + SHA-256), never content.
23
+ - **Untrusted diff/content handling** — reviewer prompts treat the diff as data, not
24
+ instructions; run artifacts are local and gitignored.
25
+ - **Release integrity** — PyPI publishing uses Trusted Publishing (OIDC, no stored
26
+ tokens) with a version-lockstep guard.
27
+
28
+ Reports in any of these areas are especially appreciated.
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sembl-stack"
7
- version = "0.1.0"
7
+ version = "0.2.0"
8
8
  description = "A swappable, spec-driven software factory: task to bounds, an agent writes in a sandbox, the Sembl gate judges the diff, PASS merges, deploys, and a post-deploy gate confirms or rolls back. Every stage an adapter behind one typed contract."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -17,6 +17,9 @@ dependencies = [
17
17
  # default install runs it, not the built-in fallback runner.
18
18
  "sembl>=0.1.20", # L5 gate + L2 spec engine. Core because the DEFAULT config gates
19
19
  "mcp>=1.0", # with sembl over MCP — a bare install must run the default stack.
20
+ "questionary>=2.0", # the guided run IS the product surface: bare `sembl-stack` must
21
+ # always launch the inline step-by-step guide (owner UX decision
22
+ # 2026-07-04: Claude Code-style inline prompts, NOT a full-screen app).
20
23
  ]
21
24
 
22
25
  [project.optional-dependencies]
@@ -34,3 +37,17 @@ sembl-stack = "sembl_stack.cli:main"
34
37
 
35
38
  [tool.hatch.build.targets.wheel]
36
39
  packages = ["sembl_stack"]
40
+
41
+ [tool.hatch.build.targets.sdist]
42
+ # Explicit allowlist, not "everything minus .gitignore": hatchling's default sdist
43
+ # packed the whole working tree, including examples/flagship-feedback-board's
44
+ # node_modules/.next build output/.vercel/.env.local (a real Next.js app checked out
45
+ # for local dev, not part of this package) — a 61MB sdist. Only what installs/builds
46
+ # the CLI belongs here.
47
+ include = [
48
+ "/sembl_stack",
49
+ "/tests",
50
+ "/README.md",
51
+ "/LICENSE",
52
+ "/SECURITY.md",
53
+ ]
@@ -1,3 +1,3 @@
1
1
  """sembl-stack — an open, swappable spec-driven coding factory."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.2.0"
@@ -1,19 +1,19 @@
1
- """Redaction helper for adapter artifacts.
2
-
3
- Third-party process output (HTTP health bodies, CLI stdout/stderr, reviewer output) can carry
4
- debug pages, stack traces, env-shaped values, diff snippets, or auth errors. Persisting it raw
5
- into `.sembl/runs/<id>/` would violate the no-secrets-in-artifacts invariant. We keep only a
6
- non-reversible fingerprint: byte count + sha256. That preserves "output existed / did it change"
7
- signal without ever serializing the content.
8
- """
9
- from __future__ import annotations
10
-
11
- import hashlib
12
-
13
-
14
- def summarize(text) -> dict:
15
- """Reduce arbitrary third-party text to {bytes, sha256} — never the content itself."""
16
- if text is None:
17
- return {"bytes": 0, "sha256": None}
18
- raw = text if isinstance(text, bytes) else str(text).encode("utf-8", "replace")
19
- return {"bytes": len(raw), "sha256": hashlib.sha256(raw).hexdigest()}
1
+ """Redaction helper for adapter artifacts.
2
+
3
+ Third-party process output (HTTP health bodies, CLI stdout/stderr, reviewer output) can carry
4
+ debug pages, stack traces, env-shaped values, diff snippets, or auth errors. Persisting it raw
5
+ into `.sembl/runs/<id>/` would violate the no-secrets-in-artifacts invariant. We keep only a
6
+ non-reversible fingerprint: byte count + sha256. That preserves "output existed / did it change"
7
+ signal without ever serializing the content.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+
13
+
14
+ def summarize(text) -> dict:
15
+ """Reduce arbitrary third-party text to {bytes, sha256} — never the content itself."""
16
+ if text is None:
17
+ return {"bytes": 0, "sha256": None}
18
+ raw = text if isinstance(text, bytes) else str(text).encode("utf-8", "replace")
19
+ return {"bytes": len(raw), "sha256": hashlib.sha256(raw).hexdigest()}
@@ -1,179 +1,185 @@
1
- """The platform contract.
2
-
3
- The data types are the canonical artifacts (see `sembl_stack/artifacts.py`); the
4
- Protocols below are what an adapter must satisfy to be swappable into a layer. Re-exported
5
- here so adapters import everything they need from one place.
6
- """
7
- from __future__ import annotations
8
-
9
- import os
10
- import re
11
- import subprocess
12
- from typing import Protocol, runtime_checkable
13
-
14
- from ..artifacts import ( # noqa: F401 (re-exported for adapters)
15
- Bounds,
16
- Change,
17
- Context,
18
- Delivery,
19
- ExecutionResult,
20
- MergeRecord,
21
- ReconciliationReport,
22
- ReviewReport,
23
- SpecGraph,
24
- Task,
25
- Trace,
26
- Verdict,
27
- )
28
-
29
-
30
- # --- Shared adapter helpers ---------------------------------------------------
31
-
32
- def changed_files_from_diff(diff: str) -> list[str]:
33
- """Files touched by a unified git diff, order-preserved and de-duplicated.
34
-
35
- Reads BOTH the `diff --git a/… b/…` headers and the `+++ b/…` markers, unioned:
36
- * the `diff --git` header names a file even when it has no `+++` hunk — e.g. an
37
- EMPTY new file an errored agent created. A `+++`-only parser silently drops it,
38
- and the gate then flags a spurious "unreported change";
39
- * the `+++ b/` marker is the fallback for a diff fragment that arrives without a
40
- full header.
41
- `/dev/null` (the add/delete sentinel) is skipped. Every executor adapter uses this
42
- one parser so Claude/OpenCode/Aider report changed files consistently.
43
- """
44
- seen: set[str] = set()
45
- out: list[str] = []
46
-
47
- def add(path: str) -> None:
48
- path = path.strip()
49
- if path and path != "/dev/null" and path not in seen:
50
- seen.add(path)
51
- out.append(path)
52
-
53
- for line in diff.splitlines():
54
- if line.startswith("diff --git "):
55
- _, _, tail = line.partition(" b/")
56
- if tail:
57
- add(tail)
58
- elif line.startswith("+++ "):
59
- marker = line[4:]
60
- if marker.startswith("b/"):
61
- marker = marker[2:]
62
- add(marker.split("\t", 1)[0]) # drop a trailing tab-timestamp if present
63
- return out
64
-
65
-
66
- # Env-var names whose values are credentials; a secret only ever lives in the
67
- # environment, so an executor CLI echoing one (e.g. in an auth error) is the one
68
- # path by which it could reach a persisted run artifact. Scrubbed by value below.
69
- _SECRET_ENV_NAME = re.compile(r"(API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL)S?$", re.IGNORECASE)
70
- # Generic provider-key shapes (sk-ant-…, sk-proj-…, sk-or-v1-…) as a second net.
71
- _SECRET_TOKEN = re.compile(r"sk-[A-Za-z0-9_\-]{8,}")
72
-
73
-
74
- def scrub_secrets(text: str) -> str:
75
- """Redact anything secret-shaped before it reaches a run artifact.
76
-
77
- Executor stdout/stderr is persisted into `.sembl/runs/<id>/change.json` for
78
- debuggability; the security invariant (no key value ever stored) must hold even
79
- when a CLI misbehaves and echoes a credential. Env values are compared in memory
80
- only — nothing read here is ever written anywhere except as its redaction marker.
81
- """
82
- if not text:
83
- return text
84
- for name, value in os.environ.items():
85
- if len(value) >= 8 and _SECRET_ENV_NAME.search(name):
86
- text = text.replace(value, f"[redacted:{name}]")
87
- return _SECRET_TOKEN.sub("[redacted:key]", text)
88
-
89
-
90
- def run_executor(cmd: list[str], cwd: str, timeout: int, **run_kwargs):
91
- """Run an executor subprocess, turning a timeout into a structured signal.
92
-
93
- Returns ``(returncode, stdout, stderr, timed_out)``. A `subprocess.TimeoutExpired`
94
- is caught here (its partial stdout/stderr preserved) instead of being allowed to
95
- propagate and abort the whole loop — the caller records `timed_out` in the report so
96
- the gate stage can convert it to a BLOCK rather than a crash.
97
- """
98
- try:
99
- # encoding/errors explicit: agents emit UTF-8 (box-drawing, emoji, etc.). The default
100
- # text=True decodes with the locale codec (cp1252 on Windows), which crashes the
101
- # stdout reader thread mid-run and silently loses the output. Decode as UTF-8 and
102
- # replace undecodable bytes so capture never aborts the loop.
103
- proc = subprocess.run(
104
- cmd, cwd=cwd, capture_output=True, text=True, timeout=timeout,
105
- encoding="utf-8", errors="replace", **run_kwargs)
106
- return proc.returncode, proc.stdout or "", proc.stderr or "", False
107
- except subprocess.TimeoutExpired as exc:
108
- out, err = exc.stdout or "", exc.stderr or ""
109
- if isinstance(out, bytes):
110
- out = out.decode("utf-8", "replace")
111
- if isinstance(err, bytes):
112
- err = err.decode("utf-8", "replace")
113
- return -1, out, err, True
114
-
115
-
116
- # --- Layer interfaces (Protocols) ---------------------------------------------
117
-
118
- class Sandbox(Protocol): # an open sandbox handle (from L4)
119
- workdir: str
120
- def diff(self) -> str: ...
121
- def close(self) -> None: ...
122
-
123
-
124
- @runtime_checkable
125
- class SpecAdapter(Protocol): # L2: Task -> Bounds
126
- def plan(self, task: Task) -> Bounds: ...
127
-
128
-
129
- @runtime_checkable
130
- class SandboxAdapter(Protocol): # L4: Change -> Change (contained)
131
- def open(self, repo: str) -> Sandbox: ...
132
-
133
-
134
- @runtime_checkable
135
- class ExecuteAdapter(Protocol): # L3: Task+Bounds(+Context) -> Change
136
- def run(self, task: Task, bounds: Bounds, sandbox: Sandbox,
137
- feedback: str | None) -> ExecutionResult: ...
138
-
139
-
140
- @runtime_checkable
141
- class VerifyAdapter(Protocol): # L5: Change+Bounds -> Verdict
142
- def verify(self, bounds: Bounds, result: ExecutionResult,
143
- strict: bool) -> Verdict: ...
144
-
145
-
146
- @runtime_checkable
147
- class ReconcileAdapter(Protocol): # L5.5: SpecGraph+CodeGraph -> report
148
- def reconcile(self, spec_graph: SpecGraph, code_graph: dict) -> ReconciliationReport:
149
- ...
150
-
151
-
152
- @runtime_checkable
153
- class MergeAdapter(Protocol): # L6.5: Verdict(PASS) -> MergeRecord
154
- def merge(self, repo: str, *, into: str = "main", source: str = "HEAD",
155
- no_ff: bool = True, message: str | None = None) -> MergeRecord:
156
- ...
157
-
158
-
159
- @runtime_checkable
160
- class DeployAdapter(Protocol): # L7: Verdict(PASS) -> Delivery; rollback reverts it
161
- def deploy(self, repo: str, *, production: bool = False,
162
- prebuilt: bool = False) -> Delivery:
163
- ...
164
-
165
- def rollback(self, repo: str, *, to: str | None = None) -> Delivery:
166
- ...
167
-
168
-
169
- @runtime_checkable
170
- class PostDeployAdapter(Protocol): # L8: Delivery -> Verdict
171
- def verify(self, delivery: Delivery, *, health_path: str = "/",
172
- timeout_s: float = 10.0) -> Verdict:
173
- ...
174
-
175
-
176
- @runtime_checkable
177
- class ReviewAdapter(Protocol): # L5.5 quality: a diff -> ReviewReport (advisory)
178
- def review(self, diff: str, *, reviewer_hint: str = "") -> ReviewReport:
179
- ...
1
+ """The platform contract.
2
+
3
+ The data types are the canonical artifacts (see `sembl_stack/artifacts.py`); the
4
+ Protocols below are what an adapter must satisfy to be swappable into a layer. Re-exported
5
+ here so adapters import everything they need from one place.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import re
11
+ import subprocess
12
+ from typing import Protocol, runtime_checkable
13
+
14
+ from ..artifacts import ( # noqa: F401 (re-exported for adapters)
15
+ Bounds,
16
+ Change,
17
+ Context,
18
+ Delivery,
19
+ ExecutionResult,
20
+ MergeRecord,
21
+ ReconciliationReport,
22
+ ReviewReport,
23
+ SpecGraph,
24
+ Task,
25
+ Trace,
26
+ Verdict,
27
+ )
28
+
29
+
30
+ # --- Shared adapter helpers ---------------------------------------------------
31
+
32
# Unified-diff hunk header. The optional groups are the old/new line counts, which
# the format lets a writer omit when the count is 1.
_HUNK_HEADER = re.compile(r"@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")


def _git_header_path(line: str) -> str:
    """Return the post-image path named by a `diff --git` header ("" if none)."""
    rest = line[len("diff --git "):]
    # Unrenamed file: the header is `a/<p> b/<p>` with both halves identical, so the
    # split point is fixed by length alone -- unambiguous even when <p> itself
    # contains " b/" (a directory whose name ends in " b").
    half, odd = divmod(len(rest) - 1, 2)
    if (not odd and rest.startswith("a/") and rest[half:half + 3] == " b/"
            and rest[2:half] == rest[half + 3:]):
        return rest[2:half]
    # Rename/copy (or anything unexpected): fall back to the first " b/" separator.
    return line.partition(" b/")[2]


def changed_files_from_diff(diff: str) -> list[str]:
    """Files touched by a unified git diff, order-preserved and de-duplicated.

    Reads BOTH the `diff --git a/… b/…` headers and the `+++ b/…` markers, unioned:
      * the `diff --git` header names a file even when it has no `+++` hunk — e.g. an
        EMPTY new file an errored agent created. A `+++`-only parser silently drops it,
        and the gate then flags a spurious "unreported change";
      * the `+++ b/` marker is the fallback for a diff fragment that arrives without a
        full header.
    `/dev/null` (the add/delete sentinel) is skipped. Every executor adapter uses this
    one parser so Claude/OpenCode/Aider report changed files consistently.

    Hunk bodies are skipped by counting the lines announced in each `@@ -a,b +c,d @@`
    header: an ADDED line whose content starts with `++ ` is rendered as `+++ …` and
    must not be mistaken for a file marker. A line that cannot belong to a hunk body
    (e.g. the next `diff --git` header of a truncated diff) ends the hunk early, so a
    malformed count never hides a real header.

    Args:
        diff: unified diff text; may be empty.

    Returns:
        Paths in first-seen order, without the `a/` / `b/` prefixes.
    """
    seen: set[str] = set()
    out: list[str] = []

    def add(path: str) -> None:
        path = path.strip()
        if path and path != "/dev/null" and path not in seen:
            seen.add(path)
            out.append(path)

    old_left = new_left = 0  # hunk body lines still expected on the old / new side
    for line in diff.splitlines():
        if old_left > 0 or new_left > 0:
            lead = line[:1]
            if lead == "\\":  # "\ No newline at end of file" counts for neither side
                continue
            if lead in ("", " ", "-", "+"):  # "" = context line with stripped blank
                if lead != "+":
                    old_left -= 1
                if lead != "-":
                    new_left -= 1
                continue
            old_left = new_left = 0  # not a body line: hunk was truncated

        hunk = _HUNK_HEADER.match(line)
        if hunk:
            old_left = int(hunk.group(1) or 1)
            new_left = int(hunk.group(2) or 1)
        elif line.startswith("diff --git "):
            add(_git_header_path(line))
        elif line.startswith("+++ "):
            marker = line[4:]
            if marker.startswith("b/"):
                marker = marker[2:]
            add(marker.split("\t", 1)[0])  # drop a trailing tab-timestamp if present
    return out
64
+
65
+
66
# Env-var names whose values are credentials; a secret only ever lives in the
# environment, so an executor CLI echoing one (e.g. in an auth error) is the one
# path it could reach a persisted run artifact. Scrubbed by value below.
# NOTE(review): the pattern is anchored at the END of the name, so a name such as
# AWS_SECRET_ACCESS_KEY does not match — confirm whether that is intended.
_SECRET_ENV_NAME = re.compile(r"(API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL)S?$", re.IGNORECASE)
# Generic provider-key shapes (sk-ant-…, sk-proj-…, sk-or-v1-…) as a second net.
_SECRET_TOKEN = re.compile(r"sk-[A-Za-z0-9_\-]{8,}")


def scrub_secrets(text: str) -> str:
    """Redact anything secret-shaped before it reaches a run artifact.

    Executor stdout/stderr is persisted into `.sembl/runs/<id>/change.json` for
    debuggability; the security invariant (no key value ever stored) must hold even
    when a CLI misbehaves and echoes a credential. Env values are compared in memory
    only — nothing read here is ever written anywhere except as its redaction marker.

    All env values are redacted in ONE pass, longest value first. Replacing them one
    after another in environment order leaks the tail of a longer secret whenever a
    shorter secret is a prefix/substring of it (the short one is replaced first and
    the long one no longer matches). A single pass also means a value can never
    match text that an earlier replacement inserted.

    Args:
        text: captured output to sanitize; empty input is returned unchanged.

    Returns:
        ``text`` with each credential-named env value (8+ characters) replaced by
        ``[redacted:<ENV_NAME>]`` and any remaining ``sk-…`` token by
        ``[redacted:key]``.
    """
    if not text:
        return text

    # value -> first env name carrying it (first name wins for duplicated values).
    name_by_value: dict[str, str] = {}
    for name, value in os.environ.items():
        if len(value) >= 8 and _SECRET_ENV_NAME.search(name):
            name_by_value.setdefault(value, name)

    if name_by_value:
        # Regex alternation picks the first alternative that matches at a position,
        # so ordering longest-first guarantees the longest secret wins there.
        longest_first = sorted(name_by_value, key=len, reverse=True)
        any_secret = re.compile("|".join(re.escape(value) for value in longest_first))
        text = any_secret.sub(
            lambda match: f"[redacted:{name_by_value[match.group(0)]}]", text)

    return _SECRET_TOKEN.sub("[redacted:key]", text)
88
+
89
+
90
def run_executor(cmd: list[str], cwd: str, timeout: int, **run_kwargs):
    """Run an executor subprocess and report a timeout as data, not as an exception.

    Returns ``(returncode, stdout, stderr, timed_out)``. When the process exceeds
    ``timeout``, `subprocess.TimeoutExpired` is caught here and the result is
    ``(-1, <partial stdout>, <partial stderr>, True)``, so the caller can record
    `timed_out` in its report and the gate stage can turn it into a BLOCK rather
    than the whole loop crashing.

    stdin defaults to DEVNULL (a caller may override it through ``run_kwargs``): a
    headless executor must never sit waiting on interactive input (e.g. a CLI's
    first-run provider setup) while its output is captured into pipes nobody can
    see. With stdin cut off the CLI reads EOF at once and fails visibly instead.
    """
    if "stdin" not in run_kwargs:
        run_kwargs["stdin"] = subprocess.DEVNULL

    def as_text(stream) -> str:
        # TimeoutExpired may carry None, bytes or str for a captured stream.
        if not stream:
            return ""
        if isinstance(stream, bytes):
            return stream.decode("utf-8", "replace")
        return stream

    try:
        # Decode explicitly as UTF-8 with replacement: agents emit UTF-8 (box-drawing
        # characters, emoji, ...), while plain text=True would use the locale codec
        # (cp1252 on Windows), which crashes the stdout reader thread mid-run and
        # silently loses the output. Undecodable bytes are replaced so capturing
        # never aborts the loop.
        completed = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=timeout,
            encoding="utf-8",
            errors="replace",
            **run_kwargs,
        )
    except subprocess.TimeoutExpired as expired:
        return -1, as_text(expired.stdout), as_text(expired.stderr), True
    return completed.returncode, completed.stdout or "", completed.stderr or "", False
120
+
121
+
122
+ # --- Layer interfaces (Protocols) ---------------------------------------------
123
+
124
class Sandbox(Protocol):
    """An open sandbox handle (from layer 4).

    Not decorated with ``runtime_checkable``, so it is for static typing only.
    """

    # Working directory of the sandbox.
    # NOTE(review): presumably a filesystem path — confirm with implementations.
    workdir: str

    def diff(self) -> str:
        """Return a diff as text."""

    def close(self) -> None:
        """Close the handle."""
128
+
129
+
130
@runtime_checkable
class SpecAdapter(Protocol):
    """Layer 2 (spec) contract: ``Task`` -> ``Bounds``."""

    def plan(self, task: Task) -> Bounds:
        """Return the ``Bounds`` planned for ``task``."""
133
+
134
+
135
@runtime_checkable
class SandboxAdapter(Protocol):
    """Layer 4 (containment) contract, labelled ``Change -> Change (contained)``.

    The one required operation opens a sandbox handle over a repository.
    """

    def open(self, repo: str) -> Sandbox:
        """Open a sandbox for ``repo`` and return its handle."""
138
+
139
+
140
@runtime_checkable
class ExecuteAdapter(Protocol):
    """Layer 3 (execute) contract: ``Task`` + ``Bounds`` (+ ``Context``) -> ``Change``."""

    def run(
        self,
        task: Task,
        bounds: Bounds,
        sandbox: Sandbox,
        feedback: str | None,
    ) -> ExecutionResult:
        """Carry out ``task`` within ``bounds`` using ``sandbox``.

        ``feedback`` is a required argument that may be ``None``
        (NOTE(review): presumably output from an earlier attempt — confirm).
        Returns an ``ExecutionResult``.
        """
144
+
145
+
146
@runtime_checkable
class VerifyAdapter(Protocol):
    """Layer 5 (verify) contract: ``Change`` + ``Bounds`` -> ``Verdict``."""

    def verify(
        self,
        bounds: Bounds,
        result: ExecutionResult,
        strict: bool,
    ) -> Verdict:
        """Judge ``result`` against ``bounds`` and return a ``Verdict``.

        ``strict`` is a required boolean switch; its exact effect is defined by
        the implementing adapter.
        """
150
+
151
+
152
@runtime_checkable
class ReconcileAdapter(Protocol):
    """Layer 5.5 (reconcile) contract: ``SpecGraph`` + code graph -> report."""

    def reconcile(
        self,
        spec_graph: SpecGraph,
        code_graph: dict,
    ) -> ReconciliationReport:
        """Compare ``spec_graph`` with ``code_graph`` and return a ``ReconciliationReport``.

        ``code_graph`` is a plain ``dict``; its schema is not defined here.
        """
156
+
157
+
158
@runtime_checkable
class MergeAdapter(Protocol):
    """Layer 6.5 (merge) contract: ``Verdict`` (PASS) -> ``MergeRecord``."""

    def merge(
        self,
        repo: str,
        *,
        into: str = "main",
        source: str = "HEAD",
        no_ff: bool = True,
        message: str | None = None,
    ) -> MergeRecord:
        """Merge ``source`` into ``into`` in ``repo`` and return a ``MergeRecord``.

        All options are keyword-only. Defaults: ``into="main"``,
        ``source="HEAD"``, ``no_ff=True``, ``message=None``.
        """
163
+
164
+
165
@runtime_checkable
class DeployAdapter(Protocol):
    """Layer 7 (deploy) contract: ``Verdict`` (PASS) -> ``Delivery``; rollback reverts it."""

    def deploy(
        self,
        repo: str,
        *,
        production: bool = False,
        prebuilt: bool = False,
    ) -> Delivery:
        """Deploy ``repo`` and return the resulting ``Delivery``.

        ``production`` and ``prebuilt`` are keyword-only flags, both off by default.
        """

    def rollback(
        self,
        repo: str,
        *,
        to: str | None = None,
    ) -> Delivery:
        """Roll back the deployment of ``repo`` and return the resulting ``Delivery``.

        ``to`` is an optional keyword-only rollback target (``None`` by default).
        """
173
+
174
+
175
@runtime_checkable
class PostDeployAdapter(Protocol):
    """Layer 8 (post-deploy) contract: ``Delivery`` -> ``Verdict``."""

    def verify(
        self,
        delivery: Delivery,
        *,
        health_path: str = "/",
        timeout_s: float = 10.0,
    ) -> Verdict:
        """Check ``delivery`` and return a ``Verdict``.

        ``health_path`` defaults to ``"/"`` and ``timeout_s`` to ``10.0``.
        """
180
+
181
+
182
@runtime_checkable
class ReviewAdapter(Protocol):
    """Layer 5.5 (quality) contract: a diff -> ``ReviewReport`` (advisory)."""

    def review(
        self,
        diff: str,
        *,
        reviewer_hint: str = "",
    ) -> ReviewReport:
        """Review ``diff`` and return a ``ReviewReport``.

        ``reviewer_hint`` is an optional keyword-only string, empty by default.
        """