sembl-stack 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/.github/workflows/release.yml +5 -0
  2. sembl_stack-0.1.1/.github/workflows/tests.yml +29 -0
  3. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/.gitignore +2 -0
  4. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/PKG-INFO +1 -1
  5. sembl_stack-0.1.1/SECURITY.md +28 -0
  6. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/PROCESS-ACTION-PLAN.md +6 -1
  7. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/two_axis.py +26 -3
  8. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/pyproject.toml +1 -1
  9. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/__init__.py +1 -1
  10. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/cli.py +49 -1
  11. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/doctor.py +35 -2
  12. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/presets.py +18 -0
  13. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_deploy_postdeploy.py +4 -0
  14. sembl_stack-0.1.1/tests/test_init_stranger.py +101 -0
  15. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/LICENSE +0 -0
  16. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/README.md +0 -0
  17. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/LAUNCH-PREP-JULY1.md +0 -0
  18. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/SPEC-coderabbit-prep.md +0 -0
  19. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/SPEC-gate-0.2.0.md +0 -0
  20. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/SPEC-l8-rollback.md +0 -0
  21. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/SPEC-merge-stage.md +0 -0
  22. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/SPEC-reconcile-live.md +0 -0
  23. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/SPEC-review-standby.md +0 -0
  24. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/SPEC-tui-phase0.md +0 -0
  25. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/SPEC-tui-phase1-onboarding.md +0 -0
  26. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/eval-metric-O3.md +0 -0
  27. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/memory-plane-hypothesis.md +0 -0
  28. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/docs/process-self-improvement.md +0 -0
  29. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/README.md +0 -0
  30. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/SPEC-through-deploy.md +0 -0
  31. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/build_corpus.py +0 -0
  32. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/01-greenfield-snake/case.json +0 -0
  33. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/02-feature-inscope/case.json +0 -0
  34. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/03-refactor-inscope/case.json +0 -0
  35. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/04-docs-tolerance/case.json +0 -0
  36. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/05-out-of-scope-infra/case.json +0 -0
  37. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/06-forbidden-workflow/case.json +0 -0
  38. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/07-fabricated-file/case.json +0 -0
  39. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/08-feature-with-stray/case.json +0 -0
  40. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/09-clean-diff-fabricated-report/case.json +0 -0
  41. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/10-rogue-combined/case.json +0 -0
  42. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/11-unevidenced-tests/case.json +0 -0
  43. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/12-over-churn/case.json +0 -0
  44. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/13-runtime-break-passes-gate/case.json +0 -0
  45. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/corpus/14-quality-defect-passes-gate/case.json +0 -0
  46. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/harness.py +0 -0
  47. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/eval/through_deploy.py +0 -0
  48. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/.env.example +0 -0
  49. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/.gitignore +0 -0
  50. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/README.md +0 -0
  51. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/bounds.json +0 -0
  52. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/next-env.d.ts +0 -0
  53. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/next.config.ts +0 -0
  54. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/package-lock.json +0 -0
  55. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/package.json +0 -0
  56. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/postcss.config.mjs +0 -0
  57. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/scripts/check-deploy-readiness.mjs +0 -0
  58. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/scripts/postdeploy-health.mjs +0 -0
  59. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/sembl.stack.yaml +0 -0
  60. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/specs/001-feedback-board/bounds.json +0 -0
  61. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/specs/001-feedback-board/requirements.md +0 -0
  62. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/specs/001-feedback-board/tasks.md +0 -0
  63. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/app/actions.ts +0 -0
  64. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/app/api/health/route.ts +0 -0
  65. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/app/auth/callback/route.ts +0 -0
  66. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/app/globals.css +0 -0
  67. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/app/layout.tsx +0 -0
  68. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/app/page.tsx +0 -0
  69. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/components/auth-panel.tsx +0 -0
  70. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/components/feedback-board.tsx +0 -0
  71. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/components/feedback-form.tsx +0 -0
  72. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/lib/env.ts +0 -0
  73. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/lib/feedback.ts +0 -0
  74. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/lib/supabase/client.ts +0 -0
  75. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/lib/supabase/server.ts +0 -0
  76. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/src/middleware.ts +0 -0
  77. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/supabase/.gitignore +0 -0
  78. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/supabase/config.toml +0 -0
  79. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/supabase/migrations/202606200001_feedback_board.sql +0 -0
  80. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/supabase/migrations/20260621070532_harden_feedback_item_grants.sql +0 -0
  81. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/task.yaml +0 -0
  82. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/tsconfig.json +0 -0
  83. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/flagship-feedback-board/vercel.json +0 -0
  84. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/tasks/login-redirect/README.md +0 -0
  85. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/tasks/login-redirect/repo/infra/deploy.yaml +0 -0
  86. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/tasks/login-redirect/repo/specs/001-feature/bounds.json +0 -0
  87. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/tasks/login-redirect/repo/specs/001-feature/tasks.md +0 -0
  88. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/tasks/login-redirect/repo/src/app/__init__.py +0 -0
  89. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/examples/tasks/login-redirect/task.yaml +0 -0
  90. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl.stack.yaml +0 -0
  91. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/__init__.py +0 -0
  92. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/_redact.py +0 -0
  93. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/base.py +0 -0
  94. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/codegraph_cbm.py +0 -0
  95. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/deploy_vercel.py +0 -0
  96. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/execute_aider.py +0 -0
  97. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/execute_claude.py +0 -0
  98. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/execute_mock.py +0 -0
  99. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/execute_opencode.py +0 -0
  100. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/merge_git.py +0 -0
  101. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/postdeploy_http.py +0 -0
  102. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/review_coderabbit.py +0 -0
  103. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/review_llm.py +0 -0
  104. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/review_mock.py +0 -0
  105. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/sandbox_worktree.py +0 -0
  106. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/spec_sembl.py +0 -0
  107. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/adapters/verify_sembl.py +0 -0
  108. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/artifacts.py +0 -0
  109. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/config.py +0 -0
  110. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/contextgraph.py +0 -0
  111. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/loop.py +0 -0
  112. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/onboarding.py +0 -0
  113. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/profile.py +0 -0
  114. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/reconciliation.py +0 -0
  115. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/registry.py +0 -0
  116. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/rsi.py +0 -0
  117. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/runner.py +0 -0
  118. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/session.py +0 -0
  119. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/specgraph.py +0 -0
  120. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/store.py +0 -0
  121. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/tracing.py +0 -0
  122. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/transport/__init__.py +0 -0
  123. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/transport/mcp_client.py +0 -0
  124. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/tui.py +0 -0
  125. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/views.py +0 -0
  126. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/sembl_stack/wizard.py +0 -0
  127. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_artifacts_store.py +0 -0
  128. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_codegraph_cbm.py +0 -0
  129. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_contextgraph.py +0 -0
  130. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_loop_manifest.py +0 -0
  131. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_loop_smoke.py +0 -0
  132. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_merge_git.py +0 -0
  133. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_onboarding_logic.py +0 -0
  134. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_opencode_adapter.py +0 -0
  135. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_profile.py +0 -0
  136. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_reconciliation.py +0 -0
  137. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_review.py +0 -0
  138. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_rsi.py +0 -0
  139. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_runner.py +0 -0
  140. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_scrub_secrets.py +0 -0
  141. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_session.py +0 -0
  142. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_spec_sembl.py +0 -0
  143. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_specgraph.py +0 -0
  144. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/tests/test_verdict_binding.py +0 -0
  145. {sembl_stack-0.1.0 → sembl_stack-0.1.1}/uv.lock +0 -0
@@ -34,6 +34,11 @@ jobs:
34
34
  exit 1
35
35
  fi
36
36
 
37
+ - name: Run test suite (a release never ships untested)
38
+ run: |
39
+ python -m pip install -e ".[all]" pytest
40
+ python -m pytest -q
41
+
37
42
  - name: Build sdist and wheel
38
43
  run: |
39
44
  python -m pip install --upgrade build
@@ -0,0 +1,29 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [master]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ name: pytest (py${{ matrix.python }} / ${{ matrix.os }})
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ os: [ubuntu-latest, windows-latest]
16
+ python: ["3.10", "3.12"]
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python }}
23
+
24
+ - name: Install package with all extras
25
+ run: python -m pip install -e ".[all]" pytest
26
+
27
+ - name: Run test suite
28
+ # From the repo root: eval-corpus paths are cwd-relative.
29
+ run: python -m pytest -q
@@ -12,3 +12,5 @@ __pycache__/
12
12
  build/
13
13
  dist/
14
14
  .venv/
15
+ eval/.checkpoint-*.json
16
+ eval/.result-*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sembl-stack
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: A swappable, spec-driven software factory: task to bounds, an agent writes in a sandbox, the Sembl gate judges the diff, PASS merges, deploys, and a post-deploy gate confirms or rolls back. Every stage an adapter behind one typed contract.
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -0,0 +1,28 @@
1
+ # Security Policy
2
+
3
+ ## Reporting a vulnerability
4
+
5
+ Please report suspected vulnerabilities privately via
6
+ [GitHub Security Advisories](https://github.com/speedvibecode/sembl-stack/security/advisories/new)
7
+ (preferred) or by email to totlasiddharth@gmail.com. Do not open a public issue for
8
+ security reports.
9
+
10
+ You can expect an acknowledgement within a few days. Please include a reproduction
11
+ if you can.
12
+
13
+ ## Scope notes
14
+
15
+ sembl-stack is a **local-first CLI**: it runs on the operator's machine, with the
16
+ operator's own credentials, against repositories the operator already controls. There
17
+ is no hosted service and no server-side data. The security surfaces we care most
18
+ about:
19
+
20
+ - **Credential handling** — profiles store only pointers (`env:VAR` / `keyring`),
21
+ never key material; executor output is secret-scrubbed; third-party process output
22
+ is persisted as fingerprints (byte count + SHA-256), never content.
23
+ - **Untrusted diff/content handling** — reviewer prompts treat the diff as data, not
24
+ instructions; run artifacts are local and gitignored.
25
+ - **Release integrity** — PyPI publishing uses Trusted Publishing (OIDC, no stored
26
+ tokens) with a version-lockstep guard.
27
+
28
+ Reports in any of these areas are especially appreciated.
@@ -279,7 +279,12 @@ decoupled (confirmed third-party backend bug). History below (spec `SPEC-coderab
279
279
  diff's own hunks (before this, 12/14 corpus cases silently degraded to UNKNOWN). Live smoke:
280
280
  planted case 14 → real FINDINGS (SQL injection flagged critical + N+1 major). Back-to-back
281
281
  corpus runs hit CodeRabbit's **rate limit** (correctly UNKNOWN) — `eval/two_axis.py` gained
282
- `--patient` (waits out the window); full real CodeRabbit 2×2 = run when the window resets.
282
+ `--patient` (waits out the window); full real CodeRabbit 2×2 **DONE 2026-07-04** after six
283
+ failed monolithic attempts — root cause was the eval losing all progress on any mid-run death;
284
+ fixed with `--checkpoint` (per-case review outcomes persisted as they land, reruns resume).
285
+ Result, 14/14 real reviews, 0 UNKNOWN: **gate_only=4, quality_only=3** (planted case 14, runtime
286
+ -break case 13, AND 01-greenfield-snake), **both=2, neither=5** — complementarity thesis now
287
+ proven on BOTH real reviewers (llm + CodeRabbit), nearly identical grids.
283
288
  Status unchanged: optional second reviewer, never load-bearing.
284
289
 
285
290
  **Track 4 — RSI-L1 readout (cheap, high-narrative):** per-executor iters-to-green + cost over the
@@ -9,7 +9,12 @@ Reviewers: mock (default, deterministic, no account), a REAL one via
9
9
  `--reviewer llm [--model m]` — the BYO agent-CLI reviewer (review_llm.py) — or
10
10
  `--reviewer coderabbit` (real authenticated CodeRabbit CLI, review_coderabbit.py).
11
11
  Add `--patient` for real reviewers: waits out rate-limit windows instead of counting a
12
- throttled review as UNKNOWN."""
12
+ throttled review as UNKNOWN.
13
+
14
+ Add `--checkpoint <file>` for real reviewers: each case's review outcome is saved as it
15
+ lands, and a rerun skips cases already reviewed (UNKNOWNs are always retried) — so a
16
+ killed run resumes instead of starting over. The gate axis is deterministic and cheap;
17
+ only review outcomes are cached."""
13
18
  import json
14
19
  import sys
15
20
  import time
@@ -61,16 +66,34 @@ def _patient_review(review, diff, tries: int = 14, wait: int = 300):
61
66
  return rep
62
67
 
63
68
 
69
+ def _load_checkpoint(path: Path, reviewer: str) -> dict:
70
+ if path.is_file():
71
+ data = json.loads(path.read_text(encoding="utf-8"))
72
+ if data.get("reviewer") == reviewer:
73
+ return data.get("cases", {})
74
+ return {}
75
+
76
+
64
77
  def main() -> int:
65
78
  name, review = _reviewer()
66
79
  patient = "--patient" in sys.argv
80
+ ckpt_path = (Path(sys.argv[sys.argv.index("--checkpoint") + 1])
81
+ if "--checkpoint" in sys.argv else None)
82
+ done = _load_checkpoint(ckpt_path, name) if ckpt_path else {}
67
83
  gate_only = quality_only = both = neither = unknown = 0
68
84
  rows = []
69
85
  quality_only_cases = []
70
86
  for c in _cases():
71
87
  gate_bad = _gate(c) == "BLOCK"
72
- rep = _patient_review(review, c["diff"]) if patient else review.review(c["diff"])
73
- status = rep.status
88
+ if c["name"] in done: # cached (never UNKNOWN — those aren't saved)
89
+ status = done[c["name"]]
90
+ else:
91
+ rep = _patient_review(review, c["diff"]) if patient else review.review(c["diff"])
92
+ status = rep.status
93
+ if ckpt_path and status != "UNKNOWN":
94
+ done[c["name"]] = status
95
+ ckpt_path.write_text(json.dumps({"reviewer": name, "cases": done}, indent=2),
96
+ encoding="utf-8")
74
97
  if status == "UNKNOWN": # a real reviewer can fail; never count as clean
75
98
  unknown += 1
76
99
  quality_bad = status == "FINDINGS"
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sembl-stack"
7
- version = "0.1.0"
7
+ version = "0.1.1"
8
8
  description = "A swappable, spec-driven software factory: task to bounds, an agent writes in a sandbox, the Sembl gate judges the diff, PASS merges, deploys, and a post-deploy gate confirms or rolls back. Every stage an adapter behind one typed contract."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,3 +1,3 @@
1
1
  """sembl-stack — an open, swappable spec-driven coding factory."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.1.1"
@@ -157,7 +157,18 @@ def _loop_cmd(task_file: str, config_path: str):
157
157
  click.echo(f"layers: {cfg.raw['layers']}")
158
158
  click.echo(f"task: {task.text!r}\nrepo: {task.repo}\n")
159
159
 
160
- result = run_loop(cfg, task)
160
+ try:
161
+ result = run_loop(cfg, task)
162
+ except RuntimeError as exc:
163
+ # Stage adapters raise RuntimeError with an "L<n>: ..." prefix. A stranger's
164
+ # first failure should be a diagnosis, not a stack trace.
165
+ click.secho(f"error: {exc}", fg="red")
166
+ click.echo("hint: `sembl-stack doctor` checks your environment. The loop needs\n"
167
+ " - the task's repo to be a git repository with at least one commit\n"
168
+ " (the sandbox clones it), and\n"
169
+ " - a bounds source: a task spec_path, or a bounds.json next to the\n"
170
+ " task file (`sembl-stack init` scaffolds a working starter).")
171
+ raise SystemExit(1)
161
172
 
162
173
  click.echo(f"engine: {result.engine}")
163
174
  for attempt, status in result.history:
@@ -510,10 +521,47 @@ def init(preset, config_path, with_task, force):
510
521
  else:
511
522
  tp.write_text(presets.starter_task(), encoding="utf-8")
512
523
  click.secho("wrote task.yaml", fg="green")
524
+ # The starter task has no spec_path, so the L2 spec adapter needs a
525
+ # bounds.json beside it — without one, `loop` cannot derive a contract.
526
+ bp = Path("bounds.json")
527
+ if bp.exists() and not force:
528
+ click.echo(" bounds.json exists — left as-is")
529
+ else:
530
+ bp.write_text(presets.starter_bounds(), encoding="utf-8")
531
+ click.secho("wrote bounds.json (the starter task's contract)", fg="green")
532
+ for msg in _ensure_demo_repo():
533
+ click.secho(msg, fg="green")
513
534
  click.echo("\nnext:\n sembl-stack doctor # check your environment\n"
514
535
  " sembl-stack loop task.yaml # run the loop")
515
536
 
516
537
 
538
+ def _ensure_demo_repo() -> list[str]:
539
+ """Make the cwd loop-runnable: the sandbox clones the repo, so a fresh demo
540
+ directory needs a git repo with at least one commit. Existing repos are left
541
+ entirely alone."""
542
+ if Path(".git").exists():
543
+ return []
544
+ msgs = []
545
+ app = Path("app")
546
+ if not app.exists():
547
+ app.mkdir()
548
+ (app / "__init__.py").write_text(
549
+ '"""Demo app module — the starter task adds a constant here."""\n',
550
+ encoding="utf-8")
551
+ msgs.append("wrote app/__init__.py (demo module the starter task edits)")
552
+ subprocess.run(["git", "init", "-q"], check=True)
553
+ subprocess.run(["git", "add", "-A"], check=True)
554
+ committed = subprocess.run(
555
+ ["git", "commit", "-q", "-m", "sembl-stack demo scaffold"],
556
+ capture_output=True, text=True)
557
+ if committed.returncode != 0: # machine has no git identity configured
558
+ subprocess.run(
559
+ ["git", "-c", "user.name=sembl-stack", "-c", "user.email=demo@sembl.local",
560
+ "commit", "-q", "-m", "sembl-stack demo scaffold"], check=True)
561
+ msgs.append("initialized a git repo + first commit (the sandbox clones it)")
562
+ return msgs
563
+
564
+
517
565
  @main.command()
518
566
  @click.option("--config", "config_path", default="sembl.stack.yaml")
519
567
  def doctor(config_path):
@@ -38,8 +38,10 @@ _EXECUTOR_BINARY = {
38
38
  }
39
39
 
40
40
 
41
- def run_checks(cfg=None) -> list[Check]:
42
- """Structured preflight. `cfg` is an optional loaded StackConfig (config-aware checks)."""
41
+ def run_checks(cfg=None, repo: str = ".") -> list[Check]:
42
+ """Structured preflight. `cfg` is an optional loaded StackConfig (config-aware checks);
43
+ `repo` is where a `loop` run would happen (git + bounds-source checks)."""
44
+ from pathlib import Path
43
45
  layers = (getattr(cfg, "raw", {}) or {}).get("layers", {}) if cfg else {}
44
46
  transport = (getattr(cfg, "raw", {}) or {}).get("transport", {}) if cfg else {}
45
47
  checks: list[Check] = []
@@ -93,6 +95,37 @@ def run_checks(cfg=None) -> list[Check]:
93
95
  elif execute == "mock":
94
96
  checks.append(Check("executor: mock", True, "no binary needed", required=False))
95
97
 
98
+ # --- loop-runnability: the sandbox clones the repo; L2 needs a bounds source ---
99
+ # Both were stranger-blockers found live 2026-07-04: `init` used to scaffold a
100
+ # task with no bounds source in a non-git directory, and `loop` crashed twice.
101
+ sandbox = layers.get("sandbox", "clone")
102
+ if sandbox in ("clone", "worktree"):
103
+ is_repo = (Path(repo) / ".git").exists()
104
+ checks.append(Check(
105
+ "repo (git)", is_repo,
106
+ "git repository" if is_repo else f"{Path(repo).resolve()} is not a git repo",
107
+ "" if is_repo else
108
+ "the sandbox clones the repo — `git init` + a first commit "
109
+ "(`sembl-stack init` scaffolds this for a fresh directory)"))
110
+ task_file = Path(repo) / "task.yaml"
111
+ if task_file.is_file():
112
+ has_spec = False
113
+ try:
114
+ import yaml
115
+ spec = (yaml.safe_load(task_file.read_text(encoding="utf-8")) or {}).get(
116
+ "spec_path")
117
+ has_spec = bool(spec)
118
+ except Exception:
119
+ pass
120
+ has_bounds = has_spec or (Path(repo) / "bounds.json").is_file()
121
+ checks.append(Check(
122
+ "bounds source", has_bounds,
123
+ "spec_path set" if has_spec else
124
+ ("bounds.json" if has_bounds else "no spec_path and no bounds.json"),
125
+ "" if has_bounds else
126
+ "L2 needs a contract: set spec_path in task.yaml, or add a bounds.json "
127
+ "next to it (`sembl-stack init` scaffolds one)"))
128
+
96
129
  # --- context graph (only when context: symgraph) ---
97
130
  if layers.get("context") == "symgraph":
98
131
  sg_ok = shutil.which("symgraph") is not None
@@ -93,8 +93,26 @@ _STARTER_TASK = """\
93
93
  text: "Add a VALUE constant to the app module, in scope, without touching infra."
94
94
  repo: "."
95
95
  # spec_path: "./specs/001-feature" # optional: a Spec Kit feature dir / tasks.md
96
+ # Without a spec_path, bounds come from bounds.json next to this file (scaffolded
97
+ # by `sembl-stack init`).
96
98
  """
97
99
 
100
+ # The bounds contract for the starter task: the mock executor edits inside
101
+ # editable_paths[0] and (on its deliberate first-attempt misbehaviour) strays into
102
+ # forbidden_areas[0] — these two entries are what make the BLOCK -> retry -> PASS
103
+ # demo work.
104
+ _STARTER_BOUNDS = """\
105
+ {
106
+ "editable_paths": ["app/"],
107
+ "forbidden_areas": ["infra/"],
108
+ "churn_budget": {"max_files": 5, "max_lines": 200}
109
+ }
110
+ """
111
+
112
+
113
+ def starter_bounds() -> str:
114
+ return _STARTER_BOUNDS
115
+
98
116
 
99
117
  def names() -> list[str]:
100
118
  return list(PRESETS)
@@ -103,6 +103,10 @@ def test_vercel_deploy_adapter_records_failure(monkeypatch, tmp_path):
103
103
  def fake_run(cmd, **kwargs):
104
104
  return SimpleNamespace(returncode=1, stdout="", stderr="not linked")
105
105
 
106
+ # The adapter resolves the CLI via shutil.which first — without this the test
107
+ # only passes on machines that happen to have vercel installed (CI does not).
108
+ monkeypatch.setattr("sembl_stack.adapters.deploy_vercel.shutil.which",
109
+ lambda name: "vercel")
106
110
  monkeypatch.setattr("sembl_stack.adapters.deploy_vercel.subprocess.run", fake_run)
107
111
 
108
112
  delivery = VercelDeployAdapter(timeout=5).deploy(str(tmp_path))
@@ -0,0 +1,101 @@
1
+ """The stranger quickstart: `init` in a fresh directory must yield a runnable loop.
2
+
3
+ Regression suite for the two live stranger-blockers found 2026-07-04: `init` scaffolded a
4
+ task with no bounds source, in a non-git directory — so `loop task.yaml` crashed at L2
5
+ ("could not derive bounds") and, once bounds existed, at L4 ("git clone failed"). Now
6
+ `init` scaffolds bounds.json + a committed git repo, `doctor` diagnoses both gaps, and
7
+ `loop` reports stage failures as clean errors instead of tracebacks.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import subprocess
13
+ from pathlib import Path
14
+
15
+ from click.testing import CliRunner
16
+
17
+ from sembl_stack import doctor
18
+ from sembl_stack.cli import main
19
+ from sembl_stack.config import load
20
+
21
+
22
+ def _init_in(tmp: Path, runner: CliRunner):
23
+ return runner.invoke(main, ["init", "--preset", "gate+sandbox"], catch_exceptions=False)
24
+
25
+
26
+ class TestInitScaffoldsRunnableDemo:
27
+ def test_fresh_dir_gets_bounds_and_git_repo(self, tmp_path, monkeypatch):
28
+ monkeypatch.chdir(tmp_path)
29
+ res = _init_in(tmp_path, CliRunner())
30
+ assert res.exit_code == 0
31
+ bounds = json.loads((tmp_path / "bounds.json").read_text(encoding="utf-8"))
32
+ assert bounds["editable_paths"] and bounds["forbidden_areas"]
33
+ assert (tmp_path / ".git").exists()
34
+ assert (tmp_path / "app" / "__init__.py").is_file()
35
+ head = subprocess.run(["git", "rev-parse", "HEAD"], cwd=tmp_path,
36
+ capture_output=True, text=True)
37
+ assert head.returncode == 0, "init must leave a first commit for the clone sandbox"
38
+
39
+ def test_existing_git_repo_left_alone(self, tmp_path, monkeypatch):
40
+ monkeypatch.chdir(tmp_path)
41
+ subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True)
42
+ res = _init_in(tmp_path, CliRunner())
43
+ assert res.exit_code == 0
44
+ assert not (tmp_path / "app").exists(), "must not write demo files into a real repo"
45
+ # bounds.json is still scaffolded — the starter task needs a contract anywhere.
46
+ assert (tmp_path / "bounds.json").is_file()
47
+
48
+ def test_existing_bounds_not_overwritten(self, tmp_path, monkeypatch):
49
+ monkeypatch.chdir(tmp_path)
50
+ (tmp_path / "bounds.json").write_text('{"editable_paths": ["mine/"]}',
51
+ encoding="utf-8")
52
+ res = _init_in(tmp_path, CliRunner())
53
+ assert res.exit_code == 0
54
+ kept = json.loads((tmp_path / "bounds.json").read_text(encoding="utf-8"))
55
+ assert kept["editable_paths"] == ["mine/"]
56
+
57
+ def test_scaffold_loop_runs_end_to_end(self, tmp_path, monkeypatch):
58
+ """The actual stranger journey: init then loop, PASS after the mock's BLOCK."""
59
+ monkeypatch.chdir(tmp_path)
60
+ runner = CliRunner()
61
+ assert _init_in(tmp_path, runner).exit_code == 0
62
+ res = runner.invoke(main, ["loop", "task.yaml"])
63
+ assert "FINAL: PASS" in res.output, res.output
64
+ assert res.exit_code == 0
65
+
66
+
67
+ class TestDoctorDiagnosesStrangerGaps:
68
+ def test_flags_non_git_dir_and_missing_bounds(self, tmp_path):
69
+ (tmp_path / "task.yaml").write_text('text: "t"\nrepo: "."\n', encoding="utf-8")
70
+ checks = {c.name: c for c in doctor.run_checks(None, repo=str(tmp_path))}
71
+ assert not checks["repo (git)"].ok
72
+ assert not checks["bounds source"].ok
73
+
74
+ def test_green_on_scaffolded_dir(self, tmp_path, monkeypatch):
75
+ monkeypatch.chdir(tmp_path)
76
+ assert _init_in(tmp_path, CliRunner()).exit_code == 0
77
+ cfg = load(tmp_path / "sembl.stack.yaml")
78
+ checks = {c.name: c for c in doctor.run_checks(cfg, repo=str(tmp_path))}
79
+ assert checks["repo (git)"].ok
80
+ assert checks["bounds source"].ok
81
+
82
+ def test_spec_path_counts_as_bounds_source(self, tmp_path):
83
+ subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True)
84
+ (tmp_path / "task.yaml").write_text(
85
+ 'text: "t"\nrepo: "."\nspec_path: "./specs/001"\n', encoding="utf-8")
86
+ checks = {c.name: c for c in doctor.run_checks(None, repo=str(tmp_path))}
87
+ assert checks["bounds source"].ok
88
+
89
+
90
+ class TestLoopFailsClean:
91
+ def test_stage_runtimeerror_is_a_diagnosis_not_a_traceback(self, tmp_path, monkeypatch):
92
+ """A missing bounds source must produce the hint, not a LangGraph traceback."""
93
+ monkeypatch.chdir(tmp_path)
94
+ runner = CliRunner()
95
+ assert _init_in(tmp_path, runner).exit_code == 0
96
+ (tmp_path / "bounds.json").unlink() # recreate the original stranger state
97
+ res = runner.invoke(main, ["loop", "task.yaml"])
98
+ assert res.exit_code == 1
99
+ assert "error: L2" in res.output
100
+ assert "sembl-stack doctor" in res.output
101
+ assert "Traceback" not in res.output
File without changes
File without changes
File without changes
File without changes
File without changes