logomesh 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. logomesh-0.1.0/.env.example +59 -0
  2. logomesh-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +27 -0
  3. logomesh-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +14 -0
  4. logomesh-0.1.0/.github/pull_request_template.md +16 -0
  5. logomesh-0.1.0/.github/workflows/ci.yml +38 -0
  6. logomesh-0.1.0/.gitignore +87 -0
  7. logomesh-0.1.0/AGENTS.md +62 -0
  8. logomesh-0.1.0/CLAUDE.md +87 -0
  9. logomesh-0.1.0/Dockerfile.sandbox +118 -0
  10. logomesh-0.1.0/Makefile +18 -0
  11. logomesh-0.1.0/PKG-INFO +213 -0
  12. logomesh-0.1.0/README.md +177 -0
  13. logomesh-0.1.0/docs/honest_hit_rates.md +197 -0
  14. logomesh-0.1.0/docs/pilot_deployment.md +164 -0
  15. logomesh-0.1.0/docs/pipeline.md +1183 -0
  16. logomesh-0.1.0/examples/fintech-payments-demo/.env.example +14 -0
  17. logomesh-0.1.0/examples/fintech-payments-demo/Dockerfile +19 -0
  18. logomesh-0.1.0/examples/fintech-payments-demo/README.md +185 -0
  19. logomesh-0.1.0/examples/fintech-payments-demo/core/__init__.py +0 -0
  20. logomesh-0.1.0/examples/fintech-payments-demo/core/apps.py +6 -0
  21. logomesh-0.1.0/examples/fintech-payments-demo/core/views.py +8 -0
  22. logomesh-0.1.0/examples/fintech-payments-demo/docker-compose.yml +69 -0
  23. logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/__init__.py +3 -0
  24. logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/asgi.py +3 -0
  25. logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/celery.py +16 -0
  26. logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/settings.py +155 -0
  27. logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/urls.py +13 -0
  28. logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/wsgi.py +3 -0
  29. logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_hard_fk_violation.json +31 -0
  30. logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_hard_integrity_race.json +33 -0
  31. logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_hard_stale_data.json +32 -0
  32. logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_integrity_event.json +33 -0
  33. logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_key_event.json +30 -0
  34. logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_type_event.json +31 -0
  35. logomesh-0.1.0/examples/fintech-payments-demo/manage.py +14 -0
  36. logomesh-0.1.0/examples/fintech-payments-demo/payments/__init__.py +0 -0
  37. logomesh-0.1.0/examples/fintech-payments-demo/payments/admin.py +25 -0
  38. logomesh-0.1.0/examples/fintech-payments-demo/payments/apps.py +6 -0
  39. logomesh-0.1.0/examples/fintech-payments-demo/payments/crash_lab.py +58 -0
  40. logomesh-0.1.0/examples/fintech-payments-demo/payments/management/__init__.py +0 -0
  41. logomesh-0.1.0/examples/fintech-payments-demo/payments/management/commands/__init__.py +0 -0
  42. logomesh-0.1.0/examples/fintech-payments-demo/payments/management/commands/seed_demo_data.py +21 -0
  43. logomesh-0.1.0/examples/fintech-payments-demo/payments/management/commands/trigger_easy_crashes.py +26 -0
  44. logomesh-0.1.0/examples/fintech-payments-demo/payments/management/commands/trigger_hard_cases.py +40 -0
  45. logomesh-0.1.0/examples/fintech-payments-demo/payments/migrations/0001_initial.py +90 -0
  46. logomesh-0.1.0/examples/fintech-payments-demo/payments/migrations/__init__.py +0 -0
  47. logomesh-0.1.0/examples/fintech-payments-demo/payments/models.py +90 -0
  48. logomesh-0.1.0/examples/fintech-payments-demo/payments/stripe_client.py +35 -0
  49. logomesh-0.1.0/examples/fintech-payments-demo/payments/tasks.py +376 -0
  50. logomesh-0.1.0/examples/fintech-payments-demo/payments/views.py +98 -0
  51. logomesh-0.1.0/examples/fintech-payments-demo/requirements.txt +10 -0
  52. logomesh-0.1.0/examples/fintech-payments-demo/scripts/trigger_crash.py +73 -0
  53. logomesh-0.1.0/examples/fintech-payments-demo/tests/test_repro_7470722069.py +5 -0
  54. logomesh-0.1.0/examples/fintech-payments-demo/tests/test_repro_7470722082.py +5 -0
  55. logomesh-0.1.0/examples/fintech-payments-demo/tests/test_repro_7470722083.py +5 -0
  56. logomesh-0.1.0/examples/fintech_billing/__init__.py +0 -0
  57. logomesh-0.1.0/examples/fintech_billing/billing_ops.py +43 -0
  58. logomesh-0.1.0/examples/fintech_billing/billing_v2.py +185 -0
  59. logomesh-0.1.0/examples/fintech_billing/checkout.py +28 -0
  60. logomesh-0.1.0/examples/fintech_billing_v3/__init__.py +0 -0
  61. logomesh-0.1.0/examples/fintech_billing_v3/bugs.py +338 -0
  62. logomesh-0.1.0/examples/fintech_billing_v5/__init__.py +0 -0
  63. logomesh-0.1.0/examples/fintech_billing_v5/bugs.py +407 -0
  64. logomesh-0.1.0/logomesh_orchestrator.py +3305 -0
  65. logomesh-0.1.0/migrations/0001_installation_secrets.sql +46 -0
  66. logomesh-0.1.0/migrations/0002_runs.sql +42 -0
  67. logomesh-0.1.0/policies/docker/no_root.rego +30 -0
  68. logomesh-0.1.0/policies/docker/registry_allowlist.rego +28 -0
  69. logomesh-0.1.0/policies/docker/secure_defaults.rego +48 -0
  70. logomesh-0.1.0/policies/k8s/no_privilege_escalation.rego +49 -0
  71. logomesh-0.1.0/policies/k8s/no_root.rego +33 -0
  72. logomesh-0.1.0/policies/k8s/readonly_rootfs.rego +29 -0
  73. logomesh-0.1.0/policies/k8s/registry_allowlist.rego +40 -0
  74. logomesh-0.1.0/policies/k8s/resource_limits.rego +42 -0
  75. logomesh-0.1.0/pyproject.toml +80 -0
  76. logomesh-0.1.0/scripts/honest_hit_rates.py +332 -0
  77. logomesh-0.1.0/scripts/run_demo_orchestrator_suite.py +241 -0
  78. logomesh-0.1.0/scripts/seed_billing_v2.py +172 -0
  79. logomesh-0.1.0/scripts/seed_sentry_issue.py +70 -0
  80. logomesh-0.1.0/src/anthropic_adapter.py +3 -0
  81. logomesh-0.1.0/src/business_logic/orm_materializer.py +300 -0
  82. logomesh-0.1.0/src/business_logic/repo_introspection.py +403 -0
  83. logomesh-0.1.0/src/business_logic/sandbox/__init__.py +1011 -0
  84. logomesh-0.1.0/src/business_logic/sandbox/dep_installer.py +1072 -0
  85. logomesh-0.1.0/src/business_logic/sandbox_planner.py +224 -0
  86. logomesh-0.1.0/src/capture/__init__.py +142 -0
  87. logomesh-0.1.0/src/capture/_log.py +35 -0
  88. logomesh-0.1.0/src/capture/hooks/__init__.py +0 -0
  89. logomesh-0.1.0/src/capture/hooks/django_hook.py +179 -0
  90. logomesh-0.1.0/src/capture/hooks/fastapi_hook.py +316 -0
  91. logomesh-0.1.0/src/capture/hooks/http_hook.py +280 -0
  92. logomesh-0.1.0/src/capture/hooks/redis_hook.py +93 -0
  93. logomesh-0.1.0/src/capture/hooks/sqlalchemy_hook.py +175 -0
  94. logomesh-0.1.0/src/capture/ring_buffer.py +77 -0
  95. logomesh-0.1.0/src/capture/safety.py +88 -0
  96. logomesh-0.1.0/src/capture/sentry_hook.py +189 -0
  97. logomesh-0.1.0/src/capture/serializer.py +173 -0
  98. logomesh-0.1.0/src/capture/stripe_meta.py +133 -0
  99. logomesh-0.1.0/src/capture/writer.py +52 -0
  100. logomesh-0.1.0/src/cli/__init__.py +9 -0
  101. logomesh-0.1.0/src/cli/artifact.py +183 -0
  102. logomesh-0.1.0/src/cli/codebase_context.py +347 -0
  103. logomesh-0.1.0/src/cli/draft_pr.py +284 -0
  104. logomesh-0.1.0/src/cli/git_utils.py +92 -0
  105. logomesh-0.1.0/src/cli/main.py +84 -0
  106. logomesh-0.1.0/src/cli/repro.py +534 -0
  107. logomesh-0.1.0/src/cli/state_fixtures.py +176 -0
  108. logomesh-0.1.0/src/core/__init__.py +3 -0
  109. logomesh-0.1.0/src/core/anthropic_adapter.py +117 -0
  110. logomesh-0.1.0/src/core/audit_log.py +168 -0
  111. logomesh-0.1.0/src/core/config.py +241 -0
  112. logomesh-0.1.0/src/core/installation_secrets.py +368 -0
  113. logomesh-0.1.0/src/core/llm_utils.py +122 -0
  114. logomesh-0.1.0/src/core/logomesh_log.py +107 -0
  115. logomesh-0.1.0/src/core/pii_redactor.py +307 -0
  116. logomesh-0.1.0/src/core/responses_adapter.py +94 -0
  117. logomesh-0.1.0/src/core/runs_log.py +178 -0
  118. logomesh-0.1.0/src/core/supabase_client.py +326 -0
  119. logomesh-0.1.0/src/core/usage_tracker.py +173 -0
  120. logomesh-0.1.0/src/llm_utils.py +2 -0
  121. logomesh-0.1.0/src/logomesh_log.py +3 -0
  122. logomesh-0.1.0/src/oracles/__init__.py +0 -0
  123. logomesh-0.1.0/src/oracles/api_error_fixtures.py +115 -0
  124. logomesh-0.1.0/src/oracles/crash_context.py +489 -0
  125. logomesh-0.1.0/src/oracles/hypothesis_report.py +769 -0
  126. logomesh-0.1.0/src/oracles/sentry_replay.py +835 -0
  127. logomesh-0.1.0/src/oracles/sentry_replay_v2.py +588 -0
  128. logomesh-0.1.0/src/oracles/signal_detectors.py +510 -0
  129. logomesh-0.1.0/src/responses_adapter.py +3 -0
  130. logomesh-0.1.0/src/server/__init__.py +0 -0
  131. logomesh-0.1.0/src/server/app.py +98 -0
  132. logomesh-0.1.0/src/server/github_comment.py +95 -0
  133. logomesh-0.1.0/src/server/installations_api.py +532 -0
  134. logomesh-0.1.0/src/server/sentry_comment.py +206 -0
  135. logomesh-0.1.0/src/server/sentry_webhook.py +479 -0
  136. logomesh-0.1.0/src/server/slack_summary.py +146 -0
  137. logomesh-0.1.0/src/supabase_client.py +3 -0
  138. logomesh-0.1.0/src/usage_tracker.py +3 -0
  139. logomesh-0.1.0/tests/fixtures/bv2_s1_exchange_gain_loss.json +48 -0
  140. logomesh-0.1.0/tests/fixtures/bv2_s2_payment_reconciliation.json +51 -0
  141. logomesh-0.1.0/tests/fixtures/bv2_s3_order_discount.json +50 -0
  142. logomesh-0.1.0/tests/fixtures/bv2_s4_gstin_lookup.json +50 -0
  143. logomesh-0.1.0/tests/fixtures/bv2_s5_quantize_price.json +49 -0
  144. logomesh-0.1.0/tests/fixtures/bv2_s5_webhook_amount.json +45 -0
  145. logomesh-0.1.0/tests/fixtures/sentry_invoice_date_invalid.json +36 -0
  146. logomesh-0.1.0/tests/fixtures/sentry_money_in_words_none.json +37 -0
  147. logomesh-0.1.0/tests/fixtures/sentry_negative_qty_event.json +45 -0
  148. logomesh-0.1.0/tests/fixtures/sentry_payment_total_none_rate.json +37 -0
  149. logomesh-0.1.0/tests/fixtures/sentry_v3_01_celery_refund_stale_db.json +205 -0
  150. logomesh-0.1.0/tests/fixtures/sentry_v3_02_balance_debit_race.json +226 -0
  151. logomesh-0.1.0/tests/fixtures/sentry_v3_03_dep_version_conflict.json +225 -0
  152. logomesh-0.1.0/tests/fixtures/sentry_v3_04_tz_naive_datetime.json +244 -0
  153. logomesh-0.1.0/tests/fixtures/sentry_v3_05_stripe_c_ext_crash.json +243 -0
  154. logomesh-0.1.0/tests/fixtures/sentry_v3_06_db_pool_exhaustion.json +394 -0
  155. logomesh-0.1.0/tests/fixtures/sentry_v3_07_tax_rule_drift.json +281 -0
  156. logomesh-0.1.0/tests/fixtures/sentry_v3_08_webhook_idempotency.json +300 -0
  157. logomesh-0.1.0/tests/fixtures/sentry_v3_09_pii_in_locals.json +316 -0
  158. logomesh-0.1.0/tests/fixtures/sentry_v3_10_redis_lock_missing.json +453 -0
  159. logomesh-0.1.0/tests/fixtures/sentry_v4_01_stripe_webhook_dup.json +288 -0
  160. logomesh-0.1.0/tests/fixtures/sentry_v4_02_stale_data_celery.json +236 -0
  161. logomesh-0.1.0/tests/fixtures/sentry_v4_03_async_lost_update.json +183 -0
  162. logomesh-0.1.0/tests/fixtures/sentry_v4_04_tenacity_lock_wait.json +205 -0
  163. logomesh-0.1.0/tests/fixtures/sentry_v4_05_pm_doesnotexist.json +262 -0
  164. logomesh-0.1.0/tests/fixtures/sentry_v5_01_rtp_payment_timeout.json +198 -0
  165. logomesh-0.1.0/tests/fixtures/sentry_v5_02_processor_ransomware.json +232 -0
  166. logomesh-0.1.0/tests/fixtures/sentry_v5_03_fraud_model_drift.json +266 -0
  167. logomesh-0.1.0/tests/fixtures/sentry_v5_04_stablecoin_reconciliation.json +301 -0
  168. logomesh-0.1.0/tests/fixtures/sentry_v5_05_consent_token_expired.json +329 -0
  169. logomesh-0.1.0/tests/fixtures/sentry_v5_06_mobile_confirm_crash.json +367 -0
  170. logomesh-0.1.0/tests/fixtures/sentry_v5_07_sponsor_bank_ratelimit.json +388 -0
  171. logomesh-0.1.0/tests/fixtures/sentry_v5_08_agent_authorization.json +445 -0
  172. logomesh-0.1.0/tests/fixtures/state_and_validation_fixture.py +25 -0
  173. logomesh-0.1.0/tests/fixtures/v3_index.json +72 -0
  174. logomesh-0.1.0/tests/fixtures/v4_index.json +37 -0
  175. logomesh-0.1.0/tests/fixtures/v5_index.json +58 -0
  176. logomesh-0.1.0/tests/test_api_error_fixtures.py +44 -0
  177. logomesh-0.1.0/tests/test_artifact_seal.py +298 -0
  178. logomesh-0.1.0/tests/test_capture_fastapi_middleware.py +198 -0
  179. logomesh-0.1.0/tests/test_capture_http_hook.py +115 -0
  180. logomesh-0.1.0/tests/test_capture_install.py +20 -0
  181. logomesh-0.1.0/tests/test_capture_ring_buffer.py +56 -0
  182. logomesh-0.1.0/tests/test_capture_safety.py +73 -0
  183. logomesh-0.1.0/tests/test_capture_serializer.py +78 -0
  184. logomesh-0.1.0/tests/test_capture_stripe_meta.py +73 -0
  185. logomesh-0.1.0/tests/test_capture_writer.py +30 -0
  186. logomesh-0.1.0/tests/test_cli_artifact.py +43 -0
  187. logomesh-0.1.0/tests/test_cli_git_utils.py +97 -0
  188. logomesh-0.1.0/tests/test_cli_repro.py +81 -0
  189. logomesh-0.1.0/tests/test_core_config.py +233 -0
  190. logomesh-0.1.0/tests/test_crash_context.py +335 -0
  191. logomesh-0.1.0/tests/test_dep_installer.py +96 -0
  192. logomesh-0.1.0/tests/test_github_comment.py +118 -0
  193. logomesh-0.1.0/tests/test_hypothesis_report.py +453 -0
  194. logomesh-0.1.0/tests/test_installation_secrets.py +108 -0
  195. logomesh-0.1.0/tests/test_installations_api.py +375 -0
  196. logomesh-0.1.0/tests/test_llm_utils.py +54 -0
  197. logomesh-0.1.0/tests/test_logomesh_log.py +53 -0
  198. logomesh-0.1.0/tests/test_orchestrator_step_guard.py +68 -0
  199. logomesh-0.1.0/tests/test_orm_materializer.py +191 -0
  200. logomesh-0.1.0/tests/test_pii_redactor.py +348 -0
  201. logomesh-0.1.0/tests/test_prepare_env_module_pinning.py +140 -0
  202. logomesh-0.1.0/tests/test_prepare_environment.py +201 -0
  203. logomesh-0.1.0/tests/test_production_sandbox_refusal.py +103 -0
  204. logomesh-0.1.0/tests/test_repo_introspection.py +152 -0
  205. logomesh-0.1.0/tests/test_request_replay_synthesis.py +196 -0
  206. logomesh-0.1.0/tests/test_sandbox.py +69 -0
  207. logomesh-0.1.0/tests/test_sandbox_planner.py +179 -0
  208. logomesh-0.1.0/tests/test_sandbox_python_version.py +37 -0
  209. logomesh-0.1.0/tests/test_self_refusal_loop.py +186 -0
  210. logomesh-0.1.0/tests/test_sentry_comment.py +117 -0
  211. logomesh-0.1.0/tests/test_sentry_replay.py +188 -0
  212. logomesh-0.1.0/tests/test_sentry_replay_extraction.py +325 -0
  213. logomesh-0.1.0/tests/test_sentry_replay_v2.py +14 -0
  214. logomesh-0.1.0/tests/test_sentry_webhook.py +281 -0
  215. logomesh-0.1.0/tests/test_signal_detectors.py +300 -0
  216. logomesh-0.1.0/tests/test_slack_summary.py +138 -0
  217. logomesh-0.1.0/tests/test_supabase_client.py +12 -0
  218. logomesh-0.1.0/tests/test_supabase_client_env.py +39 -0
  219. logomesh-0.1.0/tests/test_timeout_salvage.py +54 -0
  220. logomesh-0.1.0/tests/test_usage_tracker.py +77 -0
  221. logomesh-0.1.0/uv.lock +3075 -0
@@ -0,0 +1,59 @@
1
+ # === GitHub App ===
2
+ # Create at https://github.com/settings/apps
3
+ GITHUB_APP_ID=123456
4
+ GITHUB_PRIVATE_KEY_PATH=./private-key.pem
5
+ # Or set the key directly (newlines escaped as \n):
6
+ # GITHUB_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----\n...\n-----END RSA PRIVATE KEY-----"
7
+ GITHUB_WEBHOOK_SECRET=your-webhook-secret
8
+
9
+ # === LLM ===
10
+ OPENAI_API_KEY=sk-proj-your-key-here
11
+ OPENAI_MODEL=gpt-4o-mini
12
+ # OPENAI_BASE_URL=https://api.openai.com/v1 # override for Azure/local
13
+
14
+ # Fallback: if OPENAI_API_KEY is not set, Gemini is used instead (set GEMINI_API_KEY below).
15
+ # GEMINI_API_KEY=your-gemini-key
16
+
17
+ # === Sandbox ===
18
+ # SANDBOX_TIMEOUT=15 # seconds per test run (default: 15)
19
+
20
+ # === Oracle Lanes ===
21
+ # Each lane can be individually enabled/disabled.
22
+
23
+ # Sentry Replay: replays real production exceptions against PR code.
24
+ # Requires Sentry API access. The lane auto-disables if the credentials below are not set.
25
+ # SENTRY_AUTH_TOKEN=sntrys_your-token-here
26
+ # SENTRY_ORG=your-org
27
+ # SENTRY_PROJECT=your-project
28
+ # SENTRY_BASE_URL=https://sentry.io
29
+ # LOGOMESH_ENABLE_SENTRY_LANE=1
30
+
31
+ # Static Patterns: auth guard, secrets, skipped tests, debug flags, migration order.
32
+ # LOGOMESH_ENABLE_STATIC_LANE=1
33
+
34
+ # Type Contracts: test boundary inputs from type hints / Pydantic models. (coming soon)
35
+ # LOGOMESH_ENABLE_TYPE_LANE=1
36
+
37
+ # Behavioral Regression: compare base vs head output on same inputs. (coming soon)
38
+ # LOGOMESH_ENABLE_REGRESSION_LANE=1
39
+
40
+ # Test Augmentation: edge-case variants of existing test assertions. (coming soon)
41
+ # LOGOMESH_ENABLE_AUGMENTATION_LANE=1
42
+
43
+ # === Pipeline Config ===
44
+ # LOGOMESH_ENABLE_CLEAN_SIGNAL=1 # post "no issues found" on clean PRs
45
+ # LOGOMESH_MAX_FINDINGS_PER_FILE=5
46
+ # LOGOMESH_MAX_FINDINGS_PER_PR=15
47
+ # PIPELINE_TIMEOUT_SECONDS=240
48
+ # ENABLE_INLINE_REVIEW=1 # inline ```suggestion``` blocks
49
+
50
+ # === Supabase (multi-tenant install + run storage) ===
51
+ # You can set either SUPABASE_URL directly OR SUPABASE_PROJECT_REF.
52
+ # SUPABASE_PROJECT_REF=xyzcompanyabc123
53
+ # SUPABASE_URL=https://xyzcompanyabc123.supabase.co
54
+ # Prefer SUPABASE_SERVICE_KEY; SUPABASE_SERVICE_ROLE_KEY also supported.
55
+ # SUPABASE_SERVICE_KEY=eyJhbGciOi...
56
+ # LOGOMESH_MASTER_KEY=<32-byte urlsafe-base64 or 64-char hex>
57
+
58
+ # === Production ===
59
+ # LOGOMESH_ENV=production # disables subprocess fallback (requires Docker)
@@ -0,0 +1,27 @@
1
+ ---
2
+ name: Bug report
3
+ about: Report something that isn't working correctly
4
+ labels: bug
5
+ ---
6
+
7
+ **Describe the bug**
8
+ What happened?
9
+
10
+ **To reproduce**
11
+ Steps to reproduce the behavior:
12
+ 1. Run `...`
13
+ 2. Send request to `...`
14
+ 3. See error
15
+
16
+ **Expected behavior**
17
+ What should have happened instead?
18
+
19
+ **Environment**
20
+ - OS: [e.g. macOS, Ubuntu 22.04]
21
+ - Python version: [e.g. 3.11.5]
22
+ - uv version:
23
+
24
+ **Logs / output**
25
+ ```
26
+ paste relevant logs here
27
+ ```
@@ -0,0 +1,14 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an improvement or new feature
4
+ labels: enhancement
5
+ ---
6
+
7
+ **What problem does this solve?**
8
+ Describe the problem or limitation you're running into.
9
+
10
+ **Proposed solution**
11
+ How would you solve it? Be specific about what you'd change.
12
+
13
+ **Alternatives considered**
14
+ Any other approaches you thought about?
@@ -0,0 +1,16 @@
1
+ ## What does this PR do?
2
+
3
+ <!-- Brief description of the change -->
4
+
5
+ ## Related issue
6
+
7
+ <!-- Link to the issue this PR addresses, e.g. Fixes #123 -->
8
+
9
+ ## How was this tested?
10
+
11
+ <!-- Describe how you verified the change works -->
12
+
13
+ ## Checklist
14
+
15
+ - [ ] My changes don't break existing functionality
16
+ - [ ] I've tested locally with `uv run pytest tests/ -v`
@@ -0,0 +1,38 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ test:
11
+ name: Test
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - name: Checkout repository
15
+ uses: actions/checkout@v4
16
+
17
+ - name: Setup Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: '3.12'
21
+
22
+ - name: Install uv
23
+ run: pip install uv
24
+
25
+ - name: Install dependencies
26
+ run: uv sync
27
+
28
+ - name: Run tests
29
+ run: uv run pytest tests/ -v
30
+
31
+ - name: Enforce quality gates
32
+ if: ${{ vars.ENFORCE_LOGOMESH_QUALITY_GATES == '1' }}
33
+ run: |
34
+ UV_CACHE_DIR=/tmp/uv-cache uv run python scripts/quality_gates.py \
35
+ --run results/real_prs_run_6.json \
36
+ --replay results/replay_from_run3_current.json \
37
+ --out results/quality_gates_latest.json \
38
+ --strict
@@ -0,0 +1,87 @@
1
+ # Python
2
+ .venv/
3
+ __pycache__/
4
+ *.pyc
5
+ *.egg-info/
6
+ .mypy_cache/
7
+ .pytest_cache/
8
+ dist/
9
+ build/
10
+
11
+ # Environment
12
+ .env
13
+ *.pem
14
+
15
+ # Data
16
+ *.db
17
+ *.db-shm
18
+ *.db-wal
19
+ *.db-journal
20
+
21
+ # OS
22
+ .DS_Store
23
+
24
+ # IDE
25
+ .idea/
26
+ .vscode/
27
+ .claude/
28
+
29
+ # JetBrains / PyCharm
30
+ *.iml
31
+ *.ipr
32
+ *.iws
33
+ .idea_modules/
34
+ out/
35
+
36
+ # PyCharm local/cached project data
37
+ .idea/**/workspace.xml
38
+ .idea/**/tasks.xml
39
+ .idea/**/usage.statistics.xml
40
+ .idea/**/dictionaries/
41
+ .idea/**/shelf/
42
+ .idea/**/httpRequests/
43
+ .idea/**/dataSources/
44
+ .idea/**/dataSources.local.xml
45
+ .idea/**/dataSources.ids
46
+ .idea/**/dataSources.xml
47
+ .idea/**/uiDesigner.xml
48
+
49
+ # JetBrains build output dirs
50
+ cmake-build-*/
51
+
52
+ # Project specific
53
+ docs/startup/
54
+ docs/archive/
55
+ archive/logomesh_legacy/results/
56
+ docs/refs-internal/
57
+ data/job_queue.sqlite3
58
+
59
+ # Internal strategy/agent docs (not for repo)
60
+ docs/agent_system/
61
+ docs/strategy/discovery/
62
+ docs/strategy/evaluation.md
63
+ docs/strategy/research_validation.md
64
+ docs/strategy/narrowing_philosophy.md
65
+ docs/prompts/
66
+ docs/internal/
67
+ docs/run_results/
68
+
69
+ # Private internal files — kept locally, never push
70
+ archive/logomesh_legacy/ref-for-agents/
71
+ archive/logomesh_legacy/ruflo_config.json
72
+ .agents/
73
+
74
+ .playwright-mcp/
75
+ /docs/superpowers/
76
+ .mcp.json
77
+ skills-lock.json
78
+
79
+ *.log
80
+ *.tmp
81
+ *.bak
82
+ *.swp
83
+ *.python-version
84
+ *.orig
85
+
86
+ # ALL LEGACY CODE
87
+ archive/
@@ -0,0 +1,62 @@
1
+ # AGENTS.md
2
+
3
+ ## Mission and product constraints
4
+ - logomesh is post-incident **deterministic repro with proof**: only ship artifacts/PRs/comments when `deterministic_repro` reproduces and the exception type matches (`logomesh_orchestrator.py`, `src/oracles/sentry_replay_v2.py`, `src/cli/artifact.py`).
5
+ - Keep output evidence-first and terse (repro call -> observed result, expected property, location/state) and never add scores or "looks good" comments; non-repro or mismatch should stay silent or `needs_human_review` (`src/cli/repro.py`, `src/server/sentry_comment.py`, `src/server/github_comment.py`).
6
+ - LLM output is advisory only; evidence-path test bytes are deterministic from frame locals (`logomesh_orchestrator.py`, `src/oracles/sentry_replay_v2.py`).
7
+
8
+ ## Pipeline mental model (read this first)
9
+ - Sentry webhook ingress is fast-ACK + async dispatch: verify HMAC signature + timestamp window + idempotency, then 202 ACK and semaphore-limited dispatch (`src/server/sentry_webhook.py`).
10
+ - Core repro flow: fetch event -> innermost in-app frame -> resolve source -> build deterministic test -> sandbox run -> verify exception match (`src/oracles/sentry_replay.py`, `src/oracles/sentry_replay_v2.py`, `src/business_logic/sandbox/__init__.py`).
11
+ - LangGraph orchestrator wraps the deterministic core: only `deterministic_repro` yields sealed bytes; other tools are advisory (context, deps prep, commenting) (`logomesh_orchestrator.py`).
12
+ - `logomesh repro` is the primary product path; `logomesh check` runs the single-file verify pipeline (`src/cli/repro.py`, `src/cli/check.py`, `src/core/pipeline.py`).
13
+
14
+ ## Project-specific conventions that matter
15
+ - Single-file verification in `verify_file`: build change scope -> pack parseable source -> make sandbox importable -> generate tests -> cap tests -> inject target stubs -> dependency-aware sandbox run -> classify -> validate -> repro/regression gate -> `VerifyResult` (`src/core/pipeline.py`, `src/core/repro_gate.py`).
16
+ - Property-inference preprocessing gates (fire before any LLM call): (a) `_is_trivial_passthrough` drops wrapper functions whose body is a single delegation (`return self.inner(...)`); (b) arity filter drops properties whose `input_code` doesn't match the function signature; (c) `_is_identity_dunder_property` drops no-op assertions on dunders; (d) `_counter_impl_probe` (one batched LLM call per function, gated by `ENABLE_COUNTER_IMPL_PROBE`, default on) drops properties that a semantically-wrong implementation could still satisfy — bias toward keeping on parse failure. These gates live in `src/business_logic/generator/inference.py` and are wired into BOTH `inference.infer_properties` and the legacy compat shim `src/business_logic/legacy/generator.py` (where `PropertyTestGenerator` actually resolves `infer_properties` / `infer_metamorphic_properties` from). Do not edit legacy's copy without also updating or re-importing from inference.
17
+ - Spotlighting is on by default in inference (`ENABLE_SPOTLIGHTING`): untrusted source is sentinel-rewritten per call before prompt assembly to blunt prompt-injection instructions embedded in code/comments (`_spotlight` in `src/business_logic/generator/inference.py`).
18
+ - Time-Travel Trace: the sandbox conftest (`_TRACE_HOOK` injected in both `_CONFTEST_AUTOMOCK` and `_CONFTEST_STRICT`) installs a `pytest_exception_interact` hook that walks the traceback to the frame inside `target.py`, captures its `f_locals` (with size/type guards), and writes `{func, lineno, locals}` into `finding.trace` (`src/business_logic/sandbox/__init__.py`, `src/business_logic/classifier/core.py`).
19
+ - Classifier artifact suppressors in `src/business_logic/classifier/core.py`: (a) `_trace_has_automock_leak` drops findings whose trace shows `MagicMock` in `target.py` locals (sandbox automock leaked into user code); (b) `_PY2_BUILTINS` set drops `NameError` for Py2-only names (`unicode`, `basestring`, ...) that are unreachable under Python 3 execution; (c) salt dunder globals (`__opts__`, `__salt__`, ...) are dropped as SaltStack-runtime-injected symbols. These filters must stay — they are the reason fabric-style Py2-compat guard FPs stay suppressed.
20
+ - Files with import chains known to fail sandbox collection are skipped early as structurally untestable (`is_structurally_untestable`, currently Twisted/Scrapy fragments) and counted in taxonomy (`src/business_logic/generator/inference.py`).
21
+ - Test generation must **not** wrap calls in `try/except`; failures should surface to pytest/classifier (`src/business_logic/generator/`).
22
+ - Function extraction is underscore-aware but not "public-only": runtime dunders in `_TESTABLE_DUNDERS` and modified `__init__` paths are intentionally testable; don't reintroduce blanket `_` filtering (`src/business_logic/generator/ast_analysis.py`).
23
+ - Diff-aware scope is semantic, not raw line fuzzing: changed symbols are extracted and packed into parseable source slices (`src/business_logic/change_scope/`, `src/core/pipeline.py`).
24
+ - Sandbox importability is intentionally rewritten: relative imports/external bases/module-level call assignments are mocked to keep `target.py` importable (`make_sandbox_importable` in `src/business_logic/legacy/generator.py`).
25
+ - Sandbox runs are dependency-aware: `Sandbox.run_dependency_aware` accepts dependency files and optional `deps_snapshot` mounts; orchestrator `prepare_environment` can build a snapshot when imports fail (`src/business_logic/sandbox/__init__.py`, `logomesh_orchestrator.py`).
26
+ - Logs are structured JSON only (`log.info/warn/error`) via `src/logomesh_log.py`; preserve `component` names for grepability.
27
+
28
+ ## External integrations and boundaries
29
+ - Sentry webhook auth uses HMAC-SHA256 + timestamp replay window; idempotency is `(issue_id, sha256(payload))` with a 24h in-process LRU; concurrency is capped via `LOGOMESH_MAX_CONCURRENT_RUNS` (`src/server/sentry_webhook.py`).
30
+ - Supabase-backed installations + runs: secrets are AES-256-GCM encrypted and `resolve_secrets()` falls back to env vars; `supabase_client` uses an in-memory shim when credentials are missing (`src/core/installation_secrets.py`, `src/core/supabase_client.py`, `migrations/`).
31
+ - GitHub draft PRs use `gh` CLI first, then REST with `GITHUB_TOKEN` + `GITHUB_REPO`; server comment posting uses per-install tokens (`src/cli/draft_pr.py`, `src/server/github_comment.py`).
32
+ - Sentry + Slack comments are posted from the server layer using per-install secrets (`src/server/sentry_comment.py`, `src/server/slack_summary.py`).
33
+ - LLM provider selection is env-driven (`OPENAI_API_KEY` preferred, Anthropic/Gemini-compatible fallback); temperature/reasoning args come from `src/llm_utils.py`.
34
+ - Sandbox supports Docker hardened runs with subprocess fallback for local dev; path traversal protections and output redaction live in the sandbox (`src/business_logic/sandbox/__init__.py`).
35
+
36
+ ## Developer workflows (high signal)
37
+ - Install deps: `uv sync`
38
+ - Run tests: `uv run pytest tests/ -v`
39
+ - Build sandbox image (required for realistic runs): `docker build -t logomesh-startup-sandbox:latest -f Dockerfile.sandbox .`
40
+ - Run repro: `uv run logomesh repro <sentry-url>`
41
+ - Emit artifact: `uv run logomesh repro <url> --artifact`
42
+ - Draft PR: `uv run logomesh repro <url> --draft-pr`
43
+ - Run local verify: `uv run logomesh check <path>`
44
+ - Run API server locally: `LOGOMESH_ENV=production uv run uvicorn src.server.app:app --port 8080`
45
+ - Replay a historical run: `uv run python scripts/replay_run.py --from results/real_prs_run_3.json --out results/replay_from_run3_current.json`
46
+ - Enforce quality gates (also in CI): `UV_CACHE_DIR=/tmp/uv-cache uv run python scripts/quality_gates.py --run results/real_prs_run_6.json --replay results/replay_from_run3_current.json --out results/quality_gates_latest.json --strict`
47
+ - Apply Supabase migrations: `psql $SUPABASE_DB_URL -f migrations/0001_installation_secrets.sql` and `psql $SUPABASE_DB_URL -f migrations/0002_runs.sql` (or `supabase db push`)
48
+ - CI (`.github/workflows/ci.yml`) runs `uv run pytest tests/ -v` on pushes and pull requests targeting `main`; the quality-gates step runs only when the GitHub Actions configuration variable `ENFORCE_LOGOMESH_QUALITY_GATES` is set to `1`
49
+
50
+ ## Where to look before editing
51
+ - Orchestrator + compliance gates: `logomesh_orchestrator.py`
52
+ - Deterministic repro + Sentry parsing: `src/oracles/sentry_replay_v2.py`, `src/oracles/sentry_replay.py`
53
+ - CLI entrypoints + artifact/PR output: `src/cli/repro.py`, `src/cli/check.py`, `src/cli/artifact.py`, `src/cli/draft_pr.py`
54
+ - Core verify pipeline + repro gate: `src/core/pipeline.py`, `src/core/repro_gate.py`
55
+ - Webhooks/API + comment channels: `src/server/sentry_webhook.py`, `src/server/installations_api.py`, `src/server/github_comment.py`, `src/server/sentry_comment.py`, `src/server/slack_summary.py`
56
+ - Secrets + persistence: `src/core/installation_secrets.py`, `src/core/supabase_client.py`, `migrations/`
57
+ - Sandbox security and execution modes: `src/business_logic/sandbox/__init__.py`
58
+ - Generation + validation: `src/business_logic/generator/`, `src/business_logic/validator/`, `src/business_logic/scaffold_filter/__init__.py`
59
+ - Diff/change-scope logic: `src/business_logic/change_scope/`
60
+ - Deep debug walkthrough: `docs/pipeline.md`
61
+ - Launch/replay gate definitions: `docs/refs/QUALITY_GATES.md`
62
+ - Production deployment runbook (systemd/nginx/webhook wiring): `docs/refs/DEPLOYMENT.md`
@@ -0,0 +1,87 @@
1
+ # logomesh
2
+
3
+ ## What This Is
4
+ CLI / GitHub App / MCP server that finds logic bugs and edge-case failures in Python code by inferring invariants, then attacking them with adversarial inputs in a Docker sandbox. Only reports when it has concrete, reproducible evidence.
5
+
6
+ Primary surface: `logomesh repro <sentry-url>` — paste a Sentry issue URL, get a failing pytest against your current branch with audit-ready post-incident evidence mapped to **SOC2 CC7.3 + CC7.4 + PCI DSS 12.10.5** (incident response). Earlier docs referenced PCI 6.3.2 — that control covers pre-release code review and is the wrong mapping for a post-incident product. Corrected.
7
+
8
+ ## Output Format
9
+ Lead with the violated property and the evidence.
10
+
11
+ ```
12
+ ## logomesh found 1 issue
13
+
14
+ ### Negative quantity bypasses checkout validation
15
+ **Property:** Order total should always be ≥ 0
16
+ **I called:** `checkout(item_id=1, qty=-5)`
17
+ **Got:** Order created with total `-$49.95`
18
+ **Location:** checkout.py, line 42
19
+ ```
20
+
21
+ No score. No preliminary comment. Silence = clean.
22
+
23
+ ## Key Paths
24
+ - `src/cli/` — `repro.py`, `artifact.py`, `draft_pr.py`, `check.py`, `main.py`
25
+ - `src/oracles/sentry_replay.py` — Sentry event fetch, frame locals → pytest synthesis
26
+ - `src/business_logic/generator/` — inference, ast_analysis, codegen, models
27
+ - `src/business_logic/sandbox/` — Docker sandbox + `_TRACE_HOOK` injection
28
+ - `src/business_logic/classifier/` — pytest-output → findings, artifact suppression
29
+ - `src/business_logic/validator/`, `src/business_logic/scaffold_filter/` — LLM reachability + static drops
30
+ - `src/business_logic/deep_checks/` — static signals + deep probes
31
+ - `src/core/` — llm_utils, logomesh_log, usage_tracker, config
32
+ - `src/capture/` — runtime capture hooks (FastAPI, Django, SQLAlchemy, Redis, Stripe)
33
+ - `tests/` — 500 unit tests
34
+ - `docs/pipeline.md` — full pipeline walkthrough for contributors
35
+ - `Dockerfile.sandbox` — sandbox image (python:3.12-slim + pytest + pytest-json-report)
36
+ - `logomesh_legacy/` — archived GitHub App, MCP server, benchmark data (kept for reference)
37
+
38
+ ## Architecture
39
+ Single-pass pipeline per changed Python file:
40
+ 1. AST extracts public functions + testable dunders
41
+ 2. **Passthrough gate** drops trivial wrappers (`_is_trivial_passthrough`)
42
+ 3. LLM infers properties/invariants per surviving function
43
+ 4. **Counter-impl probe** drops weak properties a wrong-impl could satisfy (1 batched LLM call/function)
44
+ 5. Adversarial tests generated targeting remaining properties + edge-case / metamorphic / toxic variants
45
+ 6. Executed in Docker sandbox (airgapped, nobody user, memory/PID limits, randomized report filename)
46
+ 7. **Time-Travel Trace** hook captures `f_locals` at crash inside `target.py`
47
+ 8. Classifier parses pytest output, drops automock-leak / Py2-builtin / scope-slice artifacts
48
+ 9. Validator: LLM confirms crash is caller-reachable (1 call per finding)
49
+ 10. Repro + base regression + two-signal gates
50
+ 11. Formatter posts only if evidence survives
51
+
52
+ `logomesh repro` path:
53
+ 1. Fetch Sentry event → extract innermost app frame + locals
54
+ 2. PII redact frame locals (PAN regex + field-name matching)
55
+ 3. Synthesize pytest from locals (deterministic) or LLM (default)
56
+ 4. Run in sandbox → retry once on no-repro
57
+ 5. Emit result / compliance artifact / draft PR
58
+
59
+ ## Dev Commands
60
+ ```bash
61
+ uv sync # install deps
62
+ uv run pytest tests/ -v # run tests (500 passing)
63
+ docker build -t logomesh-startup-sandbox:latest -f Dockerfile.sandbox .
64
+ uv run logomesh repro <sentry-url> # reproduce a crash
65
+ uv run logomesh repro <url> --artifact # + PCI/SOC2 envelope
66
+ uv run logomesh repro <url> --draft-pr # + open GitHub draft PR
67
+ uv run logomesh check <path> # run check pipeline on local file
68
+ ```
69
+
70
+ ## Key Design Rules
71
+ - Only report findings with concrete evidence (violated property + reproducible input/output)
72
+ - Never pip install from PR/user code in sandbox
73
+ - Webhook must ACK in <10s — enqueue and process asynchronously
74
+ - Semaphore(3) concurrency limit, 120s pipeline timeout
75
+ - Graceful degradation: no Docker → subprocess fallback; LLM fails → edge-case tests only
76
+ - Tests must NOT wrap calls in try/except
77
+ - Sandbox: path traversal protection, 512KB file size cap, binary rejection
78
+ - Generator input capped at 4000 chars; retry once on LLM failure
79
+ - PII redaction runs before any frame locals reach LLM or generated test code
80
+
81
+ ## What Was Removed (do not resurrect)
82
+ - `red_logic/` — MCTS orchestrator, dependency_analyzer, semantic_analyzer, constraint_breaker
83
+ - `scoring.py` / CIS formula — meaningless for PRs
84
+ - `compare_vectors.py` / VectorScorer — 400MB model, not needed
85
+ - `analyzer.py` / SemanticAuditor — not used in pipeline
86
+ - Two-pass pipeline / preliminary comment — post nothing until crash confirmed
87
+ - `src/github_app/`, `src/mcp_server/` — moved to `logomesh_legacy/` (deprioritized surfaces)
@@ -0,0 +1,118 @@
1
+ # Sandbox image for running PR tests in isolation.
2
+ # Build: docker build -t logomesh-startup-sandbox:latest -f Dockerfile.sandbox .
3
+ #
4
+ # Fat image: native toolchains baked in so PRs against packages with
5
+ # C/Rust extensions (datafusion, pydantic-core, lxml, psycopg, protobuf)
6
+ # can build+test without network.
7
+ FROM python:3.12-slim
8
+
9
+ ENV DEBIAN_FRONTEND=noninteractive \
10
+ PIP_NO_CACHE_DIR=1 \
11
+ PIP_DISABLE_PIP_VERSION_CHECK=1 \
12
+ CARGO_HOME=/opt/cargo \
13
+ RUSTUP_HOME=/opt/rustup \
14
+ PATH=/opt/cargo/bin:$PATH \
15
+ TZ=UTC \
16
+ LANG=C.UTF-8 \
17
+ LC_ALL=C.UTF-8 \
18
+ # --- Determinism layer (Task #4: never-fails) ---
19
+ # PYTHONHASHSEED=0: disables str/bytes hash randomization, so set iteration order
20
+ # (dicts are insertion-ordered regardless) is stable across runs and the same test +
21
+ # same input always produces the same output. Without this, findings can "flap" between runs, which destroys dev trust.
22
+ PYTHONHASHSEED=0 \
23
+ # No .pyc artifacts on the read-only FS — prevents permission errors at import.
24
+ PYTHONDONTWRITEBYTECODE=1 \
25
+ # Streams not buffered — every log line lands in real time.
26
+ PYTHONUNBUFFERED=1 \
27
+ # Fixed build timestamp (2024-01-01T00:00:00Z) for tools that honor SOURCE_DATE_EPOCH; this is a clock value, not a random seed.
28
+ SOURCE_DATE_EPOCH=1704067200 \
29
+ # Pin stdin/stdout/stderr to UTF-8. Locale is already C.UTF-8 above, which keeps locale-sensitive sort/etc. deterministic.
30
+ PYTHONIOENCODING=utf-8
31
+
32
+ # Native build deps:
33
+ # gcc/g++/make/pkg-config — C/C++ extensions
34
+ # libpq-dev — psycopg / asyncpg
35
+ # libxml2-dev libxslt1-dev — lxml
36
+ # libssl-dev libffi-dev — cryptography, cffi
37
+ # zlib1g-dev libjpeg-dev — Pillow-ish
38
+ # protobuf-compiler — protoc for grpc/proto codegen
39
+ # curl ca-certificates — rustup bootstrap
40
+ # git — pip VCS installs if ever needed
41
+ RUN apt-get update && apt-get install -y --no-install-recommends \
42
+ gcc g++ make pkg-config \
43
+ clang \
44
+ libpq-dev \
45
+ libxml2-dev libxslt1-dev \
46
+ libssl-dev libffi-dev \
47
+ zlib1g-dev libjpeg-dev \
48
+ protobuf-compiler \
49
+ curl ca-certificates git \
50
+ && rm -rf /var/lib/apt/lists/*
51
+
52
+ # Rust toolchain (stable, minimal profile) for pydantic-core, cryptography>=42,
53
+ # orjson, tokenizers, polars, datafusion, etc.
54
+ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
55
+ | sh -s -- -y --default-toolchain stable --profile minimal \
56
+ && chmod -R a+rX /opt/cargo /opt/rustup
57
+
58
+ # Test tooling layer. Required tools (pytest, pytest-json-report, hypothesis,
+ # crosshair, time-machine, ...) must fail the build if they cannot be installed.
+ # Optional tools (atheris, pytest-sqlalchemy-mock, frontrun) are wrapped in
+ # "( ... || true )" so that only their own failure is ignored: "&&" and "||"
+ # have equal precedence and associate left-to-right, so an ungrouped "|| true"
+ # would also mask failures of every required install earlier in the chain.
+ RUN pip install pytest pytest-json-report hypothesis pytest-xdist \
59
+ && pip install "crosshair-tool" "hypothesis[crosshair]" \
60
+ && (pip install atheris || true) \
61
+ && pip install time-machine responses respx vcrpy pytest-recording \
62
+ && (pip install pytest-sqlalchemy-mock frontrun || true) \
63
+ && rm -rf /root/.cache
64
+
65
+ # Pre-baked native extension libraries — most common PR targets.
66
+ # Installed so sandbox can import them without network access.
67
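+ # The trailing "|| true" makes this whole layer best-effort: "&&" and "||" associate left-to-right, so a failure of either pip install (e.g., architecture mismatch) is ignored and the build continues. NOTE(review): if only datafusion is meant to be optional, group it as "(pip install datafusion || true)".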
68
+ RUN pip install \
69
+ pyarrow \
70
+ polars \
71
+ duckdb \
72
+ pandas \
73
+ numpy \
74
+ && pip install datafusion || true \
75
+ && rm -rf /root/.cache
76
+
77
+ # Pre-baked web/fintech/auth stack — covers the top runtime dependencies
78
+ # that degraded findings to "analysis-based" on real PRs. On LogoMesh's
79
+ # own self-review PR (#2), 27 of 28 findings fell to analysis-only mode
80
+ # because openai/pydantic/etc. weren't importable. Pre-installing the
81
+ # common set eliminates 80%+ of those cases without the user needing a
82
+ # `.logomesh.toml` deps entry — YAML stays for the long tail.
83
+ #
84
+ # Order: pydantic first (the #1 missing dep). Pinned to >=2 to match
85
+ # the validator and generator assumptions in our own codebase. Separate
86
+ # RUN layer so bumping LLM SDKs doesn't invalidate the web-stack cache.
87
+ RUN pip install \
88
+ "pydantic>=2" \
89
+ pydantic-settings \
90
+ fastapi \
91
+ starlette \
92
+ httpx \
93
+ requests \
94
+ aiohttp \
95
+ sqlalchemy \
96
+ psycopg2-binary \
97
+ redis \
98
+ celery \
99
+ boto3 \
100
+ pyjwt \
101
+ cryptography \
102
+ stripe \
103
+ && rm -rf /root/.cache
104
+
105
+ # LLM SDKs — separate layer because they release breaking changes
106
+ # every few weeks. "|| true" so a flaky openai wheel doesn't block
107
+ # the whole image build.
108
+ RUN pip install openai anthropic || true \
109
+ && rm -rf /root/.cache
110
+
111
+ # Hypothesis DB mount point — persisted counterexamples survive across PR runs
112
+ # when the host bind-mounts a directory here. Pre-created so non-root user can
113
+ # write even when the mount is absent (Hypothesis falls back to fresh DB).
114
+ RUN mkdir -p /hypothesis-db && chmod 0777 /hypothesis-db
115
+
116
+ WORKDIR /workspace
117
+
118
+ USER nobody
@@ -0,0 +1,18 @@
1
+ .PHONY: verify-latest gates-latest sandbox # command targets; none of them produces a file named after itself
2
+
3
+ LATEST_RUN := $(shell ls -1 results/real_prs_run_*.json 2>/dev/null | awk -F'[_.]' '{print $$(NF-1), $$0}' | sort -n | tail -1 | awk '{print $$2}')
4
+ LATEST_RUN_ID := $(shell echo $(LATEST_RUN) | sed -E 's|.*real_prs_run_([0-9]+)\.json|\1|')
5
+
6
+ sandbox: ## build logomesh-startup-sandbox:latest from Dockerfile.sandbox, then run docker image prune -f
7
+ docker build -t logomesh-startup-sandbox:latest -f Dockerfile.sandbox .
8
+ docker image prune -f
9
+
10
+ verify-latest: ## run scripts/verify_findings.py on LATEST_RUN (highest-numbered results/real_prs_run_N.json) and write labels to benchmarks/labels/run_N.json; exits 1 when no run file exists
11
+ @test -n "$(LATEST_RUN)" || (echo "no results/real_prs_run_*.json found" && exit 1)
12
+ @echo "verifying $(LATEST_RUN) -> benchmarks/labels/run_$(LATEST_RUN_ID).json"
13
+ @mkdir -p benchmarks/labels
14
+ uv run python scripts/verify_findings.py $(LATEST_RUN) --label-out benchmarks/labels/run_$(LATEST_RUN_ID).json
15
+
16
+ gates-latest: ## run scripts/quality_gates.py --strict on LATEST_RUN using the labels in benchmarks/labels; exits 1 when no run file exists
17
+ @test -n "$(LATEST_RUN)" || (echo "no results/real_prs_run_*.json found" && exit 1)
18
+ uv run python scripts/quality_gates.py --run $(LATEST_RUN) --labels benchmarks/labels --strict