logomesh 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logomesh-0.1.0/.env.example +59 -0
- logomesh-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +27 -0
- logomesh-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +14 -0
- logomesh-0.1.0/.github/pull_request_template.md +16 -0
- logomesh-0.1.0/.github/workflows/ci.yml +38 -0
- logomesh-0.1.0/.gitignore +87 -0
- logomesh-0.1.0/AGENTS.md +62 -0
- logomesh-0.1.0/CLAUDE.md +87 -0
- logomesh-0.1.0/Dockerfile.sandbox +118 -0
- logomesh-0.1.0/Makefile +18 -0
- logomesh-0.1.0/PKG-INFO +213 -0
- logomesh-0.1.0/README.md +177 -0
- logomesh-0.1.0/docs/honest_hit_rates.md +197 -0
- logomesh-0.1.0/docs/pilot_deployment.md +164 -0
- logomesh-0.1.0/docs/pipeline.md +1183 -0
- logomesh-0.1.0/examples/fintech-payments-demo/.env.example +14 -0
- logomesh-0.1.0/examples/fintech-payments-demo/Dockerfile +19 -0
- logomesh-0.1.0/examples/fintech-payments-demo/README.md +185 -0
- logomesh-0.1.0/examples/fintech-payments-demo/core/__init__.py +0 -0
- logomesh-0.1.0/examples/fintech-payments-demo/core/apps.py +6 -0
- logomesh-0.1.0/examples/fintech-payments-demo/core/views.py +8 -0
- logomesh-0.1.0/examples/fintech-payments-demo/docker-compose.yml +69 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/__init__.py +3 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/asgi.py +3 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/celery.py +16 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/settings.py +155 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/urls.py +13 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fintech_payments/wsgi.py +3 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_hard_fk_violation.json +31 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_hard_integrity_race.json +33 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_hard_stale_data.json +32 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_integrity_event.json +33 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_key_event.json +30 -0
- logomesh-0.1.0/examples/fintech-payments-demo/fixtures/sentry_type_event.json +31 -0
- logomesh-0.1.0/examples/fintech-payments-demo/manage.py +14 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/__init__.py +0 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/admin.py +25 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/apps.py +6 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/crash_lab.py +58 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/management/__init__.py +0 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/management/commands/__init__.py +0 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/management/commands/seed_demo_data.py +21 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/management/commands/trigger_easy_crashes.py +26 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/management/commands/trigger_hard_cases.py +40 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/migrations/0001_initial.py +90 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/migrations/__init__.py +0 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/models.py +90 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/stripe_client.py +35 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/tasks.py +376 -0
- logomesh-0.1.0/examples/fintech-payments-demo/payments/views.py +98 -0
- logomesh-0.1.0/examples/fintech-payments-demo/requirements.txt +10 -0
- logomesh-0.1.0/examples/fintech-payments-demo/scripts/trigger_crash.py +73 -0
- logomesh-0.1.0/examples/fintech-payments-demo/tests/test_repro_7470722069.py +5 -0
- logomesh-0.1.0/examples/fintech-payments-demo/tests/test_repro_7470722082.py +5 -0
- logomesh-0.1.0/examples/fintech-payments-demo/tests/test_repro_7470722083.py +5 -0
- logomesh-0.1.0/examples/fintech_billing/__init__.py +0 -0
- logomesh-0.1.0/examples/fintech_billing/billing_ops.py +43 -0
- logomesh-0.1.0/examples/fintech_billing/billing_v2.py +185 -0
- logomesh-0.1.0/examples/fintech_billing/checkout.py +28 -0
- logomesh-0.1.0/examples/fintech_billing_v3/__init__.py +0 -0
- logomesh-0.1.0/examples/fintech_billing_v3/bugs.py +338 -0
- logomesh-0.1.0/examples/fintech_billing_v5/__init__.py +0 -0
- logomesh-0.1.0/examples/fintech_billing_v5/bugs.py +407 -0
- logomesh-0.1.0/logomesh_orchestrator.py +3305 -0
- logomesh-0.1.0/migrations/0001_installation_secrets.sql +46 -0
- logomesh-0.1.0/migrations/0002_runs.sql +42 -0
- logomesh-0.1.0/policies/docker/no_root.rego +30 -0
- logomesh-0.1.0/policies/docker/registry_allowlist.rego +28 -0
- logomesh-0.1.0/policies/docker/secure_defaults.rego +48 -0
- logomesh-0.1.0/policies/k8s/no_privilege_escalation.rego +49 -0
- logomesh-0.1.0/policies/k8s/no_root.rego +33 -0
- logomesh-0.1.0/policies/k8s/readonly_rootfs.rego +29 -0
- logomesh-0.1.0/policies/k8s/registry_allowlist.rego +40 -0
- logomesh-0.1.0/policies/k8s/resource_limits.rego +42 -0
- logomesh-0.1.0/pyproject.toml +80 -0
- logomesh-0.1.0/scripts/honest_hit_rates.py +332 -0
- logomesh-0.1.0/scripts/run_demo_orchestrator_suite.py +241 -0
- logomesh-0.1.0/scripts/seed_billing_v2.py +172 -0
- logomesh-0.1.0/scripts/seed_sentry_issue.py +70 -0
- logomesh-0.1.0/src/anthropic_adapter.py +3 -0
- logomesh-0.1.0/src/business_logic/orm_materializer.py +300 -0
- logomesh-0.1.0/src/business_logic/repo_introspection.py +403 -0
- logomesh-0.1.0/src/business_logic/sandbox/__init__.py +1011 -0
- logomesh-0.1.0/src/business_logic/sandbox/dep_installer.py +1072 -0
- logomesh-0.1.0/src/business_logic/sandbox_planner.py +224 -0
- logomesh-0.1.0/src/capture/__init__.py +142 -0
- logomesh-0.1.0/src/capture/_log.py +35 -0
- logomesh-0.1.0/src/capture/hooks/__init__.py +0 -0
- logomesh-0.1.0/src/capture/hooks/django_hook.py +179 -0
- logomesh-0.1.0/src/capture/hooks/fastapi_hook.py +316 -0
- logomesh-0.1.0/src/capture/hooks/http_hook.py +280 -0
- logomesh-0.1.0/src/capture/hooks/redis_hook.py +93 -0
- logomesh-0.1.0/src/capture/hooks/sqlalchemy_hook.py +175 -0
- logomesh-0.1.0/src/capture/ring_buffer.py +77 -0
- logomesh-0.1.0/src/capture/safety.py +88 -0
- logomesh-0.1.0/src/capture/sentry_hook.py +189 -0
- logomesh-0.1.0/src/capture/serializer.py +173 -0
- logomesh-0.1.0/src/capture/stripe_meta.py +133 -0
- logomesh-0.1.0/src/capture/writer.py +52 -0
- logomesh-0.1.0/src/cli/__init__.py +9 -0
- logomesh-0.1.0/src/cli/artifact.py +183 -0
- logomesh-0.1.0/src/cli/codebase_context.py +347 -0
- logomesh-0.1.0/src/cli/draft_pr.py +284 -0
- logomesh-0.1.0/src/cli/git_utils.py +92 -0
- logomesh-0.1.0/src/cli/main.py +84 -0
- logomesh-0.1.0/src/cli/repro.py +534 -0
- logomesh-0.1.0/src/cli/state_fixtures.py +176 -0
- logomesh-0.1.0/src/core/__init__.py +3 -0
- logomesh-0.1.0/src/core/anthropic_adapter.py +117 -0
- logomesh-0.1.0/src/core/audit_log.py +168 -0
- logomesh-0.1.0/src/core/config.py +241 -0
- logomesh-0.1.0/src/core/installation_secrets.py +368 -0
- logomesh-0.1.0/src/core/llm_utils.py +122 -0
- logomesh-0.1.0/src/core/logomesh_log.py +107 -0
- logomesh-0.1.0/src/core/pii_redactor.py +307 -0
- logomesh-0.1.0/src/core/responses_adapter.py +94 -0
- logomesh-0.1.0/src/core/runs_log.py +178 -0
- logomesh-0.1.0/src/core/supabase_client.py +326 -0
- logomesh-0.1.0/src/core/usage_tracker.py +173 -0
- logomesh-0.1.0/src/llm_utils.py +2 -0
- logomesh-0.1.0/src/logomesh_log.py +3 -0
- logomesh-0.1.0/src/oracles/__init__.py +0 -0
- logomesh-0.1.0/src/oracles/api_error_fixtures.py +115 -0
- logomesh-0.1.0/src/oracles/crash_context.py +489 -0
- logomesh-0.1.0/src/oracles/hypothesis_report.py +769 -0
- logomesh-0.1.0/src/oracles/sentry_replay.py +835 -0
- logomesh-0.1.0/src/oracles/sentry_replay_v2.py +588 -0
- logomesh-0.1.0/src/oracles/signal_detectors.py +510 -0
- logomesh-0.1.0/src/responses_adapter.py +3 -0
- logomesh-0.1.0/src/server/__init__.py +0 -0
- logomesh-0.1.0/src/server/app.py +98 -0
- logomesh-0.1.0/src/server/github_comment.py +95 -0
- logomesh-0.1.0/src/server/installations_api.py +532 -0
- logomesh-0.1.0/src/server/sentry_comment.py +206 -0
- logomesh-0.1.0/src/server/sentry_webhook.py +479 -0
- logomesh-0.1.0/src/server/slack_summary.py +146 -0
- logomesh-0.1.0/src/supabase_client.py +3 -0
- logomesh-0.1.0/src/usage_tracker.py +3 -0
- logomesh-0.1.0/tests/fixtures/bv2_s1_exchange_gain_loss.json +48 -0
- logomesh-0.1.0/tests/fixtures/bv2_s2_payment_reconciliation.json +51 -0
- logomesh-0.1.0/tests/fixtures/bv2_s3_order_discount.json +50 -0
- logomesh-0.1.0/tests/fixtures/bv2_s4_gstin_lookup.json +50 -0
- logomesh-0.1.0/tests/fixtures/bv2_s5_quantize_price.json +49 -0
- logomesh-0.1.0/tests/fixtures/bv2_s5_webhook_amount.json +45 -0
- logomesh-0.1.0/tests/fixtures/sentry_invoice_date_invalid.json +36 -0
- logomesh-0.1.0/tests/fixtures/sentry_money_in_words_none.json +37 -0
- logomesh-0.1.0/tests/fixtures/sentry_negative_qty_event.json +45 -0
- logomesh-0.1.0/tests/fixtures/sentry_payment_total_none_rate.json +37 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_01_celery_refund_stale_db.json +205 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_02_balance_debit_race.json +226 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_03_dep_version_conflict.json +225 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_04_tz_naive_datetime.json +244 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_05_stripe_c_ext_crash.json +243 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_06_db_pool_exhaustion.json +394 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_07_tax_rule_drift.json +281 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_08_webhook_idempotency.json +300 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_09_pii_in_locals.json +316 -0
- logomesh-0.1.0/tests/fixtures/sentry_v3_10_redis_lock_missing.json +453 -0
- logomesh-0.1.0/tests/fixtures/sentry_v4_01_stripe_webhook_dup.json +288 -0
- logomesh-0.1.0/tests/fixtures/sentry_v4_02_stale_data_celery.json +236 -0
- logomesh-0.1.0/tests/fixtures/sentry_v4_03_async_lost_update.json +183 -0
- logomesh-0.1.0/tests/fixtures/sentry_v4_04_tenacity_lock_wait.json +205 -0
- logomesh-0.1.0/tests/fixtures/sentry_v4_05_pm_doesnotexist.json +262 -0
- logomesh-0.1.0/tests/fixtures/sentry_v5_01_rtp_payment_timeout.json +198 -0
- logomesh-0.1.0/tests/fixtures/sentry_v5_02_processor_ransomware.json +232 -0
- logomesh-0.1.0/tests/fixtures/sentry_v5_03_fraud_model_drift.json +266 -0
- logomesh-0.1.0/tests/fixtures/sentry_v5_04_stablecoin_reconciliation.json +301 -0
- logomesh-0.1.0/tests/fixtures/sentry_v5_05_consent_token_expired.json +329 -0
- logomesh-0.1.0/tests/fixtures/sentry_v5_06_mobile_confirm_crash.json +367 -0
- logomesh-0.1.0/tests/fixtures/sentry_v5_07_sponsor_bank_ratelimit.json +388 -0
- logomesh-0.1.0/tests/fixtures/sentry_v5_08_agent_authorization.json +445 -0
- logomesh-0.1.0/tests/fixtures/state_and_validation_fixture.py +25 -0
- logomesh-0.1.0/tests/fixtures/v3_index.json +72 -0
- logomesh-0.1.0/tests/fixtures/v4_index.json +37 -0
- logomesh-0.1.0/tests/fixtures/v5_index.json +58 -0
- logomesh-0.1.0/tests/test_api_error_fixtures.py +44 -0
- logomesh-0.1.0/tests/test_artifact_seal.py +298 -0
- logomesh-0.1.0/tests/test_capture_fastapi_middleware.py +198 -0
- logomesh-0.1.0/tests/test_capture_http_hook.py +115 -0
- logomesh-0.1.0/tests/test_capture_install.py +20 -0
- logomesh-0.1.0/tests/test_capture_ring_buffer.py +56 -0
- logomesh-0.1.0/tests/test_capture_safety.py +73 -0
- logomesh-0.1.0/tests/test_capture_serializer.py +78 -0
- logomesh-0.1.0/tests/test_capture_stripe_meta.py +73 -0
- logomesh-0.1.0/tests/test_capture_writer.py +30 -0
- logomesh-0.1.0/tests/test_cli_artifact.py +43 -0
- logomesh-0.1.0/tests/test_cli_git_utils.py +97 -0
- logomesh-0.1.0/tests/test_cli_repro.py +81 -0
- logomesh-0.1.0/tests/test_core_config.py +233 -0
- logomesh-0.1.0/tests/test_crash_context.py +335 -0
- logomesh-0.1.0/tests/test_dep_installer.py +96 -0
- logomesh-0.1.0/tests/test_github_comment.py +118 -0
- logomesh-0.1.0/tests/test_hypothesis_report.py +453 -0
- logomesh-0.1.0/tests/test_installation_secrets.py +108 -0
- logomesh-0.1.0/tests/test_installations_api.py +375 -0
- logomesh-0.1.0/tests/test_llm_utils.py +54 -0
- logomesh-0.1.0/tests/test_logomesh_log.py +53 -0
- logomesh-0.1.0/tests/test_orchestrator_step_guard.py +68 -0
- logomesh-0.1.0/tests/test_orm_materializer.py +191 -0
- logomesh-0.1.0/tests/test_pii_redactor.py +348 -0
- logomesh-0.1.0/tests/test_prepare_env_module_pinning.py +140 -0
- logomesh-0.1.0/tests/test_prepare_environment.py +201 -0
- logomesh-0.1.0/tests/test_production_sandbox_refusal.py +103 -0
- logomesh-0.1.0/tests/test_repo_introspection.py +152 -0
- logomesh-0.1.0/tests/test_request_replay_synthesis.py +196 -0
- logomesh-0.1.0/tests/test_sandbox.py +69 -0
- logomesh-0.1.0/tests/test_sandbox_planner.py +179 -0
- logomesh-0.1.0/tests/test_sandbox_python_version.py +37 -0
- logomesh-0.1.0/tests/test_self_refusal_loop.py +186 -0
- logomesh-0.1.0/tests/test_sentry_comment.py +117 -0
- logomesh-0.1.0/tests/test_sentry_replay.py +188 -0
- logomesh-0.1.0/tests/test_sentry_replay_extraction.py +325 -0
- logomesh-0.1.0/tests/test_sentry_replay_v2.py +14 -0
- logomesh-0.1.0/tests/test_sentry_webhook.py +281 -0
- logomesh-0.1.0/tests/test_signal_detectors.py +300 -0
- logomesh-0.1.0/tests/test_slack_summary.py +138 -0
- logomesh-0.1.0/tests/test_supabase_client.py +12 -0
- logomesh-0.1.0/tests/test_supabase_client_env.py +39 -0
- logomesh-0.1.0/tests/test_timeout_salvage.py +54 -0
- logomesh-0.1.0/tests/test_usage_tracker.py +77 -0
- logomesh-0.1.0/uv.lock +3075 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# === GitHub App ===
|
|
2
|
+
# Create at https://github.com/settings/apps
|
|
3
|
+
GITHUB_APP_ID=123456
|
|
4
|
+
GITHUB_PRIVATE_KEY_PATH=./private-key.pem
|
|
5
|
+
# Or set the key directly (newlines escaped as \n):
|
|
6
|
+
# GITHUB_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----\n...\n-----END RSA PRIVATE KEY-----"
|
|
7
|
+
GITHUB_WEBHOOK_SECRET=your-webhook-secret
|
|
8
|
+
|
|
9
|
+
# === LLM ===
|
|
10
|
+
OPENAI_API_KEY=sk-proj-your-key-here
|
|
11
|
+
OPENAI_MODEL=gpt-4o-mini
|
|
12
|
+
# OPENAI_BASE_URL=https://api.openai.com/v1 # override for Azure/local
|
|
13
|
+
|
|
14
|
+
# Fallback: if OPENAI_API_KEY is not set, Gemini is used instead
|
|
15
|
+
# GEMINI_API_KEY=your-gemini-key
|
|
16
|
+
|
|
17
|
+
# === Sandbox ===
|
|
18
|
+
# SANDBOX_TIMEOUT=15 # seconds per test run (default: 15)
|
|
19
|
+
|
|
20
|
+
# === Oracle Lanes ===
|
|
21
|
+
# Each lane can be individually enabled/disabled.
|
|
22
|
+
|
|
23
|
+
# Sentry Replay: replays real production exceptions against PR code.
|
|
24
|
+
# Requires Sentry API access. The lane auto-disables if credentials are not set.
|
|
25
|
+
# SENTRY_AUTH_TOKEN=sntrys_your-token-here
|
|
26
|
+
# SENTRY_ORG=your-org
|
|
27
|
+
# SENTRY_PROJECT=your-project
|
|
28
|
+
# SENTRY_BASE_URL=https://sentry.io
|
|
29
|
+
# LOGOMESH_ENABLE_SENTRY_LANE=1
|
|
30
|
+
|
|
31
|
+
# Static Patterns: auth guard, secrets, skipped tests, debug flags, migration order.
|
|
32
|
+
# LOGOMESH_ENABLE_STATIC_LANE=1
|
|
33
|
+
|
|
34
|
+
# Type Contracts: test boundary inputs from type hints / Pydantic models. (coming soon)
|
|
35
|
+
# LOGOMESH_ENABLE_TYPE_LANE=1
|
|
36
|
+
|
|
37
|
+
# Behavioral Regression: compare base vs head output on same inputs. (coming soon)
|
|
38
|
+
# LOGOMESH_ENABLE_REGRESSION_LANE=1
|
|
39
|
+
|
|
40
|
+
# Test Augmentation: edge-case variants of existing test assertions. (coming soon)
|
|
41
|
+
# LOGOMESH_ENABLE_AUGMENTATION_LANE=1
|
|
42
|
+
|
|
43
|
+
# === Pipeline Config ===
|
|
44
|
+
# LOGOMESH_ENABLE_CLEAN_SIGNAL=1 # post "no issues found" on clean PRs
|
|
45
|
+
# LOGOMESH_MAX_FINDINGS_PER_FILE=5
|
|
46
|
+
# LOGOMESH_MAX_FINDINGS_PER_PR=15
|
|
47
|
+
# PIPELINE_TIMEOUT_SECONDS=240
|
|
48
|
+
# ENABLE_INLINE_REVIEW=1 # inline ```suggestion``` blocks
|
|
49
|
+
|
|
50
|
+
# === Supabase (multi-tenant install + run storage) ===
|
|
51
|
+
# You can set either SUPABASE_URL directly OR SUPABASE_PROJECT_REF.
|
|
52
|
+
# SUPABASE_PROJECT_REF=xyzcompanyabc123
|
|
53
|
+
# SUPABASE_URL=https://xyzcompanyabc123.supabase.co
|
|
54
|
+
# Prefer SUPABASE_SERVICE_KEY; SUPABASE_SERVICE_ROLE_KEY also supported.
|
|
55
|
+
# SUPABASE_SERVICE_KEY=eyJhbGciOi...
|
|
56
|
+
# LOGOMESH_MASTER_KEY=<32-byte urlsafe-base64 or 64-char hex>
|
|
57
|
+
|
|
58
|
+
# === Production ===
|
|
59
|
+
# LOGOMESH_ENV=production # disables subprocess fallback (requires Docker)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Report something that isn't working correctly
|
|
4
|
+
labels: bug
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
**Describe the bug**
|
|
8
|
+
What happened?
|
|
9
|
+
|
|
10
|
+
**To reproduce**
|
|
11
|
+
Steps to reproduce the behavior:
|
|
12
|
+
1. Run `...`
|
|
13
|
+
2. Send request to `...`
|
|
14
|
+
3. See error
|
|
15
|
+
|
|
16
|
+
**Expected behavior**
|
|
17
|
+
What should have happened instead?
|
|
18
|
+
|
|
19
|
+
**Environment**
|
|
20
|
+
- OS: [e.g. macOS, Ubuntu 22.04]
|
|
21
|
+
- Python version: [e.g. 3.11.5]
|
|
22
|
+
- uv version:
|
|
23
|
+
|
|
24
|
+
**Logs / output**
|
|
25
|
+
```
|
|
26
|
+
paste relevant logs here
|
|
27
|
+
```
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature request
|
|
3
|
+
about: Suggest an improvement or new feature
|
|
4
|
+
labels: enhancement
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
**What problem does this solve?**
|
|
8
|
+
Describe the problem or limitation you're running into.
|
|
9
|
+
|
|
10
|
+
**Proposed solution**
|
|
11
|
+
How would you solve it? Be specific about what you'd change.
|
|
12
|
+
|
|
13
|
+
**Alternatives considered**
|
|
14
|
+
Any other approaches you thought about?
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
## What does this PR do?
|
|
2
|
+
|
|
3
|
+
<!-- Brief description of the change -->
|
|
4
|
+
|
|
5
|
+
## Related issue
|
|
6
|
+
|
|
7
|
+
<!-- Link to the issue this PR addresses, e.g. Fixes #123 -->
|
|
8
|
+
|
|
9
|
+
## How was this tested?
|
|
10
|
+
|
|
11
|
+
<!-- Describe how you verified the change works -->
|
|
12
|
+
|
|
13
|
+
## Checklist
|
|
14
|
+
|
|
15
|
+
- [ ] My changes don't break existing functionality
|
|
16
|
+
- [ ] I've tested locally with `uv run pytest tests/ -v`
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
name: Test
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- name: Checkout repository
|
|
15
|
+
uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Setup Python
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: '3.12'
|
|
21
|
+
|
|
22
|
+
- name: Install uv
|
|
23
|
+
run: pip install uv
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: uv sync
|
|
27
|
+
|
|
28
|
+
- name: Run tests
|
|
29
|
+
run: uv run pytest tests/ -v
|
|
30
|
+
|
|
31
|
+
- name: Enforce quality gates
|
|
32
|
+
if: ${{ vars.ENFORCE_LOGOMESH_QUALITY_GATES == '1' }}
|
|
33
|
+
run: |
|
|
34
|
+
UV_CACHE_DIR=/tmp/uv-cache uv run python scripts/quality_gates.py \
|
|
35
|
+
--run results/real_prs_run_6.json \
|
|
36
|
+
--replay results/replay_from_run3_current.json \
|
|
37
|
+
--out results/quality_gates_latest.json \
|
|
38
|
+
--strict
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
.venv/
|
|
3
|
+
__pycache__/
|
|
4
|
+
*.pyc
|
|
5
|
+
*.egg-info/
|
|
6
|
+
.mypy_cache/
|
|
7
|
+
.pytest_cache/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
|
|
11
|
+
# Environment
|
|
12
|
+
.env
|
|
13
|
+
*.pem
|
|
14
|
+
|
|
15
|
+
# Data
|
|
16
|
+
*.db
|
|
17
|
+
*.db-shm
|
|
18
|
+
*.db-wal
|
|
19
|
+
*.db-journal
|
|
20
|
+
|
|
21
|
+
# OS
|
|
22
|
+
.DS_Store
|
|
23
|
+
|
|
24
|
+
# IDE
|
|
25
|
+
.idea/
|
|
26
|
+
.vscode/
|
|
27
|
+
.claude/
|
|
28
|
+
|
|
29
|
+
# JetBrains / PyCharm
|
|
30
|
+
*.iml
|
|
31
|
+
*.ipr
|
|
32
|
+
*.iws
|
|
33
|
+
.idea_modules/
|
|
34
|
+
out/
|
|
35
|
+
|
|
36
|
+
# PyCharm local/cached project data
|
|
37
|
+
.idea/**/workspace.xml
|
|
38
|
+
.idea/**/tasks.xml
|
|
39
|
+
.idea/**/usage.statistics.xml
|
|
40
|
+
.idea/**/dictionaries/
|
|
41
|
+
.idea/**/shelf/
|
|
42
|
+
.idea/**/httpRequests/
|
|
43
|
+
.idea/**/dataSources/
|
|
44
|
+
.idea/**/dataSources.local.xml
|
|
45
|
+
.idea/**/dataSources.ids
|
|
46
|
+
.idea/**/dataSources.xml
|
|
47
|
+
.idea/**/uiDesigner.xml
|
|
48
|
+
|
|
49
|
+
# JetBrains build output dirs
|
|
50
|
+
cmake-build-*/
|
|
51
|
+
|
|
52
|
+
# Project specific
|
|
53
|
+
docs/startup/
|
|
54
|
+
docs/archive/
|
|
55
|
+
archive/logomesh_legacy/results/
|
|
56
|
+
docs/refs-internal/
|
|
57
|
+
data/job_queue.sqlite3
|
|
58
|
+
|
|
59
|
+
# Internal strategy/agent docs (not for repo)
|
|
60
|
+
docs/agent_system/
|
|
61
|
+
docs/strategy/discovery/
|
|
62
|
+
docs/strategy/evaluation.md
|
|
63
|
+
docs/strategy/research_validation.md
|
|
64
|
+
docs/strategy/narrowing_philosophy.md
|
|
65
|
+
docs/prompts/
|
|
66
|
+
docs/internal/
|
|
67
|
+
docs/run_results/
|
|
68
|
+
|
|
69
|
+
# Private internal files — kept locally, never push
|
|
70
|
+
archive/logomesh_legacy/ref-for-agents/
|
|
71
|
+
archive/logomesh_legacy/ruflo_config.json
|
|
72
|
+
.agents/
|
|
73
|
+
|
|
74
|
+
.playwright-mcp/
|
|
75
|
+
/docs/superpowers/
|
|
76
|
+
.mcp.json
|
|
77
|
+
skills-lock.json
|
|
78
|
+
|
|
79
|
+
*.log
|
|
80
|
+
*.tmp
|
|
81
|
+
*.bak
|
|
82
|
+
*.swp
|
|
83
|
+
*.python-version
|
|
84
|
+
*.orig
|
|
85
|
+
|
|
86
|
+
# ALL LEGACY CODE
|
|
87
|
+
archive/
|
logomesh-0.1.0/AGENTS.md
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
## Mission and product constraints
|
|
4
|
+
- logomesh is post-incident **deterministic repro with proof**: only ship artifacts/PRs/comments when `deterministic_repro` reproduces and the exception type matches (`logomesh_orchestrator.py`, `src/oracles/sentry_replay_v2.py`, `src/cli/artifact.py`).
|
|
5
|
+
- Keep output evidence-first and terse (repro call -> observed result, expected property, location/state) and never add scores or "looks good" comments; non-repro or mismatch should stay silent or `needs_human_review` (`src/cli/repro.py`, `src/server/sentry_comment.py`, `src/server/github_comment.py`).
|
|
6
|
+
- LLM output is advisory only; evidence-path test bytes are deterministic from frame locals (`logomesh_orchestrator.py`, `src/oracles/sentry_replay_v2.py`).
|
|
7
|
+
|
|
8
|
+
## Pipeline mental model (read this first)
|
|
9
|
+
- Sentry webhook ingress is fast-ACK + async dispatch: verify HMAC signature + timestamp window + idempotency, then 202 ACK and semaphore-limited dispatch (`src/server/sentry_webhook.py`).
|
|
10
|
+
- Core repro flow: fetch event -> innermost in-app frame -> resolve source -> build deterministic test -> sandbox run -> verify exception match (`src/oracles/sentry_replay.py`, `src/oracles/sentry_replay_v2.py`, `src/business_logic/sandbox/__init__.py`).
|
|
11
|
+
- LangGraph orchestrator wraps the deterministic core: only `deterministic_repro` yields sealed bytes; other tools are advisory (context, deps prep, commenting) (`logomesh_orchestrator.py`).
|
|
12
|
+
- `logomesh repro` is the primary product path; `logomesh check` runs the single-file verify pipeline (`src/cli/repro.py`, `src/cli/check.py`, `src/core/pipeline.py`).
|
|
13
|
+
|
|
14
|
+
## Project-specific conventions that matter
|
|
15
|
+
- Single-file verification in `verify_file`: build change scope -> pack parseable source -> make sandbox importable -> generate tests -> cap tests -> inject target stubs -> dependency-aware sandbox run -> classify -> validate -> repro/regression gate -> `VerifyResult` (`src/core/pipeline.py`, `src/core/repro_gate.py`).
|
|
16
|
+
- Property-inference preprocessing gates (fire before any LLM call): (a) `_is_trivial_passthrough` drops wrapper functions whose body is a single delegation (`return self.inner(...)`); (b) arity filter drops properties whose `input_code` doesn't match the function signature; (c) `_is_identity_dunder_property` drops no-op assertions on dunders; (d) `_counter_impl_probe` (one batched LLM call per function, gated by `ENABLE_COUNTER_IMPL_PROBE`, default on) drops properties that a semantically-wrong implementation could still satisfy — bias toward keeping on parse failure. These gates live in `src/business_logic/generator/inference.py` and are wired into BOTH `inference.infer_properties` and the legacy compat shim `src/business_logic/legacy/generator.py` (where `PropertyTestGenerator` actually resolves `infer_properties` / `infer_metamorphic_properties` from). Do not edit legacy's copy without also updating or re-importing from inference.
|
|
17
|
+
- Spotlighting is on by default in inference (`ENABLE_SPOTLIGHTING`): untrusted source is sentinel-rewritten per call before prompt assembly to blunt prompt-injection instructions embedded in code/comments (`_spotlight` in `src/business_logic/generator/inference.py`).
|
|
18
|
+
- Time-Travel Trace: the sandbox conftest (`_TRACE_HOOK` injected in both `_CONFTEST_AUTOMOCK` and `_CONFTEST_STRICT`) installs a `pytest_exception_interact` hook that walks the traceback to the frame inside `target.py`, captures its `f_locals` (with size/type guards), and writes `{func, lineno, locals}` into `finding.trace` (`src/business_logic/sandbox/__init__.py`, `src/business_logic/classifier/core.py`).
|
|
19
|
+
- Classifier artifact suppressors in `src/business_logic/classifier/core.py`: (a) `_trace_has_automock_leak` drops findings whose trace shows `MagicMock` in `target.py` locals (sandbox automock leaked into user code); (b) `_PY2_BUILTINS` set drops `NameError` for Py2-only names (`unicode`, `basestring`, ...) that are unreachable under Python 3 execution; (c) salt dunder globals (`__opts__`, `__salt__`, ...) are dropped as SaltStack-runtime-injected symbols. These filters must stay — they are the reason false positives (FPs) from fabric-style Py2-compat guards stay suppressed.
|
|
20
|
+
- Files with import chains known to fail sandbox collection are skipped early as structurally untestable (`is_structurally_untestable`, currently Twisted/Scrapy fragments) and counted in taxonomy (`src/business_logic/generator/inference.py`).
|
|
21
|
+
- Test generation must **not** wrap calls in `try/except`; failures should surface to pytest/classifier (`src/business_logic/generator/`).
|
|
22
|
+
- Function extraction is underscore-aware but not "public-only": runtime dunders in `_TESTABLE_DUNDERS` and modified `__init__` paths are intentionally testable; don't reintroduce blanket `_` filtering (`src/business_logic/generator/ast_analysis.py`).
|
|
23
|
+
- Diff-aware scope is semantic, not raw line fuzzing: changed symbols are extracted and packed into parseable source slices (`src/business_logic/change_scope/`, `src/core/pipeline.py`).
|
|
24
|
+
- Sandbox importability is intentionally rewritten: relative imports/external bases/module-level call assignments are mocked to keep `target.py` importable (`make_sandbox_importable` in `src/business_logic/legacy/generator.py`).
|
|
25
|
+
- Sandbox runs are dependency-aware: `Sandbox.run_dependency_aware` accepts dependency files and optional `deps_snapshot` mounts; orchestrator `prepare_environment` can build a snapshot when imports fail (`src/business_logic/sandbox/__init__.py`, `logomesh_orchestrator.py`).
|
|
26
|
+
- Logs are structured JSON only (`log.info/warn/error`) via `src/logomesh_log.py`; preserve `component` names for grepability.
|
|
27
|
+
|
|
28
|
+
## External integrations and boundaries
|
|
29
|
+
- Sentry webhook auth uses HMAC-SHA256 + timestamp replay window; idempotency is `(issue_id, sha256(payload))` with a 24h in-process LRU; concurrency is capped via `LOGOMESH_MAX_CONCURRENT_RUNS` (`src/server/sentry_webhook.py`).
|
|
30
|
+
- Supabase-backed installations + runs: secrets are AES-256-GCM encrypted and `resolve_secrets()` falls back to env vars; `supabase_client` uses an in-memory shim when credentials are missing (`src/core/installation_secrets.py`, `src/core/supabase_client.py`, `migrations/`).
|
|
31
|
+
- GitHub draft PRs use `gh` CLI first, then REST with `GITHUB_TOKEN` + `GITHUB_REPO`; server comment posting uses per-install tokens (`src/cli/draft_pr.py`, `src/server/github_comment.py`).
|
|
32
|
+
- Sentry + Slack comments are posted from the server layer using per-install secrets (`src/server/sentry_comment.py`, `src/server/slack_summary.py`).
|
|
33
|
+
- LLM provider selection is env-driven (`OPENAI_API_KEY` preferred, Anthropic/Gemini-compatible fallback); temperature/reasoning args come from `src/llm_utils.py`.
|
|
34
|
+
- Sandbox supports Docker hardened runs with subprocess fallback for local dev; path traversal protections and output redaction live in the sandbox (`src/business_logic/sandbox/__init__.py`).
|
|
35
|
+
|
|
36
|
+
## Developer workflows (high signal)
|
|
37
|
+
- Install deps: `uv sync`
|
|
38
|
+
- Run tests: `uv run pytest tests/ -v`
|
|
39
|
+
- Build sandbox image (required for realistic runs): `docker build -t logomesh-startup-sandbox:latest -f Dockerfile.sandbox .`
|
|
40
|
+
- Run repro: `uv run logomesh repro <sentry-url>`
|
|
41
|
+
- Emit artifact: `uv run logomesh repro <url> --artifact`
|
|
42
|
+
- Draft PR: `uv run logomesh repro <url> --draft-pr`
|
|
43
|
+
- Run local verify: `uv run logomesh check <path>`
|
|
44
|
+
- Run API server locally: `LOGOMESH_ENV=production uv run uvicorn src.server.app:app --port 8080` (`LOGOMESH_ENV=production` disables the subprocess sandbox fallback, so Docker is required)
|
|
45
|
+
- Replay a historical run: `uv run python scripts/replay_run.py --from results/real_prs_run_3.json --out results/replay_from_run3_current.json`
|
|
46
|
+
- Enforce quality gates (also in CI): `UV_CACHE_DIR=/tmp/uv-cache uv run python scripts/quality_gates.py --run results/real_prs_run_6.json --replay results/replay_from_run3_current.json --out results/quality_gates_latest.json --strict`
|
|
47
|
+
- Apply Supabase migrations: `psql $SUPABASE_DB_URL -f migrations/0001_installation_secrets.sql` and `psql $SUPABASE_DB_URL -f migrations/0002_runs.sql` (or `supabase db push`)
|
|
48
|
+
- CI (`.github/workflows/ci.yml`) runs `uv run pytest tests/ -v` on pushes to `main` and on PRs targeting `main`; quality gates run only when the repository variable `ENFORCE_LOGOMESH_QUALITY_GATES` is set to `1`
|
|
49
|
+
|
|
50
|
+
## Where to look before editing
|
|
51
|
+
- Orchestrator + compliance gates: `logomesh_orchestrator.py`
|
|
52
|
+
- Deterministic repro + Sentry parsing: `src/oracles/sentry_replay_v2.py`, `src/oracles/sentry_replay.py`
|
|
53
|
+
- CLI entrypoints + artifact/PR output: `src/cli/repro.py`, `src/cli/check.py`, `src/cli/artifact.py`, `src/cli/draft_pr.py`
|
|
54
|
+
- Core verify pipeline + repro gate: `src/core/pipeline.py`, `src/core/repro_gate.py`
|
|
55
|
+
- Webhooks/API + comment channels: `src/server/sentry_webhook.py`, `src/server/installations_api.py`, `src/server/github_comment.py`, `src/server/sentry_comment.py`, `src/server/slack_summary.py`
|
|
56
|
+
- Secrets + persistence: `src/core/installation_secrets.py`, `src/core/supabase_client.py`, `migrations/`
|
|
57
|
+
- Sandbox security and execution modes: `src/business_logic/sandbox/__init__.py`
|
|
58
|
+
- Generation + validation: `src/business_logic/generator/`, `src/business_logic/validator/`, `src/business_logic/scaffold_filter/__init__.py`
|
|
59
|
+
- Diff/change-scope logic: `src/business_logic/change_scope/`
|
|
60
|
+
- Deep debug walkthrough: `docs/pipeline.md`
|
|
61
|
+
- Launch/replay gate definitions: `docs/refs/QUALITY_GATES.md`
|
|
62
|
+
- Production deployment runbook (systemd/nginx/webhook wiring): `docs/refs/DEPLOYMENT.md`
|
logomesh-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# logomesh
|
|
2
|
+
|
|
3
|
+
## What This Is
|
|
4
|
+
CLI / GitHub App / MCP server that finds logic bugs and edge-case failures in Python code by inferring invariants, then attacking them with adversarial inputs in a Docker sandbox. Only reports when it has concrete, reproducible evidence.
|
|
5
|
+
|
|
6
|
+
Primary surface: `logomesh repro <sentry-url>` — paste a Sentry issue URL, get a failing pytest against your current branch with audit-ready post-incident evidence mapped to **SOC2 CC7.3 + CC7.4 + PCI DSS 12.10.5** (incident response). Earlier docs referenced PCI 6.3.2; that control covers pre-release code review and is the wrong mapping for a post-incident product, so the mapping was corrected to the controls listed above.
|
|
7
|
+
|
|
8
|
+
## Output Format
|
|
9
|
+
Lead with the violated property and the evidence.
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
## logomesh found 1 issue
|
|
13
|
+
|
|
14
|
+
### Negative quantity bypasses checkout validation
|
|
15
|
+
**Property:** Order total should always be ≥ 0
|
|
16
|
+
**I called:** `checkout(item_id=1, qty=-5)`
|
|
17
|
+
**Got:** Order created with total `-$49.95`
|
|
18
|
+
**Location:** checkout.py, line 42
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
No score. No preliminary comment. Silence = clean.
|
|
22
|
+
|
|
23
|
+
## Key Paths
|
|
24
|
+
- `src/cli/` — `repro.py`, `artifact.py`, `draft_pr.py`, `check.py`, `main.py`
|
|
25
|
+
- `src/oracles/sentry_replay.py` — Sentry event fetch, frame locals → pytest synthesis
|
|
26
|
+
- `src/business_logic/generator/` — inference, ast_analysis, codegen, models
|
|
27
|
+
- `src/business_logic/sandbox/` — Docker sandbox + `_TRACE_HOOK` injection
|
|
28
|
+
- `src/business_logic/classifier/` — pytest-output → findings, artifact suppression
|
|
29
|
+
- `src/business_logic/validator/`, `src/business_logic/scaffold_filter/` — LLM reachability + static drops
|
|
30
|
+
- `src/business_logic/deep_checks/` — static signals + deep probes
|
|
31
|
+
- `src/core/` — llm_utils, logomesh_log, usage_tracker, config
|
|
32
|
+
- `src/capture/` — runtime capture hooks (FastAPI, Django, SQLAlchemy, Redis, Stripe)
|
|
33
|
+
- `tests/` — 500 unit tests
|
|
34
|
+
- `docs/pipeline.md` — full pipeline walkthrough for contributors
|
|
35
|
+
- `Dockerfile.sandbox` — sandbox image (python:3.12-slim + pytest + pytest-json-report)
|
|
36
|
+
- `logomesh_legacy/` — archived GitHub App, MCP server, benchmark data (kept for reference)
|
|
37
|
+
|
|
38
|
+
## Architecture
|
|
39
|
+
Single-pass pipeline per changed Python file:
|
|
40
|
+
1. AST extracts public functions + testable dunders
|
|
41
|
+
2. **Passthrough gate** drops trivial wrappers (`_is_trivial_passthrough`)
|
|
42
|
+
3. LLM infers properties/invariants per surviving function
|
|
43
|
+
4. **Counter-impl probe** drops weak properties a wrong-impl could satisfy (1 batched LLM call/function)
|
|
44
|
+
5. Adversarial tests generated targeting remaining properties + edge-case / metamorphic / toxic variants
|
|
45
|
+
6. Executed in Docker sandbox (airgapped, nobody user, memory/PID limits, randomized report filename)
|
|
46
|
+
7. **Time-Travel Trace** hook captures `f_locals` at crash inside `target.py`
|
|
47
|
+
8. Classifier parses pytest output, drops automock-leak / Py2-builtin / scope-slice artifacts
|
|
48
|
+
9. Validator: LLM confirms crash is caller-reachable (1 call per finding)
|
|
49
|
+
10. Repro + base regression + two-signal gates
|
|
50
|
+
11. Formatter posts only if evidence survives
|
|
51
|
+
|
|
52
|
+
`logomesh repro` path:
|
|
53
|
+
1. Fetch Sentry event → extract innermost app frame + locals
|
|
54
|
+
2. PII redact frame locals (PAN regex + field-name matching)
|
|
55
|
+
3. Synthesize pytest from locals (deterministic) or LLM (default)
|
|
56
|
+
4. Run in sandbox → retry once on no-repro
|
|
57
|
+
5. Emit result / compliance artifact / draft PR
|
|
58
|
+
|
|
59
|
+
## Dev Commands
|
|
60
|
+
```bash
|
|
61
|
+
uv sync # install deps
|
|
62
|
+
uv run pytest tests/ -v # run tests (500 passing)
|
|
63
|
+
docker build -t logomesh-startup-sandbox:latest -f Dockerfile.sandbox .
|
|
64
|
+
uv run logomesh repro <sentry-url> # reproduce a crash
|
|
65
|
+
uv run logomesh repro <url> --artifact # + PCI/SOC2 envelope
|
|
66
|
+
uv run logomesh repro <url> --draft-pr # + open GitHub draft PR
|
|
67
|
+
uv run logomesh check <path> # run check pipeline on local file
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Key Design Rules
|
|
71
|
+
- Only report findings with concrete evidence (violated property + reproducible input/output)
|
|
72
|
+
- Never pip install from PR/user code in sandbox
|
|
73
|
+
- Webhook must ACK in <10s — enqueue and process asynchronously
|
|
74
|
+
- Semaphore(3) concurrency limit, 120s pipeline timeout
|
|
75
|
+
- Graceful degradation: no Docker → subprocess fallback; LLM fails → edge-case tests only
|
|
76
|
+
- Tests must NOT wrap calls in try/except
|
|
77
|
+
- Sandbox: path traversal protection, 512KB file size cap, binary rejection
|
|
78
|
+
- Generator input capped at 4000 chars; retry once on LLM failure
|
|
79
|
+
- PII redaction runs before any frame locals reach LLM or generated test code
|
|
80
|
+
|
|
81
|
+
## What Was Removed (do not resurrect)
|
|
82
|
+
- `red_logic/` — MCTS orchestrator, dependency_analyzer, semantic_analyzer, constraint_breaker
|
|
83
|
+
- `scoring.py` / CIS formula — meaningless for PRs
|
|
84
|
+
- `compare_vectors.py` / VectorScorer — 400MB model, not needed
|
|
85
|
+
- `analyzer.py` / SemanticAuditor — not used in pipeline
|
|
86
|
+
- Two-pass pipeline / preliminary comment — post nothing until crash confirmed
|
|
87
|
+
- `src/github_app/`, `src/mcp_server/` — moved to `logomesh_legacy/` (deprioritized surfaces)
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Sandbox image for running PR tests in isolation.
|
|
2
|
+
# Build: docker build -t logomesh-startup-sandbox:latest -f Dockerfile.sandbox .
|
|
3
|
+
#
|
|
4
|
+
# Fat image: native toolchains baked in so PRs against packages with
|
|
5
|
+
# C/Rust extensions (datafusion, pydantic-core, lxml, psycopg, protobuf)
|
|
6
|
+
# can build+test without network.
|
|
7
|
+
FROM python:3.12-slim
|
|
8
|
+
|
|
9
|
+
ENV DEBIAN_FRONTEND=noninteractive \
|
|
10
|
+
PIP_NO_CACHE_DIR=1 \
|
|
11
|
+
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
|
12
|
+
CARGO_HOME=/opt/cargo \
|
|
13
|
+
RUSTUP_HOME=/opt/rustup \
|
|
14
|
+
PATH=/opt/cargo/bin:$PATH \
|
|
15
|
+
TZ=UTC \
|
|
16
|
+
LANG=C.UTF-8 \
|
|
17
|
+
LC_ALL=C.UTF-8 \
|
|
18
|
+
# --- Determinism layer (Task #4: never-fails) ---
|
|
19
|
+
# PYTHONHASHSEED=0: dict/set iteration order stable across runs so the same
|
|
20
|
+
# test + same input always produces the same output. Without this, findings
|
|
21
|
+
# can "flap" between runs, which destroys dev trust exactly once.
|
|
22
|
+
PYTHONHASHSEED=0 \
|
|
23
|
+
# No .pyc artifacts on the read-only FS — prevents permission errors at import.
|
|
24
|
+
PYTHONDONTWRITEBYTECODE=1 \
|
|
25
|
+
# Streams not buffered — every log line lands in real time.
|
|
26
|
+
PYTHONUNBUFFERED=1 \
|
|
27
|
+
# Fixed build timestamp (2024-01-01T00:00:00Z) for tools that honour SOURCE_DATE_EPOCH; it is not a random seed.
|
|
28
|
+
SOURCE_DATE_EPOCH=1704067200 \
|
|
29
|
+
# Locale already C.UTF-8 above — keeps sort/str.isdigit/etc. deterministic.
|
|
30
|
+
PYTHONIOENCODING=utf-8
|
|
31
|
+
|
|
32
|
+
# Native build deps:
|
|
33
|
+
# gcc/g++/make/pkg-config — C/C++ extensions
|
|
34
|
+
# libpq-dev — psycopg / asyncpg
|
|
35
|
+
# libxml2-dev libxslt1-dev — lxml
|
|
36
|
+
# libssl-dev libffi-dev — cryptography, cffi
|
|
37
|
+
# zlib1g-dev libjpeg-dev — Pillow-ish
|
|
38
|
+
# protobuf-compiler — protoc for grpc/proto codegen
|
|
39
|
+
# curl ca-certificates — rustup bootstrap
|
|
40
|
+
# git — pip VCS installs if ever needed
|
|
41
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
42
|
+
gcc g++ make pkg-config \
|
|
43
|
+
clang \
|
|
44
|
+
libpq-dev \
|
|
45
|
+
libxml2-dev libxslt1-dev \
|
|
46
|
+
libssl-dev libffi-dev \
|
|
47
|
+
zlib1g-dev libjpeg-dev \
|
|
48
|
+
protobuf-compiler \
|
|
49
|
+
curl ca-certificates git \
|
|
50
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
51
|
+
|
|
52
|
+
# Rust toolchain (stable, minimal profile) for pydantic-core, cryptography>=42,
|
|
53
|
+
# orjson, tokenizers, polars, datafusion, etc.
|
|
54
|
+
# Downloads the rustup installer over HTTPS (TLS >= 1.2) and runs it
# non-interactively (-y) with the stable toolchain and the minimal profile.
# The final chmod grants every user read access, plus search/execute on
# directories and already-executable files, under /opt/cargo and /opt/rustup —
# presumably so the non-root user selected by `USER nobody` at the end of this
# file can use the toolchain (confirm).
# NOTE(review): the `curl | sh` pipeline's status is that of `sh` only, so a
# failed download is not detected by the pipe itself — confirm this is acceptable.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
|
|
55
|
+
| sh -s -- -y --default-toolchain stable --profile minimal \
|
|
56
|
+
&& chmod -R a+rX /opt/cargo /opt/rustup
|
|
57
|
+
|
|
58
|
+
# Test tooling baked into the sandbox image.
# Required: pytest, pytest-json-report, hypothesis, pytest-xdist, crosshair,
# time-machine, responses, respx, vcrpy, pytest-recording — a failed install
# of any of these must fail the image build.
# Optional (best effort): atheris, pytest-sqlalchemy-mock, frontrun.
# Each optional install is wrapped in its own ( ... || true ) group: in the
# shell `&&` and `||` have equal precedence and associate left, so a bare
# `|| true` in the middle of the chain would also mask failures of every
# required install that precedes it.
RUN pip install pytest pytest-json-report hypothesis pytest-xdist \
|
|
59
|
+
&& pip install "crosshair-tool" "hypothesis[crosshair]" \
|
|
60
|
+
&& (pip install atheris || true) \
|
|
61
|
+
&& pip install time-machine responses respx vcrpy pytest-recording \
|
|
62
|
+
&& (pip install pytest-sqlalchemy-mock frontrun || true) \
|
|
63
|
+
&& rm -rf /root/.cache
|
|
64
|
+
|
|
65
|
+
# Pre-baked native extension libraries — most common PR targets.
|
|
66
|
+
# Installed so sandbox can import them without network access.
|
|
67
|
+
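# The single "|| true" below covers the whole chain ("&&" and "||" associate left with equal precedence), so the build succeeds even if any install here fails (e.g., architecture mismatch).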
|
|
68
|
+
RUN pip install \
|
|
69
|
+
pyarrow \
|
|
70
|
+
polars \
|
|
71
|
+
duckdb \
|
|
72
|
+
pandas \
|
|
73
|
+
numpy \
|
|
74
|
+
&& pip install datafusion || true \
|
|
75
|
+
&& rm -rf /root/.cache
|
|
76
|
+
|
|
77
|
+
# Pre-baked web/fintech/auth stack — covers the top runtime dependencies
|
|
78
|
+
# that degraded findings to "analysis-based" on real PRs. On LogoMesh's
|
|
79
|
+
# own self-review PR (#2), 27 of 28 findings fell to analysis-only mode
|
|
80
|
+
# because openai/pydantic/etc. weren't importable. Pre-installing the
|
|
81
|
+
# common set eliminates 80%+ of those cases without the user needing a
|
|
82
|
+
# `.logomesh.toml` deps entry — YAML stays for the long tail.
|
|
83
|
+
#
|
|
84
|
+
# Order: pydantic first (the #1 missing dep). Pinned to >=2 to match
|
|
85
|
+
# the validator and generator assumptions in our own codebase. Separate
|
|
86
|
+
# RUN layer so bumping LLM SDKs doesn't invalidate the web-stack cache.
|
|
87
|
+
RUN pip install \
|
|
88
|
+
"pydantic>=2" \
|
|
89
|
+
pydantic-settings \
|
|
90
|
+
fastapi \
|
|
91
|
+
starlette \
|
|
92
|
+
httpx \
|
|
93
|
+
requests \
|
|
94
|
+
aiohttp \
|
|
95
|
+
sqlalchemy \
|
|
96
|
+
psycopg2-binary \
|
|
97
|
+
redis \
|
|
98
|
+
celery \
|
|
99
|
+
boto3 \
|
|
100
|
+
pyjwt \
|
|
101
|
+
cryptography \
|
|
102
|
+
stripe \
|
|
103
|
+
&& rm -rf /root/.cache
|
|
104
|
+
|
|
105
|
+
# LLM SDKs — separate layer because they release breaking changes
|
|
106
|
+
# every few weeks. "|| true" so a flaky openai wheel doesn't block
|
|
107
|
+
# the whole image build.
|
|
108
|
+
RUN pip install openai anthropic || true \
|
|
109
|
+
&& rm -rf /root/.cache
|
|
110
|
+
|
|
111
|
+
# Hypothesis DB mount point — persisted counterexamples survive across PR runs
|
|
112
|
+
# when the host bind-mounts a directory here. Pre-created so non-root user can
|
|
113
|
+
# write even when the mount is absent (Hypothesis falls back to fresh DB).
|
|
114
|
+
RUN mkdir -p /hypothesis-db && chmod 0777 /hypothesis-db
|
|
115
|
+
|
|
116
|
+
WORKDIR /workspace
|
|
117
|
+
|
|
118
|
+
USER nobody
|
logomesh-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
.PHONY: verify-latest gates-latest sandbox
|
|
2
|
+
|
|
3
|
+
# Path of the results/real_prs_run_<N>.json file with the largest numeric <N>;
# empty when no such file exists (ls errors are discarded).
# Pipeline: awk splits each path on "_" and "." so the next-to-last field is
# <N>, and prints "<N> <path>"; sort -n orders by <N>; tail -1 keeps the
# highest; the final awk prints only the path column.
# Evaluated once at parse time because of `:=`.
LATEST_RUN := $(shell ls -1 results/real_prs_run_*.json 2>/dev/null | awk -F'[_.]' '{print $$(NF-1), $$0}' | sort -n | tail -1 | awk '{print $$2}')
|
|
4
|
+
# The <N> captured from LATEST_RUN by the sed expression; empty when LATEST_RUN is empty.
LATEST_RUN_ID := $(shell echo $(LATEST_RUN) | sed -E 's|.*real_prs_run_([0-9]+)\.json|\1|')
|
|
5
|
+
|
|
6
|
+
# sandbox: build the image logomesh-startup-sandbox:latest from
# Dockerfile.sandbox (build context: current directory), then run
# `docker image prune -f` to remove dangling images without prompting.
sandbox:
|
|
7
|
+
docker build -t logomesh-startup-sandbox:latest -f Dockerfile.sandbox .
|
|
8
|
+
docker image prune -f
|
|
9
|
+
|
|
10
|
+
# verify-latest: run scripts/verify_findings.py on the newest results file.
# Aborts with a message and exit status 1 when LATEST_RUN is empty. Otherwise
# it creates benchmarks/labels if needed and passes
# benchmarks/labels/run_<LATEST_RUN_ID>.json as --label-out.
verify-latest:
|
|
11
|
+
@test -n "$(LATEST_RUN)" || (echo "no results/real_prs_run_*.json found" && exit 1)
|
|
12
|
+
@echo "verifying $(LATEST_RUN) -> benchmarks/labels/run_$(LATEST_RUN_ID).json"
|
|
13
|
+
@mkdir -p benchmarks/labels
|
|
14
|
+
uv run python scripts/verify_findings.py $(LATEST_RUN) --label-out benchmarks/labels/run_$(LATEST_RUN_ID).json
|
|
15
|
+
|
|
16
|
+
# gates-latest: run scripts/quality_gates.py in --strict mode against the
# newest results file, reading labels from benchmarks/labels.
# Aborts with a message and exit status 1 when LATEST_RUN is empty.
gates-latest:
|
|
17
|
+
@test -n "$(LATEST_RUN)" || (echo "no results/real_prs_run_*.json found" && exit 1)
|
|
18
|
+
uv run python scripts/quality_gates.py --run $(LATEST_RUN) --labels benchmarks/labels --strict
|