draft-board 0.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/backend/.env.example +9 -0
- package/app/backend/.smartkanban/evidence/8b383839-cbec-45af-86ee-c7708d075cbe/bddf2ed5-2e21-4d46-a62b-10b87f1642a6_patch.txt +195 -0
- package/app/backend/.smartkanban/evidence/8b383839-cbec-45af-86ee-c7708d075cbe/bddf2ed5-2e21-4d46-a62b-10b87f1642a6_stat.txt +6 -0
- package/app/backend/CURL_EXAMPLES.md +335 -0
- package/app/backend/ENV_SETUP.md +65 -0
- package/app/backend/alembic/env.py +71 -0
- package/app/backend/alembic/script.py.mako +28 -0
- package/app/backend/alembic/versions/001_initial_schema.py +104 -0
- package/app/backend/alembic/versions/002_add_jobs_table.py +52 -0
- package/app/backend/alembic/versions/003_add_workspace_table.py +48 -0
- package/app/backend/alembic/versions/004_add_evidence_table.py +56 -0
- package/app/backend/alembic/versions/005_add_verification_commands.py +32 -0
- package/app/backend/alembic/versions/006_add_planner_lock_table.py +39 -0
- package/app/backend/alembic/versions/007_add_revision_review_tables.py +126 -0
- package/app/backend/alembic/versions/008_add_revision_idempotency_and_traceability.py +52 -0
- package/app/backend/alembic/versions/009_add_job_health_fields.py +46 -0
- package/app/backend/alembic/versions/010_add_review_comment_line_content.py +36 -0
- package/app/backend/alembic/versions/011_add_analysis_cache.py +47 -0
- package/app/backend/alembic/versions/012_add_boards_table.py +102 -0
- package/app/backend/alembic/versions/013_add_ticket_blocking.py +45 -0
- package/app/backend/alembic/versions/014_add_agent_sessions.py +220 -0
- package/app/backend/alembic/versions/015_add_ticket_sort_order.py +33 -0
- package/app/backend/alembic/versions/03220f0b93ae_add_pr_fields_to_ticket.py +49 -0
- package/app/backend/alembic/versions/0c2d89fff3b1_seed_board_configs_from_yaml.py +206 -0
- package/app/backend/alembic/versions/3348e5cf54c1_add_merge_checklist_table.py +67 -0
- package/app/backend/alembic/versions/357c780ee445_add_goal_status.py +34 -0
- package/app/backend/alembic/versions/553340b7e26c_add_autonomy_fields_to_goal.py +65 -0
- package/app/backend/alembic/versions/774dc335c679_merge_migration_heads.py +23 -0
- package/app/backend/alembic/versions/7b307e847cbd_merge_heads.py +23 -0
- package/app/backend/alembic/versions/82ecd978cc70_add_missing_indexes.py +48 -0
- package/app/backend/alembic/versions/8ef5054dc280_add_normalized_log_entries.py +173 -0
- package/app/backend/alembic/versions/8f3e2bd8ea3b_merge_migration_heads.py +23 -0
- package/app/backend/alembic/versions/9d17f0698d3b_add_config_column_to_boards_table.py +30 -0
- package/app/backend/alembic/versions/add_agent_conversation_history.py +72 -0
- package/app/backend/alembic/versions/add_job_variant.py +34 -0
- package/app/backend/alembic/versions/add_performance_indexes.py +95 -0
- package/app/backend/alembic/versions/add_repos_and_board_repos.py +174 -0
- package/app/backend/alembic/versions/add_session_id_to_jobs.py +27 -0
- package/app/backend/alembic/versions/add_sqlite_backend_tables.py +104 -0
- package/app/backend/alembic/versions/b10fb0b62240_add_diff_content_to_revisions.py +34 -0
- package/app/backend/alembic.ini +89 -0
- package/app/backend/app/__init__.py +3 -0
- package/app/backend/app/data_dir.py +85 -0
- package/app/backend/app/database.py +70 -0
- package/app/backend/app/database_sync.py +64 -0
- package/app/backend/app/dependencies/__init__.py +5 -0
- package/app/backend/app/dependencies/auth.py +80 -0
- package/app/backend/app/dependencies.py +43 -0
- package/app/backend/app/exceptions.py +178 -0
- package/app/backend/app/executors/__init__.py +1 -0
- package/app/backend/app/executors/adapters/__init__.py +1 -0
- package/app/backend/app/executors/adapters/aider.py +152 -0
- package/app/backend/app/executors/adapters/amazon_q.py +103 -0
- package/app/backend/app/executors/adapters/amp.py +123 -0
- package/app/backend/app/executors/adapters/claude.py +177 -0
- package/app/backend/app/executors/adapters/cline.py +127 -0
- package/app/backend/app/executors/adapters/codex.py +167 -0
- package/app/backend/app/executors/adapters/copilot.py +202 -0
- package/app/backend/app/executors/adapters/cursor.py +87 -0
- package/app/backend/app/executors/adapters/droid.py +123 -0
- package/app/backend/app/executors/adapters/gemini.py +132 -0
- package/app/backend/app/executors/adapters/goose.py +131 -0
- package/app/backend/app/executors/adapters/opencode.py +123 -0
- package/app/backend/app/executors/adapters/qwen.py +123 -0
- package/app/backend/app/executors/plugins/__init__.py +1 -0
- package/app/backend/app/executors/registry.py +202 -0
- package/app/backend/app/executors/spec.py +226 -0
- package/app/backend/app/main.py +486 -0
- package/app/backend/app/middleware/__init__.py +13 -0
- package/app/backend/app/middleware/idempotency.py +426 -0
- package/app/backend/app/middleware/rate_limit.py +312 -0
- package/app/backend/app/middleware/security_headers.py +43 -0
- package/app/backend/app/middleware/timeout.py +37 -0
- package/app/backend/app/models/__init__.py +56 -0
- package/app/backend/app/models/agent_conversation_history.py +56 -0
- package/app/backend/app/models/agent_session.py +127 -0
- package/app/backend/app/models/analysis_cache.py +49 -0
- package/app/backend/app/models/base.py +9 -0
- package/app/backend/app/models/board.py +79 -0
- package/app/backend/app/models/board_repo.py +68 -0
- package/app/backend/app/models/cost_budget.py +42 -0
- package/app/backend/app/models/enums.py +40 -0
- package/app/backend/app/models/evidence.py +132 -0
- package/app/backend/app/models/goal.py +102 -0
- package/app/backend/app/models/idempotency_entry.py +30 -0
- package/app/backend/app/models/job.py +163 -0
- package/app/backend/app/models/job_queue.py +39 -0
- package/app/backend/app/models/kv_store.py +28 -0
- package/app/backend/app/models/merge_checklist.py +87 -0
- package/app/backend/app/models/normalized_log.py +100 -0
- package/app/backend/app/models/planner_lock.py +43 -0
- package/app/backend/app/models/rate_limit_entry.py +25 -0
- package/app/backend/app/models/repo.py +66 -0
- package/app/backend/app/models/review_comment.py +91 -0
- package/app/backend/app/models/review_summary.py +69 -0
- package/app/backend/app/models/revision.py +130 -0
- package/app/backend/app/models/ticket.py +223 -0
- package/app/backend/app/models/ticket_event.py +83 -0
- package/app/backend/app/models/user.py +47 -0
- package/app/backend/app/models/workspace.py +71 -0
- package/app/backend/app/redis_client.py +119 -0
- package/app/backend/app/routers/__init__.py +29 -0
- package/app/backend/app/routers/agents.py +296 -0
- package/app/backend/app/routers/auth.py +94 -0
- package/app/backend/app/routers/board.py +885 -0
- package/app/backend/app/routers/dashboard.py +351 -0
- package/app/backend/app/routers/debug.py +528 -0
- package/app/backend/app/routers/evidence.py +96 -0
- package/app/backend/app/routers/executors.py +324 -0
- package/app/backend/app/routers/goals.py +574 -0
- package/app/backend/app/routers/jobs.py +448 -0
- package/app/backend/app/routers/maintenance.py +172 -0
- package/app/backend/app/routers/merge.py +360 -0
- package/app/backend/app/routers/planner.py +537 -0
- package/app/backend/app/routers/pull_requests.py +382 -0
- package/app/backend/app/routers/repos.py +263 -0
- package/app/backend/app/routers/revisions.py +939 -0
- package/app/backend/app/routers/settings.py +267 -0
- package/app/backend/app/routers/tickets.py +2003 -0
- package/app/backend/app/routers/webhooks.py +143 -0
- package/app/backend/app/routers/websocket.py +249 -0
- package/app/backend/app/schemas/__init__.py +109 -0
- package/app/backend/app/schemas/board.py +87 -0
- package/app/backend/app/schemas/common.py +33 -0
- package/app/backend/app/schemas/evidence.py +87 -0
- package/app/backend/app/schemas/goal.py +90 -0
- package/app/backend/app/schemas/job.py +97 -0
- package/app/backend/app/schemas/merge.py +139 -0
- package/app/backend/app/schemas/planner.py +500 -0
- package/app/backend/app/schemas/repo.py +187 -0
- package/app/backend/app/schemas/review.py +137 -0
- package/app/backend/app/schemas/revision.py +114 -0
- package/app/backend/app/schemas/ticket.py +238 -0
- package/app/backend/app/schemas/ticket_event.py +72 -0
- package/app/backend/app/schemas/workspace.py +19 -0
- package/app/backend/app/services/__init__.py +31 -0
- package/app/backend/app/services/agent_memory_service.py +223 -0
- package/app/backend/app/services/agent_registry.py +346 -0
- package/app/backend/app/services/agent_session_manager.py +318 -0
- package/app/backend/app/services/agent_session_service.py +219 -0
- package/app/backend/app/services/agent_tools.py +379 -0
- package/app/backend/app/services/auth_service.py +98 -0
- package/app/backend/app/services/autonomy_service.py +380 -0
- package/app/backend/app/services/board_repo_service.py +201 -0
- package/app/backend/app/services/board_service.py +326 -0
- package/app/backend/app/services/cleanup_service.py +1085 -0
- package/app/backend/app/services/config_service.py +908 -0
- package/app/backend/app/services/context_gatherer.py +557 -0
- package/app/backend/app/services/cost_tracking_service.py +293 -0
- package/app/backend/app/services/cursor_log_normalizer.py +536 -0
- package/app/backend/app/services/delivery_pipeline.py +440 -0
- package/app/backend/app/services/executor_service.py +634 -0
- package/app/backend/app/services/git_host/__init__.py +11 -0
- package/app/backend/app/services/git_host/factory.py +87 -0
- package/app/backend/app/services/git_host/github.py +270 -0
- package/app/backend/app/services/git_host/gitlab.py +194 -0
- package/app/backend/app/services/git_host/protocol.py +75 -0
- package/app/backend/app/services/git_merge_simple.py +346 -0
- package/app/backend/app/services/git_ops.py +384 -0
- package/app/backend/app/services/github_service.py +233 -0
- package/app/backend/app/services/goal_service.py +113 -0
- package/app/backend/app/services/job_service.py +423 -0
- package/app/backend/app/services/job_watchdog_service.py +424 -0
- package/app/backend/app/services/langchain_adapter.py +122 -0
- package/app/backend/app/services/llm_provider_clients.py +351 -0
- package/app/backend/app/services/llm_service.py +285 -0
- package/app/backend/app/services/log_normalizer.py +342 -0
- package/app/backend/app/services/log_stream_service.py +276 -0
- package/app/backend/app/services/merge_checklist_service.py +264 -0
- package/app/backend/app/services/merge_service.py +784 -0
- package/app/backend/app/services/orchestrator_log.py +84 -0
- package/app/backend/app/services/planner_service.py +1662 -0
- package/app/backend/app/services/planner_tick_sync.py +1040 -0
- package/app/backend/app/services/queued_message_service.py +156 -0
- package/app/backend/app/services/reliability_wrapper.py +389 -0
- package/app/backend/app/services/repo_discovery_service.py +318 -0
- package/app/backend/app/services/review_service.py +334 -0
- package/app/backend/app/services/revision_service.py +389 -0
- package/app/backend/app/services/safe_autopilot.py +510 -0
- package/app/backend/app/services/sqlite_worker.py +372 -0
- package/app/backend/app/services/task_dispatch.py +135 -0
- package/app/backend/app/services/ticket_generation_service.py +1781 -0
- package/app/backend/app/services/ticket_service.py +486 -0
- package/app/backend/app/services/udar_planner_service.py +1007 -0
- package/app/backend/app/services/webhook_service.py +126 -0
- package/app/backend/app/services/workspace_service.py +465 -0
- package/app/backend/app/services/worktree_file_service.py +92 -0
- package/app/backend/app/services/worktree_validator.py +213 -0
- package/app/backend/app/sqlite_kv.py +278 -0
- package/app/backend/app/state_machine.py +128 -0
- package/app/backend/app/templates/__init__.py +5 -0
- package/app/backend/app/templates/registry.py +243 -0
- package/app/backend/app/utils/__init__.py +5 -0
- package/app/backend/app/utils/artifact_reader.py +87 -0
- package/app/backend/app/utils/circuit_breaker.py +229 -0
- package/app/backend/app/utils/db_retry.py +136 -0
- package/app/backend/app/utils/ignored_fields.py +123 -0
- package/app/backend/app/utils/validators.py +54 -0
- package/app/backend/app/websocket/__init__.py +5 -0
- package/app/backend/app/websocket/manager.py +179 -0
- package/app/backend/app/websocket/state_tracker.py +113 -0
- package/app/backend/app/worker.py +3190 -0
- package/app/backend/calculator_tickets.json +40 -0
- package/app/backend/canary_tests.sh +591 -0
- package/app/backend/celerybeat-schedule +0 -0
- package/app/backend/celerybeat-schedule-shm +0 -0
- package/app/backend/celerybeat-schedule-wal +0 -0
- package/app/backend/logs/.gitkeep +3 -0
- package/app/backend/multiplication_division_implementation_tickets.json +55 -0
- package/app/backend/multiplication_division_tickets.json +42 -0
- package/app/backend/pyproject.toml +45 -0
- package/app/backend/requirements-dev.txt +8 -0
- package/app/backend/requirements.txt +20 -0
- package/app/backend/run.sh +30 -0
- package/app/backend/run_with_logs.sh +10 -0
- package/app/backend/scientific_calculator_tickets.json +40 -0
- package/app/backend/scripts/extract_openapi.py +21 -0
- package/app/backend/scripts/seed_demo.py +187 -0
- package/app/backend/setup_demo_review.py +302 -0
- package/app/backend/test_actual_parse.py +41 -0
- package/app/backend/test_agent_streaming.py +61 -0
- package/app/backend/test_parse.py +51 -0
- package/app/backend/test_streaming.py +51 -0
- package/app/backend/test_subprocess_streaming.py +50 -0
- package/app/backend/tests/__init__.py +1 -0
- package/app/backend/tests/conftest.py +46 -0
- package/app/backend/tests/test_auth.py +341 -0
- package/app/backend/tests/test_autonomy_service.py +391 -0
- package/app/backend/tests/test_cleanup_service_safety.py +417 -0
- package/app/backend/tests/test_middleware.py +279 -0
- package/app/backend/tests/test_planner_providers.py +290 -0
- package/app/backend/tests/test_planner_unblock.py +183 -0
- package/app/backend/tests/test_revision_invariants.py +618 -0
- package/app/backend/tests/test_sqlite_kv.py +290 -0
- package/app/backend/tests/test_sqlite_worker.py +353 -0
- package/app/backend/tests/test_task_dispatch.py +100 -0
- package/app/backend/tests/test_ticket_validation.py +304 -0
- package/app/backend/tests/test_udar_agent.py +693 -0
- package/app/backend/tests/test_webhook_service.py +184 -0
- package/app/backend/tickets_output.json +59 -0
- package/app/backend/user_management_tickets.json +50 -0
- package/app/backend/uvicorn.log +0 -0
- package/app/draft.yaml +313 -0
- package/app/frontend/dist/assets/index-LcjCczu5.js +155 -0
- package/app/frontend/dist/assets/index-_FP_279e.css +1 -0
- package/app/frontend/dist/index.html +14 -0
- package/app/frontend/dist/vite.svg +1 -0
- package/app/frontend/package.json +101 -0
- package/bin/cli.js +527 -0
- package/package.json +37 -0
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
"""Safe autopilot with configurable safety gates for autonomous execution.
|
|
2
|
+
|
|
3
|
+
The SafeAutopilot ensures that autonomous execution respects safety constraints
|
|
4
|
+
and doesn't make changes that could be dangerous or expensive without human review.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from collections.abc import Callable
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from enum import StrEnum
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from sqlalchemy import select
|
|
14
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
15
|
+
from sqlalchemy.orm import selectinload
|
|
16
|
+
|
|
17
|
+
from app.models.goal import Goal
|
|
18
|
+
from app.models.job import Job
|
|
19
|
+
from app.models.ticket import Ticket
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class GateAction(StrEnum):
|
|
25
|
+
"""What to do when a gate fails."""
|
|
26
|
+
|
|
27
|
+
BLOCK = "block" # Stop execution, mark as blocked
|
|
28
|
+
PAUSE = "pause" # Pause for human review
|
|
29
|
+
ALERT = "alert" # Alert but continue
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class GateContext:
    """Snapshot of the facts a safety gate inspects when it is evaluated.

    Every field is required. Passing ``modified_files=None`` is tolerated
    and is normalised to an empty list after initialisation.
    """

    ticket: Ticket
    goal: Goal | None
    total_cost_so_far: float
    total_files_changed: int
    total_lines_changed: int
    all_tests_passed: bool
    modified_files: list[str]
    budget_limit: float | None

    def __post_init__(self):
        # A caller-supplied list is kept as-is (same object); only a
        # missing value is replaced.
        if self.modified_files is not None:
            return
        self.modified_files = []
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class GateResult:
    """Outcome of evaluating one safety gate.

    ``gate_name`` identifies the gate, ``passed`` says whether it was
    satisfied, and ``action`` is the GateAction associated with the result.
    ``reason`` optionally explains a failure. ``details`` carries free-form
    diagnostic values.
    """

    gate_name: str
    passed: bool
    action: GateAction
    reason: str | None = None
    # None is accepted (and is the default) so that each instance can be
    # given its own fresh dict in __post_init__ rather than sharing one
    # mutable default across instances.
    details: dict[str, Any] | None = None

    def __post_init__(self):
        if self.details is None:
            self.details = {}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class SafetyGate:
    """Common base for all safety gates.

    A gate stores a name and a GateAction; subclasses provide the actual
    check by overriding ``evaluate``.
    """

    def __init__(self, name: str, action: GateAction = GateAction.BLOCK):
        self.name, self.action = name, action

    async def evaluate(self, context: GateContext) -> GateResult:
        """Judge ``context`` and return a GateResult.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class TestsPassedGate(SafetyGate):
    """Gate that is satisfied only when the context reports passing tests."""

    def __init__(self, action: GateAction = GateAction.BLOCK):
        super().__init__("tests_passed", action)

    async def evaluate(self, context: GateContext) -> GateResult:
        """Pass when ``context.all_tests_passed`` is truthy, fail otherwise."""
        tests_ok = context.all_tests_passed
        if not tests_ok:
            # Failure path: explain why and record the flag that was seen.
            return GateResult(
                gate_name=self.name,
                passed=False,
                action=self.action,
                reason="Not all verification tests passed",
                details={"tests_passed": tests_ok},
            )
        return GateResult(gate_name=self.name, passed=True, action=self.action)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class DiffSizeGate(SafetyGate):
    """Gate that fails when a change touches too many files or lines."""

    def __init__(
        self,
        max_files: int = 50,
        max_lines: int = 1000,
        action: GateAction = GateAction.PAUSE,
    ):
        super().__init__("diff_size_threshold", action)
        self.max_files = max_files
        self.max_lines = max_lines

    async def evaluate(self, context: GateContext) -> GateResult:
        """Compare the context's file and line counts against the limits.

        The file limit is checked first; the first limit that is strictly
        exceeded produces the failure result.
        """
        files_changed = context.total_files_changed
        lines_changed = context.total_lines_changed

        # (unit, observed value, allowed maximum), in checking order.
        limits = (
            ("files", files_changed, self.max_files),
            ("lines", lines_changed, self.max_lines),
        )
        for unit, observed, allowed in limits:
            if observed > allowed:
                return GateResult(
                    gate_name=self.name,
                    passed=False,
                    action=self.action,
                    reason=f"Too many {unit} changed: {observed} > {allowed}",
                    details={
                        f"{unit}_changed": observed,
                        f"max_{unit}": allowed,
                    },
                )

        return GateResult(
            gate_name=self.name,
            passed=True,
            action=self.action,
            details={
                "files_changed": files_changed,
                "lines_changed": lines_changed,
            },
        )
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class SensitiveFilesGate(SafetyGate):
    """Gate that fails when any modified path looks security-sensitive."""

    # Substrings that mark a path as requiring human review. Matching is a
    # case-insensitive "contains" test against the whole path.
    SENSITIVE_PATTERNS = [
        ".env",
        "credentials",
        "secrets",
        "password",
        ".key",
        ".pem",
        ".crt",
        "api_key",
        "token",
        "config/production",
        "database.yml",
        "production.yml",
    ]

    def __init__(self, action: GateAction = GateAction.BLOCK):
        super().__init__("no_sensitive_files", action)

    def _is_sensitive(self, file_path: str) -> bool:
        """Return True when the lower-cased path contains any pattern."""
        lowered = file_path.lower()
        return any(pattern in lowered for pattern in self.SENSITIVE_PATTERNS)

    async def evaluate(self, context: GateContext) -> GateResult:
        """Fail if ``context.modified_files`` contains a sensitive path.

        Offending paths are reported in their original spelling and order.
        """
        flagged = [
            path for path in context.modified_files if self._is_sensitive(path)
        ]

        if not flagged:
            return GateResult(gate_name=self.name, passed=True, action=self.action)

        return GateResult(
            gate_name=self.name,
            passed=False,
            action=self.action,
            reason=f"Modified sensitive files: {', '.join(flagged)}",
            details={"sensitive_files": flagged},
        )
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class BudgetGate(SafetyGate):
    """Gate that compares accumulated cost against the budget limit."""

    def __init__(
        self, warning_threshold: float = 0.8, action: GateAction = GateAction.PAUSE
    ):
        super().__init__("cost_budget", action)
        self.warning_threshold = warning_threshold

    async def evaluate(self, context: GateContext) -> GateResult:
        """Evaluate spending against ``context.budget_limit``.

        Passes when no limit is set (``None``) or the limit is zero or
        negative (treated as unlimited). Otherwise it fails with BLOCK once
        the full budget is used, fails with the configured action once the
        warning threshold is reached, and passes below that.
        """
        limit = context.budget_limit

        if limit is None:
            # Nothing to compare against.
            return GateResult(
                gate_name=self.name,
                passed=True,
                action=self.action,
                details={"budget_set": False},
            )

        if limit <= 0:
            # Non-positive limits mean "unlimited".
            return GateResult(
                gate_name=self.name,
                passed=True,
                action=self.action,
                details={"budget_unlimited": True},
            )

        spent = context.total_cost_so_far
        budget_used_pct = spent / limit
        usage = {
            "total_cost": spent,
            "budget_limit": limit,
            "budget_used_pct": budget_used_pct,
        }

        if budget_used_pct >= 1.0:
            # An exhausted budget blocks regardless of the configured action.
            return GateResult(
                gate_name=self.name,
                passed=False,
                action=GateAction.BLOCK,
                reason=f"Budget exceeded: ${spent:.2f} >= ${limit:.2f}",
                details=usage,
            )

        if budget_used_pct >= self.warning_threshold:
            return GateResult(
                gate_name=self.name,
                passed=False,
                action=self.action,
                reason=f"Budget warning: {budget_used_pct * 100:.1f}% used (${spent:.2f} / ${limit:.2f})",
                details={**usage, "warning_threshold": self.warning_threshold},
            )

        return GateResult(
            gate_name=self.name,
            passed=True,
            action=self.action,
            details=usage,
        )
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class CustomGate(SafetyGate):
    """Gate whose verdict comes from a caller-supplied check function.

    ``check_func`` receives the GateContext and returns a truthy value to
    pass or a falsy one to fail. It may be a plain function or an
    ``async def`` function; an awaitable result is awaited before being
    interpreted.
    """

    def __init__(
        self,
        name: str,
        check_func: Callable[[GateContext], bool],
        action: GateAction = GateAction.PAUSE,
        failure_message: str = "Custom gate check failed",
    ):
        super().__init__(name, action)
        self.check_func = check_func
        self.failure_message = failure_message

    async def evaluate(self, context: GateContext) -> GateResult:
        """Run the check function and translate its verdict.

        Returns a passing result for a truthy verdict and a failing result
        (configured action, ``failure_message`` as reason) for a falsy one.
        If the check raises, the exception is logged and a failing result
        with GateAction.BLOCK is returned instead of propagating.
        """
        import inspect

        try:
            verdict = self.check_func(context)
            # A coroutine object is always truthy, so an async check that is
            # not awaited would pass the gate no matter what it decides.
            if inspect.isawaitable(verdict):
                verdict = await verdict
        except Exception as e:
            logger.exception(f"Custom gate {self.name} evaluation failed")
            return GateResult(
                gate_name=self.name,
                passed=False,
                action=GateAction.BLOCK,
                reason=f"Gate evaluation error: {str(e)}",
            )

        if verdict:
            return GateResult(gate_name=self.name, passed=True, action=self.action)

        return GateResult(
            gate_name=self.name,
            passed=False,
            action=self.action,
            reason=self.failure_message,
        )
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class SafeAutopilot:
    """Runs a configurable list of safety gates against a ticket.

    The autopilot builds a GateContext from database records related to the
    ticket, evaluates every configured gate against it, and reports whether
    autonomous execution may continue.
    """

    # Gates used when the caller does not supply its own list.
    DEFAULT_GATES = [
        TestsPassedGate(action=GateAction.BLOCK),
        DiffSizeGate(max_files=50, max_lines=1000, action=GateAction.PAUSE),
        SensitiveFilesGate(action=GateAction.BLOCK),
        BudgetGate(warning_threshold=0.8, action=GateAction.PAUSE),
    ]

    def __init__(self, db: AsyncSession, gates: list[SafetyGate] | None = None):
        """Store the session and the gate list.

        When ``gates`` is None a shallow copy of DEFAULT_GATES is used, so
        adding or removing gates on one autopilot does not alter the class
        default list (the gate objects themselves are shared).
        """
        self.db = db
        self.gates = gates if gates is not None else self.DEFAULT_GATES.copy()

    def add_gate(self, gate: SafetyGate):
        """Append a gate to this autopilot's gate list."""
        self.gates.append(gate)

    def remove_gate(self, gate_name: str):
        """Drop every gate whose name equals ``gate_name``."""
        self.gates = [g for g in self.gates if g.name != gate_name]

    async def check_gates(self, ticket: Ticket) -> list[GateResult]:
        """Evaluate all configured gates for a ticket.

        Returns:
            One GateResult per gate, in gate order. A gate whose evaluation
            raises is recorded as a failed result with GateAction.BLOCK
            rather than aborting the remaining gates.
        """
        # The context is built once and shared by every gate.
        context = await self._build_context(ticket)

        results = []
        for gate in self.gates:
            try:
                result = await gate.evaluate(context)
                results.append(result)

                if not result.passed:
                    logger.warning(
                        f"Gate {gate.name} failed for ticket {ticket.id}: {result.reason}"
                    )
            except Exception as e:
                logger.exception(f"Gate {gate.name} evaluation crashed")
                results.append(
                    GateResult(
                        gate_name=gate.name,
                        passed=False,
                        action=GateAction.BLOCK,
                        reason=f"Gate evaluation crashed: {str(e)}",
                    )
                )

        return results

    async def should_continue(self, ticket: Ticket) -> tuple[bool, list[GateResult]]:
        """Decide whether the autopilot may continue with this ticket.

        Returns:
            ``(can_continue, gate_results)``. ``can_continue`` is False when
            any failed result carries GateAction.BLOCK or GateAction.PAUSE;
            failed results with GateAction.ALERT do not stop execution.
        """
        results = await self.check_gates(ticket)

        stopping_actions = (GateAction.BLOCK, GateAction.PAUSE)
        halted = any(
            not r.passed and r.action in stopping_actions for r in results
        )

        return not halted, results

    @staticmethod
    def _parse_diff_stat(content: str) -> tuple[int, int]:
        """Return ``(files_changed, lines_changed)`` parsed from a diff stat.

        Expects text such as
        "3 files changed, 45 insertions(+), 12 deletions(-)". Lines changed
        is insertions plus deletions; any part that is absent counts as 0.
        """
        import re

        files_match = re.search(r"(\d+) files? changed", content)
        insertions_match = re.search(r"(\d+) insertions?", content)
        deletions_match = re.search(r"(\d+) deletions?", content)

        files = int(files_match.group(1)) if files_match else 0
        lines = 0
        if insertions_match:
            lines += int(insertions_match.group(1))
        if deletions_match:
            lines += int(deletions_match.group(1))
        return files, lines

    @staticmethod
    def _extract_touched_paths(content: str) -> list[str]:
        """Return the file paths named in a patch's diff headers.

        Both "--- a/<path>" and "+++ b/<path>" headers are read. A deleted
        file only has the former (its "+++" side is /dev/null), so reading
        just the "+++" side would hide deletions from the gates.
        """
        import re

        return re.findall(r"^(?:--- a|\+\+\+ b)/(.+)$", content, re.MULTILINE)

    async def _build_context(self, ticket: Ticket) -> GateContext:
        """Collect goal, cost, diff and test data for a ticket."""
        # Goal (with its cost budget eagerly loaded), if the ticket has one.
        goal = None
        if ticket.goal_id:
            result = await self.db.execute(
                select(Goal)
                .where(Goal.id == ticket.goal_id)
                .options(selectinload(Goal.cost_budget))
            )
            goal = result.scalar_one_or_none()

        # Cost is summed over agent sessions of every ticket under the goal.
        total_cost = 0.0
        budget_limit = None

        if goal:
            from app.models.agent_session import AgentSession

            result = await self.db.execute(
                select(AgentSession)
                .join(Ticket, Ticket.id == AgentSession.ticket_id)
                .where(Ticket.goal_id == goal.id)
            )
            sessions = result.scalars().all()
            total_cost = sum(s.cost_usd or 0.0 for s in sessions)

            if goal.cost_budget:
                budget_limit = goal.cost_budget.total_budget

        from app.models.evidence import Evidence

        # Diff size: accumulate the stat summaries of all the ticket's jobs.
        total_files_changed = 0
        total_lines_changed = 0

        result = await self.db.execute(
            select(Evidence)
            .join(Job, Job.id == Evidence.job_id)
            .where(Job.ticket_id == ticket.id)
            .where(Evidence.kind == "diff_stat")
        )
        for evidence in result.scalars().all():
            if evidence.content:
                files, lines = self._parse_diff_stat(evidence.content)
                total_files_changed += files
                total_lines_changed += lines

        # Touched paths: gathered from the patches of all the ticket's jobs.
        result = await self.db.execute(
            select(Evidence)
            .join(Job, Job.id == Evidence.job_id)
            .where(Job.ticket_id == ticket.id)
            .where(Evidence.kind == "diff_patch")
        )
        # A dict keeps first-seen order while dropping duplicates (a modified
        # file appears in both its "---" and "+++" header).
        touched: dict[str, None] = {}
        for evidence in result.scalars().all():
            if evidence.content:
                touched.update(
                    dict.fromkeys(self._extract_touched_paths(evidence.content))
                )
        modified_files = list(touched)

        all_tests_passed = await self._check_tests_passed(ticket)

        return GateContext(
            ticket=ticket,
            goal=goal,
            total_cost_so_far=total_cost,
            total_files_changed=total_files_changed,
            total_lines_changed=total_lines_changed,
            all_tests_passed=all_tests_passed,
            modified_files=modified_files,
            budget_limit=budget_limit,
        )

    async def _check_tests_passed(self, ticket: Ticket) -> bool:
        """Report whether the ticket's latest verification job succeeded.

        Returns False when the ticket has no job of kind "verify";
        otherwise returns whether the verify job with the greatest
        ``created_at`` has status "succeeded".
        """
        result = await self.db.execute(
            select(Job).where(Job.ticket_id == ticket.id).where(Job.kind == "verify")
        )
        verify_jobs = result.scalars().all()

        if not verify_jobs:
            # No verification has been run yet.
            return False

        latest_verify = max(verify_jobs, key=lambda j: j.created_at)
        return latest_verify.status == "succeeded"
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def create_default_autopilot(db: AsyncSession) -> SafeAutopilot:
    """Build a SafeAutopilot that uses a copy of the default gate list."""
    default_gates = SafeAutopilot.DEFAULT_GATES.copy()
    return SafeAutopilot(db, gates=default_gates)
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def create_yolo_autopilot(db: AsyncSession) -> SafeAutopilot:
    """Build a minimal-gate autopilot: only tests and budget hard limits."""
    tests_gate = TestsPassedGate(action=GateAction.BLOCK)
    # With the warning threshold at 1.0 the gate only fails once the budget
    # is fully used, i.e. it blocks on exceeded budget only.
    budget_gate = BudgetGate(warning_threshold=1.0, action=GateAction.BLOCK)
    return SafeAutopilot(db, gates=[tests_gate, budget_gate])
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def create_strict_autopilot(db: AsyncSession) -> SafeAutopilot:
    """Build an autopilot with tighter limits than the defaults.

    Diff size is capped at 20 files / 500 lines and blocks when exceeded,
    and the budget warning triggers at 50% usage.
    """
    strict_gates = []
    strict_gates.append(TestsPassedGate(action=GateAction.BLOCK))
    strict_gates.append(
        DiffSizeGate(max_files=20, max_lines=500, action=GateAction.BLOCK)
    )
    strict_gates.append(SensitiveFilesGate(action=GateAction.BLOCK))
    strict_gates.append(BudgetGate(warning_threshold=0.5, action=GateAction.PAUSE))
    return SafeAutopilot(db, gates=strict_gates)
|