draft-board 0.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/backend/.env.example +9 -0
- package/app/backend/.smartkanban/evidence/8b383839-cbec-45af-86ee-c7708d075cbe/bddf2ed5-2e21-4d46-a62b-10b87f1642a6_patch.txt +195 -0
- package/app/backend/.smartkanban/evidence/8b383839-cbec-45af-86ee-c7708d075cbe/bddf2ed5-2e21-4d46-a62b-10b87f1642a6_stat.txt +6 -0
- package/app/backend/CURL_EXAMPLES.md +335 -0
- package/app/backend/ENV_SETUP.md +65 -0
- package/app/backend/alembic/env.py +71 -0
- package/app/backend/alembic/script.py.mako +28 -0
- package/app/backend/alembic/versions/001_initial_schema.py +104 -0
- package/app/backend/alembic/versions/002_add_jobs_table.py +52 -0
- package/app/backend/alembic/versions/003_add_workspace_table.py +48 -0
- package/app/backend/alembic/versions/004_add_evidence_table.py +56 -0
- package/app/backend/alembic/versions/005_add_verification_commands.py +32 -0
- package/app/backend/alembic/versions/006_add_planner_lock_table.py +39 -0
- package/app/backend/alembic/versions/007_add_revision_review_tables.py +126 -0
- package/app/backend/alembic/versions/008_add_revision_idempotency_and_traceability.py +52 -0
- package/app/backend/alembic/versions/009_add_job_health_fields.py +46 -0
- package/app/backend/alembic/versions/010_add_review_comment_line_content.py +36 -0
- package/app/backend/alembic/versions/011_add_analysis_cache.py +47 -0
- package/app/backend/alembic/versions/012_add_boards_table.py +102 -0
- package/app/backend/alembic/versions/013_add_ticket_blocking.py +45 -0
- package/app/backend/alembic/versions/014_add_agent_sessions.py +220 -0
- package/app/backend/alembic/versions/015_add_ticket_sort_order.py +33 -0
- package/app/backend/alembic/versions/03220f0b93ae_add_pr_fields_to_ticket.py +49 -0
- package/app/backend/alembic/versions/0c2d89fff3b1_seed_board_configs_from_yaml.py +206 -0
- package/app/backend/alembic/versions/3348e5cf54c1_add_merge_checklist_table.py +67 -0
- package/app/backend/alembic/versions/357c780ee445_add_goal_status.py +34 -0
- package/app/backend/alembic/versions/553340b7e26c_add_autonomy_fields_to_goal.py +65 -0
- package/app/backend/alembic/versions/774dc335c679_merge_migration_heads.py +23 -0
- package/app/backend/alembic/versions/7b307e847cbd_merge_heads.py +23 -0
- package/app/backend/alembic/versions/82ecd978cc70_add_missing_indexes.py +48 -0
- package/app/backend/alembic/versions/8ef5054dc280_add_normalized_log_entries.py +173 -0
- package/app/backend/alembic/versions/8f3e2bd8ea3b_merge_migration_heads.py +23 -0
- package/app/backend/alembic/versions/9d17f0698d3b_add_config_column_to_boards_table.py +30 -0
- package/app/backend/alembic/versions/add_agent_conversation_history.py +72 -0
- package/app/backend/alembic/versions/add_job_variant.py +34 -0
- package/app/backend/alembic/versions/add_performance_indexes.py +95 -0
- package/app/backend/alembic/versions/add_repos_and_board_repos.py +174 -0
- package/app/backend/alembic/versions/add_session_id_to_jobs.py +27 -0
- package/app/backend/alembic/versions/add_sqlite_backend_tables.py +104 -0
- package/app/backend/alembic/versions/b10fb0b62240_add_diff_content_to_revisions.py +34 -0
- package/app/backend/alembic.ini +89 -0
- package/app/backend/app/__init__.py +3 -0
- package/app/backend/app/data_dir.py +85 -0
- package/app/backend/app/database.py +70 -0
- package/app/backend/app/database_sync.py +64 -0
- package/app/backend/app/dependencies/__init__.py +5 -0
- package/app/backend/app/dependencies/auth.py +80 -0
- package/app/backend/app/dependencies.py +43 -0
- package/app/backend/app/exceptions.py +178 -0
- package/app/backend/app/executors/__init__.py +1 -0
- package/app/backend/app/executors/adapters/__init__.py +1 -0
- package/app/backend/app/executors/adapters/aider.py +152 -0
- package/app/backend/app/executors/adapters/amazon_q.py +103 -0
- package/app/backend/app/executors/adapters/amp.py +123 -0
- package/app/backend/app/executors/adapters/claude.py +177 -0
- package/app/backend/app/executors/adapters/cline.py +127 -0
- package/app/backend/app/executors/adapters/codex.py +167 -0
- package/app/backend/app/executors/adapters/copilot.py +202 -0
- package/app/backend/app/executors/adapters/cursor.py +87 -0
- package/app/backend/app/executors/adapters/droid.py +123 -0
- package/app/backend/app/executors/adapters/gemini.py +132 -0
- package/app/backend/app/executors/adapters/goose.py +131 -0
- package/app/backend/app/executors/adapters/opencode.py +123 -0
- package/app/backend/app/executors/adapters/qwen.py +123 -0
- package/app/backend/app/executors/plugins/__init__.py +1 -0
- package/app/backend/app/executors/registry.py +202 -0
- package/app/backend/app/executors/spec.py +226 -0
- package/app/backend/app/main.py +486 -0
- package/app/backend/app/middleware/__init__.py +13 -0
- package/app/backend/app/middleware/idempotency.py +426 -0
- package/app/backend/app/middleware/rate_limit.py +312 -0
- package/app/backend/app/middleware/security_headers.py +43 -0
- package/app/backend/app/middleware/timeout.py +37 -0
- package/app/backend/app/models/__init__.py +56 -0
- package/app/backend/app/models/agent_conversation_history.py +56 -0
- package/app/backend/app/models/agent_session.py +127 -0
- package/app/backend/app/models/analysis_cache.py +49 -0
- package/app/backend/app/models/base.py +9 -0
- package/app/backend/app/models/board.py +79 -0
- package/app/backend/app/models/board_repo.py +68 -0
- package/app/backend/app/models/cost_budget.py +42 -0
- package/app/backend/app/models/enums.py +40 -0
- package/app/backend/app/models/evidence.py +132 -0
- package/app/backend/app/models/goal.py +102 -0
- package/app/backend/app/models/idempotency_entry.py +30 -0
- package/app/backend/app/models/job.py +163 -0
- package/app/backend/app/models/job_queue.py +39 -0
- package/app/backend/app/models/kv_store.py +28 -0
- package/app/backend/app/models/merge_checklist.py +87 -0
- package/app/backend/app/models/normalized_log.py +100 -0
- package/app/backend/app/models/planner_lock.py +43 -0
- package/app/backend/app/models/rate_limit_entry.py +25 -0
- package/app/backend/app/models/repo.py +66 -0
- package/app/backend/app/models/review_comment.py +91 -0
- package/app/backend/app/models/review_summary.py +69 -0
- package/app/backend/app/models/revision.py +130 -0
- package/app/backend/app/models/ticket.py +223 -0
- package/app/backend/app/models/ticket_event.py +83 -0
- package/app/backend/app/models/user.py +47 -0
- package/app/backend/app/models/workspace.py +71 -0
- package/app/backend/app/redis_client.py +119 -0
- package/app/backend/app/routers/__init__.py +29 -0
- package/app/backend/app/routers/agents.py +296 -0
- package/app/backend/app/routers/auth.py +94 -0
- package/app/backend/app/routers/board.py +885 -0
- package/app/backend/app/routers/dashboard.py +351 -0
- package/app/backend/app/routers/debug.py +528 -0
- package/app/backend/app/routers/evidence.py +96 -0
- package/app/backend/app/routers/executors.py +324 -0
- package/app/backend/app/routers/goals.py +574 -0
- package/app/backend/app/routers/jobs.py +448 -0
- package/app/backend/app/routers/maintenance.py +172 -0
- package/app/backend/app/routers/merge.py +360 -0
- package/app/backend/app/routers/planner.py +537 -0
- package/app/backend/app/routers/pull_requests.py +382 -0
- package/app/backend/app/routers/repos.py +263 -0
- package/app/backend/app/routers/revisions.py +939 -0
- package/app/backend/app/routers/settings.py +267 -0
- package/app/backend/app/routers/tickets.py +2003 -0
- package/app/backend/app/routers/webhooks.py +143 -0
- package/app/backend/app/routers/websocket.py +249 -0
- package/app/backend/app/schemas/__init__.py +109 -0
- package/app/backend/app/schemas/board.py +87 -0
- package/app/backend/app/schemas/common.py +33 -0
- package/app/backend/app/schemas/evidence.py +87 -0
- package/app/backend/app/schemas/goal.py +90 -0
- package/app/backend/app/schemas/job.py +97 -0
- package/app/backend/app/schemas/merge.py +139 -0
- package/app/backend/app/schemas/planner.py +500 -0
- package/app/backend/app/schemas/repo.py +187 -0
- package/app/backend/app/schemas/review.py +137 -0
- package/app/backend/app/schemas/revision.py +114 -0
- package/app/backend/app/schemas/ticket.py +238 -0
- package/app/backend/app/schemas/ticket_event.py +72 -0
- package/app/backend/app/schemas/workspace.py +19 -0
- package/app/backend/app/services/__init__.py +31 -0
- package/app/backend/app/services/agent_memory_service.py +223 -0
- package/app/backend/app/services/agent_registry.py +346 -0
- package/app/backend/app/services/agent_session_manager.py +318 -0
- package/app/backend/app/services/agent_session_service.py +219 -0
- package/app/backend/app/services/agent_tools.py +379 -0
- package/app/backend/app/services/auth_service.py +98 -0
- package/app/backend/app/services/autonomy_service.py +380 -0
- package/app/backend/app/services/board_repo_service.py +201 -0
- package/app/backend/app/services/board_service.py +326 -0
- package/app/backend/app/services/cleanup_service.py +1085 -0
- package/app/backend/app/services/config_service.py +908 -0
- package/app/backend/app/services/context_gatherer.py +557 -0
- package/app/backend/app/services/cost_tracking_service.py +293 -0
- package/app/backend/app/services/cursor_log_normalizer.py +536 -0
- package/app/backend/app/services/delivery_pipeline.py +440 -0
- package/app/backend/app/services/executor_service.py +634 -0
- package/app/backend/app/services/git_host/__init__.py +11 -0
- package/app/backend/app/services/git_host/factory.py +87 -0
- package/app/backend/app/services/git_host/github.py +270 -0
- package/app/backend/app/services/git_host/gitlab.py +194 -0
- package/app/backend/app/services/git_host/protocol.py +75 -0
- package/app/backend/app/services/git_merge_simple.py +346 -0
- package/app/backend/app/services/git_ops.py +384 -0
- package/app/backend/app/services/github_service.py +233 -0
- package/app/backend/app/services/goal_service.py +113 -0
- package/app/backend/app/services/job_service.py +423 -0
- package/app/backend/app/services/job_watchdog_service.py +424 -0
- package/app/backend/app/services/langchain_adapter.py +122 -0
- package/app/backend/app/services/llm_provider_clients.py +351 -0
- package/app/backend/app/services/llm_service.py +285 -0
- package/app/backend/app/services/log_normalizer.py +342 -0
- package/app/backend/app/services/log_stream_service.py +276 -0
- package/app/backend/app/services/merge_checklist_service.py +264 -0
- package/app/backend/app/services/merge_service.py +784 -0
- package/app/backend/app/services/orchestrator_log.py +84 -0
- package/app/backend/app/services/planner_service.py +1662 -0
- package/app/backend/app/services/planner_tick_sync.py +1040 -0
- package/app/backend/app/services/queued_message_service.py +156 -0
- package/app/backend/app/services/reliability_wrapper.py +389 -0
- package/app/backend/app/services/repo_discovery_service.py +318 -0
- package/app/backend/app/services/review_service.py +334 -0
- package/app/backend/app/services/revision_service.py +389 -0
- package/app/backend/app/services/safe_autopilot.py +510 -0
- package/app/backend/app/services/sqlite_worker.py +372 -0
- package/app/backend/app/services/task_dispatch.py +135 -0
- package/app/backend/app/services/ticket_generation_service.py +1781 -0
- package/app/backend/app/services/ticket_service.py +486 -0
- package/app/backend/app/services/udar_planner_service.py +1007 -0
- package/app/backend/app/services/webhook_service.py +126 -0
- package/app/backend/app/services/workspace_service.py +465 -0
- package/app/backend/app/services/worktree_file_service.py +92 -0
- package/app/backend/app/services/worktree_validator.py +213 -0
- package/app/backend/app/sqlite_kv.py +278 -0
- package/app/backend/app/state_machine.py +128 -0
- package/app/backend/app/templates/__init__.py +5 -0
- package/app/backend/app/templates/registry.py +243 -0
- package/app/backend/app/utils/__init__.py +5 -0
- package/app/backend/app/utils/artifact_reader.py +87 -0
- package/app/backend/app/utils/circuit_breaker.py +229 -0
- package/app/backend/app/utils/db_retry.py +136 -0
- package/app/backend/app/utils/ignored_fields.py +123 -0
- package/app/backend/app/utils/validators.py +54 -0
- package/app/backend/app/websocket/__init__.py +5 -0
- package/app/backend/app/websocket/manager.py +179 -0
- package/app/backend/app/websocket/state_tracker.py +113 -0
- package/app/backend/app/worker.py +3190 -0
- package/app/backend/calculator_tickets.json +40 -0
- package/app/backend/canary_tests.sh +591 -0
- package/app/backend/celerybeat-schedule +0 -0
- package/app/backend/celerybeat-schedule-shm +0 -0
- package/app/backend/celerybeat-schedule-wal +0 -0
- package/app/backend/logs/.gitkeep +3 -0
- package/app/backend/multiplication_division_implementation_tickets.json +55 -0
- package/app/backend/multiplication_division_tickets.json +42 -0
- package/app/backend/pyproject.toml +45 -0
- package/app/backend/requirements-dev.txt +8 -0
- package/app/backend/requirements.txt +20 -0
- package/app/backend/run.sh +30 -0
- package/app/backend/run_with_logs.sh +10 -0
- package/app/backend/scientific_calculator_tickets.json +40 -0
- package/app/backend/scripts/extract_openapi.py +21 -0
- package/app/backend/scripts/seed_demo.py +187 -0
- package/app/backend/setup_demo_review.py +302 -0
- package/app/backend/test_actual_parse.py +41 -0
- package/app/backend/test_agent_streaming.py +61 -0
- package/app/backend/test_parse.py +51 -0
- package/app/backend/test_streaming.py +51 -0
- package/app/backend/test_subprocess_streaming.py +50 -0
- package/app/backend/tests/__init__.py +1 -0
- package/app/backend/tests/conftest.py +46 -0
- package/app/backend/tests/test_auth.py +341 -0
- package/app/backend/tests/test_autonomy_service.py +391 -0
- package/app/backend/tests/test_cleanup_service_safety.py +417 -0
- package/app/backend/tests/test_middleware.py +279 -0
- package/app/backend/tests/test_planner_providers.py +290 -0
- package/app/backend/tests/test_planner_unblock.py +183 -0
- package/app/backend/tests/test_revision_invariants.py +618 -0
- package/app/backend/tests/test_sqlite_kv.py +290 -0
- package/app/backend/tests/test_sqlite_worker.py +353 -0
- package/app/backend/tests/test_task_dispatch.py +100 -0
- package/app/backend/tests/test_ticket_validation.py +304 -0
- package/app/backend/tests/test_udar_agent.py +693 -0
- package/app/backend/tests/test_webhook_service.py +184 -0
- package/app/backend/tickets_output.json +59 -0
- package/app/backend/user_management_tickets.json +50 -0
- package/app/backend/uvicorn.log +0 -0
- package/app/draft.yaml +313 -0
- package/app/frontend/dist/assets/index-LcjCczu5.js +155 -0
- package/app/frontend/dist/assets/index-_FP_279e.css +1 -0
- package/app/frontend/dist/index.html +14 -0
- package/app/frontend/dist/vite.svg +1 -0
- package/app/frontend/package.json +101 -0
- package/bin/cli.js +527 -0
- package/package.json +37 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Queued message service for chaining prompts during execution.
|
|
2
|
+
|
|
3
|
+
Allows users to queue the next prompt while an execution is in progress.
|
|
4
|
+
When the current execution completes, the queued message is automatically
|
|
5
|
+
executed.
|
|
6
|
+
|
|
7
|
+
Uses SQLite kv_store for persistence.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from datetime import UTC, datetime
|
|
14
|
+
|
|
15
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Prefixes prepended to a ticket id to form kv-store keys.
QUEUE_KEY_PREFIX = "queued_message:"
FOLLOWUP_KEY_PREFIX = "followup_prompt:"

# Time-to-live passed to the kv store for queued messages, in seconds.
QUEUE_TTL = 24 * 60 * 60  # 24 hours
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class QueuedMessage:
|
|
24
|
+
"""A queued follow-up message."""
|
|
25
|
+
|
|
26
|
+
ticket_id: str
|
|
27
|
+
message: str
|
|
28
|
+
queued_at: datetime
|
|
29
|
+
|
|
30
|
+
def to_dict(self) -> dict:
|
|
31
|
+
"""Serialize to dict."""
|
|
32
|
+
return {
|
|
33
|
+
"ticket_id": self.ticket_id,
|
|
34
|
+
"message": self.message,
|
|
35
|
+
"queued_at": self.queued_at.isoformat(),
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def from_dict(cls, data: dict) -> "QueuedMessage":
|
|
40
|
+
"""Deserialize from dict."""
|
|
41
|
+
return cls(
|
|
42
|
+
ticket_id=data["ticket_id"],
|
|
43
|
+
message=data["message"],
|
|
44
|
+
queued_at=datetime.fromisoformat(data["queued_at"]),
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class QueuedMessageService:
    """Service for queuing follow-up messages during execution.

    One queued message per ticket. New messages replace old ones.

    Two kinds of entries are kept in the kv store (``app.sqlite_kv``):

    * queued messages, stored as JSON under ``QUEUE_KEY_PREFIX + ticket_id``
      with a ``QUEUE_TTL`` time-to-live;
    * follow-up prompts, stored as raw text under
      ``FOLLOWUP_KEY_PREFIX + ticket_id`` with a one-hour time-to-live.

    The ``app.sqlite_kv`` helpers are imported inside each method, as in the
    original code (presumably to avoid an import cycle or import-time cost —
    TODO confirm).
    """

    def _get_key(self, ticket_id: str) -> str:
        """Get key for a ticket's queued message."""
        return f"{QUEUE_KEY_PREFIX}{ticket_id}"

    def _get_followup_key(self, ticket_id: str) -> str:
        """Get key for a ticket's follow-up prompt."""
        return f"{FOLLOWUP_KEY_PREFIX}{ticket_id}"

    def _decode(self, ticket_id: str, data) -> QueuedMessage | None:
        """Parse a stored payload, returning ``None`` when it is unusable.

        Besides malformed JSON and missing keys, this also tolerates a bad
        ``queued_at`` value (``ValueError`` from ``datetime.fromisoformat``)
        and a payload that is not a JSON object (``TypeError``), so a corrupt
        entry is logged and ignored instead of crashing the caller.
        """
        try:
            return QueuedMessage.from_dict(json.loads(data))
        except (ValueError, KeyError, TypeError) as e:
            # json.JSONDecodeError is a ValueError subclass, so it is covered.
            logger.warning(f"Invalid queued message for {ticket_id}: {e}")
            return None

    def queue_message(self, ticket_id: str, message: str) -> QueuedMessage:
        """Queue a follow-up message for a ticket.

        Replaces any existing queued message for this ticket.

        Args:
            ticket_id: Ticket the message belongs to.
            message: Text to run once the current execution completes.

        Returns:
            The ``QueuedMessage`` that was stored, stamped with the current
            UTC time.
        """
        from app.sqlite_kv import kv_set

        queued = QueuedMessage(
            ticket_id=ticket_id,
            message=message,
            queued_at=datetime.now(UTC),
        )

        key = self._get_key(ticket_id)
        kv_set(key, json.dumps(queued.to_dict()), ttl_seconds=QUEUE_TTL)

        # Only the first 50 characters are logged to keep log lines short.
        logger.info(f"Queued message for ticket {ticket_id}: {message[:50]}...")
        return queued

    def get_queued(self, ticket_id: str) -> QueuedMessage | None:
        """Get the queued message for a ticket (if any).

        Returns ``None`` when nothing is stored or the stored payload cannot
        be parsed; the entry itself is left in place.
        """
        from app.sqlite_kv import kv_get

        data = kv_get(self._get_key(ticket_id))
        if data is None:
            return None

        return self._decode(ticket_id, data)

    def take_queued(self, ticket_id: str) -> QueuedMessage | None:
        """Take (remove and return) the queued message for a ticket.

        Used by the planner to consume queued messages after execution.

        Returns ``None`` when nothing is stored or the stored payload cannot
        be parsed. The entry has already been taken from the store by then,
        so an unparsable payload is dropped.
        """
        from app.sqlite_kv import kv_take

        data = kv_take(self._get_key(ticket_id))
        if data is None:
            return None

        msg = self._decode(ticket_id, data)
        if msg is not None:
            logger.info(f"Consumed queued message for ticket {ticket_id}")
        return msg

    def cancel_queued(self, ticket_id: str) -> bool:
        """Cancel/remove a queued message for a ticket.

        Returns whatever ``kv_delete`` reports; a truthy value is logged as
        a successful cancellation.
        """
        from app.sqlite_kv import kv_delete

        deleted = kv_delete(self._get_key(ticket_id))
        if deleted:
            logger.info(f"Cancelled queued message for ticket {ticket_id}")
        return deleted

    def has_queued(self, ticket_id: str) -> bool:
        """Check if a ticket has a queued message.

        This only checks that the key exists; it does not validate the
        stored payload.
        """
        from app.sqlite_kv import kv_exists

        return kv_exists(self._get_key(ticket_id))

    # ========== Follow-up prompt storage (for worker) ==========

    def set_followup_prompt(self, ticket_id: str, prompt: str) -> None:
        """Set a follow-up prompt for the worker to pick up.

        The prompt is stored as-is (not JSON) with a one-hour time-to-live.
        """
        from app.sqlite_kv import kv_set

        kv_set(self._get_followup_key(ticket_id), prompt, ttl_seconds=3600)
        logger.info(f"Set follow-up prompt for ticket {ticket_id}")

    def get_followup_prompt(self, ticket_id: str) -> str | None:
        """Get and clear the follow-up prompt for a ticket.

        Returns ``None`` when no prompt is stored or the stored prompt is
        empty. A ``bytes`` value is decoded to ``str`` before being returned.
        """
        from app.sqlite_kv import kv_take

        prompt = kv_take(self._get_followup_key(ticket_id))
        if not prompt:
            return None

        logger.info(f"Retrieved follow-up prompt for ticket {ticket_id}")
        return prompt.decode() if isinstance(prompt, bytes) else prompt


# Global singleton
queued_message_service = QueuedMessageService()
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
"""Reliability wrapper for autonomous execution with retry, checkpointing, and recovery."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from enum import StrEnum
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
12
|
+
|
|
13
|
+
from app.exceptions import ExecutorError, ExecutorTimeoutError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CheckpointType(StrEnum):
    """Types of execution checkpoints.

    Being a ``StrEnum``, each member is also a plain string equal to its
    value, which is what gets embedded in checkpoint ids.
    """

    # Recorded once, before the first attempt.
    START = "start"
    # Recorded periodically while an async call runs, and before each retry delay.
    PROGRESS = "progress"
    # Declared but not recorded anywhere in this module.
    VALIDATION = "validation"
    # Recorded after an attempt succeeds (and passes validation, if any).
    COMPLETION = "completion"
    # Recorded on cancellation, a non-retryable error, or exhausted retries.
    FAILURE = "failure"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class ExecutionCheckpoint:
    """Represents a point in execution that can be resumed from.

    Instances are created by ``ReliabilityWrapper._create_checkpoint`` and
    are currently held in memory only.
    """

    # Built as "<checkpoint_key>:<checkpoint type value>:<unix time in whole seconds>".
    checkpoint_id: str
    # Ticket the execution belongs to; used to list and clean up checkpoints.
    ticket_id: str
    # Optional job identifier; None when the caller supplied none.
    job_id: str | None
    # Which stage of the execution this checkpoint records.
    checkpoint_type: CheckpointType
    # Creation time; set from datetime.utcnow(), i.e. a naive datetime.
    timestamp: datetime
    # Zero-based attempt index at which the checkpoint was recorded.
    retry_count: int
    # Free-form details describing the state at this point.
    state_snapshot: dict[str, Any]
    # Error text for failure/retry checkpoints; None otherwise.
    error_message: str | None = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class RetryConfig:
    """Configuration for retry behavior.

    Attributes:
        max_retries: Number of retries after the first attempt.
        initial_delay_seconds: Delay before the first retry (attempt 0).
        max_delay_seconds: Upper bound on the backoff delay before jitter.
        exponential_base: Factor by which the delay grows per attempt.
        jitter: When true, randomize each delay by up to ±20%.
    """

    max_retries: int = 3
    initial_delay_seconds: float = 2.0
    max_delay_seconds: float = 60.0
    exponential_base: float = 2.0
    jitter: bool = True

    def get_delay(self, retry_attempt: int) -> float:
        """Calculate delay for given retry attempt with exponential backoff.

        Args:
            retry_attempt: Zero-based index of the attempt that just failed.

        Returns:
            A non-negative delay in seconds. Without jitter it is
            ``initial_delay_seconds * exponential_base**retry_attempt`` capped
            at ``max_delay_seconds``. Jitter is applied after the cap, so the
            result may exceed ``max_delay_seconds`` by up to 20%.
        """
        try:
            backoff = self.initial_delay_seconds * (
                self.exponential_base**retry_attempt
            )
        except OverflowError:
            # A very large attempt number overflows the float power; the cap
            # would win anyway, so use it instead of crashing the retry loop.
            backoff = self.max_delay_seconds

        delay = min(backoff, self.max_delay_seconds)

        if self.jitter:
            # Add random jitter of ±20% to prevent thundering herd
            import random

            jitter_amount = delay * 0.2
            delay = delay + random.uniform(-jitter_amount, jitter_amount)

        # Always hand back a float, matching the annotation.
        return max(0.0, float(delay))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class ReliabilityWrapper:
    """
    Wraps execution with reliability features:
    - Automatic retry with exponential backoff
    - Checkpointing for resume capability
    - Progress tracking
    - Error recovery

    Checkpoints are kept in memory on this instance only, and only the most
    recent checkpoint per checkpoint key is retained.
    """

    # Substrings of an ExecutorError message that mark it as transient.
    _RETRYABLE_PATTERNS = (
        "rate limit",
        "timeout",
        "temporary",
        "unavailable",
        "too many requests",
        "service unavailable",
        "connection",
    )

    # How often (seconds) a running task is checked for a due checkpoint.
    _POLL_INTERVAL_SECONDS = 1

    def __init__(
        self,
        db: AsyncSession,
        retry_config: RetryConfig | None = None,
        checkpoint_interval_seconds: int = 300,  # 5 minutes
    ):
        """Create a wrapper.

        Args:
            db: Database session. It is stored but not used by the code in
                this class yet (checkpoints are not persisted).
            retry_config: Retry policy; defaults to ``RetryConfig()``.
            checkpoint_interval_seconds: Minimum time between periodic
                progress checkpoints of a running async call.
        """
        self.db = db
        self.retry_config = retry_config or RetryConfig()
        self.checkpoint_interval_seconds = checkpoint_interval_seconds
        # Latest checkpoint per checkpoint key.
        self._checkpoints: dict[str, ExecutionCheckpoint] = {}
        # Monotonic-clock reading of the last checkpoint per checkpoint key.
        self._last_checkpoint_time: dict[str, float] = {}

    async def execute_with_reliability(
        self,
        func: Callable,
        *args,
        ticket_id: str,
        job_id: str | None = None,
        validation_func: Callable[[Any], bool] | None = None,
        checkpoint_key: str | None = None,
        **kwargs,
    ) -> Any:
        """
        Execute a function with automatic retry, checkpointing, and recovery.

        Args:
            func: The function to execute
            *args: Positional arguments for func
            ticket_id: ID of the ticket being executed
            job_id: Optional job ID for tracking
            validation_func: Optional function to validate result before accepting
            checkpoint_key: Optional key for checkpoint storage; defaults to
                ``"<ticket_id>:<job_id or 'default'>"``
            **kwargs: Keyword arguments for func. A keyword named
                ``retry_count`` cannot be forwarded because it clashes with a
                parameter of the internal monitoring helper.

        Returns:
            The result of the function execution

        Raises:
            asyncio.CancelledError: If the execution is cancelled (never retried).
            Exception: The error of the failing attempt when it is not
                retryable or when all retries are exhausted. A failed
                validation raises ``ValueError``.
        """
        checkpoint_key = checkpoint_key or f"{ticket_id}:{job_id or 'default'}"

        async def record(
            checkpoint_type: CheckpointType,
            retry_count: int,
            state_snapshot: dict[str, Any],
            error_message: str | None = None,
        ) -> None:
            # Every checkpoint of this run shares the same key and ids.
            await self._create_checkpoint(
                checkpoint_key=checkpoint_key,
                ticket_id=ticket_id,
                job_id=job_id,
                checkpoint_type=checkpoint_type,
                retry_count=retry_count,
                state_snapshot=state_snapshot,
                error_message=error_message,
            )

        # Create initial checkpoint
        await record(
            CheckpointType.START, 0, {"args": str(args), "kwargs": str(kwargs)}
        )

        # A negative retry budget still means one attempt; previously the loop
        # did not run at all and None was returned without calling func.
        max_retries = max(self.retry_config.max_retries, 0)

        for attempt in range(max_retries + 1):
            try:
                # The helper's own parameters are passed positionally so that
                # the caller's *args land in its *args. Passing them by
                # keyword made the first positional argument collide with
                # ``func`` and raise TypeError.
                result = await self._execute_with_monitoring(
                    func,
                    checkpoint_key,
                    ticket_id,
                    job_id,
                    attempt,
                    *args,
                    **kwargs,
                )

                # Validate result if validation function provided.
                # NOTE(review): the ValueError raised here is classified as
                # non-retryable below, so a failed validation is not retried
                # — confirm that this is intended.
                if validation_func and not await self._validate_result(
                    result, validation_func
                ):
                    raise ValueError("Result validation failed")

                # Success - create completion checkpoint
                await record(CheckpointType.COMPLETION, attempt, {"success": True})
                return result

            except asyncio.CancelledError:
                # Don't retry on cancellation
                await record(
                    CheckpointType.FAILURE,
                    attempt,
                    {"cancelled": True},
                    "Execution cancelled",
                )
                raise

            except Exception as e:
                # Check if error is retryable
                if not self._is_retryable_error(e):
                    await record(
                        CheckpointType.FAILURE,
                        attempt,
                        {"non_retryable": True},
                        str(e),
                    )
                    raise

                # Last attempt failed
                if attempt >= max_retries:
                    await record(
                        CheckpointType.FAILURE,
                        attempt,
                        {"exhausted_retries": True},
                        str(e),
                    )
                    raise

                # Calculate delay and retry
                delay = self.retry_config.get_delay(attempt)
                await record(
                    CheckpointType.PROGRESS,
                    attempt,
                    {
                        "retry_in_seconds": delay,
                        "error": str(e),
                        "attempt": attempt + 1,
                    },
                    str(e),
                )
                await asyncio.sleep(delay)

    async def _execute_with_monitoring(
        self,
        func: Callable,
        checkpoint_key: str,
        ticket_id: str,
        job_id: str | None,
        retry_count: int,
        *args,
        **kwargs,
    ) -> Any:
        """Execute function with progress monitoring and periodic checkpointing.

        A coroutine function runs as a task that is polled about once per
        second; a PROGRESS checkpoint is recorded whenever
        ``checkpoint_interval_seconds`` have passed since the previous one.
        Any other callable is invoked directly, without monitoring, and
        blocks the event loop until it returns.
        """
        # A monotonic clock is used so wall-clock adjustments cannot distort
        # the interval measurement.
        start_time = time.monotonic()
        last_checkpoint = start_time
        self._last_checkpoint_time[checkpoint_key] = last_checkpoint

        if not asyncio.iscoroutinefunction(func):
            # Sync function - execute directly
            return func(*args, **kwargs)

        # Create a task so we can monitor it
        task = asyncio.create_task(func(*args, **kwargs))
        try:
            while True:
                # Returns as soon as the task finishes, or after the poll
                # interval; unlike a plain sleep, a fast task is not delayed.
                done, _pending = await asyncio.wait(
                    {task}, timeout=self._POLL_INTERVAL_SECONDS
                )
                if done:
                    break

                now = time.monotonic()
                if now - last_checkpoint >= self.checkpoint_interval_seconds:
                    await self._create_checkpoint(
                        checkpoint_key=checkpoint_key,
                        ticket_id=ticket_id,
                        job_id=job_id,
                        checkpoint_type=CheckpointType.PROGRESS,
                        retry_count=retry_count,
                        state_snapshot={
                            "elapsed_seconds": now - start_time,
                            "still_running": True,
                        },
                    )
                    last_checkpoint = time.monotonic()
                    self._last_checkpoint_time[checkpoint_key] = last_checkpoint
        finally:
            # If we leave early (cancellation or a checkpoint error), do not
            # let the wrapped call keep running in the background.
            if not task.done():
                task.cancel()

        return await task

    async def _validate_result(self, result: Any, validation_func: Callable) -> bool:
        """Validate execution result.

        Supports both sync and async validators. A validator that raises is
        treated as having rejected the result.
        """
        try:
            if asyncio.iscoroutinefunction(validation_func):
                return await validation_func(result)
            return validation_func(result)
        except Exception:
            return False

    def _is_retryable_error(self, error: Exception) -> bool:
        """Determine if an error is retryable.

        Retryable: connection and timeout errors, ``ExecutorTimeoutError``,
        and any ``ExecutorError`` whose message contains one of
        ``_RETRYABLE_PATTERNS`` (case-insensitive). Everything else,
        including ``ValueError``/``TypeError``/``KeyError``/``AttributeError``
        and unknown error types, is not retried.
        """
        # Network/connection errors - retryable
        if isinstance(error, (ConnectionError, TimeoutError, asyncio.TimeoutError)):
            return True

        # Executor timeout - retryable
        if isinstance(error, ExecutorTimeoutError):
            return True

        # Some executor errors are retryable (transient failures)
        if isinstance(error, ExecutorError):
            error_msg = str(error).lower()
            # Retry on rate limits, temporary unavailability, etc.
            return any(pattern in error_msg for pattern in self._RETRYABLE_PATTERNS)

        # Validation errors, logic errors - not retryable
        if isinstance(error, (ValueError, TypeError, KeyError, AttributeError)):
            return False

        # Default: don't retry unknown errors
        return False

    async def _create_checkpoint(
        self,
        checkpoint_key: str,
        ticket_id: str,
        job_id: str | None,
        checkpoint_type: CheckpointType,
        retry_count: int,
        state_snapshot: dict[str, Any],
        error_message: str | None = None,
    ) -> None:
        """Create an execution checkpoint.

        The checkpoint replaces any earlier one stored under the same
        ``checkpoint_key``.
        """
        checkpoint = ExecutionCheckpoint(
            # The id has one-second resolution, so two checkpoints of the same
            # type created within one second share an id.
            checkpoint_id=f"{checkpoint_key}:{checkpoint_type.value}:{int(time.time())}",
            ticket_id=ticket_id,
            job_id=job_id,
            checkpoint_type=checkpoint_type,
            # NOTE(review): utcnow() yields a naive datetime and is deprecated
            # since Python 3.12; switching to an aware datetime would change
            # the field's value type, so it is left as-is here.
            timestamp=datetime.utcnow(),
            retry_count=retry_count,
            state_snapshot=state_snapshot,
            error_message=error_message,
        )

        self._checkpoints[checkpoint_key] = checkpoint

        # TODO: Persist checkpoint to database for true resumability
        # For now, keeping in memory is sufficient for single-session reliability

    async def get_last_checkpoint(
        self, checkpoint_key: str
    ) -> ExecutionCheckpoint | None:
        """Get the last checkpoint for a given key, or ``None`` if there is none."""
        return self._checkpoints.get(checkpoint_key)

    async def list_checkpoints(self, ticket_id: str) -> list[ExecutionCheckpoint]:
        """List all checkpoints for a ticket.

        Since only the latest checkpoint per key is kept, this yields at most
        one checkpoint per checkpoint key.
        """
        return [cp for cp in self._checkpoints.values() if cp.ticket_id == ticket_id]

    async def cleanup_checkpoints(self, ticket_id: str) -> None:
        """Clean up checkpoints for a completed ticket."""
        # Collect the keys first: the dict must not change size while it is
        # being iterated.
        keys_to_remove = [
            key for key, cp in self._checkpoints.items() if cp.ticket_id == ticket_id
        ]
        for key in keys_to_remove:
            del self._checkpoints[key]
            self._last_checkpoint_time.pop(key, None)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
async def with_retry(
    func: Callable, *args, max_retries: int = 3, initial_delay: float = 2.0, **kwargs
) -> Any:
    """
    Simple retry helper for functions that don't need full reliability wrapper.

    Calls ``func(*args, **kwargs)`` and awaits the result when it is
    awaitable, so coroutine functions and plain callables that return a
    coroutine (e.g. ``lambda: fetch(url)``) are both supported. Every
    ``Exception`` triggers a retry after an exponential backoff delay;
    cancellation is not intercepted.

    Args:
        func: Callable to invoke.
        *args: Positional arguments for func.
        max_retries: Retries after the first attempt; values below zero are
            treated as zero, so func is always called at least once.
        initial_delay: Delay in seconds before the first retry.
        **kwargs: Keyword arguments for func.

    Returns:
        The (awaited) result of the first successful call.

    Raises:
        Exception: The error of the final attempt once retries are exhausted.

    Usage:
        result = await with_retry(some_async_func, arg1, arg2, max_retries=5)
    """
    import inspect

    retry_config = RetryConfig(
        max_retries=max_retries, initial_delay_seconds=initial_delay
    )
    retries = max(max_retries, 0)

    for attempt in range(retries + 1):
        try:
            result = func(*args, **kwargs)
            # Await inside the try block so failures of the awaited work are
            # retried too, not just failures while creating it.
            if inspect.isawaitable(result):
                result = await result
            return result
        except Exception:
            if attempt >= retries:
                raise

            delay = retry_config.get_delay(attempt)
            await asyncio.sleep(delay)
|