abom-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abom/__init__.py +3 -0
- abom/agents.py +58 -0
- abom/api.py +177 -0
- abom/audit.py +122 -0
- abom/bom.py +147 -0
- abom/cli.py +130 -0
- abom/config.py +37 -0
- abom/db.py +127 -0
- abom/execution.py +73 -0
- abom/models_router.py +84 -0
- abom/orchestration.py +237 -0
- abom/policy.py +46 -0
- abom/scan.py +207 -0
- abom/schemas.py +79 -0
- abom/sign.py +91 -0
- abom_cli-0.1.0.dist-info/METADATA +108 -0
- abom_cli-0.1.0.dist-info/RECORD +19 -0
- abom_cli-0.1.0.dist-info/WHEEL +4 -0
- abom_cli-0.1.0.dist-info/entry_points.txt +2 -0
abom/db.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Async SQLAlchemy engine, session factory, and ORM models (see MVP_SPEC §5)."""
|
|
2
|
+
from __future__ import annotations

import uuid
from collections.abc import AsyncIterator
from datetime import datetime, timezone

from sqlalchemy import (
    BigInteger, Boolean, DateTime, ForeignKey, Integer, String, Text,
    UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

from .config import settings
|
|
16
|
+
|
|
17
|
+
# Module-level async engine, created at import time from the configured URL.
engine = create_async_engine(
    settings.database_url,
    pool_pre_ping=True,
)

# Factory for AsyncSession objects bound to the engine above.
SessionLocal = async_sessionmaker(
    engine,
    class_=AsyncSession,
    expire_on_commit=False,
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _uuid() -> uuid.UUID:
|
|
22
|
+
return uuid.uuid4()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _now() -> datetime:
|
|
26
|
+
return datetime.now(timezone.utc)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module inherit from."""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Project(Base):
    """ORM model for the ``projects`` table: a repository plus its test command."""

    __tablename__ = "projects"

    # Primary key; generated in Python by _uuid when a row is created.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    name: Mapped[str] = mapped_column(String(200))
    repo_url: Mapped[str] = mapped_column(Text)
    # Command line for the build/test gate; defaults to a quiet pytest run.
    test_command: Mapped[str] = mapped_column(Text, default="pytest -q")
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Run(Base):
    """ORM model for the ``runs`` table: one agent run against a project."""

    __tablename__ = "runs"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    project_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("projects.id"))
    created_by: Mapped[str] = mapped_column(String(200))
    intent: Mapped[str] = mapped_column(Text)
    workload_type: Mapped[str] = mapped_column(String(50), default="dev_agent")
    # Free-form lifecycle state; new rows start as "pending".
    status: Mapped[str] = mapped_column(String(30), default="pending")
    model: Mapped[str] = mapped_column(String(120), default="")
    max_iterations: Mapped[int] = mapped_column(Integer, default=4)
    # Token counters start at zero.
    prompt_tokens: Mapped[int] = mapped_column(Integer, default=0)
    completion_tokens: Mapped[int] = mapped_column(Integer, default=0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
    # Refreshed by the ORM on every UPDATE via onupdate.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
        onupdate=_now,
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class RunStep(Base):
    """ORM model for the ``run_steps`` table: one sequenced step of a run."""

    __tablename__ = "run_steps"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    run_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("runs.id"))
    # Position of the step; presumably ordered within its run -- TODO confirm.
    seq: Mapped[int] = mapped_column(Integer)
    type: Mapped[str] = mapped_column(String(30))
    # JSONB document; a new empty dict is used when no payload is given.
    payload: Mapped[dict] = mapped_column(JSONB, default=dict)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class GateResult(Base):
    """ORM model for the ``gate_results`` table: the outcome of one gate check."""

    __tablename__ = "gate_results"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    run_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("runs.id"))
    # Optional step reference; stored as a plain UUID with no foreign key.
    step_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        nullable=True,
    )
    gate: Mapped[str] = mapped_column(String(50))
    status: Mapped[str] = mapped_column(String(10))  # pass|fail
    details: Mapped[dict] = mapped_column(JSONB, default=dict)
    duration_ms: Mapped[int] = mapped_column(Integer, default=0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class Approval(Base):
    """ORM model for the ``approvals`` table: a human approval request and its decision."""

    __tablename__ = "approvals"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    run_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("runs.id"))
    # Optional step reference; stored as a plain UUID with no foreign key.
    step_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        nullable=True,
    )
    required: Mapped[bool] = mapped_column(Boolean, default=True)
    # Starts as "pending".
    status: Mapped[str] = mapped_column(String(20), default="pending")
    # Both fields are nullable and have no default.
    approver: Mapped[str | None] = mapped_column(String(200), nullable=True)
    decided_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Artifact(Base):
    """ORM model for the ``artifacts`` table: a stored object produced by a run."""

    __tablename__ = "artifacts"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    run_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("runs.id"))
    kind: Mapped[str] = mapped_column(String(30))  # diff|gate_log|workspace
    # Key of the object in external storage -- presumably an object store; verify.
    object_key: Mapped[str] = mapped_column(Text)
    # 64 characters: the length of a hex-encoded SHA-256 digest.
    sha256: Mapped[str] = mapped_column(String(64))
    size_bytes: Mapped[int] = mapped_column(BigInteger, default=0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class AuditEvent(Base):
    """ORM model for the ``audit_events`` table: one entry of a run's audit log.

    Each row carries its own ``hash`` and the ``prev_hash`` of another entry;
    how the two are computed is not defined in this module.
    """

    __tablename__ = "audit_events"
    # A given (run_id, seq) pair may appear only once.
    __table_args__ = (
        UniqueConstraint("run_id", "seq", name="uq_audit_run_seq"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    run_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("runs.id"))
    seq: Mapped[int] = mapped_column(Integer)
    event_type: Mapped[str] = mapped_column(String(50))
    actor: Mapped[str] = mapped_column(String(200))
    data: Mapped[dict] = mapped_column(JSONB, default=dict)
    # NOTE(review): prev_hash allows 80 characters while hash allows 64 -- confirm intent.
    prev_hash: Mapped[str] = mapped_column(String(80))
    hash: Mapped[str] = mapped_column(String(64))
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class Policy(Base):
    """ORM model for the ``policies`` table: a uniquely named JSON policy document."""

    __tablename__ = "policies"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    # Policy names are unique across the table.
    name: Mapped[str] = mapped_column(String(120), unique=True)
    # JSONB policy body; an empty dict when none is supplied.
    document: Mapped[dict] = mapped_column(JSONB, default=dict)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
async def get_session() -> AsyncIterator[AsyncSession]:  # FastAPI dependency
    """Yield one ``AsyncSession`` and close it once the consumer is finished.

    This is an async generator, so the return annotation is an iterator of
    sessions rather than a session. The ``async with`` block closes the
    session when the generator is resumed or closed after the ``yield``.

    Yields:
        A session created by ``SessionLocal``.
    """
    async with SessionLocal() as session:
        yield session
|
abom/execution.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Sandbox runner + the build/test gate.
|
|
2
|
+
|
|
3
|
+
MVP sandbox = a constrained subprocess in an ephemeral workspace. The Sandbox
|
|
4
|
+
interface is intentionally stable so Phase-2 can swap in gVisor / Kata microVMs
|
|
5
|
+
without touching callers.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import shutil
|
|
11
|
+
import subprocess
|
|
12
|
+
import time
|
|
13
|
+
import uuid
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
|
|
16
|
+
from .config import settings
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class GateOutcome:
    """Result of a single build/test gate execution in a sandbox."""

    status: str  # pass|fail
    exit_code: int
    duration_ms: int
    stdout: str
    stderr: str
    # Free-form extra information; each instance gets its own dict.
    details: dict = field(default_factory=dict)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Sandbox:
    """Ephemeral workspace. Phase-2: replace _run with a microVM executor.

    Each instance owns one directory under ``settings.workspace_root``; the
    target repository, when materialized, lives in its ``repo`` subdirectory.
    """

    def __init__(self, run_id: str):
        """Create the workspace directory for ``run_id``.

        Args:
            run_id: Identifier of the run; coerced to ``str``.
        """
        self.run_id = str(run_id)
        # The random suffix gives every Sandbox instance its own directory,
        # even when two are created for the same run.
        self.path = os.path.join(settings.workspace_root, f"{self.run_id}-{uuid.uuid4().hex[:8]}")
        os.makedirs(self.path, exist_ok=True)

    def materialize(self, repo_url: str) -> None:
        """Clone / copy the target repo into the workspace (no network egress in real impl
        beyond the customer's own git). MVP: local path or git clone.

        Args:
            repo_url: An existing local directory (copied) or anything else
                (handed to ``git clone``).

        Raises:
            subprocess.CalledProcessError: If ``git clone`` exits non-zero.
            subprocess.TimeoutExpired: If ``git clone`` exceeds the ``_run`` default timeout.
        """
        if os.path.isdir(repo_url):
            shutil.copytree(repo_url, os.path.join(self.path, "repo"), dirs_exist_ok=True)
        else:
            # "--" ends option parsing, so a repo_url that starts with "-"
            # cannot be interpreted by git as a command-line option.
            self._run(["git", "clone", "--depth", "1", "--", repo_url, "repo"])

    def apply_patch(self, patch_text: str) -> dict:
        """Apply an agent-proposed change. MVP placeholder: write the patch to disk.
        Real impl: `git apply` and compute changed_paths.

        Args:
            patch_text: The proposed patch, written verbatim to ``proposal.patch``.

        Returns:
            A dict with ``changed_paths`` (always empty for now) and
            ``writes_outside_workspace`` (always ``False`` for now).
        """
        patch_file = os.path.join(self.path, "proposal.patch")
        # Explicit UTF-8 and newline="" keep the bytes on disk independent of
        # the host locale and platform newline translation.
        with open(patch_file, "w", encoding="utf-8", newline="") as fh:
            fh.write(patch_text)
        # TODO: git apply + parse changed paths
        return {"changed_paths": [], "writes_outside_workspace": False}

    def run_gate(self, test_command: str) -> GateOutcome:
        """Run the build/test command inside the workspace's ``repo`` directory.

        Args:
            test_command: Shell-style command line; split with ``shlex`` so
                quoted arguments stay intact. It is never passed to a shell.

        Returns:
            A ``GateOutcome`` whose status is "pass" only for exit code 0.
            A timeout is reported as a failed outcome with exit code 124.

        Raises:
            OSError: If the command or the ``repo`` directory does not exist.
        """
        import shlex  # function-scope import: only this method needs it

        argv = shlex.split(test_command)
        start = time.monotonic()
        try:
            proc = self._run(argv, cwd=os.path.join(self.path, "repo"),
                             timeout=settings.gate_timeout_seconds, check=False)
        except subprocess.TimeoutExpired as exc:
            # A gate that ran out of time is a failed gate, not an error in
            # the caller; 124 is the exit status the timeout(1) tool uses.
            dur = int((time.monotonic() - start) * 1000)
            return GateOutcome(
                status="fail", exit_code=124, duration_ms=dur,
                stdout=self._tail(exc.stdout), stderr=self._tail(exc.stderr),
                details={"command": test_command, "timed_out": True},
            )
        dur = int((time.monotonic() - start) * 1000)
        status = "pass" if proc.returncode == 0 else "fail"
        return GateOutcome(
            status=status, exit_code=proc.returncode, duration_ms=dur,
            stdout=self._tail(proc.stdout), stderr=self._tail(proc.stderr),
            details={"command": test_command},
        )

    def cleanup(self) -> None:
        """Delete the workspace directory, ignoring any errors while doing so."""
        shutil.rmtree(self.path, ignore_errors=True)

    @staticmethod
    def _tail(value, limit=8000):
        """Return the last ``limit`` characters of captured output as ``str``.

        ``None`` becomes an empty string. Bytes are decoded as UTF-8 with
        replacement characters, in case timeout output arrives undecoded.
        """
        if value is None:
            return ""
        if isinstance(value, bytes):
            value = value.decode("utf-8", errors="replace")
        return value[-limit:]

    def _run(self, cmd, cwd=None, timeout=120, check=True):
        """Run ``cmd`` (an argument list) and return the ``CompletedProcess``.

        Output is captured as text. ``cwd`` defaults to the workspace root.
        With ``check=True`` a non-zero exit raises ``CalledProcessError``.
        """
        return subprocess.run(
            cmd, cwd=cwd or self.path, capture_output=True, text=True,
            timeout=timeout, check=check,
        )
|
abom/models_router.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Model router + client.
|
|
2
|
+
|
|
3
|
+
MVP: routes everything to the single local OpenAI-compatible endpoint (vLLM).
|
|
4
|
+
The Router is where Phase-2 adds data-sensitivity classification and the gated
|
|
5
|
+
egress path; the interface stays the same.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
|
|
14
|
+
from .config import settings
|
|
15
|
+
from . import policy as policy_mod
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ModelResponse:
    """One completion returned by a model client."""

    # Generated text of the reply.
    text: str
    # Token usage for the call.
    prompt_tokens: int
    completion_tokens: int
    # Name of the model the client was configured with.
    model: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ModelClient:
    """OpenAI-compatible chat client."""

    def __init__(self, base_url: str, model: str):
        """Remember the endpoint and model name.

        Args:
            base_url: API root; trailing slashes are removed so the request
                path can be appended with a single "/".
            model: Model name sent with every request.
        """
        self.base_url = base_url.rstrip("/")
        self.model = model

    async def complete(self, messages: list[dict[str, str]], **kw: Any) -> ModelResponse:
        """POST ``messages`` to ``/chat/completions`` and return the first choice.

        Args:
            messages: Chat messages to send.
            **kw: Extra request-body fields; they are merged last and can
                therefore override ``model`` or ``messages``.

        Returns:
            A ``ModelResponse`` with the reply text and token usage. A missing
            or null ``usage`` object, token count, or message ``content`` is
            reported as 0 / an empty string instead of raising.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
            KeyError, IndexError: If the response has no usable first choice.
        """
        async with httpx.AsyncClient(timeout=120) as client:
            resp = await client.post(
                f"{self.base_url}/chat/completions",
                json={"model": self.model, "messages": messages, **kw},
            )
            resp.raise_for_status()
            data = resp.json()
        # "usage" and "content" may be present but null; `or` covers both the
        # missing-key and the explicit-null case.
        usage = data.get("usage") or {}
        message = data["choices"][0]["message"]
        return ModelResponse(
            text=message.get("content") or "",
            prompt_tokens=usage.get("prompt_tokens") or 0,
            completion_tokens=usage.get("completion_tokens") or 0,
            model=self.model,
        )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class MockModelClient:
    """Deterministic stand-in so the stack runs on a laptop without a GPU.

    The first call of a conversation (no assistant message yet) returns a
    deliberately broken patch; every later call returns the corrected one,
    which exercises the critic loop.
    """

    def __init__(self, model: str):
        self.model = model

    async def complete(self, messages: list[dict[str, str]], **kw: Any) -> ModelResponse:
        """Return a canned reply chosen by how many assistant turns precede it."""
        prior_replies = [m for m in messages if m["role"] == "assistant"]
        if prior_replies:
            text = "PATCH:\n*** corrected patch passing the gate ***"
        else:
            text = "PATCH:\n*** broken on purpose (missing import) ***"
        # Token counts are fixed placeholder values.
        return ModelResponse(
            text=text,
            prompt_tokens=128,
            completion_tokens=64,
            model=self.model,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class Router:
    """Chooses the model for a task and builds the matching client."""

    def __init__(self, pol: dict | None = None):
        # NOTE(review): any falsy value -- None or an empty dict -- selects
        # the default policy; confirm an empty policy should not mean
        # "nothing allowed".
        self.policy = policy_mod.DEFAULT_POLICY if not pol else pol

    def select(self, *, task: str, sensitivity: str = "high") -> tuple[str, bool]:
        """Return (model_name, is_local). MVP always local; never egress.

        ``task`` and ``sensitivity`` are accepted but not consulted yet.

        Raises:
            PermissionError: If the configured model is not allowed by the policy.
        """
        model = settings.model_name
        if policy_mod.model_allowed(self.policy, model):
            # sensitivity high -> must stay local. egress disabled in MVP.
            return model, True
        raise PermissionError(f"model {model} not allowed by policy")

    def client(self) -> ModelClient | MockModelClient:
        """Build the mock client when ``settings.model_use_mock`` is set, else the real one."""
        if not settings.model_use_mock:
            return ModelClient(settings.model_base_url, settings.model_name)
        return MockModelClient(settings.model_name)
|
abom/orchestration.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Temporal workflow + activities + worker entrypoint.
|
|
2
|
+
|
|
3
|
+
The workflow is deterministic orchestration only; all IO (DB, model, sandbox)
|
|
4
|
+
happens in activities. A worker crash mid-run resumes from the last completed
|
|
5
|
+
activity — the durability guarantee the architecture relies on.
|
|
6
|
+
|
|
7
|
+
Run the worker with: python -m abom.orchestration
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from datetime import timedelta
|
|
14
|
+
|
|
15
|
+
from temporalio import activity, workflow
|
|
16
|
+
from temporalio.client import Client
|
|
17
|
+
from temporalio.common import RetryPolicy
|
|
18
|
+
from temporalio.worker import Worker
|
|
19
|
+
|
|
20
|
+
with workflow.unsafe.imports_passed_through():
|
|
21
|
+
from .config import settings
|
|
22
|
+
from . import policy as policy_mod
|
|
23
|
+
from . import audit
|
|
24
|
+
from .db import SessionLocal, Run, RunStep, GateResult, Approval
|
|
25
|
+
from .agents import SimpleAgent
|
|
26
|
+
from .models_router import Router
|
|
27
|
+
from .execution import Sandbox
|
|
28
|
+
from sqlalchemy import select
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ----------------------------- activity payloads ----------------------------
|
|
32
|
+
@dataclass
class ProposeResult:
    """Payload returned by the ``agent_step`` activity."""

    # Patch text proposed by the agent.
    patch_text: str
    # The two fields below come from Sandbox.apply_patch.
    changed_paths: list[str]
    writes_outside_workspace: bool
    # Token usage of the model call behind this proposal.
    prompt_tokens: int
    completion_tokens: int
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class GateRecord:
    """Payload returned by the ``run_gate`` activity."""

    # "pass" or "fail".
    status: str
    exit_code: int
    duration_ms: int
    # Short description of the outcome, fed back to the agent on failure.
    summary: str
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# --------------------------------- activities -------------------------------
|
|
50
|
+
class Activities:
    """Temporal activities: all DB, model and sandbox IO for an agent run.

    Agents and sandboxes are cached per run in this worker process only.
    They are created on demand if missing, so an activity dispatched after a
    worker restart does not fail with ``KeyError``.
    """

    def __init__(self):
        self._agents: dict[str, SimpleAgent] = {}
        self._sandboxes: dict[str, Sandbox] = {}

    def _agent_for(self, run_id: str) -> SimpleAgent:
        """Return the cached agent for ``run_id``, creating one if needed."""
        agent = self._agents.get(run_id)
        if agent is None:
            agent = self._agents[run_id] = SimpleAgent(Router())
        return agent

    def _sandbox_for(self, run_id: str) -> Sandbox:
        """Return the cached sandbox for ``run_id``, creating one if needed.

        NOTE(review): a sandbox recreated after a restart starts empty; once
        the repo is materialized in ``start_run`` it must be redone here too.
        """
        sb = self._sandboxes.get(run_id)
        if sb is None:
            sb = self._sandboxes[run_id] = Sandbox(run_id)
        return sb

    async def _audit(self, run_id: str, event_type: str, actor: str, data: dict):
        """Append one audit event for the run in its own committed session."""
        async with SessionLocal() as s:
            await audit.append_event(s, run_id=run_id, event_type=event_type, actor=actor, data=data)
            await s.commit()

    @activity.defn
    async def start_run(self, run_id: str) -> None:
        """Mark the run as running and prepare its agent and workspace."""
        async with SessionLocal() as s:
            run = (await s.execute(select(Run).where(Run.id == run_id))).scalar_one()
            run.status = "running"
            run.model = settings.model_name
            await s.commit()
        await self._audit(run_id, "run.created", "system", {"model": settings.model_name})
        self._agent_for(run_id)
        # Reusing an existing sandbox keeps a retried start_run from leaving
        # an orphaned workspace directory behind.
        sb = self._sandbox_for(run_id)
        # NOTE: materialize repo here in real impl (needs project.repo_url)
        await self._audit(run_id, "run.planned", "system", {"workspace": sb.path})

    @activity.defn
    async def agent_step(self, run_id: str, iteration: int, feedback: dict | None) -> ProposeResult:
        """Ask the agent for a proposal and write it into the sandbox.

        Args:
            run_id: Run being processed.
            iteration: Zero-based attempt number.
            feedback: Summary of the previous failed gate, or ``None`` on the first attempt.

        Returns:
            The proposed patch together with what applying it reported and the token usage.
        """
        agent = self._agent_for(run_id)
        proposal = await agent.propose(intent=await self._intent(run_id), iteration=iteration, feedback=feedback)
        await self._audit(run_id, "model.called", "system",
                          {"iteration": iteration, "prompt_tokens": proposal.prompt_tokens,
                           "completion_tokens": proposal.completion_tokens})
        sb = self._sandbox_for(run_id)
        applied = sb.apply_patch(proposal.patch_text)
        await self._audit(run_id, "action.proposed", "system",
                          {"iteration": iteration, "changed_paths": applied["changed_paths"]})
        return ProposeResult(
            patch_text=proposal.patch_text,
            changed_paths=applied["changed_paths"],
            writes_outside_workspace=applied["writes_outside_workspace"],
            prompt_tokens=proposal.prompt_tokens,
            completion_tokens=proposal.completion_tokens,
        )

    @activity.defn
    async def run_gate(self, run_id: str, iteration: int) -> GateRecord:
        """Run the project's test command in the sandbox and persist the result.

        When no repo has been materialized (the pure-mock demo), the outcome
        is simulated: fail on iteration 0, pass afterwards. When a repo
        directory exists, an error while running the gate is recorded as a
        failure and is never simulated as a pass.
        """
        import os  # function-scope import: this module does not import os

        sb = self._sandbox_for(run_id)
        test_command = await self._test_command(run_id)
        repo_present = os.path.isdir(os.path.join(sb.path, "repo"))
        try:
            # Sandbox.run_gate blocks on a subprocess; run it in a thread so
            # the worker's event loop stays responsive.
            outcome = await asyncio.to_thread(sb.run_gate, test_command)
        except Exception as exc:
            if repo_present:
                status, code, dur, summary = "fail", 1, 0, f"gate did not run: {exc}"
            else:  # no repo materialized in pure-mock demo
                status = "pass" if iteration > 0 else "fail"
                code, dur, summary = (0 if status == "pass" else 1), 5, f"(mock) {exc}"
        else:
            status, code, dur = outcome.status, outcome.exit_code, outcome.duration_ms
            summary = outcome.stderr[-500:]
        async with SessionLocal() as s:
            s.add(GateResult(run_id=run_id, gate="build+test", status=status,
                             details={"summary": summary}, duration_ms=dur))
            await s.commit()
        await self._audit(run_id, "gate.evaluated", "system",
                          {"iteration": iteration, "status": status, "exit_code": code})
        return GateRecord(status=status, exit_code=code, duration_ms=dur, summary=summary)

    @activity.defn
    async def needs_approval(self, run_id: str, proposal: ProposeResult) -> bool:
        """Evaluate the default policy and, if required, open a pending approval."""
        required = policy_mod.approval_required(
            policy_mod.DEFAULT_POLICY,
            {"changed_paths": proposal.changed_paths,
             "writes_outside_workspace": proposal.writes_outside_workspace},
        )
        if required:
            async with SessionLocal() as s:
                s.add(Approval(run_id=run_id, required=True, status="pending"))
                await s.commit()
            await self._audit(run_id, "approval.requested", "system", {})
        return required

    @activity.defn
    async def record_approval(self, run_id: str, decision: str, approver: str) -> None:
        """Store the decision on the run's first pending approval and audit it."""
        from datetime import datetime, timezone
        async with SessionLocal() as s:
            ap = (await s.execute(
                select(Approval).where(Approval.run_id == run_id, Approval.status == "pending")
            )).scalars().first()
            if ap:
                ap.status = decision
                ap.approver = approver
                ap.decided_at = datetime.now(timezone.utc)
                await s.commit()
        await self._audit(run_id, "approval.decided", approver, {"decision": decision})

    @activity.defn
    async def finalize(self, run_id: str, status: str, tokens: dict) -> None:
        """Persist the final status and token totals, then release run resources."""
        async with SessionLocal() as s:
            run = (await s.execute(select(Run).where(Run.id == run_id))).scalar_one()
            run.status = status
            run.prompt_tokens = tokens.get("prompt", 0)
            run.completion_tokens = tokens.get("completion", 0)
            await s.commit()
        await self._audit(run_id, "run.completed" if status == "succeeded" else "run.failed",
                          "system", {"status": status})
        sb = self._sandboxes.pop(run_id, None)
        if sb:
            sb.cleanup()
        self._agents.pop(run_id, None)

    async def _intent(self, run_id: str) -> str:
        """Load the run's ``intent`` text from the database."""
        async with SessionLocal() as s:
            return (await s.execute(select(Run.intent).where(Run.id == run_id))).scalar_one()

    async def _test_command(self, run_id: str) -> str:
        """Load the test command of the project that owns the run."""
        from .db import Project
        async with SessionLocal() as s:
            run = (await s.execute(select(Run).where(Run.id == run_id))).scalar_one()
            proj = (await s.execute(select(Project).where(Project.id == run.project_id))).scalar_one()
            return proj.test_command
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# --------------------------------- workflow ---------------------------------
|
|
171
|
+
@workflow.defn
class AgentRunWorkflow:
    """Propose -> gate -> (optional approval) -> finalize loop for one run.

    The workflow only orchestrates; every side effect happens in an activity.
    """

    def __init__(self):
        # Decision delivered by the `approve` signal; None until it arrives.
        self._approval: str | None = None
        # Initialised here so the attribute always exists.
        self._approver: str = "operator"

    @workflow.signal
    def approve(self, decision: str, approver: str) -> None:
        """Signal handler: record the approval decision and who made it."""
        # Set the approver first so it is in place before the wait condition
        # (which watches _approval) can become true.
        self._approver = approver
        self._approval = decision

    @workflow.run
    async def run(self, run_id: str, max_iterations: int) -> str:
        """Drive one agent run to completion.

        Args:
            run_id: Identifier of the run row to process.
            max_iterations: Maximum number of propose/gate attempts.

        Returns:
            "succeeded", "rejected", or "failed". "failed" covers a gate that
            never passed and an approval not answered within 24 hours.
        """
        retry = RetryPolicy(maximum_attempts=3)
        opts = dict(start_to_close_timeout=timedelta(minutes=20), retry_policy=retry)

        await workflow.execute_activity(Activities.start_run, run_id, **opts)

        feedback: dict | None = None
        last: ProposeResult | None = None
        gate: GateRecord | None = None
        tokens = {"prompt": 0, "completion": 0}

        for i in range(max_iterations):
            last = await workflow.execute_activity(Activities.agent_step, args=[run_id, i, feedback], **opts)
            tokens["prompt"] += last.prompt_tokens
            tokens["completion"] += last.completion_tokens
            gate = await workflow.execute_activity(Activities.run_gate, args=[run_id, i], **opts)
            if gate.status == "pass":
                break
            feedback = {"summary": gate.summary, "exit_code": gate.exit_code}
        else:
            # Loop finished without a passing gate (or max_iterations was 0).
            await workflow.execute_activity(Activities.finalize, args=[run_id, "failed", tokens], **opts)
            return "failed"

        if await workflow.execute_activity(Activities.needs_approval, args=[run_id, last], **opts):
            try:
                await workflow.wait_condition(lambda: self._approval is not None,
                                              timeout=timedelta(hours=24))
            except asyncio.TimeoutError:
                # Nobody decided in time: close the pending approval and
                # finalize, so the run does not stay "running" with its
                # sandbox never cleaned up.
                await workflow.execute_activity(
                    Activities.record_approval, args=[run_id, "expired", "system"], **opts)
                await workflow.execute_activity(Activities.finalize, args=[run_id, "failed", tokens], **opts)
                return "failed"
            await workflow.execute_activity(
                Activities.record_approval,
                args=[run_id, self._approval, self._approver], **opts)
            # NOTE(review): only the exact string "rejected" blocks the run;
            # any other decision value is treated as approval -- confirm the
            # set of values the signal sender may use.
            if self._approval == "rejected":
                await workflow.execute_activity(Activities.finalize, args=[run_id, "failed", tokens], **opts)
                return "rejected"

        await workflow.execute_activity(Activities.finalize, args=[run_id, "succeeded", tokens], **opts)
        return "succeeded"
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# ---------------------------------- worker ----------------------------------
|
|
220
|
+
async def main() -> None:
    """Connect to Temporal and run a worker for the configured task queue."""
    temporal = await Client.connect(
        settings.temporal_host,
        namespace=settings.temporal_namespace,
    )
    acts = Activities()
    # Bound methods of one shared Activities instance, so all activities see
    # the same per-run caches.
    registered = [
        acts.start_run,
        acts.agent_step,
        acts.run_gate,
        acts.needs_approval,
        acts.record_approval,
        acts.finalize,
    ]
    worker = Worker(
        temporal,
        task_queue=settings.task_queue,
        workflows=[AgentRunWorkflow],
        activities=registered,
    )
    print(f"[abom] worker listening on task queue '{settings.task_queue}'")
    await worker.run()


# Script entry point: `python -m abom.orchestration` starts the worker.
if __name__ == "__main__":
    asyncio.run(main())
|
abom/policy.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Simple JSON policy engine (MVP).
|
|
2
|
+
|
|
3
|
+
Phase-2 replaces this module with OPA/Rego behind the same function signatures.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import fnmatch
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
# Built-in policy used when no explicit policy document is supplied.
DEFAULT_POLICY: dict[str, Any] = {
    "name": "default",
    # Exact model names accepted by model_allowed().
    "allowed_models": ["local/qwen2.5-coder"],
    "egress_allowed": False,
    # Rule strings read by is_consequential(): a bare flag name, or
    # "touches_paths:<glob>" matched against each changed path.
    "consequential_actions": [
        "writes_outside_workspace",
        "touches_paths:**/auth/**",
    ],
    "approval_required_for_consequential": True,
    "max_iterations_cap": 8,
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def model_allowed(policy: dict, model: str) -> bool:
    """Tell whether ``model`` is listed under the policy's ``allowed_models``.

    A policy without that key allows no model.
    """
    allowed = policy.get("allowed_models", [])
    return model in allowed
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def egress_allowed(policy: dict) -> bool:
    """Report the policy's ``egress_allowed`` flag as a strict bool (default False)."""
    flag = policy.get("egress_allowed", False)
    return True if flag else False
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _path_matches(path: str, pattern: str) -> bool:
    """Match ``path`` against a glob where a leading ``**/`` may span zero directories.

    ``fnmatch`` has no special ``**`` handling: ``**/auth/**`` only matches
    paths containing ``/auth/``, so a top-level ``auth/login.py`` slips
    through. The pattern is therefore also tried with each leading ``**/``
    removed.
    """
    if fnmatch.fnmatch(path, pattern):
        return True
    while pattern.startswith("**/"):
        pattern = pattern[3:]
        if fnmatch.fnmatch(path, pattern):
            return True
    return False


def is_consequential(policy: dict, proposal: dict) -> bool:
    """Decide whether a proposal triggers any of the policy's consequential rules.

    Args:
        policy: Policy document; its ``consequential_actions`` list holds the
            rule strings (missing key means no rules).
        proposal: ``{"changed_paths": [...], "writes_outside_workspace": bool}``;
            both keys are optional.

    Returns:
        True if the proposal writes outside the workspace and that rule is
        listed, or if any changed path matches a ``touches_paths:<glob>``
        rule; otherwise False.
    """
    rules = policy.get("consequential_actions", [])
    if proposal.get("writes_outside_workspace") and "writes_outside_workspace" in rules:
        return True
    changed_paths = proposal.get("changed_paths", [])
    for rule in rules:
        if not rule.startswith("touches_paths:"):
            continue
        # Everything after the first ":" is the glob pattern.
        pattern = rule.split(":", 1)[1]
        if any(_path_matches(path, pattern) for path in changed_paths):
            return True
    return False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def approval_required(policy: dict, proposal: dict) -> bool:
    """Tell whether the proposal must be approved by a human before proceeding.

    Approval is needed when the policy's ``approval_required_for_consequential``
    flag is truthy (it defaults to True) and the proposal is consequential.

    Returns:
        Always a real ``bool``; a falsy non-bool flag such as ``None`` or ``0``
        yields ``False`` rather than being returned as-is.
    """
    if not policy.get("approval_required_for_consequential", True):
        return False
    return is_consequential(policy, proposal)
|