omneval-devloop 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
devloop/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ """omneval-devloop: autonomous Dev Loop framework for Kubernetes."""
2
+
3
+ from devloop.dev_loop import DevLoopInput, DevLoopWorkflow
4
+ from devloop.summarization import SummarizationWorkflow, SummarizeInput
5
+ from devloop.shared import (
6
+ AgentJobResult,
7
+ AnswerInput,
8
+ AwaitInput,
9
+ DispatchInput,
10
+ JobStatus,
11
+ Phase,
12
+ TaskSpec,
13
+ )
14
+
15
+ __all__ = [
16
+ "DevLoopWorkflow",
17
+ "DevLoopInput",
18
+ "SummarizationWorkflow",
19
+ "SummarizeInput",
20
+ "AgentJobResult",
21
+ "AnswerInput",
22
+ "AwaitInput",
23
+ "DispatchInput",
24
+ "JobStatus",
25
+ "Phase",
26
+ "TaskSpec",
27
+ ]
devloop/cluster.py ADDED
@@ -0,0 +1,79 @@
1
+ """Single seam for Kubernetes cluster access.
2
+
3
+ All ConfigMap/Secret I/O and client construction live here so the
4
+ incluster→kubeconfig fallback and ConfigMap-``data`` parsing exist in exactly
5
+ one place. Callers cross one interface; tests mount fakes by patching ``core`` /
6
+ ``batch`` (or the read/patch helpers).
7
+
8
+ Non-deterministic (imports ``kubernetes``): never import this from a Temporal
9
+ workflow module — only from activity code.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import base64
15
+ import os
16
+
17
+ NAMESPACE = os.getenv("AGENTS_NAMESPACE", "agents")
18
+
19
+
20
+ # --------------------------------------------------------------------------- #
21
+ # Client accessors (the patchable seam for tests)
22
+ # --------------------------------------------------------------------------- #
23
def _load_config() -> None:
    """Load Kubernetes client configuration for the current process.

    The in-cluster configuration is tried first; when that raises
    ``ConfigException`` the local kubeconfig is loaded instead.
    """
    # Function-local import: this module has no top-level ``kubernetes`` import.
    from kubernetes import config as kube_config

    try:
        kube_config.load_incluster_config()
    except kube_config.ConfigException:
        kube_config.load_kube_config()
30
+
31
+
32
def core():
    """Return a ``CoreV1Api`` client after loading the cluster configuration."""
    from kubernetes import client as kube_client

    _load_config()
    api = kube_client.CoreV1Api()
    return api
37
+
38
+
39
def batch():
    """Return a ``BatchV1Api`` client after loading the cluster configuration."""
    from kubernetes import client as kube_client

    _load_config()
    api = kube_client.BatchV1Api()
    return api
44
+
45
+
46
+ # --------------------------------------------------------------------------- #
47
+ # ConfigMap / Secret helpers (hide data parsing + 404 + base64)
48
+ # --------------------------------------------------------------------------- #
49
+ def _data(obj) -> dict:
50
+ """Pull the ``data`` mapping off a ConfigMap/Secret, tolerating both the
51
+ kubernetes client object (``obj.data``) and a plain dict (as fakes pass)."""
52
+ if isinstance(obj, dict):
53
+ return obj.get("data") or {}
54
+ return obj.data or {}
55
+
56
+
57
def read_configmap_data(name: str, namespace: str = NAMESPACE) -> dict | None:
    """Fetch the ``data`` mapping of ConfigMap ``name`` in ``namespace``.

    Returns:
        The mapping (``{}`` when the ConfigMap has no data), or ``None`` when
        the API answers 404 for the ConfigMap.

    Raises:
        ApiException: for any API failure whose status is not 404.
    """
    from kubernetes.client.exceptions import ApiException

    try:
        config_map = core().read_namespaced_config_map(name, namespace)
    except ApiException as err:
        # Anything other than "not found" is a genuine failure: propagate it.
        if getattr(err, "status", None) != 404:
            raise
        return None
    return _data(config_map)
68
+
69
+
70
def patch_configmap_data(name: str, data: dict, namespace: str = NAMESPACE) -> None:
    """Send a patch carrying ``data`` to ConfigMap ``name`` in ``namespace``."""
    body = {"data": data}
    core().patch_namespaced_config_map(name, namespace, body)
73
+
74
+
75
def read_secret_value(name: str, key: str, namespace: str = NAMESPACE) -> str:
    """Return the base64-decoded value stored under ``key`` in Secret ``name``.

    A missing or empty key yields ``""``. Errors raised while reading the
    Secret itself are not caught here.
    """
    secret = core().read_namespaced_secret(name, namespace)
    encoded = _data(secret).get(key, "")
    if not encoded:
        return ""
    return base64.b64decode(encoded).decode()
devloop/dev_loop.py ADDED
@@ -0,0 +1,395 @@
1
+ """Dev Loop Temporal workflow (issues #20-#23) — sequential model.
2
+
3
+ Mirrors the Sandcastle loop: each round the planner picks the next unblocked
4
+ issue, a human approves it (Plan gate), the implementer works it, the reviewer
5
+ refines it, and after a Merge gate the merger merges + closes it. The loop
6
+ repeats so newly-unblocked issues are picked up after each merge.
7
+
8
+ ┌────────────────────────── round ──────────────────────────┐
9
+ Plan ─▶ [Plan gate] ─▶ Execute ─▶ Review ─▶ [Merge gate] ─▶ Merge
10
+ └──────────────────────── repeat ───────────────────────────┘
11
+
12
+ One issue at a time: the homelab DGX model serves a single request at a time,
13
+ so parallel agent Jobs would just block on inference. Each phase is a K8s
14
+ Agent Job driven by a bundled prompt (plan/implement/review/merge).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ from dataclasses import dataclass, field
21
+ from datetime import timedelta
22
+
23
+ from temporalio import workflow
24
+ from temporalio.common import RetryPolicy
25
+
26
+ from . import dev_loop_logic as logic
27
+ from .shared import (
28
+ CHANNEL_APPROVALS,
29
+ MESSAGING_QUEUE,
30
+ AgentJobResult,
31
+ AnswerInput,
32
+ AwaitInput,
33
+ DispatchInput,
34
+ InlineComment,
35
+ JobStatus,
36
+ OpenAgentPRsInput,
37
+ PostCommentsInput,
38
+ SendMessageInput,
39
+ SendNotificationInput,
40
+ TaskSpec,
41
+ )
42
+
43
+
44
+ # --------------------------------------------------------------------------- #
45
+ # Workflow input / result
46
+ # --------------------------------------------------------------------------- #
47
@dataclass
class DevLoopInput:
    """Parameters for one ``DevLoopWorkflow`` run."""

    # Project the loop operates on; forwarded to every dispatched activity.
    project_id: str
    # Sent to the plan Job as ``agent_label``.
    agent_label: str = "agent-ready"
    # Upper bound on the number of rounds executed by ``run``.
    max_iterations: int = 30
    # How long a mid-run agent question waits for a human reply (4h by
    # default); configurable down to seconds for tests.
    question_timeout_seconds: float = 4 * 3600.0
    # Number of re-plans allowed after a rejected plan before aborting.
    replan_max: int = 3
    # Forwarded to the dispatch/await activities as their poll interval.
    poll_interval_seconds: float = 5.0
56
+
57
+
58
@dataclass
class DevLoopResult:
    """Final outcome returned by ``DevLoopWorkflow.run``."""

    # One of: completed | failed_plan | failed_merge
    status: str
    # Issue numbers whose merge phase reported success during this run.
    merged_issues: list[int] = field(default_factory=list)
    # Optional free-form explanation accompanying a failure status.
    detail: str = ""
63
+
64
+
65
# Shared retry policy for every activity the workflow schedules.
_RETRY = RetryPolicy(maximum_attempts=3)
# Start-to-close budget for the agent Job activities (dispatch / await).
_ACTIVITY_TIMEOUT = timedelta(minutes=120)
# Start-to-close budget for the messaging activities.
_DISCORD_TIMEOUT = timedelta(minutes=1)
68
+
69
+
70
+ def _as_int(value) -> int:
71
+ try:
72
+ return int(value)
73
+ except (TypeError, ValueError):
74
+ return 0
75
+
76
+
77
@workflow.defn
class DevLoopWorkflow:
    """Sequential Dev Loop workflow: one issue per round.

    Each round runs Plan -> [Plan gate] -> Execute -> Review -> [Merge gate]
    -> Merge. Every phase is dispatched as an agent Job activity; the gates and
    mid-run agent questions are answered by humans through the ``human_reply``
    signal.

    NOTE(review): replies are consumed strictly in arrival order, so a reply
    that arrives after a question timed out is handed to the *next* gate that
    waits. Confirm this is the intended behaviour for late answers.
    """

    def __init__(self) -> None:
        # Every reply ever signalled, in arrival order (never truncated).
        self._replies: list[str] = []
        # How many entries of ``_replies`` have already been handed out.
        self._consumed = 0
        # Assigned in ``run``; guards the "ask question, await reply" pair.
        self._ask_lock: asyncio.Lock | None = None

    # ---- signals -------------------------------------------------------- #
    @workflow.signal
    def human_reply(self, text: str) -> None:
        """Record a human reply for the next waiter to consume."""
        self._replies.append(text)

    # ---- discord helpers ------------------------------------------------ #
    def _wid(self) -> str:
        """Return the id of the running workflow."""
        return workflow.info().workflow_id

    async def _say(
        self, message: str, thread_name: str = "", channel: str = CHANNEL_APPROVALS
    ) -> None:
        """Run the ``send_message`` activity on the messaging task queue."""
        await workflow.execute_activity(
            "send_message",
            SendMessageInput(self._wid(), message, channel, thread_name),
            task_queue=MESSAGING_QUEUE,
            start_to_close_timeout=_DISCORD_TIMEOUT,
            retry_policy=_RETRY,
        )

    async def _notify(self, message: str) -> None:
        """Run the ``send_notification`` activity on the messaging task queue."""
        await workflow.execute_activity(
            "send_notification",
            SendNotificationInput(self._wid(), message),
            task_queue=MESSAGING_QUEUE,
            start_to_close_timeout=_DISCORD_TIMEOUT,
            retry_policy=_RETRY,
        )

    async def _await_reply(self, timeout: float | None = None) -> str | None:
        """Block for the next unconsumed human reply.

        Args:
            timeout: seconds to wait, or ``None`` to wait indefinitely.

        Returns:
            The reply text, or ``None`` when the wait timed out.
        """
        target = self._consumed + 1
        try:
            await workflow.wait_condition(
                lambda: len(self._replies) >= target,
                # ``is not None`` rather than truthiness: a timeout of 0 must
                # expire immediately instead of silently meaning "forever".
                timeout=timedelta(seconds=timeout) if timeout is not None else None,
            )
        except asyncio.TimeoutError:
            return None
        reply = self._replies[self._consumed]
        self._consumed += 1
        return reply

    async def _dispatch(
        self, inp: DevLoopInput, spec: TaskSpec, issue_number: int = 0
    ) -> AgentJobResult:
        """Run the ``dispatch_agent_job`` activity for ``spec``; return its result."""
        return await workflow.execute_activity(
            "dispatch_agent_job",
            DispatchInput(
                inp.project_id,
                issue_number,
                spec,
                poll_interval_seconds=inp.poll_interval_seconds,
            ),
            result_type=AgentJobResult,
            start_to_close_timeout=_ACTIVITY_TIMEOUT,
            retry_policy=_RETRY,
        )

    async def _drop_issues_in_review(
        self, inp: DevLoopInput, issues: list[dict]
    ) -> list[dict]:
        """Drop planned issues that already have an open agent PR.

        Under the PR-review merge model an issue stays open until a human merges
        its PR, so the planner would otherwise re-surface it every round. We ask
        GitHub which issues already have an ``agent/issue-<N>`` PR open and filter
        them out, telling the channel they're parked on review."""
        if not issues:
            return issues
        in_review = await workflow.execute_activity(
            "open_agent_pr_issue_numbers",
            OpenAgentPRsInput(inp.project_id),
            result_type=list,
            start_to_close_timeout=timedelta(minutes=2),
            retry_policy=_RETRY,
        )
        in_review = {_as_int(n) for n in (in_review or [])}
        # ``_as_int`` maps unparsable values to 0. Drop that sentinel so an
        # unparsable PR number can never match an issue whose id is unparsable.
        in_review.discard(0)
        if not in_review:
            return issues
        kept, skipped = [], []
        for issue in issues:
            (skipped if _as_int(issue.get("id")) in in_review else kept).append(issue)
        if skipped:
            await self._notify(
                "⏭️ Skipping "
                + ", ".join(f"#{i.get('id')}" for i in skipped)
                + " — already has an open review PR awaiting merge."
            )
        return kept

    # ---- run ------------------------------------------------------------ #
    @workflow.run
    async def run(self, inp: DevLoopInput) -> DevLoopResult:
        """Drive up to ``inp.max_iterations`` rounds, one issue per round.

        Returns:
            ``completed`` when the planner has nothing left or the round limit
            is reached, ``failed_plan`` when the plan gate aborts, and
            ``failed_merge`` when a merge Job does not complete.
        """
        self._ask_lock = asyncio.Lock()
        thread_name = f"{inp.project_id} — Dev Loop"
        merged: list[int] = []

        for rnd in range(1, inp.max_iterations + 1):
            plan = await self._plan_phase(inp, thread_name, rnd)
            if plan is None:
                return DevLoopResult(
                    "failed_plan", merged_issues=merged, detail="plan rejected"
                )
            issues = plan.get("issues") or []
            if not issues:
                await self._notify(
                    "No unblocked agent-ready issues remain — Dev Loop complete."
                )
                return DevLoopResult("completed", merged_issues=merged)

            issue = issues[0]  # sequential: work one issue per round
            exec_result = await self._execute_phase(inp, issue)
            if not exec_result["commits"]:
                # Also reached when the execute Job did not complete:
                # ``_execute_phase`` reports that case as zero commits.
                await self._notify(
                    f"⚠️ #{issue.get('id')} produced no commits — skipping this round."
                )
                continue

            await self._review_phase(inp, issue, exec_result)

            outcome = await self._merge_phase(inp, issue, exec_result, thread_name)
            if outcome == "merged":
                merged.append(_as_int(issue.get("id")))
            elif outcome == "failed":
                return DevLoopResult(
                    "failed_merge", merged_issues=merged, detail=f"#{issue.get('id')}"
                )
            # outcome == "skipped": move on to the next round.

        await self._notify(
            f"Reached max iterations ({inp.max_iterations}) — pausing Dev Loop."
        )
        return DevLoopResult("completed", merged_issues=merged)

    # ---- Plan phase + gate (#20) --------------------------------------- #
    async def _plan_phase(
        self, inp: DevLoopInput, thread_name: str, rnd: int
    ) -> dict | None:
        """Plan the round and hold it at the Plan gate until approved.

        Returns:
            The approved plan (possibly with an empty ``issues`` list, which
            skips the gate), or ``None`` once the plan has been rejected more
            than ``inp.replan_max`` times.
        """
        replans = 0
        feedback = ""
        while True:
            spec = TaskSpec(
                phase="plan",
                project_id=inp.project_id,
                extra={"agent_label": inp.agent_label, "feedback": feedback},
            )
            result = await self._dispatch(inp, spec)
            plan = result.plan or {"issues": []}
            issues = plan.get("issues") or []
            issues = await self._drop_issues_in_review(inp, issues)
            plan = {**plan, "issues": issues}
            if not issues:
                return plan  # run() turns an empty plan into a completed result

            await self._say(
                logic.render_plan(inp.project_id, rnd, issues), thread_name=thread_name
            )
            reply = await self._await_reply()  # plan gate does NOT time out
            if reply is not None and logic.is_approval(reply):
                return plan
            # Any non-approving reply is a rejection; its text becomes the
            # feedback handed to the next plan Job.
            replans += 1
            if replans > inp.replan_max:
                await self._notify(
                    f"❌ Plan rejected {inp.replan_max} times — aborting Dev Loop."
                )
                return None
            feedback = reply or ""

    # ---- Execute phase (#21) ------------------------------------------- #
    async def _execute_phase(self, inp: DevLoopInput, issue: dict) -> dict:
        """Run the implementer Job for ``issue``, answering its questions.

        Returns:
            A dict with keys ``issue_id``, ``branch``, ``pr_url`` and
            ``commits``. When the Job does not end in ``COMPLETE`` the branch
            and PR URL are empty and ``commits`` is 0.
        """
        issue_no = _as_int(issue.get("id"))
        spec = TaskSpec(
            phase="execute",
            project_id=inp.project_id,
            issue_number=issue_no,
            title=issue.get("title", ""),
            branch=issue.get("branch", ""),
        )
        result = await self._dispatch(inp, spec, issue_number=issue_no)
        result = await self._answer_questions(inp, issue_no, result)

        if result.status != JobStatus.COMPLETE.value:
            return {"issue_id": issue_no, "branch": "", "pr_url": "", "commits": 0}
        if result.commits:
            await self._notify(
                f"✅ Implemented #{issue_no} → {result.pr_url or result.branch}"
            )
        return {
            "issue_id": issue_no,
            "branch": result.branch,
            "pr_url": result.pr_url,
            "commits": result.commits,
        }

    async def _answer_questions(
        self, inp: DevLoopInput, issue_no: int, result: AgentJobResult
    ) -> AgentJobResult:
        """Relay agent questions to a human until the Job stops asking.

        While the Job reports ``AWAITING_HUMAN`` the question is posted, the
        reply (or a best-guess instruction after
        ``inp.question_timeout_seconds``) is sent back through
        ``answer_agent_job``, and the Job is awaited again.
        """
        while result.status == JobStatus.AWAITING_HUMAN.value:
            # Keep "post question" and "consume reply" together as one unit.
            async with self._ask_lock:
                await self._say(f"❓ [#{issue_no}] {result.question}")
                answer = await self._await_reply(timeout=inp.question_timeout_seconds)
            if answer is None:
                answer = (
                    "No human reply within the timeout — proceed with your best guess."
                )
                await self._notify(
                    f"⏱️ [#{issue_no}] no reply — proceeding with best-guess."
                )
            await workflow.execute_activity(
                "answer_agent_job",
                AnswerInput(result.job_name, answer),
                start_to_close_timeout=timedelta(minutes=2),
                retry_policy=_RETRY,
            )
            result = await workflow.execute_activity(
                "await_agent_job",
                AwaitInput(
                    result.job_name,
                    poll_interval_seconds=inp.poll_interval_seconds,
                ),
                result_type=AgentJobResult,
                start_to_close_timeout=_ACTIVITY_TIMEOUT,
                retry_policy=_RETRY,
            )
        return result

    # ---- Review phase (#22) -------------------------------------------- #
    async def _review_phase(
        self, inp: DevLoopInput, issue: dict, exec_result: dict
    ) -> None:
        """Run the reviewer Job on the executed branch and report its outcome.

        NOTE(review): the review Job's status is not inspected here, so a
        review that did not complete is reported as "no changes needed".
        """
        issue_no = _as_int(issue.get("id"))
        spec = TaskSpec(
            phase="review",
            project_id=inp.project_id,
            issue_number=issue_no,
            branch=exec_result["branch"],
        )
        result = await self._dispatch(inp, spec, issue_number=issue_no)
        if result.commits:
            await self._notify(
                f"🔎 Reviewed #{issue_no} — pushed {result.commits} refinement commit(s)."
            )
        else:
            await self._notify(f"🔎 Reviewed #{issue_no} — no changes needed.")
        await self._post_review_findings(inp, exec_result, result)

    async def _post_review_findings(
        self, inp: DevLoopInput, exec_result: dict, result: AgentJobResult
    ) -> None:
        """Post the reviewer's findings to the PR. No-ops when the review Agent
        Execution Job returned no findings or the PR number can't be resolved."""
        review = result.review or {}
        summary = review.get("summary", "")
        inline = [
            InlineComment(
                file=c.get("file", ""),
                line=_as_int(c.get("line")),
                body=c.get("body", ""),
            )
            for c in (review.get("inline_comments") or [])
        ]
        if not summary and not inline:
            return
        pr_number = logic.pr_number_from_url(exec_result.get("pr_url", ""))
        if not pr_number:
            return
        await workflow.execute_activity(
            "post_pr_comments",
            PostCommentsInput(inp.project_id, pr_number, summary, inline),
            start_to_close_timeout=timedelta(minutes=2),
            retry_policy=_RETRY,
        )
        await self._notify(
            f"💬 Posted review findings to {exec_result.get('pr_url') or f'#{pr_number}'}"
        )

    # ---- Merge gate + Merge (#23) -------------------------------------- #
    async def _merge_phase(
        self, inp: DevLoopInput, issue: dict, exec_result: dict, thread_name: str
    ) -> str:
        """Hold the issue at the Merge gate, then dispatch the merge Job.

        Returns:
            ``"skipped"`` when the gate reply is not an approval, ``"failed"``
            when the merge Job does not complete, ``"merged"`` otherwise.
        """
        issue_no = _as_int(issue.get("id"))
        await self._say(
            logic.merge_gate_message(issue, exec_result["pr_url"]),
            thread_name=thread_name,
        )
        reply = await self._await_reply()  # merge gate does NOT time out
        if not (reply is not None and logic.is_approval(reply)):
            await self._notify(f"#{issue_no} not approved for merge — skipping.")
            return "skipped"

        spec = TaskSpec(
            phase="merge",
            project_id=inp.project_id,
            issue_number=issue_no,
            extra={
                "branches": [exec_result["branch"]],
                "issues": [
                    {"id": str(issue.get("id")), "title": issue.get("title", "")}
                ],
            },
        )
        merge = await self._dispatch(inp, spec, issue_number=issue_no)
        if merge.status != JobStatus.COMPLETE.value:
            await self._notify(
                f"❌ Merge #{issue_no} failed — manual intervention needed:\n"
                f"{merge.error or merge.summary}"
            )
            return "failed"

        await self._notify(
            f"📬 Opened review PR for #{issue_no}: {merge.pr_url or '(branch pushed)'} "
            "— tagged the reviewer. Approve & merge it on GitHub to close the issue."
        )
        return "merged"
@@ -0,0 +1,66 @@
1
+ """Pure helpers for the Dev Loop workflow.
2
+
3
+ No Temporal / I/O imports — safe to use from both the workflow sandbox and
4
+ unit tests.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+
11
+ _APPROVE_TOKENS = ("approve", "approved", "yes", "lgtm", "βœ…", "πŸ‘")
12
+ _PR_NUMBER = re.compile(r"/pull/(\d+)")
13
+
14
+
15
+ def is_approval(reply: str) -> bool:
16
+ """True if a Discord reply approves a Phase Gate."""
17
+ low = (reply or "").strip().lower()
18
+ if not low:
19
+ return False
20
+ return any(tok in low for tok in _APPROVE_TOKENS)
21
+
22
+
23
def pr_number_from_url(pr_url: str) -> int:
    """Return the ``<N>`` of the first ``/pull/<N>`` in ``pr_url``, else 0.

    ``None`` or an empty URL is treated as "no match".
    """
    match = _PR_NUMBER.search(pr_url or "")
    if match is None:
        return 0
    return int(match.group(1))
27
+
28
+
29
+ # --------------------------------------------------------------------------- #
30
+ # Sequential Dev Loop rendering (one issue per round)
31
+ # --------------------------------------------------------------------------- #
32
def render_plan(project_id: str, iteration: int, issues: list[dict]) -> str:
    """Render the Plan gate for a round: the issue about to be worked plus the
    other unblocked candidates the planner surfaced.

    ``issues`` is the planner's ``<plan>`` list ([{id, title, branch}, ...]);
    the workflow works ``issues[0]`` next.

    Returns:
        A Markdown message. When ``issues`` is empty, a single line stating
        that nothing is unblocked this round.
    """
    if not issues:
        return f"_No unblocked issues for `{project_id}` this round._"
    nxt = issues[0]
    lines = [
        f"**Dev Loop `{project_id}` — round {iteration}**",
        "",
        f"Next up: **#{nxt.get('id')} — {nxt.get('title', '')}** "
        f"→ `{nxt.get('branch', '')}`",
    ]
    if len(issues) > 1:
        lines += ["", "Other unblocked candidates this round:"]
        lines += [f"- #{i.get('id')} — {i.get('title', '')}" for i in issues[1:]]
    lines += [
        "",
        "Reply **approve** to implement it, or reply with feedback to re-plan.",
    ]
    return "\n".join(lines)
56
+
57
+
58
def merge_gate_message(issue: dict, pr_url: str) -> str:
    """Render the per-issue Merge gate prompt.

    Args:
        issue: planner issue dict; ``id`` and ``title`` are read.
        pr_url: link shown under the heading; a placeholder line is used when
            it is empty.
    """
    where = pr_url or "branch pushed (no PR link)"
    return (
        f"**Merge gate — #{issue.get('id')} {issue.get('title', '')}**\n"
        f"{where}\n\n"
        "Reply **approve** to open a review PR (tagging the reviewer for the "
        "final review + merge on GitHub), or anything else to skip."
    )
+ )