bernstein 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bernstein/__init__.py +32 -0
- bernstein/__main__.py +5 -0
- bernstein/_default_templates/bernstein.yaml +80 -0
- bernstein/_default_templates/prompts/judge.md +39 -0
- bernstein/_default_templates/prompts/plan.md +63 -0
- bernstein/_default_templates/prompts/review.md +45 -0
- bernstein/adapters/__init__.py +1 -0
- bernstein/adapters/base.py +112 -0
- bernstein/adapters/ci/__init__.py +1 -0
- bernstein/adapters/ci/github_actions.py +276 -0
- bernstein/adapters/claude.py +301 -0
- bernstein/adapters/codex.py +118 -0
- bernstein/adapters/gemini.py +115 -0
- bernstein/adapters/generic.py +90 -0
- bernstein/adapters/manager.py +86 -0
- bernstein/adapters/qwen.py +174 -0
- bernstein/adapters/registry.py +55 -0
- bernstein/agents/__init__.py +1 -0
- bernstein/agents/agency_provider.py +183 -0
- bernstein/agents/catalog.py +576 -0
- bernstein/agents/discovery.py +434 -0
- bernstein/agents/registry.py +431 -0
- bernstein/benchmark/__init__.py +0 -0
- bernstein/benchmark/swe_bench.py +525 -0
- bernstein/cli/__init__.py +1 -0
- bernstein/cli/cost.py +277 -0
- bernstein/cli/dashboard.py +718 -0
- bernstein/cli/errors.py +75 -0
- bernstein/cli/main.py +4462 -0
- bernstein/core/__init__.py +1 -0
- bernstein/core/a2a.py +305 -0
- bernstein/core/agency_loader.py +163 -0
- bernstein/core/bootstrap.py +959 -0
- bernstein/core/bulletin.py +183 -0
- bernstein/core/ci_fix.py +650 -0
- bernstein/core/ci_log_parser.py +82 -0
- bernstein/core/cluster.py +392 -0
- bernstein/core/context.py +1156 -0
- bernstein/core/cost.py +394 -0
- bernstein/core/cost_tracker.py +375 -0
- bernstein/core/evolution.py +77 -0
- bernstein/core/fast_path.py +683 -0
- bernstein/core/git_context.py +388 -0
- bernstein/core/git_ops.py +712 -0
- bernstein/core/github.py +701 -0
- bernstein/core/graph.py +325 -0
- bernstein/core/hijacker.py +579 -0
- bernstein/core/home.py +182 -0
- bernstein/core/janitor.py +633 -0
- bernstein/core/llm.py +188 -0
- bernstein/core/manager.py +1009 -0
- bernstein/core/mcp_manager.py +365 -0
- bernstein/core/mcp_registry.py +288 -0
- bernstein/core/metrics.py +912 -0
- bernstein/core/models.py +422 -0
- bernstein/core/multi_cell.py +447 -0
- bernstein/core/orchestrator.py +2893 -0
- bernstein/core/policy.py +599 -0
- bernstein/core/prometheus.py +129 -0
- bernstein/core/rag.py +502 -0
- bernstein/core/researcher.py +277 -0
- bernstein/core/retrospective.py +419 -0
- bernstein/core/router.py +733 -0
- bernstein/core/seed.py +426 -0
- bernstein/core/server.py +2043 -0
- bernstein/core/signals.py +231 -0
- bernstein/core/spawner.py +677 -0
- bernstein/core/store.py +279 -0
- bernstein/core/store_factory.py +157 -0
- bernstein/core/store_postgres.py +780 -0
- bernstein/core/store_redis.py +124 -0
- bernstein/core/sync.py +465 -0
- bernstein/core/traces.py +493 -0
- bernstein/core/upgrade_executor.py +576 -0
- bernstein/core/worker.py +120 -0
- bernstein/core/workspace.py +256 -0
- bernstein/core/worktree.py +157 -0
- bernstein/dashboard/__init__.py +8 -0
- bernstein/dashboard/templates/index.html +281 -0
- bernstein/evolution/__init__.py +443 -0
- bernstein/evolution/aggregator.py +1134 -0
- bernstein/evolution/applicator.py +225 -0
- bernstein/evolution/benchmark.py +340 -0
- bernstein/evolution/circuit.py +200 -0
- bernstein/evolution/creative.py +651 -0
- bernstein/evolution/detector.py +644 -0
- bernstein/evolution/gate.py +437 -0
- bernstein/evolution/invariants.py +134 -0
- bernstein/evolution/loop.py +1043 -0
- bernstein/evolution/proposals.py +174 -0
- bernstein/evolution/report.py +532 -0
- bernstein/evolution/sandbox.py +361 -0
- bernstein/evolution/types.py +121 -0
- bernstein/github_app/__init__.py +29 -0
- bernstein/github_app/app.py +244 -0
- bernstein/github_app/mapper.py +307 -0
- bernstein/github_app/webhooks.py +116 -0
- bernstein/plugins/__init__.py +10 -0
- bernstein/plugins/hookspecs.py +74 -0
- bernstein/plugins/manager.py +257 -0
- bernstein/templates/__init__.py +1 -0
- bernstein/templates/renderer.py +138 -0
- bernstein/tui/__init__.py +1 -0
- bernstein/tui/app.py +186 -0
- bernstein/tui/styles.tcss +75 -0
- bernstein/tui/widgets.py +162 -0
- bernstein-0.1.0.dist-info/METADATA +24 -0
- bernstein-0.1.0.dist-info/RECORD +111 -0
- bernstein-0.1.0.dist-info/WHEEL +4 -0
- bernstein-0.1.0.dist-info/entry_points.txt +3 -0
- bernstein-0.1.0.dist-info/licenses/LICENSE +191 -0
bernstein/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Bernstein — Multi-agent orchestration for CLI coding agents."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
__version__ = "0.1.0"
|
|
8
|
+
|
|
9
|
+
_PACKAGE_DIR = Path(__file__).resolve().parent
|
|
10
|
+
|
|
11
|
+
# Bundled default templates — present inside the wheel after pip install.
|
|
12
|
+
# In dev/editable mode, fall back to <repo>/templates/ at the project root.
|
|
13
|
+
_bundled_templates_dir = _PACKAGE_DIR / "_default_templates"
|
|
14
|
+
if not _bundled_templates_dir.is_dir():
|
|
15
|
+
# Dev mode: src/bernstein/../../templates → <repo>/templates
|
|
16
|
+
_bundled_templates_dir = _PACKAGE_DIR.parent.parent / "templates"
|
|
17
|
+
|
|
18
|
+
# Public access via uppercase constant
|
|
19
|
+
_BUNDLED_TEMPLATES_DIR = _bundled_templates_dir
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_templates_dir(workdir: Path) -> Path:
|
|
23
|
+
"""Return the templates directory for a project, with bundled fallback.
|
|
24
|
+
|
|
25
|
+
Checks ``workdir / "templates"`` first; falls back to the package's
|
|
26
|
+
bundled defaults so that ``bernstein`` works right after ``pip install``
|
|
27
|
+
without requiring ``bernstein init`` first.
|
|
28
|
+
"""
|
|
29
|
+
local = workdir / "templates"
|
|
30
|
+
if local.is_dir():
|
|
31
|
+
return local
|
|
32
|
+
return _BUNDLED_TEMPLATES_DIR
|
bernstein/_default_templates/bernstein.yaml
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
## bernstein.yaml — project seed file
|
|
2
|
+
## Copy to your project root and edit before running `bernstein run`.
|
|
3
|
+
|
|
4
|
+
# Required: high-level objective for this run.
|
|
5
|
+
goal: >
|
|
6
|
+
Describe your project goal here.
|
|
7
|
+
|
|
8
|
+
# CLI agent backend. One of: claude, codex, gemini, qwen.
|
|
9
|
+
cli: claude
|
|
10
|
+
|
|
11
|
+
# Maximum number of agents running concurrently.
|
|
12
|
+
max_agents: 4
|
|
13
|
+
|
|
14
|
+
# Optional model override (e.g. opus, sonnet, gpt-4.1).
|
|
15
|
+
# model: opus
|
|
16
|
+
|
|
17
|
+
# Role team. "auto" lets the manager choose; or list explicit roles.
|
|
18
|
+
# team: auto
|
|
19
|
+
# team:
|
|
20
|
+
# - backend
|
|
21
|
+
# - qa
|
|
22
|
+
|
|
23
|
+
# Spending cap. Accepts "$20", 20, or 20.0.
|
|
24
|
+
# budget: "$20"
|
|
25
|
+
|
|
26
|
+
# Constraints passed verbatim to every agent.
|
|
27
|
+
# constraints:
|
|
28
|
+
# - "Python 3.12+ only"
|
|
29
|
+
# - "No external dependencies without approval"
|
|
30
|
+
|
|
31
|
+
# Extra files appended to the manager's context.
|
|
32
|
+
# context_files:
|
|
33
|
+
# - docs/DESIGN.md
|
|
34
|
+
# - CLAUDE.md
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Agent catalogs
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Bernstein can load agent role definitions from external catalogs.
|
|
40
|
+
# Each entry specifies a source and how to map its fields to Bernstein's
|
|
41
|
+
# schema. Higher priority values are checked first.
|
|
42
|
+
#
|
|
43
|
+
# Default (no catalogs section): Agency remote catalog only, priority 100.
|
|
44
|
+
catalogs:
|
|
45
|
+
# Agency catalog — pulls role definitions from a GitHub repo.
|
|
46
|
+
- name: agency
|
|
47
|
+
type: agency
|
|
48
|
+
enabled: true
|
|
49
|
+
source: https://github.com/msitarzewski/agency-agents
|
|
50
|
+
priority: 100
|
|
51
|
+
|
|
52
|
+
# Generic local catalog — YAML files with a custom field mapping.
|
|
53
|
+
# - name: internal-agents
|
|
54
|
+
# type: generic
|
|
55
|
+
# enabled: true
|
|
56
|
+
# path: ./custom-agents/
|
|
57
|
+
# format: yaml
|
|
58
|
+
# glob: "**/*.yaml"
|
|
59
|
+
# field_map:
|
|
60
|
+
# id: agent_id
|
|
61
|
+
# name: display_name
|
|
62
|
+
# role: category
|
|
63
|
+
# system_prompt: prompt
|
|
64
|
+
# priority: 50
|
|
65
|
+
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
# MCP servers (passed to every spawned agent)
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# mcp_servers:
|
|
70
|
+
# filesystem:
|
|
71
|
+
# command: npx
|
|
72
|
+
# args: ["-y", "@modelcontextprotocol/server-filesystem", "."]
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Webhook notifications
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
# notify:
|
|
78
|
+
# webhook: https://hooks.example.com/bernstein
|
|
79
|
+
# on_complete: true
|
|
80
|
+
# on_failure: true
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# LLM Judge — Task Completion Verification
|
|
2
|
+
|
|
3
|
+
You are a strict but fair judge evaluating whether a coding task was completed correctly.
|
|
4
|
+
|
|
5
|
+
## Task
|
|
6
|
+
|
|
7
|
+
**Title:** {{TASK_TITLE}}
|
|
8
|
+
**Description:**
|
|
9
|
+
{{TASK_DESCRIPTION}}
|
|
10
|
+
|
|
11
|
+
## Evaluation Criteria
|
|
12
|
+
|
|
13
|
+
{{CRITERIA}}
|
|
14
|
+
|
|
15
|
+
## Git Diff (changes made)
|
|
16
|
+
|
|
17
|
+
```diff
|
|
18
|
+
{{GIT_DIFF}}
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Instructions
|
|
22
|
+
|
|
23
|
+
Evaluate whether the changes satisfy the task description and criteria above.
|
|
24
|
+
|
|
25
|
+
Consider:
|
|
26
|
+
1. **Correctness** — Do the changes implement what was requested?
|
|
27
|
+
2. **Completeness** — Are all aspects of the task addressed?
|
|
28
|
+
3. **Quality** — Is the code well-structured and following conventions?
|
|
29
|
+
|
|
30
|
+
Respond with ONLY a JSON object (no markdown fences, no text before or after):
|
|
31
|
+
|
|
32
|
+
{"verdict": "accept", "confidence": 0.95, "feedback": "All criteria met."}
|
|
33
|
+
|
|
34
|
+
Rules:
|
|
35
|
+
- `verdict`: "accept" if the task is substantially complete and correct, "retry" if there are clear gaps or errors.
|
|
36
|
+
- `confidence`: float from 0.0 to 1.0 reflecting certainty in your verdict.
|
|
37
|
+
- `feedback`: Specific actionable explanation. If "retry", describe exactly what needs fixing.
|
|
38
|
+
|
|
39
|
+
Output ONLY the JSON object.
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Task Planning
|
|
2
|
+
|
|
3
|
+
You are the Manager of a multi-agent coding team. Your job is to decompose the goal into specific, actionable tasks that specialist agents can execute independently.
|
|
4
|
+
|
|
5
|
+
## Goal
|
|
6
|
+
|
|
7
|
+
{{GOAL}}
|
|
8
|
+
|
|
9
|
+
## Project context
|
|
10
|
+
|
|
11
|
+
{{CONTEXT}}
|
|
12
|
+
|
|
13
|
+
## Available roles
|
|
14
|
+
|
|
15
|
+
{{AVAILABLE_ROLES}}
|
|
16
|
+
|
|
17
|
+
## Existing tasks
|
|
18
|
+
|
|
19
|
+
{{EXISTING_TASKS}}
|
|
20
|
+
|
|
21
|
+
## Instructions
|
|
22
|
+
|
|
23
|
+
Break the goal into tasks. Each task should be completable by a single agent in 30-120 minutes.
|
|
24
|
+
|
|
25
|
+
Rules:
|
|
26
|
+
1. Never assign two tasks to the same files — prevent merge conflicts.
|
|
27
|
+
2. Include test-writing in implementation tasks or as separate QA tasks.
|
|
28
|
+
3. Order tasks by dependency — foundational work first.
|
|
29
|
+
4. Use the most appropriate role for each task.
|
|
30
|
+
5. Keep tasks focused: one concern per task.
|
|
31
|
+
6. If existing tasks already cover part of the goal, do not duplicate them.
|
|
32
|
+
7. Every task must have at least one completion signal so the janitor can verify it.
|
|
33
|
+
|
|
34
|
+
Output a JSON array of tasks. Each task object must have exactly these fields:
|
|
35
|
+
|
|
36
|
+
```json
|
|
37
|
+
[
|
|
38
|
+
{
|
|
39
|
+
"title": "Short actionable title",
|
|
40
|
+
"description": "Detailed description including acceptance criteria",
|
|
41
|
+
"role": "one of the available roles",
|
|
42
|
+
"priority": 2,
|
|
43
|
+
"scope": "small | medium | large",
|
|
44
|
+
"complexity": "low | medium | high",
|
|
45
|
+
"estimated_minutes": 60,
|
|
46
|
+
"depends_on": ["title of dependency task, if any"],
|
|
47
|
+
"owned_files": ["src/path/to/file.py"],
|
|
48
|
+
"completion_signals": [
|
|
49
|
+
{"type": "path_exists", "value": "src/path/to/file.py"},
|
|
50
|
+
{"type": "test_passes", "value": "pytest tests/test_file.py -x"}
|
|
51
|
+
]
|
|
52
|
+
}
|
|
53
|
+
]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Completion signal types:
|
|
57
|
+
- `path_exists` — a file or directory must exist
|
|
58
|
+
- `glob_exists` — at least one file matching a glob must exist
|
|
59
|
+
- `test_passes` — a shell command must exit 0
|
|
60
|
+
- `file_contains` — a file must contain a given string
|
|
61
|
+
- `llm_review` — requires LLM review (use sparingly)
|
|
62
|
+
|
|
63
|
+
Output ONLY the JSON array. No markdown fences, no explanation before or after.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Task Review
|
|
2
|
+
|
|
3
|
+
You are the Manager reviewing completed work from a specialist agent.
|
|
4
|
+
|
|
5
|
+
## Task
|
|
6
|
+
|
|
7
|
+
**Title:** {{TASK_TITLE}}
|
|
8
|
+
**Role:** {{TASK_ROLE}}
|
|
9
|
+
**Description:**
|
|
10
|
+
{{TASK_DESCRIPTION}}
|
|
11
|
+
|
|
12
|
+
## Completion signals
|
|
13
|
+
|
|
14
|
+
{{COMPLETION_SIGNALS}}
|
|
15
|
+
|
|
16
|
+
## Agent's result summary
|
|
17
|
+
|
|
18
|
+
{{RESULT_SUMMARY}}
|
|
19
|
+
|
|
20
|
+
## Project context
|
|
21
|
+
|
|
22
|
+
{{CONTEXT}}
|
|
23
|
+
|
|
24
|
+
## Instructions
|
|
25
|
+
|
|
26
|
+
Review the completed work and decide:
|
|
27
|
+
|
|
28
|
+
1. **approve** — the work meets acceptance criteria and is ready to merge.
|
|
29
|
+
2. **request_changes** — the work is on the right track but needs specific fixes.
|
|
30
|
+
3. **reject** — the work is fundamentally wrong and should be redone from scratch.
|
|
31
|
+
|
|
32
|
+
Output a JSON object with exactly these fields:
|
|
33
|
+
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"verdict": "approve | request_changes | reject",
|
|
37
|
+
"reasoning": "Brief explanation of your decision",
|
|
38
|
+
"feedback": "Specific actionable feedback for the agent (empty string if approved)",
|
|
39
|
+
"follow_up_tasks": []
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
For `follow_up_tasks`, use the same task format as planning (title, description, role, etc.). Only include follow-up tasks if the review reveals additional work needed beyond the original scope.
|
|
44
|
+
|
|
45
|
+
Output ONLY the JSON object. No markdown fences, no explanation before or after.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Adapters for different CLI agents (Claude Code, Codex, Gemini, etc.)."""
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Base adapter for CLI coding agents."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import os
|
|
7
|
+
import signal
|
|
8
|
+
import sys
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import TYPE_CHECKING, Any
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from bernstein.core.models import ApiTierInfo, ModelConfig
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class SpawnResult:
    """Result of spawning an agent process."""

    pid: int  # OS process ID of the launched agent
    log_path: Path  # path to the spawned agent's log file
    proc: object | None = None  # subprocess.Popen, kept for poll()-based alive check
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def build_worker_cmd(
    cmd: list[str],
    *,
    role: str,
    session_id: str,
    pid_dir: Path,
    model: str = "",
) -> list[str]:
    """Wrap a CLI command with bernstein-worker for process visibility.

    The worker sets the process title to "bernstein: <role> [<session>]"
    and writes a PID metadata file for ``bernstein ps``.

    Args:
        cmd: The original CLI command to wrap.
        role: Agent role (qa, backend, etc.).
        session_id: Unique session identifier.
        pid_dir: Directory for PID metadata JSON files.
        model: Model name for metadata display.

    Returns:
        Wrapped command list.
    """
    # Build the wrapper prefix piecewise, then append the original command
    # after the "--" separator so the worker passes it through untouched.
    wrapper = [sys.executable, "-m", "bernstein.core.worker"]
    wrapper += ["--role", role]
    wrapper += ["--session", session_id]
    wrapper += ["--pid-dir", str(pid_dir)]
    wrapper += ["--model", model]
    return [*wrapper, "--", *cmd]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class CLIAdapter(ABC):
    """Interface for launching and monitoring CLI coding agents.

    Implement this for each supported CLI (Claude Code, Codex, Gemini, etc.).
    """

    @abstractmethod
    def spawn(
        self,
        *,
        prompt: str,
        workdir: Path,
        model_config: ModelConfig,
        session_id: str,
        mcp_config: dict[str, Any] | None = None,
    ) -> SpawnResult:
        """Launch an agent process with the given prompt."""
        ...

    def is_alive(self, pid: int) -> bool:
        """Check if the agent process is still running.

        Uses ``os.kill(pid, 0)``, which sends no signal but performs the
        kernel's existence/permission check.
        """
        try:
            os.kill(pid, 0)
        except PermissionError:
            # EPERM means the process exists but we lack permission to signal
            # it — it is alive. The previous blanket OSError handler wrongly
            # reported such processes as dead.
            return True
        except OSError:
            # ESRCH (ProcessLookupError) and friends: no such process.
            return False
        return True

    def kill(self, pid: int) -> None:
        """Terminate the agent process (its whole process group)."""
        # killpg reaches children the agent may have spawned; OSError is
        # suppressed so killing an already-dead process is a no-op.
        with contextlib.suppress(OSError):
            os.killpg(os.getpgid(pid), signal.SIGTERM)

    @abstractmethod
    def name(self) -> str:
        """Human-readable name of this CLI adapter."""
        ...

    def detect_tier(self) -> ApiTierInfo | None:
        """Detect the current API tier and remaining quota.

        Returns:
            ApiTierInfo if tier detection is supported and successful, None otherwise.
            Subclasses should override this to return provider-specific tier info.
        """
        return None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CI system adapters for log parsing and failure extraction."""
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""GitHub Actions CI adapter.
|
|
2
|
+
|
|
3
|
+
Parses GitHub Actions log format, extracts job/step names and failure
|
|
4
|
+
output, and maps results to ``CIFailure`` objects. Supports log download
|
|
5
|
+
via the ``gh`` CLI and the GitHub REST API.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import re
|
|
12
|
+
import subprocess
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
|
|
15
|
+
from bernstein.core.ci_fix import CIFailure, parse_failures
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
# GitHub Actions log structure helpers
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
# Matches the timestamp prefix on every GHA log line.
# Example: 2024-01-15T10:30:00.0000000Z ##[group]Run ruff check src/
# Trailing \s* also swallows the separating whitespace after the timestamp.
_TS_PREFIX_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T[\d:.]+Z\s*", re.MULTILINE)

# Matches ##[group]<name> / ##[endgroup] blocks.
# Group 1: the step name (rest of the ##[group] line).
# Group 2: the step body — DOTALL lets it span lines up to ##[endgroup].
_GROUP_RE = re.compile(
    r"##\[group\](.+?)$\n(.*?)##\[endgroup\]",
    re.MULTILINE | re.DOTALL,
)

# Matches ##[error] annotations.
# Group 1: the error message text up to the end of the line.
_ERROR_RE = re.compile(r"##\[error\](.+?)$", re.MULTILINE)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class GHAStep:
    """A parsed GitHub Actions step extracted from the log.

    Attributes:
        name: Step name taken from the ``##[group]`` marker.
        body: Body text between group/endgroup markers (stripped of timestamps).
        errors: Any ``##[error]`` annotations found in the body.
    """

    name: str
    body: str
    # Idiom fix: pass the plain ``list`` constructor as the factory rather
    # than the subscripted generic ``list[str]``. Both produce [] at runtime,
    # but a generic alias used as a callable is unconventional and what type
    # checkers expect here is the bare constructor.
    errors: list[str] = field(default_factory=list)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Parser
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _strip_timestamps(text: str) -> str:
|
|
58
|
+
"""Remove the ISO-8601 timestamp prefix from every log line.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
text: Raw GitHub Actions log text.
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Log text with timestamps removed.
|
|
65
|
+
"""
|
|
66
|
+
return _TS_PREFIX_RE.sub("", text)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _extract_steps(raw_log: str) -> list[GHAStep]:
    """Extract grouped steps from a GitHub Actions log.

    Args:
        raw_log: Full raw log (timestamps already stripped).

    Returns:
        List of ``GHAStep`` objects, one per ``##[group]``/``##[endgroup]``
        pair, with any ``##[error]`` annotations collected per step.
    """
    parsed: list[GHAStep] = []
    for match in _GROUP_RE.finditer(raw_log):
        step_name, step_body = match.group(1).strip(), match.group(2).strip()
        parsed.append(
            GHAStep(
                name=step_name,
                body=step_body,
                errors=_ERROR_RE.findall(step_body),
            )
        )
    return parsed
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _extract_job_name(raw_log: str) -> str:
|
|
88
|
+
"""Attempt to extract the job name from the log header.
|
|
89
|
+
|
|
90
|
+
GitHub Actions logs often start with a line like:
|
|
91
|
+
``##[group]Run <job-name>``
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
raw_log: Raw (timestamp-stripped) log.
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
Extracted job name, or ``"github_actions"`` as fallback.
|
|
98
|
+
"""
|
|
99
|
+
m = re.search(r"##\[group\]Run\s+(.+?)$", raw_log, re.MULTILINE)
|
|
100
|
+
if m:
|
|
101
|
+
return m.group(1).strip()[:80]
|
|
102
|
+
return "github_actions"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class GitHubActionsParser:
    """CI log parser for GitHub Actions.

    Parses the ``##[group]`` / ``##[endgroup]`` structure and ``##[error]``
    annotations, then delegates to the core ``parse_failures`` function for
    content-level classification.

    Attributes:
        name: Parser identifier (``"github_actions"``).
    """

    name: str = "github_actions"

    def parse(self, raw_log: str) -> list[CIFailure]:
        """Parse a GitHub Actions log into structured CI failures.

        Timestamps are stripped first so content matchers see clean text.
        Each ``##[group]`` step carrying ``##[error]`` annotations is then
        classified individually via ``parse_failures``. When no step has
        errors (or the log uses no group markers at all), the whole log is
        parsed as a single block under the extracted job name.

        Args:
            raw_log: Raw log output from a GitHub Actions run.

        Returns:
            List of ``CIFailure`` objects.
        """
        clean = _strip_timestamps(raw_log)
        failing = [step for step in _extract_steps(clean) if step.errors]

        # Guard: no error-annotated steps → fall back to whole-log parsing.
        if not failing:
            return parse_failures(clean, job=_extract_job_name(clean))

        collected: list[CIFailure] = []
        for step in failing:
            collected.extend(parse_failures(step.body, job=step.name))
        return collected
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ---------------------------------------------------------------------------
|
|
153
|
+
# Log download
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def download_github_actions_log(
    run_url: str,
    *,
    timeout: int = 60,
) -> str:
    """Download the failed-step log from a GitHub Actions run.

    Uses ``gh run view --log-failed`` which requires the ``gh`` CLI to be
    installed and authenticated.

    Args:
        run_url: URL of the GitHub Actions run, e.g.
            ``https://github.com/owner/repo/actions/runs/123456``.
        timeout: Subprocess timeout in seconds.

    Returns:
        Raw log text from the failed steps.

    Raises:
        RuntimeError: If the ``gh`` command exits non-zero.
        ValueError: If no run ID can be extracted from ``run_url``
            (propagated from ``_extract_run_id``).
        subprocess.TimeoutExpired: If ``gh`` does not finish within ``timeout``.
    """
    # Extract the run ID from the URL.
    run_id = _extract_run_id(run_url)

    result = subprocess.run(
        ["gh", "run", "view", run_id, "--log-failed"],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    # stderr is truncated to 200 chars to keep the error message readable.
    if result.returncode != 0:
        msg = f"gh run view failed (exit {result.returncode}): {result.stderr.strip()[:200]}"
        raise RuntimeError(msg)
    return result.stdout
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def download_github_actions_log_api(
    run_url: str,
    *,
    timeout: int = 60,
) -> str:
    """Download the failed-step log via ``gh api``.

    This is an alternative to ``download_github_actions_log`` that uses the
    GitHub REST API through ``gh api``, which can be more reliable in some
    environments.

    Args:
        run_url: URL of the GitHub Actions run.
        timeout: Subprocess timeout in seconds (applied per gh invocation).

    Returns:
        Raw log text of all failed jobs, joined with newlines. Empty string
        when the run has no failed jobs.

    Raises:
        RuntimeError: If the ``gh`` job-listing command exits non-zero.
        ValueError: If the URL cannot be parsed (run ID or owner/repo).
        subprocess.TimeoutExpired: If a ``gh`` call does not finish in time.
    """
    run_id = _extract_run_id(run_url)

    # Extract owner/repo from the URL so we can address the REST endpoints.
    m = re.match(r"https?://github\.com/([^/]+/[^/]+)/actions/runs/\d+", run_url)
    if not m:
        msg = f"Cannot parse owner/repo from URL: {run_url}"
        raise ValueError(msg)
    repo = m.group(1)

    # List the run's jobs and keep only the failed ones (jq filter).
    result = subprocess.run(
        [
            "gh",
            "api",
            f"repos/{repo}/actions/runs/{run_id}/jobs",
            "--jq",
            '.jobs[] | select(.conclusion == "failure") | .id',
        ],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if result.returncode != 0:
        msg = f"gh api failed (exit {result.returncode}): {result.stderr.strip()[:200]}"
        raise RuntimeError(msg)

    job_ids = result.stdout.strip().splitlines()
    if not job_ids:
        return ""

    # Download logs for each failed job. Individual fetch failures are
    # logged and skipped (best-effort) instead of silently ignored — the
    # previous version dropped them without a trace.
    logs: list[str] = []
    for job_id in job_ids:
        log_result = subprocess.run(
            ["gh", "api", f"repos/{repo}/actions/jobs/{job_id}/logs"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        if log_result.returncode == 0:
            logs.append(log_result.stdout)
        else:
            logging.getLogger(__name__).warning(
                "Failed to fetch log for job %s (exit %d)",
                job_id,
                log_result.returncode,
            )

    return "\n".join(logs)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _extract_run_id(run_url: str) -> str:
|
|
261
|
+
"""Extract the numeric run ID from a GitHub Actions URL.
|
|
262
|
+
|
|
263
|
+
Args:
|
|
264
|
+
run_url: URL like ``https://github.com/owner/repo/actions/runs/123456``.
|
|
265
|
+
|
|
266
|
+
Returns:
|
|
267
|
+
The run ID as a string.
|
|
268
|
+
|
|
269
|
+
Raises:
|
|
270
|
+
ValueError: If the URL does not match the expected pattern.
|
|
271
|
+
"""
|
|
272
|
+
m = re.search(r"/actions/runs/(\d+)", run_url)
|
|
273
|
+
if not m:
|
|
274
|
+
msg = f"Cannot extract run ID from URL: {run_url}"
|
|
275
|
+
raise ValueError(msg)
|
|
276
|
+
return m.group(1)
|