bernstein 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. bernstein/__init__.py +32 -0
  2. bernstein/__main__.py +5 -0
  3. bernstein/_default_templates/bernstein.yaml +80 -0
  4. bernstein/_default_templates/prompts/judge.md +39 -0
  5. bernstein/_default_templates/prompts/plan.md +63 -0
  6. bernstein/_default_templates/prompts/review.md +45 -0
  7. bernstein/adapters/__init__.py +1 -0
  8. bernstein/adapters/base.py +112 -0
  9. bernstein/adapters/ci/__init__.py +1 -0
  10. bernstein/adapters/ci/github_actions.py +276 -0
  11. bernstein/adapters/claude.py +301 -0
  12. bernstein/adapters/codex.py +118 -0
  13. bernstein/adapters/gemini.py +115 -0
  14. bernstein/adapters/generic.py +90 -0
  15. bernstein/adapters/manager.py +86 -0
  16. bernstein/adapters/qwen.py +174 -0
  17. bernstein/adapters/registry.py +55 -0
  18. bernstein/agents/__init__.py +1 -0
  19. bernstein/agents/agency_provider.py +183 -0
  20. bernstein/agents/catalog.py +576 -0
  21. bernstein/agents/discovery.py +434 -0
  22. bernstein/agents/registry.py +431 -0
  23. bernstein/benchmark/__init__.py +0 -0
  24. bernstein/benchmark/swe_bench.py +525 -0
  25. bernstein/cli/__init__.py +1 -0
  26. bernstein/cli/cost.py +277 -0
  27. bernstein/cli/dashboard.py +718 -0
  28. bernstein/cli/errors.py +75 -0
  29. bernstein/cli/main.py +4462 -0
  30. bernstein/core/__init__.py +1 -0
  31. bernstein/core/a2a.py +305 -0
  32. bernstein/core/agency_loader.py +163 -0
  33. bernstein/core/bootstrap.py +959 -0
  34. bernstein/core/bulletin.py +183 -0
  35. bernstein/core/ci_fix.py +650 -0
  36. bernstein/core/ci_log_parser.py +82 -0
  37. bernstein/core/cluster.py +392 -0
  38. bernstein/core/context.py +1156 -0
  39. bernstein/core/cost.py +394 -0
  40. bernstein/core/cost_tracker.py +375 -0
  41. bernstein/core/evolution.py +77 -0
  42. bernstein/core/fast_path.py +683 -0
  43. bernstein/core/git_context.py +388 -0
  44. bernstein/core/git_ops.py +712 -0
  45. bernstein/core/github.py +701 -0
  46. bernstein/core/graph.py +325 -0
  47. bernstein/core/hijacker.py +579 -0
  48. bernstein/core/home.py +182 -0
  49. bernstein/core/janitor.py +633 -0
  50. bernstein/core/llm.py +188 -0
  51. bernstein/core/manager.py +1009 -0
  52. bernstein/core/mcp_manager.py +365 -0
  53. bernstein/core/mcp_registry.py +288 -0
  54. bernstein/core/metrics.py +912 -0
  55. bernstein/core/models.py +422 -0
  56. bernstein/core/multi_cell.py +447 -0
  57. bernstein/core/orchestrator.py +2893 -0
  58. bernstein/core/policy.py +599 -0
  59. bernstein/core/prometheus.py +129 -0
  60. bernstein/core/rag.py +502 -0
  61. bernstein/core/researcher.py +277 -0
  62. bernstein/core/retrospective.py +419 -0
  63. bernstein/core/router.py +733 -0
  64. bernstein/core/seed.py +426 -0
  65. bernstein/core/server.py +2043 -0
  66. bernstein/core/signals.py +231 -0
  67. bernstein/core/spawner.py +677 -0
  68. bernstein/core/store.py +279 -0
  69. bernstein/core/store_factory.py +157 -0
  70. bernstein/core/store_postgres.py +780 -0
  71. bernstein/core/store_redis.py +124 -0
  72. bernstein/core/sync.py +465 -0
  73. bernstein/core/traces.py +493 -0
  74. bernstein/core/upgrade_executor.py +576 -0
  75. bernstein/core/worker.py +120 -0
  76. bernstein/core/workspace.py +256 -0
  77. bernstein/core/worktree.py +157 -0
  78. bernstein/dashboard/__init__.py +8 -0
  79. bernstein/dashboard/templates/index.html +281 -0
  80. bernstein/evolution/__init__.py +443 -0
  81. bernstein/evolution/aggregator.py +1134 -0
  82. bernstein/evolution/applicator.py +225 -0
  83. bernstein/evolution/benchmark.py +340 -0
  84. bernstein/evolution/circuit.py +200 -0
  85. bernstein/evolution/creative.py +651 -0
  86. bernstein/evolution/detector.py +644 -0
  87. bernstein/evolution/gate.py +437 -0
  88. bernstein/evolution/invariants.py +134 -0
  89. bernstein/evolution/loop.py +1043 -0
  90. bernstein/evolution/proposals.py +174 -0
  91. bernstein/evolution/report.py +532 -0
  92. bernstein/evolution/sandbox.py +361 -0
  93. bernstein/evolution/types.py +121 -0
  94. bernstein/github_app/__init__.py +29 -0
  95. bernstein/github_app/app.py +244 -0
  96. bernstein/github_app/mapper.py +307 -0
  97. bernstein/github_app/webhooks.py +116 -0
  98. bernstein/plugins/__init__.py +10 -0
  99. bernstein/plugins/hookspecs.py +74 -0
  100. bernstein/plugins/manager.py +257 -0
  101. bernstein/templates/__init__.py +1 -0
  102. bernstein/templates/renderer.py +138 -0
  103. bernstein/tui/__init__.py +1 -0
  104. bernstein/tui/app.py +186 -0
  105. bernstein/tui/styles.tcss +75 -0
  106. bernstein/tui/widgets.py +162 -0
  107. bernstein-0.1.0.dist-info/METADATA +24 -0
  108. bernstein-0.1.0.dist-info/RECORD +111 -0
  109. bernstein-0.1.0.dist-info/WHEEL +4 -0
  110. bernstein-0.1.0.dist-info/entry_points.txt +3 -0
  111. bernstein-0.1.0.dist-info/licenses/LICENSE +191 -0
bernstein/__init__.py ADDED
@@ -0,0 +1,32 @@
1
+ """Bernstein — Multi-agent orchestration for CLI coding agents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ __version__ = "0.1.0"
8
+
9
+ _PACKAGE_DIR = Path(__file__).resolve().parent
10
+
11
+ # Bundled default templates — present inside the wheel after pip install.
12
+ # In dev/editable mode, fall back to <repo>/templates/ at the project root.
13
+ _bundled_templates_dir = _PACKAGE_DIR / "_default_templates"
14
+ if not _bundled_templates_dir.is_dir():
15
+ # Dev mode: src/bernstein/../../templates → <repo>/templates
16
+ _bundled_templates_dir = _PACKAGE_DIR.parent.parent / "templates"
17
+
18
+ # Public access via uppercase constant
19
+ _BUNDLED_TEMPLATES_DIR = _bundled_templates_dir
20
+
21
+
22
+ def get_templates_dir(workdir: Path) -> Path:
23
+ """Return the templates directory for a project, with bundled fallback.
24
+
25
+ Checks ``workdir / "templates"`` first; falls back to the package's
26
+ bundled defaults so that ``bernstein`` works right after ``pip install``
27
+ without requiring ``bernstein init`` first.
28
+ """
29
+ local = workdir / "templates"
30
+ if local.is_dir():
31
+ return local
32
+ return _BUNDLED_TEMPLATES_DIR
bernstein/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running as ``python -m bernstein``."""
2
+
3
+ from bernstein.cli.main import cli
4
+
5
+ cli()
@@ -0,0 +1,80 @@
1
+ ## bernstein.yaml — project seed file
2
+ ## Copy to your project root and edit before running `bernstein run`.
3
+
4
+ # Required: high-level objective for this run.
5
+ goal: >
6
+ Describe your project goal here.
7
+
8
+ # CLI agent backend. One of: claude, codex, gemini, qwen.
9
+ cli: claude
10
+
11
+ # Maximum number of agents running concurrently.
12
+ max_agents: 4
13
+
14
+ # Optional model override (e.g. opus, sonnet, gpt-4.1).
15
+ # model: opus
16
+
17
+ # Role team. "auto" lets the manager choose; or list explicit roles.
18
+ # team: auto
19
+ # team:
20
+ # - backend
21
+ # - qa
22
+
23
+ # Spending cap. Accepts "$20", 20, or 20.0.
24
+ # budget: "$20"
25
+
26
+ # Constraints passed verbatim to every agent.
27
+ # constraints:
28
+ # - "Python 3.12+ only"
29
+ # - "No external dependencies without approval"
30
+
31
+ # Extra files appended to the manager's context.
32
+ # context_files:
33
+ # - docs/DESIGN.md
34
+ # - CLAUDE.md
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Agent catalogs
38
+ # ---------------------------------------------------------------------------
39
+ # Bernstein can load agent role definitions from external catalogs.
40
+ # Each entry specifies a source and how to map its fields to Bernstein's
41
+ # schema. Higher priority values are checked first.
42
+ #
43
+ # Default (no catalogs section): Agency remote catalog only, priority 100.
44
+ catalogs:
45
+ # Agency catalog — pulls role definitions from a GitHub repo.
46
+ - name: agency
47
+ type: agency
48
+ enabled: true
49
+ source: https://github.com/msitarzewski/agency-agents
50
+ priority: 100
51
+
52
+ # Generic local catalog — YAML files with a custom field mapping.
53
+ # - name: internal-agents
54
+ # type: generic
55
+ # enabled: true
56
+ # path: ./custom-agents/
57
+ # format: yaml
58
+ # glob: "**/*.yaml"
59
+ # field_map:
60
+ # id: agent_id
61
+ # name: display_name
62
+ # role: category
63
+ # system_prompt: prompt
64
+ # priority: 50
65
+
66
+ # ---------------------------------------------------------------------------
67
+ # MCP servers (passed to every spawned agent)
68
+ # ---------------------------------------------------------------------------
69
+ # mcp_servers:
70
+ # filesystem:
71
+ # command: npx
72
+ # args: ["-y", "@modelcontextprotocol/server-filesystem", "."]
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Webhook notifications
76
+ # ---------------------------------------------------------------------------
77
+ # notify:
78
+ # webhook: https://hooks.example.com/bernstein
79
+ # on_complete: true
80
+ # on_failure: true
@@ -0,0 +1,39 @@
1
+ # LLM Judge — Task Completion Verification
2
+
3
+ You are a strict but fair judge evaluating whether a coding task was completed correctly.
4
+
5
+ ## Task
6
+
7
+ **Title:** {{TASK_TITLE}}
8
+ **Description:**
9
+ {{TASK_DESCRIPTION}}
10
+
11
+ ## Evaluation Criteria
12
+
13
+ {{CRITERIA}}
14
+
15
+ ## Git Diff (changes made)
16
+
17
+ ```diff
18
+ {{GIT_DIFF}}
19
+ ```
20
+
21
+ ## Instructions
22
+
23
+ Evaluate whether the changes satisfy the task description and criteria above.
24
+
25
+ Consider:
26
+ 1. **Correctness** — Do the changes implement what was requested?
27
+ 2. **Completeness** — Are all aspects of the task addressed?
28
+ 3. **Quality** — Is the code well-structured and following conventions?
29
+
30
+ Respond with ONLY a JSON object (no markdown fences, no text before or after):
31
+
32
+ {"verdict": "accept", "confidence": 0.95, "feedback": "All criteria met."}
33
+
34
+ Rules:
35
+ - `verdict`: "accept" if the task is substantially complete and correct, "retry" if there are clear gaps or errors.
36
+ - `confidence`: float from 0.0 to 1.0 reflecting certainty in your verdict.
37
+ - `feedback`: Specific actionable explanation. If "retry", describe exactly what needs fixing.
38
+
39
+ Output ONLY the JSON object.
@@ -0,0 +1,63 @@
1
+ # Task Planning
2
+
3
+ You are the Manager of a multi-agent coding team. Your job is to decompose the goal into specific, actionable tasks that specialist agents can execute independently.
4
+
5
+ ## Goal
6
+
7
+ {{GOAL}}
8
+
9
+ ## Project context
10
+
11
+ {{CONTEXT}}
12
+
13
+ ## Available roles
14
+
15
+ {{AVAILABLE_ROLES}}
16
+
17
+ ## Existing tasks
18
+
19
+ {{EXISTING_TASKS}}
20
+
21
+ ## Instructions
22
+
23
+ Break the goal into tasks. Each task should be completable by a single agent in 30-120 minutes.
24
+
25
+ Rules:
26
+ 1. Never assign two tasks to the same files — prevent merge conflicts.
27
+ 2. Include test-writing in implementation tasks or as separate QA tasks.
28
+ 3. Order tasks by dependency — foundational work first.
29
+ 4. Use the most appropriate role for each task.
30
+ 5. Keep tasks focused: one concern per task.
31
+ 6. If existing tasks already cover part of the goal, do not duplicate them.
32
+ 7. Every task must have at least one completion signal so the janitor can verify it.
33
+
34
+ Output a JSON array of tasks. Each task object must have exactly these fields:
35
+
36
+ ```json
37
+ [
38
+ {
39
+ "title": "Short actionable title",
40
+ "description": "Detailed description including acceptance criteria",
41
+ "role": "one of the available roles",
42
+ "priority": 2,
43
+ "scope": "small | medium | large",
44
+ "complexity": "low | medium | high",
45
+ "estimated_minutes": 60,
46
+ "depends_on": ["title of dependency task, if any"],
47
+ "owned_files": ["src/path/to/file.py"],
48
+ "completion_signals": [
49
+ {"type": "path_exists", "value": "src/path/to/file.py"},
50
+ {"type": "test_passes", "value": "pytest tests/test_file.py -x"}
51
+ ]
52
+ }
53
+ ]
54
+ ```
55
+
56
+ Completion signal types:
57
+ - `path_exists` — a file or directory must exist
58
+ - `glob_exists` — at least one file matching a glob must exist
59
+ - `test_passes` — a shell command must exit 0
60
+ - `file_contains` — a file must contain a given string
61
+ - `llm_review` — requires LLM review (use sparingly)
62
+
63
+ Output ONLY the JSON array. No markdown fences, no explanation before or after.
@@ -0,0 +1,45 @@
1
+ # Task Review
2
+
3
+ You are the Manager reviewing completed work from a specialist agent.
4
+
5
+ ## Task
6
+
7
+ **Title:** {{TASK_TITLE}}
8
+ **Role:** {{TASK_ROLE}}
9
+ **Description:**
10
+ {{TASK_DESCRIPTION}}
11
+
12
+ ## Completion signals
13
+
14
+ {{COMPLETION_SIGNALS}}
15
+
16
+ ## Agent's result summary
17
+
18
+ {{RESULT_SUMMARY}}
19
+
20
+ ## Project context
21
+
22
+ {{CONTEXT}}
23
+
24
+ ## Instructions
25
+
26
+ Review the completed work and decide:
27
+
28
+ 1. **approve** — the work meets acceptance criteria and is ready to merge.
29
+ 2. **request_changes** — the work is on the right track but needs specific fixes.
30
+ 3. **reject** — the work is fundamentally wrong and should be redone from scratch.
31
+
32
+ Output a JSON object with exactly these fields:
33
+
34
+ ```json
35
+ {
36
+ "verdict": "approve | request_changes | reject",
37
+ "reasoning": "Brief explanation of your decision",
38
+ "feedback": "Specific actionable feedback for the agent (empty string if approved)",
39
+ "follow_up_tasks": []
40
+ }
41
+ ```
42
+
43
+ For `follow_up_tasks`, use the same task format as planning (title, description, role, etc.). Only include follow-up tasks if the review reveals additional work needed beyond the original scope.
44
+
45
+ Output ONLY the JSON object. No markdown fences, no explanation before or after.
@@ -0,0 +1 @@
1
+ """Adapters for different CLI agents (Claude Code, Codex, Gemini, etc.)."""
@@ -0,0 +1,112 @@
1
+ """Base adapter for CLI coding agents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import signal
8
+ import sys
9
+ from abc import ABC, abstractmethod
10
+ from dataclasses import dataclass
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ if TYPE_CHECKING:
14
+ from pathlib import Path
15
+
16
+ from bernstein.core.models import ApiTierInfo, ModelConfig
17
+
18
+
19
@dataclass
class SpawnResult:
    """Outcome of launching an agent process.

    Attributes:
        pid: OS process id of the spawned agent.
        log_path: File the agent's output is streamed to.
        proc: Underlying ``subprocess.Popen`` handle when available; retained
            so callers can do a cheap ``poll()``-based liveness check.
    """

    pid: int
    log_path: Path
    proc: object | None = None
26
+
27
+
28
def build_worker_cmd(
    cmd: list[str],
    *,
    role: str,
    session_id: str,
    pid_dir: Path,
    model: str = "",
) -> list[str]:
    """Wrap a CLI command with bernstein-worker for process visibility.

    The worker retitles its process to "bernstein: <role> [<session>]" and
    writes a PID metadata file so ``bernstein ps`` can enumerate agents.

    Args:
        cmd: The original CLI command to wrap.
        role: Agent role (qa, backend, etc.).
        session_id: Unique session identifier.
        pid_dir: Directory for PID metadata JSON files.
        model: Model name for metadata display.

    Returns:
        Wrapped command list.
    """
    # Build the launcher prefix flag-pair by flag-pair, then splice the
    # original command in after the "--" separator.
    wrapper = [sys.executable, "-m", "bernstein.core.worker"]
    wrapper += ["--role", role]
    wrapper += ["--session", session_id]
    wrapper += ["--pid-dir", str(pid_dir)]
    wrapper += ["--model", model]
    return [*wrapper, "--", *cmd]
66
+
67
+
68
class CLIAdapter(ABC):
    """Interface for launching and monitoring CLI coding agents.

    Implement this for each supported CLI (Claude Code, Codex, Gemini, etc.).
    """

    @abstractmethod
    def spawn(
        self,
        *,
        prompt: str,
        workdir: Path,
        model_config: ModelConfig,
        session_id: str,
        mcp_config: dict[str, Any] | None = None,
    ) -> SpawnResult:
        """Launch an agent process with the given prompt."""
        ...

    def is_alive(self, pid: int) -> bool:
        """Check if the agent process is still running.

        Uses signal 0, which performs an existence check without delivering
        a signal. Per POSIX kill(2), EPERM means the process exists but we
        lack permission to signal it — that still counts as alive; only
        ESRCH (no such process) means dead.
        """
        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            return False
        except PermissionError:
            # Process exists under another user; report it as running.
            return True
        except OSError:
            return False
        return True

    def kill(self, pid: int) -> None:
        """Terminate the agent process (SIGTERM to its whole process group)."""
        with contextlib.suppress(OSError):
            os.killpg(os.getpgid(pid), signal.SIGTERM)

    @abstractmethod
    def name(self) -> str:
        """Human-readable name of this CLI adapter."""
        ...

    def detect_tier(self) -> ApiTierInfo | None:
        """Detect the current API tier and remaining quota.

        Returns:
            ApiTierInfo if tier detection is supported and successful, None otherwise.
            Subclasses should override this to return provider-specific tier info.
        """
        return None
@@ -0,0 +1 @@
1
+ """CI system adapters for log parsing and failure extraction."""
@@ -0,0 +1,276 @@
1
+ """GitHub Actions CI adapter.
2
+
3
+ Parses GitHub Actions log format, extracts job/step names and failure
4
+ output, and maps results to ``CIFailure`` objects. Supports log download
5
+ via the ``gh`` CLI and the GitHub REST API.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import re
12
+ import subprocess
13
+ from dataclasses import dataclass, field
14
+
15
+ from bernstein.core.ci_fix import CIFailure, parse_failures
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
# ---------------------------------------------------------------------------
# GitHub Actions log structure helpers
# ---------------------------------------------------------------------------

# Matches the ISO-8601 timestamp prefix GitHub prepends to every log line.
# Example: 2024-01-15T10:30:00.0000000Z ##[group]Run ruff check src/
_TS_PREFIX_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T[\d:.]+Z\s*", re.MULTILINE)

# Matches ##[group]<name> / ##[endgroup] blocks — one block per workflow step.
# Group 1 is the step name; group 2 (DOTALL) is the step body.
_GROUP_RE = re.compile(
    r"##\[group\](.+?)$\n(.*?)##\[endgroup\]",
    re.MULTILINE | re.DOTALL,
)

# Matches ##[error]<message> annotations emitted by failing steps.
_ERROR_RE = re.compile(r"##\[error\](.+?)$", re.MULTILINE)
35
+
36
+
37
@dataclass
class GHAStep:
    """A parsed GitHub Actions step extracted from the log.

    Attributes:
        name: Step name taken from the ``##[group]`` marker.
        body: Body text between group/endgroup markers (stripped of timestamps).
        errors: Any ``##[error]`` annotations found in the body.
    """

    name: str
    body: str
    # Use the plain ``list`` builtin as the factory; calling the generic
    # alias ``list[str]`` works but is unidiomatic — the subscript has no
    # runtime effect and only obscures intent.
    errors: list[str] = field(default_factory=list)
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Parser
54
+ # ---------------------------------------------------------------------------
55
+
56
+
57
def _strip_timestamps(text: str) -> str:
    """Remove the ISO-8601 timestamp prefix from every log line.

    GitHub Actions prefixes each line with a timestamp; stripping it lets
    downstream content matchers operate on clean text.

    Args:
        text: Raw GitHub Actions log text.

    Returns:
        Log text with timestamps removed.
    """
    return _TS_PREFIX_RE.sub("", text)
67
+
68
+
69
def _extract_steps(raw_log: str) -> list[GHAStep]:
    """Extract grouped steps from a GitHub Actions log.

    Args:
        raw_log: Full raw log (timestamps already stripped).

    Returns:
        One ``GHAStep`` per ``##[group]``/``##[endgroup]`` pair, in order.
    """

    def _to_step(match: re.Match[str]) -> GHAStep:
        # Group 1 is the step title, group 2 the step body.
        title = match.group(1).strip()
        content = match.group(2).strip()
        return GHAStep(name=title, body=content, errors=_ERROR_RE.findall(content))

    return [_to_step(m) for m in _GROUP_RE.finditer(raw_log)]
85
+
86
+
87
+ def _extract_job_name(raw_log: str) -> str:
88
+ """Attempt to extract the job name from the log header.
89
+
90
+ GitHub Actions logs often start with a line like:
91
+ ``##[group]Run <job-name>``
92
+
93
+ Args:
94
+ raw_log: Raw (timestamp-stripped) log.
95
+
96
+ Returns:
97
+ Extracted job name, or ``"github_actions"`` as fallback.
98
+ """
99
+ m = re.search(r"##\[group\]Run\s+(.+?)$", raw_log, re.MULTILINE)
100
+ if m:
101
+ return m.group(1).strip()[:80]
102
+ return "github_actions"
103
+
104
+
105
class GitHubActionsParser:
    """CI log parser for GitHub Actions.

    Parses the ``##[group]`` / ``##[endgroup]`` structure and ``##[error]``
    annotations, then delegates to the core ``parse_failures`` function for
    content-level classification.

    Attributes:
        name: Parser identifier (``"github_actions"``).
    """

    name: str = "github_actions"

    def parse(self, raw_log: str) -> list[CIFailure]:
        """Parse a GitHub Actions log into structured CI failures.

        Strategy:
            1. Strip timestamps so content matchers work on clean text.
            2. Extract ``##[group]``/``##[endgroup]`` steps.
            3. For each step that contains ``##[error]`` annotations, run the
               core ``parse_failures`` on the step body.
            4. If no failing steps are found (the log may not use group
               markers), fall back to parsing the whole log.

        Args:
            raw_log: Raw log output from a GitHub Actions run.

        Returns:
            List of ``CIFailure`` objects.
        """
        clean = _strip_timestamps(raw_log)
        failing = [step for step in _extract_steps(clean) if step.errors]

        if not failing:
            # Fallback: no annotated steps — classify the entire log as one
            # block under a best-effort job name.
            return parse_failures(clean, job=_extract_job_name(clean))

        # Classify each failing step's body individually so failures carry
        # the step name as their job label.
        results: list[CIFailure] = []
        for step in failing:
            results.extend(parse_failures(step.body, job=step.name))
        return results
150
+
151
+
152
+ # ---------------------------------------------------------------------------
153
+ # Log download
154
+ # ---------------------------------------------------------------------------
155
+
156
+
157
def download_github_actions_log(
    run_url: str,
    *,
    timeout: int = 60,
) -> str:
    """Download the failed-step log from a GitHub Actions run.

    Uses ``gh run view --log-failed``, which requires the ``gh`` CLI to be
    installed and authenticated.

    Args:
        run_url: URL of the GitHub Actions run, e.g.
            ``https://github.com/owner/repo/actions/runs/123456``.
        timeout: Subprocess timeout in seconds.

    Returns:
        Raw log text from the failed steps.

    Raises:
        ValueError: If ``run_url`` does not contain a run ID.
        RuntimeError: If the ``gh`` CLI is missing or the command fails.
        subprocess.TimeoutExpired: If the download exceeds ``timeout``.
    """
    # Extract the run ID from the URL.
    run_id = _extract_run_id(run_url)

    try:
        result = subprocess.run(
            ["gh", "run", "view", run_id, "--log-failed"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except FileNotFoundError as err:
        # ``gh`` not on PATH would otherwise escape as FileNotFoundError,
        # contradicting the documented RuntimeError contract.
        msg = "gh CLI not found; install and authenticate GitHub CLI to download run logs"
        raise RuntimeError(msg) from err
    if result.returncode != 0:
        msg = f"gh run view failed (exit {result.returncode}): {result.stderr.strip()[:200]}"
        raise RuntimeError(msg)
    return result.stdout
191
+
192
+
193
def download_github_actions_log_api(
    run_url: str,
    *,
    timeout: int = 60,
) -> str:
    """Download the logs of all failed jobs in a run via ``gh api``.

    This is an alternative to ``download_github_actions_log`` that uses the
    GitHub REST API through ``gh api``, which can be more reliable in some
    environments.

    Args:
        run_url: URL of the GitHub Actions run.
        timeout: Subprocess timeout in seconds (applied per gh invocation).

    Returns:
        Concatenated raw log text of every failed job, or an empty string
        when no job concluded with failure.

    Raises:
        ValueError: If the URL does not contain owner/repo or a run ID.
        RuntimeError: If listing the run's jobs via ``gh api`` fails.
        subprocess.TimeoutExpired: If a gh invocation exceeds ``timeout``.
    """
    run_id = _extract_run_id(run_url)

    # First get the jobs for this run.
    # Extract owner/repo from the URL.
    m = re.match(r"https?://github\.com/([^/]+/[^/]+)/actions/runs/\d+", run_url)
    if not m:
        msg = f"Cannot parse owner/repo from URL: {run_url}"
        raise ValueError(msg)
    repo = m.group(1)

    # List the IDs of jobs that concluded with failure.
    result = subprocess.run(
        [
            "gh",
            "api",
            f"repos/{repo}/actions/runs/{run_id}/jobs",
            "--jq",
            '.jobs[] | select(.conclusion == "failure") | .id',
        ],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if result.returncode != 0:
        msg = f"gh api failed (exit {result.returncode}): {result.stderr.strip()[:200]}"
        raise RuntimeError(msg)

    job_ids = result.stdout.strip().splitlines()
    if not job_ids:
        return ""

    # Download logs for each failed job. A failed download for one job is
    # skipped rather than aborting the whole collection (best-effort).
    logs: list[str] = []
    for job_id in job_ids:
        log_result = subprocess.run(
            ["gh", "api", f"repos/{repo}/actions/jobs/{job_id}/logs"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        if log_result.returncode == 0:
            logs.append(log_result.stdout)

    return "\n".join(logs)
258
+
259
+
260
+ def _extract_run_id(run_url: str) -> str:
261
+ """Extract the numeric run ID from a GitHub Actions URL.
262
+
263
+ Args:
264
+ run_url: URL like ``https://github.com/owner/repo/actions/runs/123456``.
265
+
266
+ Returns:
267
+ The run ID as a string.
268
+
269
+ Raises:
270
+ ValueError: If the URL does not match the expected pattern.
271
+ """
272
+ m = re.search(r"/actions/runs/(\d+)", run_url)
273
+ if not m:
274
+ msg = f"Cannot extract run ID from URL: {run_url}"
275
+ raise ValueError(msg)
276
+ return m.group(1)