ci-log-intelligence 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ci_log_intelligence-0.1.0/LICENSE +21 -0
- ci_log_intelligence-0.1.0/PKG-INFO +291 -0
- ci_log_intelligence-0.1.0/README.md +259 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/__init__.py +50 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/api/__init__.py +47 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ci_analysis.py +290 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ci_report_builder.py +203 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/cli/__init__.py +1 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/cli/main.py +81 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/__init__.py +13 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/__init__.py +13 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/fetcher.py +342 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/fetcher_helpers.py +127 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/models.py +182 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/resolver.py +50 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/transports.py +131 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/mcp/__init__.py +1 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/mcp/cache.py +69 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/mcp/server.py +181 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/mcp/tools.py +368 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/models/__init__.py +93 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/parsing/__init__.py +115 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/__init__.py +80 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/classification/__init__.py +93 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/clustering/__init__.py +53 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/comparison/__init__.py +15 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/comparison/analyzer.py +349 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/comparison/excerpt.py +184 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/__init__.py +100 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/base.py +62 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_gcc.py +163 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_go.py +92 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_make.py +97 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_npm.py +162 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_rust.py +227 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/generic.py +63 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/go_test_fail.py +138 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/hash_mismatch.py +177 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/junit_xml.py +153 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/patterns.py +51 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/pytest_fail.py +190 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/rust_test_fail.py +138 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/expansion/__init__.py +113 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/merge/__init__.py +59 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/scoring/__init__.py +72 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/suppression/__init__.py +55 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/signals.py +63 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/storage/__init__.py +78 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/summarizer/__init__.py +25 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/utils/__init__.py +4 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/utils/logging.py +22 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence/utils/metrics.py +56 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/PKG-INFO +291 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/SOURCES.txt +58 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/dependency_links.txt +1 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/entry_points.txt +3 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/requires.txt +5 -0
- ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/top_level.txt +1 -0
- ci_log_intelligence-0.1.0/pyproject.toml +62 -0
- ci_log_intelligence-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kuldeep Kumar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ci-log-intelligence
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server that turns 50K-line CI logs into focused failure context for AI coding agents.
|
|
5
|
+
Author: Kuldeep Kumar
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence
|
|
8
|
+
Project-URL: Repository, https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence
|
|
9
|
+
Project-URL: Issues, https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence/issues
|
|
10
|
+
Keywords: ci,logs,mcp,model-context-protocol,ai-agents,github-actions,debugging,claude,codex,copilot
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
21
|
+
Classifier: Topic :: Software Development :: Debuggers
|
|
22
|
+
Classifier: Topic :: Software Development :: Testing
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: fastapi>=0.136.0
|
|
27
|
+
Requires-Dist: fastmcp>=2.14.7
|
|
28
|
+
Requires-Dist: pydantic>=2.12.0
|
|
29
|
+
Requires-Dist: requests>=2.33.0
|
|
30
|
+
Requires-Dist: uvicorn>=0.44.0
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+
# ci-log-intelligence
|
|
34
|
+
|
|
35
|
+
**Stop dumping 50,000-line CI logs into your AI coding agent.** This MCP server reads the logs *for* the agent and returns a few hundred tokens of focused, typed failure context — so the agent can debug your CI without flooding its context window.
|
|
36
|
+
|
|
37
|
+
[](https://pypi.org/project/ci-log-intelligence/)
|
|
38
|
+
[](https://pypi.org/project/ci-log-intelligence/)
|
|
39
|
+
[](https://opensource.org/licenses/MIT)
|
|
40
|
+
|
|
41
|
+
## The problem
|
|
42
|
+
|
|
43
|
+
You ask Claude / Codex / Copilot to fix a failing CI build. The agent runs `gh run view --log`, gets back 60,000 lines of pytest output, and pastes the whole thing into its context. Now:
|
|
44
|
+
|
|
45
|
+
- The actual failure is buried somewhere on line 47,892.
|
|
46
|
+
- Your context window is ~80% spent on log output before any work begins.
|
|
47
|
+
- Every tool call after this costs more because the cached context is enormous.
|
|
48
|
+
- The agent's reasoning quality drops because the relevant signal is diluted.
|
|
49
|
+
|
|
50
|
+
After a few of these, your conversation either OOMs the context or gets too expensive to be useful.
|
|
51
|
+
|
|
52
|
+
## What this does
|
|
53
|
+
|
|
54
|
+
`ci-log-intelligence` is an MCP server (also usable as a CLI / Python library) that sits between the agent and the CI logs. You give it a GitHub URL — a PR, a workflow run, or a single job — and it does the heavy reading in its own process:
|
|
55
|
+
|
|
56
|
+
```text
|
|
57
|
+
PR / run / job URL → fetch logs → parse → 11 detector plugins → typed failure records
|
|
58
|
+
│
|
|
59
|
+
▼
|
|
60
|
+
a few hundred tokens
|
|
61
|
+
of focused context
|
|
62
|
+
back to your agent
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
You get back a structured response: a ranked list of typed `FailureRecord`s (`hash_mismatch`, `build_error_rust`, `pytest_fail`, `go_test_fail`, …), each with the test name / file path / error code / log excerpt that's actually relevant — not 50K lines of `npm install` output.
|
|
66
|
+
|
|
67
|
+
## Three MCP tools, designed to explore-then-drill
|
|
68
|
+
|
|
69
|
+
Rather than one omnibus call that returns a fixed payload, the server exposes three tools that map onto how an agent actually wants to work:
|
|
70
|
+
|
|
71
|
+
| Tool | When to use | Approximate response size |
|
|
72
|
+
|---|---|---|
|
|
73
|
+
| `list_failed_jobs(ci_url)` | First call. Cheap map of failed jobs with classifications + the failure types present in each. No per-block content. | ~200–500 tokens |
|
|
74
|
+
| `analyze_ci_failure(ci_url, top_k=3, failure_types=None, …)` | Get the top-K typed failure records with content. Filterable by detector (`failure_types=["hash_mismatch"]`). | ~1–4K tokens |
|
|
75
|
+
| `get_block(ci_url, block_index, surround=5)` | Drill into a specific block. Returns full content with `in_block` / `is_anchor` flags. | per-block |
|
|
76
|
+
|
|
77
|
+
Results are cached per `(repo, run_id, job_id)`. A second call against the same URL skips the GitHub fetch, the parse, and the reducer entirely.
|
|
78
|
+
|
|
79
|
+
## Quick start
|
|
80
|
+
|
|
81
|
+
### Install
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install ci-log-intelligence
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Or from source:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
git clone https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence.git
|
|
91
|
+
cd ci-log-intelligence
|
|
92
|
+
pip install -e .
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Authenticate with GitHub
|
|
96
|
+
|
|
97
|
+
The fetcher prefers the local `gh` CLI and falls back to a `GITHUB_TOKEN` env var.
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
gh auth login # preferred
|
|
101
|
+
# or
|
|
102
|
+
export GITHUB_TOKEN=ghp_…
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Wire up your MCP client
|
|
106
|
+
|
|
107
|
+
This repo ships shared MCP configuration for several clients (see [INSTALL.md](INSTALL.md) for the full setup guide):
|
|
108
|
+
|
|
109
|
+
- **Codex**: `.codex/config.toml` (auto-discovered)
|
|
110
|
+
- **VS Code / GitHub Copilot**: `.vscode/mcp.json` (workspace-scoped)
|
|
111
|
+
- **Claude Desktop**: example at `docs/claude_desktop_config.example.json`
|
|
112
|
+
|
|
113
|
+
For any other MCP client, point it at the `ci-log-intelligence-mcp` command installed by the package.
|
|
114
|
+
|
|
115
|
+
## A 30-second demo
|
|
116
|
+
|
|
117
|
+
In your AI agent, after wiring up the MCP server:
|
|
118
|
+
|
|
119
|
+
> "The build at `https://github.com/me/myrepo/actions/runs/12345` failed. Can you fix it?"
|
|
120
|
+
|
|
121
|
+
The agent now has three tools available. A reasonable trace:
|
|
122
|
+
|
|
123
|
+
```text
|
|
124
|
+
agent → list_failed_jobs("https://github.com/me/myrepo/actions/runs/12345")
|
|
125
|
+
|
|
126
|
+
server → {
|
|
127
|
+
"jobs": [
|
|
128
|
+
{
|
|
129
|
+
"job_name": "postgres-test (bundling)",
|
|
130
|
+
"block_count": 3,
|
|
131
|
+
"failure_types_present": ["hash_mismatch", "generic"],
|
|
132
|
+
"classifications": {"root_cause": 1, "symptom": 2},
|
|
133
|
+
"job_url": "…/runs/12345/jobs/678"
|
|
134
|
+
}
|
|
135
|
+
],
|
|
136
|
+
"metadata": {"failed_jobs": 1, "total_runs_analyzed": 1}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
agent → analyze_ci_failure(
|
|
140
|
+
ci_url="…/runs/12345",
|
|
141
|
+
failure_types=["hash_mismatch"]
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
server → {
|
|
145
|
+
"root_cause": {
|
|
146
|
+
"summary": "Run 12345 job postgres-test (bundling) root_cause at lines 1058-1062: ...",
|
|
147
|
+
"log_excerpt": "common.go:1058: file hashes don't match for ...\n--- FAIL: TestRunSetPartial (45.3s)\n…",
|
|
148
|
+
"has_traceback": false,
|
|
149
|
+
"has_assertion": true,
|
|
150
|
+
"score": 10.0,
|
|
151
|
+
"score_components": {"severity_weight": 10.0, "signal_density": 0.5, "duplicate_penalty": 0.0}
|
|
152
|
+
},
|
|
153
|
+
"failures": [
|
|
154
|
+
{
|
|
155
|
+
"type": "hash_mismatch",
|
|
156
|
+
"classification": "root_cause",
|
|
157
|
+
"severity": 2,
|
|
158
|
+
"score": 10.0,
|
|
159
|
+
"start_line": 1058,
|
|
160
|
+
"end_line": 1062,
|
|
161
|
+
"summary": "…",
|
|
162
|
+
"log_excerpt": "…",
|
|
163
|
+
"extracted_fields": {
|
|
164
|
+
"test_name": "TestRunSetPartial",
|
|
165
|
+
"warehouse_target": "postgres",
|
|
166
|
+
"job_name": "postgres-test (bundling)"
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
],
|
|
170
|
+
"metadata": {"failures_returned": 1, "failures_total": 1, …}
|
|
171
|
+
}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
The agent now knows: it's a golden-file hash mismatch in `TestRunSetPartial` on the postgres warehouse target. It can run `make update_ref_samples` scoped to that one test. Total context consumed: <2K tokens instead of 50K.
|
|
175
|
+
|
|
176
|
+
## CLI usage
|
|
177
|
+
|
|
178
|
+
For humans debugging CI in a terminal:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
ci-log-intel analyze --url https://github.com/owner/repo/pull/123 --include-passed
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Machine-readable JSON:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
ci-log-intel analyze --url https://github.com/owner/repo/actions/runs/12345 --json
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Python usage
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
from ci_log_intelligence import analyze_ci_url
|
|
194
|
+
|
|
195
|
+
report = analyze_ci_url(
|
|
196
|
+
"https://github.com/owner/repo/pull/123",
|
|
197
|
+
include_passed=True,
|
|
198
|
+
max_passed_runs=3,
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
print(report.root_cause.summary)
|
|
202
|
+
for record in report.failures:
|
|
203
|
+
print(record.type, record.classification, record.score, record.extracted_fields)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
For raw log strings (no GitHub fetch):
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
from ci_log_intelligence import analyze_log
|
|
210
|
+
|
|
211
|
+
result = analyze_log("STEP: test\nERROR build failed\nException: boom")
|
|
212
|
+
for failure in result.detected_failures:
|
|
213
|
+
print(failure.type, failure.anchor_lines, failure.extracted_fields)
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## How it works
|
|
217
|
+
|
|
218
|
+
The pipeline is deterministic and heuristic — no LLM in the loop. A set of `Detector` plugins scans each parsed line and emits typed `DetectedFailure` records; the framework clusters anchors, expands context (step-bounded), suppresses noise, scores, classifies, and ranks.
|
|
219
|
+
|
|
220
|
+
### Detectors shipped in v1
|
|
221
|
+
|
|
222
|
+
| Detector | Severity | What it catches |
|
|
223
|
+
|---|---|---|
|
|
224
|
+
| `hash_mismatch` | 2 | `file hashes don't match` paired with `--- FAIL:` in the same step (golden-file failures) |
|
|
225
|
+
| `go_test_fail` | 2 | Standalone `--- FAIL: TestName` from `go test` (not paired with hash mismatches) |
|
|
226
|
+
| `pytest_fail` | 2 | `FAILED tests/x.py::test_y - …` summary lines with traceback pairing |
|
|
227
|
+
| `rust_test_fail` | 2 | `test foo::bar ... FAILED` paired with `thread '…' panicked at` |
|
|
228
|
+
| `junit_xml` | 2 | `<testcase>...<failure>` / `<error>` fragments embedded in log streams |
|
|
229
|
+
| `build_error_rust` | 3 | `error[E####]:` + `-->` location, plus bare cargo summaries |
|
|
230
|
+
| `build_error_go` | 3 | `./pkg/file.go:line:col: message` |
|
|
231
|
+
| `build_error_npm` | 3 | Multi-line `npm ERR!` / `yarn error` blocks |
|
|
232
|
+
| `build_error_make` | 3 | `make: *** [target] Error N` |
|
|
233
|
+
| `build_error_gcc` | 3 | `file:line:col: error: …` with note continuation (gcc/clang) |
|
|
234
|
+
| `generic` | 1–3 | Hardened keyword fallback (`Traceback`, `Exception`, `ERROR`, `FAILED`, etc.) with word boundaries, case-insensitive matching, and a benign-mention filter (`"0 errors"` won't anchor) |
|
|
235
|
+
|
|
236
|
+
Build errors at severity 3 outrank test failures at severity 2, so when a build broke *before* any test ran the build error is correctly selected as `root_cause` and the cascading test failures show up as `symptom`s.
|
|
237
|
+
|
|
238
|
+
### Adding a detector
|
|
239
|
+
|
|
240
|
+
Each detector is a single file under `ci_log_intelligence/reducer/detectors/`. Implement the `Detector` Protocol (one `scan()` method that returns a list of `DetectedFailure` records) and add yourself to the registry. The framework handles clustering, expansion, scoring, classification, and the typed-record output.
|
|
241
|
+
|
|
242
|
+
See [architecture.md](architecture.md) for the full pipeline description, data contracts, and design rationale.
|
|
243
|
+
|
|
244
|
+
## CI-aware comparison
|
|
245
|
+
|
|
246
|
+
When you give it a PR URL, the server fetches **both** failed and passed jobs in the same workflow run. Failed jobs go through the full reducer; passed jobs use targeted extraction (matching step IDs, test names, or assertion text from failed blocks). A cross-run analyzer then surfaces insights like:
|
|
247
|
+
|
|
248
|
+
- "Failure occurs only in variant `snowflake` for job group `test`."
|
|
249
|
+
- "Step `build-stage` is present in passed runs but missing in failing run for job group `test`."
|
|
250
|
+
- "Test `foo` behaves differently between passed and failed runs."
|
|
251
|
+
|
|
252
|
+
These come back in `cross_run_insights` so the agent can quickly see whether a failure is environment-specific, a regression, or flaky.
|
|
253
|
+
|
|
254
|
+
## HTTP API
|
|
255
|
+
|
|
256
|
+
If you'd rather not use MCP, there's a small FastAPI endpoint for raw-log analysis:
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
uvicorn ci_log_intelligence.api:app --reload
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
curl -X POST http://127.0.0.1:8000/analyze \
|
|
264
|
+
-H "Content-Type: application/json" \
|
|
265
|
+
-d '{"log":"STEP: test\nERROR build failed\nException: boom"}'
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## Testing
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
python -m unittest discover -s tests -v
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
250+ tests covering each detector, the cache, the MCP tool surface, and end-to-end scenarios across multiple detector types.
|
|
275
|
+
|
|
276
|
+
## Known limitations
|
|
277
|
+
|
|
278
|
+
- All specialized detectors are severity 2 or 3 and tiebreak on earliest anchor line. A `specificity` weighting on `DetectedFailure` is on the v1.1 roadmap.
|
|
279
|
+
- Windows-style paths (`C:\src\foo.cpp:5:1:`) may not parse correctly in the GCC build-error detector. Linux CI only for now.
|
|
280
|
+
- The JUnit XML detector caps at 50 records per scan; consumers should check `extracted_fields.get("truncated", False)`.
|
|
281
|
+
- Long-running Go tests with `(1m30s)` duration format report the seconds tail only.
|
|
282
|
+
|
|
283
|
+
See [architecture.md](architecture.md#known-limitations) for the full list.
|
|
284
|
+
|
|
285
|
+
## Contributing
|
|
286
|
+
|
|
287
|
+
Issues and PRs welcome. The codebase is small (~2.5K LOC + tests) and the detector framework is designed to make adding a new language / tool a single-file change. Run the tests, follow the existing patterns in `ci_log_intelligence/reducer/detectors/`, and open a PR.
|
|
288
|
+
|
|
289
|
+
## License
|
|
290
|
+
|
|
291
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# ci-log-intelligence
|
|
2
|
+
|
|
3
|
+
**Stop dumping 50,000-line CI logs into your AI coding agent.** This MCP server reads the logs *for* the agent and returns a few hundred tokens of focused, typed failure context — so the agent can debug your CI without flooding its context window.
|
|
4
|
+
|
|
5
|
+
[](https://pypi.org/project/ci-log-intelligence/)
|
|
6
|
+
[](https://pypi.org/project/ci-log-intelligence/)
|
|
7
|
+
[](https://opensource.org/licenses/MIT)
|
|
8
|
+
|
|
9
|
+
## The problem
|
|
10
|
+
|
|
11
|
+
You ask Claude / Codex / Copilot to fix a failing CI build. The agent runs `gh run view --log`, gets back 60,000 lines of pytest output, and pastes the whole thing into its context. Now:
|
|
12
|
+
|
|
13
|
+
- The actual failure is buried somewhere on line 47,892.
|
|
14
|
+
- Your context window is ~80% spent on log output before any work begins.
|
|
15
|
+
- Every tool call after this costs more because the cached context is enormous.
|
|
16
|
+
- The agent's reasoning quality drops because the relevant signal is diluted.
|
|
17
|
+
|
|
18
|
+
After a few of these, your conversation either OOMs the context or gets too expensive to be useful.
|
|
19
|
+
|
|
20
|
+
## What this does
|
|
21
|
+
|
|
22
|
+
`ci-log-intelligence` is an MCP server (also usable as a CLI / Python library) that sits between the agent and the CI logs. You give it a GitHub URL — a PR, a workflow run, or a single job — and it does the heavy reading in its own process:
|
|
23
|
+
|
|
24
|
+
```text
|
|
25
|
+
PR / run / job URL → fetch logs → parse → 11 detector plugins → typed failure records
|
|
26
|
+
│
|
|
27
|
+
▼
|
|
28
|
+
a few hundred tokens
|
|
29
|
+
of focused context
|
|
30
|
+
back to your agent
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
You get back a structured response: a ranked list of typed `FailureRecord`s (`hash_mismatch`, `build_error_rust`, `pytest_fail`, `go_test_fail`, …), each with the test name / file path / error code / log excerpt that's actually relevant — not 50K lines of `npm install` output.
|
|
34
|
+
|
|
35
|
+
## Three MCP tools, designed to explore-then-drill
|
|
36
|
+
|
|
37
|
+
Rather than one omnibus call that returns a fixed payload, the server exposes three tools that map onto how an agent actually wants to work:
|
|
38
|
+
|
|
39
|
+
| Tool | When to use | Approximate response size |
|
|
40
|
+
|---|---|---|
|
|
41
|
+
| `list_failed_jobs(ci_url)` | First call. Cheap map of failed jobs with classifications + the failure types present in each. No per-block content. | ~200–500 tokens |
|
|
42
|
+
| `analyze_ci_failure(ci_url, top_k=3, failure_types=None, …)` | Get the top-K typed failure records with content. Filterable by detector (`failure_types=["hash_mismatch"]`). | ~1–4K tokens |
|
|
43
|
+
| `get_block(ci_url, block_index, surround=5)` | Drill into a specific block. Returns full content with `in_block` / `is_anchor` flags. | per-block |
|
|
44
|
+
|
|
45
|
+
Results are cached per `(repo, run_id, job_id)`. A second call against the same URL skips the GitHub fetch, the parse, and the reducer entirely.
|
|
46
|
+
|
|
47
|
+
## Quick start
|
|
48
|
+
|
|
49
|
+
### Install
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install ci-log-intelligence
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Or from source:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
git clone https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence.git
|
|
59
|
+
cd ci-log-intelligence
|
|
60
|
+
pip install -e .
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Authenticate with GitHub
|
|
64
|
+
|
|
65
|
+
The fetcher prefers the local `gh` CLI and falls back to a `GITHUB_TOKEN` env var.
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
gh auth login # preferred
|
|
69
|
+
# or
|
|
70
|
+
export GITHUB_TOKEN=ghp_…
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Wire up your MCP client
|
|
74
|
+
|
|
75
|
+
This repo ships shared MCP configuration for several clients (see [INSTALL.md](INSTALL.md) for the full setup guide):
|
|
76
|
+
|
|
77
|
+
- **Codex**: `.codex/config.toml` (auto-discovered)
|
|
78
|
+
- **VS Code / GitHub Copilot**: `.vscode/mcp.json` (workspace-scoped)
|
|
79
|
+
- **Claude Desktop**: example at `docs/claude_desktop_config.example.json`
|
|
80
|
+
|
|
81
|
+
For any other MCP client, point it at the `ci-log-intelligence-mcp` command installed by the package.
|
|
82
|
+
|
|
83
|
+
## A 30-second demo
|
|
84
|
+
|
|
85
|
+
In your AI agent, after wiring up the MCP server:
|
|
86
|
+
|
|
87
|
+
> "The build at `https://github.com/me/myrepo/actions/runs/12345` failed. Can you fix it?"
|
|
88
|
+
|
|
89
|
+
The agent now has three tools available. A reasonable trace:
|
|
90
|
+
|
|
91
|
+
```text
|
|
92
|
+
agent → list_failed_jobs("https://github.com/me/myrepo/actions/runs/12345")
|
|
93
|
+
|
|
94
|
+
server → {
|
|
95
|
+
"jobs": [
|
|
96
|
+
{
|
|
97
|
+
"job_name": "postgres-test (bundling)",
|
|
98
|
+
"block_count": 3,
|
|
99
|
+
"failure_types_present": ["hash_mismatch", "generic"],
|
|
100
|
+
"classifications": {"root_cause": 1, "symptom": 2},
|
|
101
|
+
"job_url": "…/runs/12345/jobs/678"
|
|
102
|
+
}
|
|
103
|
+
],
|
|
104
|
+
"metadata": {"failed_jobs": 1, "total_runs_analyzed": 1}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
agent → analyze_ci_failure(
|
|
108
|
+
ci_url="…/runs/12345",
|
|
109
|
+
failure_types=["hash_mismatch"]
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
server → {
|
|
113
|
+
"root_cause": {
|
|
114
|
+
"summary": "Run 12345 job postgres-test (bundling) root_cause at lines 1058-1062: ...",
|
|
115
|
+
"log_excerpt": "common.go:1058: file hashes don't match for ...\n--- FAIL: TestRunSetPartial (45.3s)\n…",
|
|
116
|
+
"has_traceback": false,
|
|
117
|
+
"has_assertion": true,
|
|
118
|
+
"score": 10.0,
|
|
119
|
+
"score_components": {"severity_weight": 10.0, "signal_density": 0.5, "duplicate_penalty": 0.0}
|
|
120
|
+
},
|
|
121
|
+
"failures": [
|
|
122
|
+
{
|
|
123
|
+
"type": "hash_mismatch",
|
|
124
|
+
"classification": "root_cause",
|
|
125
|
+
"severity": 2,
|
|
126
|
+
"score": 10.0,
|
|
127
|
+
"start_line": 1058,
|
|
128
|
+
"end_line": 1062,
|
|
129
|
+
"summary": "…",
|
|
130
|
+
"log_excerpt": "…",
|
|
131
|
+
"extracted_fields": {
|
|
132
|
+
"test_name": "TestRunSetPartial",
|
|
133
|
+
"warehouse_target": "postgres",
|
|
134
|
+
"job_name": "postgres-test (bundling)"
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
],
|
|
138
|
+
"metadata": {"failures_returned": 1, "failures_total": 1, …}
|
|
139
|
+
}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
The agent now knows: it's a golden-file hash mismatch in `TestRunSetPartial` on the postgres warehouse target. It can run `make update_ref_samples` scoped to that one test. Total context consumed: <2K tokens instead of 50K.
|
|
143
|
+
|
|
144
|
+
## CLI usage
|
|
145
|
+
|
|
146
|
+
For humans debugging CI in a terminal:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
ci-log-intel analyze --url https://github.com/owner/repo/pull/123 --include-passed
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Machine-readable JSON:
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
ci-log-intel analyze --url https://github.com/owner/repo/actions/runs/12345 --json
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Python usage
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
from ci_log_intelligence import analyze_ci_url
|
|
162
|
+
|
|
163
|
+
report = analyze_ci_url(
|
|
164
|
+
"https://github.com/owner/repo/pull/123",
|
|
165
|
+
include_passed=True,
|
|
166
|
+
max_passed_runs=3,
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
print(report.root_cause.summary)
|
|
170
|
+
for record in report.failures:
|
|
171
|
+
print(record.type, record.classification, record.score, record.extracted_fields)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
For raw log strings (no GitHub fetch):
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from ci_log_intelligence import analyze_log
|
|
178
|
+
|
|
179
|
+
result = analyze_log("STEP: test\nERROR build failed\nException: boom")
|
|
180
|
+
for failure in result.detected_failures:
|
|
181
|
+
print(failure.type, failure.anchor_lines, failure.extracted_fields)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## How it works
|
|
185
|
+
|
|
186
|
+
The pipeline is deterministic and heuristic — no LLM in the loop. A set of `Detector` plugins scans each parsed line and emits typed `DetectedFailure` records; the framework clusters anchors, expands context (step-bounded), suppresses noise, scores, classifies, and ranks.
|
|
187
|
+
|
|
188
|
+
### Detectors shipped in v1
|
|
189
|
+
|
|
190
|
+
| Detector | Severity | What it catches |
|
|
191
|
+
|---|---|---|
|
|
192
|
+
| `hash_mismatch` | 2 | `file hashes don't match` paired with `--- FAIL:` in the same step (golden-file failures) |
|
|
193
|
+
| `go_test_fail` | 2 | Standalone `--- FAIL: TestName` from `go test` (not paired with hash mismatches) |
|
|
194
|
+
| `pytest_fail` | 2 | `FAILED tests/x.py::test_y - …` summary lines with traceback pairing |
|
|
195
|
+
| `rust_test_fail` | 2 | `test foo::bar ... FAILED` paired with `thread '…' panicked at` |
|
|
196
|
+
| `junit_xml` | 2 | `<testcase>...<failure>` / `<error>` fragments embedded in log streams |
|
|
197
|
+
| `build_error_rust` | 3 | `error[E####]:` + `-->` location, plus bare cargo summaries |
|
|
198
|
+
| `build_error_go` | 3 | `./pkg/file.go:line:col: message` |
|
|
199
|
+
| `build_error_npm` | 3 | Multi-line `npm ERR!` / `yarn error` blocks |
|
|
200
|
+
| `build_error_make` | 3 | `make: *** [target] Error N` |
|
|
201
|
+
| `build_error_gcc` | 3 | `file:line:col: error: …` with note continuation (gcc/clang) |
|
|
202
|
+
| `generic` | 1–3 | Hardened keyword fallback (`Traceback`, `Exception`, `ERROR`, `FAILED`, etc.) with word boundaries, case-insensitive matching, and a benign-mention filter (`"0 errors"` won't anchor) |
|
|
203
|
+
|
|
204
|
+
Build errors at severity 3 outrank test failures at severity 2, so when a build broke *before* any test ran, the build error is correctly selected as `root_cause` and the cascading test failures show as `symptom`s.
|
|
205
|
+
|
|
206
|
+
### Adding a detector
|
|
207
|
+
|
|
208
|
+
Each detector is a single file under `ci_log_intelligence/reducer/detectors/`. Implement the `Detector` Protocol (one `scan()` method that returns a list of `DetectedFailure` records) and add yourself to the registry. The framework handles clustering, expansion, scoring, classification, and the typed-record output.
|
|
209
|
+
|
|
210
|
+
See [architecture.md](architecture.md) for the full pipeline description, data contracts, and design rationale.
|
|
211
|
+
|
|
212
|
+
## CI-aware comparison
|
|
213
|
+
|
|
214
|
+
When you give it a PR URL, the server fetches **both** failed and passed jobs in the same workflow run. Failed jobs go through the full reducer; passed jobs use targeted extraction (matching step IDs, test names, or assertion text from failed blocks). A cross-run analyzer then surfaces insights like:
|
|
215
|
+
|
|
216
|
+
- "Failure occurs only in variant `snowflake` for job group `test`."
|
|
217
|
+
- "Step `build-stage` is present in passed runs but missing in failing run for job group `test`."
|
|
218
|
+
- "Test `foo` behaves differently between passed and failed runs."
|
|
219
|
+
|
|
220
|
+
These come back in `cross_run_insights` so the agent can quickly see whether a failure is environment-specific, a regression, or flaky.
|
|
221
|
+
|
|
222
|
+
## HTTP API
|
|
223
|
+
|
|
224
|
+
If you'd rather not use MCP, there's a small FastAPI endpoint for raw-log analysis:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
uvicorn ci_log_intelligence.api:app --reload
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
curl -X POST http://127.0.0.1:8000/analyze \
|
|
232
|
+
-H "Content-Type: application/json" \
|
|
233
|
+
-d '{"log":"STEP: test\nERROR build failed\nException: boom"}'
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## Testing
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
python -m unittest discover -s tests -v
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
250+ tests covering each detector, the cache, the MCP tool surface, and end-to-end scenarios across multiple detector types.
|
|
243
|
+
|
|
244
|
+
## Known limitations
|
|
245
|
+
|
|
246
|
+
- All specialized detectors are severity 2 or 3 and tiebreak on earliest anchor line. A `specificity` weighting on `DetectedFailure` is on the v1.1 roadmap.
|
|
247
|
+
- Windows-style paths (`C:\src\foo.cpp:5:1:`) may not parse correctly in the GCC build-error detector. Linux CI only for now.
|
|
248
|
+
- The JUnit XML detector caps at 50 records per scan; consumers should check `extracted_fields.get("truncated", False)`.
|
|
249
|
+
- Long-running Go tests with `(1m30s)` duration format report the seconds tail only.
|
|
250
|
+
|
|
251
|
+
See [architecture.md](architecture.md#known-limitations) for the full list.
|
|
252
|
+
|
|
253
|
+
## Contributing
|
|
254
|
+
|
|
255
|
+
Issues and PRs welcome. The codebase is small (~2.5K LOC + tests) and the detector framework is designed to make adding a new language / tool a single-file change. Run the tests, follow the existing patterns in `ci_log_intelligence/reducer/detectors/`, and open a PR.
|
|
256
|
+
|
|
257
|
+
## License
|
|
258
|
+
|
|
259
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from .ci_analysis import analyze_ci_url
|
|
6
|
+
from .ingestion import ingest_log
|
|
7
|
+
from .models import ReductionResult
|
|
8
|
+
from .parsing import parse_log
|
|
9
|
+
from .reducer import reduce_parsed_lines
|
|
10
|
+
from .storage import StorageBackend, create_storage_backend
|
|
11
|
+
from .summarizer import summarize_reduction_result
|
|
12
|
+
from .utils.logging import get_structured_logger
|
|
13
|
+
from .utils.metrics import MetricsCollector, measure_stage
|
|
14
|
+
|
|
15
|
+
# Public API of the package; the other imported names above are internal
# re-exports and deliberately not advertised here.
__all__ = [
    "analyze_log",
    "analyze_ci_url",
    "ReductionResult",
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def analyze_log(
    log: str,
    storage_backend: Optional[StorageBackend] = None,
    spill_threshold_bytes: int = 5_000_000,
    metrics: Optional[MetricsCollector] = None,
) -> ReductionResult:
    """Run the full parse -> reduce -> summarize pipeline on a raw log string.

    Args:
        log: Raw CI log text to analyze.
        storage_backend: Pre-built storage backend to use; when ``None`` one
            is chosen from the log's UTF-8 byte size and
            ``spill_threshold_bytes``.
        spill_threshold_bytes: Size threshold passed to
            ``create_storage_backend`` when no backend is supplied.
        metrics: Collector to accumulate stage metrics into; a fresh
            ``MetricsCollector`` is created when ``None``.

    Returns:
        The ``ReductionResult`` with its ``summary`` populated.
    """
    logger = get_structured_logger("ci_log_intelligence")
    # Explicit `is None` checks: `metrics or ...` / `storage_backend or ...`
    # would silently discard a falsy-but-valid instance supplied by the caller
    # (e.g. an empty MetricsCollector that defines __bool__/__len__).
    collector = metrics if metrics is not None else MetricsCollector()
    if storage_backend is not None:
        backend = storage_backend
    else:
        backend = create_storage_backend(
            byte_size=len(log.encode("utf-8")),
            spill_threshold_bytes=spill_threshold_bytes,
        )

    stored_log = ingest_log(log, backend)
    try:
        with measure_stage("parse", collector, logger):
            parsed_lines = parse_log(stored_log, backend)

        result = reduce_parsed_lines(parsed_lines, metrics=collector, logger=logger)

        with measure_stage("summarize", collector, logger):
            result.summary = summarize_reduction_result(result)

        selected_lines = sum(len(scored.block.lines) for scored in result.blocks)
        # max(..., 1) guards against division by zero on an empty log.
        collector.record_metric("reduction_ratio", selected_lines / max(len(parsed_lines), 1))
        collector.record_metric("number_of_blocks", float(len(result.blocks)))
        return result
    finally:
        # Always release the stored log, even if a pipeline stage raised.
        backend.delete(stored_log.reference)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fastapi import FastAPI
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from .. import analyze_log
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AnalyzeRequest(BaseModel):
    """Request body for ``POST /analyze``."""

    # Raw CI log text to analyze; must be non-empty.
    log: str = Field(..., min_length=1)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AnalyzeBlockResponse(BaseModel):
    """One scored log block returned by the analysis endpoint."""

    # Line span of the block within the submitted log.
    # NOTE(review): presumably 1-based, mirroring the reducer's numbering —
    # confirm against the reducer's ScoredBlock contract.
    start_line: int
    end_line: int
    # Relevance score assigned by the reducer (higher ranks first).
    score: float
    # Classification label produced by the reducer (e.g. "root_cause").
    classification: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AnalyzeResponse(BaseModel):
    """Response body for ``POST /analyze``."""

    # Scored blocks in the order produced by the reducer.
    blocks: list[AnalyzeBlockResponse]
    # Human-readable summary; empty string when none was produced.
    summary: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def create_app() -> FastAPI:
    """Construct the FastAPI application exposing raw-log analysis.

    Returns:
        A FastAPI app with a single ``POST /analyze`` route that runs the
        deterministic reducer pipeline over the submitted log text.
    """
    application = FastAPI(title="CI Log Intelligence")

    @application.post("/analyze", response_model=AnalyzeResponse)
    def analyze(request: AnalyzeRequest) -> AnalyzeResponse:
        # Run the full analysis pipeline on the raw log string.
        analysis = analyze_log(request.log)

        # Convert each scored block into its wire representation.
        block_payloads: list[AnalyzeBlockResponse] = []
        for scored in analysis.blocks:
            payload = AnalyzeBlockResponse(
                start_line=scored.block.start_line,
                end_line=scored.block.end_line,
                score=scored.score,
                classification=scored.classification,
            )
            block_payloads.append(payload)

        # A missing/empty summary is normalized to the empty string.
        summary_text = analysis.summary if analysis.summary else ""
        return AnalyzeResponse(blocks=block_payloads, summary=summary_text)

    return application
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Module-level ASGI application so `uvicorn ci_log_intelligence.api:app` works.
app = create_app()

__all__ = ["AnalyzeRequest", "AnalyzeResponse", "app", "create_app"]
|