ci-log-intelligence 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. ci_log_intelligence-0.1.0/LICENSE +21 -0
  2. ci_log_intelligence-0.1.0/PKG-INFO +291 -0
  3. ci_log_intelligence-0.1.0/README.md +259 -0
  4. ci_log_intelligence-0.1.0/ci_log_intelligence/__init__.py +50 -0
  5. ci_log_intelligence-0.1.0/ci_log_intelligence/api/__init__.py +47 -0
  6. ci_log_intelligence-0.1.0/ci_log_intelligence/ci_analysis.py +290 -0
  7. ci_log_intelligence-0.1.0/ci_log_intelligence/ci_report_builder.py +203 -0
  8. ci_log_intelligence-0.1.0/ci_log_intelligence/cli/__init__.py +1 -0
  9. ci_log_intelligence-0.1.0/ci_log_intelligence/cli/main.py +81 -0
  10. ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/__init__.py +13 -0
  11. ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/__init__.py +13 -0
  12. ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/fetcher.py +342 -0
  13. ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/fetcher_helpers.py +127 -0
  14. ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/models.py +182 -0
  15. ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/resolver.py +50 -0
  16. ci_log_intelligence-0.1.0/ci_log_intelligence/ingestion/github/transports.py +131 -0
  17. ci_log_intelligence-0.1.0/ci_log_intelligence/mcp/__init__.py +1 -0
  18. ci_log_intelligence-0.1.0/ci_log_intelligence/mcp/cache.py +69 -0
  19. ci_log_intelligence-0.1.0/ci_log_intelligence/mcp/server.py +181 -0
  20. ci_log_intelligence-0.1.0/ci_log_intelligence/mcp/tools.py +368 -0
  21. ci_log_intelligence-0.1.0/ci_log_intelligence/models/__init__.py +93 -0
  22. ci_log_intelligence-0.1.0/ci_log_intelligence/parsing/__init__.py +115 -0
  23. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/__init__.py +80 -0
  24. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/classification/__init__.py +93 -0
  25. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/clustering/__init__.py +53 -0
  26. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/comparison/__init__.py +15 -0
  27. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/comparison/analyzer.py +349 -0
  28. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/comparison/excerpt.py +184 -0
  29. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/__init__.py +100 -0
  30. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/base.py +62 -0
  31. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_gcc.py +163 -0
  32. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_go.py +92 -0
  33. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_make.py +97 -0
  34. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_npm.py +162 -0
  35. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/build_error_rust.py +227 -0
  36. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/generic.py +63 -0
  37. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/go_test_fail.py +138 -0
  38. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/hash_mismatch.py +177 -0
  39. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/junit_xml.py +153 -0
  40. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/patterns.py +51 -0
  41. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/pytest_fail.py +190 -0
  42. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/detectors/rust_test_fail.py +138 -0
  43. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/expansion/__init__.py +113 -0
  44. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/merge/__init__.py +59 -0
  45. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/scoring/__init__.py +72 -0
  46. ci_log_intelligence-0.1.0/ci_log_intelligence/reducer/suppression/__init__.py +55 -0
  47. ci_log_intelligence-0.1.0/ci_log_intelligence/signals.py +63 -0
  48. ci_log_intelligence-0.1.0/ci_log_intelligence/storage/__init__.py +78 -0
  49. ci_log_intelligence-0.1.0/ci_log_intelligence/summarizer/__init__.py +25 -0
  50. ci_log_intelligence-0.1.0/ci_log_intelligence/utils/__init__.py +4 -0
  51. ci_log_intelligence-0.1.0/ci_log_intelligence/utils/logging.py +22 -0
  52. ci_log_intelligence-0.1.0/ci_log_intelligence/utils/metrics.py +56 -0
  53. ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/PKG-INFO +291 -0
  54. ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/SOURCES.txt +58 -0
  55. ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/dependency_links.txt +1 -0
  56. ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/entry_points.txt +3 -0
  57. ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/requires.txt +5 -0
  58. ci_log_intelligence-0.1.0/ci_log_intelligence.egg-info/top_level.txt +1 -0
  59. ci_log_intelligence-0.1.0/pyproject.toml +62 -0
  60. ci_log_intelligence-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kuldeep Kumar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,291 @@
1
+ Metadata-Version: 2.4
2
+ Name: ci-log-intelligence
3
+ Version: 0.1.0
4
+ Summary: MCP server that turns 50K-line CI logs into focused failure context for AI coding agents.
5
+ Author: Kuldeep Kumar
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence
8
+ Project-URL: Repository, https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence
9
+ Project-URL: Issues, https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence/issues
10
+ Keywords: ci,logs,mcp,model-context-protocol,ai-agents,github-actions,debugging,claude,codex,copilot
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Build Tools
21
+ Classifier: Topic :: Software Development :: Debuggers
22
+ Classifier: Topic :: Software Development :: Testing
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: fastapi>=0.136.0
27
+ Requires-Dist: fastmcp>=2.14.7
28
+ Requires-Dist: pydantic>=2.12.0
29
+ Requires-Dist: requests>=2.33.0
30
+ Requires-Dist: uvicorn>=0.44.0
31
+ Dynamic: license-file
32
+
33
+ # ci-log-intelligence
34
+
35
+ **Stop dumping 50,000-line CI logs into your AI coding agent.** This MCP server reads the logs *for* the agent and returns a few hundred tokens of focused, typed failure context — so the agent can debug your CI without flooding its context window.
36
+
37
+ [![PyPI version](https://img.shields.io/pypi/v/ci-log-intelligence.svg)](https://pypi.org/project/ci-log-intelligence/)
38
+ [![Python](https://img.shields.io/pypi/pyversions/ci-log-intelligence.svg)](https://pypi.org/project/ci-log-intelligence/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
40
+
41
+ ## The problem
42
+
43
+ You ask Claude / Codex / Copilot to fix a failing CI build. The agent runs `gh run view --log`, gets back 60,000 lines of pytest output, and pastes the whole thing into its context. Now:
44
+
45
+ - The actual failure is buried somewhere on line 47,892.
46
+ - Your context window is ~80% spent on log output before any work begins.
47
+ - Every tool call after this costs more because the cached context is enormous.
48
+ - The agent's reasoning quality drops because the relevant signal is diluted.
49
+
50
+ After a few of these, your conversation either OOMs the context or gets too expensive to be useful.
51
+
52
+ ## What this does
53
+
54
+ `ci-log-intelligence` is an MCP server (also usable as a CLI / Python library) that sits between the agent and the CI logs. You give it a GitHub URL — a PR, a workflow run, or a single job — and it does the heavy reading in its own process:
55
+
56
+ ```text
57
+ PR / run / job URL → fetch logs → parse → 11 detector plugins → typed failure records
                                              ↓
60
+ a few hundred tokens
61
+ of focused context
62
+ back to your agent
63
+ ```
64
+
65
+ You get back a structured response: a ranked list of typed `FailureRecord`s (`hash_mismatch`, `build_error_rust`, `pytest_fail`, `go_test_fail`, …), each with the test name / file path / error code / log excerpt that's actually relevant — not 50K lines of `npm install` output.
66
+
67
+ ## Three MCP tools, designed to explore-then-drill
68
+
69
+ Rather than one omnibus call that returns a fixed payload, the server exposes three tools that map onto how an agent actually wants to work:
70
+
71
+ | Tool | When to use | Approximate response size |
72
+ |---|---|---|
73
+ | `list_failed_jobs(ci_url)` | First call. Cheap map of failed jobs with classifications + the failure types present in each. No per-block content. | ~200–500 tokens |
74
+ | `analyze_ci_failure(ci_url, top_k=3, failure_types=None, …)` | Get the top-K typed failure records with content. Filterable by detector (`failure_types=["hash_mismatch"]`). | ~1–4K tokens |
75
+ | `get_block(ci_url, block_index, surround=5)` | Drill into a specific block. Returns full content with `in_block` / `is_anchor` flags. | per-block |
76
+
77
+ Results are cached per `(repo, run_id, job_id)`. A second call against the same URL skips the GitHub fetch, the parse, and the reducer entirely.
78
+
79
+ ## Quick start
80
+
81
+ ### Install
82
+
83
+ ```bash
84
+ pip install ci-log-intelligence
85
+ ```
86
+
87
+ Or from source:
88
+
89
+ ```bash
90
+ git clone https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence.git
91
+ cd ci-log-intelligence
92
+ pip install -e .
93
+ ```
94
+
95
+ ### Authenticate with GitHub
96
+
97
+ The fetcher prefers the local `gh` CLI and falls back to a `GITHUB_TOKEN` env var.
98
+
99
+ ```bash
100
+ gh auth login # preferred
101
+ # or
102
+ export GITHUB_TOKEN=ghp_…
103
+ ```
104
+
105
+ ### Wire up your MCP client
106
+
107
+ This repo ships shared MCP configuration for several clients (see [INSTALL.md](INSTALL.md) for the full setup guide):
108
+
109
+ - **Codex**: `.codex/config.toml` (auto-discovered)
110
+ - **VS Code / GitHub Copilot**: `.vscode/mcp.json` (workspace-scoped)
111
+ - **Claude Desktop**: example at `docs/claude_desktop_config.example.json`
112
+
113
+ For any other MCP client, point it at the `ci-log-intelligence-mcp` command installed by the package.
114
+
115
+ ## A 30-second demo
116
+
117
+ In your AI agent, after wiring up the MCP server:
118
+
119
+ > "The build at `https://github.com/me/myrepo/actions/runs/12345` failed. Can you fix it?"
120
+
121
+ The agent now has three tools available. A reasonable trace:
122
+
123
+ ```text
124
+ agent → list_failed_jobs("https://github.com/me/myrepo/actions/runs/12345")
125
+
126
+ server → {
127
+ "jobs": [
128
+ {
129
+ "job_name": "postgres-test (bundling)",
130
+ "block_count": 3,
131
+ "failure_types_present": ["hash_mismatch", "generic"],
132
+ "classifications": {"root_cause": 1, "symptom": 2},
133
+ "job_url": "…/runs/12345/jobs/678"
134
+ }
135
+ ],
136
+ "metadata": {"failed_jobs": 1, "total_runs_analyzed": 1}
137
+ }
138
+
139
+ agent → analyze_ci_failure(
140
+ ci_url="…/runs/12345",
141
+ failure_types=["hash_mismatch"]
142
+ )
143
+
144
+ server → {
145
+ "root_cause": {
146
+ "summary": "Run 12345 job postgres-test (bundling) root_cause at lines 1058-1062: ...",
147
+ "log_excerpt": "common.go:1058: file hashes don't match for ...\n--- FAIL: TestRunSetPartial (45.3s)\n…",
148
+ "has_traceback": false,
149
+ "has_assertion": true,
150
+ "score": 10.0,
151
+ "score_components": {"severity_weight": 10.0, "signal_density": 0.5, "duplicate_penalty": 0.0}
152
+ },
153
+ "failures": [
154
+ {
155
+ "type": "hash_mismatch",
156
+ "classification": "root_cause",
157
+ "severity": 2,
158
+ "score": 10.0,
159
+ "start_line": 1058,
160
+ "end_line": 1062,
161
+ "summary": "…",
162
+ "log_excerpt": "…",
163
+ "extracted_fields": {
164
+ "test_name": "TestRunSetPartial",
165
+ "warehouse_target": "postgres",
166
+ "job_name": "postgres-test (bundling)"
167
+ }
168
+ }
169
+ ],
170
+ "metadata": {"failures_returned": 1, "failures_total": 1, …}
171
+ }
172
+ ```
173
+
174
+ The agent now knows: it's a golden-file hash mismatch in `TestRunSetPartial` on the postgres warehouse target. It can run `make update_ref_samples` scoped to that one test. Total context consumed: <2K tokens instead of 50K.
175
+
176
+ ## CLI usage
177
+
178
+ For humans debugging CI in a terminal:
179
+
180
+ ```bash
181
+ ci-log-intel analyze --url https://github.com/owner/repo/pull/123 --include-passed
182
+ ```
183
+
184
+ Machine-readable JSON:
185
+
186
+ ```bash
187
+ ci-log-intel analyze --url https://github.com/owner/repo/actions/runs/12345 --json
188
+ ```
189
+
190
+ ## Python usage
191
+
192
+ ```python
193
+ from ci_log_intelligence import analyze_ci_url
194
+
195
+ report = analyze_ci_url(
196
+ "https://github.com/owner/repo/pull/123",
197
+ include_passed=True,
198
+ max_passed_runs=3,
199
+ )
200
+
201
+ print(report.root_cause.summary)
202
+ for record in report.failures:
203
+ print(record.type, record.classification, record.score, record.extracted_fields)
204
+ ```
205
+
206
+ For raw log strings (no GitHub fetch):
207
+
208
+ ```python
209
+ from ci_log_intelligence import analyze_log
210
+
211
+ result = analyze_log("STEP: test\nERROR build failed\nException: boom")
212
+ for failure in result.detected_failures:
213
+ print(failure.type, failure.anchor_lines, failure.extracted_fields)
214
+ ```
215
+
216
+ ## How it works
217
+
218
+ The pipeline is deterministic and heuristic — no LLM in the loop. A set of `Detector` plugins scans each parsed line and emits typed `DetectedFailure` records; the framework clusters anchors, expands context (step-bounded), suppresses noise, scores, classifies, and ranks.
219
+
220
+ ### Detectors shipped in v1
221
+
222
+ | Detector | Severity | What it catches |
223
+ |---|---|---|
224
+ | `hash_mismatch` | 2 | `file hashes don't match` paired with `--- FAIL:` in the same step (golden-file failures) |
225
+ | `go_test_fail` | 2 | Standalone `--- FAIL: TestName` from `go test` (not paired with hash mismatches) |
226
+ | `pytest_fail` | 2 | `FAILED tests/x.py::test_y - …` summary lines with traceback pairing |
227
+ | `rust_test_fail` | 2 | `test foo::bar ... FAILED` paired with `thread '…' panicked at` |
228
+ | `junit_xml` | 2 | `<testcase>...<failure>` / `<error>` fragments embedded in log streams |
229
+ | `build_error_rust` | 3 | `error[E####]:` + `-->` location, plus bare cargo summaries |
230
+ | `build_error_go` | 3 | `./pkg/file.go:line:col: message` |
231
+ | `build_error_npm` | 3 | Multi-line `npm ERR!` / `yarn error` blocks |
232
+ | `build_error_make` | 3 | `make: *** [target] Error N` |
233
+ | `build_error_gcc` | 3 | `file:line:col: error: …` with note continuation (gcc/clang) |
234
+ | `generic` | 1–3 | Hardened keyword fallback (`Traceback`, `Exception`, `ERROR`, `FAILED`, etc.) with word boundaries, case-insensitive matching, and a benign-mention filter (`"0 errors"` won't anchor) |
235
+
236
+ Build errors at severity 3 outrank test failures at severity 2, so when a build broke *before* any test ran the build error is correctly selected as `root_cause` and the cascading test failures show as `symptom`s.
237
+
238
+ ### Adding a detector
239
+
240
+ Each detector is a single file under `ci_log_intelligence/reducer/detectors/`. Implement the `Detector` Protocol (one `scan()` method that returns a list of `DetectedFailure` records) and add yourself to the registry. The framework handles clustering, expansion, scoring, classification, and the typed-record output.
241
+
242
+ See [architecture.md](architecture.md) for the full pipeline description, data contracts, and design rationale.
243
+
244
+ ## CI-aware comparison
245
+
246
+ When you give it a PR URL, the server fetches **both** failed and passed jobs in the same workflow run. Failed jobs go through the full reducer; passed jobs use targeted extraction (matching step IDs, test names, or assertion text from failed blocks). A cross-run analyzer then surfaces insights like:
247
+
248
+ - "Failure occurs only in variant `snowflake` for job group `test`."
249
+ - "Step `build-stage` is present in passed runs but missing in failing run for job group `test`."
250
+ - "Test `foo` behaves differently between passed and failed runs."
251
+
252
+ These come back in `cross_run_insights` so the agent can quickly see whether a failure is environment-specific, a regression, or flaky.
253
+
254
+ ## HTTP API
255
+
256
+ If you'd rather not use MCP, there's a small FastAPI endpoint for raw-log analysis:
257
+
258
+ ```bash
259
+ uvicorn ci_log_intelligence.api:app --reload
260
+ ```
261
+
262
+ ```bash
263
+ curl -X POST http://127.0.0.1:8000/analyze \
264
+ -H "Content-Type: application/json" \
265
+ -d '{"log":"STEP: test\nERROR build failed\nException: boom"}'
266
+ ```
267
+
268
+ ## Testing
269
+
270
+ ```bash
271
+ python -m unittest discover -s tests -v
272
+ ```
273
+
274
+ 250+ tests covering each detector, the cache, the MCP tool surface, and end-to-end scenarios across multiple detector types.
275
+
276
+ ## Known limitations
277
+
278
+ - All specialized detectors are severity 2 or 3 and tiebreak on earliest anchor line. A `specificity` weighting on `DetectedFailure` is on the v1.1 roadmap.
279
+ - Windows-style paths (`C:\src\foo.cpp:5:1:`) may not parse correctly in the GCC build-error detector. Linux CI only for now.
280
+ - The JUnit XML detector caps at 50 records per scan; consumers should check `extracted_fields.get("truncated", False)`.
281
+ - Long-running Go tests with `(1m30s)` duration format report the seconds tail only.
282
+
283
+ See [architecture.md](architecture.md#known-limitations) for the full list.
284
+
285
+ ## Contributing
286
+
287
+ Issues and PRs welcome. The codebase is small (~2.5K LOC + tests) and the detector framework is designed to make adding a new language / tool a single-file change. Run the tests, follow the existing patterns in `ci_log_intelligence/reducer/detectors/`, and open a PR.
288
+
289
+ ## License
290
+
291
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,259 @@
1
+ # ci-log-intelligence
2
+
3
+ **Stop dumping 50,000-line CI logs into your AI coding agent.** This MCP server reads the logs *for* the agent and returns a few hundred tokens of focused, typed failure context — so the agent can debug your CI without flooding its context window.
4
+
5
+ [![PyPI version](https://img.shields.io/pypi/v/ci-log-intelligence.svg)](https://pypi.org/project/ci-log-intelligence/)
6
+ [![Python](https://img.shields.io/pypi/pyversions/ci-log-intelligence.svg)](https://pypi.org/project/ci-log-intelligence/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ ## The problem
10
+
11
+ You ask Claude / Codex / Copilot to fix a failing CI build. The agent runs `gh run view --log`, gets back 60,000 lines of pytest output, and pastes the whole thing into its context. Now:
12
+
13
+ - The actual failure is buried somewhere on line 47,892.
14
+ - Your context window is ~80% spent on log output before any work begins.
15
+ - Every tool call after this costs more because the cached context is enormous.
16
+ - The agent's reasoning quality drops because the relevant signal is diluted.
17
+
18
+ After a few of these, your conversation either OOMs the context or gets too expensive to be useful.
19
+
20
+ ## What this does
21
+
22
+ `ci-log-intelligence` is an MCP server (also usable as a CLI / Python library) that sits between the agent and the CI logs. You give it a GitHub URL — a PR, a workflow run, or a single job — and it does the heavy reading in its own process:
23
+
24
+ ```text
25
+ PR / run / job URL → fetch logs → parse → 11 detector plugins → typed failure records
                                              ↓
28
+ a few hundred tokens
29
+ of focused context
30
+ back to your agent
31
+ ```
32
+
33
+ You get back a structured response: a ranked list of typed `FailureRecord`s (`hash_mismatch`, `build_error_rust`, `pytest_fail`, `go_test_fail`, …), each with the test name / file path / error code / log excerpt that's actually relevant — not 50K lines of `npm install` output.
34
+
35
+ ## Three MCP tools, designed to explore-then-drill
36
+
37
+ Rather than one omnibus call that returns a fixed payload, the server exposes three tools that map onto how an agent actually wants to work:
38
+
39
+ | Tool | When to use | Approximate response size |
40
+ |---|---|---|
41
+ | `list_failed_jobs(ci_url)` | First call. Cheap map of failed jobs with classifications + the failure types present in each. No per-block content. | ~200–500 tokens |
42
+ | `analyze_ci_failure(ci_url, top_k=3, failure_types=None, …)` | Get the top-K typed failure records with content. Filterable by detector (`failure_types=["hash_mismatch"]`). | ~1–4K tokens |
43
+ | `get_block(ci_url, block_index, surround=5)` | Drill into a specific block. Returns full content with `in_block` / `is_anchor` flags. | per-block |
44
+
45
+ Results are cached per `(repo, run_id, job_id)`. A second call against the same URL skips the GitHub fetch, the parse, and the reducer entirely.
46
+
47
+ ## Quick start
48
+
49
+ ### Install
50
+
51
+ ```bash
52
+ pip install ci-log-intelligence
53
+ ```
54
+
55
+ Or from source:
56
+
57
+ ```bash
58
+ git clone https://github.com/YOUR-GITHUB-USERNAME/ci-log-intelligence.git
59
+ cd ci-log-intelligence
60
+ pip install -e .
61
+ ```
62
+
63
+ ### Authenticate with GitHub
64
+
65
+ The fetcher prefers the local `gh` CLI and falls back to a `GITHUB_TOKEN` env var.
66
+
67
+ ```bash
68
+ gh auth login # preferred
69
+ # or
70
+ export GITHUB_TOKEN=ghp_…
71
+ ```
72
+
73
+ ### Wire up your MCP client
74
+
75
+ This repo ships shared MCP configuration for several clients (see [INSTALL.md](INSTALL.md) for the full setup guide):
76
+
77
+ - **Codex**: `.codex/config.toml` (auto-discovered)
78
+ - **VS Code / GitHub Copilot**: `.vscode/mcp.json` (workspace-scoped)
79
+ - **Claude Desktop**: example at `docs/claude_desktop_config.example.json`
80
+
81
+ For any other MCP client, point it at the `ci-log-intelligence-mcp` command installed by the package.
82
+
83
+ ## A 30-second demo
84
+
85
+ In your AI agent, after wiring up the MCP server:
86
+
87
+ > "The build at `https://github.com/me/myrepo/actions/runs/12345` failed. Can you fix it?"
88
+
89
+ The agent now has three tools available. A reasonable trace:
90
+
91
+ ```text
92
+ agent → list_failed_jobs("https://github.com/me/myrepo/actions/runs/12345")
93
+
94
+ server → {
95
+ "jobs": [
96
+ {
97
+ "job_name": "postgres-test (bundling)",
98
+ "block_count": 3,
99
+ "failure_types_present": ["hash_mismatch", "generic"],
100
+ "classifications": {"root_cause": 1, "symptom": 2},
101
+ "job_url": "…/runs/12345/jobs/678"
102
+ }
103
+ ],
104
+ "metadata": {"failed_jobs": 1, "total_runs_analyzed": 1}
105
+ }
106
+
107
+ agent → analyze_ci_failure(
108
+ ci_url="…/runs/12345",
109
+ failure_types=["hash_mismatch"]
110
+ )
111
+
112
+ server → {
113
+ "root_cause": {
114
+ "summary": "Run 12345 job postgres-test (bundling) root_cause at lines 1058-1062: ...",
115
+ "log_excerpt": "common.go:1058: file hashes don't match for ...\n--- FAIL: TestRunSetPartial (45.3s)\n…",
116
+ "has_traceback": false,
117
+ "has_assertion": true,
118
+ "score": 10.0,
119
+ "score_components": {"severity_weight": 10.0, "signal_density": 0.5, "duplicate_penalty": 0.0}
120
+ },
121
+ "failures": [
122
+ {
123
+ "type": "hash_mismatch",
124
+ "classification": "root_cause",
125
+ "severity": 2,
126
+ "score": 10.0,
127
+ "start_line": 1058,
128
+ "end_line": 1062,
129
+ "summary": "…",
130
+ "log_excerpt": "…",
131
+ "extracted_fields": {
132
+ "test_name": "TestRunSetPartial",
133
+ "warehouse_target": "postgres",
134
+ "job_name": "postgres-test (bundling)"
135
+ }
136
+ }
137
+ ],
138
+ "metadata": {"failures_returned": 1, "failures_total": 1, …}
139
+ }
140
+ ```
141
+
142
+ The agent now knows: it's a golden-file hash mismatch in `TestRunSetPartial` on the postgres warehouse target. It can run `make update_ref_samples` scoped to that one test. Total context consumed: <2K tokens instead of 50K.
143
+
144
+ ## CLI usage
145
+
146
+ For humans debugging CI in a terminal:
147
+
148
+ ```bash
149
+ ci-log-intel analyze --url https://github.com/owner/repo/pull/123 --include-passed
150
+ ```
151
+
152
+ Machine-readable JSON:
153
+
154
+ ```bash
155
+ ci-log-intel analyze --url https://github.com/owner/repo/actions/runs/12345 --json
156
+ ```
157
+
158
+ ## Python usage
159
+
160
+ ```python
161
+ from ci_log_intelligence import analyze_ci_url
162
+
163
+ report = analyze_ci_url(
164
+ "https://github.com/owner/repo/pull/123",
165
+ include_passed=True,
166
+ max_passed_runs=3,
167
+ )
168
+
169
+ print(report.root_cause.summary)
170
+ for record in report.failures:
171
+ print(record.type, record.classification, record.score, record.extracted_fields)
172
+ ```
173
+
174
+ For raw log strings (no GitHub fetch):
175
+
176
+ ```python
177
+ from ci_log_intelligence import analyze_log
178
+
179
+ result = analyze_log("STEP: test\nERROR build failed\nException: boom")
180
+ for failure in result.detected_failures:
181
+ print(failure.type, failure.anchor_lines, failure.extracted_fields)
182
+ ```
183
+
184
+ ## How it works
185
+
186
+ The pipeline is deterministic and heuristic — no LLM in the loop. A set of `Detector` plugins scans each parsed line and emits typed `DetectedFailure` records; the framework clusters anchors, expands context (step-bounded), suppresses noise, scores, classifies, and ranks.
187
+
188
+ ### Detectors shipped in v1
189
+
190
+ | Detector | Severity | What it catches |
191
+ |---|---|---|
192
+ | `hash_mismatch` | 2 | `file hashes don't match` paired with `--- FAIL:` in the same step (golden-file failures) |
193
+ | `go_test_fail` | 2 | Standalone `--- FAIL: TestName` from `go test` (not paired with hash mismatches) |
194
+ | `pytest_fail` | 2 | `FAILED tests/x.py::test_y - …` summary lines with traceback pairing |
195
+ | `rust_test_fail` | 2 | `test foo::bar ... FAILED` paired with `thread '…' panicked at` |
196
+ | `junit_xml` | 2 | `<testcase>...<failure>` / `<error>` fragments embedded in log streams |
197
+ | `build_error_rust` | 3 | `error[E####]:` + `-->` location, plus bare cargo summaries |
198
+ | `build_error_go` | 3 | `./pkg/file.go:line:col: message` |
199
+ | `build_error_npm` | 3 | Multi-line `npm ERR!` / `yarn error` blocks |
200
+ | `build_error_make` | 3 | `make: *** [target] Error N` |
201
+ | `build_error_gcc` | 3 | `file:line:col: error: …` with note continuation (gcc/clang) |
202
+ | `generic` | 1–3 | Hardened keyword fallback (`Traceback`, `Exception`, `ERROR`, `FAILED`, etc.) with word boundaries, case-insensitive matching, and a benign-mention filter (`"0 errors"` won't anchor) |
203
+
204
+ Build errors at severity 3 outrank test failures at severity 2, so when a build broke *before* any test ran the build error is correctly selected as `root_cause` and the cascading test failures show as `symptom`s.
205
+
206
+ ### Adding a detector
207
+
208
+ Each detector is a single file under `ci_log_intelligence/reducer/detectors/`. Implement the `Detector` Protocol (one `scan()` method that returns a list of `DetectedFailure` records) and add yourself to the registry. The framework handles clustering, expansion, scoring, classification, and the typed-record output.
209
+
210
+ See [architecture.md](architecture.md) for the full pipeline description, data contracts, and design rationale.
211
+
212
+ ## CI-aware comparison
213
+
214
+ When you give it a PR URL, the server fetches **both** failed and passed jobs in the same workflow run. Failed jobs go through the full reducer; passed jobs use targeted extraction (matching step IDs, test names, or assertion text from failed blocks). A cross-run analyzer then surfaces insights like:
215
+
216
+ - "Failure occurs only in variant `snowflake` for job group `test`."
217
+ - "Step `build-stage` is present in passed runs but missing in failing run for job group `test`."
218
+ - "Test `foo` behaves differently between passed and failed runs."
219
+
220
+ These come back in `cross_run_insights` so the agent can quickly see whether a failure is environment-specific, a regression, or flaky.
221
+
222
+ ## HTTP API
223
+
224
+ If you'd rather not use MCP, there's a small FastAPI endpoint for raw-log analysis:
225
+
226
+ ```bash
227
+ uvicorn ci_log_intelligence.api:app --reload
228
+ ```
229
+
230
+ ```bash
231
+ curl -X POST http://127.0.0.1:8000/analyze \
232
+ -H "Content-Type: application/json" \
233
+ -d '{"log":"STEP: test\nERROR build failed\nException: boom"}'
234
+ ```
235
+
236
+ ## Testing
237
+
238
+ ```bash
239
+ python -m unittest discover -s tests -v
240
+ ```
241
+
242
+ 250+ tests covering each detector, the cache, the MCP tool surface, and end-to-end scenarios across multiple detector types.
243
+
244
+ ## Known limitations
245
+
246
+ - All specialized detectors are severity 2 or 3 and tiebreak on earliest anchor line. A `specificity` weighting on `DetectedFailure` is on the v1.1 roadmap.
247
+ - Windows-style paths (`C:\src\foo.cpp:5:1:`) may not parse correctly in the GCC build-error detector. Linux CI only for now.
248
+ - The JUnit XML detector caps at 50 records per scan; consumers should check `extracted_fields.get("truncated", False)`.
249
+ - Long-running Go tests with `(1m30s)` duration format report the seconds tail only.
250
+
251
+ See [architecture.md](architecture.md#known-limitations) for the full list.
252
+
253
+ ## Contributing
254
+
255
+ Issues and PRs welcome. The codebase is small (~2.5K LOC + tests) and the detector framework is designed to make adding a new language / tool a single-file change. Run the tests, follow the existing patterns in `ci_log_intelligence/reducer/detectors/`, and open a PR.
256
+
257
+ ## License
258
+
259
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from .ci_analysis import analyze_ci_url
6
+ from .ingestion import ingest_log
7
+ from .models import ReductionResult
8
+ from .parsing import parse_log
9
+ from .reducer import reduce_parsed_lines
10
+ from .storage import StorageBackend, create_storage_backend
11
+ from .summarizer import summarize_reduction_result
12
+ from .utils.logging import get_structured_logger
13
+ from .utils.metrics import MetricsCollector, measure_stage
14
+
15
# Public star-import surface: only the two analysis entry points and the
# result type. The other names imported above (parse_log, reduce_parsed_lines,
# storage/metrics helpers, ...) remain reachable as module attributes but are
# deliberately excluded from ``from ci_log_intelligence import *``.
__all__ = [
    "analyze_log",
    "analyze_ci_url",
    "ReductionResult",
]
20
+
21
+
22
def analyze_log(
    log: str,
    storage_backend: Optional[StorageBackend] = None,
    spill_threshold_bytes: int = 5_000_000,
    metrics: Optional[MetricsCollector] = None,
) -> ReductionResult:
    """Analyze a raw CI log string and return the reduced failure context.

    Runs the full in-process pipeline: ingest -> parse -> reduce -> summarize,
    recording per-stage timings and reduction metrics along the way.

    Args:
        log: Raw log text to analyze (no GitHub fetch involved).
        storage_backend: Optional pre-built backend. When omitted, one is
            created from the log's UTF-8 byte size — presumably spilling to
            disk once the size exceeds ``spill_threshold_bytes``; confirm in
            ``create_storage_backend``.
        spill_threshold_bytes: Size cutoff forwarded to the backend factory.
        metrics: Optional collector; a fresh ``MetricsCollector`` is created
            when not supplied.

    Returns:
        The ``ReductionResult`` from the reducer, with ``summary`` populated.
    """
    logger = get_structured_logger("ci_log_intelligence")
    collector = metrics or MetricsCollector()
    backend = storage_backend or create_storage_backend(
        byte_size=len(log.encode("utf-8")),
        spill_threshold_bytes=spill_threshold_bytes,
    )

    stored_log = ingest_log(log, backend)
    try:
        with measure_stage("parse", collector, logger):
            parsed_lines = parse_log(stored_log, backend)

        result = reduce_parsed_lines(parsed_lines, metrics=collector, logger=logger)

        with measure_stage("summarize", collector, logger):
            result.summary = summarize_reduction_result(result)

        # Fraction of input lines retained across all selected blocks;
        # max(..., 1) guards against division by zero on an empty log.
        selected_lines = sum(len(scored.block.lines) for scored in result.blocks)
        collector.record_metric("reduction_ratio", selected_lines / max(len(parsed_lines), 1))
        collector.record_metric("number_of_blocks", float(len(result.blocks)))
        return result
    finally:
        # Always release the stored log — even on failure — so any spilled
        # storage created above is cleaned up.
        backend.delete(stored_log.reference)
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ from fastapi import FastAPI
4
+ from pydantic import BaseModel, Field
5
+
6
+ from .. import analyze_log
7
+
8
+
9
class AnalyzeRequest(BaseModel):
    """Request body for ``POST /analyze``."""

    # Raw CI log text to analyze; must be non-empty (enforced by min_length).
    log: str = Field(..., min_length=1)
11
+
12
+
13
class AnalyzeBlockResponse(BaseModel):
    """One scored log block in the analysis response."""

    # Line span of the block within the submitted log.
    start_line: int
    end_line: int
    # Reducer ranking score for this block.
    score: float
    # e.g. "root_cause" / "symptom" — exact vocabulary defined by the reducer.
    classification: str
18
+
19
+
20
class AnalyzeResponse(BaseModel):
    """Response body for ``POST /analyze``: scored blocks plus a text summary."""

    blocks: list[AnalyzeBlockResponse]
    summary: str
23
+
24
+
25
def create_app() -> FastAPI:
    """Build the FastAPI application exposing the raw-log analysis endpoint."""
    application = FastAPI(title="CI Log Intelligence")

    @application.post("/analyze", response_model=AnalyzeResponse)
    def analyze(request: AnalyzeRequest) -> AnalyzeResponse:
        # Run the full reduction pipeline on the submitted log text.
        outcome = analyze_log(request.log)

        rows: list[AnalyzeBlockResponse] = []
        for scored in outcome.blocks:
            rows.append(
                AnalyzeBlockResponse(
                    start_line=scored.block.start_line,
                    end_line=scored.block.end_line,
                    score=scored.score,
                    classification=scored.classification,
                )
            )

        # summary may be unset/None; coerce to an empty string for the schema.
        return AnalyzeResponse(blocks=rows, summary=outcome.summary or "")

    return application
43
+
44
+
45
# Module-level ASGI application so deployments can point straight at it,
# e.g. ``uvicorn ci_log_intelligence.api:app`` as shown in the README.
app = create_app()

__all__ = ["AnalyzeRequest", "AnalyzeResponse", "app", "create_app"]