ci-log-intelligence 0.1.0 (ci_log_intelligence-0.1.0-py3-none-any.whl)

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in their public registries.
Files changed (55)
  1. ci_log_intelligence/__init__.py +50 -0
  2. ci_log_intelligence/api/__init__.py +47 -0
  3. ci_log_intelligence/ci_analysis.py +290 -0
  4. ci_log_intelligence/ci_report_builder.py +203 -0
  5. ci_log_intelligence/cli/__init__.py +1 -0
  6. ci_log_intelligence/cli/main.py +81 -0
  7. ci_log_intelligence/ingestion/__init__.py +13 -0
  8. ci_log_intelligence/ingestion/github/__init__.py +13 -0
  9. ci_log_intelligence/ingestion/github/fetcher.py +342 -0
  10. ci_log_intelligence/ingestion/github/fetcher_helpers.py +127 -0
  11. ci_log_intelligence/ingestion/github/models.py +182 -0
  12. ci_log_intelligence/ingestion/github/resolver.py +50 -0
  13. ci_log_intelligence/ingestion/github/transports.py +131 -0
  14. ci_log_intelligence/mcp/__init__.py +1 -0
  15. ci_log_intelligence/mcp/cache.py +69 -0
  16. ci_log_intelligence/mcp/server.py +181 -0
  17. ci_log_intelligence/mcp/tools.py +368 -0
  18. ci_log_intelligence/models/__init__.py +93 -0
  19. ci_log_intelligence/parsing/__init__.py +115 -0
  20. ci_log_intelligence/reducer/__init__.py +80 -0
  21. ci_log_intelligence/reducer/classification/__init__.py +93 -0
  22. ci_log_intelligence/reducer/clustering/__init__.py +53 -0
  23. ci_log_intelligence/reducer/comparison/__init__.py +15 -0
  24. ci_log_intelligence/reducer/comparison/analyzer.py +349 -0
  25. ci_log_intelligence/reducer/comparison/excerpt.py +184 -0
  26. ci_log_intelligence/reducer/detectors/__init__.py +100 -0
  27. ci_log_intelligence/reducer/detectors/base.py +62 -0
  28. ci_log_intelligence/reducer/detectors/build_error_gcc.py +163 -0
  29. ci_log_intelligence/reducer/detectors/build_error_go.py +92 -0
  30. ci_log_intelligence/reducer/detectors/build_error_make.py +97 -0
  31. ci_log_intelligence/reducer/detectors/build_error_npm.py +162 -0
  32. ci_log_intelligence/reducer/detectors/build_error_rust.py +227 -0
  33. ci_log_intelligence/reducer/detectors/generic.py +63 -0
  34. ci_log_intelligence/reducer/detectors/go_test_fail.py +138 -0
  35. ci_log_intelligence/reducer/detectors/hash_mismatch.py +177 -0
  36. ci_log_intelligence/reducer/detectors/junit_xml.py +153 -0
  37. ci_log_intelligence/reducer/detectors/patterns.py +51 -0
  38. ci_log_intelligence/reducer/detectors/pytest_fail.py +190 -0
  39. ci_log_intelligence/reducer/detectors/rust_test_fail.py +138 -0
  40. ci_log_intelligence/reducer/expansion/__init__.py +113 -0
  41. ci_log_intelligence/reducer/merge/__init__.py +59 -0
  42. ci_log_intelligence/reducer/scoring/__init__.py +72 -0
  43. ci_log_intelligence/reducer/suppression/__init__.py +55 -0
  44. ci_log_intelligence/signals.py +63 -0
  45. ci_log_intelligence/storage/__init__.py +78 -0
  46. ci_log_intelligence/summarizer/__init__.py +25 -0
  47. ci_log_intelligence/utils/__init__.py +4 -0
  48. ci_log_intelligence/utils/logging.py +22 -0
  49. ci_log_intelligence/utils/metrics.py +56 -0
  50. ci_log_intelligence-0.1.0.dist-info/METADATA +291 -0
  51. ci_log_intelligence-0.1.0.dist-info/RECORD +55 -0
  52. ci_log_intelligence-0.1.0.dist-info/WHEEL +5 -0
  53. ci_log_intelligence-0.1.0.dist-info/entry_points.txt +3 -0
  54. ci_log_intelligence-0.1.0.dist-info/licenses/LICENSE +21 -0
  55. ci_log_intelligence-0.1.0.dist-info/top_level.txt +1 -0
--- /dev/null
+++ ci_log_intelligence/__init__.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from .ci_analysis import analyze_ci_url
+from .ingestion import ingest_log
+from .models import ReductionResult
+from .parsing import parse_log
+from .reducer import reduce_parsed_lines
+from .storage import StorageBackend, create_storage_backend
+from .summarizer import summarize_reduction_result
+from .utils.logging import get_structured_logger
+from .utils.metrics import MetricsCollector, measure_stage
+
+__all__ = [
+    "analyze_log",
+    "analyze_ci_url",
+    "ReductionResult",
+]
+
+
+def analyze_log(
+    log: str,
+    storage_backend: Optional[StorageBackend] = None,
+    spill_threshold_bytes: int = 5_000_000,
+    metrics: Optional[MetricsCollector] = None,
+) -> ReductionResult:
+    logger = get_structured_logger("ci_log_intelligence")
+    collector = metrics or MetricsCollector()
+    backend = storage_backend or create_storage_backend(
+        byte_size=len(log.encode("utf-8")),
+        spill_threshold_bytes=spill_threshold_bytes,
+    )
+
+    stored_log = ingest_log(log, backend)
+    try:
+        with measure_stage("parse", collector, logger):
+            parsed_lines = parse_log(stored_log, backend)
+
+        result = reduce_parsed_lines(parsed_lines, metrics=collector, logger=logger)
+
+        with measure_stage("summarize", collector, logger):
+            result.summary = summarize_reduction_result(result)
+
+        selected_lines = sum(len(scored.block.lines) for scored in result.blocks)
+        collector.record_metric("reduction_ratio", selected_lines / max(len(parsed_lines), 1))
+        collector.record_metric("number_of_blocks", float(len(result.blocks)))
+        return result
+    finally:
+        backend.delete(stored_log.reference)
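A minimal usage sketch of the analyze_log entry point above (not part of the released wheel), based only on the attributes visible in this diff: result.blocks, scored.score, scored.classification, scored.block.start_line/end_line, and result.summary. The log file path is a placeholder.

from ci_log_intelligence import analyze_log

with open("ci_job.log", encoding="utf-8") as handle:  # placeholder path
    raw_log = handle.read()

result = analyze_log(raw_log)
for scored in result.blocks:
    # Each scored block carries its line span, classification, and score.
    print(scored.classification, scored.score, scored.block.start_line, scored.block.end_line)
print(result.summary)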
--- /dev/null
+++ ci_log_intelligence/api/__init__.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from fastapi import FastAPI
+from pydantic import BaseModel, Field
+
+from .. import analyze_log
+
+
+class AnalyzeRequest(BaseModel):
+    log: str = Field(..., min_length=1)
+
+
+class AnalyzeBlockResponse(BaseModel):
+    start_line: int
+    end_line: int
+    score: float
+    classification: str
+
+
+class AnalyzeResponse(BaseModel):
+    blocks: list[AnalyzeBlockResponse]
+    summary: str
+
+
+def create_app() -> FastAPI:
+    app = FastAPI(title="CI Log Intelligence")
+
+    @app.post("/analyze", response_model=AnalyzeResponse)
+    def analyze(request: AnalyzeRequest) -> AnalyzeResponse:
+        result = analyze_log(request.log)
+        blocks = [
+            AnalyzeBlockResponse(
+                start_line=scored.block.start_line,
+                end_line=scored.block.end_line,
+                score=scored.score,
+                classification=scored.classification,
+            )
+            for scored in result.blocks
+        ]
+        return AnalyzeResponse(blocks=blocks, summary=result.summary or "")
+
+    return app
+
+
+app = create_app()
+
+__all__ = ["AnalyzeRequest", "AnalyzeResponse", "app", "create_app"]
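To illustrate the request and response shapes defined above, here is a sketch (not part of the package) that exercises the /analyze endpoint in-process with fastapi.testclient.TestClient; the sample log text is made up.

from fastapi.testclient import TestClient

from ci_log_intelligence.api import app

client = TestClient(app)
payload = {"log": "step 1 ok\nERROR: compilation failed\n"}  # made-up log content
response = client.post("/analyze", json=payload)
data = response.json()
print(data["summary"])
for block in data["blocks"]:
    print(block["classification"], block["start_line"], block["end_line"], block["score"])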
--- /dev/null
+++ ci_log_intelligence/ci_analysis.py
@@ -0,0 +1,290 @@
+from __future__ import annotations
+
+from typing import Optional, Sequence, TYPE_CHECKING
+
+from .ci_report_builder import (
+    _summarize_root_cause,
+    build_report,
+    resolve_failure_type,
+)
+from .ci_report_builder import build_report as _build_report
+from .ci_report_builder import resolve_failure_type as _resolve_failure_type
+from .ingestion import ingest_log
+from .ingestion.github.fetcher import GitHubLogFetcher, _sort_logs, normalize_job_name
+from .ingestion.github.models import (
+    CIAnalysisReport,
+    FailedLogAnalysis,
+    NormalizedLog,
+)
+from .ingestion.github.resolver import resolve_github_url
+from .mcp.cache import CachedJob, CacheKey
+from .parsing import parse_log
+from .reducer import reduce_parsed_lines
+from .reducer.comparison import analyze_cross_run, extract_passed_context
+from .reducer.detectors import JobContext
+from .storage import InMemoryStorage
+from .summarizer import summarize_reduction_result
+from .utils.logging import get_structured_logger, log_stage_event
+from .utils.metrics import MetricsCollector, measure_stage
+
+if TYPE_CHECKING:  # pragma: no cover - typing-only import to avoid circulars
+    from .mcp.cache import JobCache
+
+
+def analyze_ci_url(
+    ci_url: str,
+    *,
+    include_passed: bool = True,
+    max_passed_runs: int = 3,
+    max_runs: int = 5,
+    fetcher: Optional[GitHubLogFetcher] = None,
+    metrics: Optional[MetricsCollector] = None,
+    cache: Optional["JobCache"] = None,
+    top_k: Optional[int] = None,
+    failure_types: Optional[Sequence[str]] = None,
+) -> CIAnalysisReport:
+    """Run the end-to-end CI analysis pipeline for a GitHub URL.
+
+    When ``cache`` is provided, per-job (parse + reduce) results are looked up by
+    ``(repo, run_id, job_id)`` and stored after computation. Cache hits skip the
+    parse + reduce work. The fetch (GitHub API call for the log content) still
+    happens unless the caller arranges to suppress it; today the cache short-
+    circuits only the CPU-bound stages, which are the dominant cost for cached
+    runs because the GitHub API returns 304 (or is otherwise cheap for an
+    already-completed immutable job log).
+    """
+    logger = get_structured_logger("ci_log_intelligence.ci")
+    collector = metrics or MetricsCollector()
+
+    with measure_stage("resolve_ci_url", collector, logger):
+        target = resolve_github_url(ci_url)
+
+    github_fetcher = fetcher or GitHubLogFetcher(logger=logger)
+    fetch_run_limit = max(max_runs, max_passed_runs + 2 if include_passed else max_runs)
+    with measure_stage("fetch_github_logs", collector, logger):
+        fetched = fetch_with_cache_awareness(
+            github_fetcher,
+            target,
+            include_passed=include_passed,
+            max_runs=fetch_run_limit,
+            max_passed_runs=max_passed_runs,
+            cache=cache,
+        )
+
+    failed_logs = [log for log in fetched.logs if log.status == "failed"]
+    passed_logs = [log for log in fetched.logs if log.status == "passed"]
+    collector.record_metric("failed_jobs", float(len(failed_logs)))
+    collector.record_metric("passed_jobs", float(len(passed_logs)))
+    log_stage_event(
+        logger,
+        "analyze_ci_url",
+        runs=len(fetched.runs),
+        failed_jobs=len(failed_logs),
+        passed_jobs=len(passed_logs),
+    )
+
+    failed_analyses: list[FailedLogAnalysis] = []
+    total_anchors = 0.0
+    total_blocks = 0.0
+    for failed_log in failed_logs:
+        failed_metrics = MetricsCollector()
+        job_context = JobContext(
+            job_name=failed_log.job_name,
+            run_id=failed_log.run_id,
+            repo=target.repo,
+        )
+
+        cached = _lookup_cache(cache, target.repo, failed_log)
+        if cached is not None:
+            reduction_result = cached.reduction_result
+            cache_hit_anchors = float(_count_anchors(reduction_result))
+            cache_hit_blocks = float(len(reduction_result.blocks))
+            failed_metrics.record_metric("number_of_anchors", cache_hit_anchors)
+            failed_metrics.record_metric("number_of_blocks", cache_hit_blocks)
+            log_stage_event(logger, "job_cache_hit", run_id=failed_log.run_id, job_id=failed_log.job_id)
+        else:
+            with measure_stage("reduce_failed_log", collector, logger):
+                reduction_result, parsed_lines = _analyze_single_log(
+                    failed_log.content,
+                    metrics=failed_metrics,
+                    job_context=job_context,
+                )
+            _store_cache(cache, target.repo, failed_log, parsed_lines, reduction_result)
+
+        snapshot = failed_metrics.snapshot()
+        total_anchors += float(snapshot["metrics"].get("number_of_anchors", 0.0))
+        total_blocks += float(snapshot["metrics"].get("number_of_blocks", 0.0))
+        failed_analyses.append(
+            FailedLogAnalysis(
+                log=failed_log,
+                logical_job_name=normalize_job_name(failed_log.job_name),
+                result=reduction_result,
+            )
+        )
+
+    collector.record_metric("anchors_detected", total_anchors)
+    collector.record_metric("blocks_generated", total_blocks)
+    log_stage_event(
+        logger,
+        "failed_log_analysis",
+        anchors_detected=total_anchors,
+        blocks_generated=total_blocks,
+    )
+
+    with measure_stage("extract_passed_context", collector, logger):
+        passed_contexts = extract_passed_context(failed_analyses, passed_logs)
+
+    with measure_stage("cross_run_analysis", collector, logger):
+        insights = analyze_cross_run(failed_analyses, passed_contexts)
+
+    return build_report(
+        runs=fetched.runs,
+        failed_logs=failed_logs,
+        passed_logs=passed_logs,
+        failed_analyses=failed_analyses,
+        passed_contexts=passed_contexts,
+        insights=insights,
+        top_k=top_k,
+        failure_types=failure_types,
+    )
+
+
+def fetch_with_cache_awareness(
+    fetcher: GitHubLogFetcher,
+    target,
+    *,
+    include_passed: bool,
+    max_runs: int,
+    max_passed_runs: int,
+    cache: Optional["JobCache"],
+):
+    """Plan the fetch, then fetch only the log content the cache doesn't already cover.
+
+    When ``cache`` is ``None``, behavior is identical to ``fetcher.fetch_logs``.
+    When a cache is provided, planned jobs whose ``(repo, run_id, job_id)`` is
+    already present in the cache are short-circuited: an empty-``content``
+    placeholder ``NormalizedLog`` is emitted so the downstream loop still sees
+    the job (and the cache-hit branch picks it up). Passed jobs are always
+    fetched because their reduction is consumed by ``extract_passed_context``
+    without going through the cache.
+    """
+    if cache is None:
+        return fetcher.fetch_logs(
+            target,
+            include_passed=include_passed,
+            max_runs=max_runs,
+            max_passed_runs=max_passed_runs,
+        )
+
+    plan = fetcher.plan_logs(
+        target,
+        include_passed=include_passed,
+        max_runs=max_runs,
+        max_passed_runs=max_passed_runs,
+    )
+
+    cached_logs: list = []
+    jobs_to_fetch: list = []
+    for run, job, _, status in plan.planned_jobs:
+        if status == "failed":
+            key = CacheKey(repo=target.repo, run_id=run.run_id, job_id=job.job_id)
+            if cache.get(key) is not None:
+                # Emit a placeholder log so the analyze loop iterates this job
+                # and takes the cache-hit branch. The empty ``content`` is
+                # never read because the cache lookup short-circuits it.
+                # See ``NormalizedLog`` docstring for the placeholder contract.
+                cached_logs.append(
+                    NormalizedLog(
+                        run_id=run.run_id,
+                        job_id=job.job_id,
+                        job_name=job.job_name,
+                        status="failed",
+                        content="",
+                    )
+                )
+                continue
+        jobs_to_fetch.append((run, job, _, status))
+
+    fetched_logs = fetcher.fetch_planned_log_content(target.repo, jobs_to_fetch) if jobs_to_fetch else []
+    all_logs = _sort_logs(cached_logs + fetched_logs)
+
+    # ``assemble_fetched_data`` handles both include_passed=True (group + cap)
+    # and include_passed=False (failed-only filter), so a single call covers
+    # both paths.
+    return fetcher.assemble_fetched_data(
+        plan,
+        all_logs,
+        include_passed=include_passed,
+        max_passed_runs=max_passed_runs,
+    )
+
+
+def _lookup_cache(
+    cache: Optional["JobCache"],
+    repo: str,
+    failed_log: NormalizedLog,
+) -> Optional["CachedJob"]:
+    if cache is None:
+        return None
+    key = CacheKey(repo=repo, run_id=failed_log.run_id, job_id=failed_log.job_id)
+    return cache.get(key)
+
+
+def _store_cache(
+    cache: Optional["JobCache"],
+    repo: str,
+    failed_log: NormalizedLog,
+    parsed_lines,
+    reduction_result,
+) -> None:
+    if cache is None:
+        return
+    key = CacheKey(repo=repo, run_id=failed_log.run_id, job_id=failed_log.job_id)
+    cache.put(
+        key,
+        CachedJob(
+            job_name=failed_log.job_name,
+            parsed_lines=list(parsed_lines),
+            reduction_result=reduction_result,
+        ),
+    )
+
+
+def _count_anchors(reduction_result) -> int:
+    return sum(len(scored.block.anchors) for scored in reduction_result.blocks)
+
+
+def _analyze_single_log(
+    content: str,
+    metrics: Optional[MetricsCollector] = None,
+    job_context: Optional[JobContext] = None,
+):
+    """Parse and reduce one log; returns ``(ReductionResult, parsed_lines)``.
+
+    Returning the parsed line list alongside the reduction result lets the cache
+    retain the raw line content needed by ``get_block`` without re-parsing.
+    """
+    logger = get_structured_logger("ci_log_intelligence")
+    collector = metrics or MetricsCollector()
+    backend = InMemoryStorage()
+    stored_log = ingest_log(content, backend)
+    try:
+        with measure_stage("parse", collector, logger):
+            parsed_lines = parse_log(stored_log, backend)
+
+        result = reduce_parsed_lines(
+            parsed_lines,
+            metrics=collector,
+            logger=logger,
+            job_context=job_context,
+        )
+
+        with measure_stage("summarize", collector, logger):
+            result.summary = summarize_reduction_result(result)
+
+        selected_lines = sum(len(scored.block.lines) for scored in result.blocks)
+        collector.record_metric("reduction_ratio", selected_lines / max(len(parsed_lines), 1))
+        collector.record_metric("number_of_blocks", float(len(result.blocks)))
+        return result, parsed_lines
+    finally:
+        backend.delete(stored_log.reference)
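A sketch of driving analyze_ci_url above end to end (not part of the wheel). The run URL is a placeholder, and GitHub API access is assumed to be configured however GitHubLogFetcher expects it; the report attributes used here are the ones read by ci_report_builder.py and the CLI elsewhere in this diff.

from ci_log_intelligence.ci_analysis import analyze_ci_url
from ci_log_intelligence.utils.metrics import MetricsCollector

metrics = MetricsCollector()
report = analyze_ci_url(
    "https://github.com/example-org/example-repo/actions/runs/1234567890",  # placeholder URL
    include_passed=True,
    max_passed_runs=2,
    top_k=5,
    metrics=metrics,
)
print(report.root_cause.summary)
for failure in report.failures:
    print(f"[{failure.type}/{failure.classification}] {failure.summary}")
for insight in report.cross_run_insights:
    print(f"- {insight}")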
--- /dev/null
+++ ci_log_intelligence/ci_report_builder.py
@@ -0,0 +1,203 @@
+from __future__ import annotations
+
+from typing import Any, Iterable, Optional, Sequence
+
+from .ingestion.github.fetcher import normalize_job_name
+from .ingestion.github.models import (
+    AnalysisMetadata,
+    CIAnalysisReport,
+    FailedLogAnalysis,
+    FailureRecord,
+    NormalizedLog,
+    PassedContextView,
+    RootCauseSummary,
+    WorkflowRun,
+)
+from .models import ScoreComponents, ScoredBlock
+from .reducer.comparison import (
+    render_block_excerpt,
+    select_root_cause,
+    summarize_failed_block,
+)
+from .reducer.detectors import DetectedFailure
+
+
+def build_report(
+    *,
+    runs: Sequence[WorkflowRun],
+    failed_logs: Sequence[NormalizedLog],
+    passed_logs: Sequence[NormalizedLog],
+    failed_analyses: Sequence[FailedLogAnalysis],
+    passed_contexts: Iterable,
+    insights: Iterable[str],
+    top_k: Optional[int] = None,
+    failure_types: Optional[Sequence[str]] = None,
+) -> CIAnalysisReport:
+    """Assemble the final ``CIAnalysisReport`` from per-job analyses.
+
+    When ``failure_types`` is provided, the ``failures`` array is filtered to records whose
+    ``type`` matches one of the listed strings. When ``top_k`` is provided, the (already
+    score-sorted) array is truncated to that length. Both filters are reflected in
+    ``metadata.failures_returned`` / ``metadata.failures_total`` so callers can detect
+    truncation.
+    """
+    root_cause_candidate = select_root_cause(failed_analyses)
+    if root_cause_candidate is None:
+        root_cause = _empty_root_cause()
+        all_failures: list[FailureRecord] = []
+    else:
+        _, scored_block = root_cause_candidate
+        analysis = root_cause_candidate[0]
+        root_cause = _summarize_root_cause(
+            scored_block, analysis.log.job_name, analysis.log.run_id
+        )
+        all_failures = _build_failure_records(failed_analyses)
+
+    failures_total = len(all_failures)
+
+    if failure_types is not None:
+        allowed = {failure_type for failure_type in failure_types}
+        all_failures = [record for record in all_failures if record.type in allowed]
+
+    if top_k is not None:
+        all_failures = all_failures[: max(top_k, 0)]
+
+    failures_returned = len(all_failures)
+
+    passed_context_views = [
+        PassedContextView(job_name=context.job_name, excerpt=context.excerpt)
+        for context in passed_contexts
+    ]
+    metadata = AnalysisMetadata(
+        total_runs_analyzed=len({run.run_id for run in runs}),
+        failed_runs=len({log.run_id for log in failed_logs}),
+        passed_runs=len({log.run_id for log in passed_logs}),
+        failures_returned=failures_returned,
+        failures_total=failures_total,
+    )
+
+    return CIAnalysisReport(
+        root_cause=root_cause,
+        failures=all_failures,
+        passed_context=passed_context_views,
+        cross_run_insights=list(insights),
+        metadata=metadata,
+    )
+
+
+def _empty_root_cause() -> RootCauseSummary:
+    return RootCauseSummary(
+        summary="No failing jobs found in the analyzed CI runs.",
+        log_excerpt="",
+        has_traceback=False,
+        has_stack_trace=False,
+        has_assertion=False,
+        score=0.0,
+        score_components=ScoreComponents(
+            severity_weight=0.0,
+            signal_density=0.0,
+            duplicate_penalty=0.0,
+        ),
+    )
+
+
+def _build_failure_records(
+    failed_analyses: Iterable[FailedLogAnalysis],
+) -> list[FailureRecord]:
+    failures: list[FailureRecord] = []
+    for current_analysis in sorted(
+        failed_analyses,
+        key=lambda item: (-item.log.run_id, item.log.job_name.lower(), item.log.job_id),
+    ):
+        detected = current_analysis.result.detected_failures
+        for block in current_analysis.result.blocks:
+            failure_type, extracted = resolve_failure_type(block, detected)
+            highest_severity = max(
+                (anchor.severity for anchor in block.block.anchors),
+                default=0,
+            )
+            failures.append(
+                FailureRecord(
+                    type=failure_type,
+                    classification=block.classification,
+                    severity=highest_severity,
+                    score=block.score,
+                    start_line=block.block.start_line,
+                    end_line=block.block.end_line,
+                    summary=summarize_failed_block(
+                        block, current_analysis.log.job_name, current_analysis.log.run_id
+                    ),
+                    log_excerpt=render_block_excerpt(block),
+                    extracted_fields=extracted,
+                )
+            )
+    return failures
+
+
+def resolve_failure_type(
+    scored_block: ScoredBlock,
+    detected_failures: list[DetectedFailure],
+) -> tuple[str, dict[str, Any]]:
+    """Resolve the FailureRecord ``type`` and ``extracted_fields`` for a scored block.
+
+    Walks the DetectedFailures whose ``anchor_lines`` fall inside the block, picks the
+    most-specific type (``"generic"`` loses to anything else; ties between specialized
+    types break by highest severity then earliest anchor line), and merges
+    ``extracted_fields`` from contributors of the winning type ONLY.
+    """
+    block_line_range = range(
+        scored_block.block.start_line, scored_block.block.end_line + 1
+    )
+    contributors = [
+        failure
+        for failure in detected_failures
+        if any(line in block_line_range for line in failure.anchor_lines)
+    ]
+    if not contributors:
+        return "generic", {}
+
+    specialized = [c for c in contributors if c.type != "generic"]
+    if specialized:
+        primary = min(
+            specialized,
+            key=lambda failure: (-failure.severity, min(failure.anchor_lines, default=0)),
+        )
+        merged: dict[str, Any] = {}
+        for c in specialized:
+            if c.type == primary.type:
+                merged.update(c.extracted_fields)
+        return primary.type, merged
+
+    signal_names: list[str] = []
+    for c in contributors:
+        name = c.extracted_fields.get("signal_name")
+        if name and name not in signal_names:
+            signal_names.append(name)
+    return "generic", {"signal_names": signal_names}
+
+
+def _summarize_root_cause(
+    scored_block: ScoredBlock,
+    job_name: str,
+    run_id: int,
+) -> RootCauseSummary:
+    block_signals = {signal for line in scored_block.block.lines for signal in line.signals}
+    has_traceback = "traceback" in block_signals
+    has_stack_trace = any(
+        line.content.startswith(" File ") for line in scored_block.block.lines
+    )
+    has_assertion = "assertion_error" in block_signals or any(
+        "AssertionError" in line.content for line in scored_block.block.lines
+    )
+    return RootCauseSummary(
+        summary=summarize_failed_block(scored_block, job_name, run_id),
+        log_excerpt=render_block_excerpt(scored_block),
+        has_traceback=has_traceback,
+        has_stack_trace=has_stack_trace,
+        has_assertion=has_assertion,
+        score=scored_block.score,
+        score_components=scored_block.score_components,
+    )
+
+
+__all__ = ["build_report", "resolve_failure_type"]
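A self-contained sketch of the tie-break described in the resolve_failure_type docstring above, using a namedtuple as a stand-in for DetectedFailure (whose real constructor is not shown in this diff); the failure type names are illustrative, loosely based on the detector module names in the file list.

from collections import namedtuple

# Stand-in for DetectedFailure with only the fields the tie-break reads.
Failure = namedtuple("Failure", ["type", "severity", "anchor_lines"])

contributors = [
    Failure("generic", 5, [90]),       # "generic" always loses to a specialized type
    Failure("pytest_fail", 3, [120]),  # illustrative type names
    Failure("junit_xml", 3, [80]),
]

specialized = [c for c in contributors if c.type != "generic"]
primary = min(
    specialized,
    key=lambda failure: (-failure.severity, min(failure.anchor_lines, default=0)),
)
print(primary.type)  # junit_xml: ties on severity, wins on the earlier anchor line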
--- /dev/null
+++ ci_log_intelligence/cli/__init__.py
@@ -0,0 +1 @@
+"""CLI entrypoints for CI Log Intelligence."""
--- /dev/null
+++ ci_log_intelligence/cli/main.py
@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+
+from ..ci_analysis import analyze_ci_url
+
+
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(prog="ci-log-intel")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    analyze_parser = subparsers.add_parser("analyze")
+    analyze_parser.add_argument("--url", required=True)
+    analyze_parser.add_argument("--include-passed", action="store_true")
+    analyze_parser.add_argument("--max-passed-runs", type=int, default=3)
+    analyze_parser.add_argument("--json", action="store_true")
+
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)
+
+    if args.command != "analyze":
+        parser.error(f"Unsupported command: {args.command}")
+
+    report = analyze_ci_url(
+        args.url,
+        include_passed=args.include_passed,
+        max_passed_runs=args.max_passed_runs,
+    )
+
+    if args.json:
+        print(json.dumps(report.to_dict(), indent=2, sort_keys=True))
+        return 0
+
+    print(f"Root cause: {report.root_cause.summary}")
+    if report.root_cause.log_excerpt:
+        print()
+        print("Log excerpt:")
+        print(report.root_cause.log_excerpt)
+
+    if report.failures:
+        print()
+        print("Failures:")
+        for failure in report.failures:
+            print(f"- [{failure.type}/{failure.classification}] {failure.summary}")
+            if failure.extracted_fields:
+                kv = ", ".join(
+                    f"{k}={v}" for k, v in sorted(failure.extracted_fields.items())
+                )
+                print(f"  {kv}")
+
+    if report.passed_context:
+        print()
+        print("Passed context:")
+        for context in report.passed_context:
+            print(f"- {context.job_name}")
+            print(context.excerpt)
+
+    if report.cross_run_insights:
+        print()
+        print("Cross-run insights:")
+        for insight in report.cross_run_insights:
+            print(f"- {insight}")
+
+    print()
+    print(
+        "Metadata: "
+        f"runs={report.metadata.total_runs_analyzed} "
+        f"failed_runs={report.metadata.failed_runs} "
+        f"passed_runs={report.metadata.passed_runs}"
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
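A sketch of driving the CLI above without a shell by passing argv directly to main(); the run URL is a placeholder. The equivalent console invocation would be along the lines of ci-log-intel analyze --url <run-url> --json (the prog name comes from the parser above; the actual console-script wiring lives in entry_points.txt, whose contents are not shown here).

from ci_log_intelligence.cli.main import main

exit_code = main([
    "analyze",
    "--url", "https://github.com/example-org/example-repo/actions/runs/1234567890",  # placeholder
    "--include-passed",
    "--json",
])
print("exit code:", exit_code)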
--- /dev/null
+++ ci_log_intelligence/ingestion/__init__.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from ..models import StoredLog
+from ..storage import StorageBackend
+
+
+def ingest_log(log: str, storage_backend: StorageBackend) -> StoredLog:
+    reference = storage_backend.write_text(log)
+    return StoredLog(
+        reference=reference,
+        byte_size=len(log.encode("utf-8")),
+        backend_name=storage_backend.name,
+    )
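A small sketch of ingest_log with the in-memory backend that ci_analysis.py constructs above (InMemoryStorage with no arguments), using only the StoredLog fields and backend methods visible in this diff; the log text is made up.

from ci_log_intelligence.ingestion import ingest_log
from ci_log_intelligence.storage import InMemoryStorage

backend = InMemoryStorage()
stored = ingest_log("step 1 ok\nERROR: compilation failed\n", backend)
print(stored.backend_name, stored.byte_size)  # backend name and UTF-8 byte size

# The reference is an opaque handle owned by the backend; delete it when done,
# mirroring the finally blocks in analyze_log and _analyze_single_log.
backend.delete(stored.reference)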