blindspot 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. blindspot/__init__.py +1 -0
  2. blindspot/__main__.py +4 -0
  3. blindspot/actions/__init__.py +18 -0
  4. blindspot/actions/models.py +55 -0
  5. blindspot/actions/recommender.py +316 -0
  6. blindspot/ai_signal/__init__.py +23 -0
  7. blindspot/ai_signal/detector.py +217 -0
  8. blindspot/ai_signal/models.py +71 -0
  9. blindspot/ai_signal/profile.py +136 -0
  10. blindspot/ai_signal/quality.py +198 -0
  11. blindspot/cli.py +973 -0
  12. blindspot/codeowners/__init__.py +21 -0
  13. blindspot/codeowners/engine.py +225 -0
  14. blindspot/codeowners/parser.py +133 -0
  15. blindspot/collector/__init__.py +15 -0
  16. blindspot/collector/bitbucket/__init__.py +29 -0
  17. blindspot/collector/bitbucket/client.py +117 -0
  18. blindspot/collector/bitbucket/config.py +99 -0
  19. blindspot/collector/bitbucket/pr_collector.py +225 -0
  20. blindspot/collector/bitbucket/remote.py +71 -0
  21. blindspot/collector/bots.py +66 -0
  22. blindspot/collector/filters.py +164 -0
  23. blindspot/collector/git.py +81 -0
  24. blindspot/collector/github/__init__.py +49 -0
  25. blindspot/collector/github/client.py +116 -0
  26. blindspot/collector/github/config.py +94 -0
  27. blindspot/collector/github/gh_client.py +177 -0
  28. blindspot/collector/github/pr_collector.py +147 -0
  29. blindspot/collector/github/pr_models.py +16 -0
  30. blindspot/collector/github/remote.py +51 -0
  31. blindspot/collector/mailmap.py +123 -0
  32. blindspot/collector/models.py +20 -0
  33. blindspot/collector/review_models.py +63 -0
  34. blindspot/config.py +46 -0
  35. blindspot/dependency_graph/__init__.py +24 -0
  36. blindspot/dependency_graph/aggregation.py +150 -0
  37. blindspot/dependency_graph/builder.py +178 -0
  38. blindspot/dependency_graph/extractors/__init__.py +62 -0
  39. blindspot/dependency_graph/extractors/base.py +48 -0
  40. blindspot/dependency_graph/extractors/cpp.py +74 -0
  41. blindspot/dependency_graph/extractors/dotnet.py +81 -0
  42. blindspot/dependency_graph/extractors/go.py +98 -0
  43. blindspot/dependency_graph/extractors/java.py +66 -0
  44. blindspot/dependency_graph/extractors/javascript.py +95 -0
  45. blindspot/dependency_graph/extractors/kotlin.py +62 -0
  46. blindspot/dependency_graph/extractors/php.py +102 -0
  47. blindspot/dependency_graph/extractors/python.py +273 -0
  48. blindspot/dependency_graph/extractors/ruby.py +73 -0
  49. blindspot/dependency_graph/extractors/rust.py +111 -0
  50. blindspot/dependency_graph/extractors/swift.py +48 -0
  51. blindspot/dependency_graph/importance.py +90 -0
  52. blindspot/dependency_graph/llm_fallback.py +137 -0
  53. blindspot/dependency_graph/models.py +115 -0
  54. blindspot/diff_analysis/__init__.py +19 -0
  55. blindspot/diff_analysis/classifier.py +202 -0
  56. blindspot/narrative/__init__.py +16 -0
  57. blindspot/narrative/client.py +92 -0
  58. blindspot/narrative/config.py +98 -0
  59. blindspot/narrative/engine.py +103 -0
  60. blindspot/narrative/models.py +26 -0
  61. blindspot/narrative/prompt.py +327 -0
  62. blindspot/ownership/__init__.py +4 -0
  63. blindspot/ownership/engine.py +131 -0
  64. blindspot/ownership/models.py +55 -0
  65. blindspot/report/__init__.py +15 -0
  66. blindspot/report/context.py +126 -0
  67. blindspot/report/renderer.py +23 -0
  68. blindspot/report/templates/departure.html.j2 +294 -0
  69. blindspot/report/templates/report.html.j2 +767 -0
  70. blindspot/resilience/__init__.py +7 -0
  71. blindspot/resilience/score.py +171 -0
  72. blindspot/review_graph/__init__.py +15 -0
  73. blindspot/review_graph/engine.py +153 -0
  74. blindspot/risk_models/__init__.py +37 -0
  75. blindspot/risk_models/bus_factor.py +145 -0
  76. blindspot/risk_models/departure.py +142 -0
  77. blindspot/risk_models/knowledge_decay.py +161 -0
  78. blindspot/trend/__init__.py +3 -0
  79. blindspot/trend/engine.py +72 -0
  80. blindspot-0.0.2.dist-info/METADATA +93 -0
  81. blindspot-0.0.2.dist-info/RECORD +84 -0
  82. blindspot-0.0.2.dist-info/WHEEL +4 -0
  83. blindspot-0.0.2.dist-info/entry_points.txt +2 -0
  84. blindspot-0.0.2.dist-info/licenses/LICENSE +21 -0
blindspot/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.0.2"
blindspot/__main__.py ADDED
@@ -0,0 +1,4 @@
1
from blindspot.cli import app

if __name__ == "__main__":
    # Entry point for `python -m blindspot`: delegate straight to the CLI app.
    app()
@@ -0,0 +1,18 @@
1
+ from blindspot.actions.models import (
2
+ PRIORITY_ORDER,
3
+ ActionCategory,
4
+ ActionPriority,
5
+ FragilityPattern,
6
+ RecommendedAction,
7
+ )
8
+ from blindspot.actions.recommender import RecommendationContext, RecommendationEngine
9
+
10
+ __all__ = [
11
+ "ActionCategory",
12
+ "ActionPriority",
13
+ "FragilityPattern",
14
+ "PRIORITY_ORDER",
15
+ "RecommendationContext",
16
+ "RecommendationEngine",
17
+ "RecommendedAction",
18
+ ]
@@ -0,0 +1,55 @@
1
+ from dataclasses import dataclass
2
+ from enum import Enum
3
+
4
+
5
+ class ActionPriority(str, Enum):
6
+ HIGH = "High"
7
+ MEDIUM = "Medium"
8
+ LOW = "Low"
9
+
10
+
11
+ PRIORITY_ORDER = {
12
+ ActionPriority.HIGH: 0,
13
+ ActionPriority.MEDIUM: 1,
14
+ ActionPriority.LOW: 2,
15
+ }
16
+
17
+
18
+ class ActionCategory(str, Enum):
19
+ OWNERSHIP_DIVERSIFICATION = "Ownership Diversification"
20
+ KNOWLEDGE_TRANSFER = "Knowledge Transfer"
21
+ REVIEW_HYGIENE = "Review Hygiene"
22
+ QUALITY_GUARDRAIL = "Quality Guardrail"
23
+ CODEOWNERS_UPDATE = "Codeowners Update"
24
+
25
+
26
+ class FragilityPattern(str, Enum):
27
+ """Named patterns for AI-era engineering fragility.
28
+
29
+ Slugs stay machine-readable; the human label lives on the enum value.
30
+ Recommendations are tagged with at most one pattern so the report
31
+ surface can group them and so the README can document each one.
32
+ """
33
+ REVIEW_WITHOUT_SCRUTINY = "Review without scrutiny"
34
+ SINGLE_OWNER_CONCENTRATION = "Single-owner concentration"
35
+ VELOCITY_WITHOUT_REVIEW = "Velocity without review"
36
+
37
+
38
+ @dataclass(frozen=True, slots=True)
39
+ class RecommendedAction:
40
+ priority: ActionPriority
41
+ category: ActionCategory
42
+ title: str
43
+ description: str
44
+ target: str
45
+ evidence: str
46
+ pattern: FragilityPattern | None = None
47
+
48
+
49
+ __all__ = [
50
+ "ActionCategory",
51
+ "ActionPriority",
52
+ "FragilityPattern",
53
+ "PRIORITY_ORDER",
54
+ "RecommendedAction",
55
+ ]
@@ -0,0 +1,316 @@
1
+ from dataclasses import dataclass, field
2
+
3
+ from blindspot.actions.models import (
4
+ PRIORITY_ORDER,
5
+ ActionCategory,
6
+ ActionPriority,
7
+ FragilityPattern,
8
+ RecommendedAction,
9
+ )
10
+ from blindspot.ai_signal.models import AuthorProfile, AuthorProfileType
11
+ from blindspot.codeowners import CodeOwnersReport
12
+ from blindspot.diff_analysis.classifier import classify_file
13
+ from blindspot.review_graph.engine import FileReviewStats
14
+ from blindspot.risk_models.bus_factor import FileBusFactor, ServiceBusFactor
15
+ from blindspot.risk_models.knowledge_decay import FileDecay
16
+
17
+
18
@dataclass
class RecommendationContext:
    """Bundle of upstream analysis outputs the RecommendationEngine consumes.

    Every field defaults to empty; a missing signal simply means the
    corresponding rule produces no recommendations.
    """

    services: tuple[ServiceBusFactor, ...] = ()  # per-service bus-factor results
    critical_files: tuple[FileBusFactor, ...] = ()  # per-file bus-factor results (not read by the visible rules)
    decays: tuple[FileDecay, ...] = ()  # knowledge-decay scores per file
    review_stats: dict[str, FileReviewStats] = field(default_factory=dict)  # per-file review stats (rules read .values())
    author_profiles: dict[str, AuthorProfile] = field(default_factory=dict)  # author profiles (rules read .values())
    ownership_names: dict[str, str] = field(default_factory=dict)  # email -> display name
    codeowners_report: CodeOwnersReport | None = None  # declared-vs-actual ownership audit, if run
    importance_map: dict[str, float] = field(default_factory=dict)  # file -> structural importance weight
28
+
29
+
30
@dataclass
class RecommendationEngine:
    """Rule-based engine turning analysis signals into RecommendedActions.

    Each private rule method scans one signal source in the context, emits
    at most ``max_per_rule`` actions, and ``recommend`` merges everything
    and sorts by priority, category label, then target.
    """

    # Decay scores at/above these mark a file as critical / elevated.
    decay_critical_threshold: float = 0.75
    decay_high_threshold: float = 0.50
    # Share of comment-free approvals at/above which a file is flagged.
    rubber_stamp_threshold: float = 0.70
    # Reviewer-diversity HHI strictly below this means one reviewer dominates.
    diversity_floor: float = 0.20
    # Median open-to-first-approval latency under this (30 min) is "too fast".
    fast_approval_seconds: float = 30 * 60
    # Minimum sample sizes before the review-based rules fire.
    min_reviews_for_rubber_stamp: int = 2
    min_reviews_for_diversity: int = 3
    min_approvals_for_latency: int = 3
    # Cap on the number of actions any single rule may emit.
    max_per_rule: int = 5
    # Files below this structural-importance weight are filtered out (0.5%).
    importance_threshold: float = 0.005

    def _passes_importance(self, ctx: RecommendationContext, file: str) -> bool:
        """Filter out files structurally unimportant to the codebase.

        When no importance_map is provided (or it's empty), behaviour is
        unchanged — every file passes. With a map, files below the
        threshold (default 0.5% PageRank weight) are filtered.
        """
        if not ctx.importance_map:
            return True
        return ctx.importance_map.get(file, 0.0) >= self.importance_threshold

    def recommend(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        """Run every rule against *ctx* and return the merged, sorted actions."""
        actions: list[RecommendedAction] = []
        actions.extend(self._service_bus_factor(ctx))
        actions.extend(self._file_decay(ctx))
        actions.extend(self._rubber_stamp(ctx))
        actions.extend(self._reviewer_diversity(ctx))
        actions.extend(self._fast_approval(ctx))
        actions.extend(self._fake_velocity(ctx))
        actions.extend(self._codeowners(ctx))
        # Stable sort: priority rank first, then category label, then target.
        actions.sort(key=lambda a: (PRIORITY_ORDER[a.priority], a.category.value, a.target))
        return actions

    def _label(self, ctx: RecommendationContext, email: str) -> str:
        """Return "Name (email)" when a distinct display name is known, else the email."""
        name = ctx.ownership_names.get(email)
        if name and name != email:
            return f"{name} ({email})"
        return email

    def _service_bus_factor(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        """Flag single-owner services (bus factor 1) for ownership diversification."""
        out: list[RecommendedAction] = []
        for s in ctx.services:
            # Only bus-factor-1 services matter; skip when no owner is known.
            if s.bus_factor > 1 or not s.top_owners:
                continue
            owner_email, owner_cov = s.top_owners[0]
            owner_label = self._label(ctx, owner_email)
            # Larger services (>= 5 files) are treated as a bigger liability.
            priority = ActionPriority.HIGH if s.file_count >= 5 else ActionPriority.MEDIUM
            out.append(
                RecommendedAction(
                    priority=priority,
                    category=ActionCategory.OWNERSHIP_DIVERSIFICATION,
                    title=f"Diversify ownership of '{s.service}' (currently single-owner)",
                    description=(
                        f"Service '{s.service}' has bus factor 1 across {s.file_count} files; "
                        f"{owner_label} holds {owner_cov:.0%} of effective ownership. "
                        "Pair them with at least two additional engineers and rotate code reviews "
                        "for this area over the next 60 days."
                    ),
                    target=s.service,
                    evidence=(
                        f"bus_factor=1, top_owner_coverage={owner_cov:.0%}, files={s.file_count}"
                    ),
                    pattern=FragilityPattern.SINGLE_OWNER_CONCENTRATION,
                )
            )
        return out[: self.max_per_rule]

    def _file_decay(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        """Recommend knowledge-transfer sessions for files with decayed ownership."""
        out: list[RecommendedAction] = []
        for d in ctx.decays:
            if d.decay_score < self.decay_high_threshold:
                continue
            if not self._passes_importance(ctx, d.file):
                continue
            owner_label = self._label(ctx, d.top_owner)
            critical = d.decay_score >= self.decay_critical_threshold
            priority = ActionPriority.HIGH if critical else ActionPriority.MEDIUM
            urgency = "critical" if critical else "elevated"
            out.append(
                RecommendedAction(
                    priority=priority,
                    category=ActionCategory.KNOWLEDGE_TRANSFER,
                    title=f"Knowledge transfer for {d.file}",
                    description=(
                        f"Decay is {urgency} ({d.decay_score:.0%}). "
                        f"{owner_label} last touched this file {d.days_since_owner_touch:.0f} days ago, "
                        f"and {d.lines_changed_after} lines have been changed since by others. "
                        "Schedule a transfer session and designate a secondary owner before the next "
                        "non-trivial change."
                    ),
                    target=d.file,
                    evidence=(
                        f"decay={d.decay_score:.0%}, days_since_touch={d.days_since_owner_touch:.0f}, "
                        f"lines_after={d.lines_changed_after}"
                    ),
                )
            )
        return out[: self.max_per_rule]

    def _rubber_stamp(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        """Flag code files where most approvals arrive without substantive comments."""
        out: list[RecommendedAction] = []
        # Worst offenders first: highest rubber-stamp ratio, then most reviews.
        candidates = sorted(
            (
                s for s in ctx.review_stats.values()
                if s.total_reviews >= self.min_reviews_for_rubber_stamp
                and s.rubber_stamp_ratio >= self.rubber_stamp_threshold
                and classify_file(s.file) == "code"
                and self._passes_importance(ctx, s.file)
            ),
            key=lambda s: (-s.rubber_stamp_ratio, -s.total_reviews),
        )
        for s in candidates:
            out.append(
                RecommendedAction(
                    priority=ActionPriority.MEDIUM,
                    category=ActionCategory.REVIEW_HYGIENE,
                    title=f"Add review depth requirement for {s.file}",
                    description=(
                        f"{s.rubber_stamp_ratio:.0%} of approvals on this file arrived without a "
                        f"substantive review comment (across {s.total_reviews} reviews). "
                        "Introduce a review checklist or require at least one substantive comment "
                        "before approval is allowed."
                    ),
                    target=s.file,
                    evidence=(
                        f"rubber_stamp_ratio={s.rubber_stamp_ratio:.0%}, reviews={s.total_reviews}"
                    ),
                    pattern=FragilityPattern.REVIEW_WITHOUT_SCRUTINY,
                )
            )
        return out[: self.max_per_rule]

    def _reviewer_diversity(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        """Flag code files whose review load is concentrated on one reviewer."""
        out: list[RecommendedAction] = []
        # Least diverse first (lowest HHI-based diversity value).
        candidates = sorted(
            (
                s for s in ctx.review_stats.values()
                if s.total_reviews >= self.min_reviews_for_diversity
                and s.diversity_hhi < self.diversity_floor
                and classify_file(s.file) == "code"
                and self._passes_importance(ctx, s.file)
            ),
            key=lambda s: s.diversity_hhi,
        )
        for s in candidates:
            out.append(
                RecommendedAction(
                    priority=ActionPriority.LOW,
                    category=ActionCategory.REVIEW_HYGIENE,
                    title=f"Rotate reviewers for {s.file}",
                    description=(
                        f"Reviewer diversity for this file is {s.diversity_hhi:.0%}, "
                        "meaning a single reviewer is carrying most of the review burden. "
                        "Add the file to a CODEOWNERS group or require a second reviewer to spread "
                        "knowledge of the area."
                    ),
                    target=s.file,
                    evidence=(
                        f"diversity_hhi={s.diversity_hhi:.0%}, unique_reviewers={s.unique_reviewers}"
                    ),
                )
            )
        return out[: self.max_per_rule]

    def _fast_approval(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        """Flag code files whose median approval latency is suspiciously short."""
        out: list[RecommendedAction] = []
        # Fastest approvals first.
        candidates = sorted(
            (
                s for s in ctx.review_stats.values()
                if s.median_approval_latency_seconds is not None
                and s.approval_sample_size >= self.min_approvals_for_latency
                and s.median_approval_latency_seconds < self.fast_approval_seconds
                and classify_file(s.file) == "code"
                and self._passes_importance(ctx, s.file)
            ),
            # `or 0` only guards the type checker; None was filtered above.
            key=lambda s: s.median_approval_latency_seconds or 0,
        )
        for s in candidates:
            minutes = (s.median_approval_latency_seconds or 0) / 60
            out.append(
                RecommendedAction(
                    priority=ActionPriority.MEDIUM,
                    category=ActionCategory.REVIEW_HYGIENE,
                    title=f"Slow down fast approvals on {s.file}",
                    description=(
                        f"Median time from PR open to first approval is "
                        f"{minutes:.0f} minutes across {s.approval_sample_size} approvals — "
                        "too short for meaningful review of non-trivial code. "
                        "Add a minimum review time, CODEOWNERS review requirement, "
                        "or required checklist."
                    ),
                    target=s.file,
                    evidence=(
                        f"median_approval={minutes:.0f}min, samples={s.approval_sample_size}"
                    ),
                    pattern=FragilityPattern.REVIEW_WITHOUT_SCRUTINY,
                )
            )
        return out[: self.max_per_rule]

    def _fake_velocity(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        """Ask for a deep review of authors profiled as FAKE_VELOCITY."""
        out: list[RecommendedAction] = []
        for profile in ctx.author_profiles.values():
            if profile.profile_type != AuthorProfileType.FAKE_VELOCITY:
                continue
            label = self._label(ctx, profile.author_email)
            # Either signal may be absent; render "n/a" rather than crash.
            quality_pct = (
                f"{profile.quality_signal.risk_score:.0%}" if profile.quality_signal else "n/a"
            )
            ai_score = (
                f"{profile.ai_signal.score:.2f}" if profile.ai_signal else "n/a"
            )
            out.append(
                RecommendedAction(
                    priority=ActionPriority.HIGH,
                    category=ActionCategory.QUALITY_GUARDRAIL,
                    title=f"Deep review of recent work by {label}",
                    description=(
                        f"Recent activity shows AI amplification signals "
                        f"(AI score {ai_score}) together with elevated quality risk "
                        f"({quality_pct}). Schedule a dedicated review of their last 90 days of "
                        "changes, with focus on architectural correctness and test coverage. "
                        "Treat this as a verification step, not a punishment."
                    ),
                    target=profile.author_email,
                    evidence=(
                        f"ai_score={ai_score}, quality_risk={quality_pct}, "
                        f"profile={profile.profile_type.value}"
                    ),
                    pattern=FragilityPattern.VELOCITY_WITHOUT_REVIEW,
                )
            )
        return out[: self.max_per_rule]

    def _codeowners(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        """Suggest CODEOWNERS updates for mismatched and stale declared owners.

        Note: this rule may emit up to ``2 * max_per_rule`` actions in total
        because mismatches and stale entries are capped independently.
        """
        report = ctx.codeowners_report
        if report is None:
            return []
        out: list[RecommendedAction] = []

        # Mismatches: highest signal — declared owner is wrong.
        for f in report.mismatches[: self.max_per_rule]:
            actual_label = (
                self._label(ctx, f.actual_top_owner) if f.actual_top_owner else "(unknown)"
            )
            declared = ", ".join(f.declared_owners) if f.declared_owners else "(none)"
            out.append(RecommendedAction(
                priority=ActionPriority.MEDIUM,
                category=ActionCategory.CODEOWNERS_UPDATE,
                title=f"Update CODEOWNERS for {f.file}",
                description=(
                    f"Declared owners ({declared}) do not include the current top contributor. "
                    f"{actual_label} holds {f.actual_coverage:.0%} of effective ownership. "
                    "Either add them to the CODEOWNERS rule or assign explicit cross-coverage."
                ),
                target=f.file,
                evidence=(
                    f"declared={declared}, actual_top={f.actual_top_owner or 'n/a'}, "
                    f"coverage={f.actual_coverage:.0%}, line={f.rule_line}"
                ),
            ))

        # Stale: declared owner hasn't touched it in a long time.
        for f in report.stale[: self.max_per_rule]:
            declared = ", ".join(f.declared_owners) if f.declared_owners else "(none)"
            days = f.days_since_declared_touch
            days_txt = f"{days}" if days is not None else "no record"
            out.append(RecommendedAction(
                priority=ActionPriority.LOW,
                category=ActionCategory.CODEOWNERS_UPDATE,
                title=f"Refresh stale CODEOWNERS entry for {f.file}",
                description=(
                    f"Declared owner ({declared}) has not touched this file in {days_txt} days. "
                    "Confirm the assignment is still accurate; if not, rotate to a recent contributor."
                ),
                target=f.file,
                evidence=(
                    f"declared={declared}, days_since_touch={days_txt}, line={f.rule_line}"
                ),
            ))
        return out
314
+
315
+
316
+ __all__ = ["RecommendationContext", "RecommendationEngine"]
@@ -0,0 +1,23 @@
1
+ from blindspot.ai_signal.detector import AIAmplificationDetector
2
+ from blindspot.ai_signal.models import (
3
+ AIFlag,
4
+ AISignal,
5
+ AuthorProfile,
6
+ AuthorProfileType,
7
+ QualitySignal,
8
+ SignalStrength,
9
+ )
10
+ from blindspot.ai_signal.profile import AuthorProfiler
11
+ from blindspot.ai_signal.quality import QualitySignalEngine
12
+
13
+ __all__ = [
14
+ "AIAmplificationDetector",
15
+ "AIFlag",
16
+ "AISignal",
17
+ "AuthorProfile",
18
+ "AuthorProfileType",
19
+ "AuthorProfiler",
20
+ "QualitySignal",
21
+ "QualitySignalEngine",
22
+ "SignalStrength",
23
+ ]
@@ -0,0 +1,217 @@
1
+ import math
2
+ from collections.abc import Iterable
3
+ from dataclasses import dataclass, field
4
+ from datetime import UTC, datetime, timedelta
5
+
6
+ from blindspot.ai_signal.models import AIFlag, AISignal
7
+ from blindspot.collector.models import Commit
8
+
9
+
10
@dataclass
class AIAmplificationDetector:
    """Heuristic detector for unusual recent activity that may indicate AI amplification.

    Five signals compared to a per-author baseline:
    1. commit frequency spike
    2. average change-size spike
    3. commit message length spike
    4. share of unusually large single commits
    5. share of off-hours commits
    """

    measurement_days: int = 90  # size of the "recent" window, counting back from as_of
    baseline_days: int = 365  # size of the baseline window preceding the recent one
    high_threshold: float = 0.70  # combined score at/above this flags HIGH
    medium_threshold: float = 0.40  # combined score at/above this flags MEDIUM
    # Per-signal weights: frequency, volume, message, large-commit, timing (sum to 1.0).
    weights: tuple[float, float, float, float, float] = (0.30, 0.25, 0.20, 0.15, 0.10)
    min_baseline_commits: int = 5  # below this, no baseline comparison is attempted
    as_of: datetime | None = None  # reference "now"; defaults to current UTC time
    _as_of: datetime = field(init=False)  # normalized, always-aware copy of as_of

    def __post_init__(self) -> None:
        # Normalize to an aware UTC datetime so cutoff arithmetic is consistent.
        self._as_of = (self.as_of or datetime.now(UTC)).astimezone(UTC)

    def detect(self, commits: Iterable[Commit]) -> dict[str, AISignal]:
        """Score every author seen in *commits*; return signals keyed by author email.

        Commits newer than ``measurement_days`` form the recent sample; the
        ``baseline_days`` before that form the baseline. Older commits are
        ignored, and authors with no recent commits are skipped entirely.
        """
        recent_cutoff = self._as_of - timedelta(days=self.measurement_days)
        baseline_cutoff = recent_cutoff - timedelta(days=self.baseline_days)

        # Bucket commits per author into recent vs. baseline windows.
        recent: dict[str, list[Commit]] = {}
        baseline: dict[str, list[Commit]] = {}
        for c in commits:
            if c.authored_at >= recent_cutoff:
                recent.setdefault(c.author_email, []).append(c)
            elif c.authored_at >= baseline_cutoff:
                baseline.setdefault(c.author_email, []).append(c)

        results: dict[str, AISignal] = {}
        for email in set(recent) | set(baseline):
            r = recent.get(email, [])
            b = baseline.get(email, [])
            if not r:
                # Baseline-only author: nothing recent to score.
                continue

            if len(b) < self.min_baseline_commits:
                # Not enough history to compare against: emit a neutral signal.
                signal = self._insufficient_baseline_signal(email, r, b)
            else:
                freq = self._frequency_score(r, b)
                vol = self._volume_score(r, b)
                msg = self._message_score(r, b)
                large = self._large_commit_score(r, b)
                timing = self._timing_score(r, b)

                # Weighted blend of the five sub-scores.
                w = self.weights
                score = (
                    freq * w[0] + vol * w[1] + msg * w[2] + large * w[3] + timing * w[4]
                )
                flag = self._flag_from_score(score)

                signal = AISignal(
                    author_email=email,
                    flag=flag,
                    score=score,
                    frequency_score=freq,
                    volume_score=vol,
                    message_score=msg,
                    large_commit_score=large,
                    timing_score=timing,
                    recent_commits=len(r),
                    baseline_commits=len(b),
                )
            results[email] = signal
        return results

    def _flag_from_score(self, score: float) -> AIFlag:
        """Map a combined score onto HIGH / MEDIUM / LOW via the two thresholds."""
        if score >= self.high_threshold:
            return AIFlag.HIGH
        if score >= self.medium_threshold:
            return AIFlag.MEDIUM
        return AIFlag.LOW

    def _insufficient_baseline_signal(
        self, email: str, r: list[Commit], b: list[Commit]
    ) -> AISignal:
        """Neutral all-zero LOW signal for authors with too little baseline history."""
        return AISignal(
            author_email=email,
            flag=AIFlag.LOW,
            score=0.0,
            frequency_score=0.0,
            volume_score=0.0,
            message_score=0.0,
            large_commit_score=0.0,
            timing_score=0.0,
            recent_commits=len(r),
            baseline_commits=len(b),
        )

    def _frequency_score(self, r: list[Commit], b: list[Commit]) -> float:
        """Score the spike in commits-per-day relative to the baseline rate."""
        recent_rate = len(r) / self.measurement_days
        baseline_rate = len(b) / self.baseline_days
        if baseline_rate == 0:
            return 0.0
        ratio = recent_rate / baseline_rate
        return _bucketed_score(ratio, [(3.0, 1.0), (2.0, 0.7), (1.5, 0.4)])

    def _volume_score(self, r: list[Commit], b: list[Commit]) -> float:
        """Score the spike in average change size (lines touched per commit)."""
        r_avg = _avg_change_size(r)
        b_avg = _avg_change_size(b)
        if b_avg == 0:
            return 0.0
        ratio = r_avg / b_avg
        return _bucketed_score(ratio, [(4.0, 1.0), (2.5, 0.7), (1.5, 0.4)])

    def _message_score(self, r: list[Commit], b: list[Commit]) -> float:
        """Score the spike in average commit-message length."""
        r_avg = _avg(len(c.message) for c in r)
        b_avg = _avg(len(c.message) for c in b)
        if b_avg == 0:
            return 0.0
        ratio = r_avg / b_avg
        return _bucketed_score(ratio, [(2.5, 1.0), (1.8, 0.6), (1.3, 0.3)])

    def _large_commit_score(self, r: list[Commit], b: list[Commit]) -> float:
        """Score the share of recent commits more than 3x the baseline average size."""
        b_avg = _avg_change_size(b)
        if b_avg == 0:
            return 0.0
        threshold = b_avg * 3.0
        large = sum(1 for c in r if _change_size(c) > threshold)
        if not r:
            return 0.0
        ratio = large / len(r)
        return _bucketed_score(ratio, [(0.5, 1.0), (0.3, 0.6), (0.15, 0.3)])

    def _timing_score(self, r: list[Commit], b: list[Commit]) -> float:
        """Score the increase in off-hours committing relative to the baseline."""
        # Off-hours = UTC hours outside the busiest 8-hour window in baseline.
        # If baseline gives no working-hours signal, fall back to a generous default
        # (treat 22:00–08:00 UTC as off-hours).
        baseline_hours = [c.authored_at.hour for c in b]
        if len(baseline_hours) >= self.min_baseline_commits:
            work_start, work_end = _busy_window(baseline_hours)
        else:
            work_start, work_end = 8, 22

        recent_off = sum(1 for c in r if not _within(c.authored_at.hour, work_start, work_end))
        baseline_off = sum(1 for c in b if not _within(c.authored_at.hour, work_start, work_end))
        if len(r) == 0 or len(b) == 0:
            return 0.0
        recent_ratio = recent_off / len(r)
        baseline_ratio = baseline_off / len(b)
        if baseline_ratio == 0:
            # Any off-hours activity vs. a zero baseline counts as an infinite spike.
            ratio = float("inf") if recent_ratio > 0 else 0.0
        else:
            ratio = recent_ratio / baseline_ratio
        return _bucketed_score(ratio, [(2.0, 0.8), (1.5, 0.4)])
162
+
163
+
164
def _change_size(c: Commit) -> int:
    """Total churn of a commit: added plus deleted lines across all its files."""
    total = 0
    for changed_file in c.files:
        total += changed_file.additions + changed_file.deletions
    return total
166
+
167
+
168
def _avg_change_size(commits: list[Commit]) -> float:
    """Mean churn per commit; 0.0 for an empty list."""
    # _avg already returns 0.0 for an empty input, matching the empty-list case.
    return _avg(_change_size(c) for c in commits)
172
+
173
+
174
+ def _avg(values: Iterable[float]) -> float:
175
+ values = list(values)
176
+ if not values:
177
+ return 0.0
178
+ return sum(values) / len(values)
179
+
180
+
181
+ def _bucketed_score(ratio: float, buckets: list[tuple[float, float]]) -> float:
182
+ """buckets sorted desc by threshold; return first matching score, else 0."""
183
+ if math.isinf(ratio) or math.isnan(ratio):
184
+ return buckets[0][1] if buckets else 0.0
185
+ for threshold, score in buckets:
186
+ if ratio > threshold:
187
+ return score
188
+ return 0.0
189
+
190
+
191
+ def _busy_window(hours: list[int]) -> tuple[int, int]:
192
+ """Find the 8-hour window with the most commits.
193
+
194
+ Returns (start_hour_inclusive, end_hour_exclusive) in UTC. Works on a circular
195
+ hour clock so a window of (20, 4) means 20:00 → 04:00 UTC.
196
+ """
197
+ counts = [0] * 24
198
+ for h in hours:
199
+ if 0 <= h < 24:
200
+ counts[h] += 1
201
+ best_total = -1
202
+ best_start = 8
203
+ for start in range(24):
204
+ total = sum(counts[(start + i) % 24] for i in range(8))
205
+ if total > best_total:
206
+ best_total = total
207
+ best_start = start
208
+ return best_start, (best_start + 8) % 24
209
+
210
+
211
+ def _within(hour: int, start: int, end: int) -> bool:
212
+ if start <= end:
213
+ return start <= hour < end
214
+ return hour >= start or hour < end
215
+
216
+
217
+ __all__ = ["AIAmplificationDetector"]