blindspot 0.0.2__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- blindspot/__init__.py +1 -0
- blindspot/__main__.py +4 -0
- blindspot/actions/__init__.py +18 -0
- blindspot/actions/models.py +55 -0
- blindspot/actions/recommender.py +316 -0
- blindspot/ai_signal/__init__.py +23 -0
- blindspot/ai_signal/detector.py +217 -0
- blindspot/ai_signal/models.py +71 -0
- blindspot/ai_signal/profile.py +136 -0
- blindspot/ai_signal/quality.py +198 -0
- blindspot/cli.py +973 -0
- blindspot/codeowners/__init__.py +21 -0
- blindspot/codeowners/engine.py +225 -0
- blindspot/codeowners/parser.py +133 -0
- blindspot/collector/__init__.py +15 -0
- blindspot/collector/bitbucket/__init__.py +29 -0
- blindspot/collector/bitbucket/client.py +117 -0
- blindspot/collector/bitbucket/config.py +99 -0
- blindspot/collector/bitbucket/pr_collector.py +225 -0
- blindspot/collector/bitbucket/remote.py +71 -0
- blindspot/collector/bots.py +66 -0
- blindspot/collector/filters.py +164 -0
- blindspot/collector/git.py +81 -0
- blindspot/collector/github/__init__.py +49 -0
- blindspot/collector/github/client.py +116 -0
- blindspot/collector/github/config.py +94 -0
- blindspot/collector/github/gh_client.py +177 -0
- blindspot/collector/github/pr_collector.py +147 -0
- blindspot/collector/github/pr_models.py +16 -0
- blindspot/collector/github/remote.py +51 -0
- blindspot/collector/mailmap.py +123 -0
- blindspot/collector/models.py +20 -0
- blindspot/collector/review_models.py +63 -0
- blindspot/config.py +46 -0
- blindspot/dependency_graph/__init__.py +24 -0
- blindspot/dependency_graph/aggregation.py +150 -0
- blindspot/dependency_graph/builder.py +178 -0
- blindspot/dependency_graph/extractors/__init__.py +62 -0
- blindspot/dependency_graph/extractors/base.py +48 -0
- blindspot/dependency_graph/extractors/cpp.py +74 -0
- blindspot/dependency_graph/extractors/dotnet.py +81 -0
- blindspot/dependency_graph/extractors/go.py +98 -0
- blindspot/dependency_graph/extractors/java.py +66 -0
- blindspot/dependency_graph/extractors/javascript.py +95 -0
- blindspot/dependency_graph/extractors/kotlin.py +62 -0
- blindspot/dependency_graph/extractors/php.py +102 -0
- blindspot/dependency_graph/extractors/python.py +273 -0
- blindspot/dependency_graph/extractors/ruby.py +73 -0
- blindspot/dependency_graph/extractors/rust.py +111 -0
- blindspot/dependency_graph/extractors/swift.py +48 -0
- blindspot/dependency_graph/importance.py +90 -0
- blindspot/dependency_graph/llm_fallback.py +137 -0
- blindspot/dependency_graph/models.py +115 -0
- blindspot/diff_analysis/__init__.py +19 -0
- blindspot/diff_analysis/classifier.py +202 -0
- blindspot/narrative/__init__.py +16 -0
- blindspot/narrative/client.py +92 -0
- blindspot/narrative/config.py +98 -0
- blindspot/narrative/engine.py +103 -0
- blindspot/narrative/models.py +26 -0
- blindspot/narrative/prompt.py +327 -0
- blindspot/ownership/__init__.py +4 -0
- blindspot/ownership/engine.py +131 -0
- blindspot/ownership/models.py +55 -0
- blindspot/report/__init__.py +15 -0
- blindspot/report/context.py +126 -0
- blindspot/report/renderer.py +23 -0
- blindspot/report/templates/departure.html.j2 +294 -0
- blindspot/report/templates/report.html.j2 +767 -0
- blindspot/resilience/__init__.py +7 -0
- blindspot/resilience/score.py +171 -0
- blindspot/review_graph/__init__.py +15 -0
- blindspot/review_graph/engine.py +153 -0
- blindspot/risk_models/__init__.py +37 -0
- blindspot/risk_models/bus_factor.py +145 -0
- blindspot/risk_models/departure.py +142 -0
- blindspot/risk_models/knowledge_decay.py +161 -0
- blindspot/trend/__init__.py +3 -0
- blindspot/trend/engine.py +72 -0
- blindspot-0.0.2.dist-info/METADATA +93 -0
- blindspot-0.0.2.dist-info/RECORD +84 -0
- blindspot-0.0.2.dist-info/WHEEL +4 -0
- blindspot-0.0.2.dist-info/entry_points.txt +2 -0
- blindspot-0.0.2.dist-info/licenses/LICENSE +21 -0
blindspot/__init__.py
ADDED
@@ -0,0 +1 @@
__version__ = "0.0.2"
blindspot/__main__.py
ADDED

blindspot/actions/__init__.py
ADDED
@@ -0,0 +1,18 @@
from blindspot.actions.models import (
    PRIORITY_ORDER,
    ActionCategory,
    ActionPriority,
    FragilityPattern,
    RecommendedAction,
)
from blindspot.actions.recommender import RecommendationContext, RecommendationEngine

__all__ = [
    "ActionCategory",
    "ActionPriority",
    "FragilityPattern",
    "PRIORITY_ORDER",
    "RecommendationContext",
    "RecommendationEngine",
    "RecommendedAction",
]
blindspot/actions/models.py
ADDED
@@ -0,0 +1,55 @@
from dataclasses import dataclass
from enum import Enum


class ActionPriority(str, Enum):
    HIGH = "High"
    MEDIUM = "Medium"
    LOW = "Low"


PRIORITY_ORDER = {
    ActionPriority.HIGH: 0,
    ActionPriority.MEDIUM: 1,
    ActionPriority.LOW: 2,
}


class ActionCategory(str, Enum):
    OWNERSHIP_DIVERSIFICATION = "Ownership Diversification"
    KNOWLEDGE_TRANSFER = "Knowledge Transfer"
    REVIEW_HYGIENE = "Review Hygiene"
    QUALITY_GUARDRAIL = "Quality Guardrail"
    CODEOWNERS_UPDATE = "Codeowners Update"


class FragilityPattern(str, Enum):
    """Named patterns for AI-era engineering fragility.

    Slugs stay machine-readable; the human label lives on the enum value.
    Recommendations are tagged with at most one pattern so the report
    surface can group them and so the README can document each one.
    """
    REVIEW_WITHOUT_SCRUTINY = "Review without scrutiny"
    SINGLE_OWNER_CONCENTRATION = "Single-owner concentration"
    VELOCITY_WITHOUT_REVIEW = "Velocity without review"


@dataclass(frozen=True, slots=True)
class RecommendedAction:
    priority: ActionPriority
    category: ActionCategory
    title: str
    description: str
    target: str
    evidence: str
    pattern: FragilityPattern | None = None


__all__ = [
    "ActionCategory",
    "ActionPriority",
    "FragilityPattern",
    "PRIORITY_ORDER",
    "RecommendedAction",
]
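To show how these models compose, here is a minimal sketch that builds two invented actions and sorts them with the same key `RecommendationEngine.recommend` uses; every field value below is made up for illustration:

```python
from blindspot.actions.models import (
    PRIORITY_ORDER,
    ActionCategory,
    ActionPriority,
    FragilityPattern,
    RecommendedAction,
)

# Two invented actions; in real use the RecommendationEngine produces these.
low = RecommendedAction(
    priority=ActionPriority.LOW,
    category=ActionCategory.REVIEW_HYGIENE,
    title="Rotate reviewers for src/api.py",
    description="Example placeholder.",
    target="src/api.py",
    evidence="diversity_hhi=12%",
)
high = RecommendedAction(
    priority=ActionPriority.HIGH,
    category=ActionCategory.OWNERSHIP_DIVERSIFICATION,
    title="Diversify ownership of 'billing'",
    description="Example placeholder.",
    target="billing",
    evidence="bus_factor=1",
    pattern=FragilityPattern.SINGLE_OWNER_CONCENTRATION,
)

# PRIORITY_ORDER maps High -> 0, Medium -> 1, Low -> 2, so an ascending
# sort puts High actions first, then orders by category label and target.
ordered = sorted(
    [low, high],
    key=lambda a: (PRIORITY_ORDER[a.priority], a.category.value, a.target),
)
assert ordered[0] is high

# Because the enums subclass str, members compare equal to their labels:
assert ActionPriority.HIGH == "High"
```

The frozen, slotted dataclass makes each action immutable and hashable, which downstream report code can rely on.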
blindspot/actions/recommender.py
ADDED
@@ -0,0 +1,316 @@
from dataclasses import dataclass, field

from blindspot.actions.models import (
    PRIORITY_ORDER,
    ActionCategory,
    ActionPriority,
    FragilityPattern,
    RecommendedAction,
)
from blindspot.ai_signal.models import AuthorProfile, AuthorProfileType
from blindspot.codeowners import CodeOwnersReport
from blindspot.diff_analysis.classifier import classify_file
from blindspot.review_graph.engine import FileReviewStats
from blindspot.risk_models.bus_factor import FileBusFactor, ServiceBusFactor
from blindspot.risk_models.knowledge_decay import FileDecay


@dataclass
class RecommendationContext:
    services: tuple[ServiceBusFactor, ...] = ()
    critical_files: tuple[FileBusFactor, ...] = ()
    decays: tuple[FileDecay, ...] = ()
    review_stats: dict[str, FileReviewStats] = field(default_factory=dict)
    author_profiles: dict[str, AuthorProfile] = field(default_factory=dict)
    ownership_names: dict[str, str] = field(default_factory=dict)
    codeowners_report: CodeOwnersReport | None = None
    importance_map: dict[str, float] = field(default_factory=dict)


@dataclass
class RecommendationEngine:
    decay_critical_threshold: float = 0.75
    decay_high_threshold: float = 0.50
    rubber_stamp_threshold: float = 0.70
    diversity_floor: float = 0.20
    fast_approval_seconds: float = 30 * 60
    min_reviews_for_rubber_stamp: int = 2
    min_reviews_for_diversity: int = 3
    min_approvals_for_latency: int = 3
    max_per_rule: int = 5
    importance_threshold: float = 0.005

    def _passes_importance(self, ctx: RecommendationContext, file: str) -> bool:
        """Filter out files structurally unimportant to the codebase.

        When no importance_map is provided (or it's empty), behaviour is
        unchanged — every file passes. With a map, files below the
        threshold (default 0.5% PageRank weight) are filtered.
        """
        if not ctx.importance_map:
            return True
        return ctx.importance_map.get(file, 0.0) >= self.importance_threshold

    def recommend(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        actions: list[RecommendedAction] = []
        actions.extend(self._service_bus_factor(ctx))
        actions.extend(self._file_decay(ctx))
        actions.extend(self._rubber_stamp(ctx))
        actions.extend(self._reviewer_diversity(ctx))
        actions.extend(self._fast_approval(ctx))
        actions.extend(self._fake_velocity(ctx))
        actions.extend(self._codeowners(ctx))
        actions.sort(key=lambda a: (PRIORITY_ORDER[a.priority], a.category.value, a.target))
        return actions

    def _label(self, ctx: RecommendationContext, email: str) -> str:
        name = ctx.ownership_names.get(email)
        if name and name != email:
            return f"{name} ({email})"
        return email

    def _service_bus_factor(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        out: list[RecommendedAction] = []
        for s in ctx.services:
            if s.bus_factor > 1 or not s.top_owners:
                continue
            owner_email, owner_cov = s.top_owners[0]
            owner_label = self._label(ctx, owner_email)
            priority = ActionPriority.HIGH if s.file_count >= 5 else ActionPriority.MEDIUM
            out.append(
                RecommendedAction(
                    priority=priority,
                    category=ActionCategory.OWNERSHIP_DIVERSIFICATION,
                    title=f"Diversify ownership of '{s.service}' (currently single-owner)",
                    description=(
                        f"Service '{s.service}' has bus factor 1 across {s.file_count} files; "
                        f"{owner_label} holds {owner_cov:.0%} of effective ownership. "
                        "Pair them with at least two additional engineers and rotate code reviews "
                        "for this area over the next 60 days."
                    ),
                    target=s.service,
                    evidence=(
                        f"bus_factor=1, top_owner_coverage={owner_cov:.0%}, files={s.file_count}"
                    ),
                    pattern=FragilityPattern.SINGLE_OWNER_CONCENTRATION,
                )
            )
        return out[: self.max_per_rule]

    def _file_decay(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        out: list[RecommendedAction] = []
        for d in ctx.decays:
            if d.decay_score < self.decay_high_threshold:
                continue
            if not self._passes_importance(ctx, d.file):
                continue
            owner_label = self._label(ctx, d.top_owner)
            critical = d.decay_score >= self.decay_critical_threshold
            priority = ActionPriority.HIGH if critical else ActionPriority.MEDIUM
            urgency = "critical" if critical else "elevated"
            out.append(
                RecommendedAction(
                    priority=priority,
                    category=ActionCategory.KNOWLEDGE_TRANSFER,
                    title=f"Knowledge transfer for {d.file}",
                    description=(
                        f"Decay is {urgency} ({d.decay_score:.0%}). "
                        f"{owner_label} last touched this file {d.days_since_owner_touch:.0f} days ago, "
                        f"and {d.lines_changed_after} lines have been changed since by others. "
                        "Schedule a transfer session and designate a secondary owner before the next "
                        "non-trivial change."
                    ),
                    target=d.file,
                    evidence=(
                        f"decay={d.decay_score:.0%}, days_since_touch={d.days_since_owner_touch:.0f}, "
                        f"lines_after={d.lines_changed_after}"
                    ),
                )
            )
        return out[: self.max_per_rule]

    def _rubber_stamp(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        out: list[RecommendedAction] = []
        candidates = sorted(
            (
                s for s in ctx.review_stats.values()
                if s.total_reviews >= self.min_reviews_for_rubber_stamp
                and s.rubber_stamp_ratio >= self.rubber_stamp_threshold
                and classify_file(s.file) == "code"
                and self._passes_importance(ctx, s.file)
            ),
            key=lambda s: (-s.rubber_stamp_ratio, -s.total_reviews),
        )
        for s in candidates:
            out.append(
                RecommendedAction(
                    priority=ActionPriority.MEDIUM,
                    category=ActionCategory.REVIEW_HYGIENE,
                    title=f"Add review depth requirement for {s.file}",
                    description=(
                        f"{s.rubber_stamp_ratio:.0%} of approvals on this file arrived without a "
                        f"substantive review comment (across {s.total_reviews} reviews). "
                        "Introduce a review checklist or require at least one substantive comment "
                        "before approval is allowed."
                    ),
                    target=s.file,
                    evidence=(
                        f"rubber_stamp_ratio={s.rubber_stamp_ratio:.0%}, reviews={s.total_reviews}"
                    ),
                    pattern=FragilityPattern.REVIEW_WITHOUT_SCRUTINY,
                )
            )
        return out[: self.max_per_rule]

    def _reviewer_diversity(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        out: list[RecommendedAction] = []
        candidates = sorted(
            (
                s for s in ctx.review_stats.values()
                if s.total_reviews >= self.min_reviews_for_diversity
                and s.diversity_hhi < self.diversity_floor
                and classify_file(s.file) == "code"
                and self._passes_importance(ctx, s.file)
            ),
            key=lambda s: s.diversity_hhi,
        )
        for s in candidates:
            out.append(
                RecommendedAction(
                    priority=ActionPriority.LOW,
                    category=ActionCategory.REVIEW_HYGIENE,
                    title=f"Rotate reviewers for {s.file}",
                    description=(
                        f"Reviewer diversity for this file is {s.diversity_hhi:.0%}, "
                        "meaning a single reviewer is carrying most of the review burden. "
                        "Add the file to a CODEOWNERS group or require a second reviewer to spread "
                        "knowledge of the area."
                    ),
                    target=s.file,
                    evidence=(
                        f"diversity_hhi={s.diversity_hhi:.0%}, unique_reviewers={s.unique_reviewers}"
                    ),
                )
            )
        return out[: self.max_per_rule]

    def _fast_approval(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        out: list[RecommendedAction] = []
        candidates = sorted(
            (
                s for s in ctx.review_stats.values()
                if s.median_approval_latency_seconds is not None
                and s.approval_sample_size >= self.min_approvals_for_latency
                and s.median_approval_latency_seconds < self.fast_approval_seconds
                and classify_file(s.file) == "code"
                and self._passes_importance(ctx, s.file)
            ),
            key=lambda s: s.median_approval_latency_seconds or 0,
        )
        for s in candidates:
            minutes = (s.median_approval_latency_seconds or 0) / 60
            out.append(
                RecommendedAction(
                    priority=ActionPriority.MEDIUM,
                    category=ActionCategory.REVIEW_HYGIENE,
                    title=f"Slow down fast approvals on {s.file}",
                    description=(
                        f"Median time from PR open to first approval is "
                        f"{minutes:.0f} minutes across {s.approval_sample_size} approvals — "
                        "too short for meaningful review of non-trivial code. "
                        "Add a minimum review time, CODEOWNERS review requirement, "
                        "or required checklist."
                    ),
                    target=s.file,
                    evidence=(
                        f"median_approval={minutes:.0f}min, samples={s.approval_sample_size}"
                    ),
                    pattern=FragilityPattern.REVIEW_WITHOUT_SCRUTINY,
                )
            )
        return out[: self.max_per_rule]

    def _fake_velocity(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        out: list[RecommendedAction] = []
        for profile in ctx.author_profiles.values():
            if profile.profile_type != AuthorProfileType.FAKE_VELOCITY:
                continue
            label = self._label(ctx, profile.author_email)
            quality_pct = (
                f"{profile.quality_signal.risk_score:.0%}" if profile.quality_signal else "n/a"
            )
            ai_score = (
                f"{profile.ai_signal.score:.2f}" if profile.ai_signal else "n/a"
            )
            out.append(
                RecommendedAction(
                    priority=ActionPriority.HIGH,
                    category=ActionCategory.QUALITY_GUARDRAIL,
                    title=f"Deep review of recent work by {label}",
                    description=(
                        f"Recent activity shows AI amplification signals "
                        f"(AI score {ai_score}) together with elevated quality risk "
                        f"({quality_pct}). Schedule a dedicated review of their last 90 days of "
                        "changes, with focus on architectural correctness and test coverage. "
                        "Treat this as a verification step, not a punishment."
                    ),
                    target=profile.author_email,
                    evidence=(
                        f"ai_score={ai_score}, quality_risk={quality_pct}, "
                        f"profile={profile.profile_type.value}"
                    ),
                    pattern=FragilityPattern.VELOCITY_WITHOUT_REVIEW,
                )
            )
        return out[: self.max_per_rule]

    def _codeowners(self, ctx: RecommendationContext) -> list[RecommendedAction]:
        report = ctx.codeowners_report
        if report is None:
            return []
        out: list[RecommendedAction] = []

        # Mismatches: highest signal — declared owner is wrong.
        for f in report.mismatches[: self.max_per_rule]:
            actual_label = (
                self._label(ctx, f.actual_top_owner) if f.actual_top_owner else "(unknown)"
            )
            declared = ", ".join(f.declared_owners) if f.declared_owners else "(none)"
            out.append(RecommendedAction(
                priority=ActionPriority.MEDIUM,
                category=ActionCategory.CODEOWNERS_UPDATE,
                title=f"Update CODEOWNERS for {f.file}",
                description=(
                    f"Declared owners ({declared}) do not include the current top contributor. "
                    f"{actual_label} holds {f.actual_coverage:.0%} of effective ownership. "
                    "Either add them to the CODEOWNERS rule or assign explicit cross-coverage."
                ),
                target=f.file,
                evidence=(
                    f"declared={declared}, actual_top={f.actual_top_owner or 'n/a'}, "
                    f"coverage={f.actual_coverage:.0%}, line={f.rule_line}"
                ),
            ))

        # Stale: declared owner hasn't touched it in a long time.
        for f in report.stale[: self.max_per_rule]:
            declared = ", ".join(f.declared_owners) if f.declared_owners else "(none)"
            days = f.days_since_declared_touch
            days_txt = f"{days}" if days is not None else "no record"
            out.append(RecommendedAction(
                priority=ActionPriority.LOW,
                category=ActionCategory.CODEOWNERS_UPDATE,
                title=f"Refresh stale CODEOWNERS entry for {f.file}",
                description=(
                    f"Declared owner ({declared}) has not touched this file in {days_txt} days. "
                    "Confirm the assignment is still accurate; if not, rotate to a recent contributor."
                ),
                target=f.file,
                evidence=(
                    f"declared={declared}, days_since_touch={days_txt}, line={f.rule_line}"
                ),
            ))
        return out


__all__ = ["RecommendationContext", "RecommendationEngine"]
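As an end-to-end sketch of the engine, the snippet below feeds it a single-owner service. `StubService` is a hypothetical stand-in that exposes only the four attributes `_service_bus_factor` reads; a real pipeline would pass `ServiceBusFactor` values from blindspot/risk_models/bus_factor.py, whose constructor is not shown here:

```python
from dataclasses import dataclass

from blindspot.actions.recommender import RecommendationContext, RecommendationEngine


@dataclass
class StubService:
    # Stand-in for ServiceBusFactor: only the attributes the rule reads.
    service: str
    bus_factor: int
    top_owners: list[tuple[str, float]]  # (email, coverage) pairs
    file_count: int


ctx = RecommendationContext(
    services=(
        StubService("billing", bus_factor=1,
                    top_owners=[("ana@example.com", 0.92)], file_count=7),
    ),
    ownership_names={"ana@example.com": "Ana"},
)

engine = RecommendationEngine()
for action in engine.recommend(ctx):
    print(action.priority.value, "|", action.title)
# -> High | Diversify ownership of 'billing' (currently single-owner)
#    (file_count >= 5 promotes the action to High priority)
```

Note that with an empty `importance_map` every file passes `_passes_importance`; supplying a PageRank-style map silently drops low-importance files from the decay and review-hygiene rules.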
blindspot/ai_signal/__init__.py
ADDED
@@ -0,0 +1,23 @@
from blindspot.ai_signal.detector import AIAmplificationDetector
from blindspot.ai_signal.models import (
    AIFlag,
    AISignal,
    AuthorProfile,
    AuthorProfileType,
    QualitySignal,
    SignalStrength,
)
from blindspot.ai_signal.profile import AuthorProfiler
from blindspot.ai_signal.quality import QualitySignalEngine

__all__ = [
    "AIAmplificationDetector",
    "AIFlag",
    "AISignal",
    "AuthorProfile",
    "AuthorProfileType",
    "AuthorProfiler",
    "QualitySignal",
    "QualitySignalEngine",
    "SignalStrength",
]
blindspot/ai_signal/detector.py
ADDED
@@ -0,0 +1,217 @@
import math
from collections.abc import Iterable
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta

from blindspot.ai_signal.models import AIFlag, AISignal
from blindspot.collector.models import Commit


@dataclass
class AIAmplificationDetector:
    """Heuristic detector for unusual recent activity that may indicate AI amplification.

    Five signals compared to a per-author baseline:
    1. commit frequency spike
    2. average change-size spike
    3. commit message length spike
    4. share of unusually large single commits
    5. share of off-hours commits
    """

    measurement_days: int = 90
    baseline_days: int = 365
    high_threshold: float = 0.70
    medium_threshold: float = 0.40
    weights: tuple[float, float, float, float, float] = (0.30, 0.25, 0.20, 0.15, 0.10)
    min_baseline_commits: int = 5
    as_of: datetime | None = None
    _as_of: datetime = field(init=False)

    def __post_init__(self) -> None:
        self._as_of = (self.as_of or datetime.now(UTC)).astimezone(UTC)

    def detect(self, commits: Iterable[Commit]) -> dict[str, AISignal]:
        recent_cutoff = self._as_of - timedelta(days=self.measurement_days)
        baseline_cutoff = recent_cutoff - timedelta(days=self.baseline_days)

        recent: dict[str, list[Commit]] = {}
        baseline: dict[str, list[Commit]] = {}
        for c in commits:
            if c.authored_at >= recent_cutoff:
                recent.setdefault(c.author_email, []).append(c)
            elif c.authored_at >= baseline_cutoff:
                baseline.setdefault(c.author_email, []).append(c)

        results: dict[str, AISignal] = {}
        for email in set(recent) | set(baseline):
            r = recent.get(email, [])
            b = baseline.get(email, [])
            if not r:
                continue

            if len(b) < self.min_baseline_commits:
                signal = self._insufficient_baseline_signal(email, r, b)
            else:
                freq = self._frequency_score(r, b)
                vol = self._volume_score(r, b)
                msg = self._message_score(r, b)
                large = self._large_commit_score(r, b)
                timing = self._timing_score(r, b)

                w = self.weights
                score = (
                    freq * w[0] + vol * w[1] + msg * w[2] + large * w[3] + timing * w[4]
                )
                flag = self._flag_from_score(score)

                signal = AISignal(
                    author_email=email,
                    flag=flag,
                    score=score,
                    frequency_score=freq,
                    volume_score=vol,
                    message_score=msg,
                    large_commit_score=large,
                    timing_score=timing,
                    recent_commits=len(r),
                    baseline_commits=len(b),
                )
            results[email] = signal
        return results

    def _flag_from_score(self, score: float) -> AIFlag:
        if score >= self.high_threshold:
            return AIFlag.HIGH
        if score >= self.medium_threshold:
            return AIFlag.MEDIUM
        return AIFlag.LOW

    def _insufficient_baseline_signal(
        self, email: str, r: list[Commit], b: list[Commit]
    ) -> AISignal:
        return AISignal(
            author_email=email,
            flag=AIFlag.LOW,
            score=0.0,
            frequency_score=0.0,
            volume_score=0.0,
            message_score=0.0,
            large_commit_score=0.0,
            timing_score=0.0,
            recent_commits=len(r),
            baseline_commits=len(b),
        )

    def _frequency_score(self, r: list[Commit], b: list[Commit]) -> float:
        recent_rate = len(r) / self.measurement_days
        baseline_rate = len(b) / self.baseline_days
        if baseline_rate == 0:
            return 0.0
        ratio = recent_rate / baseline_rate
        return _bucketed_score(ratio, [(3.0, 1.0), (2.0, 0.7), (1.5, 0.4)])

    def _volume_score(self, r: list[Commit], b: list[Commit]) -> float:
        r_avg = _avg_change_size(r)
        b_avg = _avg_change_size(b)
        if b_avg == 0:
            return 0.0
        ratio = r_avg / b_avg
        return _bucketed_score(ratio, [(4.0, 1.0), (2.5, 0.7), (1.5, 0.4)])

    def _message_score(self, r: list[Commit], b: list[Commit]) -> float:
        r_avg = _avg(len(c.message) for c in r)
        b_avg = _avg(len(c.message) for c in b)
        if b_avg == 0:
            return 0.0
        ratio = r_avg / b_avg
        return _bucketed_score(ratio, [(2.5, 1.0), (1.8, 0.6), (1.3, 0.3)])

    def _large_commit_score(self, r: list[Commit], b: list[Commit]) -> float:
        b_avg = _avg_change_size(b)
        if b_avg == 0:
            return 0.0
        threshold = b_avg * 3.0
        large = sum(1 for c in r if _change_size(c) > threshold)
        if not r:
            return 0.0
        ratio = large / len(r)
        return _bucketed_score(ratio, [(0.5, 1.0), (0.3, 0.6), (0.15, 0.3)])

    def _timing_score(self, r: list[Commit], b: list[Commit]) -> float:
        # Off-hours = UTC hours outside the busiest 8-hour window in baseline.
        # If baseline gives no working-hours signal, fall back to a generous default
        # (treat 22:00–08:00 UTC as off-hours).
        baseline_hours = [c.authored_at.hour for c in b]
        if len(baseline_hours) >= self.min_baseline_commits:
            work_start, work_end = _busy_window(baseline_hours)
        else:
            work_start, work_end = 8, 22

        recent_off = sum(1 for c in r if not _within(c.authored_at.hour, work_start, work_end))
        baseline_off = sum(1 for c in b if not _within(c.authored_at.hour, work_start, work_end))
        if len(r) == 0 or len(b) == 0:
            return 0.0
        recent_ratio = recent_off / len(r)
        baseline_ratio = baseline_off / len(b)
        if baseline_ratio == 0:
            ratio = float("inf") if recent_ratio > 0 else 0.0
        else:
            ratio = recent_ratio / baseline_ratio
        return _bucketed_score(ratio, [(2.0, 0.8), (1.5, 0.4)])


def _change_size(c: Commit) -> int:
    return sum(f.additions + f.deletions for f in c.files)


def _avg_change_size(commits: list[Commit]) -> float:
    if not commits:
        return 0.0
    return sum(_change_size(c) for c in commits) / len(commits)


def _avg(values: Iterable[float]) -> float:
    values = list(values)
    if not values:
        return 0.0
    return sum(values) / len(values)


def _bucketed_score(ratio: float, buckets: list[tuple[float, float]]) -> float:
    """buckets sorted desc by threshold; return first matching score, else 0."""
    if math.isinf(ratio) or math.isnan(ratio):
        return buckets[0][1] if buckets else 0.0
    for threshold, score in buckets:
        if ratio > threshold:
            return score
    return 0.0


def _busy_window(hours: list[int]) -> tuple[int, int]:
    """Find the 8-hour window with the most commits.

    Returns (start_hour_inclusive, end_hour_exclusive) in UTC. Works on a circular
    hour clock so a window of (20, 4) means 20:00 → 04:00 UTC.
    """
    counts = [0] * 24
    for h in hours:
        if 0 <= h < 24:
            counts[h] += 1
    best_total = -1
    best_start = 8
    for start in range(24):
        total = sum(counts[(start + i) % 24] for i in range(8))
        if total > best_total:
            best_total = total
            best_start = start
    return best_start, (best_start + 8) % 24


def _within(hour: int, start: int, end: int) -> bool:
    if start <= end:
        return start <= hour < end
    return hour >= start or hour < end


__all__ = ["AIAmplificationDetector"]
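And a runnable sketch of the detector on synthetic history. `StubCommit` and `StubFile` are hypothetical stand-ins exposing only the attributes the detector reads (`author_email`, `authored_at`, `message`, and per-file `additions`/`deletions`); the real `Commit` model is defined in blindspot/collector/models.py, and the commit pattern below is invented purely to trip the spike heuristics:

```python
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta

from blindspot.ai_signal.detector import AIAmplificationDetector


@dataclass
class StubFile:
    additions: int
    deletions: int


@dataclass
class StubCommit:
    author_email: str
    authored_at: datetime
    message: str
    files: list[StubFile] = field(default_factory=list)


as_of = datetime(2025, 1, 1, tzinfo=UTC)
commits = []

# Baseline: roughly one small daytime commit per week over the prior year.
for week in range(40):
    day = (as_of - timedelta(days=120 + 7 * week)).replace(hour=14)
    commits.append(StubCommit("dev@example.com", day,
                              "fix: small tweak", [StubFile(20, 5)]))

# Recent 90 days: daily, huge, verbose, late-night commits.
for n in range(60):
    night = (as_of - timedelta(days=1 + n)).replace(hour=2)
    commits.append(StubCommit("dev@example.com", night,
                              "feat: sweeping refactor\n\n" + "x" * 400,
                              [StubFile(800, 300)]))

signal = AIAmplificationDetector(as_of=as_of).detect(commits)["dev@example.com"]
print(signal.flag, round(signal.score, 2))
```

With this history every sub-score saturates at 1.0 except timing, whose bucket table caps at 0.8, so the weighted total is 0.30 + 0.25 + 0.20 + 0.15 + 0.08 = 0.98, above the 0.70 HIGH threshold.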