patchrail 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. patchrail/__init__.py +7 -0
  2. patchrail/__main__.py +7 -0
  3. patchrail/ci/__init__.py +7 -0
  4. patchrail/ci/classify.py +888 -0
  5. patchrail/cli.py +8566 -0
  6. patchrail/funded_issues/__init__.py +138 -0
  7. patchrail/funded_issues/algora_board.py +240 -0
  8. patchrail/funded_issues/blocklist.py +112 -0
  9. patchrail/funded_issues/discovery.py +4091 -0
  10. patchrail/funded_issues/importers.py +316 -0
  11. patchrail/funded_issues/source_noise.py +349 -0
  12. patchrail/funded_issues/store.py +459 -0
  13. patchrail/queue/__init__.py +75 -0
  14. patchrail/queue/server.py +273 -0
  15. patchrail/queue/status.py +756 -0
  16. patchrail/queue/store.py +600 -0
  17. patchrail/reviewer_quick_check.py +650 -0
  18. patchrail/schemas/__init__.py +1 -0
  19. patchrail/schemas/application-dossier.v1.schema.json +305 -0
  20. patchrail/schemas/ci-benchmark.v1.schema.json +174 -0
  21. patchrail/schemas/ci-fixture-check.v1.schema.json +122 -0
  22. patchrail/schemas/ci-pilot-metrics.v1.schema.json +164 -0
  23. patchrail/schemas/ci-pilot-summary.v1.schema.json +146 -0
  24. patchrail/schemas/ci-result.v1.schema.json +133 -0
  25. patchrail/schemas/funded-issues-client-report.v1.schema.json +524 -0
  26. patchrail/schemas/funded-issues-recheck-queue.v1.schema.json +333 -0
  27. patchrail/schemas/funded-issues-recheck-summary.v1.schema.json +136 -0
  28. patchrail/schemas/funded-issues-report.v1.schema.json +836 -0
  29. patchrail/schemas/funded-issues-shortlist.v1.schema.json +953 -0
  30. patchrail/schemas/funded-issues-store-status.v1.schema.json +96 -0
  31. patchrail/schemas/funded-issues-store.v1.schema.json +117 -0
  32. patchrail/schemas/queue-audit-event.v1.schema.json +44 -0
  33. patchrail/schemas/queue-audit-summary.v1.schema.json +169 -0
  34. patchrail/schemas/queue-gate-report.v1.schema.json +158 -0
  35. patchrail/schemas/queue-policy-resolution.v1.schema.json +188 -0
  36. patchrail/schemas/queue-policy-scan.v1.schema.json +175 -0
  37. patchrail/schemas/queue-proposal.v1.schema.json +61 -0
  38. patchrail/schemas/queue-review.v1.schema.json +218 -0
  39. patchrail/schemas/queue-status.v1.schema.json +179 -0
  40. patchrail/schemas/queue-work-item.v1.schema.json +64 -0
  41. patchrail/schemas/reviewer-quick-check-artifacts.v1.schema.json +104 -0
  42. patchrail/web_metrics.py +649 -0
  43. patchrail-0.1.0.dist-info/METADATA +279 -0
  44. patchrail-0.1.0.dist-info/RECORD +47 -0
  45. patchrail-0.1.0.dist-info/WHEEL +4 -0
  46. patchrail-0.1.0.dist-info/entry_points.txt +2 -0
  47. patchrail-0.1.0.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,138 @@
1
+ """Read-only funded issue discovery helpers."""
2
+
3
+ from patchrail.funded_issues.discovery import (
4
+ CLIENT_PROFILE_SCHEMA_VERSION,
5
+ COMPETITION_BATCH_SCHEMA_VERSION,
6
+ COMPETITION_SIGNAL_SCHEMA_VERSION,
7
+ PAYOUT_EFFORT_BATCH_SCHEMA_VERSION,
8
+ PAYOUT_EFFORT_SIGNAL_SCHEMA_VERSION,
9
+ STALENESS_BATCH_SCHEMA_VERSION,
10
+ STALENESS_SIGNAL_SCHEMA_VERSION,
11
+ TESTABILITY_BATCH_SCHEMA_VERSION,
12
+ TESTABILITY_SIGNAL_SCHEMA_VERSION,
13
+ ClientProfile,
14
+ FundedIssue,
15
+ VALID_OPPORTUNITY_STATES,
16
+ VALID_RISK_LEVELS,
17
+ assess_bounty_competition,
18
+ assess_competition_batch,
19
+ assess_issue_staleness,
20
+ assess_issue_testability,
21
+ assess_payout_effort,
22
+ assess_payout_effort_batch,
23
+ assess_staleness_batch,
24
+ assess_testability_batch,
25
+ cash_actions_funded_issues,
26
+ client_report_funded_issues,
27
+ explain_issue,
28
+ fulfillment_packet_funded_issues,
29
+ funded_issues_payload,
30
+ load_client_profile,
31
+ load_funded_issues,
32
+ recheck_funded_issues,
33
+ report_funded_issues,
34
+ score_funded_issues,
35
+ shortlist_funded_issues,
36
+ summarize_issues,
37
+ validate_funded_issues,
38
+ )
39
+ from patchrail.funded_issues.algora_board import (
40
+ ALGORA_BOARD_SCHEMA_VERSION,
41
+ board_issue_records,
42
+ board_payload,
43
+ board_url,
44
+ parse_board_html,
45
+ )
46
+ from patchrail.funded_issues.blocklist import (
47
+ BLOCKLIST_SCHEMA_VERSION,
48
+ BLOCKLISTED_OWNERS,
49
+ is_blocklisted_owner,
50
+ is_blocklisted_record,
51
+ purge_blocklisted_entries,
52
+ )
53
+ from patchrail.funded_issues.importers import SUPPORTED_PROVIDERS, import_provider_export
54
+ from patchrail.funded_issues.source_noise import (
55
+ SOURCE_NOISE_SCHEMA_VERSION,
56
+ STRONG_NOISE_FLAGS,
57
+ apply_source_noise_to_store,
58
+ assess_owner_source_noise,
59
+ entries_by_owner,
60
+ )
61
+ from patchrail.funded_issues.store import (
62
+ RECHECK_SUMMARY_SCHEMA_VERSION,
63
+ STORE_SCHEMA_VERSION,
64
+ STORE_STATUS_SCHEMA_VERSION,
65
+ MergeSummary,
66
+ RecheckSummary,
67
+ apply_recheck_to_store,
68
+ empty_store,
69
+ load_store,
70
+ merge_into_store,
71
+ save_store,
72
+ store_status,
73
+ )
74
+
75
+ __all__ = [
76
+ "FundedIssue",
77
+ "ALGORA_BOARD_SCHEMA_VERSION",
78
+ "BLOCKLIST_SCHEMA_VERSION",
79
+ "BLOCKLISTED_OWNERS",
80
+ "CLIENT_PROFILE_SCHEMA_VERSION",
81
+ "COMPETITION_BATCH_SCHEMA_VERSION",
82
+ "COMPETITION_SIGNAL_SCHEMA_VERSION",
83
+ "PAYOUT_EFFORT_BATCH_SCHEMA_VERSION",
84
+ "PAYOUT_EFFORT_SIGNAL_SCHEMA_VERSION",
85
+ "RECHECK_SUMMARY_SCHEMA_VERSION",
86
+ "SOURCE_NOISE_SCHEMA_VERSION",
87
+ "STALENESS_BATCH_SCHEMA_VERSION",
88
+ "STALENESS_SIGNAL_SCHEMA_VERSION",
89
+ "STORE_SCHEMA_VERSION",
90
+ "STORE_STATUS_SCHEMA_VERSION",
91
+ "STRONG_NOISE_FLAGS",
92
+ "TESTABILITY_BATCH_SCHEMA_VERSION",
93
+ "TESTABILITY_SIGNAL_SCHEMA_VERSION",
94
+ "ClientProfile",
95
+ "MergeSummary",
96
+ "RecheckSummary",
97
+ "SUPPORTED_PROVIDERS",
98
+ "VALID_OPPORTUNITY_STATES",
99
+ "VALID_RISK_LEVELS",
100
+ "assess_bounty_competition",
101
+ "assess_competition_batch",
102
+ "assess_issue_staleness",
103
+ "assess_issue_testability",
104
+ "assess_payout_effort",
105
+ "assess_payout_effort_batch",
106
+ "assess_staleness_batch",
107
+ "assess_testability_batch",
108
+ "apply_recheck_to_store",
109
+ "apply_source_noise_to_store",
110
+ "assess_owner_source_noise",
111
+ "board_issue_records",
112
+ "board_payload",
113
+ "board_url",
114
+ "cash_actions_funded_issues",
115
+ "client_report_funded_issues",
116
+ "empty_store",
117
+ "entries_by_owner",
118
+ "explain_issue",
119
+ "fulfillment_packet_funded_issues",
120
+ "funded_issues_payload",
121
+ "import_provider_export",
122
+ "is_blocklisted_owner",
123
+ "is_blocklisted_record",
124
+ "load_client_profile",
125
+ "load_funded_issues",
126
+ "load_store",
127
+ "merge_into_store",
128
+ "parse_board_html",
129
+ "purge_blocklisted_entries",
130
+ "recheck_funded_issues",
131
+ "report_funded_issues",
132
+ "save_store",
133
+ "score_funded_issues",
134
+ "shortlist_funded_issues",
135
+ "store_status",
136
+ "summarize_issues",
137
+ "validate_funded_issues",
138
+ ]
@@ -0,0 +1,240 @@
1
+ """Parse a saved Algora organization bounty-board page into funded issues.
2
+
3
+ Algora renders each organization's public bounty board at
4
+ ``https://algora.io/<org>/bounties``. The initial server-rendered HTML carries
5
+ the board's open/completed totals and a table of open bounties with the four
6
+ public facts the tracker needs and that generic issue scraping cannot provide:
7
+
8
+ * the **funder-stated USD amount** (the board is the funding organization's own
9
+ listing, so the amount is primary-source evidence, not aggregator hearsay);
10
+ * the GitHub issue URL and reference;
11
+ * the posting age shown on the board;
12
+ * the number of **claims** (declared solve attempts) on the bounty.
13
+
14
+ This module is a pure, offline parser for a *locally saved copy* of that page:
15
+ save the board with your browser or any HTTP client, then run
16
+ ``patchrail funded-issues import-algora-board``. Keeping the fetch outside the
17
+ toolkit preserves the tracker's no-network rule (network access requires
18
+ explicit opt-in) and keeps tests hermetic. Nothing here claims, comments, or
19
+ writes to any third party.
20
+
21
+ Honesty note: the server-rendered table contains only the first page of open
22
+ bounties (about ten rows); the board's ``open_count`` is still the true total,
23
+ so the payload reports both and never pretends the visible subset is complete.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import html as html_lib
29
+ import re
30
+ from typing import Any
31
+
32
+ from patchrail.funded_issues.discovery import (
33
+ BLOCKED_ACTIONS,
34
+ COMPETITION_THRESHOLDS,
35
+ CONTESTED_BOUNTY_FLAG,
36
+ FundedIssue,
37
+ score_funded_issues,
38
+ )
39
+
40
# Schema identifier stamped on every parsed board and payload.
ALGORA_BOARD_SCHEMA_VERSION = "patchrail.funded_issues.algora_board.v1"

# Stable markers in the board's server-rendered markup. The page is a LiveView
# app, but these classes/attributes have been stable across organizations; the
# parser fails loudly (ValueError) when the board scaffolding is absent so a
# login redirect or an unrelated page is never silently parsed as zero bounties.
_BOARD_MARKER = 'phx-value-tab="open"'
# Every ``<tr`` opening tag starts a new candidate row chunk.
_ROW_SPLIT_RE = re.compile(r"<tr\b")
# Group 1: the dollar amount (digits and commas, optional 1-2 decimals) that
# follows the emphasized amount element.
_AMOUNT_RE = re.compile(r"font-extrabold text-emerald-300[^\"]*\">\s*\$([\d,]+(?:\.\d{1,2})?)")
# Groups 1-3: GitHub owner, repository, and issue number of the row's issue link.
_ISSUE_LINK_RE = re.compile(
    r"<a href=\"https://github\.com/([^/\"]+)/([^/\"]+)/issues/(\d+)\"[^>]*class=\"group/issue"
)
# Group 1: the raw title markup up to the closing ``</p>`` (may span lines).
_TITLE_RE = re.compile(r"line-clamp-2[^\"]*\">\s*(.*?)\s*</p>", re.S)
# Group 1: the posting-age label as shown on the board.
_AGE_RE = re.compile(r"text-xs text-gray-400\">\s*([^<]+?)\s*</p>")
# Group 1: the number in an "N claim(s)" text node.
_CLAIMS_RE = re.compile(r">\s*([\d,]+)\s+claims?\s*<")
# Formatted with a tab ``label``; group 1 is the count in the following <span>.
_TAB_COUNT_RE_TEMPLATE = r"{label}</div>\s*<span[^>]*>\s*([\d,]+)\s*</span>"
# Groups 1-2: the count and the singular unit of an "N <unit>(s) ago" label.
_AGE_TEXT_RE = re.compile(r"(\d+)\s+(minute|hour|day|week|month|year)s?\s+ago")

# Days represented by one of each age unit; minutes deliberately count as 0.
_AGE_UNIT_DAYS = {
    "minute": 0.0,
    "hour": 1.0 / 24.0,
    "day": 1.0,
    "week": 7.0,
    "month": 30.0,
    "year": 365.0,
}
66
+
67
+
68
def board_url(org: str) -> str:
    """Public URL of an organization's Algora bounty board.

    ``org`` is the Algora organization handle. Surrounding whitespace and
    slashes are dropped and the handle is percent-encoded, so a handle typed
    as ``" acme/ "`` or one containing reserved characters still yields a
    single well-formed path segment. Plain handles (letters, digits, ``-``,
    ``_``, ``.``, ``~``) are returned unchanged.
    """

    # Imported locally so this helper carries its own dependency.
    from urllib.parse import quote

    handle = org.strip().strip("/")
    # safe="" also encodes "/" so the handle can never add path segments.
    encoded = quote(handle, safe="")
    return f"https://algora.io/{encoded}/bounties"
72
+
73
+
74
+ def _to_int(text: str) -> int:
75
+ return int(text.replace(",", ""))
76
+
77
+
78
def _tab_count(html: str, label: str) -> int | None:
    """Return the count rendered next to the ``label`` tab, if present.

    ``label`` is matched literally against the board markup; the count is the
    number in the ``<span>`` that follows the label's closing ``</div>``.
    Returns ``None`` when the page does not render that tab count.
    """

    # Escape the label so regex metacharacters in it can neither change the
    # pattern's meaning nor raise ``re.error``.
    pattern = _TAB_COUNT_RE_TEMPLATE.format(label=re.escape(label))
    match = re.search(pattern, html, re.S)
    if match is None:
        return None
    return _to_int(match.group(1))
81
+
82
+
83
def approximate_age_days(text: str) -> int | None:
    """Estimate whole days from a board age label such as ``"3 weeks ago"``.

    A month is taken as 30 days and a year as 365; anything shorter than a day
    truncates to 0. A label that does not follow the ``"<n> <unit>(s) ago"``
    shape yields ``None`` so that an unrecognized label reads as "age unknown"
    rather than "brand new".
    """

    parsed = _AGE_TEXT_RE.search(text.strip().lower())
    if parsed is None:
        return None
    count_text, unit_name = parsed.groups()
    days_per_unit = _AGE_UNIT_DAYS[unit_name]
    # int() truncates toward zero, so partial days are dropped.
    return int(int(count_text) * days_per_unit)
96
+
97
+
98
+ def _clean_text(value: str) -> str:
99
+ return re.sub(r"\s+", " ", html_lib.unescape(value)).strip()
100
+
101
+
102
def parse_board_html(html: str, org: str) -> dict[str, Any]:
    """Parse one saved board page into a normalized board mapping.

    Returns ``org``, ``source_url``, the board's true ``open_count`` /
    ``completed_count`` (when rendered), and the visible ``bounties``: each with
    ``amount_usd``, ``repository`` (GitHub ``owner/repo``, which may differ from
    the Algora org handle), ``issue_number``, ``url``, ``title``, ``age``
    (board label plus ``approx_days``), and ``attempt_count`` (declared claims).
    Rows missing an amount or issue link are skipped rather than guessed; a row
    whose title is missing or empty is reported as ``"Untitled bounty"``.
    Raises ``ValueError`` when ``html`` is not an Algora bounty board (for
    example a login redirect).
    """

    if _BOARD_MARKER not in html:
        raise ValueError(
            "source is not a server-rendered Algora bounty board page "
            "(expected the open-bounties tab marker)"
        )
    bounties: list[dict[str, Any]] = []
    for chunk in _ROW_SPLIT_RE.split(html)[1:]:
        # A split chunk runs to the next ``<tr`` -- or, for the final row, to
        # the end of the document. Cut it at the row's own closing tag so
        # markup after the table can never be read as this row's title, age,
        # or claim count. Without a ``</tr>`` the whole chunk is kept.
        row = chunk.partition("</tr>")[0]
        amount_match = _AMOUNT_RE.search(row)
        link_match = _ISSUE_LINK_RE.search(row)
        if not amount_match or not link_match:
            continue
        owner, repo, number = link_match.group(1), link_match.group(2), link_match.group(3)
        title_match = _TITLE_RE.search(row)
        age_match = _AGE_RE.search(row)
        claims_match = _CLAIMS_RE.search(row)
        age_text = _clean_text(age_match.group(1)) if age_match else None
        # An empty title element falls back the same way a missing one does.
        title = _clean_text(title_match.group(1)) if title_match else ""
        bounties.append(
            {
                "amount_usd": float(amount_match.group(1).replace(",", "")),
                "repository": f"{owner}/{repo}",
                "issue_number": int(number),
                "url": f"https://github.com/{owner}/{repo}/issues/{number}",
                "title": title or "Untitled bounty",
                "age": {
                    "text": age_text,
                    "approx_days": approximate_age_days(age_text) if age_text else None,
                },
                "attempt_count": _to_int(claims_match.group(1)) if claims_match else 0,
            }
        )
    return {
        "schema_version": ALGORA_BOARD_SCHEMA_VERSION,
        "org": org,
        "source_url": board_url(org),
        "open_count": _tab_count(html, "Open"),
        "completed_count": _tab_count(html, "Completed"),
        "bounties": bounties,
        "visible_usd_total": round(sum(b["amount_usd"] for b in bounties), 2),
        # The server renders only the first page of open bounties; open_count is
        # the true total, so consumers can see exactly how partial the table is.
        "server_rendered_rows_only": True,
    }
157
+
158
+
159
def board_issue_records(
    board: dict[str, Any], *, retrieved_at: str | None = None
) -> list[dict[str, Any]]:
    """Turn a parsed board into scored issue records ready for the store.

    Every visible bounty becomes a :class:`FundedIssue`, is scored through
    ``score_funded_issues``, and the resulting issue mapping is extended with
    the board evidence: ``funding.verified`` / ``funding.evidence_url`` (the
    board's ``source_url``), ``attempt_count``, the ``posted`` age, and a
    ``board`` provenance mapping carrying ``retrieved_at``. Bounties whose
    declared claims reach the contested threshold also carry the
    ``contested_bounty`` flag. The records feed ``merge_into_store`` directly.
    """

    threshold = COMPETITION_THRESHOLDS["distinct_claimants_contested"]
    visible = board["bounties"]

    def _as_issue(item: dict[str, Any]) -> FundedIssue:
        # Baseline flags applied to every board bounty, plus the contested
        # flag once enough claims have been declared.
        flags = ["no_contribution_guidelines", "spam_attractive"]
        if item["attempt_count"] >= threshold:
            flags.append(CONTESTED_BOUNTY_FLAG)
        return FundedIssue(
            id=f"algora-board-{item['repository']}#{item['issue_number']}",
            platform="algora",
            repository=item["repository"],
            issue_number=item["issue_number"],
            title=item["title"],
            url=item["url"],
            funding_amount=item["amount_usd"],
            funding_currency="USD",
            labels=["bounty"],
            risk_flags=sorted(flags),
            opportunity_state="active",
        )

    issues = [_as_issue(item) for item in visible]
    # Scored rows are matched back to their board row by issue URL.
    lookup = {item["url"]: item for item in visible}
    provenance = {
        "org": board["org"],
        "source": "algora_board",
        "retrieved_at": retrieved_at,
    }
    records: list[dict[str, Any]] = []
    for scored in score_funded_issues(issues)["scores"]:
        record = dict(scored["issue"])
        record["score"] = scored["score"]
        source = lookup[record["url"]]
        record.update(
            {
                "funding": {
                    **record["funding"],
                    "verified": True,
                    "evidence_url": board["source_url"],
                },
                "attempt_count": source["attempt_count"],
                "posted": dict(source["age"]),
                # Copy per record so records never share one mutable mapping.
                "board": dict(provenance),
            }
        )
        records.append(record)
    return records
214
+
215
+
216
def board_payload(
    board: dict[str, Any], records: list[dict[str, Any]], *, retrieved_at: str | None = None
) -> dict[str, Any]:
    """Build the read-only payload envelope around a board's issue records.

    Copies the board's identity, counts, and totals, reports how many records
    are visible, and declares that nothing here needs network access, GitHub
    write permission, an external model, or billing.
    """

    payload: dict[str, Any] = {
        "schema_version": ALGORA_BOARD_SCHEMA_VERSION,
        "org": board["org"],
        "source_url": board["source_url"],
        "retrieved_at": retrieved_at,
    }
    # Keys are added in a fixed order so the serialized layout stays stable.
    for key in ("open_count", "completed_count"):
        payload[key] = board[key]
    payload["visible_rows"] = len(records)
    for key in ("visible_usd_total", "server_rendered_rows_only"):
        payload[key] = board[key]
    payload["read_only"] = True
    payload["blocked_actions"] = list(BLOCKED_ACTIONS)
    payload["requirements"] = dict.fromkeys(
        (
            "network_required",
            "github_write_permission_required",
            "external_model_required",
            "billing_required",
        ),
        False,
    )
    payload["issues"] = records
    return payload
@@ -0,0 +1,112 @@
1
+ """Permanent source-level blocklist for the read-only funded-issues tracker.
2
+
3
+ The owner-level heuristic in :mod:`patchrail.funded_issues.source_noise` flags
4
+ suspicious sources *after* they are already in a tracker store. That is the
5
+ wrong layer for sources that have been positively verified as fake: a honeypot
6
+ owner that floods the feed with templated "Test Bounty" issues and unverifiable
7
+ payouts should never be allowed back in, no matter how its metadata looks on a
8
+ later screening pass.
9
+
10
+ This module is that hard gate. :data:`BLOCKLISTED_OWNERS` holds owners whose
11
+ listings were manually verified as fake bounty postings (templated test issues,
12
+ no payout trail, throwaway accounts). Records attributed to a blocklisted owner
13
+ are dropped at ingest time by :func:`patchrail.funded_issues.store.merge_into_store`,
14
+ and :func:`purge_blocklisted_entries` removes any that predate the blocklist
15
+ from existing stores -- the ``track`` CLI command runs it on every merge so old
16
+ stores self-heal.
17
+
18
+ Like the rest of the tracker this module is pure and offline: matching is
19
+ string comparison on already-collected records, nothing here performs a network
20
+ call or writes to any third party. The list is intentionally code, not config:
21
+ removing an owner requires a reviewed change, which is the point.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+ from typing import Any
28
+
29
# Schema identifier stamped on blocklist summaries.
BLOCKLIST_SCHEMA_VERSION = "patchrail.funded_issues.blocklist.v1"

# Owners verified as fake-bounty sources (2026-06-10 screening: templated
# honeypot issues, unverifiable payouts, throwaway accounts). Lowercase.
# Permanent: entries leave this set only via a reviewed code change.
BLOCKLISTED_OWNERS = frozenset(
    {
        "clankernation",
        "securebananalabs",
        "xevrion-v2",
    }
)

# Owner extraction mirrors source_noise: GitHub API references keep the owner in
# a ``/repos/<owner>/`` segment, browser URLs in ``github.com/<owner>/``.
# Both patterns are unanchored, so they match anywhere in the string. Note that
# ``/repos/<x>/`` therefore also matches a browser URL whose repository is
# literally named ``repos`` (``github.com/<owner>/repos/issues/1``).
_REPOS_URL_OWNER_RE = re.compile(r"/repos/([^/]+)/")
_HTML_URL_OWNER_RE = re.compile(r"github\.com/([^/\s]+)/")
46
+
47
+
48
def is_blocklisted_owner(owner: Any) -> bool:
    """Report whether ``owner`` is on the permanent blocklist, ignoring case.

    Falsy values (``None``, ``""``) are treated as an empty owner name.
    """

    candidate = str(owner).strip().lower() if owner else ""
    return candidate in BLOCKLISTED_OWNERS
52
+
53
+
54
def record_owner(record: dict[str, Any]) -> str:
    """Derive the owning account from a normalized issue record.

    Resolution order:

    1. an explicit ``owner`` value;
    2. the ``github.com/<owner>/`` segment of a browser URL;
    3. the ``/repos/<owner>/`` segment of an API-style URL;
    4. the leading segment of ``repository`` (skipping an API-style
       ``repos/`` prefix).

    The browser segment is tried before the API segment because the API
    pattern is unanchored: for ``https://github.com/<owner>/repos/issues/1``
    (a repository named ``repos``) it would capture ``issues`` and let that
    owner slip past the blocklist. When the segment after ``github.com/`` is
    ``repos`` itself (as in ``api.github.com/repos/<owner>/...``) the API
    pattern decides instead.

    Returns ``""`` when no owner can be derived -- unknown owners are never
    treated as blocklisted.
    """

    explicit = record.get("owner")
    if explicit:
        return str(explicit)
    url = str(record.get("url") or "")
    browser_match = _HTML_URL_OWNER_RE.search(url)
    if browser_match and browser_match.group(1).lower() != "repos":
        return browser_match.group(1)
    api_match = _REPOS_URL_OWNER_RE.search(url)
    if api_match:
        return api_match.group(1)
    if browser_match:
        # ``github.com/repos/`` with no usable API segment after it: keep the
        # previous behavior and report the captured segment.
        return browser_match.group(1)
    repository = str(record.get("repository") or "")
    segments = [part for part in repository.split("/") if part]
    if len(segments) >= 2 and segments[0] == "repos":
        return segments[1]
    if segments:
        return segments[0]
    return ""
81
+
82
+
83
def is_blocklisted_record(record: dict[str, Any]) -> bool:
    """Report whether the owner derived from ``record`` is blocklisted."""

    owner = record_owner(record)
    return is_blocklisted_owner(owner)
87
+
88
+
89
def purge_blocklisted_entries(store: dict[str, Any]) -> dict[str, Any]:
    """Remove every blocklisted owner's entries from ``store`` in place.

    The owner of each entry is derived from its ``issue`` record, falling back
    to the entry's URL key. A store whose ``entries`` value is missing or
    ``None`` is treated as empty.

    Returns a summary with the number of ``removed`` entries and a sorted
    ``removed_owners`` list of the blocklisted owners that were present,
    normalized (trimmed, lowercase) the same way they are matched. Safe to run
    repeatedly; a clean store is left untouched.
    """

    entries = store.get("entries") or {}
    removed_owners: set[str] = set()
    removed_urls = []
    for url, entry in entries.items():
        issue = entry.get("issue") or {}
        owner = record_owner(issue) or record_owner({"url": url})
        if is_blocklisted_owner(owner):
            removed_urls.append(url)
            # Normalize exactly as the match does, so whitespace variants of
            # one owner are reported once.
            removed_owners.add(str(owner).strip().lower())
    # Delete after the scan: a dict must not change size while being iterated.
    for url in removed_urls:
        del entries[url]
    return {
        "schema_version": BLOCKLIST_SCHEMA_VERSION,
        "removed": len(removed_urls),
        "removed_owners": sorted(removed_owners),
    }