patchrail 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchrail/__init__.py +7 -0
- patchrail/__main__.py +7 -0
- patchrail/ci/__init__.py +7 -0
- patchrail/ci/classify.py +888 -0
- patchrail/cli.py +8566 -0
- patchrail/funded_issues/__init__.py +138 -0
- patchrail/funded_issues/algora_board.py +240 -0
- patchrail/funded_issues/blocklist.py +112 -0
- patchrail/funded_issues/discovery.py +4091 -0
- patchrail/funded_issues/importers.py +316 -0
- patchrail/funded_issues/source_noise.py +349 -0
- patchrail/funded_issues/store.py +459 -0
- patchrail/queue/__init__.py +75 -0
- patchrail/queue/server.py +273 -0
- patchrail/queue/status.py +756 -0
- patchrail/queue/store.py +600 -0
- patchrail/reviewer_quick_check.py +650 -0
- patchrail/schemas/__init__.py +1 -0
- patchrail/schemas/application-dossier.v1.schema.json +305 -0
- patchrail/schemas/ci-benchmark.v1.schema.json +174 -0
- patchrail/schemas/ci-fixture-check.v1.schema.json +122 -0
- patchrail/schemas/ci-pilot-metrics.v1.schema.json +164 -0
- patchrail/schemas/ci-pilot-summary.v1.schema.json +146 -0
- patchrail/schemas/ci-result.v1.schema.json +133 -0
- patchrail/schemas/funded-issues-client-report.v1.schema.json +524 -0
- patchrail/schemas/funded-issues-recheck-queue.v1.schema.json +333 -0
- patchrail/schemas/funded-issues-recheck-summary.v1.schema.json +136 -0
- patchrail/schemas/funded-issues-report.v1.schema.json +836 -0
- patchrail/schemas/funded-issues-shortlist.v1.schema.json +953 -0
- patchrail/schemas/funded-issues-store-status.v1.schema.json +96 -0
- patchrail/schemas/funded-issues-store.v1.schema.json +117 -0
- patchrail/schemas/queue-audit-event.v1.schema.json +44 -0
- patchrail/schemas/queue-audit-summary.v1.schema.json +169 -0
- patchrail/schemas/queue-gate-report.v1.schema.json +158 -0
- patchrail/schemas/queue-policy-resolution.v1.schema.json +188 -0
- patchrail/schemas/queue-policy-scan.v1.schema.json +175 -0
- patchrail/schemas/queue-proposal.v1.schema.json +61 -0
- patchrail/schemas/queue-review.v1.schema.json +218 -0
- patchrail/schemas/queue-status.v1.schema.json +179 -0
- patchrail/schemas/queue-work-item.v1.schema.json +64 -0
- patchrail/schemas/reviewer-quick-check-artifacts.v1.schema.json +104 -0
- patchrail/web_metrics.py +649 -0
- patchrail-0.1.0.dist-info/METADATA +279 -0
- patchrail-0.1.0.dist-info/RECORD +47 -0
- patchrail-0.1.0.dist-info/WHEEL +4 -0
- patchrail-0.1.0.dist-info/entry_points.txt +2 -0
- patchrail-0.1.0.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Read-only funded issue discovery helpers."""
|
|
2
|
+
|
|
3
|
+
from patchrail.funded_issues.discovery import (
|
|
4
|
+
CLIENT_PROFILE_SCHEMA_VERSION,
|
|
5
|
+
COMPETITION_BATCH_SCHEMA_VERSION,
|
|
6
|
+
COMPETITION_SIGNAL_SCHEMA_VERSION,
|
|
7
|
+
PAYOUT_EFFORT_BATCH_SCHEMA_VERSION,
|
|
8
|
+
PAYOUT_EFFORT_SIGNAL_SCHEMA_VERSION,
|
|
9
|
+
STALENESS_BATCH_SCHEMA_VERSION,
|
|
10
|
+
STALENESS_SIGNAL_SCHEMA_VERSION,
|
|
11
|
+
TESTABILITY_BATCH_SCHEMA_VERSION,
|
|
12
|
+
TESTABILITY_SIGNAL_SCHEMA_VERSION,
|
|
13
|
+
ClientProfile,
|
|
14
|
+
FundedIssue,
|
|
15
|
+
VALID_OPPORTUNITY_STATES,
|
|
16
|
+
VALID_RISK_LEVELS,
|
|
17
|
+
assess_bounty_competition,
|
|
18
|
+
assess_competition_batch,
|
|
19
|
+
assess_issue_staleness,
|
|
20
|
+
assess_issue_testability,
|
|
21
|
+
assess_payout_effort,
|
|
22
|
+
assess_payout_effort_batch,
|
|
23
|
+
assess_staleness_batch,
|
|
24
|
+
assess_testability_batch,
|
|
25
|
+
cash_actions_funded_issues,
|
|
26
|
+
client_report_funded_issues,
|
|
27
|
+
explain_issue,
|
|
28
|
+
fulfillment_packet_funded_issues,
|
|
29
|
+
funded_issues_payload,
|
|
30
|
+
load_client_profile,
|
|
31
|
+
load_funded_issues,
|
|
32
|
+
recheck_funded_issues,
|
|
33
|
+
report_funded_issues,
|
|
34
|
+
score_funded_issues,
|
|
35
|
+
shortlist_funded_issues,
|
|
36
|
+
summarize_issues,
|
|
37
|
+
validate_funded_issues,
|
|
38
|
+
)
|
|
39
|
+
from patchrail.funded_issues.algora_board import (
|
|
40
|
+
ALGORA_BOARD_SCHEMA_VERSION,
|
|
41
|
+
board_issue_records,
|
|
42
|
+
board_payload,
|
|
43
|
+
board_url,
|
|
44
|
+
parse_board_html,
|
|
45
|
+
)
|
|
46
|
+
from patchrail.funded_issues.blocklist import (
|
|
47
|
+
BLOCKLIST_SCHEMA_VERSION,
|
|
48
|
+
BLOCKLISTED_OWNERS,
|
|
49
|
+
is_blocklisted_owner,
|
|
50
|
+
is_blocklisted_record,
|
|
51
|
+
purge_blocklisted_entries,
|
|
52
|
+
)
|
|
53
|
+
from patchrail.funded_issues.importers import SUPPORTED_PROVIDERS, import_provider_export
|
|
54
|
+
from patchrail.funded_issues.source_noise import (
|
|
55
|
+
SOURCE_NOISE_SCHEMA_VERSION,
|
|
56
|
+
STRONG_NOISE_FLAGS,
|
|
57
|
+
apply_source_noise_to_store,
|
|
58
|
+
assess_owner_source_noise,
|
|
59
|
+
entries_by_owner,
|
|
60
|
+
)
|
|
61
|
+
from patchrail.funded_issues.store import (
|
|
62
|
+
RECHECK_SUMMARY_SCHEMA_VERSION,
|
|
63
|
+
STORE_SCHEMA_VERSION,
|
|
64
|
+
STORE_STATUS_SCHEMA_VERSION,
|
|
65
|
+
MergeSummary,
|
|
66
|
+
RecheckSummary,
|
|
67
|
+
apply_recheck_to_store,
|
|
68
|
+
empty_store,
|
|
69
|
+
load_store,
|
|
70
|
+
merge_into_store,
|
|
71
|
+
save_store,
|
|
72
|
+
store_status,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
__all__ = [
|
|
76
|
+
"FundedIssue",
|
|
77
|
+
"ALGORA_BOARD_SCHEMA_VERSION",
|
|
78
|
+
"BLOCKLIST_SCHEMA_VERSION",
|
|
79
|
+
"BLOCKLISTED_OWNERS",
|
|
80
|
+
"CLIENT_PROFILE_SCHEMA_VERSION",
|
|
81
|
+
"COMPETITION_BATCH_SCHEMA_VERSION",
|
|
82
|
+
"COMPETITION_SIGNAL_SCHEMA_VERSION",
|
|
83
|
+
"PAYOUT_EFFORT_BATCH_SCHEMA_VERSION",
|
|
84
|
+
"PAYOUT_EFFORT_SIGNAL_SCHEMA_VERSION",
|
|
85
|
+
"RECHECK_SUMMARY_SCHEMA_VERSION",
|
|
86
|
+
"SOURCE_NOISE_SCHEMA_VERSION",
|
|
87
|
+
"STALENESS_BATCH_SCHEMA_VERSION",
|
|
88
|
+
"STALENESS_SIGNAL_SCHEMA_VERSION",
|
|
89
|
+
"STORE_SCHEMA_VERSION",
|
|
90
|
+
"STORE_STATUS_SCHEMA_VERSION",
|
|
91
|
+
"STRONG_NOISE_FLAGS",
|
|
92
|
+
"TESTABILITY_BATCH_SCHEMA_VERSION",
|
|
93
|
+
"TESTABILITY_SIGNAL_SCHEMA_VERSION",
|
|
94
|
+
"ClientProfile",
|
|
95
|
+
"MergeSummary",
|
|
96
|
+
"RecheckSummary",
|
|
97
|
+
"SUPPORTED_PROVIDERS",
|
|
98
|
+
"VALID_OPPORTUNITY_STATES",
|
|
99
|
+
"VALID_RISK_LEVELS",
|
|
100
|
+
"assess_bounty_competition",
|
|
101
|
+
"assess_competition_batch",
|
|
102
|
+
"assess_issue_staleness",
|
|
103
|
+
"assess_issue_testability",
|
|
104
|
+
"assess_payout_effort",
|
|
105
|
+
"assess_payout_effort_batch",
|
|
106
|
+
"assess_staleness_batch",
|
|
107
|
+
"assess_testability_batch",
|
|
108
|
+
"apply_recheck_to_store",
|
|
109
|
+
"apply_source_noise_to_store",
|
|
110
|
+
"assess_owner_source_noise",
|
|
111
|
+
"board_issue_records",
|
|
112
|
+
"board_payload",
|
|
113
|
+
"board_url",
|
|
114
|
+
"cash_actions_funded_issues",
|
|
115
|
+
"client_report_funded_issues",
|
|
116
|
+
"empty_store",
|
|
117
|
+
"entries_by_owner",
|
|
118
|
+
"explain_issue",
|
|
119
|
+
"fulfillment_packet_funded_issues",
|
|
120
|
+
"funded_issues_payload",
|
|
121
|
+
"import_provider_export",
|
|
122
|
+
"is_blocklisted_owner",
|
|
123
|
+
"is_blocklisted_record",
|
|
124
|
+
"load_client_profile",
|
|
125
|
+
"load_funded_issues",
|
|
126
|
+
"load_store",
|
|
127
|
+
"merge_into_store",
|
|
128
|
+
"parse_board_html",
|
|
129
|
+
"purge_blocklisted_entries",
|
|
130
|
+
"recheck_funded_issues",
|
|
131
|
+
"report_funded_issues",
|
|
132
|
+
"save_store",
|
|
133
|
+
"score_funded_issues",
|
|
134
|
+
"shortlist_funded_issues",
|
|
135
|
+
"store_status",
|
|
136
|
+
"summarize_issues",
|
|
137
|
+
"validate_funded_issues",
|
|
138
|
+
]
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""Parse a saved Algora organization bounty-board page into funded issues.
|
|
2
|
+
|
|
3
|
+
Algora renders each organization's public bounty board at
|
|
4
|
+
``https://algora.io/<org>/bounties``. The initial server-rendered HTML carries
|
|
5
|
+
the board's open/completed totals and a table of open bounties with the four
|
|
6
|
+
public facts the tracker needs and that generic issue scraping cannot provide:
|
|
7
|
+
|
|
8
|
+
* the **funder-stated USD amount** (the board is the funding organization's own
|
|
9
|
+
listing, so the amount is primary-source evidence, not aggregator hearsay);
|
|
10
|
+
* the GitHub issue URL and reference;
|
|
11
|
+
* the posting age shown on the board;
|
|
12
|
+
* the number of **claims** (declared solve attempts) on the bounty.
|
|
13
|
+
|
|
14
|
+
This module is a pure, offline parser for a *locally saved copy* of that page:
|
|
15
|
+
save the board with your browser or any HTTP client, then run
|
|
16
|
+
``patchrail funded-issues import-algora-board``. Keeping the fetch outside the
|
|
17
|
+
toolkit preserves the tracker's no-network rule (network access requires
|
|
18
|
+
explicit opt-in) and keeps tests hermetic. Nothing here claims, comments, or
|
|
19
|
+
writes to any third party.
|
|
20
|
+
|
|
21
|
+
Honesty note: the server-rendered table contains only the first page of open
|
|
22
|
+
bounties (about ten rows); the board's ``open_count`` is still the true total,
|
|
23
|
+
so the payload reports both and never pretends the visible subset is complete.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import html as html_lib
|
|
29
|
+
import re
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
from patchrail.funded_issues.discovery import (
|
|
33
|
+
BLOCKED_ACTIONS,
|
|
34
|
+
COMPETITION_THRESHOLDS,
|
|
35
|
+
CONTESTED_BOUNTY_FLAG,
|
|
36
|
+
FundedIssue,
|
|
37
|
+
score_funded_issues,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
ALGORA_BOARD_SCHEMA_VERSION = "patchrail.funded_issues.algora_board.v1"
|
|
41
|
+
|
|
42
|
+
# Stable markers in the board's server-rendered markup. The page is a LiveView
|
|
43
|
+
# app, but these classes/attributes have been stable across organizations; the
|
|
44
|
+
# parser fails loudly (ValueError) when the board scaffolding is absent so a
|
|
45
|
+
# login redirect or an unrelated page is never silently parsed as zero bounties.
|
|
46
|
+
_BOARD_MARKER = 'phx-value-tab="open"'
|
|
47
|
+
_ROW_SPLIT_RE = re.compile(r"<tr\b")
|
|
48
|
+
_AMOUNT_RE = re.compile(r"font-extrabold text-emerald-300[^\"]*\">\s*\$([\d,]+(?:\.\d{1,2})?)")
|
|
49
|
+
_ISSUE_LINK_RE = re.compile(
|
|
50
|
+
r"<a href=\"https://github\.com/([^/\"]+)/([^/\"]+)/issues/(\d+)\"[^>]*class=\"group/issue"
|
|
51
|
+
)
|
|
52
|
+
_TITLE_RE = re.compile(r"line-clamp-2[^\"]*\">\s*(.*?)\s*</p>", re.S)
|
|
53
|
+
_AGE_RE = re.compile(r"text-xs text-gray-400\">\s*([^<]+?)\s*</p>")
|
|
54
|
+
_CLAIMS_RE = re.compile(r">\s*([\d,]+)\s+claims?\s*<")
|
|
55
|
+
_TAB_COUNT_RE_TEMPLATE = r"{label}</div>\s*<span[^>]*>\s*([\d,]+)\s*</span>"
|
|
56
|
+
_AGE_TEXT_RE = re.compile(r"(\d+)\s+(minute|hour|day|week|month|year)s?\s+ago")
|
|
57
|
+
|
|
58
|
+
_AGE_UNIT_DAYS = {
|
|
59
|
+
"minute": 0.0,
|
|
60
|
+
"hour": 1.0 / 24.0,
|
|
61
|
+
"day": 1.0,
|
|
62
|
+
"week": 7.0,
|
|
63
|
+
"month": 30.0,
|
|
64
|
+
"year": 365.0,
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def board_url(org: str) -> str:
    """Build the public Algora bounty-board URL for the organization ``org``.

    ``org`` is inserted into the path as given; no quoting or validation is
    applied here.
    """
    template = "https://algora.io/{}/bounties"
    return template.format(org)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _to_int(text: str) -> int:
|
|
75
|
+
return int(text.replace(",", ""))
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _tab_count(html: str, label: str) -> int | None:
    """Return the count rendered next to the board tab named ``label``.

    The tab markup is located with ``_TAB_COUNT_RE_TEMPLATE``: the label text
    immediately followed by ``</div>`` and a ``<span>`` holding digits (with
    optional thousands commas). Returns ``None`` when no such tab is found.

    ``label`` is matched literally: it is escaped before being interpolated
    into the pattern so regex metacharacters in a label cannot alter the match.
    """
    pattern = _TAB_COUNT_RE_TEMPLATE.format(label=re.escape(label))
    match = re.search(pattern, html, re.S)
    return _to_int(match.group(1)) if match else None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def approximate_age_days(text: str) -> int | None:
    """Estimate whole days from a board age label such as ``"3 weeks ago"``.

    The label is stripped and lower-cased, then matched against
    ``_AGE_TEXT_RE``. The matched count is multiplied by the unit's entry in
    ``_AGE_UNIT_DAYS`` and truncated to an ``int``. Labels that do not match
    yield ``None`` so an unrecognized label reads as "age unknown" rather
    than "brand new".
    """
    found = _AGE_TEXT_RE.search(text.strip().lower())
    if found is None:
        return None
    count_text, unit_name = found.groups()
    days_per_unit = _AGE_UNIT_DAYS[unit_name]
    return int(int(count_text) * days_per_unit)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _clean_text(value: str) -> str:
|
|
99
|
+
return re.sub(r"\s+", " ", html_lib.unescape(value)).strip()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def parse_board_html(html: str, org: str) -> dict[str, Any]:
    """Parse one saved board page into a normalized board mapping.

    Returns ``schema_version``, ``org``, ``source_url``, the board's
    ``open_count`` / ``completed_count`` (``None`` when the tab count is not
    found in the markup), the visible ``bounties``, their ``visible_usd_total``,
    and ``server_rendered_rows_only``. Each bounty carries ``amount_usd``,
    ``repository`` (GitHub ``owner/repo``, which may differ from the Algora org
    handle), ``issue_number``, ``url``, ``title`` (``"Untitled bounty"`` when no
    title is found), ``age`` (board label plus ``approx_days``), and
    ``attempt_count`` (declared claims; ``0`` when no claims label is found).

    Rows missing an amount or issue link are skipped rather than guessed.
    Every field is read from the row's own markup only: each chunk produced by
    splitting on ``<tr`` is cut at its first ``</tr>``, so a row can never pick
    up a title, age, or claims label from whatever follows it in the document
    (for the last row, that would otherwise be the rest of the page).

    Raises ``ValueError`` when ``html`` does not contain the open-bounties tab
    marker (for example a login redirect).
    """
    if _BOARD_MARKER not in html:
        raise ValueError(
            "source is not a server-rendered Algora bounty board page "
            "(expected the open-bounties tab marker)"
        )
    bounties: list[dict[str, Any]] = []
    # The first split element is everything before the first <tr; skip it.
    for chunk in _ROW_SPLIT_RE.split(html)[1:]:
        # Keep only this row. When the closing tag is absent, partition()
        # returns the whole chunk, which matches the previous behavior.
        row = chunk.partition("</tr>")[0]
        amount_match = _AMOUNT_RE.search(row)
        link_match = _ISSUE_LINK_RE.search(row)
        if not amount_match or not link_match:
            continue
        owner, repo, number = link_match.groups()
        title_match = _TITLE_RE.search(row)
        age_match = _AGE_RE.search(row)
        claims_match = _CLAIMS_RE.search(row)
        age_text = _clean_text(age_match.group(1)) if age_match else None
        bounties.append(
            {
                "amount_usd": float(amount_match.group(1).replace(",", "")),
                "repository": f"{owner}/{repo}",
                "issue_number": int(number),
                "url": f"https://github.com/{owner}/{repo}/issues/{number}",
                "title": _clean_text(title_match.group(1)) if title_match else "Untitled bounty",
                "age": {
                    "text": age_text,
                    "approx_days": approximate_age_days(age_text) if age_text else None,
                },
                "attempt_count": _to_int(claims_match.group(1)) if claims_match else 0,
            }
        )
    return {
        "schema_version": ALGORA_BOARD_SCHEMA_VERSION,
        "org": org,
        "source_url": board_url(org),
        "open_count": _tab_count(html, "Open"),
        "completed_count": _tab_count(html, "Completed"),
        "bounties": bounties,
        "visible_usd_total": round(sum(b["amount_usd"] for b in bounties), 2),
        # The server renders only the first page of open bounties; open_count is
        # the true total, so consumers can see exactly how partial the table is.
        "server_rendered_rows_only": True,
    }
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def board_issue_records(
    board: dict[str, Any], *, retrieved_at: str | None = None
) -> list[dict[str, Any]]:
    """Turn a parsed board mapping into scored issue records.

    Every visible bounty becomes a :class:`FundedIssue` on the ``algora``
    platform with the ``bounty`` label, an ``active`` opportunity state and a
    sorted list of risk flags; ``CONTESTED_BOUNTY_FLAG`` is added once the
    bounty's ``attempt_count`` reaches the
    ``distinct_claimants_contested`` threshold. The issues are passed through
    ``score_funded_issues`` and each scored row is copied into a plain dict
    that is then extended with:

    * ``score`` -- the row's score;
    * ``funding`` -- the existing funding mapping plus ``verified=True`` and
      ``evidence_url`` set to the board's ``source_url``;
    * ``attempt_count`` and ``posted`` (a copy of the bounty's ``age``);
    * ``board`` -- provenance (``org``, ``source``, ``retrieved_at``).

    Scored rows are matched back to their bounty by ``url``.
    """
    threshold = COMPETITION_THRESHOLDS["distinct_claimants_contested"]
    visible = board["bounties"]

    candidates = []
    for item in visible:
        flags = ["no_contribution_guidelines", "spam_attractive"]
        if item["attempt_count"] >= threshold:
            flags.append(CONTESTED_BOUNTY_FLAG)
        repository, number = item["repository"], item["issue_number"]
        candidates.append(
            FundedIssue(
                id=f"algora-board-{repository}#{number}",
                platform="algora",
                repository=repository,
                issue_number=number,
                title=item["title"],
                url=item["url"],
                funding_amount=item["amount_usd"],
                funding_currency="USD",
                labels=["bounty"],
                risk_flags=sorted(flags),
                opportunity_state="active",
            )
        )

    # Index the raw bounties so board-only facts can be re-attached after scoring.
    bounty_for_url = {item["url"]: item for item in visible}
    scored_rows = score_funded_issues(candidates)["scores"]

    output: list[dict[str, Any]] = []
    for scored in scored_rows:
        entry = dict(scored["issue"])
        entry["score"] = scored["score"]
        source_row = bounty_for_url[entry["url"]]
        verified_funding = {
            **entry["funding"],
            "verified": True,
            "evidence_url": board["source_url"],
        }
        entry["funding"] = verified_funding
        entry["attempt_count"] = source_row["attempt_count"]
        entry["posted"] = dict(source_row["age"])
        provenance = {
            "org": board["org"],
            "source": "algora_board",
            "retrieved_at": retrieved_at,
        }
        entry["board"] = provenance
        output.append(entry)
    return output
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def board_payload(
    board: dict[str, Any], records: list[dict[str, Any]], *, retrieved_at: str | None = None
) -> dict[str, Any]:
    """Assemble the read-only payload envelope around parsed board records.

    Board-level facts (``org``, ``source_url``, the tab counts, the visible USD
    total and the partial-table marker) are copied from ``board``;
    ``visible_rows`` is the number of ``records``; ``blocked_actions`` is a
    fresh list copy of ``BLOCKED_ACTIONS``. The payload is marked
    ``read_only`` and every entry under ``requirements`` is ``False``.
    """
    # Nothing in this payload needs any external capability.
    requirements = {
        "network_required": False,
        "github_write_permission_required": False,
        "external_model_required": False,
        "billing_required": False,
    }

    envelope: dict[str, Any] = {"schema_version": ALGORA_BOARD_SCHEMA_VERSION}
    envelope["org"] = board["org"]
    envelope["source_url"] = board["source_url"]
    envelope["retrieved_at"] = retrieved_at
    envelope["open_count"] = board["open_count"]
    envelope["completed_count"] = board["completed_count"]
    envelope["visible_rows"] = len(records)
    envelope["visible_usd_total"] = board["visible_usd_total"]
    envelope["server_rendered_rows_only"] = board["server_rendered_rows_only"]
    envelope["read_only"] = True
    envelope["blocked_actions"] = list(BLOCKED_ACTIONS)
    envelope["requirements"] = requirements
    envelope["issues"] = records
    return envelope
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Permanent source-level blocklist for the read-only funded-issues tracker.
|
|
2
|
+
|
|
3
|
+
The owner-level heuristic in :mod:`patchrail.funded_issues.source_noise` flags
|
|
4
|
+
suspicious sources *after* they are already in a tracker store. That is the
|
|
5
|
+
wrong layer for sources that have been positively verified as fake: a honeypot
|
|
6
|
+
owner that floods the feed with templated "Test Bounty" issues and unverifiable
|
|
7
|
+
payouts should never be allowed back in, no matter how its metadata looks on a
|
|
8
|
+
later screening pass.
|
|
9
|
+
|
|
10
|
+
This module is that hard gate. :data:`BLOCKLISTED_OWNERS` holds owners whose
|
|
11
|
+
listings were manually verified as fake bounty postings (templated test issues,
|
|
12
|
+
no payout trail, throwaway accounts). Records attributed to a blocklisted owner
|
|
13
|
+
are dropped at ingest time by :func:`patchrail.funded_issues.store.merge_into_store`,
|
|
14
|
+
and :func:`purge_blocklisted_entries` removes any that predate the blocklist
|
|
15
|
+
from existing stores -- the ``track`` CLI command runs it on every merge so old
|
|
16
|
+
stores self-heal.
|
|
17
|
+
|
|
18
|
+
Like the rest of the tracker this module is pure and offline: matching is
|
|
19
|
+
string comparison on already-collected records, nothing here performs a network
|
|
20
|
+
call or writes to any third party. The list is intentionally code, not config:
|
|
21
|
+
removing an owner requires a reviewed change, which is the point.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import re
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
BLOCKLIST_SCHEMA_VERSION = "patchrail.funded_issues.blocklist.v1"
|
|
30
|
+
|
|
31
|
+
# Owners verified as fake-bounty sources (2026-06-10 screening: templated
|
|
32
|
+
# honeypot issues, unverifiable payouts, throwaway accounts). Lowercase.
|
|
33
|
+
# Permanent: entries leave this set only via a reviewed code change.
|
|
34
|
+
BLOCKLISTED_OWNERS = frozenset(
|
|
35
|
+
{
|
|
36
|
+
"clankernation",
|
|
37
|
+
"securebananalabs",
|
|
38
|
+
"xevrion-v2",
|
|
39
|
+
}
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# Owner extraction mirrors source_noise: GitHub API references keep the owner in
# a ``/repos/<owner>/`` segment, browser URLs in ``github.com/<owner>/``.
_REPOS_URL_OWNER_RE = re.compile(r"/repos/([^/]+)/")
_HTML_URL_OWNER_RE = re.compile(r"github\.com/([^/\s]+)/")


def is_blocklisted_owner(owner: Any) -> bool:
    """True when ``owner`` (case-insensitive) is on the permanent blocklist.

    ``owner`` is stringified, stripped and lower-cased before the lookup;
    falsy values (``None``, ``""``) are never blocklisted.
    """

    return str(owner or "").strip().lower() in BLOCKLISTED_OWNERS


def record_owner(record: dict[str, Any]) -> str:
    """Derive the owning account from a normalized issue record.

    Resolution order:

    1. an explicit, truthy ``owner`` field (returned as ``str``);
    2. the first path segment of a ``github.com/<owner>/`` URL, unless that
       segment is literally ``repos`` -- the API form
       ``api.github.com/repos/<owner>/...``;
    3. the segment following ``/repos/`` in the URL;
    4. the leading segment of ``repository`` (skipping the API-style
       ``repos/`` prefix).

    The browser-URL segment is checked before the ``/repos/`` segment so that
    a repository that is itself named ``repos``
    (``github.com/<owner>/repos/issues/1``) resolves to ``<owner>`` rather than
    to ``issues``; otherwise such a URL would let a blocklisted owner slip past
    the gate.

    Returns ``""`` when no owner can be derived -- unknown owners are never
    treated as blocklisted. The returned value is not case-normalized.
    """

    owner = record.get("owner")
    if owner:
        return str(owner)

    url = str(record.get("url") or "")
    html_match = _HTML_URL_OWNER_RE.search(url)
    # On API URLs the segment right after the host is the literal "repos";
    # only then does the owner live in the following segment.
    if html_match and html_match.group(1).lower() != "repos":
        return html_match.group(1)
    api_match = _REPOS_URL_OWNER_RE.search(url)
    if api_match:
        return api_match.group(1)

    repository = str(record.get("repository") or "")
    segments = [part for part in repository.split("/") if part]
    if len(segments) >= 2 and segments[0] == "repos":
        return segments[1]
    if segments:
        return segments[0]
    return ""
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def is_blocklisted_record(record: dict[str, Any]) -> bool:
    """Report whether ``record``'s derived owner is on the permanent blocklist.

    The owner is resolved with :func:`record_owner` and then checked with
    :func:`is_blocklisted_owner`.
    """
    derived_owner = record_owner(record)
    return is_blocklisted_owner(derived_owner)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def purge_blocklisted_entries(store: dict[str, Any]) -> dict[str, Any]:
    """Remove every blocklisted owner's entries from ``store`` in place.

    ``store["entries"]`` is treated as a mapping of URL -> entry. For each
    entry the owner is derived from the entry's ``issue`` record and, when
    that yields nothing, from the entry's URL key. Entries whose owner is
    blocklisted are deleted from the mapping.

    Returns a summary with ``schema_version``, the number of ``removed``
    entries and a sorted ``removed_owners`` list of the blocklisted owners
    that were present. Owner names in the summary are stripped and
    lower-cased -- the same normalization the blocklist match uses -- so one
    owner is never reported twice under different spellings. Safe to run
    repeatedly; a clean store, or one whose ``entries`` is missing or
    ``None``, is left untouched.
    """

    entries = store.get("entries") or {}
    removed_owners: set[str] = set()
    removed_urls: list[str] = []
    for url, entry in entries.items():
        issue = entry.get("issue") or {}
        owner = record_owner(issue) or record_owner({"url": url})
        if is_blocklisted_owner(owner):
            removed_urls.append(url)
            removed_owners.add(owner.strip().lower())
    # Delete after the scan: the mapping must not change size while iterated.
    for url in removed_urls:
        del entries[url]
    return {
        "schema_version": BLOCKLIST_SCHEMA_VERSION,
        "removed": len(removed_urls),
        "removed_owners": sorted(removed_owners),
    }
|