dependency-scout 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/README.md +28 -0
- api/__init__.py +0 -0
- api/webhook.py +391 -0
- checks/README.md +76 -0
- checks/__init__.py +0 -0
- checks/attestation.py +20 -0
- checks/classifier.py +17 -0
- checks/custom_checks.py +34 -0
- checks/depsdev.py +52 -0
- checks/maintainer.py +20 -0
- checks/metadata.py +18 -0
- checks/osv.py +40 -0
- checks/package_diff.py +1143 -0
- checks/release_age.py +20 -0
- checks/release_notes.py +21 -0
- checks/scorecard.py +116 -0
- checks/security_advisories.py +111 -0
- checks/signatures/README.md +35 -0
- checks/signatures/__init__.py +195 -0
- checks/signatures/file_types.yaml +39 -0
- checks/signatures/net_calls.yaml +376 -0
- checks/signatures/obfuscation.yaml +128 -0
- checks/signatures/persistence.yaml +64 -0
- checks/socket.py +116 -0
- checks/version_lineage.py +261 -0
- classifiers/README.md +28 -0
- classifiers/__init__.py +84 -0
- classifiers/_helpers.py +461 -0
- classifiers/anthropic.py +179 -0
- classifiers/ollama.py +57 -0
- classifiers/openai.py +172 -0
- dependency_scout-0.1.0.dist-info/METADATA +227 -0
- dependency_scout-0.1.0.dist-info/RECORD +83 -0
- dependency_scout-0.1.0.dist-info/WHEEL +4 -0
- dependency_scout-0.1.0.dist-info/entry_points.txt +13 -0
- dependency_scout-0.1.0.dist-info/licenses/LICENSE +21 -0
- ecosystems/README.md +40 -0
- ecosystems/__init__.py +702 -0
- ecosystems/_registry.py +192 -0
- ecosystems/cargo.py +221 -0
- ecosystems/composer.py +273 -0
- ecosystems/docker.py +141 -0
- ecosystems/elm.py +186 -0
- ecosystems/github_actions.py +232 -0
- ecosystems/gomod.py +189 -0
- ecosystems/maven.py +307 -0
- ecosystems/mix.py +233 -0
- ecosystems/npm.py +370 -0
- ecosystems/nuget.py +245 -0
- ecosystems/pip.py +363 -0
- ecosystems/pub.py +219 -0
- ecosystems/remote.py +239 -0
- ecosystems/rubygems.py +270 -0
- ecosystems/swift.py +367 -0
- ecosystems/terraform.py +320 -0
- helpers/README.md +19 -0
- helpers/__init__.py +0 -0
- helpers/bot_parsers.py +81 -0
- helpers/cache.py +98 -0
- helpers/comment_formatter.py +325 -0
- helpers/config_provider.py +169 -0
- helpers/display.py +63 -0
- helpers/github_app.py +86 -0
- helpers/http.py +24 -0
- helpers/notification.py +164 -0
- helpers/pr_parser.py +192 -0
- helpers/prompts.py +186 -0
- helpers/temporal_client.py +24 -0
- models/README.md +74 -0
- models/__init__.py +49 -0
- models/checks.py +131 -0
- models/package.py +43 -0
- models/pr.py +64 -0
- models/triage.py +11 -0
- models/verdict.py +44 -0
- platforms/README.md +40 -0
- platforms/__init__.py +59 -0
- platforms/github.py +394 -0
- platforms/gitlab.py +274 -0
- workflows/README.md +41 -0
- workflows/__init__.py +0 -0
- workflows/package_triage_workflow.py +166 -0
- workflows/pr_action_workflow.py +256 -0
api/README.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# API
|
|
2
|
+
|
|
3
|
+
**When do you need to touch this directory?** When adding support for a new webhook source (a new bot, a new event type, or a new code host's webhook format). For adding new triage logic, see [`checks/`](../checks/README.md) or [`workflows/`](../workflows/README.md) instead.
|
|
4
|
+
|
|
5
|
+
This is the production entry point: a FastAPI application that receives webhooks from GitHub and GitLab, verifies their authenticity, parses the PR/MR metadata, and starts `PRActionWorkflow` asynchronously.
|
|
6
|
+
|
|
7
|
+
## Endpoints
|
|
8
|
+
|
|
9
|
+
| Endpoint | Platform | Event types handled |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| `POST /webhook` | GitHub | `pull_request`, `pull_request_review` |
|
|
12
|
+
| `POST /webhook/github` | GitHub | Same (alias) |
|
|
13
|
+
| `POST /webhook/gitlab` | GitLab | Merge Request Hook |
|
|
14
|
+
|
|
15
|
+
## What happens on each request
|
|
16
|
+
|
|
17
|
+
1. **Verify** — GitHub: HMAC-SHA256 signature check against `GITHUB_WEBHOOK_SECRET`. GitLab: token comparison against `GITLAB_WEBHOOK_SECRET`. Requests that fail verification get a 401; if the corresponding secret is not configured on the server, the endpoint returns a 500 instead.
|
|
18
|
+
2. **Filter** — only Dependabot and Renovate bot events are processed; everything else returns 200 immediately.
|
|
19
|
+
3. **Parse** — ecosystem, package name, old version, and new version are extracted from the PR title/body/branch name via `helpers/pr_parser.py`.
|
|
20
|
+
4. **Start workflow** — for PR/MR events, `PRActionWorkflow` is started via the Temporal client; for review/approval events, a signal is sent to the workflow already running for that PR/MR instead. The HTTP response returns 200 immediately; the workflow runs asynchronously.
|
|
21
|
+
|
|
22
|
+
## Running locally
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
uv run uvicorn api.webhook:app --reload
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Requires a running Temporal server (`temporal server start-dev`) and the worker (`uv run python -m worker`). For end-to-end local testing without a real webhook, use `uv run dependency-scout triage <PR URL>` directly.
|
api/__init__.py
ADDED
|
File without changes
|
api/webhook.py
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FastAPI webhook receiver for GitHub and GitLab pull_request / merge_request events.
|
|
3
|
+
|
|
4
|
+
GitHub: POST /webhook or /webhook/github
|
|
5
|
+
- Verifies HMAC-SHA256 signature (X-Hub-Signature-256 / GITHUB_WEBHOOK_SECRET)
|
|
6
|
+
- Handles pull_request and pull_request_review events
|
|
7
|
+
|
|
8
|
+
GitLab: POST /webhook/gitlab
|
|
9
|
+
- Verifies simple token comparison (X-Gitlab-Token / GITLAB_WEBHOOK_SECRET)
|
|
10
|
+
- Handles Merge Request Hook events
|
|
11
|
+
|
|
12
|
+
Both endpoints filter to Dependabot/Renovate bots, parse package + version from the
|
|
13
|
+
MR title/body/branch, and start PRActionWorkflow asynchronously, returning 200 immediately.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import hashlib
|
|
17
|
+
import hmac
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
import os
|
|
21
|
+
import re
|
|
22
|
+
from contextlib import asynccontextmanager
|
|
23
|
+
from datetime import timedelta
|
|
24
|
+
|
|
25
|
+
from fastapi import FastAPI, Header, HTTPException, Request
|
|
26
|
+
from packaging.utils import canonicalize_name
|
|
27
|
+
from temporalio.client import Client
|
|
28
|
+
from temporalio.common import WorkflowIDReusePolicy
|
|
29
|
+
from temporalio.contrib.pydantic import pydantic_data_converter
|
|
30
|
+
|
|
31
|
+
from ecosystems import get_name_re
|
|
32
|
+
from models import PRContext
|
|
33
|
+
from helpers.bot_parsers import get_bot_parser
|
|
34
|
+
from workflows.pr_action_workflow import PRActionWorkflow
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
_PR_ACTIONS = {"opened", "synchronize", "reopened"}
|
|
39
|
+
_GL_MR_ACTIONS = {"open", "reopen", "update"}
|
|
40
|
+
|
|
41
|
+
# Fallback for unknown ecosystems — strict enough to block injection attacks.
|
|
42
|
+
_FALLBACK_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,213}$")
|
|
43
|
+
|
|
44
|
+
# Version: semver-ish — digits, dots, hyphens, plus, tilde, caret, letters
|
|
45
|
+
_VERSION_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._+\-~^]{0,127}$")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _validate_parsed_package(ecosystem: str, package: str, old: str, new: str) -> str | None:
    """Check a bot-parsed package name and version pair before it is used further.

    Args:
        ecosystem: Ecosystem identifier used to look up a name pattern via
            ``get_name_re``; ``_FALLBACK_NAME_RE`` is used when none is returned.
        package: Package name parsed from the PR/MR.
        old: Previous version string, or the literal ``"unknown"``.
        new: Target version string, or the literal ``"unknown"``.

    Returns:
        A human-readable rejection reason, or ``None`` when every value is valid.
    """
    ecosystem_re = get_name_re(ecosystem)
    if ecosystem_re:
        # The ecosystem pattern is defined outside this module, so its match()
        # semantics are kept. Only the "$ also matches before a final newline"
        # gap is closed explicitly.
        name_ok = bool(ecosystem_re.match(package)) and not package.endswith("\n")
    else:
        name_ok = _FALLBACK_NAME_RE.fullmatch(package) is not None
    if not name_ok:
        return f"invalid package name: {package!r}"

    for label, ver in (("old_version", old), ("new_version", new)):
        # fullmatch() instead of match(): with match(), "$" would accept "1.0\n".
        if ver != "unknown" and not _VERSION_RE.fullmatch(ver):
            return f"invalid {label}: {ver!r}"
    return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
_temporal_client: Client | None = None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _check_config() -> None:
    """Log startup diagnostics about webhook secrets, platform credentials and LLM settings.

    Nothing is raised and nothing is returned; each finding is only reported
    through the module logger.
    """
    env = os.environ

    if not env.get("GITHUB_WEBHOOK_SECRET"):
        logger.error(
            "GITHUB_WEBHOOK_SECRET is not set — /webhook/github requests will return 500. "
            'Generate one with: python -c "import secrets; print(secrets.token_hex(32))"'
        )

    if not env.get("GITLAB_WEBHOOK_SECRET"):
        logger.info("GITLAB_WEBHOOK_SECRET not set — /webhook/gitlab endpoint is disabled.")

    # Any one of these is enough to count as having platform credentials.
    credential_vars = ("GITHUB_TOKEN", "GITHUB_APP_ID", "GITLAB_TOKEN")
    if not any(env.get(name) for name in credential_vars):
        logger.warning(
            "No platform credentials found (GITHUB_TOKEN, GITHUB_APP_ID, or GITLAB_TOKEN). "
            "The Scout will run but cannot post PR/MR comments or take actions."
        )

    llm_configured = env.get("CLASSIFIER", "") or env.get("ANTHROPIC_API_KEY")
    if not llm_configured:
        logger.info(
            "No LLM configured (ANTHROPIC_API_KEY / CLASSIFIER not set) — using rule-based classifier."
        )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: configure logging, report config issues, connect to Temporal.

    The connected client is stored in the module-level ``_temporal_client``
    before control is yielded back to the application.
    """
    global _temporal_client

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
    _check_config()

    # Resolve the Temporal settings once; they are used for both the
    # connection and the log line below.
    address = os.environ.get("TEMPORAL_ADDRESS", "localhost:7233")
    namespace = os.environ.get("TEMPORAL_NAMESPACE", "default")
    task_queue = os.environ.get("TEMPORAL_TASK_QUEUE", "dependency-scout")

    _temporal_client = await Client.connect(
        address,
        namespace=namespace,
        data_converter=pydantic_data_converter,
    )
    logger.info(
        "Connected to Temporal at %s (namespace=%s, task_queue=%s)",
        address,
        namespace,
        task_queue,
    )
    yield
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
app = FastAPI(lifespan=lifespan)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _verify_github_signature(body: bytes, signature: str) -> None:
    """Verify the ``X-Hub-Signature-256`` value against the raw request body.

    Args:
        body: Raw request bytes, exactly as received.
        signature: Header value, expected in the form ``sha256=<hex digest>``.

    Raises:
        HTTPException: 500 when ``GITHUB_WEBHOOK_SECRET`` is unset or empty,
            401 when the signature does not match.
    """
    secret = os.environ.get("GITHUB_WEBHOOK_SECRET", "")
    if not secret:
        raise HTTPException(status_code=500, detail="GITHUB_WEBHOOK_SECRET not configured")

    expected = "sha256=" + hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
    # Compare bytes, not str: compare_digest() raises TypeError on str
    # arguments containing non-ASCII characters, which would turn a crafted
    # header into an unhandled error instead of a 401.
    if not hmac.compare_digest(expected.encode(), signature.encode()):
        raise HTTPException(status_code=401, detail="Invalid signature")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _verify_gitlab_token(token: str) -> None:
    """Verify the ``X-Gitlab-Token`` value against ``GITLAB_WEBHOOK_SECRET``.

    Args:
        token: Header value sent with the webhook request.

    Raises:
        HTTPException: 500 when ``GITLAB_WEBHOOK_SECRET`` is unset or empty,
            401 when the token does not match.
    """
    secret = os.environ.get("GITLAB_WEBHOOK_SECRET", "")
    if not secret:
        raise HTTPException(status_code=500, detail="GITLAB_WEBHOOK_SECRET not configured")

    # Compare bytes, not str: compare_digest() raises TypeError on str
    # arguments containing non-ASCII characters, which would surface as an
    # unhandled error rather than a 401.
    if not hmac.compare_digest(secret.encode(), token.encode()):
        raise HTTPException(status_code=401, detail="Invalid token")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
async def _start_workflow(pr_context: PRContext) -> str:
    """Start the PRActionWorkflow for a PR/MR, or attach to the one that already exists.

    The workflow ID is derived from the repository path and PR number, so
    repeated events for the same PR map to the same workflow.

    Args:
        pr_context: Parsed PR/MR context passed to the workflow as its input.

    Returns:
        The workflow ID, whether a new run was started or one already existed.

    Raises:
        RuntimeError: If the Temporal client has not been connected yet.
    """
    # Local import so this block does not depend on the file's import section.
    from temporalio.exceptions import WorkflowAlreadyStartedError

    # Explicit check instead of ``assert``, which is stripped under ``-O``.
    if _temporal_client is None:
        raise RuntimeError("Temporal client is not connected")

    workflow_id = f"pr-action-{pr_context.repo.replace('/', '-')}-{pr_context.pr_number}"
    try:
        await _temporal_client.start_workflow(
            PRActionWorkflow.run,
            pr_context,
            id=workflow_id,
            task_queue=os.environ.get("TEMPORAL_TASK_QUEUE", "dependency-scout"),
            id_reuse_policy=WorkflowIDReusePolicy.ALLOW_DUPLICATE_FAILED_ONLY,
            execution_timeout=timedelta(days=30),
        )
    except WorkflowAlreadyStartedError:
        # A follow-up event for a PR whose workflow already exists is the
        # "attach" case, not a failure of the webhook request.
        logger.info("Workflow %s already exists — not starting a duplicate", workflow_id)
    return workflow_id
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@app.get("/healthz")
async def healthz() -> dict:
    """Health probe: always reports ``ok`` plus whether the Temporal client is set."""
    connected = _temporal_client is not None
    return {"status": "ok", "temporal_connected": connected}
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
# GitHub webhook — /webhook (legacy) and /webhook/github
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
async def _handle_github_payload(payload: dict, event: str) -> dict:
    """Dispatch a verified GitHub webhook payload.

    ``pull_request_review`` events are forwarded to ``_handle_github_review``.
    ``pull_request`` events with an action in ``_PR_ACTIONS`` from a
    recognised bot author are parsed, validated and turned into a
    PRActionWorkflow start. Everything else is reported as ignored.

    Args:
        payload: Decoded JSON body of the webhook.
        event: Value of the ``X-GitHub-Event`` header.

    Returns:
        A status dict: ``ignored`` with a reason, or ``started`` with the workflow ID.
    """
    if event == "pull_request_review":
        return await _handle_github_review(payload)
    if event != "pull_request":
        logger.debug("Ignored event type: %s", event)
        return {"status": "ignored", "reason": "not a pull_request event"}

    action = payload.get("action")
    if action not in _PR_ACTIONS:
        logger.debug("Ignored pull_request action: %s", action)
        return {"status": "ignored", "reason": f"action={action}"}

    pr_author = payload.get("pull_request", {}).get("user", {}).get("login", "")
    bot_parser = get_bot_parser(pr_author)
    if not bot_parser:
        logger.debug("Ignored PR from non-bot author: %s", pr_author)
        return {"status": "ignored", "reason": f"author={pr_author}"}

    repo = payload["repository"]["full_name"]
    pull = payload["pull_request"]
    pr_number = pull["number"]
    title = pull["title"]
    parsed = bot_parser.parse(title, pull.get("body") or "", pull["head"]["ref"])
    if not parsed:
        logger.warning(
            "Could not parse package/version from PR title — skipping %s#%s. Title: %r",
            repo,
            pr_number,
            title,
        )
        return {"status": "ignored", "reason": "could not parse package/version from PR title"}

    problem = _validate_parsed_package(
        parsed.ecosystem, parsed.package, parsed.old_version, parsed.new_version
    )
    if problem:
        logger.warning("Validation failed for %s#%s: %s", repo, pr_number, problem)
        return {"status": "ignored", "reason": problem}

    installation_id = payload.get("installation", {}).get("id") or None
    head_sha = pull["head"]["sha"]
    # Only pip names are canonicalized; other ecosystems keep the parsed name.
    if parsed.ecosystem == "pip":
        package_name = canonicalize_name(parsed.package)
    else:
        package_name = parsed.package

    workflow_id = await _start_workflow(
        PRContext(
            repo=repo,
            pr_number=pr_number,
            pr_author=pr_author,
            installation_id=installation_id,
            platform="github",
            ecosystem=parsed.ecosystem,
            package_name=package_name,
            old_version=parsed.old_version,
            new_version=parsed.new_version,
            head_sha=head_sha,
        )
    )
    logger.info(
        "Started workflow %s for %s#%s (%s %s %s→%s)",
        workflow_id,
        repo,
        pr_number,
        parsed.ecosystem,
        package_name,
        parsed.old_version,
        parsed.new_version,
    )
    return {"status": "started", "workflow_id": workflow_id}
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
async def _handle_github_review(payload: dict) -> dict:
    """Route a pull_request_review event to the waiting PRActionWorkflow.

    The reviewer identity comes from the HMAC-verified GitHub payload, not from
    a self-reported claim, so it can be trusted as the authoritative approver.
    Only 'submitted' events with state 'approved' or 'changes_requested' are acted on.

    Args:
        payload: Decoded JSON body of the ``pull_request_review`` webhook.

    Returns:
        A status dict: ``ignored`` with a reason, or ``signalled`` with the
        workflow ID and the decision (``approve`` / ``reject``).
    """
    # Local import so this block does not depend on the file's import section.
    from temporalio.service import RPCError, RPCStatusCode

    if payload.get("action") != "submitted":
        return {"status": "ignored", "reason": "not a submitted review"}

    # ``or`` guards against explicit JSON nulls, not just missing keys.
    review = payload.get("review") or {}
    state = (review.get("state") or "").lower()
    if state not in {"approved", "changes_requested"}:
        logger.debug("Ignored review with state=%s", state)
        return {"status": "ignored", "reason": f"review state={state}"}

    reviewer = (review.get("user") or {}).get("login", "")
    repo = payload["repository"]["full_name"]
    pr_number = payload["pull_request"]["number"]
    workflow_id = f"pr-action-{repo.replace('/', '-')}-{pr_number}"
    decision = "approve" if state == "approved" else "reject"

    try:
        if _temporal_client is None:
            raise RuntimeError("Temporal client is not connected")
        handle = _temporal_client.get_workflow_handle(workflow_id)
        await handle.signal(PRActionWorkflow.submit_decision, args=[decision, reviewer])
    except Exception as exc:
        # The response stays the same as before, but only a genuine
        # "workflow not found" is treated as routine. Anything else is a real
        # failure and must be visible at the default log level.
        not_found = isinstance(exc, RPCError) and exc.status == RPCStatusCode.NOT_FOUND
        if not_found:
            logger.debug("No active workflow for %s#%s — review signal dropped", repo, pr_number)
        else:
            logger.warning(
                "Failed to signal %s for %s#%s — review signal dropped",
                workflow_id,
                repo,
                pr_number,
                exc_info=True,
            )
        return {"status": "ignored", "reason": "no active workflow for this PR"}

    logger.info("Signalled %s with decision=%s from reviewer=%s", workflow_id, decision, reviewer)
    return {"status": "signalled", "workflow_id": workflow_id, "decision": decision}
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
@app.post("/webhook")
async def webhook_legacy(
    request: Request,
    x_hub_signature_256: str = Header(...),
    x_github_event: str = Header(...),
) -> dict:
    """Legacy GitHub webhook endpoint — kept for backward compatibility.

    Verifies the signature over the raw body, then hands the decoded JSON
    payload and the event name to ``_handle_github_payload``.
    """
    raw = await request.body()
    _verify_github_signature(raw, x_hub_signature_256)
    payload = json.loads(raw)
    return await _handle_github_payload(payload, x_github_event)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@app.post("/webhook/github")
async def webhook_github(
    request: Request,
    x_hub_signature_256: str = Header(...),
    x_github_event: str = Header(...),
) -> dict:
    """GitHub webhook endpoint.

    Verifies the signature over the raw body, then hands the decoded JSON
    payload and the event name to ``_handle_github_payload``.
    """
    raw = await request.body()
    _verify_github_signature(raw, x_hub_signature_256)
    payload = json.loads(raw)
    return await _handle_github_payload(payload, x_github_event)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# ---------------------------------------------------------------------------
|
|
282
|
+
# GitLab webhook — /webhook/gitlab
|
|
283
|
+
# ---------------------------------------------------------------------------
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
@app.post("/webhook/gitlab")
async def webhook_gitlab(
    request: Request,
    x_gitlab_token: str = Header(...),
    x_gitlab_event: str = Header(...),
) -> dict:
    """GitLab webhook endpoint.

    Checks the token header first, then decodes the body. Only
    ``Merge Request Hook`` events are processed; any other event type is
    reported as ignored.
    """
    _verify_gitlab_token(x_gitlab_token)
    payload = json.loads(await request.body())

    if x_gitlab_event != "Merge Request Hook":
        logger.debug("Ignored GitLab event type: %s", x_gitlab_event)
        return {"status": "ignored", "reason": f"event={x_gitlab_event}"}

    return await _handle_gitlab_mr(payload)
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
async def _handle_gitlab_mr(payload: dict) -> dict:
    """Handle a GitLab Merge Request Hook payload.

    An ``approved`` action is forwarded to ``_handle_gitlab_approval``. Actions
    in ``_GL_MR_ACTIONS`` from a recognised bot user are parsed, validated and
    turned into a PRActionWorkflow start. Everything else is reported as ignored.

    Args:
        payload: Decoded JSON body of the webhook.

    Returns:
        A status dict: ``ignored`` with a reason, ``started`` with the
        workflow ID, or whatever the approval handler returns.
    """
    mr = payload.get("object_attributes", {})
    action = mr.get("action", "")

    # GitLab sends "approved" as an MR action — route it as a review signal
    if action == "approved":
        return await _handle_gitlab_approval(payload)

    if action not in _GL_MR_ACTIONS:
        logger.debug("Ignored GitLab MR action: %s", action)
        return {"status": "ignored", "reason": f"action={action}"}

    pr_author = payload.get("user", {}).get("username", "")
    bot_parser = get_bot_parser(pr_author)
    if not bot_parser:
        logger.debug("Ignored MR from non-bot author: %s", pr_author)
        return {"status": "ignored", "reason": f"author={pr_author}"}

    repo = payload.get("project", {}).get("path_with_namespace", "")
    pr_number = mr.get("iid", 0)
    title = mr.get("title", "")
    parsed = bot_parser.parse(
        title,
        mr.get("description", "") or "",
        mr.get("source_branch", ""),
    )
    if not parsed:
        logger.warning(
            "Could not parse package/version from MR title — skipping %s!%s. Title: %r",
            repo,
            pr_number,
            title,
        )
        return {"status": "ignored", "reason": "could not parse package/version from MR title"}

    problem = _validate_parsed_package(
        parsed.ecosystem, parsed.package, parsed.old_version, parsed.new_version
    )
    if problem:
        logger.warning("Validation failed for %s!%s: %s", repo, pr_number, problem)
        return {"status": "ignored", "reason": problem}

    head_sha = (mr.get("last_commit") or {}).get("id", "")
    # Only pip names are canonicalized; other ecosystems keep the parsed name.
    if parsed.ecosystem == "pip":
        package_name = canonicalize_name(parsed.package)
    else:
        package_name = parsed.package

    workflow_id = await _start_workflow(
        PRContext(
            repo=repo,
            pr_number=pr_number,
            pr_author=pr_author,
            installation_id=None,
            platform="gitlab",
            ecosystem=parsed.ecosystem,
            package_name=package_name,
            old_version=parsed.old_version,
            new_version=parsed.new_version,
            head_sha=head_sha,
        )
    )
    logger.info(
        "Started workflow %s for %s!%s (%s %s %s→%s)",
        workflow_id,
        repo,
        pr_number,
        parsed.ecosystem,
        package_name,
        parsed.old_version,
        parsed.new_version,
    )
    return {"status": "started", "workflow_id": workflow_id}
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
async def _handle_gitlab_approval(payload: dict) -> dict:
    """Route a GitLab MR approval event to the waiting PRActionWorkflow.

    Args:
        payload: Decoded JSON body of the Merge Request Hook whose action is
            ``approved``.

    Returns:
        A status dict: ``signalled`` with the workflow ID and decision, or
        ``ignored`` when the signal could not be delivered.
    """
    # Local import so this block does not depend on the file's import section.
    from temporalio.service import RPCError, RPCStatusCode

    reviewer = payload.get("user", {}).get("username", "")
    repo = payload.get("project", {}).get("path_with_namespace", "")
    pr_number = payload.get("object_attributes", {}).get("iid", 0)

    workflow_id = f"pr-action-{repo.replace('/', '-')}-{pr_number}"

    try:
        if _temporal_client is None:
            raise RuntimeError("Temporal client is not connected")
        handle = _temporal_client.get_workflow_handle(workflow_id)
        await handle.signal(PRActionWorkflow.submit_decision, args=["approve", reviewer])
    except Exception as exc:
        # The response stays the same as before, but only a genuine
        # "workflow not found" is treated as routine. Anything else is a real
        # failure and must be visible at the default log level.
        not_found = isinstance(exc, RPCError) and exc.status == RPCStatusCode.NOT_FOUND
        if not_found:
            logger.debug("No active workflow for %s!%s — approval signal dropped", repo, pr_number)
        else:
            logger.warning(
                "Failed to signal %s for %s!%s — approval signal dropped",
                workflow_id,
                repo,
                pr_number,
                exc_info=True,
            )
        return {"status": "ignored", "reason": "no active workflow for this MR"}

    logger.info("Signalled %s with decision=approve from reviewer=%s", workflow_id, reviewer)
    return {"status": "signalled", "workflow_id": workflow_id, "decision": "approve"}
|
checks/README.md
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Checks
|
|
2
|
+
|
|
3
|
+
**When do you need a new check?** When there's a new external data source you want to run on every bump — if you find yourself thinking "I wish the classifier knew about X."
|
|
4
|
+
|
|
5
|
+
Each check calls an external API or does a computation, then returns a structured result. The workflow runs all 11 in parallel and collects the results; checks do the actual work. For PR side-effect functions (comment, merge, close), see [`pr_actions/`](../pr_actions/README.md).
|
|
6
|
+
|
|
7
|
+
## Triage checks
|
|
8
|
+
|
|
9
|
+
These eleven checks run in parallel for every package bump. All degrade gracefully — if one fails or its API key is missing, the workflow continues with the remaining results.
|
|
10
|
+
|
|
11
|
+
| File | Activity name | Returns | External service | API key |
|
|
12
|
+
|---|---|---|---|---|
|
|
13
|
+
| `metadata.py` | `activities.metadata.fetch` | `MetadataChecks` | Package registry (via ecosystem provider) | None |
|
|
14
|
+
| `osv.py` | `activities.osv.check` | `OSVChecks` | [OSV.dev](https://osv.dev) vulnerability database | None |
|
|
15
|
+
| `socket.py` | `activities.socket.score` | `SocketChecks` | [Socket.dev](https://socket.dev) supply chain scoring | `SOCKET_API_KEY` |
|
|
16
|
+
| `package_diff.py` | `activities.package_diff.compute` | `PackageDiffChecks` | Package registry (archive download) | `GITHUB_TOKEN` (optional, for artifact-vs-source comparison) |
|
|
17
|
+
| `maintainer.py` | `activities.maintainer.history` | `MaintainerChecks` | Package registry (via ecosystem provider) | None |
|
|
18
|
+
| `release_age.py` | `activities.release_age.check` | `ReleaseAgeChecks` | Package registry (via ecosystem provider) | None |
|
|
19
|
+
| `attestation.py` | `activities.attestation.check` | `AttestationChecks` | Package registry provenance endpoint | None |
|
|
20
|
+
| `release_notes.py` | `activities.release_notes.check` | `ReleaseChecks` | GitHub / GitLab API | `GITHUB_TOKEN` / `GITLAB_TOKEN` (optional) |
|
|
21
|
+
| `version_lineage.py` | `activities.version_lineage.check` | `VersionLineageChecks` | Package registry | None |
|
|
22
|
+
| `depsdev.py` | `activities.depsdev.fetch` | `DepsDevChecks` | [deps.dev](https://deps.dev) | None |
|
|
23
|
+
| `scorecard.py` | `activities.scorecard.fetch` | `ScorecardChecks` | [OpenSSF Scorecard](https://securityscorecards.dev) | None |
|
|
24
|
+
|
|
25
|
+
`package_diff.compute` downloads and extracts the full package archive — it's the slowest check and runs on a longer timeout than the rest. It calls `activity.heartbeat()` at each phase (download → extract → artifact/source comparison) so Temporal can detect worker crashes mid-run rather than waiting for the full timeout to expire.
|
|
26
|
+
|
|
27
|
+
## Check naming convention
|
|
28
|
+
|
|
29
|
+
Each check function is registered under a string name (e.g. `"activities.metadata.fetch"`) that the workflow uses to schedule it. The name in `@activity.defn(name=...)` must match exactly what appears in `_CHECK_REGISTRY` in `workflows/package_triage_workflow.py`.
|
|
30
|
+
|
|
31
|
+
This codebase uses string names deliberately: `_CHECK_REGISTRY` is a data structure mapping field names to check names and result types, making it easy to add or reorder checks without touching workflow control flow. The trade-off is that name mismatches are caught at runtime rather than import time — see [CLAUDE.md](../CLAUDE.md) for the full convention.
|
|
32
|
+
|
|
33
|
+
## Attack signature patterns
|
|
34
|
+
|
|
35
|
+
The [`signatures/`](signatures/README.md) subdirectory contains the YAML pattern files used by `package_diff.py` to detect suspicious code in package archives — network calls, obfuscation, persistence mechanisms, and dangerous file types. Add a new pattern there; no Python required.
|
|
36
|
+
|
|
37
|
+
## Worker auto-discovery
|
|
38
|
+
|
|
39
|
+
The worker (`worker.py`) automatically discovers and registers every check function found in `checks/*.py` and `pr_actions/*.py`. **You do not need to manually register new checks** — just put the file in this directory and restart the worker.
|
|
40
|
+
|
|
41
|
+
## Adding a new triage check
|
|
42
|
+
|
|
43
|
+
**Step 1 — create the check**
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
# checks/mycheck.py
|
|
47
|
+
from temporalio import activity
|
|
48
|
+
from models import MyChecks
|
|
49
|
+
|
|
50
|
+
@activity.defn(name="activities.mycheck.fetch")
|
|
51
|
+
async def fetch(ecosystem: str, package: str, old_version: str, new_version: str) -> MyChecks:
|
|
52
|
+
...
|
|
53
|
+
return MyChecks(...)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Step 2 — add a model**
|
|
57
|
+
|
|
58
|
+
Add `MyChecks` to `models/__init__.py` as a `BaseModel` subclass, and add it as a field on `PackageChecks`.
|
|
59
|
+
|
|
60
|
+
**Step 3 — register in the workflow**
|
|
61
|
+
|
|
62
|
+
Add a row to `_CHECK_REGISTRY` in `workflows/package_triage_workflow.py`:
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
("mycheck", "activities.mycheck.fetch", MyChecks, False),
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
The fourth element is `True` if the check is slow (like `package_diff`) and needs a longer timeout.
|
|
69
|
+
|
|
70
|
+
**Step 4 — write tests and regenerate fixtures**
|
|
71
|
+
|
|
72
|
+
Add tests under `tests/`. The worker will auto-discover your new check file — no manual registration needed. Then regenerate the Temporal replay fixtures since the workflow history changed:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
uv run python tests/generate_fixtures.py
|
|
76
|
+
```
|
checks/__init__.py
ADDED
|
File without changes
|
checks/attestation.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from ecosystems import get_provider
|
|
2
|
+
from models import AttestationChecks
|
|
3
|
+
from helpers.cache import ActivityCache
|
|
4
|
+
from temporalio import activity
|
|
5
|
+
|
|
6
|
+
_cache: ActivityCache = ActivityCache() # SLSA provenance is immutable once signed
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@activity.defn(name="activities.attestation.check")
async def check(
    ecosystem: str, package: str, old_version: str, new_version: str
) -> AttestationChecks:
    """Look up provenance attestations for a package bump via the ecosystem provider.

    The lookup goes through the module-level cache, keyed on all four
    arguments. The value comes from the provider's ``fetch_attestations``.
    """

    def _lookup():
        # Resolved lazily: the provider is only touched if the cache calls this.
        return get_provider(ecosystem).fetch_attestations(package, old_version, new_version)

    cache_key = (ecosystem, package, old_version, new_version)
    return await _cache.get_or_compute(cache_key, _lookup)
|
checks/classifier.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from temporalio import activity
|
|
2
|
+
|
|
3
|
+
from classifiers import RuleBasedClassifier, get_classifier
|
|
4
|
+
from models import PackageChecks, Verdict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@activity.defn(name="activities.classifier.classify")
async def classify(signals: PackageChecks) -> Verdict:
    """Run the configured classifier over the collected package signals.

    Logs which classifier was selected (rule-based or otherwise), then
    returns whatever that classifier's ``classify`` produces.
    """
    clf = get_classifier()
    if not isinstance(clf, RuleBasedClassifier):
        activity.logger.info("Using classifier: %s", type(clf).__name__)
    else:
        activity.logger.info("No LLM configured — using rule-based classifier")
    verdict = await clf.classify(signals)
    return verdict
|
checks/custom_checks.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from temporalio import activity
|
|
7
|
+
|
|
8
|
+
from models import CheckContext
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@activity.defn(name="activities.custom_checks.run_all")
async def run_all(ctx: CheckContext) -> dict[str, Any]:
    """Run every ``dependency_scout.checks`` entry point concurrently.

    Each entry point is loaded and awaited with ``ctx``. A check that raises
    is logged and skipped, and checks that return ``None`` are left out of
    the result.

    Returns:
        Mapping of entry-point name to that check's result. Empty when
        discovery fails or no entry points are registered.
    """
    try:
        from importlib.metadata import entry_points

        plugins = list(entry_points(group="dependency_scout.checks"))
    except Exception:
        return {}

    if not plugins:
        return {}

    async def _invoke(plugin) -> tuple[str, Any]:
        try:
            check_fn = plugin.load()
            outcome = await check_fn(ctx)
            return plugin.name, outcome
        except Exception as exc:  # noqa: BLE001
            activity.logger.warning("Custom check %r failed: %r — skipped", plugin.name, exc)
            return plugin.name, None

    outcomes = await asyncio.gather(*map(_invoke, plugins))

    collected: dict[str, Any] = {}
    for name, value in outcomes:
        if value is not None:
            collected[name] = value
    return collected
|
checks/depsdev.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Activity: fetch deprecation status from deps.dev (https://api.deps.dev)."""
|
|
2
|
+
|
|
3
|
+
from urllib.parse import quote
|
|
4
|
+
|
|
5
|
+
from temporalio import activity
|
|
6
|
+
|
|
7
|
+
from models import DepsDevChecks
|
|
8
|
+
from helpers.cache import ActivityCache
|
|
9
|
+
from helpers.http import get_client
|
|
10
|
+
|
|
11
|
+
# Module-level cache for deps.dev lookups. Entries get a 24-hour TTL
# (86400 seconds) because deprecation status changes rarely.
_cache: ActivityCache = ActivityCache(ttl_seconds=86400)
|
|
12
|
+
|
|
13
|
+
# Maps this project's ecosystem identifiers to the system segment used in the
# deps.dev URL. Ecosystems missing from this table are not queried at all.
_ECOSYSTEM_MAP = dict(
    pip="pypi",
    npm="npm",
    rubygems="rubygems",
    maven="maven",
    nuget="nuget",
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@activity.defn(name="activities.depsdev.fetch")
async def fetch(ecosystem: str, package: str, old_version: str, new_version: str) -> DepsDevChecks:
    """Return the deps.dev deprecation status of ``new_version``, via the module cache.

    Results are cached per ``(ecosystem, package, new_version)``. ``old_version``
    is accepted as part of the signature but is used neither in the cache key
    nor in the lookup.

    Returns:
        A ``DepsDevChecks`` saying whether the new version is marked deprecated,
        with the deprecation reason when one is provided.
    """

    def _compute():
        # Invoked by the cache only when it needs a fresh value.
        return _do_fetch(ecosystem, package, new_version)

    cache_key = (ecosystem, package, new_version)
    checks = await _cache.get_or_compute(cache_key, _compute)
    return checks
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def _do_fetch(ecosystem: str, package: str, new_version: str) -> DepsDevChecks:
    """Query deps.dev for one package version and extract its deprecation status.

    The lookup is best-effort: a default ``DepsDevChecks()`` is returned when
    the ecosystem has no entry in ``_ECOSYSTEM_MAP``, when the response status
    is not 200, or when anything raises (the exception is logged as a warning).

    Args:
        ecosystem: Project ecosystem identifier, translated through ``_ECOSYSTEM_MAP``.
        package: Package name; percent-encoded before being placed in the URL.
        new_version: Version to look up; percent-encoded likewise.

    Returns:
        ``DepsDevChecks`` populated from the ``isDeprecated`` and
        ``deprecatedReason`` response fields, or a default instance as
        described above.
    """
    system = _ECOSYSTEM_MAP.get(ecosystem)
    if system is None:
        return DepsDevChecks()

    # safe="" also encodes "/" so scoped or namespaced package names stay
    # inside a single path segment.
    encoded_package = quote(package, safe="")
    encoded_version = quote(new_version, safe="")
    url = f"https://api.deps.dev/v3alpha/systems/{system}/packages/{encoded_package}/versions/{encoded_version}"

    try:
        client = get_client()
        resp = await client.get(url, timeout=15.0)

        if resp.status_code != 200:
            return DepsDevChecks()

        data = resp.json()
        is_deprecated = data.get("isDeprecated", False)
        # Normalise an empty or missing reason to None.
        deprecated_reason = data.get("deprecatedReason") or None

        return DepsDevChecks(is_deprecated=is_deprecated, deprecated_reason=deprecated_reason)
    except Exception as exc:  # noqa: BLE001
        # Lazy %-args: the message is only formatted if the record is emitted,
        # matching the logging style of the other activities.
        activity.logger.warning(
            "deps.dev fetch failed for %s@%s: %r", package, new_version, exc
        )
        return DepsDevChecks()
|
checks/maintainer.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from ecosystems import get_provider
|
|
2
|
+
from models import MaintainerChecks
|
|
3
|
+
from helpers.cache import ActivityCache
|
|
4
|
+
from temporalio import activity
|
|
5
|
+
|
|
6
|
+
# Module-level cache for maintainer-history results. No TTL is passed, so
# ActivityCache's default expiry applies; the author's rationale is that
# publishing history is immutable. NOTE(review): confirm the default means
# "never expire".
_cache: ActivityCache = ActivityCache()
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@activity.defn(name="activities.maintainer.history")
async def history(
    ecosystem: str, package: str, old_version: str, new_version: str
) -> MaintainerChecks:
    """Return the maintainer comparison between two versions, via the module cache.

    The work is delegated to the ecosystem provider's ``fetch_maintainer`` and
    cached per ``(ecosystem, package, old_version, new_version)``.

    Returns:
        A ``MaintainerChecks`` indicating whether uploaders appeared for the new
        version that did not publish the old one, which can signal an account
        takeover.
    """

    def _compute():
        # The provider is resolved lazily, only when the cache needs a value.
        provider = get_provider(ecosystem)
        return provider.fetch_maintainer(package, old_version, new_version)

    cache_key = (ecosystem, package, old_version, new_version)
    checks = await _cache.get_or_compute(cache_key, _compute)
    return checks
|
checks/metadata.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from ecosystems import get_provider
|
|
2
|
+
from helpers.cache import ActivityCache
|
|
3
|
+
from models import MetadataChecks
|
|
4
|
+
from temporalio import activity
|
|
5
|
+
|
|
6
|
+
# Module-level cache for registry metadata with a 1-hour TTL (3600 seconds);
# per the original note, weekly download figures refresh daily.
_cache: ActivityCache = ActivityCache(ttl_seconds=3600)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@activity.defn(name="activities.metadata.fetch")
async def fetch(ecosystem: str, package: str, old_version: str, new_version: str) -> MetadataChecks:
    """Return registry metadata for a version bump, via the module cache.

    The work is delegated to the ecosystem provider's ``fetch_metadata`` and
    cached per ``(ecosystem, package, old_version, new_version)``.

    Returns:
        A ``MetadataChecks`` populated from the ecosystem registry (e.g. PyPI,
        npm): weekly download counts, whether the bump is a major version
        change, and the package description.
    """

    def _compute():
        # The provider is resolved lazily, only when the cache needs a value.
        provider = get_provider(ecosystem)
        return provider.fetch_metadata(package, old_version, new_version)

    cache_key = (ecosystem, package, old_version, new_version)
    checks = await _cache.get_or_compute(cache_key, _compute)
    return checks
|