dependency-scout 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. dependency_scout-0.1.0/.claude/commands/add-check-plugin.md +235 -0
  2. dependency_scout-0.1.0/.claude/commands/add-detection.md +93 -0
  3. dependency_scout-0.1.0/.claude/commands/add-ecosystem.md +108 -0
  4. dependency_scout-0.1.0/.claude/commands/add-signature-plugin.md +228 -0
  5. dependency_scout-0.1.0/.claude/commands/triage-test.md +20 -0
  6. dependency_scout-0.1.0/.env.example +111 -0
  7. dependency_scout-0.1.0/.github/dependency-scout.yml.example +77 -0
  8. dependency_scout-0.1.0/.github/workflows/ci.yml +42 -0
  9. dependency_scout-0.1.0/.gitignore +221 -0
  10. dependency_scout-0.1.0/.pre-commit-config.yaml +16 -0
  11. dependency_scout-0.1.0/CLAUDE.md +79 -0
  12. dependency_scout-0.1.0/Dockerfile +12 -0
  13. dependency_scout-0.1.0/LICENSE +21 -0
  14. dependency_scout-0.1.0/Makefile +5 -0
  15. dependency_scout-0.1.0/PKG-INFO +227 -0
  16. dependency_scout-0.1.0/README.md +206 -0
  17. dependency_scout-0.1.0/api/README.md +28 -0
  18. dependency_scout-0.1.0/api/__init__.py +0 -0
  19. dependency_scout-0.1.0/api/webhook.py +391 -0
  20. dependency_scout-0.1.0/checks/README.md +76 -0
  21. dependency_scout-0.1.0/checks/__init__.py +0 -0
  22. dependency_scout-0.1.0/checks/attestation.py +20 -0
  23. dependency_scout-0.1.0/checks/classifier.py +17 -0
  24. dependency_scout-0.1.0/checks/custom_checks.py +34 -0
  25. dependency_scout-0.1.0/checks/depsdev.py +52 -0
  26. dependency_scout-0.1.0/checks/maintainer.py +20 -0
  27. dependency_scout-0.1.0/checks/metadata.py +18 -0
  28. dependency_scout-0.1.0/checks/osv.py +40 -0
  29. dependency_scout-0.1.0/checks/package_diff.py +1143 -0
  30. dependency_scout-0.1.0/checks/release_age.py +20 -0
  31. dependency_scout-0.1.0/checks/release_notes.py +21 -0
  32. dependency_scout-0.1.0/checks/scorecard.py +116 -0
  33. dependency_scout-0.1.0/checks/security_advisories.py +111 -0
  34. dependency_scout-0.1.0/checks/signatures/README.md +35 -0
  35. dependency_scout-0.1.0/checks/signatures/__init__.py +195 -0
  36. dependency_scout-0.1.0/checks/signatures/file_types.yaml +39 -0
  37. dependency_scout-0.1.0/checks/signatures/net_calls.yaml +376 -0
  38. dependency_scout-0.1.0/checks/signatures/obfuscation.yaml +128 -0
  39. dependency_scout-0.1.0/checks/signatures/persistence.yaml +64 -0
  40. dependency_scout-0.1.0/checks/socket.py +116 -0
  41. dependency_scout-0.1.0/checks/version_lineage.py +261 -0
  42. dependency_scout-0.1.0/classifiers/README.md +28 -0
  43. dependency_scout-0.1.0/classifiers/__init__.py +84 -0
  44. dependency_scout-0.1.0/classifiers/_helpers.py +461 -0
  45. dependency_scout-0.1.0/classifiers/anthropic.py +179 -0
  46. dependency_scout-0.1.0/classifiers/ollama.py +57 -0
  47. dependency_scout-0.1.0/classifiers/openai.py +172 -0
  48. dependency_scout-0.1.0/docker-compose.yml +53 -0
  49. dependency_scout-0.1.0/docs/architecture.md +353 -0
  50. dependency_scout-0.1.0/docs/configuration.md +118 -0
  51. dependency_scout-0.1.0/docs/contributing.md +276 -0
  52. dependency_scout-0.1.0/docs/deployment.md +233 -0
  53. dependency_scout-0.1.0/docs/extending.md +362 -0
  54. dependency_scout-0.1.0/docs/security.md +171 -0
  55. dependency_scout-0.1.0/ecosystems/README.md +40 -0
  56. dependency_scout-0.1.0/ecosystems/__init__.py +702 -0
  57. dependency_scout-0.1.0/ecosystems/_registry.py +192 -0
  58. dependency_scout-0.1.0/ecosystems/cargo.py +221 -0
  59. dependency_scout-0.1.0/ecosystems/composer.py +273 -0
  60. dependency_scout-0.1.0/ecosystems/docker.py +141 -0
  61. dependency_scout-0.1.0/ecosystems/elm.py +186 -0
  62. dependency_scout-0.1.0/ecosystems/github_actions.py +232 -0
  63. dependency_scout-0.1.0/ecosystems/gomod.py +189 -0
  64. dependency_scout-0.1.0/ecosystems/maven.py +307 -0
  65. dependency_scout-0.1.0/ecosystems/mix.py +233 -0
  66. dependency_scout-0.1.0/ecosystems/npm.py +370 -0
  67. dependency_scout-0.1.0/ecosystems/nuget.py +245 -0
  68. dependency_scout-0.1.0/ecosystems/pip.py +363 -0
  69. dependency_scout-0.1.0/ecosystems/pub.py +219 -0
  70. dependency_scout-0.1.0/ecosystems/remote.py +239 -0
  71. dependency_scout-0.1.0/ecosystems/rubygems.py +270 -0
  72. dependency_scout-0.1.0/ecosystems/swift.py +367 -0
  73. dependency_scout-0.1.0/ecosystems/terraform.py +320 -0
  74. dependency_scout-0.1.0/helpers/README.md +19 -0
  75. dependency_scout-0.1.0/helpers/__init__.py +0 -0
  76. dependency_scout-0.1.0/helpers/bot_parsers.py +81 -0
  77. dependency_scout-0.1.0/helpers/cache.py +98 -0
  78. dependency_scout-0.1.0/helpers/comment_formatter.py +325 -0
  79. dependency_scout-0.1.0/helpers/config_provider.py +169 -0
  80. dependency_scout-0.1.0/helpers/display.py +63 -0
  81. dependency_scout-0.1.0/helpers/github_app.py +86 -0
  82. dependency_scout-0.1.0/helpers/http.py +24 -0
  83. dependency_scout-0.1.0/helpers/notification.py +164 -0
  84. dependency_scout-0.1.0/helpers/pr_parser.py +192 -0
  85. dependency_scout-0.1.0/helpers/prompts.py +186 -0
  86. dependency_scout-0.1.0/helpers/temporal_client.py +24 -0
  87. dependency_scout-0.1.0/mcp_server.py +105 -0
  88. dependency_scout-0.1.0/models/README.md +74 -0
  89. dependency_scout-0.1.0/models/__init__.py +49 -0
  90. dependency_scout-0.1.0/models/checks.py +131 -0
  91. dependency_scout-0.1.0/models/package.py +43 -0
  92. dependency_scout-0.1.0/models/pr.py +64 -0
  93. dependency_scout-0.1.0/models/triage.py +11 -0
  94. dependency_scout-0.1.0/models/verdict.py +44 -0
  95. dependency_scout-0.1.0/platforms/README.md +40 -0
  96. dependency_scout-0.1.0/platforms/__init__.py +59 -0
  97. dependency_scout-0.1.0/platforms/github.py +394 -0
  98. dependency_scout-0.1.0/platforms/gitlab.py +274 -0
  99. dependency_scout-0.1.0/pr_actions/README.md +21 -0
  100. dependency_scout-0.1.0/pr_actions/__init__.py +2 -0
  101. dependency_scout-0.1.0/pr_actions/platform.py +105 -0
  102. dependency_scout-0.1.0/pyproject.toml +120 -0
  103. dependency_scout-0.1.0/scout.py +609 -0
  104. dependency_scout-0.1.0/setup.py +512 -0
  105. dependency_scout-0.1.0/tests/__init__.py +0 -0
  106. dependency_scout-0.1.0/tests/conftest.py +39 -0
  107. dependency_scout-0.1.0/tests/fixtures/.gitkeep +0 -0
  108. dependency_scout-0.1.0/tests/fixtures/pr_action_green_automerge.json +748 -0
  109. dependency_scout-0.1.0/tests/fixtures/pr_action_observe_only.json +648 -0
  110. dependency_scout-0.1.0/tests/fixtures/pr_action_red_blocked.json +866 -0
  111. dependency_scout-0.1.0/tests/fixtures/pr_action_yellow_human_approved.json +911 -0
  112. dependency_scout-0.1.0/tests/fixtures/pr_action_yellow_human_rejected.json +811 -0
  113. dependency_scout-0.1.0/tests/generate_fixtures.py +368 -0
  114. dependency_scout-0.1.0/tests/helpers.py +35 -0
  115. dependency_scout-0.1.0/tests/test_activities.py +824 -0
  116. dependency_scout-0.1.0/tests/test_activity_check_plugins.py +107 -0
  117. dependency_scout-0.1.0/tests/test_attestation.py +760 -0
  118. dependency_scout-0.1.0/tests/test_cargo.py +318 -0
  119. dependency_scout-0.1.0/tests/test_check_wiring.py +64 -0
  120. dependency_scout-0.1.0/tests/test_classifier.py +708 -0
  121. dependency_scout-0.1.0/tests/test_comment_formatter.py +501 -0
  122. dependency_scout-0.1.0/tests/test_composer.py +723 -0
  123. dependency_scout-0.1.0/tests/test_custom_checks.py +108 -0
  124. dependency_scout-0.1.0/tests/test_depsdev_scorecard.py +354 -0
  125. dependency_scout-0.1.0/tests/test_docker.py +210 -0
  126. dependency_scout-0.1.0/tests/test_ecosystems_utils.py +562 -0
  127. dependency_scout-0.1.0/tests/test_elm.py +357 -0
  128. dependency_scout-0.1.0/tests/test_github.py +940 -0
  129. dependency_scout-0.1.0/tests/test_github_actions.py +359 -0
  130. dependency_scout-0.1.0/tests/test_github_app.py +166 -0
  131. dependency_scout-0.1.0/tests/test_gitlab.py +568 -0
  132. dependency_scout-0.1.0/tests/test_gomod.py +283 -0
  133. dependency_scout-0.1.0/tests/test_maven.py +533 -0
  134. dependency_scout-0.1.0/tests/test_mix.py +330 -0
  135. dependency_scout-0.1.0/tests/test_nuget.py +554 -0
  136. dependency_scout-0.1.0/tests/test_package_diff.py +166 -0
  137. dependency_scout-0.1.0/tests/test_package_diff_extended.py +4736 -0
  138. dependency_scout-0.1.0/tests/test_pr_parser.py +285 -0
  139. dependency_scout-0.1.0/tests/test_pub.py +400 -0
  140. dependency_scout-0.1.0/tests/test_release_notes.py +675 -0
  141. dependency_scout-0.1.0/tests/test_remote_provider.py +244 -0
  142. dependency_scout-0.1.0/tests/test_repo_config.py +255 -0
  143. dependency_scout-0.1.0/tests/test_signature_plugins.py +434 -0
  144. dependency_scout-0.1.0/tests/test_signatures.py +316 -0
  145. dependency_scout-0.1.0/tests/test_socket.py +236 -0
  146. dependency_scout-0.1.0/tests/test_swift.py +519 -0
  147. dependency_scout-0.1.0/tests/test_terraform.py +487 -0
  148. dependency_scout-0.1.0/tests/test_version_lineage.py +582 -0
  149. dependency_scout-0.1.0/tests/test_webhook.py +786 -0
  150. dependency_scout-0.1.0/tests/test_workflow_replay.py +68 -0
  151. dependency_scout-0.1.0/uv.lock +1893 -0
  152. dependency_scout-0.1.0/worker.py +157 -0
  153. dependency_scout-0.1.0/workflows/README.md +41 -0
  154. dependency_scout-0.1.0/workflows/__init__.py +0 -0
  155. dependency_scout-0.1.0/workflows/package_triage_workflow.py +166 -0
  156. dependency_scout-0.1.0/workflows/pr_action_workflow.py +256 -0
@@ -0,0 +1,235 @@
1
+ Create a standalone check plugin package for dependency-scout.
2
+
3
+ Use this when you want to add a custom supply-chain check as a separate
4
+ installable package — for example, an internal vulnerability database lookup,
5
+ a proprietary license scanner, or a deep archive analysis that doesn't belong
6
+ in the core.
7
+
8
+ If the user provided arguments ($ARGUMENTS), treat them as a description of
9
+ what the check will do. Otherwise ask: "What will this check do? (e.g.
10
+ 'look up packages in our internal vuln DB', 'scan archive contents for
11
+ proprietary license markers', 'fetch SBOM from internal registry')"
12
+
13
+ ---
14
+
15
+ ## Step 1 — Choose the right tier
16
+
17
+ Ask (or infer from context):
18
+
19
+ **Tier A — Simple check** (`dependency_scout.checks`)
20
+ - Plain `async def` — no Temporal knowledge required
21
+ - Runs in parallel with all other checks; must finish in under ~30 seconds
22
+ - Use this for: API lookups, database queries, lightweight analysis
23
+
24
+ **Tier B — Advanced check** (`dependency_scout.activity_checks`)
25
+ - Full Temporal `@activity.defn` — heartbeating, custom retry/timeout policies, cancellation
26
+ - Requires per-repo opt-in in `.github/dependency-scout.yml`
27
+ - Use this for: archive downloads, corpus scanning, anything that could take minutes or needs retry control
28
+
29
+ When in doubt, suggest Tier A. Suggest Tier B only if the user mentions timeouts, heartbeating, long-running work, or archive/binary downloads.
30
+
31
+ ---
32
+
33
+ ## Step 2 — Scaffold the package
34
+
35
+ Suggest a name like `dependency-scout-{org}-checks` or `dependency-scout-{topic}`.
36
+
37
+ ```
38
+ my-plugin/
39
+ ├── pyproject.toml
40
+ └── my_plugin/
41
+ ├── __init__.py ← empty or minimal
42
+ └── checks.py ← the check implementation
43
+ ```
44
+
45
+ ---
46
+
47
+ ## Step 3A — Tier A: Simple check
48
+
49
+ ### `my_plugin/checks.py`
50
+
51
+ ```python
52
+ from models import CheckContext
53
+
54
+ async def run(ctx: CheckContext) -> dict:
55
+ """
56
+ ctx fields: ctx.package, ctx.ecosystem, ctx.old_version, ctx.new_version
57
+ Return a dict — keys become fields in PackageChecks.custom_checks.
58
+ """
59
+ result = await my_internal_db.lookup(ctx.package, ctx.ecosystem)
60
+ return {
61
+ "internal_vuln_count": result.count,
62
+ "severity": result.max_severity,
63
+ }
64
+ ```
65
+
66
+ Return a plain `dict`. Keys are arbitrary — choose names that will be
67
+ meaningful in the LLM classifier prompt. Values must be JSON-serialisable.
68
+
69
+ On failure, catch exceptions and return a degraded default rather than
70
+ raising — the Scout treats any exception from a custom check as a
71
+ non-fatal warning:
72
+
73
+ ```python
74
+ async def run(ctx: CheckContext) -> dict:
75
+ try:
76
+ result = await my_internal_db.lookup(ctx.package, ctx.ecosystem)
77
+ return {"internal_vuln_count": result.count}
78
+ except Exception:
79
+ return {"internal_vuln_count": None}
80
+ ```
81
+
82
+ ### `pyproject.toml`
83
+
84
+ ```toml
85
+ [project]
86
+ name = "dependency-scout-my-checks"
87
+ version = "0.1.0"
88
+ dependencies = ["dependency-scout"]
89
+
90
+ [project.entry-points."dependency_scout.checks"]
91
+ my_check = "my_plugin.checks:run"
92
+
93
+ [build-system]
94
+ requires = ["hatchling"]
95
+ build-backend = "hatchling.build"
96
+ ```
97
+
98
+ The entry-point name (`my_check` above) is the key your results are stored
99
+ under in `PackageChecks.custom_checks`. Choose something unique and
100
+ descriptive — it appears verbatim in the LLM classifier prompt.
101
+
102
+ ### How classifiers handle your results
103
+
104
+ - **LLM classifiers (Claude, OpenAI, Ollama)** — your dict is injected
105
+ automatically into the prompt as labeled JSON in an `<untrusted_custom>`
106
+ block. No code changes needed.
107
+ - **Rule-based classifier** — ignores `custom_checks` by design. If you need
108
+ rule-based support, contribute the check as a built-in (see contributing.md).
109
+
110
+ No config changes are needed in target repos — plugins are discovered
111
+ automatically from installed packages.
112
+
113
+ ---
114
+
115
+ ## Step 3B — Tier B: Advanced check
116
+
117
+ ### `my_plugin/checks.py`
118
+
119
+ ```python
120
+ from temporalio import activity
121
+ from models import CheckContext
122
+
123
+ @activity.defn(name="my_company.deep_archive_scan")
124
+ async def deep_archive_scan(ctx: CheckContext) -> dict:
125
+ # Call activity.heartbeat() periodically so Temporal knows you're alive.
126
+ # Without this, a stuck download silently times out.
127
+ activity.heartbeat()
128
+
129
+ # ... long-running analysis ...
130
+ data = await download_and_scan(ctx.package, ctx.new_version)
131
+
132
+ activity.heartbeat() # call again after expensive steps
133
+ return {"suspicious_patterns": data.patterns, "risk_score": data.score}
134
+ ```
135
+
136
+ The `name=` string in `@activity.defn` must be **globally unique** across all
137
+ installed plugins. Use a namespaced format: `org.check_name`.
138
+
139
+ ### `pyproject.toml`
140
+
141
+ ```toml
142
+ [project]
143
+ name = "dependency-scout-my-checks"
144
+ version = "0.1.0"
145
+ dependencies = ["dependency-scout", "temporalio"]
146
+
147
+ [project.entry-points."dependency_scout.activity_checks"]
148
+ deep_scan = "my_plugin.checks:deep_archive_scan"
149
+
150
+ [build-system]
151
+ requires = ["hatchling"]
152
+ build-backend = "hatchling.build"
153
+ ```
154
+
155
+ ### Per-repo opt-in
156
+
157
+ Unlike simple checks, activity checks require explicit opt-in in each repo's
158
+ `.github/dependency-scout.yml`:
159
+
160
+ ```yaml
161
+ extra_check_activities:
162
+ - my_company.deep_archive_scan # must match the @activity.defn name exactly
163
+ ```
164
+
165
+ The activity is registered with the worker at startup for all repos, but only
166
+ called for repos that list it here.
167
+
168
+ ---
169
+
170
+ ## Step 4 — Install and verify
171
+
172
+ ```bash
173
+ # Install your plugin into the Scout's environment
174
+ uv pip install -e ../my-plugin
175
+
176
+ # Verify the entry point is registered
177
+ python -c "
178
+ from importlib.metadata import entry_points
179
+ # Change group to 'dependency_scout.activity_checks' for Tier B
180
+ eps = entry_points(group='dependency_scout.checks')
181
+ print([ep.name for ep in eps])
182
+ "
183
+ ```
184
+
185
+ For Tier A, run a quick triage to confirm your check fires and results appear:
186
+
187
+ ```bash
188
+ uv run python triage.py --ecosystem pip --package requests --old 2.31.0 --new 2.32.0
189
+ ```
190
+
191
+ Your check's results will appear in the verdict output under `custom_checks`.
192
+
193
+ ---
194
+
195
+ ## Step 5 — Test your check
196
+
197
+ ```python
198
+ import pytest
199
+ from temporalio.testing import ActivityEnvironment
200
+ from models import CheckContext
201
+ from my_plugin.checks import run # or deep_archive_scan for Tier B
202
+
203
+ @pytest.mark.asyncio
204
+ async def test_my_check_success():
205
+ env = ActivityEnvironment()
206
+ ctx = CheckContext(
207
+ package="requests",
208
+ ecosystem="pip",
209
+ old_version="2.31.0",
210
+ new_version="2.32.0",
211
+ )
212
+ result = await env.run(run, ctx)
213
+ assert "internal_vuln_count" in result
214
+
215
+ @pytest.mark.asyncio
216
+ async def test_my_check_degrades_on_failure(monkeypatch):
217
+ # Simulate the external service being down
218
+ monkeypatch.setattr("my_plugin.checks.my_internal_db", broken_db)
219
+ env = ActivityEnvironment()
220
+ ctx = CheckContext(package="requests", ecosystem="pip",
221
+ old_version="2.31.0", new_version="2.32.0")
222
+ result = await env.run(run, ctx)
223
+ assert result["internal_vuln_count"] is None # degraded, not raised
224
+ ```
225
+
226
+ ---
227
+
228
+ ## Common pitfalls
229
+
230
+ - **Tier B: plain async function skipped** — if you forget `@activity.defn`, the worker logs a WARNING and skips your check entirely; nothing else fails, so the omission is easy to miss. If your check isn't running, check the worker logs first.
231
+ - **Tier B: `@activity.defn` name collision** — two plugins with the same `name=` string cause a registration error at worker startup. Always namespace: `my_company.check_name`.
232
+ - **Tier B: no `extra_check_activities` in repo config** — the activity is registered but never called. This is intentional (opt-in), not a bug.
233
+ - **Returning non-serialisable types** — `datetime`, custom objects, etc. will cause serialisation errors. Stick to strings, numbers, booleans, lists, dicts, and `None`.
234
+ - **Raising instead of degrading** — an unhandled exception in a simple check is caught by `run_all` and logged as a warning; the check result is omitted. In an activity check, Temporal will retry it according to the retry policy. Either way, prefer returning a degraded dict over raising.
235
+ - **Entry point name conflicts** — two plugins with the same entry-point name (e.g. both registering `my_check`) will have one silently override the other. Use org-namespaced names.
@@ -0,0 +1,93 @@
1
+ Add a new supply chain attack detection pattern to the Scout.
2
+
3
+ If the user provided arguments ($ARGUMENTS), treat them as a description of what to detect (attack name, behaviour, or package involved). Otherwise ask: "What attack or behaviour do you want to detect? (e.g. 'npm credential theft', 'Python .pth persistence', a CVE or blog post link)"
4
+
5
+ Once you have a description, ask clarifying questions only if you cannot determine the answers from context:
6
+
7
+ 1. **Which kind of pattern is this?**
8
+ - Network call in library code (outbound HTTP) → `checks/signatures/net_calls.yaml`
9
+ - Obfuscated/encoded payload → `checks/signatures/obfuscation.yaml`
10
+ - OS persistence, self-propagation, credential theft → `checks/signatures/persistence.yaml`
11
+ - Suspicious file name or binary type → `checks/signatures/file_types.yaml`
12
+
13
+ 2. **Which language/extension?** (only for net_calls.yaml — it's keyed by file extension)
14
+
15
+ ---
16
+
17
+ ## Where to add it
18
+
19
+ ### New network call pattern — `checks/signatures/net_calls.yaml`
20
+
21
+ Find the block for the file extension (`.py`, `.js`, `.ts`, `.rb`, etc.) and add:
22
+
23
+ ```yaml
24
+ - pattern: 'YourRegexHere'
25
+ desc: what this detects and why it matters
26
+ ```
27
+
28
+ Use **single-quoted** YAML strings for regex — single quotes never process backslash escapes, so `\b` and `\.` work as-is without doubling.
29
+
30
+ If the extension doesn't exist yet, add a new block at the end:
31
+
32
+ ```yaml
33
+ .ext:
34
+ - pattern: 'SomeHTTPClient\b'
35
+ desc: HTTP client library for SomeLang
36
+ ```
37
+
38
+ ### OS persistence or worm propagation — `checks/signatures/persistence.yaml`
39
+
40
+ Persistence goes under `patterns:`:
41
+
42
+ ```yaml
43
+ - pattern: 'some\.persistence\.path'
44
+ desc: short-name (Attack/Repo/Date reference if known)
45
+ ```
46
+
47
+ Worm propagation is a **compound rule** — the file must contain BOTH `credential_read` AND `publish_endpoint` to trigger. If the new attack has a two-step pattern, update both sub-keys under `worm_propagation:`.
48
+
49
+ ### Obfuscation — `checks/signatures/obfuscation.yaml`
50
+
51
+ Keyed by extension, same format as net_calls.yaml.
52
+
53
+ ### Suspicious file names/types — `checks/signatures/file_types.yaml`
54
+
55
+ Add to `suspicious_filenames:`, `suspicious_path_prefixes:`, `dangerous_binary_suffixes:`, or `install_hook_names:` as appropriate.
56
+
57
+ ---
58
+
59
+ ## Add a test
60
+
61
+ Open `tests/test_signatures.py` and add a test that checks your pattern actually matches a sample string, for example:
62
+
63
+ ```python
64
+ def test_my_new_pattern_matches():
65
+ # .py network call
66
+ sample = "import my_new_http_lib"
67
+ assert any(p.search(sample) for p in NET_CALL_PATTERNS.get(".py", []))
68
+ ```
69
+
70
+ Run:
71
+
72
+ ```bash
73
+ uv run pytest tests/test_signatures.py -v
74
+ ```
75
+
76
+ ---
77
+
78
+ ## Run the full suite
79
+
80
+ ```bash
81
+ uv run ruff format checks/signatures/ tests/test_signatures.py
82
+ uv run pytest -x -q
83
+ ```
84
+
85
+ ---
86
+
87
+ ## Common pitfalls
88
+
89
+ - **Backslash doubling in YAML** — single-quoted strings (`'...'`) preserve regex backslashes literally. Double-quoted strings require `\\b`, `\\.`, etc. Always use single quotes.
90
+ - **Single quotes inside single-quoted patterns** — `''` inside a single-quoted YAML string is an escaped `'`, but `'''` closes the string at the third quote and leaves the rest as bare YAML. If your regex needs to match a literal `'`, prefer the hex escape `\x27`, which matches exactly that character; `\S` or `[^\s]` also work but match any non-whitespace character, not only a quote. Alternatively, rewrite to avoid quoting both `'` and `"` in the same character class.
91
+ - **Too-broad patterns** — a pattern like `http` will match half the codebase. Prefer `\bhttplib2\b`, `requests\.get\b`, etc.
92
+ - **Wrong file** — persistence patterns in net_calls.yaml (or vice versa) won't affect the right classifier signal.
93
+ - **Compound worm rule** — the worm fires only when BOTH `credential_read` AND `publish_endpoint` appear in the **same file**. If you're describing a single-step attack, use a plain persistence pattern instead.
@@ -0,0 +1,108 @@
1
+ Add a new package ecosystem provider to dependency-scout.
2
+
3
+ If the user provided arguments ($ARGUMENTS), treat them as the ecosystem name or description. Otherwise ask: "What ecosystem are you adding? (e.g. 'PyPI', 'Hackage', 'Pub/Flutter')"
4
+
5
+ Then collect any other details you need (registry URL, OSV ecosystem name, Dependabot slug) by looking at existing providers as reference, asking the user only for things you can't infer.
6
+
7
+ ---
8
+
9
+ ## What you're building
10
+
11
+ A single file `ecosystems/{name}.py` that implements the `EcosystemProvider` protocol. The provider is auto-discovered at startup — no registry rows, no worker changes, no fixture regeneration needed.
12
+
13
+ ---
14
+
15
+ ## Step 1 — Create `ecosystems/{name}.py`
16
+
17
+ Copy `ecosystems/cargo.py` as the closest structural template. The provider must have:
18
+
19
+ **Four class attributes:**
20
+ ```python
21
+ ecosystem_name = "myeco" # key used everywhere in the codebase
22
+ osv_name = "MyEco" # must match api.osv.dev ecosystem name exactly
23
+ dependabot_slug = "myeco" # Dependabot's internal branch prefix (e.g. "npm_and_yarn",
24
+ # "pip", "cargo", "bundler", "maven", "nuget", "composer",
25
+ # "go_modules") — check github.com/dependabot/dependabot-core
26
+ # if unsure; used to parse Dependabot PR branch names
27
+ name_re = re.compile(r"^[a-z0-9_-]+$") # package name allowlist for the webhook
28
+ ```
29
+
30
+ **Seven async methods** (all must be present; return degraded defaults on failure, never raise):
31
+ - `fetch_metadata(package, old_version, new_version) -> PyPIChecks` — download stats, description, major-bump flag
32
+ - `fetch_release_age(package, new_version) -> ReleaseAgeChecks` — registry publish timestamp
33
+ - `fetch_maintainer(package, old_version, new_version) -> MaintainerChecks` — who published each version
34
+ - `get_archive_url(client, package, version) -> tuple[str, str, str] | None` — returns `(url, filename, integrity_hash)`; call `validate_archive_url(url)` before returning
35
+ - `extract_archive(archive_bytes, filename, dest) -> None` — unpack the archive into `dest`
36
+ - `fetch_attestations(package, old_version, new_version) -> AttestationChecks` — SLSA/Sigstore provenance; return `AttestationChecks(has_attestation=False)` if registry doesn't support it
37
+ - `fetch_release(package, old_version, version) -> ReleaseChecks` — GitHub release checks; use the `fetch_vcs_release`, `fetch_vcs_tag_signature`, `fetch_vcs_ci_workflow_changes`, and `build_release_signals` helpers from `ecosystems/__init__.py` if the registry exposes a source repo URL
38
+
39
+ Imports to start with:
40
+ ```python
41
+ from models import (
42
+ AttestationChecks, MaintainerChecks, PyPIChecks,
43
+ ReleaseAgeChecks, ReleaseChecks,
44
+ )
45
+ from ecosystems import (
46
+ build_release_signals, fetch_vcs_release, fetch_vcs_tag_signature,
47
+ fetch_vcs_ci_workflow_changes, is_major, parse_upload_time,
48
+ parse_vcs_repo, validate_archive_url,
49
+ )
50
+ from helpers.http import get_client
51
+ ```
52
+
53
+ ## Step 2 — Add the CDN host
54
+
55
+ Open `ecosystems/__init__.py` and add the registry's download CDN hostname to `ALLOWED_CDN_HOSTS`. This is enforced before any archive download — without it the diff check silently degrades.
56
+
57
+ Example: if archives are served from `files.example-registry.org`, add that string to the frozenset.
58
+
59
+ ## Step 3 — Write tests
60
+
61
+ Create `tests/test_{name}.py`. Use `tests/test_cargo.py` as the template — it covers all seven methods and is the cleanest example. Minimum required tests per method:
62
+
63
+ - `fetch_metadata`: success case, 404/not-found case
64
+ - `fetch_release_age`: success (recent release), success (old release), missing upload_time
65
+ - `fetch_maintainer`: same publisher, changed publisher, API failure degrades gracefully
66
+ - `get_archive_url`: returns valid tuple, CDN host is in ALLOWED_CDN_HOSTS
67
+ - `extract_archive`: round-trips (create archive in test, extract, verify contents)
68
+ - `fetch_attestations`: no-attestation case (if registry doesn't support it, just test it returns `has_attestation=False`)
69
+ - `fetch_release`: no linked GitHub repo case, linked repo with release
70
+
71
+ Mock HTTP with `respx`. Run activities inside `ActivityEnvironment()` from `temporalio.testing`.
72
+
73
+ Keep coverage above 95% (`uv run pytest --cov=ecosystems --cov-report=term-missing`).
74
+
75
+ ## Step 4 — Run the full suite
76
+
77
+ ```bash
78
+ uv run ruff format .
79
+ uv run ruff check .
80
+ uv run mypy .
81
+ uv run pytest -x -q
82
+ ```
83
+
84
+ The `test_check_wiring.py` tests will catch registration problems automatically. If they fail, check that `ecosystem_name` is set as a class attribute (not an instance attribute) and that the file is directly inside `ecosystems/` (not in a subdirectory).
85
+
86
+ ## Step 5 — Smoke test with a real package
87
+
88
+ ```bash
89
+ uv run python -m start_workflow \
90
+ --ecosystem myeco \
91
+ --repo owner/some-repo \
92
+ --package some-package \
93
+ --old-version 1.0.0 \
94
+ --new-version 1.1.0 \
95
+ --pr-number 1
96
+ ```
97
+
98
+ Watch the Temporal UI at http://localhost:8233 to confirm all activities complete (green checkmarks, not orange retries).
99
+
100
+ ---
101
+
102
+ ## Common pitfalls
103
+
104
+ - **`dependabot_slug` wrong** — Dependabot PRs for your ecosystem won't be parsed. Check the Dependabot source or look at real PR branch names: `dependabot/{slug}/{package}-{version}`.
105
+ - **`osv_name` wrong** — OSV vulnerability lookups return no results silently. Cross-check at https://api.osv.dev/v1/query with your ecosystem string.
106
+ - **CDN host not in `ALLOWED_CDN_HOSTS`** — archive diff degrades to empty with no error logged at the activity level. Always add the host before testing.
107
+ - **Raising from a method** — methods should catch their own exceptions and return degraded defaults. Only use `ApplicationError(..., non_retryable=True)` for permanent failures like 404 or auth errors.
108
+ - **`parse_upload_time`** — use this helper for registry timestamps rather than `datetime.fromisoformat`; it handles the format variations across registries.
@@ -0,0 +1,228 @@
1
+ Create a standalone signature plugin package for dependency-scout.
2
+
3
+ Use this when you want to ship attack signatures as a separate installable
4
+ package rather than contributing them to the core `checks/signatures/` files —
5
+ for example, org-internal threat intel, proprietary detection rules, or a
6
+ community feed of patterns.
7
+
8
+ If the user provided arguments ($ARGUMENTS), treat them as a description of
9
+ what the plugin will detect. Otherwise ask: "What will this signature plugin
10
+ detect? (e.g. 'internal threat intel feed', 'custom persistence patterns for
11
+ our stack', 'patterns from a CVE feed')"
12
+
13
+ ---
14
+
15
+ ## Step 1 — Choose the right tier
16
+
17
+ Ask (or infer from context):
18
+
19
+ **Tier A — YAML directory** (`dependency_scout.signatures`)
20
+ - Patterns are static and can be written as regex strings in YAML
21
+ - No runtime dependencies or network calls needed
22
+ - Lowest barrier: YAML files + a 3-line Python shim
23
+ - Use this for: curated pattern lists, community rule sets, org-internal IOCs
24
+
25
+ **Tier B — Python provider** (`dependency_scout.signature_providers`)
26
+ - Patterns must be produced at runtime (fetched from an API or a database, or generated programmatically)
27
+ - Full Python — import anything, call anything
28
+ - Use this for: threat-intel feeds, CVE APIs, patterns that change frequently
29
+
30
+ When in doubt, suggest Tier A. If the user mentions "API", "feed", "dynamic", or "generated", suggest Tier B.
31
+
32
+ ---
33
+
34
+ ## Step 2 — Scaffold the package
35
+
36
+ Create a directory for the plugin. If the user hasn't named it, suggest `dependency-scout-{org}-signatures` or `dependency-scout-{topic}-sigs`.
37
+
38
+ ```
39
+ my-plugin/
40
+ ├── pyproject.toml
41
+ └── my_plugin/
42
+ ├── __init__.py ← empty or minimal
43
+ └── signatures.py ← the entry point callable
44
+ ```
45
+
46
+ For Tier A, also create:
47
+ ```
48
+ └── sigs/
49
+ ├── net_calls.yaml ← optional: network call patterns
50
+ ├── persistence.yaml ← optional: persistence/worm patterns
51
+ ├── obfuscation.yaml ← optional: obfuscation patterns
52
+ └── file_types.yaml ← optional: suspicious filenames/types
53
+ ```
54
+
55
+ ---
56
+
57
+ ## Step 3A — Tier A: YAML directory plugin
58
+
59
+ ### `my_plugin/signatures.py`
60
+
61
+ ```python
62
+ from pathlib import Path
63
+
64
+ def get_signatures_dir() -> Path:
65
+ return Path(__file__).parent / "sigs"
66
+ ```
67
+
68
+ ### YAML files (only create the ones you need)
69
+
70
+ Use the same format as `checks/signatures/` in the core repo.
71
+
72
+ **`sigs/net_calls.yaml`** — outbound network calls, keyed by file extension:
73
+ ```yaml
74
+ .py:
75
+ - pattern: 'evil_sdk\.fetch\b'
76
+ desc: EvilSDK HTTP client (SupplyChainCorp campaign May 2026)
77
+ .js:
78
+ - pattern: 'require\s*\(\s*[''"]evil-fetch[''"]\s*\)'
79
+ desc: evil-fetch npm package
80
+ ```
81
+
82
+ **`sigs/persistence.yaml`** — OS persistence, self-propagation:
83
+ ```yaml
84
+ patterns:
85
+ - pattern: 'crontab.*attacker\.sh'
86
+ desc: cron-based persistence dropper
87
+ ```
88
+
89
+ **`sigs/obfuscation.yaml`** — encoded payloads, keyed by extension:
90
+ ```yaml
91
+ patterns:
92
+ .js:
93
+ - pattern: '_0xdeadbeef'
94
+ desc: javascript-obfuscator hex variable names
95
+ ```
96
+
97
+ **`sigs/file_types.yaml`** — suspicious filenames and binary types:
98
+ ```yaml
99
+ suspicious_filenames:
100
+ - evil.cfg
101
+ suspicious_path_prefixes:
102
+ - .evil/
103
+ dangerous_binary_suffixes:
104
+ - .evil
105
+ install_hook_names:
106
+ - evil_install.sh
107
+ npm_install_scripts:
108
+ - evil_install
109
+ ```
110
+
111
+ **Single-quote pitfall:** `\b`, `\s`, `\.` all work as-is in single-quoted YAML strings, and a literal `'` is written by doubling it (`''`), as in the `.js` example above. But `'''` inside a single-quoted string closes the string at the third quote — if your regex needs to match literal `'`, use `\S` or `[^\s]` instead, or rewrite to avoid `'` and `"` in the same character class.
112
+
113
+ ### `pyproject.toml`
114
+
115
+ ```toml
116
+ [project]
117
+ name = "dependency-scout-my-sigs"
118
+ version = "0.1.0"
119
+ dependencies = ["pyyaml>=6.0"]
120
+
121
+ [project.entry-points."dependency_scout.signatures"]
122
+ my_sigs = "my_plugin.signatures:get_signatures_dir"
123
+
124
+ [build-system]
125
+ requires = ["hatchling"]
126
+ build-backend = "hatchling.build"
127
+ ```
128
+
129
+ ---
130
+
131
+ ## Step 3B — Tier B: Python provider plugin
132
+
133
+ ### `my_plugin/signatures.py`
134
+
135
+ ```python
136
+ from checks.signatures import SignatureContribution
137
+
138
+ def get_signatures() -> SignatureContribution:
139
+ # Fetch from API, database, or generate programmatically
140
+ patterns = _fetch_from_threat_feed()
141
+ return SignatureContribution(
142
+ net_call_patterns={".py": patterns["python_net_calls"]},
143
+ persistence_patterns=patterns["persistence"],
144
+ )
145
+ ```
146
+
147
+ Only populate the fields you are contributing — omitted fields are ignored.
148
+ All pattern strings are raw regex strings; they are compiled internally.
149
+
150
+ `SignatureContribution` fields:
151
+ | Field | Type | What it adds to |
152
+ |---|---|---|
153
+ | `net_call_patterns` | `dict[str, list[str]]` | `NET_CALL_PATTERNS` (keyed by extension) |
154
+ | `obfuscation_patterns` | `dict[str, list[str]]` | `OBFUSCATION_PATTERNS` (keyed by extension) |
155
+ | `persistence_patterns` | `list[str]` | `PERSISTENCE_PATTERNS` |
156
+ | `suspicious_package_files` | `list[str]` | `SUSPICIOUS_PACKAGE_FILES` |
157
+ | `suspicious_package_prefixes` | `list[str]` | `SUSPICIOUS_PACKAGE_PREFIXES` |
158
+ | `dangerous_binary_suffixes` | `list[str]` | `DANGEROUS_BINARY_SUFFIXES` |
159
+ | `install_hook_names` | `list[str]` | `INSTALL_HOOK_NAMES` |
160
+ | `npm_install_scripts` | `list[str]` | `NPM_INSTALL_SCRIPTS` |
161
+
162
+ ### `pyproject.toml`
163
+
164
+ ```toml
165
+ [project]
166
+ name = "dependency-scout-my-provider"
167
+ version = "0.1.0"
168
+ dependencies = ["dependency-scout"] # for SignatureContribution
169
+
170
+ [project.entry-points."dependency_scout.signature_providers"]
171
+ my_provider = "my_plugin.signatures:get_signatures"
172
+
173
+ [build-system]
174
+ requires = ["hatchling"]
175
+ build-backend = "hatchling.build"
176
+ ```
177
+
178
+ ---
179
+
180
+ ## Step 4 — Install and verify
181
+
182
+ ```bash
183
+ # From the core repo, install your plugin in editable mode
184
+ uv pip install -e ../my-plugin
185
+
186
+ # Verify the entry point is discovered
187
+ python -c "
188
+ from importlib.metadata import entry_points
189
+ eps = entry_points(group='dependency_scout.signatures') # or signature_providers
190
+ print([ep.name for ep in eps])
191
+ "
192
+
193
+ # Verify patterns are merged into the constants
194
+ python -c "
195
+ from checks.signatures import NET_CALL_PATTERNS, PERSISTENCE_PATTERNS
196
+ print('Extensions covered:', list(NET_CALL_PATTERNS.keys()))
197
+ print('Persistence pattern count:', len(PERSISTENCE_PATTERNS))
198
+ "
199
+ ```
200
+
201
+ ---
202
+
203
+ ## Step 5 — Test your patterns
204
+
205
+ Write a test that directly imports from `checks.signatures` after installing your plugin and checks that your patterns are present:
206
+
207
+ ```python
208
+ from checks.signatures import NET_CALL_PATTERNS, PERSISTENCE_PATTERNS
209
+
210
+ def test_my_pattern_is_loaded():
211
+ patterns = NET_CALL_PATTERNS.get(".py", [])
212
+ assert any(p.search("evil_sdk.fetch(url)") for p in patterns)
213
+ ```
214
+
215
+ Run the core test suite to confirm nothing regresses:
216
+ ```bash
217
+ uv run pytest -x -q
218
+ ```
219
+
220
+ ---
221
+
222
+ ## Common pitfalls
223
+
224
+ - **Entry point not discovered** — run `uv pip install -e .` in your plugin directory; entry points only register on install.
225
+ - **`dependency_scout.signatures` vs `dependency_scout.signature_providers`** — wrong group means silent no-op; the discovery loop skips unrecognised groups entirely.
226
+ - **Tier B: exceptions fail silently** — a provider that raises is caught and logged at WARNING level rather than re-raised. If your patterns aren't showing up, check the logs at WARNING level.
227
+ - **Tier A: missing YAML files are skipped** — you don't need all four YAML files; only the ones present in your `sigs/` directory are merged. But a file that is present and fails to parse is skipped as well, with only a WARNING logged.
228
+ - **Pattern too broad** — test with real package diffs, not just unit tests. A pattern like `fetch` will fire on half the internet.
@@ -0,0 +1,20 @@
1
+ Run a one-off triage against a real package to test the Scout locally.
2
+
3
+ Parse $ARGUMENTS for ecosystem, package, old version, and new version. If any are missing, ask the user. Repo and PR number can be any placeholder values (they are only used when real PR actions are enabled, which is off by default).
4
+
5
+ Then run:
6
+
7
+ ```bash
8
+ uv run python -m start_workflow \
9
+ --repo owner/test-repo \
10
+ --package {package} \
11
+ --old-version {old} \
12
+ --new-version {new} \
13
+ --pr-number 1
14
+ ```
15
+
16
+ If the user specified an ecosystem other than pip, add `--ecosystem {ecosystem}`.
17
+
18
+ Remind them to open http://localhost:8233 to watch the workflow run in the Temporal UI, and that Temporal must be running (`temporal server start-dev` in a separate terminal) for this to work.
19
+
20
+ After the command returns, show the verdict that was printed to stdout.