opencode-skills-collection 3.0.28 → 3.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled-skills/.antigravity-install-manifest.json +11 -1
- package/bundled-skills/bumblebee/SKILL.md +186 -0
- package/bundled-skills/bumblebee/scripts/render_report.py +362 -0
- package/bundled-skills/complexity-cuts/SKILL.md +254 -0
- package/bundled-skills/decision-navigator/SKILL.md +238 -0
- package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
- package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
- package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
- package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
- package/bundled-skills/docs/users/bundles.md +1 -1
- package/bundled-skills/docs/users/claude-code-skills.md +1 -1
- package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
- package/bundled-skills/docs/users/getting-started.md +1 -1
- package/bundled-skills/docs/users/kiro-integration.md +1 -1
- package/bundled-skills/docs/users/usage.md +4 -4
- package/bundled-skills/docs/users/visual-guide.md +4 -4
- package/bundled-skills/invariant-guard/SKILL.md +307 -0
- package/bundled-skills/lemmaly/SKILL.md +236 -0
- package/bundled-skills/mathguard/SKILL.md +269 -0
- package/bundled-skills/sendblue/sendblue-api/SKILL.md +194 -0
- package/bundled-skills/sendblue/sendblue-cli/SKILL.md +145 -0
- package/bundled-skills/sendblue/sendblue-notify/SKILL.md +173 -0
- package/bundled-skills/sendblue/textme/SKILL.md +232 -0
- package/package.json +1 -1
- package/skills_index.json +220 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": 1,
|
|
3
|
-
"updatedAt": "2026-05-
|
|
3
|
+
"updatedAt": "2026-05-28T01:50:58.515Z",
|
|
4
4
|
"entries": [
|
|
5
5
|
"00-andruia-consultant",
|
|
6
6
|
"007",
|
|
@@ -295,6 +295,7 @@
|
|
|
295
295
|
"building-native-ui",
|
|
296
296
|
"bulletmind",
|
|
297
297
|
"bullmq-specialist",
|
|
298
|
+
"bumblebee",
|
|
298
299
|
"bun-development",
|
|
299
300
|
"burp-suite-testing",
|
|
300
301
|
"burpsuite-project-parser",
|
|
@@ -375,6 +376,7 @@
|
|
|
375
376
|
"commit",
|
|
376
377
|
"competitive-landscape",
|
|
377
378
|
"competitor-alternatives",
|
|
379
|
+
"complexity-cuts",
|
|
378
380
|
"comprehensive-review-full-review",
|
|
379
381
|
"comprehensive-review-pr-enhance",
|
|
380
382
|
"computer-use-agents",
|
|
@@ -456,6 +458,7 @@
|
|
|
456
458
|
"debugger",
|
|
457
459
|
"debugging-strategies",
|
|
458
460
|
"debugging-toolkit-smart-debug",
|
|
461
|
+
"decision-navigator",
|
|
459
462
|
"deep-research",
|
|
460
463
|
"defi-protocol-templates",
|
|
461
464
|
"defuddle",
|
|
@@ -739,6 +742,7 @@
|
|
|
739
742
|
"internal-comms-anthropic",
|
|
740
743
|
"internal-comms-community",
|
|
741
744
|
"interview-coach",
|
|
745
|
+
"invariant-guard",
|
|
742
746
|
"inventory-demand-planning",
|
|
743
747
|
"ios-debugger-agent",
|
|
744
748
|
"ios-developer",
|
|
@@ -792,6 +796,7 @@
|
|
|
792
796
|
"leiloeiro-juridico",
|
|
793
797
|
"leiloeiro-mercado",
|
|
794
798
|
"leiloeiro-risco",
|
|
799
|
+
"lemmaly",
|
|
795
800
|
"lex",
|
|
796
801
|
"libreoffice/base",
|
|
797
802
|
"libreoffice/calc",
|
|
@@ -855,6 +860,7 @@
|
|
|
855
860
|
"marketing-ideas",
|
|
856
861
|
"marketing-psychology",
|
|
857
862
|
"matematico-tao",
|
|
863
|
+
"mathguard",
|
|
858
864
|
"matplotlib",
|
|
859
865
|
"maxia",
|
|
860
866
|
"mcp-builder",
|
|
@@ -1158,6 +1164,10 @@
|
|
|
1158
1164
|
"segment-cdp",
|
|
1159
1165
|
"semgrep-rule-creator",
|
|
1160
1166
|
"semgrep-rule-variant-creator",
|
|
1167
|
+
"sendblue/sendblue-api",
|
|
1168
|
+
"sendblue/sendblue-cli",
|
|
1169
|
+
"sendblue/sendblue-notify",
|
|
1170
|
+
"sendblue/textme",
|
|
1161
1171
|
"sendgrid-automation",
|
|
1162
1172
|
"senior-architect",
|
|
1163
1173
|
"senior-frontend",
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: bumblebee
|
|
3
|
+
description: "Run Bumblebee supply-chain inventory and exposure scans on macOS/Linux to detect compromised packages, extensions, and MCP host configs."
|
|
4
|
+
category: security
|
|
5
|
+
risk: safe
|
|
6
|
+
source: community
|
|
7
|
+
source_repo: mycelos-ai/bumblebee-skill
|
|
8
|
+
source_type: community
|
|
9
|
+
date_added: "2026-05-27"
|
|
10
|
+
author: stefan-kp
|
|
11
|
+
tags: [security, supply-chain, incident-response, npm, pypi, tooling]
|
|
12
|
+
tools: [claude]
|
|
13
|
+
license: "MIT"
|
|
14
|
+
license_source: "https://github.com/mycelos-ai/bumblebee-skill/blob/main/LICENSE"
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
# Bumblebee Security Scan
|
|
18
|
+
|
|
19
|
+
Bumblebee (https://github.com/perplexityai/bumblebee) is a read-only inventory collector that surfaces package, extension, and developer-tool metadata on developer endpoints. It answers a focused supply-chain question: when an advisory names a package or version, do any matches exist on this machine right now?
|
|
20
|
+
|
|
21
|
+
This skill drives a single Bumblebee scan from start to finish:
|
|
22
|
+
|
|
23
|
+
1. Verify Go is on the PATH (provide install guidance if not).
|
|
24
|
+
2. Verify or install the `bumblebee` binary.
|
|
25
|
+
3. Run the requested scan profile (`baseline`, `project`, or `deep`).
|
|
26
|
+
4. Save raw NDJSON output plus a Markdown report into the user's workspace.
|
|
27
|
+
5. Summarize findings — especially exposure-catalog matches — in the chat reply.
|
|
28
|
+
|
|
29
|
+
Communicate with the user in the language they used (for example, reply in German if they write in German). Code, commit messages, and on-disk file contents stay in English to match existing project conventions.
|
|
30
|
+
|
|
31
|
+
## When to Use This Skill
|
|
32
|
+
|
|
33
|
+
Use this skill when an advisory, incident report, or exposure catalog names compromised packages,
|
|
34
|
+
developer tools, browser/editor extensions, or MCP host configuration that may exist on a local
|
|
35
|
+
macOS or Linux developer endpoint.
|
|
36
|
+
|
|
37
|
+
Use it for read-only inventory and exposure checks. Do not use it to patch, uninstall, quarantine,
|
|
38
|
+
or otherwise mutate the scanned machine.
|
|
39
|
+
|
|
40
|
+
## Step 1 — Clarify the scan request
|
|
41
|
+
|
|
42
|
+
Before running anything, confirm two things with the user via `AskUserQuestion`, unless the message already pins them down:
|
|
43
|
+
|
|
44
|
+
- **Profile**: `baseline` (global package roots), `project` (specific dev folders like `~/code`), or `deep` (explicit `--root` paths, including `$HOME` for incident response).
|
|
45
|
+
- **Roots**: For `project` and `deep` profiles, ask which directories to scan. `deep` is the only profile that accepts a bare-home root.
|
|
46
|
+
|
|
47
|
+
If the user has an advisory or exposure-catalog file ready, also ask whether they want to pass it via `--exposure-catalog`. The skill does not ship its own catalogs — point them at `threat_intel/` in the Bumblebee repo if they ask where to find ready-made ones.
|
|
48
|
+
|
|
49
|
+
Skip the questions for one-liner asks like "lauf mal ne Baseline-Scan" (German for "just run a baseline scan") — just run a baseline.
|
|
50
|
+
|
|
51
|
+
## Step 2 — Check Go
|
|
52
|
+
|
|
53
|
+
Run `command -v go && go version` in bash. Three outcomes:
|
|
54
|
+
|
|
55
|
+
- **Go ≥ 1.25 present** → continue.
|
|
56
|
+
- **Go present but < 1.25** → tell the user the version, explain Bumblebee needs Go 1.25+, and stop until they upgrade.
|
|
57
|
+
- **Go missing** → do not install Go automatically. Show platform-appropriate instructions and stop:
|
|
58
|
+
- macOS: `brew install go` (or download from https://go.dev/dl/).
|
|
59
|
+
- Debian/Ubuntu: prefer the official tarball from https://go.dev/dl/ because distro repos lag; `sudo apt install golang-go` only as fallback.
|
|
60
|
+
- Fedora/RHEL: `sudo dnf install golang` or the official tarball.
|
|
61
|
+
|
|
62
|
+
After installation, the user must ensure the directory `go install` writes to — `$GOBIN` if set, otherwise `$(go env GOPATH)/bin` (by default `$HOME/go/bin`) — is on `$PATH` so `bumblebee` is found later.
|
|
63
|
+
|
|
64
|
+
## Step 3 — Check or install Bumblebee
|
|
65
|
+
|
|
66
|
+
Run `command -v bumblebee && bumblebee version`. If missing:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
go install github.com/perplexityai/bumblebee/cmd/bumblebee@latest
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Then re-check `bumblebee version`. If the binary still cannot be located, the user's `GOBIN`/`PATH` is likely misconfigured — surface the resolved `go env GOPATH` and `go env GOBIN` so they can fix it. Do not fall back to running the binary by absolute path silently; explain what is happening.
|
|
73
|
+
|
|
74
|
+
Once installed, also run `bumblebee selftest` as a sanity check. A non-zero exit means the local install is broken and the scan should not proceed.
|
|
75
|
+
|
|
76
|
+
## Step 4 — Run the scan
|
|
77
|
+
|
|
78
|
+
All scans write NDJSON to a file. Use the workspace folder for output so the user can open the results afterwards.
|
|
79
|
+
|
|
80
|
+
Output filenames (use the user's workspace path; the examples below assume `$OUT` is set to the output directory and `$TS` to a UTC timestamp, e.g. `TS=$(date -u +%Y%m%dT%H%M%SZ)`):
|
|
81
|
+
|
|
82
|
+
- `bumblebee-<profile>-<UTC-timestamp>.ndjson` — raw records.
|
|
83
|
+
- `bumblebee-<profile>-<UTC-timestamp>.report.md` — Markdown report (generated in Step 5).
|
|
84
|
+
|
|
85
|
+
Pick a sensible `--max-duration` so a runaway scan does not hang the session. Reasonable defaults:
|
|
86
|
+
|
|
87
|
+
- `baseline`: 5m
|
|
88
|
+
- `project`: 10m
|
|
89
|
+
- `deep`: 15m (warn the user that scanning `$HOME` can still take longer; offer to raise the limit)
|
|
90
|
+
|
|
91
|
+
Always stream stderr to a sibling `.log` file — Bumblebee emits diagnostic NDJSON there that helps explain partial scans.
|
|
92
|
+
|
|
93
|
+
### Baseline
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
bumblebee scan --profile baseline \
|
|
97
|
+
--max-duration 5m \
|
|
98
|
+
> "$OUT/bumblebee-baseline-$TS.ndjson" \
|
|
99
|
+
2> "$OUT/bumblebee-baseline-$TS.log"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Optional: scope to specific ecosystems if the user only cares about, say, npm and PyPI:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
bumblebee scan --profile baseline --ecosystem npm,pypi ...
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Project
|
|
109
|
+
|
|
110
|
+
Each `--root` must be an existing absolute path. Reject bare `$HOME` for this profile (Bumblebee will reject it too — surface the message clearly).
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
bumblebee scan --profile project \
|
|
114
|
+
--root "$HOME/code" \
|
|
115
|
+
--root "$HOME/Developer" \
|
|
116
|
+
--max-duration 10m \
|
|
117
|
+
> "$OUT/bumblebee-project-$TS.ndjson" \
|
|
118
|
+
2> "$OUT/bumblebee-project-$TS.log"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Deep
|
|
122
|
+
|
|
123
|
+
Used for incident response — broad roots are allowed but should be paired with an exposure catalog and `--findings-only` whenever possible, so the output stays focused.
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
bumblebee scan --profile deep \
|
|
127
|
+
--root "$HOME" \
|
|
128
|
+
--exposure-catalog "$CATALOG" \
|
|
129
|
+
--findings-only \
|
|
130
|
+
--max-duration 15m \
|
|
131
|
+
> "$OUT/bumblebee-deep-$TS.ndjson" \
|
|
132
|
+
2> "$OUT/bumblebee-deep-$TS.log"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
If the user has no catalog, run deep without `--findings-only` but warn them that the NDJSON file can grow large (hundreds of MB on dense developer machines).
|
|
136
|
+
|
|
137
|
+
## Step 5 — Generate the Markdown report
|
|
138
|
+
|
|
139
|
+
Run the bundled helper to turn the NDJSON into a human-readable report:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
python3 scripts/render_report.py \
|
|
143
|
+
"$OUT/bumblebee-<profile>-$TS.ndjson" \
|
|
144
|
+
"$OUT/bumblebee-<profile>-$TS.report.md"
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
The helper groups records by type and ecosystem, lists every `finding` record with its catalog entry and severity, and embeds the `scan_summary` for traceability. It is dependency-free Python 3 — no `pip install` needed.
|
|
148
|
+
|
|
149
|
+
If `render_report.py` exits non-zero (malformed NDJSON, missing summary), surface stderr to the user instead of silently producing an empty report.
|
|
150
|
+
|
|
151
|
+
## Step 6 — Present results
|
|
152
|
+
|
|
153
|
+
End the turn with:
|
|
154
|
+
|
|
155
|
+
- A short summary in chat: profile, root(s), record counts, and — most importantly — any findings with their severity. If there are zero findings, say so explicitly; silence on findings is the kind of thing that gets misread.
|
|
156
|
+
- `computer://` links to both the NDJSON and the Markdown report so the user can open them directly.
|
|
157
|
+
- If diagnostics in the `.log` file indicate skipped roots or read errors, mention it and link the log too.
|
|
158
|
+
|
|
159
|
+
Do not paste large chunks of NDJSON into the chat — it is noisy and not where the user will read it.
|
|
160
|
+
|
|
161
|
+
## Safety and privacy notes
|
|
162
|
+
|
|
163
|
+
- Bumblebee is read-only by design. Do not propose patches, deletions, or `npm uninstall` actions from inside this skill; the user runs remediation themselves once they know what is affected.
|
|
164
|
+
- MCP host configs can carry secrets in their `env` blocks. Bumblebee does not emit those values, but the `.log` file may still contain paths to sensitive config files. Treat the output files as containing inventory data and do not upload them to third-party services without the user's explicit consent (this is relevant under the GDPR, known in Germany as the DSGVO).
|
|
165
|
+
- Never run `bumblebee` with elevated privileges (`sudo`). It is meant to inspect the current user's developer environment, not the whole system.
|
|
166
|
+
|
|
167
|
+
## Failure modes to watch for
|
|
168
|
+
|
|
169
|
+
- `bumblebee: command not found` after `go install` → almost always a `PATH`/`GOBIN` problem. Show `go env GOPATH GOBIN` and `echo "$PATH"` to debug (`go env` only reports Go's own settings, so it prints nothing useful for `PATH`).
|
|
170
|
+
- `refusing to scan bare home with profile baseline` → use `deep` for `$HOME`, or pick a subdirectory for `project`.
|
|
171
|
+
- Scan times out → either narrow the `--root` set, scope with `--ecosystem`, or raise `--max-duration`. Do not loop and retry blindly.
|
|
172
|
+
- Exposure catalog rejected → check that the JSON has both `schema_version` and `entries` keys (bare top-level arrays are rejected) and that `schema_version` is one Bumblebee understands.
|
|
173
|
+
|
|
174
|
+
## Limitations
|
|
175
|
+
|
|
176
|
+
- This skill only reports local inventory and exposure matches; it does not remediate affected packages, extensions, or configs.
|
|
177
|
+
- Scan coverage depends on Bumblebee's supported ecosystems, the selected roots, and the current user's filesystem permissions.
|
|
178
|
+
- Results are point-in-time evidence and should be re-run after package installs, dependency updates, or incident-response changes.
|
|
179
|
+
|
|
180
|
+
## Reference
|
|
181
|
+
|
|
182
|
+
See `scripts/render_report.py` for the report layout. Bumblebee's own documentation lives at https://github.com/perplexityai/bumblebee — consult `docs/inventory-sources.md`, `docs/transport.md`, and `docs/state-model.md` when a question goes beyond what this skill covers.
|
|
183
|
+
|
|
184
|
+
## Credit
|
|
185
|
+
|
|
186
|
+
Bumblebee is developed by Perplexity (https://github.com/perplexityai/bumblebee, Apache-2.0). All scan logic, output formats, and exposure-catalog semantics belong to that project. This repository is just a thin Claude-skill wrapper around the official `bumblebee` CLI; the wrapper itself is MIT-licensed (see `LICENSE`).
|
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Render a Bumblebee NDJSON scan into a human-readable Markdown report.
|
|
3
|
+
|
|
4
|
+
Bumblebee emits one JSON record per line. Record types we know about:
|
|
5
|
+
|
|
6
|
+
- package — an inventory record for a discovered package
|
|
7
|
+
- finding — an exposure-catalog match (high signal)
|
|
8
|
+
- scan_summary — emitted once at end of run, contains counts/duration
|
|
9
|
+
- diagnostic — non-fatal warnings (skipped roots, parse errors)
|
|
10
|
+
|
|
11
|
+
Unknown record types are bucketed under "other" and counted but not
|
|
12
|
+
rendered in detail. The script never imports anything outside the
|
|
13
|
+
standard library — it has to run on whatever Python 3 ships with the
|
|
14
|
+
developer's machine.
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
python3 render_report.py <input.ndjson> <output.md>
|
|
18
|
+
|
|
19
|
+
Exit codes:
|
|
20
|
+
0 success
|
|
21
|
+
1 usage error
|
|
22
|
+
2 input file unreadable or empty
|
|
23
|
+
3 no records parsed (likely malformed file)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import sys
|
|
30
|
+
from collections import Counter, defaultdict
|
|
31
|
+
from datetime import datetime, timezone
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import Any
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Severity labels from most to least urgent; a label's index is its sort rank.
SEVERITY_ORDER = ["critical", "high", "medium", "low", "info", "unknown"]


def severity_rank(value: str | None) -> int:
    """Return a sort key for a severity label; lower values sort first.

    Matching is case-insensitive and ignores surrounding whitespace.
    Anything that is not a recognised label -- ``None``, an empty string,
    an unlisted word, or a non-string value such as a number taken straight
    from JSON -- gets ``len(SEVERITY_ORDER)`` so it sorts after every known
    severity instead of raising.
    """
    unranked = len(SEVERITY_ORDER)
    # Records come from untrusted JSON, so the value may not be a string at
    # all; calling .lower() on it would raise AttributeError.
    if not isinstance(value, str):
        return unranked
    try:
        return SEVERITY_ORDER.index(value.strip().lower())
    except ValueError:
        return unranked
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def load_records(path: Path) -> list[dict[str, Any]]:
    """Parse an NDJSON file into a list of JSON-object records.

    Blank lines and surrounding whitespace are ignored. Lines that cannot
    be used are reported on stderr and skipped rather than aborting the
    run, so one truncated or odd line does not lose the rest of the report:

    - lines that are not valid JSON;
    - lines that are valid JSON but not an object (arrays, numbers,
      strings, ...), because every consumer of the result calls ``.get``
      on each record.

    Args:
        path: Location of the NDJSON file to read.

    Returns:
        The parsed records, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    records: list[dict[str, Any]] = []
    # "utf-8-sig" drops a leading byte-order mark that would otherwise make
    # the first line unparseable; errors="replace" keeps a cut-off
    # multi-byte sequence from raising UnicodeDecodeError for the whole file.
    with path.open("r", encoding="utf-8-sig", errors="replace") as fh:
        for lineno, raw in enumerate(fh, start=1):
            line = raw.strip()
            if not line:
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError as exc:
                print(
                    f"warning: skipping malformed line {lineno}: {exc}",
                    file=sys.stderr,
                )
                continue
            if not isinstance(parsed, dict):
                print(
                    f"warning: skipping non-object record on line {lineno}: "
                    f"got {type(parsed).__name__}",
                    file=sys.stderr,
                )
                continue
            records.append(parsed)
    return records
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def group_by_kind(records: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
    """Bucket records by their kind, preserving input order within a bucket.

    The kind is read from ``record_type``, falling back to ``type`` and
    finally to the literal ``"unknown"`` when both are missing or empty.
    A kind that is not a string is converted with ``str()`` so that every
    key is hashable and keys can be sorted together.

    Args:
        records: Parsed JSON-object records.

    Returns:
        A mapping from kind to the records of that kind.
    """
    groups: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for rec in records:
        kind = rec.get("record_type") or rec.get("type") or "unknown"
        # JSON permits lists, objects and numbers here; a list or dict is
        # unhashable and would raise TypeError when used as a key.
        if not isinstance(kind, str):
            kind = str(kind)
        groups[kind].append(rec)
    return groups
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def render_findings(findings: list[dict[str, Any]]) -> str:
    """Render the "Findings" section as Markdown, most severe match first.

    Each finding is looked up by a preferred key first (for example
    ``normalized_name`` and ``source_file``) with a short list of fallback
    keys, so records using alternative field names still render. Findings
    are ordered by severity rank and then by package name.

    Severity and package name are treated as text even when the record
    carries some other JSON type, so an unexpected number or list cannot
    crash the sort or the heading.

    Args:
        findings: Records whose kind is a finding.

    Returns:
        The Markdown section; a fixed "no matches" section when
        ``findings`` is empty.
    """
    if not findings:
        return "## Findings\n\nNo exposure-catalog matches.\n"

    severity_keys = ("severity", "catalog_severity")
    name_keys = ("normalized_name", "package", "name", "package_name")

    def sort_key(finding: dict[str, Any]) -> tuple[int, str]:
        severity = _get(finding, *severity_keys)
        # Only hand real strings to severity_rank, and compare names as
        # text: mixed str/int names would raise TypeError inside sorted().
        rank = severity_rank(severity if isinstance(severity, str) else None)
        return rank, str(_get(finding, *name_keys) or "")

    # Sort by severity (critical first) then by package name.
    sorted_findings = sorted(findings, key=sort_key)

    out = [f"## Findings\n\n**{len(sorted_findings)} match(es) against exposure catalog.**\n"]
    for finding in sorted_findings:
        # str() guards the .upper() call below against non-string values.
        severity = str(_get(finding, *severity_keys) or "unknown")
        catalog_id = _get(finding, "catalog_id", "advisory_id", "id") or "—"
        catalog_name = _get(finding, "catalog_name", "advisory", "name") or ""
        ecosystem = _get(finding, "ecosystem") or "?"
        pkg = _get(finding, *name_keys) or "?"
        version = _get(finding, "version", "matched_version") or "?"
        # Several key spellings are accepted for the evidence location;
        # the project path is shown only when it adds information.
        source_file = _get(finding, "source_file", "source_path", "evidence_path", "path") or ""
        project_path = _get(finding, "project_path") or ""
        finding_type = _get(finding, "finding_type") or ""

        out.append(f"### [{severity.upper()}] {pkg}@{version} ({ecosystem})")
        out.append("")
        out.append(f"- Catalog entry: `{catalog_id}`" + (f" — {catalog_name}" if catalog_name else ""))
        if finding_type:
            out.append(f"- Finding type: {finding_type}")
        if source_file:
            out.append(f"- Source file: `{source_file}`")
        if project_path and project_path != source_file:
            out.append(f"- Project path: `{project_path}`")
        confidence = _get(finding, "confidence")
        if confidence:
            out.append(f"- Confidence: {confidence}")
        root_kind = _get(finding, "root_kind")
        if root_kind:
            out.append(f"- Root kind: {root_kind}")
        evidence = _get(finding, "evidence")
        # Only scalar evidence is printed inline; structured evidence is
        # left to the raw NDJSON.
        if evidence and isinstance(evidence, (str, int, float)):
            out.append(f"- Evidence: {evidence}")
        out.append("")
    return "\n".join(out)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _inventory_label(value: Any) -> str:
    """Return a table-safe label, mapping missing or empty values to "unknown"."""
    if value is None or value == "":
        return "unknown"
    # A raw "|" would be read as a column separator in a Markdown table.
    return str(value).replace("|", "\\|")


def _count_table(title: str, column: str, counts: Counter[str]) -> list[str]:
    """Return the lines of one "### title" count table, largest count first."""
    lines = ["", f"### {title}", "", f"| {column} | Count |", "| --- | ---: |"]
    lines.extend(f"| {label} | {count:,} |" for label, count in counts.most_common())
    return lines


def render_inventory(packages: list[dict[str, Any]]) -> str:
    """Render the "Inventory" section: a total plus three count tables.

    Package records are tallied by ``ecosystem``, ``root_kind`` and
    ``confidence``. A field that is absent, ``null`` or empty is counted
    under "unknown"; any other value is converted to text, so values of an
    unexpected JSON type cannot break the tally.

    Args:
        packages: Records whose kind is a package.

    Returns:
        The Markdown section; a fixed placeholder section when
        ``packages`` is empty.
    """
    if not packages:
        return "## Inventory\n\nNo package records emitted (findings-only mode?).\n"

    tables = (
        ("By ecosystem", "Ecosystem", "ecosystem"),
        ("By root kind", "Root kind", "root_kind"),
        ("By confidence", "Confidence", "confidence"),
    )

    lines = ["## Inventory", "", f"Total package records: **{len(packages):,}**"]
    for title, column, key in tables:
        counts = Counter(_inventory_label(pkg.get(key)) for pkg in packages)
        lines += _count_table(title, column, counts)
    lines.append("")
    return "\n".join(lines)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def render_summary(summary_records: list[dict[str, Any]]) -> str:
    """Render the "Scan summary" section from scan_summary record(s).

    For each record, in order:

    1. a warning block when ``status`` is present and not ``"complete"``;
    2. the known flat fields, in a fixed order (identity, status, timing,
       counts, delivery, versioning), skipping ``None`` and empty strings;
    3. ``roots`` -- at most the first 20 entries of a list, or the raw
       value when it is not a list;
    4. a legacy nested ``counts`` / ``totals`` value, if any;
    5. every remaining non-empty field, so new keys are never dropped.

    Args:
        summary_records: Records whose kind is ``scan_summary``.

    Returns:
        The Markdown section; a fixed notice when no record is given.
    """
    if not summary_records:
        return "## Scan summary\n\n_No `scan_summary` record found — the run may not have completed cleanly._\n"

    # Order matters: identity → status → timing → counts → delivery → versioning.
    canonical_fields = (
        "endpoint_id",
        "profile",
        "run_id",
        "status",
        "scan_time",
        "end_time",
        "timed_out",
        "package_records_emitted",
        "package_records_suppressed",
        "findings_emitted",
        "diagnostics_count",
        "http_batches_attempted",
        "http_batches_succeeded",
        "http_batches_failed",
        "http_last_status",
        "scanner_version",
        "schema_version",
    )
    # Keys rendered (or deliberately skipped) above the "other fields" list.
    rendered = set(canonical_fields) | {"roots", "counts", "totals", "record_type", "type", "record_id"}
    max_roots = 20

    out = ["## Scan summary", ""]
    for idx, rec in enumerate(summary_records, start=1):
        if len(summary_records) > 1:
            out.append(f"### Summary {idx}")
            out.append("")

        status = rec.get("status")
        if status and status != "complete":
            out.append(f"> **Status `{status}`** — this run is not a trustworthy complete snapshot. Treat as raw evidence only.")
            out.append("")

        # Falsy-but-meaningful values such as 0 and False are kept; only
        # None and the empty string are treated as "not reported".
        for key in canonical_fields:
            if key in rec and rec[key] not in (None, ""):
                out.append(f"- **{key}**: `{rec[key]}`")

        # Nested bullets use a two-space indent so Markdown renders them
        # under their parent item rather than as siblings.
        roots = rec.get("roots")
        if roots:
            if isinstance(roots, list):
                out.append(f"- **roots**: {len(roots)} root(s) scanned")
                for r in roots[:max_roots]:
                    if isinstance(r, dict):
                        kind = r.get("kind") or r.get("root_kind") or "?"
                        path = r.get("path") or r.get("root") or "?"
                        out.append(f"  - `{kind}`: `{path}`")
                    else:
                        out.append(f"  - `{r}`")
                if len(roots) > max_roots:
                    out.append(f"  - _… {len(roots) - max_roots} more truncated_")
            else:
                out.append(f"- **roots**: `{roots}`")

        # Legacy / fixture shape: nested counts dict. Any other truthy
        # value is printed as-is instead of failing on .items().
        counts = rec.get("counts") or rec.get("totals") or {}
        if isinstance(counts, dict):
            if counts:
                out.append("- **counts** (legacy):")
                for k, v in counts.items():
                    out.append(f"  - {k}: {v}")
        else:
            out.append(f"- **counts** (legacy): `{counts}`")

        extras = {k: v for k, v in rec.items() if k not in rendered and v not in (None, "", [], {})}
        if extras:
            out.append("- **other fields**:")
            for k, v in extras.items():
                out.append(f"  - {k}: `{v}`")

        out.append("")
    return "\n".join(out)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def render_diagnostics(diagnostics: list[dict[str, Any]]) -> str:
    """Render the "Diagnostics" section, or an empty string when there are none.

    At most the first 50 records are listed; when more exist, a closing
    note reports how many were left out. Each bullet shows the record's
    level (defaulting to "info"), its message -- taken from ``message``,
    then ``msg``, then the whole record dumped as sorted-key JSON -- and,
    when present, the path from ``path`` or ``source_path``.
    """
    total = len(diagnostics)
    if total == 0:
        return ""

    shown_limit = 50  # Cap output — log file has full detail.
    lines = [
        "## Diagnostics",
        "",
        f"{total} diagnostic record(s) emitted on stdout.",
        "",
    ]

    for entry in diagnostics[:shown_limit]:
        severity = entry.get("level", "info")
        text = entry.get("message") or entry.get("msg") or json.dumps(entry, sort_keys=True)
        location = entry.get("path") or entry.get("source_path") or ""
        bullet = f"- **{severity}**: {text}"
        if location:
            bullet += f" — `{location}`"
        lines.append(bullet)

    hidden = total - shown_limit
    if hidden > 0:
        lines += [
            "",
            f"_… {hidden} more diagnostic record(s) truncated. See the `.log` file for the full list._",
        ]

    lines.append("")
    return "\n".join(lines)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _get(record: dict[str, Any], *keys: str) -> Any:
|
|
294
|
+
"""Return the first non-empty value found for any of the candidate keys."""
|
|
295
|
+
for key in keys:
|
|
296
|
+
value = record.get(key)
|
|
297
|
+
if value not in (None, "", [], {}):
|
|
298
|
+
return value
|
|
299
|
+
return None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def build_report(records: list[dict[str, Any]], source_path: Path) -> str:
    """Assemble the full Markdown report from parsed records.

    Records are grouped by kind; singular and plural spellings of finding,
    package and diagnostic are merged. Kinds that have no dedicated section
    are listed in the header with their counts. Sections appear in the
    order header, findings, scan summary, inventory, diagnostics, and any
    section that renders to an empty string is omitted.

    Args:
        records: All parsed records from the scan output.
        source_path: Path shown in the header as the report's source.

    Returns:
        The report text, ending in exactly one newline.
    """
    groups = group_by_kind(records)
    findings = groups.get("finding", []) + groups.get("findings", [])
    packages = groups.get("package", []) + groups.get("packages", [])
    summaries = groups.get("scan_summary", [])
    diagnostics = groups.get("diagnostic", []) + groups.get("diagnostics", [])

    known_kinds = {"finding", "findings", "package", "packages", "scan_summary", "diagnostic", "diagnostics"}
    other_kinds = {kind: len(items) for kind, items in groups.items() if kind not in known_kinds}

    generated = datetime.now(timezone.utc).isoformat(timespec="seconds")
    header = [
        "# Bumblebee Scan Report",
        "",
        f"- Source: `{source_path}`",
        f"- Generated: `{generated}`",
        f"- Total records: **{len(records):,}**",
    ]
    if other_kinds:
        # Order by the text form of the kind: kinds come from JSON and may
        # not all be strings, and comparing e.g. int with str raises
        # TypeError.
        listed = sorted(other_kinds.items(), key=lambda item: str(item[0]))
        header.append(f"- Other record types: {', '.join(f'{k} ({v})' for k, v in listed)}")
    header.append("")

    sections = [
        "\n".join(header),
        render_findings(findings),
        render_summary(summaries),
        render_inventory(packages),
        render_diagnostics(diagnostics),
    ]
    return "\n".join(s for s in sections if s).rstrip() + "\n"
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def main(argv: list[str]) -> int:
    """Command-line entry point: render ``argv[1]`` (NDJSON) into ``argv[2]`` (Markdown).

    Args:
        argv: Full argument vector including the program name, i.e.
            ``[prog, input.ndjson, output.md]``.

    Returns:
        The process exit code:

        - 0: report written;
        - 1: wrong number of arguments;
        - 2: input missing, empty, or unreadable;
        - 3: input readable but no records could be parsed.
    """
    if len(argv) != 3:
        # argv can be empty when main() is called programmatically.
        prog = Path(argv[0]).name if argv else "render_report.py"
        print(f"usage: {prog} <input.ndjson> <output.md>", file=sys.stderr)
        return 1

    input_path = Path(argv[1])
    output_path = Path(argv[2])

    if not input_path.exists() or input_path.stat().st_size == 0:
        print(f"error: {input_path} is missing or empty", file=sys.stderr)
        return 2

    try:
        records = load_records(input_path)
    except (OSError, UnicodeDecodeError) as exc:
        # A directory, a permission problem or undecodable bytes all mean
        # "input unreadable": report it with exit code 2 rather than a
        # traceback.
        print(f"error: cannot read {input_path}: {exc}", file=sys.stderr)
        return 2
    if not records:
        print("error: no JSON records parsed from input", file=sys.stderr)
        return 3

    report = build_report(records, input_path)
    # Create missing parent directories so a fresh output folder works.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(report, encoding="utf-8")
    print(f"wrote {output_path} ({len(records)} records)")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|