vtk 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,849 @@
1
+ #!/bin/bash
2
+ #
3
+ # Shai-Hulud Repository Scanner
4
+ # =============================
5
+ #
6
+ # Scans a repository (or directory tree) for compromised npm packages
7
+ # and backdoor GitHub workflow files associated with the Shai-Hulud attack.
8
+ #
9
+ # WHAT THIS CHECKS:
10
+ #
11
+ # Lockfiles (package-lock.json, yarn.lock, pnpm-lock.yaml):
12
+ # - Compares installed packages against known compromised package list
13
+ # - Supports recursive scanning for monorepos
14
+ #
15
+ # Backdoor Workflows (.github/workflows/):
16
+ # - discussion.yaml: Self-hosted runner with unescaped discussion body
17
+ # - formatter_*.yml: Secrets extraction workflows
18
+ #
19
+ # EXIT CODES:
20
+ # 0 - Clean (no issues found)
21
+ # 1 - INFECTED (compromised packages found)
22
+ # 2 - WARNING (backdoor workflows found, but no compromised packages)
23
+ #
24
+ # USAGE:
25
+ # ./shai-hulud-repo-check.sh [PATH] # Scan directory (default: current dir)
26
+ # ./shai-hulud-repo-check.sh -r [PATH] # Recursive scan (default depth: 5)
27
+ # ./shai-hulud-repo-check.sh -r --depth=0 [PATH] # Recursive with unlimited depth (--depth only applies with -r)
28
+ # ./shai-hulud-repo-check.sh --json [PATH] # JSON output
29
+ # ./shai-hulud-repo-check.sh --quiet [PATH] # Exit code only
30
+ # ./shai-hulud-repo-check.sh --refresh [PATH] # Force refresh package list
31
+ #
32
+ # KNOWN LIMITATIONS:
33
+ # - pnpm-lock.yaml parsing tested with pnpm v6/v7/v9; other versions may vary
34
+ # - Does not scan node_modules directly (lockfile is source of truth)
35
+ # - Short flags cannot be combined (use "-r -j" not "-rj")
36
+ #
37
+ # References:
38
+ # - https://department-of-veterans-affairs.github.io/eert/shai-hulud-dev-machine-cleanup-playbook
39
+ #
40
+ # Author: Eric Boehs / EERT (with Claude Code)
41
+ # Version: 1.0.0
42
+ # Date: December 2025
43
+ #
44
+
45
# Abort on the first unhandled command failure.
set -e

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Upstream list that is downloaded and cached locally.
COMPROMISED_PACKAGES_URL="https://raw.githubusercontent.com/Cobenian/shai-hulud-detect/main/compromised-packages.txt"

# Cache location: $XDG_CACHE_HOME when set and non-empty, otherwise ~/.cache.
CACHE_DIR="${XDG_CACHE_HOME:-$HOME/.cache}/vtk"
CACHE_FILE="${CACHE_DIR}/compromised-packages.txt"
CACHE_TTL=$((24 * 60 * 60)) # seconds (24 hours)

# Sanity checks applied to a freshly downloaded list.
MIN_EXPECTED_PACKAGES=500
EXPECTED_HEADER="Shai-Hulud NPM Supply Chain Attack"

# Printed alongside any finding.
PLAYBOOK_URL="https://department-of-veterans-affairs.github.io/eert/shai-hulud-dev-machine-cleanup-playbook"
55
+
56
# Parse arguments
QUIET=false
JSON=false
RECURSIVE=false
REFRESH=false
VERBOSE=false
MAX_DEPTH=5
SCAN_PATH=""

# Print the command-line help text to stdout.
print_usage() {
  echo "Usage: $0 [OPTIONS] [PATH]"
  echo ""
  echo "Scan a repository for compromised packages and backdoor workflows."
  echo ""
  echo "Options:"
  echo " -h, --help Display this help message"
  echo " -j, --json Output results as JSON"
  echo " -q, --quiet Exit code only, no output"
  echo " -r, --recursive Recursively scan subdirectories (default depth: 5)"
  echo " -v, --verbose Show each lockfile path as it's scanned"
  echo " --depth=N Max directory depth for recursive scan (default: 5, 0=unlimited)"
  echo " --refresh Force refresh of compromised packages list"
  echo ""
  echo "Arguments:"
  echo " PATH Directory to scan (default: current directory)"
  echo ""
  echo "Exit Codes:"
  echo " 0 Clean - no issues found"
  echo " 1 INFECTED - compromised packages found"
  echo " 2 WARNING - backdoor workflows found"
}

# Populate the option globals from the given command-line arguments.
#
# Globals written: QUIET, JSON, RECURSIVE, REFRESH, VERBOSE, MAX_DEPTH, SCAN_PATH
# Arguments:       the script's arguments
# Exits:           0 after printing help; 1 on an unknown option or when
#                  --depth is not a non-negative integer
#
# Only the first positional argument is used as the scan path; later ones are
# ignored.
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --quiet | -q) QUIET=true ;;
      --json | -j) JSON=true ;;
      --recursive | -r) RECURSIVE=true ;;
      --refresh) REFRESH=true ;;
      --verbose | -v) VERBOSE=true ;;
      --depth=*)
        MAX_DEPTH="${arg#*=}"
        # MAX_DEPTH is later used in numeric tests and passed to find; reject
        # anything that is not a plain non-negative integer up front instead
        # of silently scanning with no depth limit.
        case "$MAX_DEPTH" in
          '' | *[!0-9]*)
            echo "Invalid depth: '$MAX_DEPTH' (expected a non-negative integer)" >&2
            exit 1
            ;;
        esac
        ;;
      --help | -h)
        print_usage
        exit 0
        ;;
      -*)
        echo "Unknown option: $arg" >&2
        exit 1
        ;;
      *)
        if [ -z "$SCAN_PATH" ]; then
          SCAN_PATH="$arg"
        fi
        ;;
    esac
  done
}

parse_args "$@"
107
+
108
# Default to current directory
SCAN_PATH="${SCAN_PATH:-.}"

# Replace SCAN_PATH with its absolute form.
#
# Globals: SCAN_PATH (read and written)
# Exits:   1 with an error naming the requested path when it cannot be entered
resolve_scan_path() {
  # Keep the requested value in its own variable: assigning the result of a
  # failed command substitution straight to SCAN_PATH would empty it before
  # the error message is printed.
  local requested="$SCAN_PATH"
  local resolved

  # CDPATH is cleared for the cd so it cannot redirect the lookup or make cd
  # print the directory name into the captured output.
  if ! resolved="$(CDPATH='' cd -- "$requested" 2>/dev/null && pwd)"; then
    echo "ERROR: Directory not found: $requested" >&2
    exit 1
  fi

  SCAN_PATH="$resolved"
}

resolve_scan_path
116
+
117
# Results tracking. Findings are stored as "value|context" strings.
declare -a COMPROMISED_FINDINGS=() BACKDOOR_FINDINGS=() WARNINGS=()
declare -a LOCKFILES_SCANNED=() # Format: "path|package_count"
TOTAL_PACKAGES_SCANNED=0

# Colors: start with none, then enable them only for human-readable output
# written to a terminal (never in quiet/json mode).
RED=''
YELLOW=''
GREEN=''
BOLD=''
NC=''
if [ "$QUIET" = false ] && [ "$JSON" = false ] && [ -t 1 ]; then
  RED='\033[0;31m'
  YELLOW='\033[0;33m'
  GREEN='\033[0;32m'
  BOLD='\033[1m'
  NC='\033[0m'
fi
134
+
135
+ # Logging
136
# Print a message to stdout (backslash escapes interpreted) unless quiet or
# JSON mode is active. Always returns success when output is suppressed.
log() {
  if [ "$QUIET" != false ] || [ "$JSON" != false ]; then
    return 0
  fi
  echo -e "$@"
}
141
+
142
# Print a status message to stderr (backslash escapes interpreted) unless
# quiet or JSON mode is active. Always returns success when suppressed.
log_status() {
  if [ "$QUIET" != false ] || [ "$JSON" != false ]; then
    return 0
  fi
  echo -e "$@" >&2
}
147
+
148
+ ###########################################
149
+ # CACHE MANAGEMENT
150
+ ###########################################
151
+
152
# Create the cache directory (and any missing parents).
#
# Globals: CACHE_DIR (read)
# Returns: mkdir's exit status
ensure_cache_dir() {
  # "--" keeps a directory name that starts with "-" (possible when
  # XDG_CACHE_HOME is a relative path) from being parsed as mkdir options.
  mkdir -p -- "$CACHE_DIR"
}
155
+
156
# Report whether the cached package list needs refreshing.
#
# Globals: CACHE_FILE, CACHE_TTL (read)
# Returns: 0 (stale) when the cache file is missing, its modification time
#          cannot be determined, or it is older than CACHE_TTL seconds;
#          non-zero (fresh) otherwise
cache_stale() {
  if [ ! -f "$CACHE_FILE" ]; then
    return 0 # true, cache is stale (doesn't exist)
  fi

  local mtime now
  # Probe the stat flavor by behavior rather than by OSTYPE: GNU stat takes
  # "-c %Y", BSD/macOS stat takes "-f %m". If neither works, treat the cache
  # as stale so a refresh is attempted.
  mtime=$(stat -c %Y -- "$CACHE_FILE" 2>/dev/null) \
    || mtime=$(stat -f %m -- "$CACHE_FILE" 2>/dev/null) \
    || return 0

  case "$mtime" in
    '' | *[!0-9]*) return 0 ;;
  esac

  now=$(date +%s)
  [ $((now - mtime)) -gt "$CACHE_TTL" ]
}
170
+
171
# Sanity-check downloaded list content before it is cached.
#
# Arguments: $1 - the full text of the downloaded list
# Globals:   EXPECTED_HEADER, MIN_EXPECTED_PACKAGES (read)
# Outputs:   the reason for rejection on stderr
# Returns:   0 when the content contains the expected header and has at least
#            MIN_EXPECTED_PACKAGES entries; 1 otherwise
validate_package_list() {
  local content="$1"
  local package_count

  # The header must appear somewhere in the content.
  if ! grep -q "$EXPECTED_HEADER" <<<"$content"; then
    echo "Downloaded file missing expected header - possible MITM or corrupted file" >&2
    return 1
  fi

  # Entries are the lines that do not start with '#' and contain a colon.
  package_count=$(grep -v '^#' <<<"$content" | grep -c ':' || true)

  if [ "$package_count" -lt "$MIN_EXPECTED_PACKAGES" ]; then
    echo "Downloaded file has only $package_count packages (expected $MIN_EXPECTED_PACKAGES+)" >&2
    return 1
  fi

  return 0
}
191
+
192
# Download the compromised-package list, validate it and store it in the cache.
#
# Globals: COMPROMISED_PACKAGES_URL, CACHE_FILE (read)
# Outputs: progress via log_status; failure reasons on stderr
# Returns: 0 when a validated list was written to CACHE_FILE; 1 on download,
#          validation or write failure (an existing cache file is then left
#          untouched)
fetch_compromised_packages() {
  log_status "Fetching compromised packages list..."

  local content
  if command -v curl &>/dev/null; then
    # Timeouts keep an unresponsive server from hanging the scan forever.
    content=$(curl -sS --fail --connect-timeout 10 --max-time 60 \
      -A "vtk-security-scanner" "$COMPROMISED_PACKAGES_URL") || {
      echo "Failed to fetch compromised packages list" >&2
      return 1
    }
  elif command -v wget &>/dev/null; then
    content=$(wget -qO- --timeout=30 --user-agent="vtk-security-scanner" \
      "$COMPROMISED_PACKAGES_URL") || {
      echo "Failed to fetch compromised packages list" >&2
      return 1
    }
  else
    echo "ERROR: Neither curl nor wget found" >&2
    return 1
  fi

  if ! validate_package_list "$content"; then
    return 1
  fi

  # Write to a temporary file in the cache directory and rename it into
  # place, so an interrupted or failed write can never leave a truncated list
  # behind for later runs to use.
  local tmp_file
  tmp_file=$(mktemp "${CACHE_FILE}.XXXXXX") || {
    echo "Failed to create temporary file for $CACHE_FILE" >&2
    return 1
  }
  if ! printf '%s\n' "$content" >"$tmp_file" || ! mv -f -- "$tmp_file" "$CACHE_FILE"; then
    rm -f -- "$tmp_file"
    echo "Failed to write $CACHE_FILE" >&2
    return 1
  fi

  local count
  count=$(printf '%s\n' "$content" | grep -v '^#' | grep -c ':' || true)
  log_status "Cached $count compromised packages"
}
220
+
221
# Make sure a compromised-package list is available in the cache.
#
# A download is attempted when --refresh was given or the cache is stale. If
# the download fails, an existing cache file is used with a warning.
#
# Globals: REFRESH, CACHE_FILE (read)
# Exits:   1 when no list is available at all
load_compromised_packages() {
  ensure_cache_dir

  local refresh_needed=false
  if [ "$REFRESH" = true ] || cache_stale; then
    refresh_needed=true
  fi

  if [ "$refresh_needed" = true ] && ! fetch_compromised_packages; then
    if [ -f "$CACHE_FILE" ]; then
      log_status "WARNING: Using cached version"
    else
      echo "ERROR: No compromised packages list available. Check your network connection." >&2
      exit 1
    fi
  fi

  if [ -f "$CACHE_FILE" ]; then
    return 0
  fi

  echo "ERROR: No compromised packages list available." >&2
  exit 1
}
239
+
240
# Test whether a "name:version" spec appears as a whole line in the cached list.
#
# Arguments: $1 - package spec, e.g. "lodash:4.17.21"
# Globals:   CACHE_FILE (read)
# Returns:   0 when the spec is listed; non-zero otherwise (including when the
#            spec is empty or the cache file cannot be read)
is_compromised() {
  local package="$1"

  # An empty pattern would match blank lines in the list.
  [ -n "$package" ] || return 1

  # -e / -- keep a spec that starts with "-" from being parsed as grep
  # options, which would fail and be reported as "not compromised".
  grep -qxF -e "$package" -- "$CACHE_FILE" 2>/dev/null
}
244
+
245
+ ###########################################
246
+ # LOCKFILE PARSING
247
+ ###########################################
248
+
249
# List the lockfiles to scan, one path per line.
#
# Arguments: $1 - directory to search
# Globals:   RECURSIVE, MAX_DEPTH (read)
# Outputs:   paths of package-lock.json / yarn.lock / pnpm-lock.yaml files.
#            Recursive mode searches the tree (limited to MAX_DEPTH levels
#            unless MAX_DEPTH is 0) and prints the paths sorted; otherwise only
#            files directly inside $1 are considered. Prints nothing when no
#            lockfile is found.
find_lockfiles() {
  local dir="$1"
  local -a lockfiles=()
  local path name

  if [ "$RECURSIVE" = true ]; then
    # Build the optional -maxdepth arguments as an array instead of relying on
    # word splitting of a string.
    local -a depth_args=()
    if [ "$MAX_DEPTH" -gt 0 ] 2>/dev/null; then
      depth_args=(-maxdepth "$MAX_DEPTH")
    fi

    # Results are emitted newline-delimited below, so plain "sort" is used:
    # "sort -z" is an extension that not every sort implementation provides.
    while IFS= read -r path; do
      if [ -n "$path" ]; then
        lockfiles+=("$path")
      fi
    done < <(
      find "$dir" "${depth_args[@]}" -type f \
        \( -name "package-lock.json" -o -name "yarn.lock" -o -name "pnpm-lock.yaml" \) \
        -print 2>/dev/null | sort
    )
  else
    for name in package-lock.json yarn.lock pnpm-lock.yaml; do
      if [ -f "$dir/$name" ]; then
        lockfiles+=("$dir/$name")
      fi
    done
  fi

  if [ "${#lockfiles[@]}" -gt 0 ]; then
    printf '%s\n' "${lockfiles[@]}"
  fi
}
269
+
270
# Parse package-lock.json and extract package:version pairs.
#
# Arguments: $1 - path to a package-lock.json file
# Outputs:   one "name:version" line per entry found (duplicates possible;
#            callers de-duplicate)
# Returns:   always 0; unreadable input yields no output
#
# Both layouts are handled without a jq dependency, for pretty-printed and
# minified JSON alike:
#   - "packages" section: keys of the form "node_modules/<name>"
#   - "dependencies" section: keys that are the bare package name
# All line splitting is done inside awk so the result does not depend on how
# a particular sed treats "\n" in a replacement.
parse_package_lock() {
  local file="$1"

  # Pass 1: "packages" section.
  awk '
    # Examine one segment; a segment holds at most one "node_modules/..." key,
    # located at its start.
    function scan(seg,    name, ver) {
      if (seg ~ /"node_modules\/[^"]+":/) {
        name = seg
        sub(/.*"node_modules\//, "", name)
        sub(/".*/, "", name)
        # Nested installs look like a/node_modules/b: keep only the part
        # after the last node_modules/ so the real package name is reported.
        sub(/.*node_modules\//, "", name)
        pkg = name
      }
      # First version string in the segment belongs to the current key.
      if (pkg != "" && match(seg, /"version": *"[^"]+"/)) {
        ver = substr(seg, RSTART, RLENGTH)
        sub(/^"version": *"/, "", ver)
        sub(/"$/, "", ver)
        print pkg ":" ver
        pkg = ""
      }
    }
    {
      line = $0
      # Start a new segment in front of every "node_modules/ so minified
      # files are processed entry by entry.
      gsub(/"node_modules\//, "\n&", line)
      n = split(line, segs, "\n")
      for (i = 1; i <= n; i++) scan(segs[i])
    }
  ' "$file" 2>/dev/null || true

  # Pass 2: "dependencies" section.
  awk '
    # Examine one segment; after splitting, an object key can only sit at the
    # very end of a segment, and everything before it belongs to the
    # previously opened object.
    function scan(seg,    key, ver) {
      # Leaving a block: forget the current package.
      if (seg ~ /^[ \t]*[}]/) pkg = ""

      if (pkg != "" && match(seg, /"version": *"[^"]*"/)) {
        ver = substr(seg, RSTART, RLENGTH)
        sub(/^"version": *"/, "", ver)
        sub(/"$/, "", ver)
        # Only accept semver-like versions
        if (ver ~ /^[0-9]+\.[0-9]+\.[0-9]/) {
          print pkg ":" ver
          pkg = ""
        }
      }

      if (seg ~ /"[^"]+": [{]$/) {
        key = seg
        # Strip the trailing part first, then everything up to the opening
        # quote; doing it the other way round leaves no name at all.
        sub(/": [{]$/, "", key)
        sub(/.*"/, "", key)
        # Skip metadata keys
        if (key ~ /^(packages|name|lockfileVersion|requires|dependencies|devDependencies|optionalDependencies)$/) {
          key = ""
        }
        # Path-like keys (node_modules/..., workspace paths) are not package
        # names; only @scope/name may contain a slash.
        if (index(key, "/") > 0 && substr(key, 1, 1) != "@") {
          key = ""
        }
        pkg = key
      }
    }
    {
      line = $0
      # End a segment after every object opening.
      gsub(/": *[{]/, "\": {\n", line)
      n = split(line, segs, "\n")
      for (i = 1; i <= n; i++) scan(segs[i])
    }
  ' "$file" 2>/dev/null || true
}
329
+
330
# Parse yarn.lock and extract package:version pairs.
#
# Arguments: $1 - path to a yarn.lock file
# Outputs:   sorted, de-duplicated "name:version" lines
# Returns:   always 0; unreadable input yields no output
#
# Recognised entry shapes:
#   "package@^1.0.0":            lodash@^4.17.0, lodash@^4.17.21:
#     version "1.2.3"              version "4.17.21"
# and the newer layout that writes the field as
#     version: 1.2.3
# The version line is matched with any amount of leading indentation.
parse_yarn_lock() {
  local file="$1"

  awk '
    # Entry header: the package name is the text before the "@" that starts
    # the version specifier.
    /^"?[^#@][^"]*@/ || /^"?@/ {
      spec = $0
      sub(/^"/, "", spec)
      sub(/".*/, "", spec)
      split(spec, parts, "@")
      if ($0 ~ /^"?@/ || parts[1] == "") {
        # Scoped package: @scope/name (parts[1] is empty when spec starts with @)
        pkg = "@" parts[2]
      } else {
        pkg = parts[1]
      }
      next
    }
    # Version field of the current entry, quoted or not, with or without colon.
    /^[ \t]+version:?[ \t]/ {
      ver = $0
      sub(/^[ \t]+version:?[ \t]+"?/, "", ver)
      sub(/".*/, "", ver)
      if (pkg != "" && ver != "") {
        print pkg ":" ver
        pkg = ""
      }
    }
  ' "$file" 2>/dev/null | sort -u || true
}
362
+
363
# Parse pnpm-lock.yaml and extract package:version pairs.
#
# Arguments: $1 - path to a pnpm-lock.yaml file
# Outputs:   one "name:version" line per matching entry key
# Returns:   always 0; unreadable input yields no output
#
# Matches indented entry keys of these shapes (optionally single-quoted):
#   /@scope/pkg@1.2.3:
#   /pkg@1.2.3:
#   pkg@1.2.3(peer@4.5.6):
#   /pkg@1.2.3_peer@4.5.6:
# The version stops at the first ":", "'", "(" or "_" so peer-dependency
# suffixes are not reported as part of the version.
parse_pnpm_lock() {
  local file="$1"

  sed -n -E \
    "s/^[[:space:]]+'?\/?(@?[^@:'[:space:]]+)@([0-9][^:'(_[:space:]]*).*/\1:\2/p" \
    "$file" 2>/dev/null || true
}
373
+
374
# Dispatch a lockfile to the parser that matches its file name.
#
# Arguments: $1 - path to a lockfile
# Outputs:   whatever the selected parser prints; nothing for other names
parse_lockfile() {
  local file="$1"

  case "$(basename "$file")" in
    package-lock.json) parse_package_lock "$file" ;;
    yarn.lock) parse_yarn_lock "$file" ;;
    pnpm-lock.yaml) parse_pnpm_lock "$file" ;;
  esac
}
391
+
392
# Escape a string for use inside a JSON string literal: backslash, double
# quote, newline, carriage return and tab. Prints the result without a
# trailing newline.
_json_escape() {
  local s="$1"
  local nl=$'\n' cr=$'\r' tab=$'\t'
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$nl/\\n}"
  s="${s//$cr/\\r}"
  s="${s//$tab/\\t}"
  printf '%s' "$s"
}

# Print one JSON object (a single line) describing the scan of one lockfile.
#
# Arguments: $1  - lockfile path
#            $2  - number of packages scanned in it
#            $3+ - compromised package specs found in it (may be none)
# Outputs:   {"lockfile":..,"packages_scanned":..,"status":..,"compromised_packages":[..]}
#            with status "INFECTED" when any spec was passed, else "CLEAN"
output_lockfile_jsonl() {
  local lockfile="$1"
  local pkg_scanned="$2"
  shift 2
  local compromised=("$@")
  local pkg

  # Build compromised array JSON
  local compromised_json="["
  local first=true
  for pkg in "${compromised[@]}"; do
    if [ "$first" = true ]; then
      first=false
    else
      compromised_json+=","
    fi
    compromised_json+="\"$(_json_escape "$pkg")\""
  done
  compromised_json+="]"

  # Determine status
  local status="CLEAN"
  if [ ${#compromised[@]} -gt 0 ]; then
    status="INFECTED"
  fi

  # Escape path for JSON
  local path_escaped
  path_escaped="$(_json_escape "$lockfile")"

  echo "{\"lockfile\":\"$path_escaped\",\"packages_scanned\":$pkg_scanned,\"status\":\"$status\",\"compromised_packages\":$compromised_json}"
}
425
+
426
# Scan every lockfile found under SCAN_PATH against the compromised list.
#
# Globals read:    SCAN_PATH, QUIET, JSON, VERBOSE, RECURSIVE
# Globals written: COMPROMISED_FINDINGS ("spec|lockfile"), LOCKFILES_SCANNED
#                  ("lockfile|count"), TOTAL_PACKAGES_SCANNED, WARNINGS
# Outputs:         progress on stderr (only when stderr is a terminal, or in
#                  verbose mode); one JSON line per lockfile on stdout when
#                  both JSON and RECURSIVE are enabled
check_lockfiles() {
  local lockfiles
  lockfiles=$(find_lockfiles "$SCAN_PATH")

  if [ -z "$lockfiles" ]; then
    WARNINGS+=("No lockfiles found (package-lock.json, yarn.lock, or pnpm-lock.yaml)")
    return
  fi

  # Count total for progress display
  local total count=0
  total=$(echo "$lockfiles" | wc -l | tr -d ' ')

  # Check if we should show inline progress (not in JSON, quiet, or verbose mode)
  local show_progress=false
  if [ "$QUIET" = false ] && [ "$JSON" = false ] && [ "$VERBOSE" = false ] && [ -t 2 ]; then
    show_progress=true
  fi

  # Declared here so the loop variables do not leak into the global scope.
  local lockfile rel_path packages pkg pkg_count pkg_scanned
  local -a lockfile_compromised

  while IFS= read -r lockfile; do
    [ -z "$lockfile" ] && continue
    count=$((count + 1))

    # Get relative path for cleaner display. SCAN_PATH is quoted inside the
    # expansion so glob characters in the directory name are taken literally.
    rel_path="${lockfile#"$SCAN_PATH"/}"

    # Show progress (file name first, then update with package count)
    # Note: verbose is ignored in JSON mode
    if [ "$VERBOSE" = true ] && [ "$JSON" = false ]; then
      log_status "[$count/$total] $lockfile"
    elif [ "$show_progress" = true ] && [ "$RECURSIVE" = true ]; then
      printf "\r\033[K[%d/%d] %s" "$count" "$total" "$rel_path" >&2
    elif [ "$show_progress" = true ]; then
      # Non-recursive: show scanning status
      printf "\r\033[KScanning %s..." "$rel_path" >&2
    fi

    packages=$(parse_lockfile "$lockfile" | sort -u)
    pkg_count=0
    if [ -n "$packages" ]; then
      pkg_count=$(echo "$packages" | wc -l | tr -d ' ')
    fi

    # Update progress with package count (will be overwritten by scanning progress)
    if [ "$VERBOSE" = true ] && [ "$JSON" = false ] && [ -t 2 ]; then
      printf " └─ 0/%d packages..." "$pkg_count" >&2
    elif [ "$show_progress" = true ] && [ "$RECURSIVE" = true ]; then
      printf "\r\033[K[%d/%d] %s (0/%d packages)" "$count" "$total" "$rel_path" "$pkg_count" >&2
    elif [ "$show_progress" = true ]; then
      # Non-recursive: show package count
      printf "\r\033[KScanning %s (0/%d packages)..." "$rel_path" "$pkg_count" >&2
    fi

    # Track compromised packages for this lockfile (for JSONL output)
    lockfile_compromised=()

    pkg_scanned=0
    while IFS= read -r pkg; do
      [ -z "$pkg" ] && continue
      pkg_scanned=$((pkg_scanned + 1))

      # Show package scanning progress every 100 packages
      if [ $((pkg_scanned % 100)) -eq 0 ]; then
        if [ "$VERBOSE" = true ] && [ "$JSON" = false ] && [ -t 2 ]; then
          printf "\r\033[K └─ %d/%d packages..." "$pkg_scanned" "$pkg_count" >&2
        elif [ "$show_progress" = true ]; then
          if [ "$RECURSIVE" = true ]; then
            printf "\r\033[K[%d/%d] %s (%d/%d packages)" "$count" "$total" "$rel_path" "$pkg_scanned" "$pkg_count" >&2
          else
            printf "\r\033[KScanning %s (%d/%d packages)..." "$rel_path" "$pkg_scanned" "$pkg_count" >&2
          fi
        fi
      fi

      if is_compromised "$pkg"; then
        COMPROMISED_FINDINGS+=("$pkg|$lockfile")
        lockfile_compromised+=("$pkg")
      fi
    done <<<"$packages"

    # Track total packages scanned
    TOTAL_PACKAGES_SCANNED=$((TOTAL_PACKAGES_SCANNED + pkg_scanned))

    # Track this lockfile for JSON output (non-recursive)
    LOCKFILES_SCANNED+=("$lockfile|$pkg_scanned")

    # Output JSONL for recursive JSON mode
    if [ "$JSON" = true ] && [ "$RECURSIVE" = true ]; then
      output_lockfile_jsonl "$lockfile" "$pkg_scanned" "${lockfile_compromised[@]}"
    fi

    # Show final count for this lockfile
    if [ "$VERBOSE" = true ] && [ "$JSON" = false ] && [ -t 2 ]; then
      # Clear progress line and show final count on new line
      printf "\r\033[K └─ %d packages\n" "$pkg_count" >&2
    elif [ "$show_progress" = true ] && [ "$pkg_count" -gt 0 ]; then
      if [ "$RECURSIVE" = true ]; then
        printf "\r\033[K[%d/%d] %s (%d/%d packages)" "$count" "$total" "$rel_path" "$pkg_count" "$pkg_count" >&2
      else
        printf "\r\033[KScanning %s (%d/%d packages)..." "$rel_path" "$pkg_count" "$pkg_count" >&2
      fi
    fi
  done <<<"$lockfiles"

  # Clear progress line
  if [ "$show_progress" = true ] && [ "$VERBOSE" = false ]; then
    if [ "$RECURSIVE" = true ]; then
      printf "\r\033[KScanned %d lockfiles (%d packages).\n" "$total" "$TOTAL_PACKAGES_SCANNED" >&2
    else
      printf "\r\033[KScanned %d packages.\n" "$TOTAL_PACKAGES_SCANNED" >&2
    fi
  fi
}
540
+
541
+ ###########################################
542
+ # BACKDOOR WORKFLOW DETECTION
543
+ ###########################################
544
+
545
# List the .github/workflows directories to inspect, one path per line.
#
# Globals: SCAN_PATH, RECURSIVE, MAX_DEPTH (read)
# Outputs: in recursive mode every directory below SCAN_PATH whose path ends
#          in /.github/workflows (limited to MAX_DEPTH levels unless
#          MAX_DEPTH is 0); otherwise only SCAN_PATH/.github/workflows when it
#          exists
# Returns: always 0
find_workflow_dirs() {
  if [ "$RECURSIVE" = true ]; then
    local -a depth_args=()
    if [ "$MAX_DEPTH" -gt 0 ] 2>/dev/null; then
      depth_args=(-maxdepth "$MAX_DEPTH")
    fi
    # find exits non-zero when it cannot descend into some directory. The
    # caller captures this output under "set -e", so that status must not
    # propagate: it would abort the whole scan with exit code 1.
    find "$SCAN_PATH" "${depth_args[@]}" -type d -path "*/.github/workflows" 2>/dev/null || true
  elif [ -d "$SCAN_PATH/.github/workflows" ]; then
    printf '%s\n' "$SCAN_PATH/.github/workflows"
  fi
}
557
+
558
# Flag discussion.yaml / discussion.yml workflows that match the backdoor
# pattern: the text "discussion", the text "self-hosted", and an unescaped
# ${{ github.event.discussion.body }} expression all present in the file.
#
# Arguments: $1 - a .github/workflows directory
# Globals:   BACKDOOR_FINDINGS (appended: "path|discussion_backdoor"),
#            WARNINGS (appended when a candidate file cannot be read)
# Returns:   always 0
check_discussion_backdoor() {
  local workflows_dir="$1"
  local filename workflow_path

  for filename in discussion.yaml discussion.yml; do
    workflow_path="$workflows_dir/$filename"
    [ -f "$workflow_path" ] || continue

    # Reading an unreadable file must not abort the scan under "set -e";
    # record it and move on.
    if [ ! -r "$workflow_path" ]; then
      WARNINGS+=("Cannot read workflow file: $workflow_path")
      continue
    fi

    # Check for malicious pattern: discussion trigger + self-hosted + unescaped body
    # ([[:space:]] rather than \s, which not every grep -E understands)
    if grep -q -e "discussion" -- "$workflow_path" 2>/dev/null \
      && grep -q -e "self-hosted" -- "$workflow_path" 2>/dev/null \
      && grep -qE -e '\$\{\{[[:space:]]*github\.event\.discussion\.body[[:space:]]*\}\}' -- "$workflow_path" 2>/dev/null; then
      BACKDOOR_FINDINGS+=("$workflow_path|discussion_backdoor")
    fi
  done

  return 0
}
576
+
577
# Flag formatter_<digit>*.yml workflow files as secrets-extraction backdoors.
#
# Arguments: $1 - a .github/workflows directory
# Globals:   BACKDOOR_FINDINGS (appended: "path|secrets_extraction")
# Returns:   always 0
check_formatter_backdoor() {
  local workflows_dir="$1"
  local workflow

  # Match timestamp-style formatter files per Wiz report. The glob requires a
  # digit right after "formatter_" (anything may follow it), which avoids
  # false positives on legitimate files like formatter_config.yml.
  for workflow in "$workflows_dir"/formatter_[0-9]*.yml; do
    # When nothing matches, the unexpanded pattern itself is skipped here.
    [ -f "$workflow" ] || continue
    BACKDOOR_FINDINGS+=("$workflow|secrets_extraction")
  done

  return 0
}
587
+
588
# Run both backdoor checks on every workflow directory that is in scope.
#
# Globals: BACKDOOR_FINDINGS (appended by the individual checks)
check_backdoor_workflows() {
  local workflow_dirs dir

  # A failing directory search must not abort the script under "set -e";
  # whatever was listed before the failure is still checked.
  workflow_dirs=$(find_workflow_dirs) || true

  while IFS= read -r dir; do
    [ -z "$dir" ] && continue
    check_discussion_backdoor "$dir"
    check_formatter_backdoor "$dir"
  done <<<"$workflow_dirs"
}
598
+
599
+ ###########################################
600
+ # OUTPUT
601
+ ###########################################
602
+
603
# Print the human-readable report through log.
#
# Globals: COMPROMISED_FINDINGS, BACKDOOR_FINDINGS, WARNINGS, PLAYBOOK_URL,
#          RED, YELLOW, GREEN, NC (read)
# Outputs: the findings, then (only when there are no findings) any warnings,
#          then the overall status, then the playbook link when anything was
#          found
report_text() {
  # Loop variables are local so they do not leak into the global scope.
  local finding warning
  local pkg lockfile file type status

  log ""

  # Report compromised packages
  if [ ${#COMPROMISED_FINDINGS[@]} -gt 0 ]; then
    log "${RED}🚨 COMPROMISED PACKAGES FOUND:${NC}"
    for finding in "${COMPROMISED_FINDINGS[@]}"; do
      # Findings are stored as "spec|lockfile"
      pkg="${finding%%|*}"
      lockfile="${finding#*|}"
      log " $pkg"
      log " └─ in $lockfile"
    done
    log ""
  fi

  # Report backdoors
  if [ ${#BACKDOOR_FINDINGS[@]} -gt 0 ]; then
    log "${RED}🚨 BACKDOOR WORKFLOWS FOUND:${NC}"
    for finding in "${BACKDOOR_FINDINGS[@]}"; do
      # Findings are stored as "file|type"
      file="${finding%%|*}"
      type="${finding#*|}"
      log " $file"
      log " └─ Type: $type"
    done
    log ""
  fi

  # Report warnings (only if no critical findings)
  if [ ${#COMPROMISED_FINDINGS[@]} -eq 0 ] && [ ${#BACKDOOR_FINDINGS[@]} -eq 0 ] && [ ${#WARNINGS[@]} -gt 0 ]; then
    for warning in "${WARNINGS[@]}"; do
      log "${YELLOW}⚠️ $warning${NC}"
    done
    log ""
  fi

  # Status
  if [ ${#COMPROMISED_FINDINGS[@]} -gt 0 ]; then
    status="${RED}INFECTED - Compromised packages found${NC}"
  elif [ ${#BACKDOOR_FINDINGS[@]} -gt 0 ]; then
    status="${YELLOW}WARNING - Backdoor workflows found${NC}"
  else
    status="${GREEN}CLEAN${NC}"
  fi

  log "Status: $status"

  if [ ${#COMPROMISED_FINDINGS[@]} -gt 0 ] || [ ${#BACKDOOR_FINDINGS[@]} -gt 0 ]; then
    log ""
    log "See cleanup playbook:"
    log " $PLAYBOOK_URL"
  fi
}
656
+
657
# Escape a string for use inside a JSON string literal: backslash, double
# quote, newline, carriage return and tab. Prints the result without a
# trailing newline.
_json_escape() {
  local s="$1"
  local nl=$'\n' cr=$'\r' tab=$'\t'
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$nl/\\n}"
  s="${s//$cr/\\r}"
  s="${s//$tab/\\t}"
  printf '%s' "$s"
}

# Print the full scan result as one JSON document (non-recursive JSON mode).
#
# Globals: SCAN_PATH, TOTAL_PACKAGES_SCANNED, LOCKFILES_SCANNED
#          ("path|count"), COMPROMISED_FINDINGS ("spec|lockfile"),
#          BACKDOOR_FINDINGS ("file|type"), WARNINGS (read)
# Outputs: an object with the keys path, status, packages_scanned,
#          lockfiles_scanned, compromised_packages, backdoors and warnings
report_json() {
  # Loop variables are local so they do not leak into the global scope.
  local entry finding warning
  local lockfile pkg_count pkg file type
  local first

  # Build lockfiles_scanned JSON array
  local lockfiles_json="["
  first=true
  for entry in "${LOCKFILES_SCANNED[@]}"; do
    lockfile="${entry%%|*}"
    pkg_count="${entry#*|}"
    if [ "$first" = true ]; then
      first=false
    else
      lockfiles_json+=","
    fi
    lockfiles_json+="{\"path\":\"$(_json_escape "$lockfile")\",\"packages_scanned\":$pkg_count}"
  done
  lockfiles_json+="]"

  # Build compromised_packages JSON array
  local compromised_json="["
  first=true
  for finding in "${COMPROMISED_FINDINGS[@]}"; do
    pkg="${finding%%|*}"
    lockfile="${finding#*|}"
    if [ "$first" = true ]; then
      first=false
    else
      compromised_json+=","
    fi
    compromised_json+="{\"package\":\"$(_json_escape "$pkg")\",\"lockfile\":\"$(_json_escape "$lockfile")\"}"
  done
  compromised_json+="]"

  # Build backdoors JSON array
  local backdoors_json="["
  first=true
  for finding in "${BACKDOOR_FINDINGS[@]}"; do
    file="${finding%%|*}"
    type="${finding#*|}"
    if [ "$first" = true ]; then
      first=false
    else
      backdoors_json+=","
    fi
    backdoors_json+="{\"file\":\"$(_json_escape "$file")\",\"type\":\"$type\"}"
  done
  backdoors_json+="]"

  # Build warnings JSON array
  local warnings_json="["
  first=true
  for warning in "${WARNINGS[@]}"; do
    if [ "$first" = true ]; then
      first=false
    else
      warnings_json+=","
    fi
    warnings_json+="\"$(_json_escape "$warning")\""
  done
  warnings_json+="]"

  local status
  if [ ${#COMPROMISED_FINDINGS[@]} -gt 0 ]; then
    status="INFECTED - Compromised packages found"
  elif [ ${#BACKDOOR_FINDINGS[@]} -gt 0 ]; then
    status="WARNING - Backdoor workflows found"
  else
    status="CLEAN"
  fi

  # Escape path for JSON
  local path_escaped
  path_escaped="$(_json_escape "$SCAN_PATH")"

  cat <<EOF
{
  "path": "$path_escaped",
  "status": "$status",
  "packages_scanned": $TOTAL_PACKAGES_SCANNED,
  "lockfiles_scanned": $lockfiles_json,
  "compromised_packages": $compromised_json,
  "backdoors": $backdoors_json,
  "warnings": $warnings_json
}
EOF
}
751
+
752
+ ###########################################
753
+ # MAIN
754
+ ###########################################
755
+
756
# Escape a string for use inside a JSON string literal: backslash, double
# quote, newline, carriage return and tab. Prints the result without a
# trailing newline.
_json_escape() {
  local s="$1"
  local nl=$'\n' cr=$'\r' tab=$'\t'
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$nl/\\n}"
  s="${s//$cr/\\r}"
  s="${s//$tab/\\t}"
  printf '%s' "$s"
}

# Print the closing summary object (a single line) of recursive JSON output.
#
# Globals: SCAN_PATH, TOTAL_PACKAGES_SCANNED, LOCKFILES_SCANNED,
#          COMPROMISED_FINDINGS, BACKDOOR_FINDINGS ("file|type"), WARNINGS (read)
# Outputs: {"type":"summary", ...} with totals, backdoors and warnings; status
#          is INFECTED, WARNING or CLEAN
output_jsonl_summary() {
  # Loop variables are local so they do not leak into the global scope.
  local finding warning file type
  local first

  # Build backdoors JSON array
  local backdoors_json="["
  first=true
  for finding in "${BACKDOOR_FINDINGS[@]}"; do
    file="${finding%%|*}"
    type="${finding#*|}"
    if [ "$first" = true ]; then
      first=false
    else
      backdoors_json+=","
    fi
    backdoors_json+="{\"file\":\"$(_json_escape "$file")\",\"type\":\"$type\"}"
  done
  backdoors_json+="]"

  # Build warnings JSON array
  local warnings_json="["
  first=true
  for warning in "${WARNINGS[@]}"; do
    if [ "$first" = true ]; then
      first=false
    else
      warnings_json+=","
    fi
    warnings_json+="\"$(_json_escape "$warning")\""
  done
  warnings_json+="]"

  local status
  if [ ${#COMPROMISED_FINDINGS[@]} -gt 0 ]; then
    status="INFECTED"
  elif [ ${#BACKDOOR_FINDINGS[@]} -gt 0 ]; then
    status="WARNING"
  else
    status="CLEAN"
  fi

  local path_escaped
  path_escaped="$(_json_escape "$SCAN_PATH")"

  echo "{\"type\":\"summary\",\"path\":\"$path_escaped\",\"status\":\"$status\",\"total_packages_scanned\":$TOTAL_PACKAGES_SCANNED,\"total_lockfiles\":${#LOCKFILES_SCANNED[@]},\"total_compromised\":${#COMPROMISED_FINDINGS[@]},\"backdoors\":$backdoors_json,\"warnings\":$warnings_json}"
}
803
+
804
# Entry point: announce the scan, load the package list, run both checks,
# print the report in the selected format and exit with the scan result.
#
# Exits: 1 when compromised packages were found, 2 when only backdoor
#        workflows were found, 0 otherwise
main() {
  # Show scan info up front
  if [ "$QUIET" = false ] && [ "$JSON" = false ]; then
    local scope=""
    if [ "$RECURSIVE" = true ]; then
      if [ "$MAX_DEPTH" -eq 0 ]; then
        scope=" (recursive, unlimited depth)"
      else
        scope=" (recursive, max depth: $MAX_DEPTH)"
      fi
    fi
    log "Scanning: $SCAN_PATH$scope"
  fi

  # Load compromised packages list
  load_compromised_packages

  # Run checks
  check_lockfiles
  check_backdoor_workflows

  # Output results
  if [ "$QUIET" = false ]; then
    case "$JSON:$RECURSIVE" in
      true:true)
        # JSONL mode: lockfiles already output, just add summary
        output_jsonl_summary
        ;;
      true:*)
        report_json
        ;;
      *)
        report_text
        ;;
    esac
  fi

  # Exit code
  local exit_code=0
  if [ ${#COMPROMISED_FINDINGS[@]} -gt 0 ]; then
    exit_code=1
  elif [ ${#BACKDOOR_FINDINGS[@]} -gt 0 ]; then
    exit_code=2
  fi
  exit "$exit_code"
}
848
+
849
+ main