vtk 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1317 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # gh-action-trace — Find all direct and transitive uses of GitHub Actions in an org.
4
+ #
5
+ # Traces the full dependency chain: repos that directly reference target actions,
6
+ # repos that call shared/reusable workflows containing those actions, and external
7
+ # shared workflows that wrap them. Reports pinning status (SHA/tag/branch) for each.
8
+ #
9
+ # Requires: gh (GitHub CLI), jq, base64
10
+ #
11
+ # Usage:
12
+ # gh-action-trace --org department-of-veterans-affairs --action aquasecurity/trivy-action
13
+ # gh-action-trace --org my-org --action actions/checkout --action actions/setup-node
14
+ # gh-action-trace --org my-org --action aquasecurity/trivy-action --depth 3 --format json
15
+ #
16
+ # Options:
17
+ # --org ORG GitHub org to search (required)
18
+ # --action ACTION Action to trace — repeatable (required, at least one)
19
+ # --depth N Max recursion depth for shared workflows (default: 2)
20
+ # --format FORMAT Output format: text, json, csv, both (default: both)
21
+ # --external Also search all of GitHub for external shared workflows (slower)
22
+ # --output FILE Write JSON output to file (default: stdout)
23
+ # --check-runs FROM..TO Check workflow run history during a time window (ISO 8601)
24
+ # e.g. --check-runs 2026-03-19T19:00:00Z..2026-03-21T00:00:00Z
25
+ # Omit TO to default to now: --check-runs 2026-03-19T19:00:00Z..
26
+ # --quiet Suppress progress output (only show results)
27
+ # --verbose Show detailed debug info
28
+ # --help Show this help
29
+
30
# Fail fast: exit on error, on use of an unset variable, and on any failure
# inside a pipeline.
set -euo pipefail

# =============================================================================
# Configuration (populated by parse_args; read globally by the rest of script)
# =============================================================================

# Actions to trace, e.g. "aquasecurity/trivy-action" (repeatable --action).
declare -a TARGET_ACTIONS=()
ORG=""                  # GitHub org to search (required)
MAX_DEPTH=2             # Max recursion depth when following reusable workflows
FORMAT="both"           # Output format: text | json | csv | both
SEARCH_EXTERNAL=false   # Also search all of GitHub for external wrappers
OUTPUT_FILE=""          # Optional file for JSON output (default: stdout)
CHECK_RUNS=""           # Raw --check-runs value, "FROM..TO" (ISO 8601)
CHECK_RUNS_FROM=""      # Parsed window start
CHECK_RUNS_TO=""        # Parsed window end (defaults to "now" if omitted)
QUIET=false             # Suppress progress output
VERBOSE=false           # Emit debug logging via log()

# Rate limiting — modeled after github-viewer's approach
# Note: code_search resource has a 10/min limit (not 30 like regular search)
# These are stored as files (in COUNTER_DIR) to survive subshell boundaries.
SEARCH_CRITICAL_THRESHOLD=2   # remaining calls at/below this => wait for reset
SEARCH_WARNING_THRESHOLD=4    # remaining calls at/below this => add a delay

# Retry config (exponential backoff for server errors)
MAX_RETRIES=3
RETRY_BACKOFF_BASE=2          # delay = BASE ** attempt (seconds)

# Counter/state directory — set in main(), all mutable state lives here
COUNTER_DIR=""

# Results accumulator (newline-delimited JSON objects)
RESULTS_FILE=""
CACHE_DIR=""                  # temp dir for fetched contents; removed on exit

# Visited sets (prevent infinite loops and redundant work)
VISITED_WORKFLOWS_FILE=""
VISITED_SEARCHES_FILE=""
68
+
69
+ # =============================================================================
70
+ # Utility functions
71
+ # =============================================================================
72
+
73
# Debug logging to stderr, prefixed with HH:MM:SS; shown only with --verbose.
log() {
  [[ "$VERBOSE" == true ]] || return 0
  printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2
}

# User-facing progress message to stderr; suppressed by --quiet.
progress() {
  [[ "$QUIET" == true ]] || printf '%s\n' "$*" >&2
}

# Non-fatal warning to stderr.
warn() {
  printf '[WARN] %s\n' "$*" >&2
}

# Fatal error: report to stderr and terminate with status 1.
die() {
  printf '[ERROR] %s\n' "$*" >&2
  exit 1
}
77
+
78
+ # Inline progress bar that overwrites the current line.
79
+ # Args: $1 = current, $2 = total, $3 = found count, $4 = label
80
# Render an in-place progress bar on stderr (no-op under --quiet).
# Args: $1 = current, $2 = total, $3 = found count, $4 = label
# A trailing newline is emitted once current reaches total.
progress_bar() {
  if [[ "$QUIET" == true ]]; then
    return
  fi
  local cur="$1" total="$2" hits="$3" tag="$4"
  local width=20
  local pct=0
  if (( total > 0 )); then
    pct=$(( cur * 100 / total ))
  fi
  local n_fill=$(( pct * width / 100 ))
  local n_rest=$(( width - n_fill ))
  local fill rest
  fill=$(printf '%*s' "$n_fill" '' | tr ' ' '█')
  rest=$(printf '%*s' "$n_rest" '' | tr ' ' '░')
  printf '\r\033[K %s [%d/%d] %s %d found' "$tag" "$cur" "$total" "${fill}${rest}" "$hits" >&2
  if (( cur >= total )); then
    printf '\n' >&2
  fi
  return 0
}
96
+
97
+ # Show a rate limit wait message on the progress line.
98
# Overwrite the current progress line with a rate-limit wait notice (stderr).
# Args: $1 = seconds until the rate limit resets. No-op under --quiet.
progress_wait() {
  if [[ "$QUIET" == true ]]; then
    return
  fi
  local wait_secs="$1"
  printf '\r\033[K Rate limited. Waiting %ds for reset...' "$wait_secs" >&2
}
103
+
104
# Print the "Usage:" section of this script's own header comment and exit 0.
# The sed program selects lines from "# Usage:" through the first line that
# is not a comment, deletes that terminating non-comment line, strips the
# leading "# " (or bare "#") from each remaining line, and prints them.
usage() {
  sed -n '/^# Usage:/,/^[^#]/{ /^[^#]/d; s/^# \?//; p; }' "$0"
  exit 0
}
108
+
109
# Hide cursor on start, restore on exit/Ctrl-C.
show_cursor() {
  tput cnorm 2>/dev/null || true   # best-effort: no TTY / terminfo is fine
}

hide_cursor() {
  tput civis 2>/dev/null || true
}

# EXIT/INT/TERM handler: restore the cursor and delete the temp cache dir.
# CACHE_DIR may be empty if the trap fires before setup completes, hence the
# explicit -n / -d guard before the rm -rf.
cleanup() {
  show_cursor
  if [[ -n "$CACHE_DIR" && -d "$CACHE_DIR" ]]; then
    rm -rf "$CACHE_DIR"
    log "Cleaned up cache dir: $CACHE_DIR"
  fi
}
trap cleanup EXIT INT TERM
126
+
127
# File-based counters to survive subshell boundaries (pipe | while read).
# Each counter lives at ${COUNTER_DIR}/<name> as a bare integer on one line.

# Create or reset a counter to zero.
counter_init() {
  printf '0\n' > "${COUNTER_DIR}/$1"
}

# Add one to a counter.
counter_inc() {
  local path="${COUNTER_DIR}/$1"
  local current
  current=$(<"$path")
  printf '%d\n' $(( current + 1 )) > "$path"
}

# Subtract one from a counter.
counter_dec() {
  local path="${COUNTER_DIR}/$1"
  local current
  current=$(<"$path")
  printf '%d\n' $(( current - 1 )) > "$path"
}

# Print a counter's current value on stdout.
counter_get() {
  cat "${COUNTER_DIR}/$1"
}

# Overwrite a counter with an explicit value.
counter_set() {
  printf '%s\n' "$2" > "${COUNTER_DIR}/$1"
}
159
+
160
+ # =============================================================================
161
+ # Argument parsing
162
+ # =============================================================================
163
+
164
# Parse command-line options into the script's configuration globals.
# Globals written: ORG, TARGET_ACTIONS, MAX_DEPTH, FORMAT, SEARCH_EXTERNAL,
#   OUTPUT_FILE, CHECK_RUNS, CHECK_RUNS_FROM, CHECK_RUNS_TO, QUIET, VERBOSE.
# Dies on: unknown options, value-taking options missing their value, a
# --check-runs argument without "..", and failed required/enum validation.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Guard: every value-taking option must actually have a value; without
    # this, "$2" trips `set -u` with a cryptic "unbound variable" error.
    case "$1" in
      --org|--action|--depth|--format|--output|--check-runs)
        [[ $# -ge 2 ]] || die "Option $1 requires a value"
        ;;
    esac

    case "$1" in
      --org) ORG="$2"; shift 2 ;;
      --action) TARGET_ACTIONS+=("$2"); shift 2 ;;
      --depth) MAX_DEPTH="$2"; shift 2 ;;
      --format) FORMAT="$2"; shift 2 ;;
      --external) SEARCH_EXTERNAL=true; shift ;;
      --output) OUTPUT_FILE="$2"; shift 2 ;;
      --check-runs)
        # Window must be "FROM..TO"; TO may be empty and defaults to now.
        # Previously a value without ".." silently set FROM=TO=whole string.
        [[ "$2" == *..* ]] || die "Invalid --check-runs: $2 (expected FROM..TO)"
        CHECK_RUNS="$2"
        CHECK_RUNS_FROM="${2%%\.\.*}"
        CHECK_RUNS_TO="${2##*\.\.}"
        if [[ -z "$CHECK_RUNS_TO" ]]; then
          CHECK_RUNS_TO=$(date -u +%Y-%m-%dT%H:%M:%SZ)
        fi
        shift 2 ;;
      --quiet|-q) QUIET=true; shift ;;
      --verbose) VERBOSE=true; shift ;;
      --help|-h) usage ;;
      *) die "Unknown option: $1" ;;
    esac
  done

  [[ -n "$ORG" ]] || die "Missing required --org"
  [[ ${#TARGET_ACTIONS[@]} -gt 0 ]] || die "Missing required --action (at least one)"
  [[ "$FORMAT" =~ ^(text|json|csv|both)$ ]] || die "Invalid --format: $FORMAT (must be text, json, csv, or both)"
  [[ "$MAX_DEPTH" =~ ^[0-9]+$ ]] || die "Invalid --depth: $MAX_DEPTH (must be a number)"
}
193
+
194
+ # =============================================================================
195
+ # Rate limiting
196
+ # =============================================================================
197
+
198
# Refresh rate limit info from the API (costs 1 core API call).
# Writes the file-backed counters search_remaining, search_limit,
# search_reset, and core_remaining. Returns 1 if the API call fails.
refresh_rate_limit() {
  local info
  info=$(gh api '/rate_limit' 2>/dev/null) || return 1
  counter_inc core_calls

  local search_info
  # Prefer the dedicated code_search bucket (10/min) over generic search.
  search_info=$(echo "$info" | jq '.resources.code_search // .resources.search // empty')

  if [[ -n "$search_info" ]]; then
    local remaining limit reset
    remaining=$(echo "$search_info" | jq -r '.remaining')
    limit=$(echo "$search_info" | jq -r '.limit')
    reset=$(echo "$search_info" | jq -r '.reset')
    counter_set search_remaining "$remaining"
    counter_set search_limit "$limit"
    counter_set search_reset "$reset"
    # NOTE(review): `date -r <epoch>` is the BSD/macOS form; GNU date needs
    # `date -d @<epoch>`. The `|| echo` fallback covers the GNU failure case.
    log "Rate limit refreshed: search ${remaining}/${limit} (resets $(date -r "${reset}" +%H:%M:%S 2>/dev/null || echo "at ${reset}"))"
  fi

  local core_info
  core_info=$(echo "$info" | jq '.resources.core // empty')
  if [[ -n "$core_info" ]]; then
    counter_set core_remaining "$(echo "$core_info" | jq -r '.remaining')"
  fi
}
224
+
225
# Pre-emptively wait if we're approaching the search rate limit.
# Reads/writes the search_* counters and may sleep; intended to be called
# before every search-type API request (see gh_api).
throttle_search() {
  local search_calls remaining limit reset

  search_calls=$(counter_get search_calls)
  # Refresh from API every 3 search calls to stay accurate
  if (( search_calls % 3 == 0 && search_calls > 0 )); then
    refresh_rate_limit
  fi

  remaining=$(counter_get search_remaining)
  limit=$(counter_get search_limit)
  reset=$(counter_get search_reset)

  if [[ "$remaining" -le "$SEARCH_CRITICAL_THRESHOLD" ]]; then
    # Double-check against the live API before deciding to sleep — the
    # local estimate may be stale.
    refresh_rate_limit
    remaining=$(counter_get search_remaining)
    reset=$(counter_get search_reset)

    if [[ "$remaining" -le "$SEARCH_CRITICAL_THRESHOLD" && "$reset" -gt 0 ]]; then
      local now wait_secs
      now=$(date +%s)
      wait_secs=$(( reset - now + 1 ))

      # Only sleep for sane windows (<= 2 min); otherwise fall through and
      # let gh_api's per-call error handling deal with it.
      if [[ "$wait_secs" -gt 0 && "$wait_secs" -le 120 ]]; then
        progress_wait "$wait_secs"
        sleep "$wait_secs"
        # Assume a full quota once the reset boundary has passed.
        counter_set search_remaining "$limit"
      fi
    fi
  elif [[ "$remaining" -le "$SEARCH_WARNING_THRESHOLD" ]]; then
    log "Search rate limit getting low (${remaining}/${limit} remaining). Adding 3s delay."
    sleep 3
  fi
}
260
+
261
+ # =============================================================================
262
+ # GitHub API wrappers
263
+ # =============================================================================
264
+
265
# Make a GitHub API call with rate limit tracking and retry logic.
# Args: $1 = resource type (search|core), remaining args passed to gh api
# Outputs: response body on stdout
# Returns: 0 on success; 1 on an unrecoverable error.
# Behavior: sleeps through rate-limit errors (waiting until the reported
# reset when it's within 2 minutes, else a flat 60s), retries server errors
# with exponential backoff up to MAX_RETRIES, and keeps the local
# search_*/core_* estimate counters in sync.
gh_api() {
  local resource="$1"; shift
  local retries=0
  local exit_code

  if [[ "$resource" == "search" ]]; then
    throttle_search
  fi

  # Despite the name, this path is only used as the base for the ".err"
  # file that captures stderr from the last attempt.
  local header_file="${CACHE_DIR}/last_headers.txt"

  while true; do
    local body
    # gh api outputs JSON body to stdout, errors to stderr
    body=$(gh api "$@" 2>"${header_file}.err") && exit_code=0 || exit_code=$?

    if [[ $exit_code -eq 0 ]]; then
      # Success: bump local call counts / decrement remaining estimates
      # for whichever quota bucket this call consumed.
      case "$resource" in
        search)
          counter_inc search_calls
          counter_dec search_remaining
          log "Search calls: $(counter_get search_calls) (est. remaining: $(counter_get search_remaining))"
          ;;
        core)
          counter_inc core_calls
          counter_dec core_remaining
          ;;
      esac

      echo "$body"
      return 0
    fi

    local err_msg
    err_msg=$(cat "${header_file}.err" 2>/dev/null || echo "unknown error")

    # Check for rate limit error
    if echo "$err_msg" | grep -qi "rate limit\|API rate limit\|secondary rate limit\|abuse detection"; then
      # Try to get actual rate limit reset time
      local rate_info
      rate_info=$(gh api '/rate_limit' --jq ".resources.code_search // .resources.search // .resources.core" 2>/dev/null || true)

      if [[ -n "$rate_info" ]]; then
        local reset_at remaining
        reset_at=$(echo "$rate_info" | jq -r '.reset // 0')
        remaining=$(echo "$rate_info" | jq -r '.remaining // 0')
        counter_set search_remaining "$remaining"
        counter_set search_reset "$reset_at"

        local now wait_secs
        now=$(date +%s)
        wait_secs=$(( reset_at - now + 1 ))

        if [[ "$wait_secs" -gt 0 && "$wait_secs" -le 120 ]]; then
          progress_wait "$wait_secs"
          sleep "$wait_secs"
          counter_set search_remaining "$(counter_get search_limit)"
          continue
        fi
      fi

      # Could not determine a sane reset window — fall back to a flat wait.
      progress_wait 60
      sleep 60
      continue
    fi

    # Check for server error (5xx) — retry with exponential backoff
    # NOTE(review): "50[0-9]" already covers 502/503, so the extra
    # alternatives are redundant; the pattern also matches "50x" anywhere in
    # the message (e.g. inside a URL), which can trigger spurious retries.
    if echo "$err_msg" | grep -qE "50[0-9]|502|503"; then
      if [[ $retries -lt $MAX_RETRIES ]]; then
        retries=$((retries + 1))
        local delay=$(( RETRY_BACKOFF_BASE ** retries ))
        warn "Server error. Retrying in ${delay}s (attempt ${retries}/${MAX_RETRIES})"
        sleep "$delay"
        continue
      fi
    fi

    # Unrecoverable error
    warn "API call failed: $err_msg"
    return 1
  done
}
350
+
351
# Search code with pagination. Returns all items as a JSON array.
# Args: $1 = query string
# Outputs: JSON array of code-search "items" (possibly empty) on stdout.
# Stops paginating on any API failure (returns whatever accumulated so far)
# and at GitHub's hard 1000-result cap, warning when results are truncated.
search_code() {
  local query="$1"
  local all_items="[]"
  local page=1
  local per_page=100

  log "Searching: $query"

  while true; do
    local result
    # On API failure, break and return the partial accumulation.
    result=$(gh_api search -X GET '/search/code' \
      -f q="$query" \
      -F per_page="$per_page" \
      -F page="$page" 2>/dev/null) || break

    local total_count items_count
    total_count=$(echo "$result" | jq -r '.total_count // 0')
    items_count=$(echo "$result" | jq -r '.items | length')

    if [[ "$items_count" -eq 0 ]]; then
      break
    fi

    # Merge items
    all_items=$(echo "$all_items" "$result" | jq -s '.[0] + (.[1].items // [])')

    local accumulated
    accumulated=$(echo "$all_items" | jq 'length')
    log " Page $page: got $items_count items (${accumulated}/${total_count} total)"

    # Check if we've got everything or hit the 1000-result cap
    if [[ "$accumulated" -ge "$total_count" || "$accumulated" -ge 1000 ]]; then
      if [[ "$total_count" -gt 1000 ]]; then
        warn "Search returned ${total_count} results but GitHub caps at 1000. Results may be incomplete."
      fi
      break
    fi

    page=$((page + 1))
  done

  echo "$all_items"
}
396
+
397
# Fetch a file's content from a repo. Uses cache.
# Args: $1 = owner/repo, $2 = file path
# Outputs: decoded file content on stdout
# Returns: 1 if the API call, empty response, or base64 decode fails
# (callers treat that as "skip this file").
fetch_content() {
  local repo="$1" path="$2"
  local cache_key
  # NOTE(review): flattening both '/' and '.' to '_' can collide for
  # distinct paths (e.g. "a/b.yml" vs "a/b_yml"); worst case is serving a
  # wrong cached body for the colliding path — verify if that matters here.
  cache_key=$(echo "${repo}/${path}" | tr '/' '_' | tr '.' '_')
  local cache_file="${CACHE_DIR}/content_${cache_key}"

  if [[ -f "$cache_file" ]]; then
    counter_inc cache_hits
    cat "$cache_file"
    return 0
  fi

  local result
  # The contents API returns base64-encoded content for regular files.
  result=$(gh_api core -X GET "/repos/${repo}/contents/${path}" \
    --jq '.content // empty' 2>/dev/null) || return 1

  if [[ -z "$result" ]]; then
    return 1
  fi

  local decoded
  decoded=$(echo "$result" | base64 -d 2>/dev/null) || return 1
  echo "$decoded" > "$cache_file"
  echo "$decoded"
}
425
+
426
+ # =============================================================================
427
+ # Analysis functions
428
+ # =============================================================================
429
+
430
# Extract all `uses:` references from workflow content.
# Args: $1 = workflow YAML content
# Outputs: one reference per line (e.g. "owner/repo@v4"), with local
#          ("./…", "../…") and Docker ("docker://…") references skipped.
extract_uses_refs() {
  local content="$1"

  # Match both step forms: "- uses: …" and a bare "uses: …" line (the form
  # that follows "- name: …"). The previous pattern required whitespace
  # immediately before "uses:", which silently missed every "- uses:" step.
  echo "$content" | grep -E '^[[:space:]]*-?[[:space:]]*uses:[[:space:]]' | while IFS= read -r line; do
    local ref
    # Strip everything up to "uses:", optional quotes, and trailing comments;
    # xargs trims surrounding whitespace.
    ref=$(echo "$line" | sed -E 's/.*uses:[[:space:]]*"?([^"[:space:]#]+)"?.*/\1/' | xargs)

    # Skip local references (./path or ../path)
    if [[ "$ref" == ./* || "$ref" == ../* ]]; then
      continue
    fi

    # Skip Docker references
    if [[ "$ref" == docker://* ]]; then
      continue
    fi

    echo "$ref"
  done
}
452
+
453
# Classify how an action reference is pinned.
# Args: $1 = full uses reference (e.g. "actions/checkout@v4")
# Outputs: JSON {action, version, pin_type, pin_risk}, where pin_type is
#          none | sha | tag | branch with risk critical/safe/risky/dangerous.
classify_pin() {
  local full_ref="$1"
  local name="${full_ref%%@*}"
  local pinned_at=""
  if [[ "$full_ref" == *@* ]]; then
    pinned_at="${full_ref#*@}"
  fi

  local kind risk
  if [[ -z "$pinned_at" ]]; then
    # No version at all — resolves to the default branch.
    kind="none"; risk="critical"
  elif [[ "$pinned_at" =~ ^[0-9a-f]{40}$ ]]; then
    # Full 40-char commit SHA — immutable.
    kind="sha"; risk="safe"
  elif [[ "$pinned_at" =~ ^v?[0-9] ]]; then
    # Version tag — mutable, but usually stable.
    kind="tag"; risk="risky"
  else
    # Anything else is treated as a branch name — fully mutable.
    kind="branch"; risk="dangerous"
  fi

  jq -n \
    --arg action "$name" \
    --arg version "$pinned_at" \
    --arg pin_type "$kind" \
    --arg pin_risk "$risk" \
    '{action: $action, version: $version, pin_type: $pin_type, pin_risk: $pin_risk}'
}
480
+
481
# Decide whether workflow content declares a reusable workflow, i.e. carries
# a workflow_call trigger in any of its common spellings: a bare
# "workflow_call:" key line, an inline "on: [..., workflow_call]" list, or a
# quoted "workflow_call" key.
# Args: $1 = workflow content
# Returns: 0 if reusable, 1 if not
is_reusable_workflow() {
  local yaml="$1"
  grep -qE '^[[:space:]]*workflow_call[[:space:]]*:?[[:space:]]*$|on:.*workflow_call|"workflow_call"' <<< "$yaml"
}
488
+
489
+ # =============================================================================
490
+ # Result recording
491
+ # =============================================================================
492
+
493
# Append one result record (a JSON object) to RESULTS_FILE and bump the
# file-backed "found" counter.
# Args: JSON object as $1, or on stdin when $1 is absent.
add_result() {
  local record
  record="${1:-$(cat)}"
  printf '%s\n' "$record" >> "$RESULTS_FILE"
  counter_inc found
}
500
+
501
# Loop-prevention sets, stored as one-entry-per-line files. Lookups use
# grep -qxF: fixed-string, whole-line match; a missing file (2>/dev/null)
# simply reads as "not present".

# Has this workflow id (e.g. "org/repo/.github/workflows/f.yml") been
# processed already? Returns 0 if yes, 1 if new.
is_visited() {
  grep -qxF "$1" "$VISITED_WORKFLOWS_FILE" 2>/dev/null
}

# Record a workflow id as processed.
mark_visited() {
  printf '%s\n' "$1" >> "$VISITED_WORKFLOWS_FILE"
}

# Has this search key been executed already? Returns 0 if yes, 1 if new.
is_search_done() {
  grep -qxF "$1" "$VISITED_SEARCHES_FILE" 2>/dev/null
}

# Record a search key as executed.
mark_search_done() {
  printf '%s\n' "$1" >> "$VISITED_SEARCHES_FILE"
}
523
+
524
+ # =============================================================================
525
+ # Core trace logic
526
+ # =============================================================================
527
+
528
# Trace one action through the org: direct references first, then (with
# --external) external wrapper discovery, then transitive references via
# shared/reusable workflows.
# Args: $1 = action (e.g., "aquasecurity/trivy-action")
trace_action() {
  local target="$1"
  log "=== Tracing action: $target ==="

  local phases=2
  if [[ "$SEARCH_EXTERNAL" == true ]]; then
    phases=3
  fi
  local step=1

  progress " [${step}/${phases}] Searching $ORG for direct references..."
  find_direct_refs "$target"

  if [[ "$SEARCH_EXTERNAL" == true ]]; then
    step=$((step + 1))
    progress " [${step}/${phases}] Searching all of GitHub for external shared workflows..."
    find_external_wrappers "$target"
  fi

  step=$((step + 1))
  progress " [${step}/${phases}] Tracing transitive references through shared workflows..."
  trace_shared_workflows "$target" 1
}
550
+
551
# Find direct references to an action in the org's workflow files.
# Args: $1 = action name (e.g. "aquasecurity/trivy-action")
# Side effects: appends one JSON record per matching `uses:` line to
# RESULTS_FILE (via add_result) and updates the visited / search-done sets.
find_direct_refs() {
  local action="$1"
  local search_key="direct:${ORG}:${action}"

  if is_search_done "$search_key"; then
    log " Skipping duplicate search: $search_key"
    return
  fi
  mark_search_done "$search_key"

  local items
  items=$(search_code "${action} path:.github/workflows user:${ORG} language:yaml")

  local count
  count=$(echo "$items" | jq 'length')
  progress " Found $count workflow files to inspect"

  if [[ "$count" -eq 0 ]]; then
    return
  fi

  # Process each result — fetch content and analyze.
  # NOTE: piping into `while` runs the loop body in a subshell, so plain
  # variables (item_idx) do not survive past the loop; cross-loop state such
  # as the "found" total deliberately lives in file-backed counters instead.
  local item_idx=0
  echo "$items" | jq -c '.[]' | while IFS= read -r item; do
    local repo path
    repo=$(echo "$item" | jq -r '.repository.full_name')
    path=$(echo "$item" | jq -r '.path')
    item_idx=$((item_idx + 1))

    local workflow_id="${repo}/${path}"
    if is_visited "$workflow_id"; then
      log " Skipping already-visited: $workflow_id"
      progress_bar "$item_idx" "$count" "$(counter_get found)" "Inspecting..."
      continue
    fi
    mark_visited "$workflow_id"

    progress_bar "$item_idx" "$count" "$(counter_get found)" "Inspecting..."
    log " Fetching: ${repo}/${path}"
    local content
    content=$(fetch_content "$repo" "$path" 2>/dev/null) || {
      warn " Could not fetch ${repo}/${path} — skipping"
      continue
    }

    # Find all uses: lines that match our target action
    local refs
    refs=$(extract_uses_refs "$content" | grep -F "${action}" || true)

    if [[ -z "$refs" ]]; then
      # The search matched but no uses: line contains the action directly.
      # Could be a comment match or partial match — skip.
      log " No matching uses: reference found in $workflow_id (possible false positive)"
      continue
    fi

    # Check if this workflow is itself reusable (a workflow_call target);
    # reusable hits later seed the transitive trace in trace_shared_workflows.
    local reusable=false
    if is_reusable_workflow "$content"; then
      reusable=true
    fi

    # Record each matching reference
    while IFS= read -r ref; do
      local pin_info
      pin_info=$(classify_pin "$ref")

      add_result "$(jq -n \
        --arg repo "$repo" \
        --arg workflow "$path" \
        --arg ref "$ref" \
        --arg action "$action" \
        --arg ref_type "direct" \
        --argjson reusable "$reusable" \
        --argjson pin "$pin_info" \
        --arg chain "$action" \
        '{
          repo: $repo,
          workflow: $workflow,
          uses_ref: $ref,
          target_action: $action,
          reference_type: $ref_type,
          is_reusable_workflow: $reusable,
          pin_type: $pin.pin_type,
          pin_value: $pin.version,
          pin_risk: $pin.pin_risk,
          chain: [$chain]
        }')"
    done <<< "$refs"
  done
}
644
+
645
# Search all of GitHub for external shared workflows that wrap the target
# action, then search the org for repos calling those external workflows.
# Args: $1 = action name
# Only invoked when --external is set (gated by the caller, trace_action).
find_external_wrappers() {
  local action="$1"
  local search_key="external:${action}"

  if is_search_done "$search_key"; then
    return
  fi
  mark_search_done "$search_key"

  log " Searching all of GitHub for workflows wrapping: $action"
  local items
  items=$(search_code "${action} path:.github/workflows language:yaml")

  local count
  count=$(echo "$items" | jq 'length')
  log " Found $count global references"

  if [[ "$count" -eq 0 ]]; then
    return
  fi

  # Find repos outside our org that contain the action in workflow files
  local external_repos
  external_repos=$(echo "$items" | jq -r --arg org "$ORG" \
    '[.[] | select(.repository.full_name | startswith($org + "/") | not)] | unique_by(.repository.full_name) | .[].repository.full_name' 2>/dev/null || true)

  if [[ -z "$external_repos" ]]; then
    log " No external repos found"
    return
  fi

  local ext_count
  ext_count=$(echo "$external_repos" | wc -l | tr -d ' ')
  progress " Found $ext_count external repos wrapping $action"

  # For each external repo, check if it has reusable workflows and if our org calls them
  while IFS= read -r ext_repo; do
    [[ -z "$ext_repo" ]] && continue

    # Get the workflow files from this external repo that matched
    local ext_paths
    ext_paths=$(echo "$items" | jq -r --arg repo "$ext_repo" \
      '.[] | select(.repository.full_name == $repo) | .path')

    while IFS= read -r ext_path; do
      [[ -z "$ext_path" ]] && continue

      # Fetch and check if it's a reusable workflow; files without a
      # workflow_call trigger can't be called cross-repo, so skip them.
      local content
      content=$(fetch_content "$ext_repo" "$ext_path" 2>/dev/null) || continue

      if ! is_reusable_workflow "$content"; then
        continue
      fi

      log " External reusable workflow found: ${ext_repo}/${ext_path}"

      # Search our org for callers of this external workflow
      local caller_ref="${ext_repo}/${ext_path}"
      find_callers_of_workflow "$caller_ref" "$action" 1 "$action"
    done <<< "$ext_paths"
  done <<< "$external_repos"
}
711
+
712
# Recursively find repos that call shared workflows containing the target action.
# Args: $1 = action being traced
#       $2 = current depth (1-based; stops once depth exceeds MAX_DEPTH)
# Seeds the trace from RESULTS_FILE: direct hits that are themselves
# reusable workflows (recorded earlier by find_direct_refs).
trace_shared_workflows() {
  local action="$1"
  local depth="$2"

  if [[ "$depth" -gt "$MAX_DEPTH" ]]; then
    log " Max depth ($MAX_DEPTH) reached — stopping recursion"
    return
  fi

  # Find all reusable workflows in our results that directly reference this action
  local reusable_workflows=""
  if [[ -s "$RESULTS_FILE" ]]; then
    reusable_workflows=$(jq -r --arg action "$action" \
      'select(.target_action == $action and .is_reusable_workflow == true and .reference_type == "direct") | "\(.repo)/\(.workflow)"' \
      "$RESULTS_FILE" 2>/dev/null | sort -u || true)
  fi

  if [[ -z "$reusable_workflows" ]]; then
    log " No reusable workflows found at depth $depth"
    return
  fi

  local rw_total rw_idx=0
  rw_total=$(echo "$reusable_workflows" | wc -l | tr -d ' ')
  progress " Found $rw_total reusable workflows to trace"

  while IFS= read -r workflow_path; do
    [[ -z "$workflow_path" ]] && continue
    rw_idx=$((rw_idx + 1))
    progress_bar "$rw_idx" "$rw_total" "$(counter_get found)" "Tracing callers..."
    # find_callers_of_workflow recurses on its own when a caller is itself
    # reusable, so a single pass over the seed list suffices here.
    find_callers_of_workflow "$workflow_path" "$action" "$depth" "$action"
  done <<< "$reusable_workflows"
}
748
+
749
# Find all repos in the org that call a given reusable workflow.
# Args: $1 = workflow ref (e.g., "org/repo/.github/workflows/file.yml")
#       $2 = original target action
#       $3 = current depth
#       $4 = chain so far (comma-separated)
# Records "indirect" results and recurses (bounded by MAX_DEPTH) when a
# caller is itself a reusable workflow.
find_callers_of_workflow() {
  local workflow_ref="$1"
  local target_action="$2"
  local depth="$3"
  local chain_base="$4"

  local search_key="callers:${ORG}:${workflow_ref}"
  if is_search_done "$search_key"; then
    log " Skipping duplicate caller search: $workflow_ref"
    return
  fi
  mark_search_done "$search_key"

  # Build search query — search for the workflow reference in our org
  # Use the most specific part that's unique enough
  local search_term="$workflow_ref"
  log " Searching ${ORG} for callers of: $workflow_ref"

  local items
  items=$(search_code "${search_term} path:.github/workflows user:${ORG} language:yaml")

  local count
  count=$(echo "$items" | jq 'length')
  log " Found $count potential callers"

  if [[ "$count" -eq 0 ]]; then
    return
  fi

  echo "$items" | jq -c '.[]' | while IFS= read -r item; do
    local repo path
    repo=$(echo "$item" | jq -r '.repository.full_name')
    path=$(echo "$item" | jq -r '.path')

    # Visited key embeds a short hash of the callee so the same caller file
    # can still be examined for a *different* workflow_ref.
    # NOTE(review): tries GNU md5sum first, then macOS md5, then falls back
    # to the raw ref; confirm both hash tools' output shapes on target hosts.
    local caller_id="${repo}/${path}->$(echo "$workflow_ref" | md5sum | cut -c1-8 2>/dev/null || md5 -q -s "$workflow_ref" 2>/dev/null || echo "$workflow_ref")"
    if is_visited "$caller_id"; then
      continue
    fi
    mark_visited "$caller_id"

    log " Fetching caller: ${repo}/${path}"
    local content
    content=$(fetch_content "$repo" "$path" 2>/dev/null) || {
      warn " Could not fetch ${repo}/${path} — skipping"
      continue
    }

    # Verify this workflow actually calls the target shared workflow.
    # Matching is by the callee's bare filename (extension stripped via
    # `basename … .yml`), which can over-match identically named files in
    # other repos — accepted as a cheap filter here.
    local matching_refs
    matching_refs=$(extract_uses_refs "$content" | grep -F "$(basename "${workflow_ref%%@*}" .yml)" || true)

    if [[ -z "$matching_refs" ]]; then
      log " No matching workflow_call reference found in ${repo}/${path} (false positive)"
      continue
    fi

    # Check if this caller is itself a reusable workflow
    local reusable=false
    if is_reusable_workflow "$content"; then
      reusable=true
    fi

    while IFS= read -r ref; do
      local pin_info
      pin_info=$(classify_pin "$ref")

      local chain_array
      chain_array=$(jq -n --arg wf "$workflow_ref" --arg action "$target_action" '[$wf, $action]')

      add_result "$(jq -n \
        --arg repo "$repo" \
        --arg workflow "$path" \
        --arg ref "$ref" \
        --arg action "$target_action" \
        --arg ref_type "indirect" \
        --argjson reusable "$reusable" \
        --argjson pin "$pin_info" \
        --argjson chain "$chain_array" \
        --arg via "$workflow_ref" \
        '{
          repo: $repo,
          workflow: $workflow,
          uses_ref: $ref,
          target_action: $action,
          reference_type: $ref_type,
          is_reusable_workflow: $reusable,
          pin_type: $pin.pin_type,
          pin_value: $pin.version,
          pin_risk: $pin.pin_risk,
          chain: $chain,
          via_workflow: $via
        }')"
    done <<< "$matching_refs"

    # If this caller is also a reusable workflow, recurse
    if [[ "$reusable" == true && "$depth" -lt "$MAX_DEPTH" ]]; then
      local next_ref="${repo}/${path}"
      log " Recursing into reusable caller: $next_ref (depth $((depth + 1)))"
      find_callers_of_workflow "$next_ref" "$target_action" $((depth + 1)) "${chain_base},${workflow_ref}"
    fi
  done
}
856
+
857
+ # =============================================================================
858
+ # Leaf pin enrichment
859
+ # =============================================================================
860
+
861
# For indirect references, resolve the leaf action's actual pin status.
# An indirect ref like vets-api -> vsp-github-actions/sbom.yml@main -> trivy-action@0.35.0
# should report the leaf pin (tag 0.35.0) not the caller's pin (branch main).
# Rewrites RESULTS_FILE in place, adding leaf_pin_type / leaf_pin_value /
# leaf_pin_risk / leaf_uses_ref to every record; for direct records these
# simply mirror the record's own pin fields.
enrich_leaf_pins() {
  [[ ! -s "$RESULTS_FILE" ]] && return

  progress "Resolving leaf pin status for indirect references..."

  local enriched_file="${CACHE_DIR}/results_leaf.jsonl"

  # Build a lookup of direct results: keyed by "repo/workflow"
  # These contain the actual target-action pin.
  # NOTE(review): when one workflow holds several direct refs, only the
  # first (.[0]) survives as the leaf — multi-ref workflows lose detail.
  local direct_lookup
  direct_lookup=$(jq -s '
    [.[] | select(.reference_type == "direct")]
    | group_by("\(.repo)/\(.workflow)")
    | map({key: "\(.[0].repo)/\(.[0].workflow)", value: .[0]})
    | from_entries
  ' "$RESULTS_FILE")

  # Enrich each indirect result with leaf pin from its via_workflow;
  # records whose via_workflow has no direct entry pass through unchanged.
  jq -c --argjson lookup "$direct_lookup" '
    if .reference_type == "indirect" and .via_workflow then
      ($lookup[.via_workflow] // null) as $leaf |
      if $leaf then
        . + {
          leaf_pin_type: $leaf.pin_type,
          leaf_pin_value: $leaf.pin_value,
          leaf_pin_risk: $leaf.pin_risk,
          leaf_uses_ref: $leaf.uses_ref
        }
      else . end
    else
      # Direct refs: the pin IS the leaf pin
      . + {
        leaf_pin_type: .pin_type,
        leaf_pin_value: .pin_value,
        leaf_pin_risk: .pin_risk,
        leaf_uses_ref: .uses_ref
      }
    end
  ' "$RESULTS_FILE" > "$enriched_file"

  mv "$enriched_file" "$RESULTS_FILE"
}
906
+
907
+ # =============================================================================
908
+ # Workflow run checking
909
+ # =============================================================================
910
+
911
+ # Check if at-risk workflows ran during the specified time window.
912
+ # Enriches RESULTS_FILE entries with run_count and ran_during_window fields.
913
+ check_workflow_runs() {
914
+ [[ -z "$CHECK_RUNS_FROM" ]] && return
915
+
916
+ progress "Checking workflow runs during ${CHECK_RUNS_FROM}..${CHECK_RUNS_TO}..."
917
+
918
+ # Get unique repo+workflow pairs that are at risk (leaf pin is not SHA)
919
+ local at_risk
920
+ at_risk=$(jq -r 'select(.leaf_pin_type != "sha" and .leaf_pin_type != null) | "\(.repo)\t\(.workflow)"' "$RESULTS_FILE" | sort -u)
921
+
922
+ if [[ -z "$at_risk" ]]; then
923
+ progress " No at-risk workflows to check"
924
+ return
925
+ fi
926
+
927
+ local total pair_idx=0 hits=0
928
+ total=$(echo "$at_risk" | wc -l | tr -d ' ')
929
+
930
+ # Create enriched results file
931
+ local enriched_file="${CACHE_DIR}/results_enriched.jsonl"
932
+ cp "$RESULTS_FILE" "$enriched_file"
933
+
934
+ while IFS=$'\t' read -r repo workflow; do
935
+ [[ -z "$repo" ]] && continue
936
+ pair_idx=$((pair_idx + 1))
937
+ progress_bar "$pair_idx" "$total" "$hits" "Checking runs..."
938
+
939
+ local wf_name
940
+ wf_name=$(basename "$workflow")
941
+
942
+ local run_count
943
+ run_count=$(gh api "repos/${repo}/actions/workflows/${wf_name}/runs?created=${CHECK_RUNS_FROM}..${CHECK_RUNS_TO}&per_page=1" \
944
+ --jq '.total_count // 0' 2>/dev/null) || run_count=0
945
+ # Ensure it's a number
946
+ if ! [[ "$run_count" =~ ^[0-9]+$ ]]; then
947
+ run_count=0
948
+ fi
949
+ counter_inc core_calls
950
+
951
+ if [[ "$run_count" -gt 0 ]]; then
952
+ hits=$((hits + 1))
953
+ fi
954
+
955
+ # Update matching entries in the enriched file with run data
956
+ local tmp_file="${CACHE_DIR}/results_tmp.jsonl"
957
+ jq --arg repo "$repo" --arg wf "$workflow" --argjson count "${run_count:-0}" '
958
+ if .repo == $repo and .workflow == $wf then
959
+ . + {run_count: $count, ran_during_window: ($count > 0)}
960
+ else . end
961
+ ' "$enriched_file" > "$tmp_file"
962
+ mv "$tmp_file" "$enriched_file"
963
+ done <<< "$at_risk"
964
+
965
+ # Also mark SHA-pinned entries as safe (no runs to check)
966
+ local tmp_file="${CACHE_DIR}/results_tmp.jsonl"
967
+ jq 'if .pin_type == "sha" then . + {run_count: 0, ran_during_window: false} else . end' \
968
+ "$enriched_file" > "$tmp_file"
969
+ mv "$tmp_file" "$enriched_file"
970
+
971
+ # Replace results file
972
+ mv "$enriched_file" "$RESULTS_FILE"
973
+
974
+ progress " $hits of $total at-risk workflows ran during the window"
975
+ }
976
+
977
+ # =============================================================================
978
+ # Output formatting
979
+ # =============================================================================
980
+
981
+ # Deduplicate results by repo+workflow+uses_ref. Outputs a JSON array.
982
+ deduped_results() {
983
+ if [[ ! -s "$RESULTS_FILE" ]]; then
984
+ echo "[]"
985
+ else
986
+ jq -s 'unique_by(.repo + "|" + .workflow + "|" + .uses_ref)' "$RESULTS_FILE"
987
+ fi
988
+ }
989
+
990
# Emit the full JSON report (scan metadata, deduped results, summary counts,
# API usage) to stdout.
# Reads globals: TARGET_ACTIONS, ORG, MAX_DEPTH, SEARCH_EXTERNAL.
output_json() {
  local results_array
  results_array=$(deduped_results)

  # Compute all summary counts in a single jq pass (@tsv) instead of one
  # subprocess per metric — eight fewer fork/execs per report.
  local total direct indirect pin_sha pin_tag pin_branch pin_none unique_repos
  IFS=$'\t' read -r total direct indirect pin_sha pin_tag pin_branch pin_none unique_repos < <(
    echo "$results_array" | jq -r '[
      length,
      ([.[] | select(.reference_type == "direct")] | length),
      ([.[] | select(.reference_type == "indirect")] | length),
      ([.[] | select(.pin_type == "sha")] | length),
      ([.[] | select(.pin_type == "tag")] | length),
      ([.[] | select(.pin_type == "branch")] | length),
      ([.[] | select(.pin_type == "none")] | length),
      ([.[].repo] | unique | length)
    ] | @tsv'
  )

  # Assemble the report; jq writes directly to stdout.
  jq -n \
    --argjson actions "$(printf '%s\n' "${TARGET_ACTIONS[@]}" | jq -R . | jq -s .)" \
    --arg org "$ORG" \
    --arg scan_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --argjson depth "$MAX_DEPTH" \
    --argjson external "$SEARCH_EXTERNAL" \
    --argjson results "$results_array" \
    --argjson total "$total" \
    --argjson unique_repos "$unique_repos" \
    --argjson direct "$direct" \
    --argjson indirect "$indirect" \
    --argjson pin_sha "$pin_sha" \
    --argjson pin_tag "$pin_tag" \
    --argjson pin_branch "$pin_branch" \
    --argjson pin_none "$pin_none" \
    --argjson search_calls "$(counter_get search_calls)" \
    --argjson core_calls "$(counter_get core_calls)" \
    --argjson cache_hits "$(counter_get cache_hits)" \
    '{
      target_actions: $actions,
      org: $org,
      scan_time: $scan_time,
      options: {max_depth: $depth, search_external: $external},
      results: $results,
      summary: {
        total_references: $total,
        unique_repos: $unique_repos,
        direct: $direct,
        indirect: $indirect,
        pinning: {sha: $pin_sha, tag: $pin_tag, branch: $pin_branch, none: $pin_none}
      },
      api_usage: {search_calls: $search_calls, core_calls: $core_calls, cache_hits: $cache_hits}
    }'
}
1043
+
1044
# Render the human-readable report to stdout: direct references, indirect
# references (via shared workflows), a pinning summary, run-window findings
# (present only when --check-runs populated ran_during_window), and API usage.
# Reads globals: TARGET_ACTIONS, ORG, CHECK_RUNS_FROM, CHECK_RUNS_TO.
output_text() {
  local results_array
  results_array=$(deduped_results)

  # Nothing found — short-circuit before printing any report headers.
  if [[ "$(echo "$results_array" | jq 'length')" -eq 0 ]]; then
    printf '\nNo references found.\n'
    return
  fi

  printf '\n'
  printf '%.0s=' {1..70}
  printf '\n'

  for action in "${TARGET_ACTIONS[@]}"; do
    printf '  Trace: %s in %s\n' "$action" "$ORG"
  done

  printf '%.0s=' {1..70}
  printf '\n'

  # Direct references
  local direct
  direct=$(echo "$results_array" | jq -c '[.[] | select(.reference_type == "direct")]')
  local direct_count
  direct_count=$(echo "$direct" | jq 'length')

  printf '\n  DIRECT REFERENCES (%d)\n' "$direct_count"
  printf '%.0s-' {1..40}
  printf '\n'

  # Grouped by repo; run-window lines appear only when --check-runs set the
  # ran_during_window field. stderr is suppressed and failures ignored so a
  # formatting hiccup on odd data cannot abort the report under set -e.
  if [[ "$direct_count" -gt 0 ]]; then
    echo "$direct" | jq -r 'group_by(.repo)[] | .[0].repo as $repo |
      "\n  \($repo)",
      (.[] | "    \(.workflow)",
        "      \(.uses_ref)",
        "      pin: \(.pin_type) (\(.pin_value)) \(if .pin_risk == "safe" then "✓" elif .pin_risk == "risky" then "⚠" else "✗" end)",
        "      reusable: \(if .is_reusable_workflow then "YES (workflow_call)" else "no" end)",
        (if .ran_during_window == true then "      RUNS: \(.run_count) during window (verify)" elif .ran_during_window == false then "      runs: 0 (clear)" else empty end))' 2>/dev/null || true
  fi

  # Indirect references
  local indirect
  indirect=$(echo "$results_array" | jq -c '[.[] | select(.reference_type == "indirect")]')
  local indirect_count
  indirect_count=$(echo "$indirect" | jq 'length')

  printf '\n\n  INDIRECT REFERENCES (%d) — via shared workflows\n' "$indirect_count"
  printf '%.0s-' {1..40}
  printf '\n'

  # Same shape as the direct section, plus: the leaf pin (shown only when it
  # differs from the caller's pin), the shared workflow it came through, and
  # the full call chain.
  if [[ "$indirect_count" -gt 0 ]]; then
    echo "$indirect" | jq -r 'group_by(.repo)[] | .[0].repo as $repo |
      "\n  \($repo)",
      (.[] | "    \(.workflow)",
        "      \(.uses_ref)",
        "      pin: \(.pin_type) (\(.pin_value)) \(if .pin_risk == "safe" then "✓" elif .pin_risk == "risky" then "⚠" else "✗" end)",
        (if .leaf_pin_type and .leaf_pin_type != .pin_type then "      leaf: \(.leaf_pin_type) (\(.leaf_pin_value)) \(if .leaf_pin_risk == "safe" then "✓" elif .leaf_pin_risk == "risky" then "⚠" else "✗" end) ← actual action pin" else empty end),
        "      via: \(.via_workflow // "unknown")",
        "      chain: \(.chain | join(" → "))",
        (if .ran_during_window == true then "      RUNS: \(.run_count) during window (verify)" elif .ran_during_window == false then "      runs: 0 (clear)" else empty end))' 2>/dev/null || true
  fi

  # Summary
  local total pin_sha pin_tag pin_branch pin_none unique_repos
  total=$(echo "$results_array" | jq 'length')
  unique_repos=$(echo "$results_array" | jq '[.[].repo] | unique | length')
  pin_sha=$(echo "$results_array" | jq '[.[] | select(.pin_type == "sha")] | length')
  pin_tag=$(echo "$results_array" | jq '[.[] | select(.pin_type == "tag")] | length')
  pin_branch=$(echo "$results_array" | jq '[.[] | select(.pin_type == "branch")] | length')
  pin_none=$(echo "$results_array" | jq '[.[] | select(.pin_type == "none")] | length')

  printf '\n\n  PINNING SUMMARY\n'
  printf '%.0s-' {1..40}
  printf '\n'
  printf '  SHA-pinned:  %3d  (safe)\n' "$pin_sha"
  printf '  Tag-pinned:  %3d  (risky — tags are mutable)\n' "$pin_tag"
  printf '  Branch:      %3d  (dangerous)\n' "$pin_branch"
  # "none" rows are rare; only show the line when it is non-zero.
  if [[ "$pin_none" -gt 0 ]]; then
    printf '  No version:  %3d  (critical)\n' "$pin_none"
  fi
  printf '  ─────────────────\n'
  printf '  Total refs:  %3d across %d repos\n' "$total" "$unique_repos"

  # Compromised workflows section (only when --check-runs was used)
  # Compromised: ran during window AND leaf pin is not safe
  local compromised
  compromised=$(echo "$results_array" | jq -c '[.[] | select(.ran_during_window == true and .leaf_pin_risk != "safe")]')
  local compromised_count
  compromised_count=$(echo "$compromised" | jq 'length')

  # Safe runs: ran during window but leaf pin was safe (e.g., through vsp-github-actions@0.35.0)
  local safe_runs
  safe_runs=$(echo "$results_array" | jq -c '[.[] | select(.ran_during_window == true and .leaf_pin_risk == "safe")]')
  local safe_runs_count
  safe_runs_count=$(echo "$safe_runs" | jq 'length')

  if [[ "$compromised_count" -gt 0 ]]; then
    local compromised_repos
    compromised_repos=$(echo "$compromised" | jq '[.[].repo] | unique | length')

    printf '\n\n  *** POTENTIALLY COMPROMISED: RAN DURING WINDOW (%d across %d repos) ***\n' "$compromised_count" "$compromised_repos"
    printf '%.0s-' {1..40}
    printf '\n'
    echo "$compromised" | jq -r 'group_by(.repo)[] | .[0].repo as $repo |
      "\n  \($repo)",
      (.[] | "    \(.workflow) - \(.run_count) runs [\(.leaf_pin_type // .pin_type) \(.leaf_pin_value // .pin_value)]")' 2>/dev/null || true
    printf '\n  Note: Verify via git history that the workflow used a compromised ref at\n'
    printf '  the time of execution. Branch/tag state may have changed since.\n'
  elif [[ -n "$CHECK_RUNS_FROM" ]]; then
    # --check-runs was requested but nothing at risk actually ran.
    printf '\n\n  No at-risk workflows ran during %s..%s\n' "$CHECK_RUNS_FROM" "$CHECK_RUNS_TO"
  fi

  if [[ "$safe_runs_count" -gt 0 ]]; then
    local safe_repos
    safe_repos=$(echo "$safe_runs" | jq '[.[].repo] | unique | length')

    printf '\n  SAFE: Ran during window but leaf action was SHA/safe-pinned (%d across %d repos)\n' "$safe_runs_count" "$safe_repos"
    printf '%.0s-' {1..40}
    printf '\n'
    echo "$safe_runs" | jq -r 'group_by(.repo)[] | .[0].repo as $repo |
      "\n  \($repo)",
      (.[] | "    \(.workflow) [\(.leaf_pin_type) \(.leaf_pin_value)]")' 2>/dev/null || true
    printf '\n'
  fi

  printf '\n  API USAGE\n'
  printf '%.0s-' {1..40}
  printf '\n'
  printf '  Search API calls: %d\n' "$(counter_get search_calls)"
  printf '  Core API calls:   %d\n' "$(counter_get core_calls)"
  printf '  Cache hits:       %d\n' "$(counter_get cache_hits)"
  printf '\n'
}
1177
+
1178
# Generate CSV report.
# Args: $1 = output file path
# Columns cover the caller's pin, the resolved leaf pin, and run-window data
# (left blank when --check-runs was not used).
output_csv() {
  local dest="$1"
  local rows
  rows=$(deduped_results)

  printf '%s\n' "repo,workflow,uses_ref,reference_type,pin_type,pin_value,pin_risk,leaf_pin_type,leaf_pin_value,leaf_pin_risk,is_reusable,target_action,via_workflow,run_count,ran_during_window" > "$dest"
  jq -r '.[] |
    [.repo, .workflow, .uses_ref, .reference_type, .pin_type, .pin_value, .pin_risk,
     (.leaf_pin_type // ""), (.leaf_pin_value // ""), (.leaf_pin_risk // ""),
     (if .is_reusable_workflow then "yes" else "no" end),
     .target_action, (.via_workflow // ""),
     (.run_count // ""), (if .ran_during_window == true then "yes" elif .ran_during_window == false then "no" else "" end)] | @csv' \
    <<<"$rows" >> "$dest"
}
1195
+
1196
+ # =============================================================================
1197
+ # Main
1198
+ # =============================================================================
1199
+
1200
+ main() {
1201
+ parse_args "$@"
1202
+
1203
+ # Hide cursor during progress display
1204
+ hide_cursor
1205
+
1206
+ # Set up temp directory for cache, results, and counters
1207
+ CACHE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/gh-action-trace.XXXXXX")
1208
+ COUNTER_DIR="$CACHE_DIR"
1209
+ RESULTS_FILE="${CACHE_DIR}/results.jsonl"
1210
+ VISITED_WORKFLOWS_FILE="${CACHE_DIR}/visited_workflows.txt"
1211
+ VISITED_SEARCHES_FILE="${CACHE_DIR}/visited_searches.txt"
1212
+ touch "$RESULTS_FILE" "$VISITED_WORKFLOWS_FILE" "$VISITED_SEARCHES_FILE"
1213
+
1214
+ # Initialize file-based counters
1215
+ counter_init search_calls
1216
+ counter_init core_calls
1217
+ counter_init cache_hits
1218
+ counter_init found
1219
+ counter_set search_remaining 10
1220
+ counter_set search_limit 10
1221
+ counter_set search_reset 0
1222
+ counter_set core_remaining 5000
1223
+
1224
+ log "Cache dir: $CACHE_DIR"
1225
+ log "Org: $ORG"
1226
+ log "Actions: ${TARGET_ACTIONS[*]}"
1227
+ log "Max depth: $MAX_DEPTH"
1228
+
1229
+ # Check gh auth
1230
+ gh auth status &>/dev/null || die "Not authenticated with gh. Run: gh auth login"
1231
+
1232
+ # Refresh rate limits before starting so we know our budget
1233
+ refresh_rate_limit
1234
+
1235
+ # Trace each target action
1236
+ for action in "${TARGET_ACTIONS[@]}"; do
1237
+ progress "Tracing: $action in $ORG (depth=$MAX_DEPTH)"
1238
+ trace_action "$action"
1239
+ done
1240
+
1241
+ # Enrich indirect refs with leaf pin status
1242
+ enrich_leaf_pins
1243
+
1244
+ # Check workflow runs if --check-runs was specified
1245
+ if [[ -n "$CHECK_RUNS_FROM" ]]; then
1246
+ check_workflow_runs
1247
+ fi
1248
+
1249
+ show_cursor
1250
+ progress "Done. $(counter_get search_calls) search + $(counter_get core_calls) core API calls."
1251
+
1252
+ # Build report filename base from org and actions
1253
+ local report_slug
1254
+ report_slug="${ORG}"
1255
+ for action in "${TARGET_ACTIONS[@]}"; do
1256
+ report_slug="${report_slug}_${action##*/}"
1257
+ done
1258
+ report_slug=$(echo "$report_slug" | tr '/' '-' | tr ' ' '-')
1259
+ local timestamp
1260
+ timestamp=$(date +%Y%m%d-%H%M%S)
1261
+ local report_base="gh-action-trace_${report_slug}_${timestamp}"
1262
+
1263
+ # Generate reports based on format
1264
+ local json_file="${report_base}.json"
1265
+ local csv_file="${report_base}.csv"
1266
+
1267
+ case "$FORMAT" in
1268
+ text)
1269
+ output_text
1270
+ ;;
1271
+ json)
1272
+ if [[ -n "$OUTPUT_FILE" ]]; then
1273
+ output_json > "$OUTPUT_FILE"
1274
+ else
1275
+ output_json > "$json_file"
1276
+ fi
1277
+ ;;
1278
+ csv)
1279
+ if [[ -n "$OUTPUT_FILE" ]]; then
1280
+ output_csv "$OUTPUT_FILE"
1281
+ else
1282
+ output_csv "$csv_file"
1283
+ fi
1284
+ ;;
1285
+ both)
1286
+ output_text
1287
+ if [[ -n "$OUTPUT_FILE" ]]; then
1288
+ output_json > "$OUTPUT_FILE"
1289
+ else
1290
+ output_json > "$json_file"
1291
+ fi
1292
+ output_csv "$csv_file"
1293
+ ;;
1294
+ esac
1295
+
1296
+ # Show report file paths (only for formats that generate files)
1297
+ if [[ "$FORMAT" != "text" ]]; then
1298
+ printf '\n REPORTS\n'
1299
+ printf '%.0s-' {1..40}
1300
+ printf '\n'
1301
+ case "$FORMAT" in
1302
+ json)
1303
+ printf ' JSON: %s\n' "${OUTPUT_FILE:-$json_file}"
1304
+ ;;
1305
+ csv)
1306
+ printf ' CSV: %s\n' "${OUTPUT_FILE:-$csv_file}"
1307
+ ;;
1308
+ both)
1309
+ printf ' JSON: %s\n' "${OUTPUT_FILE:-$json_file}"
1310
+ printf ' CSV: %s\n' "$csv_file"
1311
+ ;;
1312
+ esac
1313
+ printf '\n'
1314
+ fi
1315
+ }
1316
+
1317
+ main "$@"