@vigolium/piolium 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +117 -0
- package/agents/access-auditor.md +300 -0
- package/agents/assumption-breaker.md +154 -0
- package/agents/attack-designer.md +116 -0
- package/agents/code-scanner.md +139 -0
- package/agents/concurrency-auditor.md +238 -0
- package/agents/confirm-writer.md +257 -0
- package/agents/context-reviewer.md +274 -0
- package/agents/cross-verifier.md +165 -0
- package/agents/cve-scout.md +381 -0
- package/agents/env-builder.md +282 -0
- package/agents/env-profiler.md +205 -0
- package/agents/evidence-collector.md +140 -0
- package/agents/finding-grader.md +142 -0
- package/agents/finding-writer.md +148 -0
- package/agents/flow-tracer.md +106 -0
- package/agents/goal-backtracer.md +146 -0
- package/agents/history-miner.md +467 -0
- package/agents/independent-verifier.md +118 -0
- package/agents/intent-mapper.md +183 -0
- package/agents/longshot-collector.md +128 -0
- package/agents/longshot-prober.md +126 -0
- package/agents/patch-auditor.md +73 -0
- package/agents/poc-author.md +124 -0
- package/agents/poc-runner.md +194 -0
- package/agents/probe-lead.md +269 -0
- package/agents/red-challenger.md +101 -0
- package/agents/report-composer.md +208 -0
- package/agents/review-adjudicator.md +216 -0
- package/agents/spec-auditor.md +155 -0
- package/agents/taint-tracer.md +265 -0
- package/agents/test-locator.md +209 -0
- package/agents/threat-modeler.md +132 -0
- package/agents/variant-scanner.md +108 -0
- package/agents/variant-spotter.md +110 -0
- package/bin/piolium.mjs +376 -0
- package/extensions/piolium/_vendor/yaml.bundle.d.mts +6 -0
- package/extensions/piolium/_vendor/yaml.bundle.mjs +139 -0
- package/extensions/piolium/agent-runner.ts +322 -0
- package/extensions/piolium/agents.ts +266 -0
- package/extensions/piolium/audit-state.ts +522 -0
- package/extensions/piolium/bundled-resources.ts +97 -0
- package/extensions/piolium/candidate-scan.ts +966 -0
- package/extensions/piolium/command-target.ts +177 -0
- package/extensions/piolium/console-stream.ts +57 -0
- package/extensions/piolium/export-results.ts +380 -0
- package/extensions/piolium/findings.ts +448 -0
- package/extensions/piolium/heartbeat.ts +182 -0
- package/extensions/piolium/help.ts +234 -0
- package/extensions/piolium/index.ts +1865 -0
- package/extensions/piolium/longshot.ts +530 -0
- package/extensions/piolium/matcher-suggestions.ts +196 -0
- package/extensions/piolium/matcher-utils.ts +83 -0
- package/extensions/piolium/modes/balanced.ts +750 -0
- package/extensions/piolium/modes/confirm-bootstrap.ts +186 -0
- package/extensions/piolium/modes/confirm.ts +697 -0
- package/extensions/piolium/modes/deep.ts +917 -0
- package/extensions/piolium/modes/diff.ts +177 -0
- package/extensions/piolium/modes/lite.ts +540 -0
- package/extensions/piolium/modes/longshot.ts +595 -0
- package/extensions/piolium/modes/merge.ts +204 -0
- package/extensions/piolium/modes/phase-runner.ts +267 -0
- package/extensions/piolium/modes/reinvest.ts +546 -0
- package/extensions/piolium/modes/revisit.ts +279 -0
- package/extensions/piolium/modes.ts +48 -0
- package/extensions/piolium/phase-labels.ts +123 -0
- package/extensions/piolium/phase-status-strip.ts +92 -0
- package/extensions/piolium/prompt-prefix-editor.ts +39 -0
- package/extensions/piolium/providers/anthropic-vertex.ts +836 -0
- package/extensions/piolium/recon.ts +409 -0
- package/extensions/piolium/result-stats.ts +105 -0
- package/extensions/piolium/retry.ts +120 -0
- package/extensions/piolium/scheduler.ts +212 -0
- package/extensions/piolium/secrets.ts +368 -0
- package/extensions/piolium/tools/web-tools.ts +148 -0
- package/package.json +77 -0
- package/skills/agentic-actions-auditor/SKILL.md +327 -0
- package/skills/agentic-actions-auditor/references/action-profiles.md +186 -0
- package/skills/agentic-actions-auditor/references/cross-file-resolution.md +209 -0
- package/skills/agentic-actions-auditor/references/foundations.md +94 -0
- package/skills/agentic-actions-auditor/references/vector-a-env-var-intermediary.md +77 -0
- package/skills/agentic-actions-auditor/references/vector-b-direct-expression-injection.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-c-cli-data-fetch.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-d-pr-target-checkout.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-e-error-log-injection.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-f-subshell-expansion.md +82 -0
- package/skills/agentic-actions-auditor/references/vector-g-eval-of-ai-output.md +91 -0
- package/skills/agentic-actions-auditor/references/vector-h-dangerous-sandbox-configs.md +102 -0
- package/skills/agentic-actions-auditor/references/vector-i-wildcard-allowlists.md +88 -0
- package/skills/audit/SKILL.md +562 -0
- package/skills/audit/assets/icon.svg +7 -0
- package/skills/audit/hooks/scripts/validate_phase_output.py +550 -0
- package/skills/audit/references/adversarial-review.md +148 -0
- package/skills/audit/references/architecture-aware-sast.md +306 -0
- package/skills/audit/references/audit-workflow.md +737 -0
- package/skills/audit/references/chamber-protocol.md +384 -0
- package/skills/audit/references/creative-attack-modes.md +221 -0
- package/skills/audit/references/deep-analysis.md +273 -0
- package/skills/audit/references/domain-attack-playbooks.md +1129 -0
- package/skills/audit/references/knowledge-base-template.md +513 -0
- package/skills/audit/references/real-env-validation.md +191 -0
- package/skills/audit/references/report-templates.md +417 -0
- package/skills/audit/references/triage-and-prereqs.md +134 -0
- package/skills/audit/scripts/consolidate_drafts.py +554 -0
- package/skills/audit/scripts/partition_findings.py +152 -0
- package/skills/audit/scripts/rg-hotspots.sh +121 -0
- package/skills/audit/scripts/stamp_file_state.py +349 -0
- package/skills/code-reviewer/SKILL.md +65 -0
- package/skills/codeql/SKILL.md +281 -0
- package/skills/codeql/references/build-fixes.md +90 -0
- package/skills/codeql/references/diagnostic-query-templates.md +339 -0
- package/skills/codeql/references/extension-yaml-format.md +209 -0
- package/skills/codeql/references/important-only-suite.md +153 -0
- package/skills/codeql/references/language-details.md +207 -0
- package/skills/codeql/references/macos-arm64e-workaround.md +179 -0
- package/skills/codeql/references/performance-tuning.md +111 -0
- package/skills/codeql/references/quality-assessment.md +172 -0
- package/skills/codeql/references/ruleset-catalog.md +63 -0
- package/skills/codeql/references/run-all-suite.md +92 -0
- package/skills/codeql/references/sarif-processing.md +79 -0
- package/skills/codeql/references/threat-models.md +51 -0
- package/skills/codeql/workflows/build-database.md +280 -0
- package/skills/codeql/workflows/create-data-extensions.md +261 -0
- package/skills/codeql/workflows/run-analysis.md +301 -0
- package/skills/differential-review/SKILL.md +220 -0
- package/skills/differential-review/adversarial.md +203 -0
- package/skills/differential-review/methodology.md +234 -0
- package/skills/differential-review/patterns.md +300 -0
- package/skills/differential-review/reporting.md +369 -0
- package/skills/fp-check/SKILL.md +125 -0
- package/skills/fp-check/references/bug-class-verification.md +114 -0
- package/skills/fp-check/references/deep-verification.md +143 -0
- package/skills/fp-check/references/evidence-templates.md +91 -0
- package/skills/fp-check/references/false-positive-patterns.md +115 -0
- package/skills/fp-check/references/gate-reviews.md +27 -0
- package/skills/fp-check/references/standard-verification.md +78 -0
- package/skills/insecure-defaults/SKILL.md +117 -0
- package/skills/insecure-defaults/references/examples.md +409 -0
- package/skills/last30days/SKILL.md +444 -0
- package/skills/sarif-parsing/SKILL.md +483 -0
- package/skills/sarif-parsing/resources/jq-queries.md +162 -0
- package/skills/sarif-parsing/resources/sarif_helpers.py +331 -0
- package/skills/security-threat-model/LICENSE.txt +201 -0
- package/skills/security-threat-model/SKILL.md +81 -0
- package/skills/security-threat-model/agents/openai.yaml +4 -0
- package/skills/security-threat-model/references/prompt-template.md +255 -0
- package/skills/security-threat-model/references/security-controls-and-assets.md +32 -0
- package/skills/semgrep/SKILL.md +212 -0
- package/skills/semgrep/references/rulesets.md +162 -0
- package/skills/semgrep/references/scan-modes.md +110 -0
- package/skills/semgrep/references/scanner-task-prompt.md +140 -0
- package/skills/semgrep/scripts/merge_sarif.py +203 -0
- package/skills/semgrep/workflows/scan-workflow.md +311 -0
- package/skills/semgrep-rule-creator/SKILL.md +168 -0
- package/skills/semgrep-rule-creator/references/quick-reference.md +202 -0
- package/skills/semgrep-rule-creator/references/workflow.md +240 -0
- package/skills/semgrep-rule-variant-creator/SKILL.md +205 -0
- package/skills/semgrep-rule-variant-creator/references/applicability-analysis.md +250 -0
- package/skills/semgrep-rule-variant-creator/references/language-syntax-guide.md +324 -0
- package/skills/semgrep-rule-variant-creator/references/workflow.md +518 -0
- package/skills/sharp-edges/SKILL.md +292 -0
- package/skills/sharp-edges/references/auth-patterns.md +252 -0
- package/skills/sharp-edges/references/case-studies.md +274 -0
- package/skills/sharp-edges/references/config-patterns.md +333 -0
- package/skills/sharp-edges/references/crypto-apis.md +190 -0
- package/skills/sharp-edges/references/lang-c.md +205 -0
- package/skills/sharp-edges/references/lang-csharp.md +285 -0
- package/skills/sharp-edges/references/lang-go.md +270 -0
- package/skills/sharp-edges/references/lang-java.md +263 -0
- package/skills/sharp-edges/references/lang-javascript.md +269 -0
- package/skills/sharp-edges/references/lang-kotlin.md +265 -0
- package/skills/sharp-edges/references/lang-php.md +245 -0
- package/skills/sharp-edges/references/lang-python.md +274 -0
- package/skills/sharp-edges/references/lang-ruby.md +273 -0
- package/skills/sharp-edges/references/lang-rust.md +272 -0
- package/skills/sharp-edges/references/lang-swift.md +287 -0
- package/skills/sharp-edges/references/language-specific.md +588 -0
- package/skills/spec-to-code-compliance/SKILL.md +357 -0
- package/skills/spec-to-code-compliance/resources/COMPLETENESS_CHECKLIST.md +69 -0
- package/skills/spec-to-code-compliance/resources/IR_EXAMPLES.md +417 -0
- package/skills/spec-to-code-compliance/resources/OUTPUT_REQUIREMENTS.md +105 -0
- package/skills/supply-chain-risk-auditor/SKILL.md +67 -0
- package/skills/supply-chain-risk-auditor/resources/results-template.md +41 -0
- package/skills/variant-analysis/METHODOLOGY.md +327 -0
- package/skills/variant-analysis/SKILL.md +142 -0
- package/skills/variant-analysis/resources/codeql/cpp.ql +119 -0
- package/skills/variant-analysis/resources/codeql/go.ql +69 -0
- package/skills/variant-analysis/resources/codeql/java.ql +71 -0
- package/skills/variant-analysis/resources/codeql/javascript.ql +63 -0
- package/skills/variant-analysis/resources/codeql/python.ql +80 -0
- package/skills/variant-analysis/resources/semgrep/cpp.yaml +98 -0
- package/skills/variant-analysis/resources/semgrep/go.yaml +63 -0
- package/skills/variant-analysis/resources/semgrep/java.yaml +61 -0
- package/skills/variant-analysis/resources/semgrep/javascript.yaml +60 -0
- package/skills/variant-analysis/resources/semgrep/python.yaml +72 -0
- package/skills/variant-analysis/resources/variant-report-template.md +75 -0
- package/skills/vuln-report/SKILL.md +137 -0
- package/skills/vuln-report/agents/openai.yaml +4 -0
- package/skills/vuln-report/references/report-template.md +135 -0
- package/skills/wooyun-legacy/SKILL.md +367 -0
- package/skills/wooyun-legacy/references/bank-penetration.md +222 -0
- package/skills/wooyun-legacy/references/checklists/command-execution-checklist.md +119 -0
- package/skills/wooyun-legacy/references/checklists/csrf-checklist.md +74 -0
- package/skills/wooyun-legacy/references/checklists/file-upload-checklist.md +108 -0
- package/skills/wooyun-legacy/references/checklists/info-disclosure-checklist.md +114 -0
- package/skills/wooyun-legacy/references/checklists/logic-flaws-checklist.md +95 -0
- package/skills/wooyun-legacy/references/checklists/misconfig-checklist.md +124 -0
- package/skills/wooyun-legacy/references/checklists/path-traversal-checklist.md +87 -0
- package/skills/wooyun-legacy/references/checklists/rce-checklist.md +93 -0
- package/skills/wooyun-legacy/references/checklists/sql-injection-checklist.md +97 -0
- package/skills/wooyun-legacy/references/checklists/ssrf-checklist.md +99 -0
- package/skills/wooyun-legacy/references/checklists/unauthorized-access-checklist.md +89 -0
- package/skills/wooyun-legacy/references/checklists/weak-password-checklist.md +115 -0
- package/skills/wooyun-legacy/references/checklists/xss-checklist.md +103 -0
- package/skills/wooyun-legacy/references/checklists/xxe-checklist.md +130 -0
- package/skills/wooyun-legacy/references/info-disclosure.md +975 -0
- package/skills/wooyun-legacy/references/logic-flaws.md +721 -0
- package/skills/wooyun-legacy/references/path-traversal.md +1191 -0
- package/skills/wooyun-legacy/references/telecom-penetration.md +156 -0
- package/skills/wooyun-legacy/references/unauthorized-access.md +980 -0
- package/skills/wooyun-legacy/references/xss.md +746 -0
- package/skills/zeroize-audit/SKILL.md +371 -0
- package/skills/zeroize-audit/configs/c.yaml +21 -0
- package/skills/zeroize-audit/configs/default.yaml +128 -0
- package/skills/zeroize-audit/configs/rust.yaml +83 -0
- package/skills/zeroize-audit/prompts/report_template.md +238 -0
- package/skills/zeroize-audit/prompts/system.md +163 -0
- package/skills/zeroize-audit/prompts/task.md +97 -0
- package/skills/zeroize-audit/references/compile-commands.md +231 -0
- package/skills/zeroize-audit/references/detection-strategy.md +191 -0
- package/skills/zeroize-audit/references/ir-analysis.md +252 -0
- package/skills/zeroize-audit/references/mcp-analysis.md +221 -0
- package/skills/zeroize-audit/references/poc-generation.md +470 -0
- package/skills/zeroize-audit/references/rust-zeroization-patterns.md +867 -0
- package/skills/zeroize-audit/schemas/input.json +83 -0
- package/skills/zeroize-audit/schemas/output.json +140 -0
- package/skills/zeroize-audit/tools/analyze_asm.sh +202 -0
- package/skills/zeroize-audit/tools/analyze_cfg.py +381 -0
- package/skills/zeroize-audit/tools/analyze_heap.sh +211 -0
- package/skills/zeroize-audit/tools/analyze_ir_semantic.py +429 -0
- package/skills/zeroize-audit/tools/diff_ir.sh +135 -0
- package/skills/zeroize-audit/tools/diff_rust_mir.sh +189 -0
- package/skills/zeroize-audit/tools/emit_asm.sh +67 -0
- package/skills/zeroize-audit/tools/emit_ir.sh +77 -0
- package/skills/zeroize-audit/tools/emit_rust_asm.sh +178 -0
- package/skills/zeroize-audit/tools/emit_rust_ir.sh +150 -0
- package/skills/zeroize-audit/tools/emit_rust_mir.sh +158 -0
- package/skills/zeroize-audit/tools/extract_compile_flags.py +284 -0
- package/skills/zeroize-audit/tools/generate_poc.py +1329 -0
- package/skills/zeroize-audit/tools/mcp/apply_confidence_gates.py +113 -0
- package/skills/zeroize-audit/tools/mcp/check_mcp.sh +68 -0
- package/skills/zeroize-audit/tools/mcp/normalize_mcp_evidence.py +125 -0
- package/skills/zeroize-audit/tools/scripts/check_llvm_patterns.py +481 -0
- package/skills/zeroize-audit/tools/scripts/check_mir_patterns.py +554 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm.py +424 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_aarch64.py +300 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_x86.py +283 -0
- package/skills/zeroize-audit/tools/scripts/find_dangerous_apis.py +375 -0
- package/skills/zeroize-audit/tools/scripts/semantic_audit.py +923 -0
- package/skills/zeroize-audit/tools/track_dataflow.sh +196 -0
- package/skills/zeroize-audit/tools/validate_rust_toolchain.sh +298 -0
- package/skills/zeroize-audit/workflows/phase-0-preflight.md +150 -0
- package/skills/zeroize-audit/workflows/phase-1-source-analysis.md +144 -0
- package/skills/zeroize-audit/workflows/phase-2-compiler-analysis.md +139 -0
- package/skills/zeroize-audit/workflows/phase-3-interim-report.md +46 -0
- package/skills/zeroize-audit/workflows/phase-4-poc-generation.md +46 -0
- package/skills/zeroize-audit/workflows/phase-5-poc-validation.md +136 -0
- package/skills/zeroize-audit/workflows/phase-6-final-report.md +44 -0
- package/skills/zeroize-audit/workflows/phase-7-test-generation.md +42 -0
- package/themes/piolium-srcery.json +94 -0
|
@@ -0,0 +1,966 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic candidate discovery.
|
|
3
|
+
*
|
|
4
|
+
* This pass runs before model-heavy phases and leaves durable evidence:
|
|
5
|
+
* - `piolium/attack-surface/candidates.jsonl`
|
|
6
|
+
* - `piolium/attack-surface/candidates-summary.md`
|
|
7
|
+
* - `piolium/file-records/<source-path>.json`
|
|
8
|
+
*
|
|
9
|
+
* The records are intentionally lightweight. They help later agents spend
|
|
10
|
+
* attention on higher-risk files without turning this phase into a separate
|
|
11
|
+
* triage gate.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { createHash } from "node:crypto";
|
|
15
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
|
|
16
|
+
import { dirname, extname, join, relative, sep } from "node:path";
|
|
17
|
+
import { yieldToEventLoop } from "./retry.ts";
|
|
18
|
+
|
|
19
|
+
/** Relative path of the JSON Lines file that lists every candidate match. */
export const CANDIDATE_JSONL_PATH = "piolium/attack-surface/candidates.jsonl";

/** Relative path of the Markdown summary written alongside the candidate list. */
export const CANDIDATE_SUMMARY_PATH = "piolium/attack-surface/candidates-summary.md";

/** Relative directory holding the optional per-file JSON records; joined onto the scan root. */
export const FILE_RECORDS_DIR = "piolium/file-records";
|
|
22
|
+
|
|
23
|
+
/** Noise level a matcher declares for its hits. */
export type CandidateNoise =
  | "precise"
  | "normal"
  | "noisy";
|
|
24
|
+
|
|
25
|
+
/**
 * A single pattern hit recorded by the candidate scan.
 *
 * NOTE(review): the code that fills these fields (`matchFile`) is not visible
 * from this declaration; the per-field notes follow the field names and the
 * matching fields on the matcher definition.
 */
export interface CandidateMatch {
  // --- which matcher fired ---
  /** Identifier of the matcher. */
  slug: string;
  /** Human-readable explanation of what the matcher looks for. */
  description: string;
  /** Noise level declared by the matcher. */
  noise: CandidateNoise;
  /** Whether the matcher is a built-in one or a custom one. */
  source: "builtin" | "custom";

  // --- where it fired ---
  /** Path of the file containing the hit. */
  filePath: string;
  /** Line number of the hit (presumably 1-based; confirm against the producer). */
  line: number;
  /** Excerpt of the matching source text. */
  snippet: string;
  /** Name of the pattern that matched (presumably the pattern's label; confirm). */
  matchedPattern: string;

  // --- ranking ---
  /** Numeric score assigned to this hit. */
  score: number;
}
|
|
36
|
+
|
|
37
|
+
/** Per-file result of the candidate scan, serialised as JSON when file records are enabled. */
export interface FileCandidateRecord {
  /** Normalised path of the file relative to the scan root. */
  filePath: string;
  /** Hex-encoded SHA-256 digest of the raw file bytes. */
  sha256: string;
  /** Timestamp of the scan run that produced this record (ISO 8601 string). */
  lastScannedAt: string;
  /** `"candidate"` when at least one match was found, otherwise `"clean"`. */
  status: "candidate" | "clean";
  /** Number of matches found in the file. */
  candidateCount: number;
  /** File-level risk score computed from the path and its matches. */
  riskScore: number;
  /** Optional owners of the file (not populated by the scan code visible here). */
  owner?: Array<string>;
  /** The matches found in the file; empty for clean files. */
  candidates: Array<CandidateMatch>;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Totals and output locations reported by a candidate scan.
 *
 * NOTE(review): the function that builds this object is outside the visible
 * part of the file; the descriptions below follow the field names.
 */
export interface CandidateScanResult {
  // --- counters ---
  /** How many files were scanned. */
  scannedFiles: number;
  /** How many scanned files had at least one candidate. */
  candidateFiles: number;
  /** Total number of candidate matches. */
  candidateCount: number;

  // --- artefacts ---
  /** Location of the candidates JSONL output. */
  candidatesPath: string;
  /** Location of the Markdown summary output. */
  summaryPath: string;
  /** Directory reserved for per-file records. */
  fileRecordsDir: string;
  /** Whether per-file records were actually written during this scan. */
  fileRecordsWritten: boolean;
}
|
|
57
|
+
|
|
58
|
+
/** Optional settings accepted by the candidate scan. */
export interface CandidateScanOptions {
  /**
   * Write one JSON record per scanned file. The records help with offline
   * diagnostics, but large extracted appliances would produce tens of
   * thousands of tiny files, so this stays opt-in. When omitted, the scan
   * falls back to `shouldWriteFileRecords()`.
   */
  writeFileRecords?: boolean;

  /** Signal used to abort an asynchronous scan. */
  signal?: AbortSignal;

  /**
   * Number of candidate files an async scan handles between yields, so Pi can
   * repaint status widgets during preflight. `runCandidateScanAsync` uses 25
   * when this is omitted.
   */
  yieldEveryFiles?: number;

  /**
   * Number of directory entries an async scan visits between yields while
   * walking the tree. Files skipped before content scanning count as visited.
   */
  yieldEveryEntries?: number;
}
|
|
77
|
+
|
|
78
|
+
/** Compiled matcher definition used by the scan, for both built-in and custom matchers. */
interface NativeMatcher {
  /** Identifier of the matcher. */
  slug: string;
  /** Human-readable explanation of what the matcher looks for. */
  description: string;
  /** Noise level declared for this matcher's hits. */
  noise: CandidateNoise;
  /**
   * Optional file filter. The built-in table mostly lists extensions such as
   * `".ts"`; how entries are compared is decided by the matching code, which
   * is not visible here.
   */
  include?: Array<string>;
  /** Optional path filter made of path fragments such as `"webhook"` (comparison logic not visible here). */
  pathIncludes?: Array<string>;
  /** Labelled regular expressions that make up the matcher. */
  patterns: { label: string; regex: RegExp }[];
  /** Whether the matcher ships with the tool or was loaded as a custom matcher. */
  source: "builtin" | "custom";
}
|
|
87
|
+
|
|
88
|
+
/**
 * Loosely typed shape of a custom matcher configuration.
 *
 * Every field is optional and string-based (`noise` is a plain string and
 * `regex`/`flags` are uncompiled), so whoever consumes this shape has to
 * validate it before use.
 */
interface CustomMatcherConfig {
  /** Matcher entries, if any. */
  matchers?: {
    /** Identifier of the matcher. */
    slug?: string;
    /** Human-readable explanation of the matcher. */
    description?: string;
    /** Noise level as free-form text. */
    noise?: string;
    /** Optional file filter entries. */
    include?: string[];
    /** Optional path fragment filter entries. */
    pathIncludes?: string[];
    /** Regular expression source text. */
    regex?: string;
    /** Regular expression flags. */
    flags?: string;
    /** Label for the pattern. */
    label?: string;
  }[];
}
|
|
100
|
+
|
|
101
|
+
/**
 * Directory names excluded from the scan: dependency trees, VCS metadata,
 * build output, caches, editor settings, and the tool's own `piolium` output
 * directory.
 */
const SKIP_DIRS = new Set<string>([
  // dependencies and version control
  "node_modules", ".git", "vendor",
  // build output
  "dist", "build", "target", "out", ".next", ".nuxt",
  // caches and virtual environments
  ".cache", ".venv", "venv", "__pycache__", ".pytest_cache", ".mypy_cache",
  // editor settings
  ".idea", ".vscode",
  // coverage reports and this tool's own output
  "coverage", "piolium",
]);
|
|
122
|
+
|
|
123
|
+
/** File extensions (lowercase, with leading dot) treated as scannable source or config. */
const SOURCE_EXTENSIONS = new Set<string>([
  // JavaScript / TypeScript
  ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
  // general-purpose languages
  ".py", ".go", ".rs", ".rb", ".java", ".kt", ".swift",
  // C family
  ".c", ".h", ".cpp", ".cc", ".hpp", ".cs",
  // web and other languages
  ".php", ".vue", ".svelte", ".scala", ".clj",
  // shell, SQL, Lua, Objective-C
  ".sh", ".bash", ".zsh", ".sql", ".lua", ".m", ".mm",
  // configuration and infrastructure
  ".yml", ".yaml", ".json", ".tf", ".dockerfile",
]);
|
|
161
|
+
|
|
162
|
+
/**
 * Whole file names of interest that an extension check alone would not
 * identify. All entries are lowercase.
 * NOTE(review): presumably compared against a lower-cased basename; confirm in the tree walker.
 */
const SPECIAL_FILENAMES = new Set<string>([
  "dockerfile", ".env", "makefile", "jenkinsfile", "procfile",
  "cloudbuild.yaml", "cloudbuild.yml",
]);
|
|
171
|
+
|
|
172
|
+
/** Upper bound on the number of files handled in one scan (enforced by the walker, not visible here). */
const MAX_FILES_TO_SCAN = 80000;

/** Files whose raw size exceeds this many bytes (1 MiB) are not scanned. */
const MAX_FILE_BYTES = 1024 ** 2;

/** Cap on hits recorded for a single matcher within a single file. */
const MAX_MATCHES_PER_MATCHER_PER_FILE = 20;

/** Name of the environment variable associated with per-file records. */
const FILE_RECORDS_ENV = "PIOLIUM_FILE_RECORDS";

/** Default number of candidate files processed between yields in async scans. */
const DEFAULT_ASYNC_YIELD_EVERY_FILES = 25;

/** Default number of visited directory entries between yields in async scans. */
const DEFAULT_ASYNC_YIELD_EVERY_ENTRIES = 100;
|
|
178
|
+
|
|
179
|
+
/**
 * Path fragments that hint at security-relevant code such as authentication,
 * routing, payments, secrets, or CI workflows.
 * NOTE(review): presumably consumed by the file scoring helper; confirm there.
 */
const PATH_RISK_HINTS: string[] = [
  // access control and identity
  "admin", "auth",
  // request entry points
  "api", "route", "router", "handler", "controller",
  // file transfer and inbound callbacks
  "upload", "download", "webhook",
  // money
  "payment", "billing",
  // authorization and request pipeline
  "permission", "policy", "middleware", "session", "token",
  // cryptography and secrets
  "crypto", "secret",
  // network edges
  "gateway", "proxy",
  // infrastructure and CI
  "terraform", "workflow", ".github/workflows",
];
|
|
205
|
+
|
|
206
|
+
/**
 * Numeric score attached to each noise level: the more precise the matcher,
 * the higher the score.
 */
const NOISE_SCORE: Record<CandidateNoise, number> = { precise: 80, normal: 55, noisy: 30 };
|
|
211
|
+
|
|
212
|
+
/**
 * Matchers that ship with the tool. Each entry names a risk class (`slug`),
 * declares how noisy its hits are, optionally restricts the files it applies
 * to (`include`, `pathIncludes`), and lists the labelled regular expressions
 * that flag a candidate.
 *
 * NOTE(review): every regex carries the `g` flag, so each RegExp object keeps
 * `lastIndex` state between `exec`/`test` calls. The consumer must reset it or
 * iterate with `matchAll`; confirm in the matching code.
 */
const BUILTIN_MATCHERS: NativeMatcher[] = [
  // Process spawning and shell invocation across several languages.
  {
    slug: "command-execution",
    description: "Potential command execution or shell invocation with variable input.",
    noise: "precise",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rb", ".php", ".java", ".sh"],
    patterns: [
      { label: "node child_process", regex: /\b(exec|execSync|spawn|spawnSync)\s*\(/g },
      {
        label: "python process",
        regex: /\b(os\.system|subprocess\.(?:Popen|run|call|check_output))\s*\(/g,
      },
      { label: "go command", regex: /\bexec\.Command(?:Context)?\s*\(/g },
      { label: "php process", regex: /\b(shell_exec|system|passthru|proc_open|popen)\s*\(/g },
    ],
    source: "builtin",
  },
  // eval-style sinks. The "eval", "python eval" and "ruby eval" patterns all
  // match a bare `eval(`, so one call site can be reported by several patterns.
  {
    slug: "dynamic-code-execution",
    description: "Dynamic code execution, expression evaluation, or runtime compilation.",
    noise: "precise",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".rb", ".php", ".java"],
    patterns: [
      { label: "eval", regex: /\beval\s*\(/g },
      { label: "function constructor", regex: /\bnew\s+Function\s*\(/g },
      { label: "python eval", regex: /\b(exec|eval|compile)\s*\(/g },
      { label: "ruby eval", regex: /\b(instance_eval|class_eval|eval)\s*\(/g },
    ],
    source: "builtin",
  },
  // Query calls, SQL keywords followed by `${` interpolation within 160
  // characters, and SQL keywords followed by `+` on the same statement.
  {
    slug: "raw-sql-query",
    description: "Raw SQL construction or query execution that may need parameterization review.",
    noise: "normal",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rb", ".java", ".php", ".sql"],
    patterns: [
      { label: "query call", regex: /\b(query|execute|raw|rawQuery|createQuery)\s*\(/g },
      {
        label: "sql keyword string",
        regex: /[`'"]\s*(SELECT|INSERT|UPDATE|DELETE|DROP)\b[\s\S]{0,160}\$\{/gi,
      },
      { label: "string concat sql", regex: /\b(SELECT|INSERT|UPDATE|DELETE|DROP)\b[^;\n]{0,160}\+/gi },
    ],
    source: "builtin",
  },
  // Outbound HTTP client calls.
  {
    slug: "ssrf-capable-request",
    description: "Outbound HTTP request site that may be attacker-controlled.",
    noise: "normal",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rb", ".java", ".php"],
    patterns: [
      {
        label: "fetch/http client",
        regex: /\b(fetch|axios\.(?:get|post|request)|request|got|superagent)\s*\(/g,
      },
      { label: "python requests", regex: /\brequests\.(?:get|post|put|delete|request)\s*\(/g },
      {
        label: "go http client",
        regex: /\bhttp\.(?:Get|Post|NewRequest|NewRequestWithContext)\s*\(/g,
      },
    ],
    source: "builtin",
  },
  // Filesystem reads/writes and path construction. The "path join" pattern
  // matches any call named `join(` or `resolve(`, not only `path.join(`.
  {
    slug: "path-traversal-file-access",
    description: "Filesystem access using path joins or user-controllable paths.",
    noise: "normal",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rb", ".java", ".php"],
    patterns: [
      {
        label: "file read/write",
        regex: /\b(readFile|readFileSync|writeFile|writeFileSync|createReadStream|sendFile)\s*\(/g,
      },
      { label: "path join", regex: /\b(path\.join|join|resolve)\s*\(/g },
      { label: "python file open", regex: /\b(open|send_file|send_from_directory)\s*\(/g },
    ],
    source: "builtin",
  },
  // Raw HTML sinks and template constructs that bypass escaping.
  {
    slug: "unsafe-html-or-template",
    description: "HTML injection sink or template escape bypass.",
    noise: "normal",
    include: [".ts", ".tsx", ".js", ".jsx", ".vue", ".svelte", ".php", ".rb", ".py"],
    patterns: [
      { label: "dangerous html", regex: /\bdangerouslySetInnerHTML\b|v-html\b|innerHTML\s*=/g },
      { label: "template unescaped", regex: /\|\s*safe\b|raw\s*\}|unescapeHTML|html_safe\b/g },
    ],
    source: "builtin",
  },
  // Redirect calls and explicit Location headers.
  {
    slug: "open-redirect",
    description: "Redirect sink that may accept user-controlled URLs.",
    noise: "normal",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rb", ".java", ".php"],
    patterns: [
      { label: "redirect call", regex: /\b(redirect|res\.redirect|ctx\.redirect|sendRedirect)\s*\(/g },
      { label: "location header", regex: /\b(Location|setHeader)\s*\(\s*['"]Location['"]/g },
    ],
    source: "builtin",
  },
  // JWT handling, non-cryptographic randomness, and weak hash names.
  {
    slug: "weak-token-or-crypto",
    description: "Token, JWT, randomness, or crypto usage that deserves review.",
    noise: "normal",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rb", ".java", ".php"],
    patterns: [
      {
        label: "jwt decode",
        regex: /\b(jwt\.decode|verify\s*:\s*false|algorithms?\s*:\s*\[\s*['"]none['"])/g,
      },
      {
        label: "weak random",
        regex: /\b(Math\.random|random\.random|rand\.Int|java\.util\.Random)\b/g,
      },
      // NOTE(review): the trailing `\b` sits after the closing quote of the
      // `createHash(...)` alternative, so that alternative only matches when a
      // word character follows the quote. `createHash("md5")` is still
      // reported, but through the bare `md5`/`sha1` alternatives.
      { label: "weak hash", regex: /\b(md5|sha1|createHash\s*\(\s*['"](?:md5|sha1)['"])\b/gi },
    ],
    source: "builtin",
  },
  // Secret-looking assignments (quoted value of 12+ characters) and PEM private key headers.
  // NOTE(review): `.env` is listed like an extension although `.env` is a
  // whole file name; confirm how the include filter treats it.
  {
    slug: "secret-literal",
    description: "Hardcoded secret-like literal.",
    noise: "precise",
    include: [
      ".ts",
      ".tsx",
      ".js",
      ".jsx",
      ".py",
      ".go",
      ".rb",
      ".java",
      ".php",
      ".env",
      ".yml",
      ".yaml",
      ".json",
    ],
    patterns: [
      {
        label: "secret assignment",
        regex:
          /\b(api[_-]?key|secret|token|private[_-]?key|client[_-]?secret|password)\b\s*[:=]\s*['"][^'"\n]{12,}['"]/gi,
      },
      { label: "private key", regex: /-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----/g },
    ],
    source: "builtin",
  },
  // Route registrations and route decorators, limited by `pathIncludes` to
  // paths that look like routing code.
  {
    slug: "public-entrypoint",
    description: "Public route, handler, controller, workflow, or operation entry point.",
    noise: "noisy",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rb", ".java", ".php"],
    pathIncludes: ["route", "router", "controller", "handler", "api", "pages", "app"],
    patterns: [
      { label: "http route", regex: /\.(get|post|put|patch|delete|all)\s*\(\s*['"`/]/g },
      {
        label: "framework route",
        regex: /\b(route|router|app)\.(?:get|post|put|patch|delete|all)\s*\(/g,
      },
      { label: "decorated route", regex: /@(Get|Post|Put|Patch|Delete|Controller|Route)\b/g },
      { label: "python route", regex: /@\w+\.route\s*\(/g },
    ],
    source: "builtin",
  },
  // No `include` list: selected by path fragment only. The pattern flags
  // webhook-related keywords; it does not check for signature verification.
  {
    slug: "webhook-without-obvious-signature",
    description: "Webhook handler path that should be checked for signature verification.",
    noise: "normal",
    pathIncludes: ["webhook"],
    patterns: [
      { label: "webhook route", regex: /\b(webhook|stripe|github|slack|shopify|callback)\b/gi },
    ],
    source: "builtin",
  },
  // Header reads plus well-known proxy, identity and internal header names.
  // The second and third patterns are case-insensitive and include common
  // words such as `host`, `origin` and `middleware`.
  {
    slug: "hidden-control-channel",
    description:
      "Request header or framework/proxy context read that may influence auth, routing, tenant, runtime, debug, or middleware behavior.",
    noise: "normal",
    include: [".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rb", ".java", ".kt", ".php", ".cs"],
    patterns: [
      {
        label: "request header read",
        regex:
          /\b(headers\s*\(\s*\)|(?:req|request|ctx|context|event|c|r)\.headers?\b|(?:getHeader|header|Header\.Get|headers\.get)\s*\()/g,
      },
      {
        label: "proxy or original request header",
        regex:
          /\b(?:x-forwarded-(?:for|host|proto|port|prefix)|forwarded|x-real-ip|x-original-(?:url|uri|method)|x-rewrite-url|x-http-method-override|host|origin|referer)\b/gi,
      },
      {
        label: "identity or internal control header",
        regex:
          /\b(?:x-(?:user|auth|tenant|org|workspace|role|admin|internal|debug|preview|middleware|subrequest)[a-z0-9_-]*|middleware|subrequest|preview[_-]?mode)\b/gi,
      },
    ],
    source: "builtin",
  },
  // GitHub workflow files where an AI/agent keyword is followed within 500
  // characters by a `${{ github.event.` expression.
  {
    slug: "ci-agent-prompt-surface",
    description: "Workflow step passes issue, PR, commit, or comment data into an AI/agent command.",
    noise: "normal",
    include: [".yml", ".yaml"],
    pathIncludes: [".github/workflows"],
    patterns: [
      {
        label: "ai action with event context",
        regex:
          /\b(openai|codex|claude|gemini|copilot|ai|agent)\b[\s\S]{0,500}\$\{\{\s*github\.event\./gi,
      },
    ],
    source: "builtin",
  },
  // Container and infrastructure configuration.
  // NOTE(review): `"Dockerfile"` is a capitalised file name, unlike the other
  // extension-style entries and unlike the lowercase `"dockerfile"` used for
  // special file names; confirm the include filter handles it.
  {
    slug: "container-or-iac-exposure",
    description: "Container or infrastructure config with public exposure or weak runtime defaults.",
    noise: "normal",
    include: [".tf", ".yml", ".yaml", "Dockerfile", ".dockerfile"],
    patterns: [
      { label: "root user", regex: /^\s*USER\s+root\s*$/gim },
      { label: "public ingress", regex: /0\.0\.0\.0\/0|::\/0/g },
      { label: "wildcard iam", regex: /\b(Action|Resource)\s*=\s*["']\*["']/g },
      { label: "latest tag", regex: /\bimage\s*[:=]\s*["']?[^'"\s:]+:latest\b/g },
    ],
    source: "builtin",
  },
];
|
|
440
|
+
|
|
441
|
+
/**
 * Mutable bookkeeping threaded through the three phases of a candidate scan
 * (`beginCandidateScan` -> `scanCandidateFile` per file -> `finishCandidateScan`).
 */
interface CandidateScanWorkState {
  /** ISO timestamp captured once when the scan begins; copied onto each file record. */
  scannedAt: string;

  /** Matchers used for this scan: the built-in set followed by project custom matchers. */
  matchers: NativeMatcher[];

  /** Every match collected so far, across all scanned files. */
  candidates: CandidateMatch[];
  /** Count of files that were read and run through the matchers. */
  scannedFiles: number;
  /** Count of scanned files that produced at least one match. */
  candidateFiles: number;

  /** Directory for per-file JSON records (`cwd` joined with `FILE_RECORDS_DIR`). */
  recordsDir: string;
  /** When true, a JSON record is written for every scanned file. */
  writeFileRecords: boolean;
}
|
|
450
|
+
|
|
451
|
+
/**
 * Prepares the working state for a scan rooted at `cwd`.
 *
 * Loads built-in plus custom matchers, decides whether per-file records are
 * written (explicit option wins, otherwise the environment toggle), creates the
 * records directory when needed, and returns zeroed counters.
 *
 * @param cwd Root directory of the scan.
 * @param options Scan options; only `writeFileRecords` is read here.
 * @returns A fresh work state stamped with the current time.
 */
function beginCandidateScan(cwd: string, options: CandidateScanOptions): CandidateScanWorkState {
  const activeMatchers = [...BUILTIN_MATCHERS, ...loadCustomMatchers(cwd)];
  const recordsRoot = join(cwd, FILE_RECORDS_DIR);

  // `??` keeps an explicit `false` from the caller instead of falling back to the env toggle.
  const recordsEnabled = options.writeFileRecords ?? shouldWriteFileRecords();
  if (recordsEnabled) {
    mkdirSync(recordsRoot, { recursive: true });
  }

  const state: CandidateScanWorkState = {
    scannedAt: new Date().toISOString(),
    candidates: [],
    scannedFiles: 0,
    candidateFiles: 0,
    matchers: activeMatchers,
    recordsDir: recordsRoot,
    writeFileRecords: recordsEnabled,
  };
  return state;
}
|
|
466
|
+
|
|
467
|
+
/**
 * Reads one file, runs the matchers over it and folds the result into `state`.
 *
 * Files that cannot be read, exceed `MAX_FILE_BYTES`, or contain a NUL byte
 * are skipped silently and do not count as scanned.
 *
 * @param cwd Scan root; used to derive the relative path stored on matches and records.
 * @param state Work state to update (counters, collected candidates).
 * @param filePath Path of the file to scan.
 */
function scanCandidateFile(cwd: string, state: CandidateScanWorkState, filePath: string): void {
  let raw: Buffer;
  try {
    // Check the on-disk size first so an oversized file is never loaded into memory.
    if (statSync(filePath).size > MAX_FILE_BYTES) return;
    raw = readFileSync(filePath);
  } catch {
    // Unreadable or vanished file: best-effort scan, just move on.
    return;
  }
  // Re-check the size after reading (the file may have grown), and treat any NUL byte as binary.
  if (raw.length > MAX_FILE_BYTES || raw.includes(0)) return;

  const relPath = normalizePath(relative(cwd, filePath));
  const content = raw.toString("utf8");
  const matches = matchFile(relPath, content, state.matchers);
  const riskScore = scoreFile(relPath, matches);
  const record: FileCandidateRecord = {
    filePath: relPath,
    // Hash the raw bytes, not the decoded text.
    sha256: createHash("sha256").update(raw).digest("hex"),
    lastScannedAt: state.scannedAt,
    status: matches.length > 0 ? "candidate" : "clean",
    candidateCount: matches.length,
    riskScore,
    candidates: matches,
  };
  if (state.writeFileRecords) writeJson(fileRecordPath(cwd, relPath), record);

  state.scannedFiles++;
  if (matches.length > 0) {
    state.candidateFiles++;
    state.candidates.push(...matches);
  }
}
|
|
495
|
+
|
|
496
|
+
/**
 * Finalizes a scan: sorts the collected candidates in place, writes them as
 * JSON Lines, writes the Markdown summary, and returns the result descriptor.
 *
 * @param cwd Scan root; output paths are resolved against it.
 * @param state Work state accumulated by `scanCandidateFile`.
 * @returns Counters plus the paths of the artifacts written.
 */
function finishCandidateScan(cwd: string, state: CandidateScanWorkState): CandidateScanResult {
  const { candidates, scannedFiles, candidateFiles, writeFileRecords } = state;
  candidates.sort(candidateSort);

  // One JSON document per line; an empty scan produces an empty file.
  const candidatesPath = join(cwd, CANDIDATE_JSONL_PATH);
  mkdirSync(dirname(candidatesPath), { recursive: true });
  const jsonLines = candidates.map((candidate) => JSON.stringify(candidate));
  const jsonl = jsonLines.length > 0 ? `${jsonLines.join("\n")}\n` : jsonLines.join("\n");
  writeFileSync(candidatesPath, jsonl);

  const summaryPath = join(cwd, CANDIDATE_SUMMARY_PATH);
  const summary = buildCandidateSummary({
    scannedFiles,
    candidateFiles,
    candidates,
    writeFileRecords,
  });
  writeFileSync(summaryPath, summary);

  const result: CandidateScanResult = {
    scannedFiles,
    candidateFiles,
    candidateCount: candidates.length,
    candidatesPath,
    summaryPath,
    fileRecordsDir: state.recordsDir,
    fileRecordsWritten: writeFileRecords,
  };
  return result;
}
|
|
527
|
+
|
|
528
|
+
/**
 * Synchronously scans the tree under `cwd` for candidate matches and writes the
 * scan artifacts.
 *
 * Walking stops once `MAX_FILES_TO_SCAN` files have been scanned.
 *
 * @param cwd Root directory to scan.
 * @param options Optional scan settings.
 * @returns Summary counters and artifact paths.
 */
export function runCandidateScan(
  cwd: string,
  options: CandidateScanOptions = {},
): CandidateScanResult {
  const state = beginCandidateScan(cwd, options);

  for (const filePath of walkCandidateFiles(cwd)) {
    // The cap counts files actually scanned, not files merely visited.
    const capReached = state.scannedFiles >= MAX_FILES_TO_SCAN;
    if (capReached) {
      break;
    }
    scanCandidateFile(cwd, state, filePath);
  }

  return finishCandidateScan(cwd, state);
}
|
|
539
|
+
|
|
540
|
+
/**
 * Cooperative variant of `runCandidateScan`: does the same work but periodically
 * yields to the event loop and honours `options.signal`.
 *
 * @param cwd Root directory to scan.
 * @param options Optional settings; `yieldEveryFiles` controls how many files are
 *   processed between yields (minimum 1) and `signal` aborts the scan.
 * @returns Summary counters and artifact paths.
 * @throws The signal's `reason` (or `Error("Aborted")`) when the signal is aborted.
 */
export async function runCandidateScanAsync(
  cwd: string,
  options: CandidateScanOptions = {},
): Promise<CandidateScanResult> {
  const state = beginCandidateScan(cwd, options);
  const yieldEveryFiles = Math.max(1, options.yieldEveryFiles ?? DEFAULT_ASYNC_YIELD_EVERY_FILES);

  let visitedFiles = 0;
  for await (const filePath of walkCandidateFilesAsync(cwd, options)) {
    if (options.signal?.aborted) {
      throw options.signal.reason ?? new Error("Aborted");
    }
    if (state.scannedFiles >= MAX_FILES_TO_SCAN) {
      break;
    }

    scanCandidateFile(cwd, state, filePath);

    // Yield on visited files (not scanned ones) so skipped files still give the loop a turn.
    visitedFiles += 1;
    const dueForYield = visitedFiles % yieldEveryFiles === 0;
    if (dueForYield) {
      await yieldToEventLoop(options.signal);
    }
  }

  return finishCandidateScan(cwd, state);
}
|
|
558
|
+
|
|
559
|
+
/**
 * Resolves the location of the Markdown scan summary for a scan rooted at `cwd`.
 *
 * @param cwd Scan root directory.
 * @returns `cwd` joined with `CANDIDATE_SUMMARY_PATH`.
 */
export function candidateSummaryPath(cwd: string): string {
  const summaryFile = join(cwd, CANDIDATE_SUMMARY_PATH);
  return summaryFile;
}
|
|
562
|
+
|
|
563
|
+
/**
 * Resolves the location of the JSON Lines candidate file for a scan rooted at `cwd`.
 *
 * @param cwd Scan root directory.
 * @returns `cwd` joined with `CANDIDATE_JSONL_PATH`.
 */
export function candidatesJsonlPath(cwd: string): string {
  const jsonlFile = join(cwd, CANDIDATE_JSONL_PATH);
  return jsonlFile;
}
|
|
566
|
+
|
|
567
|
+
/**
 * Resolves the directory that holds per-file scan records for a scan rooted at `cwd`.
 *
 * @param cwd Scan root directory.
 * @returns `cwd` joined with `FILE_RECORDS_DIR`.
 */
export function fileRecordsDir(cwd: string): string {
  const recordsRoot = join(cwd, FILE_RECORDS_DIR);
  return recordsRoot;
}
|
|
570
|
+
|
|
571
|
+
/**
 * Reads the candidates JSONL file of a previous scan and totals a score per file.
 *
 * Each valid line contributes at least 1 to its file's total. Lines that are
 * blank, not valid JSON, not an object, or lack a non-empty string `filePath`
 * are ignored; a missing or non-finite `score` counts as the minimum of 1, so a
 * damaged line can never poison a total with `NaN`.
 *
 * @param cwd Scan root directory.
 * @returns Map from relative file path to accumulated score; empty when the file does not exist.
 */
export function readCandidateScores(cwd: string): Map<string, number> {
  const path = candidatesJsonlPath(cwd);
  const scores = new Map<string, number>();
  if (!existsSync(path)) return scores;

  for (const line of readFileSync(path, "utf8").split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // Tolerate partially written or hand-edited files.
      continue;
    }
    if (typeof parsed !== "object" || parsed === null) continue;

    const { filePath, score } = parsed as { filePath?: unknown; score?: unknown };
    if (typeof filePath !== "string" || filePath === "") continue;

    const weight = typeof score === "number" && Number.isFinite(score) ? Math.max(1, score) : 1;
    scores.set(filePath, (scores.get(filePath) ?? 0) + weight);
  }
  return scores;
}
|
|
586
|
+
|
|
587
|
+
/**
 * Lazily yields the paths of scannable files under `cwd`, depth-first in
 * directory-listing order.
 *
 * Skips names in `SKIP_DIRS` and dot-entries other than `.github` and `.env*`;
 * unreadable directories and entries that cannot be stat'ed are ignored.
 *
 * @param cwd Root directory to walk.
 */
function* walkCandidateFiles(cwd: string): Generator<string> {
  function* descend(dir: string): Generator<string> {
    let names: string[];
    try {
      names = readdirSync(dir);
    } catch {
      return;
    }

    for (const name of names) {
      if (SKIP_DIRS.has(name)) continue;
      // Hidden entries are skipped, with `.github` and `.env*` as the only exceptions.
      const allowedHidden = name === ".github" || name.startsWith(".env");
      if (name.startsWith(".") && !allowedHidden) continue;

      const absolute = join(dir, name);
      let info: ReturnType<typeof statSync>;
      try {
        info = statSync(absolute);
      } catch {
        continue;
      }

      if (info.isDirectory()) {
        yield* descend(absolute);
      } else if (info.isFile()) {
        const relPath = normalizePath(relative(cwd, absolute));
        if (isCandidateFile(relPath)) yield absolute;
      }
    }
  }

  yield* descend(cwd);
}
|
|
616
|
+
|
|
617
|
+
/**
 * Async counterpart of `walkCandidateFiles`: same traversal and filtering, but
 * it yields to the event loop every `yieldEveryEntries` directory entries
 * (minimum 1) and stops with an error once `options.signal` is aborted.
 *
 * @param cwd Root directory to walk.
 * @param options Scan options; `yieldEveryEntries` and `signal` are read here.
 * @throws The signal's `reason` (or `Error("Aborted")`) when aborted.
 */
async function* walkCandidateFilesAsync(
  cwd: string,
  options: CandidateScanOptions,
): AsyncGenerator<string> {
  const yieldEveryEntries = Math.max(
    1,
    options.yieldEveryEntries ?? DEFAULT_ASYNC_YIELD_EVERY_ENTRIES,
  );
  let visitedEntries = 0;

  // Counts every directory entry seen (including skipped ones) and pauses periodically.
  const tick = async (): Promise<void> => {
    visitedEntries += 1;
    if (visitedEntries % yieldEveryEntries === 0) {
      await yieldToEventLoop(options.signal);
    }
  };

  async function* descend(dir: string): AsyncGenerator<string> {
    if (options.signal?.aborted) throw options.signal.reason ?? new Error("Aborted");

    let names: string[];
    try {
      names = readdirSync(dir);
    } catch {
      return;
    }

    for (const name of names) {
      await tick();
      if (options.signal?.aborted) throw options.signal.reason ?? new Error("Aborted");

      if (SKIP_DIRS.has(name)) continue;
      // Hidden entries are skipped, with `.github` and `.env*` as the only exceptions.
      const allowedHidden = name === ".github" || name.startsWith(".env");
      if (name.startsWith(".") && !allowedHidden) continue;

      const absolute = join(dir, name);
      let info: ReturnType<typeof statSync>;
      try {
        info = statSync(absolute);
      } catch {
        continue;
      }

      if (info.isDirectory()) {
        yield* descend(absolute);
      } else if (info.isFile()) {
        const relPath = normalizePath(relative(cwd, absolute));
        if (isCandidateFile(relPath)) yield absolute;
      }
    }
  }

  yield* descend(cwd);
}
|
|
666
|
+
|
|
667
|
+
/**
 * Decides whether a file (given by its scan-root-relative, `/`-separated path)
 * should be scanned.
 *
 * A file qualifies when its lower-cased base name is in `SPECIAL_FILENAMES`, when
 * it lives under a `.github/workflows/` directory, or when its extension is in
 * `SOURCE_EXTENSIONS`.
 *
 * @param relPath Path relative to the scan root, using `/` separators.
 * @returns `true` when the file should be scanned.
 */
function isCandidateFile(relPath: string): boolean {
  const lower = relPath.toLowerCase();
  const base = lower.split("/").pop() ?? lower;
  if (SPECIAL_FILENAMES.has(base)) return true;

  // Relative paths have no leading slash, so the repository's own top-level
  // `.github/workflows/` needs a prefix check; the `includes` form only catches nested ones.
  if (lower.startsWith(".github/workflows/") || lower.includes("/.github/workflows/")) return true;

  return SOURCE_EXTENSIONS.has(extname(base));
}
|
|
675
|
+
|
|
676
|
+
/**
 * Runs every applicable matcher over one file's text and returns the hits,
 * ordered by `candidateSort`.
 *
 * @param relPath Scan-root-relative path of the file (stored on each hit).
 * @param content Decoded file text.
 * @param matchers Matchers to try; those that do not apply to `relPath` are skipped.
 * @returns One `CandidateMatch` per regex hit, capped per matcher.
 */
function matchFile(relPath: string, content: string, matchers: NativeMatcher[]): CandidateMatch[] {
  const hits: CandidateMatch[] = [];
  const lineStarts = buildLineStarts(content);

  for (const matcher of matchers) {
    if (!matcherApplies(matcher, relPath)) continue;

    // The cap is shared by all patterns of one matcher, not applied per pattern.
    let emitted = 0;
    patternLoop: for (const pattern of matcher.patterns) {
      const scanner = withGlobal(pattern.regex);
      scanner.lastIndex = 0;

      for (let found = scanner.exec(content); found !== null; found = scanner.exec(content)) {
        const line = lineNumberAt(lineStarts, found.index);
        hits.push({
          slug: matcher.slug,
          description: matcher.description,
          noise: matcher.noise,
          filePath: relPath,
          line,
          snippet: extractLine(content, lineStarts, line),
          matchedPattern: pattern.label,
          score: scoreCandidate(relPath, matcher.noise, pattern.label),
          source: matcher.source,
        });

        emitted += 1;
        if (emitted >= MAX_MATCHES_PER_MATCHER_PER_FILE) break patternLoop;

        // An empty match does not advance lastIndex by itself; step past it to avoid looping forever.
        if (found[0].length === 0) scanner.lastIndex += 1;
      }
    }
  }

  hits.sort(candidateSort);
  return hits;
}
|
|
712
|
+
|
|
713
|
+
/**
 * Tests whether a matcher's file filters accept `relPath` (case-insensitively).
 *
 * `include` tokens: a token starting with `.` is a path suffix, a token containing
 * `/` is a path substring, anything else must equal the base file name.
 * `pathIncludes` tokens are path substrings. An absent or empty list accepts
 * everything; both lists must accept the path.
 *
 * @param matcher Matcher whose `include` / `pathIncludes` are consulted.
 * @param relPath Scan-root-relative path using `/` separators.
 */
function matcherApplies(matcher: NativeMatcher, relPath: string): boolean {
  const lowerPath = relPath.toLowerCase();

  const acceptsInclude = (rawToken: string): boolean => {
    const token = rawToken.toLowerCase();
    if (token.startsWith(".")) {
      return lowerPath.endsWith(token);
    }
    if (token.includes("/")) {
      return lowerPath.includes(normalizePath(token));
    }
    const fileName = lowerPath.split("/").pop() ?? lowerPath;
    return fileName === token;
  };
  const acceptsPath = (rawToken: string): boolean =>
    lowerPath.includes(normalizePath(rawToken.toLowerCase()));

  const includeTokens = matcher.include ?? [];
  const pathTokens = matcher.pathIncludes ?? [];

  const includeOk = includeTokens.length === 0 ? true : includeTokens.some(acceptsInclude);
  const pathOk = pathTokens.length === 0 ? true : pathTokens.some(acceptsPath);
  return includeOk && pathOk;
}
|
|
730
|
+
|
|
731
|
+
/**
 * Scores a single match: the base value for its noise level, plus 8 for every
 * `PATH_RISK_HINTS` entry found in the path, plus 10 when the pattern label
 * mentions a high-impact keyword.
 *
 * @param relPath Scan-root-relative path of the matched file.
 * @param noise Noise level of the matcher.
 * @param patternLabel Label of the pattern that matched.
 */
function scoreCandidate(relPath: string, noise: CandidateNoise, patternLabel: string): number {
  const lowerPath = relPath.toLowerCase();

  let total = NOISE_SCORE[noise];
  for (const hint of PATH_RISK_HINTS) {
    total += lowerPath.includes(hint) ? 8 : 0;
  }

  const highImpactLabel = /signature|secret|private|command|eval|redirect/i.test(patternLabel);
  return highImpactLabel ? total + 10 : total;
}
|
|
740
|
+
|
|
741
|
+
/**
 * Scores a whole file from its matches: the sum of its five best match scores,
 * plus 12 per distinct matcher slug, plus 20 when the path contains any
 * `PATH_RISK_HINTS` entry. A file without matches scores 0.
 *
 * @param relPath Scan-root-relative path of the file.
 * @param matches Matches found in the file.
 */
function scoreFile(relPath: string, matches: CandidateMatch[]): number {
  if (matches.length === 0) {
    return 0;
  }

  const descending = matches.map((match) => match.score).sort((a, b) => b - a);
  let topFive = 0;
  for (const value of descending.slice(0, 5)) {
    topFive += value;
  }

  const distinctSlugs = new Set<string>();
  for (const match of matches) {
    distinctSlugs.add(match.slug);
  }
  const diversityBonus = distinctSlugs.size * 12;

  const lowerPath = relPath.toLowerCase();
  const riskyPath = PATH_RISK_HINTS.some((hint) => lowerPath.includes(hint));
  const pathBonus = riskyPath ? 20 : 0;

  return topFive + diversityBonus + pathBonus;
}
|
|
753
|
+
|
|
754
|
+
/**
 * Comparator for candidate matches: higher score first, then file path
 * (locale order), then ascending line number.
 */
function candidateSort(a: CandidateMatch, b: CandidateMatch): number {
  if (a.score !== b.score) {
    return b.score - a.score;
  }
  const pathOrder = a.filePath.localeCompare(b.filePath);
  return pathOrder !== 0 ? pathOrder : a.line - b.line;
}
|
|
760
|
+
|
|
761
|
+
/**
 * Renders the Markdown summary of a scan.
 *
 * Sections, in order: totals, candidate classes (per slug, by max score then
 * count), top 40 files (by summed score then count), the first 80 matches in the
 * order given, and a pointer to the custom matcher files.
 *
 * @param input Counters, the (already ordered) candidates, and whether per-file records were written.
 * @returns The summary text, terminated by a newline.
 */
function buildCandidateSummary(input: {
  scannedFiles: number;
  candidateFiles: number;
  candidates: CandidateMatch[];
  writeFileRecords: boolean;
}): string {
  const { candidates } = input;

  // Heading, blank line, the items (or a placeholder), trailing blank line.
  const section = (heading: string, items: string[]): string[] => [
    heading,
    "",
    ...(items.length === 0 ? ["(none)"] : items),
    "",
  ];

  // Aggregate per matcher slug and per file in a single pass.
  const classTotals = new Map<string, { count: number; maxScore: number; description: string }>();
  const fileTotals = new Map<string, { count: number; score: number }>();
  for (const candidate of candidates) {
    const classEntry = classTotals.get(candidate.slug);
    if (classEntry === undefined) {
      classTotals.set(candidate.slug, {
        count: 1,
        maxScore: candidate.score,
        description: candidate.description,
      });
    } else {
      classEntry.count += 1;
      classEntry.maxScore = Math.max(classEntry.maxScore, candidate.score);
    }

    const fileEntry = fileTotals.get(candidate.filePath);
    if (fileEntry === undefined) {
      fileTotals.set(candidate.filePath, { count: 1, score: candidate.score });
    } else {
      fileEntry.count += 1;
      fileEntry.score += candidate.score;
    }
  }

  const classLines = [...classTotals.entries()]
    .sort((a, b) => b[1].maxScore - a[1].maxScore || b[1].count - a[1].count)
    .map(
      ([slug, info]) =>
        `- ${slug}: ${info.count} match(es), max score ${info.maxScore}. ${info.description}`,
    );

  const fileLines = [...fileTotals.entries()]
    .sort((a, b) => b[1].score - a[1].score || b[1].count - a[1].count)
    .slice(0, 40)
    .map(([filePath, info]) => `- \`${filePath}\`: score ${info.score}, ${info.count} match(es)`);

  const matchLines = candidates
    .slice(0, 80)
    .map(
      (candidate) =>
        `- ${candidate.slug} (${candidate.noise}, score ${candidate.score}) at \`${candidate.filePath}:${candidate.line}\` - ${candidate.snippet}`,
    );

  const recordsNote = input.writeFileRecords
    ? `written to \`${FILE_RECORDS_DIR}/\``
    : `disabled (set ${FILE_RECORDS_ENV}=1 to enable)`;

  const lines: string[] = [
    "# Candidate Scan",
    "",
    `Generated by piolium at ${new Date().toISOString()}`,
    "",
    "## Totals",
    "",
    `- Files scanned: ${input.scannedFiles}`,
    `- Candidate files: ${input.candidateFiles}`,
    `- Candidate matches: ${candidates.length}`,
    `- Per-file records: ${recordsNote}`,
    "",
    ...section("## Candidate Classes", classLines),
    ...section("## Top Files", fileLines),
    ...section("## Highest-Ranked Matches", matchLines),
    "## Custom Matchers",
    "",
    "Project matchers can be added at `piolium/matchers.json`, `piolium/custom-matchers.json`, or `.piolium-matchers.json`.",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
852
|
+
|
|
853
|
+
/**
 * Loads project-defined matchers from `piolium/matchers.json`,
 * `piolium/custom-matchers.json` and `.piolium-matchers.json` under `cwd`.
 *
 * These files live inside the tree being scanned, so their content is treated
 * as untrusted: a file that is missing, unparsable, or not shaped like
 * `{ "matchers": [...] }` is ignored, and any individual entry with a missing or
 * mistyped field (or a regex that does not compile) is dropped without
 * affecting the others. Nothing here throws on bad input.
 *
 * NOTE(review): the regex source itself is accepted as-is; a pathological
 * pattern can still make matching very slow.
 *
 * @param cwd Scan root directory.
 * @returns The valid custom matchers, in file then entry order.
 */
function loadCustomMatchers(cwd: string): NativeMatcher[] {
  const paths = [
    join(cwd, "piolium", "matchers.json"),
    join(cwd, "piolium", "custom-matchers.json"),
    join(cwd, ".piolium-matchers.json"),
  ];

  // Reads an optional token list. Falsy means "no filter"; a lone string is
  // accepted as a one-item list; anything else is a malformed entry.
  const tokenList = (value: unknown): string[] | undefined => {
    if (!value) return undefined;
    if (typeof value === "string") return [value];
    if (Array.isArray(value) && value.every((token) => typeof token === "string")) {
      return value as string[];
    }
    throw new TypeError("expected a string or an array of strings");
  };

  const out: NativeMatcher[] = [];
  for (const path of paths) {
    if (!existsSync(path)) continue;

    let config: unknown;
    try {
      config = JSON.parse(readFileSync(path, "utf8"));
    } catch {
      continue;
    }
    if (typeof config !== "object" || config === null) continue;
    const entries = (config as { matchers?: unknown }).matchers;
    if (!Array.isArray(entries)) continue;

    for (const entry of entries as unknown[]) {
      if (typeof entry !== "object" || entry === null) continue;
      const custom = entry as Record<string, unknown>;

      const rawSlug = custom.slug;
      const rawRegex = custom.regex;
      if (typeof rawSlug !== "string" || typeof rawRegex !== "string") continue;
      if (!rawSlug || !rawRegex) continue;
      const slug = slugify(rawSlug);
      if (!slug) continue;

      const rawFlags = custom.flags ?? undefined;
      if (rawFlags !== undefined && typeof rawFlags !== "string") continue;

      const noise = normalizeNoise(custom.noise);
      try {
        const flags = normalizeRegexFlags(rawFlags);
        const include = tokenList(custom.include);
        const pathIncludes = tokenList(custom.pathIncludes);
        out.push({
          slug,
          description:
            typeof custom.description === "string"
              ? custom.description
              : `Custom matcher ${slug}.`,
          noise,
          ...(include ? { include } : {}),
          ...(pathIncludes ? { pathIncludes } : {}),
          patterns: [
            {
              label: typeof custom.label === "string" ? custom.label : slug,
              regex: new RegExp(rawRegex, flags),
            },
          ],
          source: "custom",
        });
      } catch {
        // Invalid regex, flags, or filter list: drop this entry, keep the rest.
      }
    }
  }
  return out;
}
|
|
894
|
+
|
|
895
|
+
/**
 * Coerces an arbitrary value to a noise level.
 *
 * @param value Candidate value, typically from a custom matcher file.
 * @returns `value` when it is `"precise"`, `"normal"` or `"noisy"`; otherwise `"normal"`.
 */
function normalizeNoise(value: unknown): CandidateNoise {
  switch (value) {
    case "precise":
    case "normal":
    case "noisy":
      return value;
    default:
      return "normal";
  }
}
|
|
898
|
+
|
|
899
|
+
/**
 * Cleans a user-supplied regex flag string: keeps the first occurrence of each
 * recognised flag (`d g i m s u v y`) in its original order, discards everything
 * else, and appends `g` when it is not already present.
 *
 * @param flags Raw flags; `undefined` is treated as `"g"`.
 * @returns A flag string that always contains `g`.
 */
function normalizeRegexFlags(flags: string | undefined): string {
  const requested = flags ?? "g";

  let kept = "";
  for (const flag of requested.split("")) {
    const recognised = /^[dgimsuvy]$/.test(flag);
    if (recognised && !kept.includes(flag)) {
      kept += flag;
    }
  }

  if (kept.includes("g")) {
    return kept;
  }
  return `${kept}g`;
}
|
|
904
|
+
|
|
905
|
+
/**
 * Returns a fresh copy of `regex` that is guaranteed to carry the `g` flag.
 * A new object is created even when the flag is already set, so the caller gets
 * its own `lastIndex`.
 *
 * @param regex Pattern to copy.
 */
function withGlobal(regex: RegExp): RegExp {
  const { source, flags } = regex;
  if (flags.includes("g")) {
    return new RegExp(source, flags);
  }
  return new RegExp(source, `${flags}g`);
}
|
|
909
|
+
|
|
910
|
+
/**
 * Builds the table of offsets at which each line of `content` begins.
 * Entry 0 is always 0; every `\n` adds the offset of the character after it.
 *
 * @param content Text to index.
 * @returns Ascending line-start offsets (one per line).
 */
function buildLineStarts(content: string): number[] {
  const offsets: number[] = [0];
  let newlineAt = content.indexOf("\n");
  while (newlineAt !== -1) {
    offsets.push(newlineAt + 1);
    newlineAt = content.indexOf("\n", newlineAt + 1);
  }
  return offsets;
}
|
|
917
|
+
|
|
918
|
+
/**
 * Maps a character offset to its 1-based line number by binary search over the
 * line-start table.
 *
 * @param lineStarts Ascending line-start offsets, as produced by `buildLineStarts`.
 * @param index Character offset to locate.
 * @returns The line containing `index`; 1 when `index` precedes every line start
 *   or the table is empty.
 */
function lineNumberAt(lineStarts: number[], index: number): number {
  // Count how many line starts are <= index; that count is the 1-based line number.
  let low = 0;
  let high = lineStarts.length;
  while (low < high) {
    const middle = (low + high) >>> 1;
    if ((lineStarts[middle] ?? 0) <= index) {
      low = middle + 1;
    } else {
      high = middle;
    }
  }
  return low === 0 ? 1 : low;
}
|
|
931
|
+
|
|
932
|
+
/**
 * Returns a compact, single-line snippet of the given 1-based line: trimmed,
 * with every whitespace run collapsed to one space, cut to at most 220 characters.
 *
 * @param content Full text.
 * @param lineStarts Line-start offsets for `content`.
 * @param line 1-based line number.
 */
function extractLine(content: string, lineStarts: number[], line: number): string {
  const begin = lineStarts[line - 1] ?? 0;
  const nextStart = lineStarts[line];
  // Stop just before the newline that precedes the next line; the last line runs to the end.
  const finish = nextStart === undefined ? content.length : nextStart - 1;

  const collapsed = content.slice(begin, finish).trim().replace(/\s+/g, " ");
  return collapsed.slice(0, 220);
}
|
|
937
|
+
|
|
938
|
+
/**
 * Writes `value` to `path` as 2-space-indented JSON followed by a newline,
 * creating the parent directory first when necessary.
 *
 * @param path Destination file.
 * @param value Value to serialize.
 */
function writeJson(path: string, value: unknown): void {
  const parentDir = dirname(path);
  mkdirSync(parentDir, { recursive: true });

  const serialized = JSON.stringify(value, null, 2);
  writeFileSync(path, `${serialized}\n`);
}
|
|
942
|
+
|
|
943
|
+
/**
 * Reads the `FILE_RECORDS_ENV` environment variable and reports whether it is
 * switched on. Accepted values (case-insensitive, surrounding whitespace
 * ignored): `1`, `true`, `yes`, `on`. Anything else, or an unset variable, is off.
 */
function shouldWriteFileRecords(): boolean {
  const setting = process.env[FILE_RECORDS_ENV]?.trim().toLowerCase();
  switch (setting) {
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    default:
      return false;
  }
}
|
|
947
|
+
|
|
948
|
+
/**
 * Computes where the JSON record for a scanned file is stored: the file's
 * relative path, re-rooted under `FILE_RECORDS_DIR`, with `.json` appended.
 *
 * Empty, `.` and `..` segments are removed so the result cannot climb out of
 * the records directory.
 *
 * @param cwd Scan root directory.
 * @param relPath Scan-root-relative path of the scanned file.
 */
function fileRecordPath(cwd: string, relPath: string): string {
  const safeSegments: string[] = [];
  for (const segment of normalizePath(relPath).split("/")) {
    if (segment === "" || segment === "." || segment === "..") continue;
    safeSegments.push(segment);
  }
  const recordName = `${safeSegments.join("/")}.json`;
  return join(cwd, FILE_RECORDS_DIR, recordName);
}
|
|
955
|
+
|
|
956
|
+
/**
 * Converts a path that uses the platform separator into one that uses `/`.
 *
 * @param path Path using `sep` as its separator.
 * @returns The same path with every `sep` replaced by `/`.
 */
function normalizePath(path: string): string {
  // Nothing to rewrite when the platform separator already is a forward slash.
  if (sep === "/") {
    return path;
  }
  const segments = path.split(sep);
  return segments.join("/");
}
|
|
959
|
+
|
|
960
|
+
/**
 * Turns free text into a slug: lower-cased, every run of characters outside
 * `a-z0-9` replaced by a single `-`, no leading or trailing `-`, at most 80
 * characters long.
 *
 * @param value Text to convert.
 * @returns The slug; empty when `value` contains no usable characters.
 */
function slugify(value: string): string {
  return (
    value
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, "-")
      .replace(/^-+|-+$/g, "")
      .slice(0, 80)
      // Truncation can cut right after a separator; trim again so the slug never ends in "-".
      .replace(/-+$/, "")
  );
}
|