@vigolium/piolium 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +117 -0
- package/agents/access-auditor.md +300 -0
- package/agents/assumption-breaker.md +154 -0
- package/agents/attack-designer.md +116 -0
- package/agents/code-scanner.md +139 -0
- package/agents/concurrency-auditor.md +238 -0
- package/agents/confirm-writer.md +257 -0
- package/agents/context-reviewer.md +274 -0
- package/agents/cross-verifier.md +165 -0
- package/agents/cve-scout.md +381 -0
- package/agents/env-builder.md +282 -0
- package/agents/env-profiler.md +205 -0
- package/agents/evidence-collector.md +140 -0
- package/agents/finding-grader.md +142 -0
- package/agents/finding-writer.md +148 -0
- package/agents/flow-tracer.md +106 -0
- package/agents/goal-backtracer.md +146 -0
- package/agents/history-miner.md +467 -0
- package/agents/independent-verifier.md +118 -0
- package/agents/intent-mapper.md +183 -0
- package/agents/longshot-collector.md +128 -0
- package/agents/longshot-prober.md +126 -0
- package/agents/patch-auditor.md +73 -0
- package/agents/poc-author.md +124 -0
- package/agents/poc-runner.md +194 -0
- package/agents/probe-lead.md +269 -0
- package/agents/red-challenger.md +101 -0
- package/agents/report-composer.md +208 -0
- package/agents/review-adjudicator.md +216 -0
- package/agents/spec-auditor.md +155 -0
- package/agents/taint-tracer.md +265 -0
- package/agents/test-locator.md +209 -0
- package/agents/threat-modeler.md +132 -0
- package/agents/variant-scanner.md +108 -0
- package/agents/variant-spotter.md +110 -0
- package/bin/piolium.mjs +376 -0
- package/extensions/piolium/_vendor/yaml.bundle.d.mts +6 -0
- package/extensions/piolium/_vendor/yaml.bundle.mjs +139 -0
- package/extensions/piolium/agent-runner.ts +322 -0
- package/extensions/piolium/agents.ts +266 -0
- package/extensions/piolium/audit-state.ts +522 -0
- package/extensions/piolium/bundled-resources.ts +97 -0
- package/extensions/piolium/candidate-scan.ts +966 -0
- package/extensions/piolium/command-target.ts +177 -0
- package/extensions/piolium/console-stream.ts +57 -0
- package/extensions/piolium/export-results.ts +380 -0
- package/extensions/piolium/findings.ts +448 -0
- package/extensions/piolium/heartbeat.ts +182 -0
- package/extensions/piolium/help.ts +234 -0
- package/extensions/piolium/index.ts +1865 -0
- package/extensions/piolium/longshot.ts +530 -0
- package/extensions/piolium/matcher-suggestions.ts +196 -0
- package/extensions/piolium/matcher-utils.ts +83 -0
- package/extensions/piolium/modes/balanced.ts +750 -0
- package/extensions/piolium/modes/confirm-bootstrap.ts +186 -0
- package/extensions/piolium/modes/confirm.ts +697 -0
- package/extensions/piolium/modes/deep.ts +917 -0
- package/extensions/piolium/modes/diff.ts +177 -0
- package/extensions/piolium/modes/lite.ts +540 -0
- package/extensions/piolium/modes/longshot.ts +595 -0
- package/extensions/piolium/modes/merge.ts +204 -0
- package/extensions/piolium/modes/phase-runner.ts +267 -0
- package/extensions/piolium/modes/reinvest.ts +546 -0
- package/extensions/piolium/modes/revisit.ts +279 -0
- package/extensions/piolium/modes.ts +48 -0
- package/extensions/piolium/phase-labels.ts +123 -0
- package/extensions/piolium/phase-status-strip.ts +92 -0
- package/extensions/piolium/prompt-prefix-editor.ts +39 -0
- package/extensions/piolium/providers/anthropic-vertex.ts +836 -0
- package/extensions/piolium/recon.ts +409 -0
- package/extensions/piolium/result-stats.ts +105 -0
- package/extensions/piolium/retry.ts +120 -0
- package/extensions/piolium/scheduler.ts +212 -0
- package/extensions/piolium/secrets.ts +368 -0
- package/extensions/piolium/tools/web-tools.ts +148 -0
- package/package.json +77 -0
- package/skills/agentic-actions-auditor/SKILL.md +327 -0
- package/skills/agentic-actions-auditor/references/action-profiles.md +186 -0
- package/skills/agentic-actions-auditor/references/cross-file-resolution.md +209 -0
- package/skills/agentic-actions-auditor/references/foundations.md +94 -0
- package/skills/agentic-actions-auditor/references/vector-a-env-var-intermediary.md +77 -0
- package/skills/agentic-actions-auditor/references/vector-b-direct-expression-injection.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-c-cli-data-fetch.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-d-pr-target-checkout.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-e-error-log-injection.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-f-subshell-expansion.md +82 -0
- package/skills/agentic-actions-auditor/references/vector-g-eval-of-ai-output.md +91 -0
- package/skills/agentic-actions-auditor/references/vector-h-dangerous-sandbox-configs.md +102 -0
- package/skills/agentic-actions-auditor/references/vector-i-wildcard-allowlists.md +88 -0
- package/skills/audit/SKILL.md +562 -0
- package/skills/audit/assets/icon.svg +7 -0
- package/skills/audit/hooks/scripts/validate_phase_output.py +550 -0
- package/skills/audit/references/adversarial-review.md +148 -0
- package/skills/audit/references/architecture-aware-sast.md +306 -0
- package/skills/audit/references/audit-workflow.md +737 -0
- package/skills/audit/references/chamber-protocol.md +384 -0
- package/skills/audit/references/creative-attack-modes.md +221 -0
- package/skills/audit/references/deep-analysis.md +273 -0
- package/skills/audit/references/domain-attack-playbooks.md +1129 -0
- package/skills/audit/references/knowledge-base-template.md +513 -0
- package/skills/audit/references/real-env-validation.md +191 -0
- package/skills/audit/references/report-templates.md +417 -0
- package/skills/audit/references/triage-and-prereqs.md +134 -0
- package/skills/audit/scripts/consolidate_drafts.py +554 -0
- package/skills/audit/scripts/partition_findings.py +152 -0
- package/skills/audit/scripts/rg-hotspots.sh +121 -0
- package/skills/audit/scripts/stamp_file_state.py +349 -0
- package/skills/code-reviewer/SKILL.md +65 -0
- package/skills/codeql/SKILL.md +281 -0
- package/skills/codeql/references/build-fixes.md +90 -0
- package/skills/codeql/references/diagnostic-query-templates.md +339 -0
- package/skills/codeql/references/extension-yaml-format.md +209 -0
- package/skills/codeql/references/important-only-suite.md +153 -0
- package/skills/codeql/references/language-details.md +207 -0
- package/skills/codeql/references/macos-arm64e-workaround.md +179 -0
- package/skills/codeql/references/performance-tuning.md +111 -0
- package/skills/codeql/references/quality-assessment.md +172 -0
- package/skills/codeql/references/ruleset-catalog.md +63 -0
- package/skills/codeql/references/run-all-suite.md +92 -0
- package/skills/codeql/references/sarif-processing.md +79 -0
- package/skills/codeql/references/threat-models.md +51 -0
- package/skills/codeql/workflows/build-database.md +280 -0
- package/skills/codeql/workflows/create-data-extensions.md +261 -0
- package/skills/codeql/workflows/run-analysis.md +301 -0
- package/skills/differential-review/SKILL.md +220 -0
- package/skills/differential-review/adversarial.md +203 -0
- package/skills/differential-review/methodology.md +234 -0
- package/skills/differential-review/patterns.md +300 -0
- package/skills/differential-review/reporting.md +369 -0
- package/skills/fp-check/SKILL.md +125 -0
- package/skills/fp-check/references/bug-class-verification.md +114 -0
- package/skills/fp-check/references/deep-verification.md +143 -0
- package/skills/fp-check/references/evidence-templates.md +91 -0
- package/skills/fp-check/references/false-positive-patterns.md +115 -0
- package/skills/fp-check/references/gate-reviews.md +27 -0
- package/skills/fp-check/references/standard-verification.md +78 -0
- package/skills/insecure-defaults/SKILL.md +117 -0
- package/skills/insecure-defaults/references/examples.md +409 -0
- package/skills/last30days/SKILL.md +444 -0
- package/skills/sarif-parsing/SKILL.md +483 -0
- package/skills/sarif-parsing/resources/jq-queries.md +162 -0
- package/skills/sarif-parsing/resources/sarif_helpers.py +331 -0
- package/skills/security-threat-model/LICENSE.txt +201 -0
- package/skills/security-threat-model/SKILL.md +81 -0
- package/skills/security-threat-model/agents/openai.yaml +4 -0
- package/skills/security-threat-model/references/prompt-template.md +255 -0
- package/skills/security-threat-model/references/security-controls-and-assets.md +32 -0
- package/skills/semgrep/SKILL.md +212 -0
- package/skills/semgrep/references/rulesets.md +162 -0
- package/skills/semgrep/references/scan-modes.md +110 -0
- package/skills/semgrep/references/scanner-task-prompt.md +140 -0
- package/skills/semgrep/scripts/merge_sarif.py +203 -0
- package/skills/semgrep/workflows/scan-workflow.md +311 -0
- package/skills/semgrep-rule-creator/SKILL.md +168 -0
- package/skills/semgrep-rule-creator/references/quick-reference.md +202 -0
- package/skills/semgrep-rule-creator/references/workflow.md +240 -0
- package/skills/semgrep-rule-variant-creator/SKILL.md +205 -0
- package/skills/semgrep-rule-variant-creator/references/applicability-analysis.md +250 -0
- package/skills/semgrep-rule-variant-creator/references/language-syntax-guide.md +324 -0
- package/skills/semgrep-rule-variant-creator/references/workflow.md +518 -0
- package/skills/sharp-edges/SKILL.md +292 -0
- package/skills/sharp-edges/references/auth-patterns.md +252 -0
- package/skills/sharp-edges/references/case-studies.md +274 -0
- package/skills/sharp-edges/references/config-patterns.md +333 -0
- package/skills/sharp-edges/references/crypto-apis.md +190 -0
- package/skills/sharp-edges/references/lang-c.md +205 -0
- package/skills/sharp-edges/references/lang-csharp.md +285 -0
- package/skills/sharp-edges/references/lang-go.md +270 -0
- package/skills/sharp-edges/references/lang-java.md +263 -0
- package/skills/sharp-edges/references/lang-javascript.md +269 -0
- package/skills/sharp-edges/references/lang-kotlin.md +265 -0
- package/skills/sharp-edges/references/lang-php.md +245 -0
- package/skills/sharp-edges/references/lang-python.md +274 -0
- package/skills/sharp-edges/references/lang-ruby.md +273 -0
- package/skills/sharp-edges/references/lang-rust.md +272 -0
- package/skills/sharp-edges/references/lang-swift.md +287 -0
- package/skills/sharp-edges/references/language-specific.md +588 -0
- package/skills/spec-to-code-compliance/SKILL.md +357 -0
- package/skills/spec-to-code-compliance/resources/COMPLETENESS_CHECKLIST.md +69 -0
- package/skills/spec-to-code-compliance/resources/IR_EXAMPLES.md +417 -0
- package/skills/spec-to-code-compliance/resources/OUTPUT_REQUIREMENTS.md +105 -0
- package/skills/supply-chain-risk-auditor/SKILL.md +67 -0
- package/skills/supply-chain-risk-auditor/resources/results-template.md +41 -0
- package/skills/variant-analysis/METHODOLOGY.md +327 -0
- package/skills/variant-analysis/SKILL.md +142 -0
- package/skills/variant-analysis/resources/codeql/cpp.ql +119 -0
- package/skills/variant-analysis/resources/codeql/go.ql +69 -0
- package/skills/variant-analysis/resources/codeql/java.ql +71 -0
- package/skills/variant-analysis/resources/codeql/javascript.ql +63 -0
- package/skills/variant-analysis/resources/codeql/python.ql +80 -0
- package/skills/variant-analysis/resources/semgrep/cpp.yaml +98 -0
- package/skills/variant-analysis/resources/semgrep/go.yaml +63 -0
- package/skills/variant-analysis/resources/semgrep/java.yaml +61 -0
- package/skills/variant-analysis/resources/semgrep/javascript.yaml +60 -0
- package/skills/variant-analysis/resources/semgrep/python.yaml +72 -0
- package/skills/variant-analysis/resources/variant-report-template.md +75 -0
- package/skills/vuln-report/SKILL.md +137 -0
- package/skills/vuln-report/agents/openai.yaml +4 -0
- package/skills/vuln-report/references/report-template.md +135 -0
- package/skills/wooyun-legacy/SKILL.md +367 -0
- package/skills/wooyun-legacy/references/bank-penetration.md +222 -0
- package/skills/wooyun-legacy/references/checklists/command-execution-checklist.md +119 -0
- package/skills/wooyun-legacy/references/checklists/csrf-checklist.md +74 -0
- package/skills/wooyun-legacy/references/checklists/file-upload-checklist.md +108 -0
- package/skills/wooyun-legacy/references/checklists/info-disclosure-checklist.md +114 -0
- package/skills/wooyun-legacy/references/checklists/logic-flaws-checklist.md +95 -0
- package/skills/wooyun-legacy/references/checklists/misconfig-checklist.md +124 -0
- package/skills/wooyun-legacy/references/checklists/path-traversal-checklist.md +87 -0
- package/skills/wooyun-legacy/references/checklists/rce-checklist.md +93 -0
- package/skills/wooyun-legacy/references/checklists/sql-injection-checklist.md +97 -0
- package/skills/wooyun-legacy/references/checklists/ssrf-checklist.md +99 -0
- package/skills/wooyun-legacy/references/checklists/unauthorized-access-checklist.md +89 -0
- package/skills/wooyun-legacy/references/checklists/weak-password-checklist.md +115 -0
- package/skills/wooyun-legacy/references/checklists/xss-checklist.md +103 -0
- package/skills/wooyun-legacy/references/checklists/xxe-checklist.md +130 -0
- package/skills/wooyun-legacy/references/info-disclosure.md +975 -0
- package/skills/wooyun-legacy/references/logic-flaws.md +721 -0
- package/skills/wooyun-legacy/references/path-traversal.md +1191 -0
- package/skills/wooyun-legacy/references/telecom-penetration.md +156 -0
- package/skills/wooyun-legacy/references/unauthorized-access.md +980 -0
- package/skills/wooyun-legacy/references/xss.md +746 -0
- package/skills/zeroize-audit/SKILL.md +371 -0
- package/skills/zeroize-audit/configs/c.yaml +21 -0
- package/skills/zeroize-audit/configs/default.yaml +128 -0
- package/skills/zeroize-audit/configs/rust.yaml +83 -0
- package/skills/zeroize-audit/prompts/report_template.md +238 -0
- package/skills/zeroize-audit/prompts/system.md +163 -0
- package/skills/zeroize-audit/prompts/task.md +97 -0
- package/skills/zeroize-audit/references/compile-commands.md +231 -0
- package/skills/zeroize-audit/references/detection-strategy.md +191 -0
- package/skills/zeroize-audit/references/ir-analysis.md +252 -0
- package/skills/zeroize-audit/references/mcp-analysis.md +221 -0
- package/skills/zeroize-audit/references/poc-generation.md +470 -0
- package/skills/zeroize-audit/references/rust-zeroization-patterns.md +867 -0
- package/skills/zeroize-audit/schemas/input.json +83 -0
- package/skills/zeroize-audit/schemas/output.json +140 -0
- package/skills/zeroize-audit/tools/analyze_asm.sh +202 -0
- package/skills/zeroize-audit/tools/analyze_cfg.py +381 -0
- package/skills/zeroize-audit/tools/analyze_heap.sh +211 -0
- package/skills/zeroize-audit/tools/analyze_ir_semantic.py +429 -0
- package/skills/zeroize-audit/tools/diff_ir.sh +135 -0
- package/skills/zeroize-audit/tools/diff_rust_mir.sh +189 -0
- package/skills/zeroize-audit/tools/emit_asm.sh +67 -0
- package/skills/zeroize-audit/tools/emit_ir.sh +77 -0
- package/skills/zeroize-audit/tools/emit_rust_asm.sh +178 -0
- package/skills/zeroize-audit/tools/emit_rust_ir.sh +150 -0
- package/skills/zeroize-audit/tools/emit_rust_mir.sh +158 -0
- package/skills/zeroize-audit/tools/extract_compile_flags.py +284 -0
- package/skills/zeroize-audit/tools/generate_poc.py +1329 -0
- package/skills/zeroize-audit/tools/mcp/apply_confidence_gates.py +113 -0
- package/skills/zeroize-audit/tools/mcp/check_mcp.sh +68 -0
- package/skills/zeroize-audit/tools/mcp/normalize_mcp_evidence.py +125 -0
- package/skills/zeroize-audit/tools/scripts/check_llvm_patterns.py +481 -0
- package/skills/zeroize-audit/tools/scripts/check_mir_patterns.py +554 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm.py +424 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_aarch64.py +300 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_x86.py +283 -0
- package/skills/zeroize-audit/tools/scripts/find_dangerous_apis.py +375 -0
- package/skills/zeroize-audit/tools/scripts/semantic_audit.py +923 -0
- package/skills/zeroize-audit/tools/track_dataflow.sh +196 -0
- package/skills/zeroize-audit/tools/validate_rust_toolchain.sh +298 -0
- package/skills/zeroize-audit/workflows/phase-0-preflight.md +150 -0
- package/skills/zeroize-audit/workflows/phase-1-source-analysis.md +144 -0
- package/skills/zeroize-audit/workflows/phase-2-compiler-analysis.md +139 -0
- package/skills/zeroize-audit/workflows/phase-3-interim-report.md +46 -0
- package/skills/zeroize-audit/workflows/phase-4-poc-generation.md +46 -0
- package/skills/zeroize-audit/workflows/phase-5-poc-validation.md +136 -0
- package/skills/zeroize-audit/workflows/phase-6-final-report.md +44 -0
- package/skills/zeroize-audit/workflows/phase-7-test-generation.md +42 -0
- package/themes/piolium-srcery.json +94 -0
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.11"
|
|
4
|
+
# dependencies = []
|
|
5
|
+
# ///
|
|
6
|
+
"""
|
|
7
|
+
Semantic LLVM IR analyzer for zeroization detection.
|
|
8
|
+
|
|
9
|
+
This tool parses LLVM IR structurally (not just regex) to detect:
|
|
10
|
+
- Memory operations in SSA form (mem2reg output)
|
|
11
|
+
- Loop-unrolled zeroization patterns
|
|
12
|
+
- Complex optimization transformations
|
|
13
|
+
- Store/load chains that affect zeroization
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import json
|
|
18
|
+
import re
|
|
19
|
+
import sys
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class IRInstruction:
|
|
26
|
+
"""Represents an LLVM IR instruction."""
|
|
27
|
+
|
|
28
|
+
line_num: int
|
|
29
|
+
opcode: str
|
|
30
|
+
operands: list[str]
|
|
31
|
+
result: str | None
|
|
32
|
+
raw_line: str
|
|
33
|
+
metadata: dict[str, str] = field(default_factory=dict)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
|
|
37
|
+
class BasicBlock:
|
|
38
|
+
"""Represents a basic block in LLVM IR."""
|
|
39
|
+
|
|
40
|
+
label: str
|
|
41
|
+
instructions: list[IRInstruction]
|
|
42
|
+
successors: list[str] = field(default_factory=list)
|
|
43
|
+
predecessors: list[str] = field(default_factory=list)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
|
|
47
|
+
class Function:
|
|
48
|
+
"""Represents a function in LLVM IR."""
|
|
49
|
+
|
|
50
|
+
name: str
|
|
51
|
+
basic_blocks: dict[str, BasicBlock]
|
|
52
|
+
entry_block: str | None = None
|
|
53
|
+
arguments: list[str] = field(default_factory=list)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SemanticIRAnalyzer:
|
|
57
|
+
"""Semantic analyzer for LLVM IR."""
|
|
58
|
+
|
|
59
|
+
def __init__(self, ir_file: Path, config: dict):
|
|
60
|
+
self.ir_file = ir_file
|
|
61
|
+
self.config = config
|
|
62
|
+
self.functions: dict[str, Function] = {}
|
|
63
|
+
self.current_function: Function | None = None
|
|
64
|
+
self.current_block: BasicBlock | None = None
|
|
65
|
+
|
|
66
|
+
def parse_ir(self) -> None:
|
|
67
|
+
"""Parse LLVM IR file into structured representation."""
|
|
68
|
+
with open(self.ir_file) as f:
|
|
69
|
+
lines = f.readlines()
|
|
70
|
+
|
|
71
|
+
line_num = 0
|
|
72
|
+
for line in lines:
|
|
73
|
+
line_num += 1
|
|
74
|
+
line = line.strip()
|
|
75
|
+
|
|
76
|
+
# Skip comments and empty lines
|
|
77
|
+
if not line or line.startswith(";"):
|
|
78
|
+
continue
|
|
79
|
+
|
|
80
|
+
# Function definition
|
|
81
|
+
if line.startswith("define "):
|
|
82
|
+
self._parse_function_def(line)
|
|
83
|
+
continue
|
|
84
|
+
|
|
85
|
+
# Function end
|
|
86
|
+
if line == "}" and self.current_function:
|
|
87
|
+
self.functions[self.current_function.name] = self.current_function
|
|
88
|
+
self.current_function = None
|
|
89
|
+
self.current_block = None
|
|
90
|
+
continue
|
|
91
|
+
|
|
92
|
+
# Basic block label
|
|
93
|
+
if self.current_function and ":" in line and not line.startswith("%"):
|
|
94
|
+
label = line.split(":")[0].strip()
|
|
95
|
+
self.current_block = BasicBlock(label=label, instructions=[])
|
|
96
|
+
self.current_function.basic_blocks[label] = self.current_block
|
|
97
|
+
if not self.current_function.entry_block:
|
|
98
|
+
self.current_function.entry_block = label
|
|
99
|
+
continue
|
|
100
|
+
|
|
101
|
+
# Instruction
|
|
102
|
+
if self.current_function and self.current_block:
|
|
103
|
+
inst = self._parse_instruction(line, line_num)
|
|
104
|
+
if inst:
|
|
105
|
+
self.current_block.instructions.append(inst)
|
|
106
|
+
|
|
107
|
+
# Track control flow
|
|
108
|
+
if inst.opcode in ["br", "switch", "ret"]:
|
|
109
|
+
self._update_control_flow(inst)
|
|
110
|
+
|
|
111
|
+
def _parse_function_def(self, line: str) -> None:
|
|
112
|
+
"""Parse function definition."""
|
|
113
|
+
# Extract function name: define ... @func_name(...)
|
|
114
|
+
match = re.search(r"@([a-zA-Z0-9_\.]+)\s*\(", line)
|
|
115
|
+
if match:
|
|
116
|
+
func_name = match.group(1)
|
|
117
|
+
self.current_function = Function(name=func_name, basic_blocks={})
|
|
118
|
+
|
|
119
|
+
# Extract arguments
|
|
120
|
+
args_match = re.search(r"\((.*?)\)", line)
|
|
121
|
+
if args_match:
|
|
122
|
+
args_str = args_match.group(1)
|
|
123
|
+
# Simple argument parsing (just count for now)
|
|
124
|
+
self.current_function.arguments = [
|
|
125
|
+
arg.strip() for arg in args_str.split(",") if arg.strip()
|
|
126
|
+
]
|
|
127
|
+
|
|
128
|
+
def _parse_instruction(self, line: str, line_num: int) -> IRInstruction | None:
|
|
129
|
+
"""Parse single instruction."""
|
|
130
|
+
# Pattern: %result = opcode operands
|
|
131
|
+
# or: opcode operands (for void instructions)
|
|
132
|
+
|
|
133
|
+
result = None
|
|
134
|
+
rest = line
|
|
135
|
+
|
|
136
|
+
if "=" in line:
|
|
137
|
+
parts = line.split("=", 1)
|
|
138
|
+
result = parts[0].strip()
|
|
139
|
+
rest = parts[1].strip()
|
|
140
|
+
|
|
141
|
+
# Extract opcode
|
|
142
|
+
tokens = rest.split(None, 1)
|
|
143
|
+
if not tokens:
|
|
144
|
+
return None
|
|
145
|
+
|
|
146
|
+
opcode = tokens[0]
|
|
147
|
+
operands_str = tokens[1] if len(tokens) > 1 else ""
|
|
148
|
+
|
|
149
|
+
# Parse operands (simplified)
|
|
150
|
+
operands = self._parse_operands(operands_str)
|
|
151
|
+
|
|
152
|
+
return IRInstruction(
|
|
153
|
+
line_num=line_num, opcode=opcode, operands=operands, result=result, raw_line=line
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
def _parse_operands(self, operands_str: str) -> list[str]:
|
|
157
|
+
"""Parse instruction operands."""
|
|
158
|
+
# Simple tokenization (can be improved)
|
|
159
|
+
operands = []
|
|
160
|
+
current = ""
|
|
161
|
+
depth = 0
|
|
162
|
+
|
|
163
|
+
for char in operands_str:
|
|
164
|
+
if char in "([{":
|
|
165
|
+
depth += 1
|
|
166
|
+
elif char in ")]}":
|
|
167
|
+
depth -= 1
|
|
168
|
+
elif char == "," and depth == 0:
|
|
169
|
+
if current.strip():
|
|
170
|
+
operands.append(current.strip())
|
|
171
|
+
current = ""
|
|
172
|
+
continue
|
|
173
|
+
current += char
|
|
174
|
+
|
|
175
|
+
if current.strip():
|
|
176
|
+
operands.append(current.strip())
|
|
177
|
+
|
|
178
|
+
return operands
|
|
179
|
+
|
|
180
|
+
def _update_control_flow(self, inst: IRInstruction) -> None:
|
|
181
|
+
"""Update CFG based on control flow instruction."""
|
|
182
|
+
if not self.current_block:
|
|
183
|
+
return
|
|
184
|
+
|
|
185
|
+
if inst.opcode == "br":
|
|
186
|
+
# Conditional: br i1 %cond, label %true, label %false
|
|
187
|
+
# Unconditional: br label %target
|
|
188
|
+
labels = [
|
|
189
|
+
op.replace("label", "").replace("%", "").strip()
|
|
190
|
+
for op in inst.operands
|
|
191
|
+
if "label" in op
|
|
192
|
+
]
|
|
193
|
+
self.current_block.successors.extend(labels)
|
|
194
|
+
|
|
195
|
+
# Update predecessors
|
|
196
|
+
for label in labels:
|
|
197
|
+
if label in self.current_function.basic_blocks:
|
|
198
|
+
self.current_function.basic_blocks[label].predecessors.append(
|
|
199
|
+
self.current_block.label
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
elif inst.opcode == "switch":
|
|
203
|
+
# switch i32 %val, label %default [ ... cases ... ]
|
|
204
|
+
labels = [
|
|
205
|
+
op.replace("label", "").replace("%", "").strip()
|
|
206
|
+
for op in inst.operands
|
|
207
|
+
if "label" in op
|
|
208
|
+
]
|
|
209
|
+
self.current_block.successors.extend(labels)
|
|
210
|
+
|
|
211
|
+
def find_memory_operations(self, func: Function) -> dict[str, list[IRInstruction]]:
|
|
212
|
+
"""Find all memory operations (load, store, memset, memcpy, etc.)."""
|
|
213
|
+
mem_ops = {"store": [], "load": [], "memset": [], "memcpy": [], "call": []}
|
|
214
|
+
|
|
215
|
+
for bb in func.basic_blocks.values():
|
|
216
|
+
for inst in bb.instructions:
|
|
217
|
+
if inst.opcode == "store":
|
|
218
|
+
mem_ops["store"].append(inst)
|
|
219
|
+
elif inst.opcode == "load":
|
|
220
|
+
mem_ops["load"].append(inst)
|
|
221
|
+
elif inst.opcode == "call":
|
|
222
|
+
# Check for memset/memcpy/zeroize calls
|
|
223
|
+
call_target = self._extract_call_target(inst)
|
|
224
|
+
if "memset" in call_target or "llvm.memset" in call_target:
|
|
225
|
+
mem_ops["memset"].append(inst)
|
|
226
|
+
elif "memcpy" in call_target or "llvm.memcpy" in call_target:
|
|
227
|
+
mem_ops["memcpy"].append(inst)
|
|
228
|
+
elif any(
|
|
229
|
+
fn in call_target
|
|
230
|
+
for fn in ["explicit_bzero", "OPENSSL_cleanse", "sodium_memzero", "zeroize"]
|
|
231
|
+
):
|
|
232
|
+
mem_ops["call"].append(inst)
|
|
233
|
+
|
|
234
|
+
return mem_ops
|
|
235
|
+
|
|
236
|
+
def _extract_call_target(self, inst: IRInstruction) -> str:
|
|
237
|
+
"""Extract function name from call instruction."""
|
|
238
|
+
for op in inst.operands:
|
|
239
|
+
if "@" in op:
|
|
240
|
+
match = re.search(r"@([a-zA-Z0-9_\.]+)", op)
|
|
241
|
+
if match:
|
|
242
|
+
return match.group(1)
|
|
243
|
+
return ""
|
|
244
|
+
|
|
245
|
+
def detect_loop_unrolled_wipes(self, func: Function) -> list[dict]:
|
|
246
|
+
"""Detect zeroization patterns from loop unrolling."""
|
|
247
|
+
findings = []
|
|
248
|
+
|
|
249
|
+
for bb_label, bb in func.basic_blocks.items():
|
|
250
|
+
# Look for patterns like:
|
|
251
|
+
# store i8 0, i8* %ptr.0
|
|
252
|
+
# store i8 0, i8* %ptr.1
|
|
253
|
+
# store i8 0, i8* %ptr.2
|
|
254
|
+
# ... (repeated pattern indicating unrolled loop)
|
|
255
|
+
|
|
256
|
+
zero_stores = []
|
|
257
|
+
for inst in bb.instructions:
|
|
258
|
+
# Check if storing 0
|
|
259
|
+
if (
|
|
260
|
+
inst.opcode == "store"
|
|
261
|
+
and inst.operands
|
|
262
|
+
and ("i8 0" in inst.operands[0] or "i32 0" in inst.operands[0])
|
|
263
|
+
):
|
|
264
|
+
zero_stores.append(inst)
|
|
265
|
+
|
|
266
|
+
# If we have 4+ consecutive zero stores, likely an unrolled wipe loop
|
|
267
|
+
if len(zero_stores) >= 4:
|
|
268
|
+
# Check if addresses are sequential
|
|
269
|
+
addresses = [self._extract_store_address(inst) for inst in zero_stores]
|
|
270
|
+
if self._are_sequential_addresses(addresses):
|
|
271
|
+
findings.append(
|
|
272
|
+
{
|
|
273
|
+
"type": "LOOP_UNROLLED_WIPE",
|
|
274
|
+
"block": bb_label,
|
|
275
|
+
"count": len(zero_stores),
|
|
276
|
+
"first_line": zero_stores[0].line_num,
|
|
277
|
+
"evidence": (
|
|
278
|
+
f"Found {len(zero_stores)} consecutive zero stores"
|
|
279
|
+
" (likely unrolled loop)"
|
|
280
|
+
),
|
|
281
|
+
}
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
return findings
|
|
285
|
+
|
|
286
|
+
def _extract_store_address(self, inst: IRInstruction) -> str:
|
|
287
|
+
"""Extract address operand from store instruction."""
|
|
288
|
+
# store type value, type* pointer
|
|
289
|
+
if len(inst.operands) >= 2:
|
|
290
|
+
return inst.operands[1]
|
|
291
|
+
return ""
|
|
292
|
+
|
|
293
|
+
def _are_sequential_addresses(self, addresses: list[str]) -> bool:
|
|
294
|
+
"""Check if addresses look sequential (e.g., %ptr.0, %ptr.1, %ptr.2)."""
|
|
295
|
+
if len(addresses) < 2:
|
|
296
|
+
return False
|
|
297
|
+
|
|
298
|
+
# Simple heuristic: check for pattern like %name.0, %name.1, etc.
|
|
299
|
+
base_pattern = re.sub(r"\d+", "", addresses[0])
|
|
300
|
+
return all(re.sub(r"\d+", "", addr) == base_pattern for addr in addresses[1:])
|
|
301
|
+
|
|
302
|
+
def detect_volatile_stores(self, func: Function) -> list[IRInstruction]:
|
|
303
|
+
"""Find volatile store instructions (cannot be optimized away)."""
|
|
304
|
+
volatile_stores = []
|
|
305
|
+
|
|
306
|
+
for bb in func.basic_blocks.values():
|
|
307
|
+
for inst in bb.instructions:
|
|
308
|
+
if inst.opcode == "store" and "volatile" in inst.raw_line:
|
|
309
|
+
volatile_stores.append(inst)
|
|
310
|
+
|
|
311
|
+
return volatile_stores
|
|
312
|
+
|
|
313
|
+
def analyze_mem2reg_output(self, func: Function) -> dict:
|
|
314
|
+
"""Analyze memory operations in SSA form (after mem2reg pass)."""
|
|
315
|
+
# After mem2reg, local variables are promoted to registers
|
|
316
|
+
# Look for phi nodes and register operations
|
|
317
|
+
|
|
318
|
+
phi_nodes = []
|
|
319
|
+
register_ops = []
|
|
320
|
+
|
|
321
|
+
for bb in func.basic_blocks.values():
|
|
322
|
+
for inst in bb.instructions:
|
|
323
|
+
if inst.opcode == "phi":
|
|
324
|
+
phi_nodes.append(inst)
|
|
325
|
+
elif inst.result and inst.result.startswith("%"):
|
|
326
|
+
register_ops.append(inst)
|
|
327
|
+
|
|
328
|
+
return {
|
|
329
|
+
"phi_count": len(phi_nodes),
|
|
330
|
+
"register_ops": len(register_ops),
|
|
331
|
+
"has_mem2reg": len(phi_nodes) > 0,
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
def analyze_function(self, func_name: str) -> dict:
|
|
335
|
+
"""Perform comprehensive analysis on a function."""
|
|
336
|
+
if func_name not in self.functions:
|
|
337
|
+
return {"error": f"Function {func_name} not found"}
|
|
338
|
+
|
|
339
|
+
func = self.functions[func_name]
|
|
340
|
+
|
|
341
|
+
# Find memory operations
|
|
342
|
+
mem_ops = self.find_memory_operations(func)
|
|
343
|
+
|
|
344
|
+
# Detect patterns
|
|
345
|
+
loop_unrolled = self.detect_loop_unrolled_wipes(func)
|
|
346
|
+
volatile_stores = self.detect_volatile_stores(func)
|
|
347
|
+
mem2reg_info = self.analyze_mem2reg_output(func)
|
|
348
|
+
|
|
349
|
+
# Check for wipe presence
|
|
350
|
+
has_wipe = (
|
|
351
|
+
len(mem_ops["memset"]) > 0 or len(mem_ops["call"]) > 0 or len(volatile_stores) > 0
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
return {
|
|
355
|
+
"function": func_name,
|
|
356
|
+
"basic_blocks": len(func.basic_blocks),
|
|
357
|
+
"memory_operations": {
|
|
358
|
+
"stores": len(mem_ops["store"]),
|
|
359
|
+
"loads": len(mem_ops["load"]),
|
|
360
|
+
"memset_calls": len(mem_ops["memset"]),
|
|
361
|
+
"secure_wipe_calls": len(mem_ops["call"]),
|
|
362
|
+
"volatile_stores": len(volatile_stores),
|
|
363
|
+
},
|
|
364
|
+
"patterns": {
|
|
365
|
+
"loop_unrolled_wipes": loop_unrolled,
|
|
366
|
+
"has_volatile_stores": len(volatile_stores) > 0,
|
|
367
|
+
},
|
|
368
|
+
"ssa_analysis": mem2reg_info,
|
|
369
|
+
"has_zeroization": has_wipe,
|
|
370
|
+
"wipe_instructions": [
|
|
371
|
+
{"line": inst.line_num, "type": "memset", "raw": inst.raw_line}
|
|
372
|
+
for inst in mem_ops["memset"]
|
|
373
|
+
]
|
|
374
|
+
+ [
|
|
375
|
+
{"line": inst.line_num, "type": "secure_call", "raw": inst.raw_line}
|
|
376
|
+
for inst in mem_ops["call"]
|
|
377
|
+
]
|
|
378
|
+
+ [
|
|
379
|
+
{"line": inst.line_num, "type": "volatile_store", "raw": inst.raw_line}
|
|
380
|
+
for inst in volatile_stores
|
|
381
|
+
],
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def main():
    """Command-line entry point.

    Parses ``--ir``, ``--function``, ``--config`` and ``--out``, runs the
    analyzer over either the one requested function or every parsed
    function, and writes the combined results as JSON to ``--out``.
    Exits with status 1 if parsing the IR file raises ``OSError``.
    """
    cli = argparse.ArgumentParser(description="Semantic LLVM IR analyzer")
    cli.add_argument("--ir", required=True, help="LLVM IR file (.ll)")
    cli.add_argument("--function", help="Specific function to analyze (default: all)")
    cli.add_argument("--config", help="Configuration YAML file")
    cli.add_argument("--out", required=True, help="Output JSON file")
    opts = cli.parse_args()

    # NOTE: --config is accepted but never read here; the analyzer always
    # receives an empty configuration.
    settings = {}

    analyzer = SemanticIRAnalyzer(Path(opts.ir), settings)
    try:
        analyzer.parse_ir()
    except OSError as exc:
        print(f"Error: cannot read IR file {opts.ir}: {exc}", file=sys.stderr)
        sys.exit(1)

    # One named function if requested, otherwise every parsed function.
    targets = [opts.function] if opts.function else analyzer.functions

    report = {
        "ir_file": opts.ir,
        "functions_found": len(analyzer.functions),
        "analyses": [analyzer.analyze_function(name) for name in targets],
    }

    # Create the destination directory on demand before writing.
    destination = Path(opts.out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w") as handle:
        json.dump(report, handle, indent=2)

    print(f"OK: semantic IR analysis written to {opts.out}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# Normalize and diff LLVM IR across one or more optimization levels.
|
|
5
|
+
#
|
|
6
|
+
# Usage (two-file, backward-compatible):
|
|
7
|
+
# diff_ir.sh <O0.ll> <O2.ll>
|
|
8
|
+
#
|
|
9
|
+
# Usage (multi-level — recommended):
|
|
10
|
+
# diff_ir.sh <O0.ll> <O1.ll> <O2.ll> [<O3.ll> ...]
|
|
11
|
+
#
|
|
12
|
+
# Output:
|
|
13
|
+
# - Prints a unified diff for each pair of adjacent files.
|
|
14
|
+
# - For 3+ files, also prints a WIPE PATTERN SUMMARY identifying the first
|
|
15
|
+
# optimization level at which zeroization patterns disappear.
|
|
16
|
+
# - Returns exit code 0 if all files are identical, 1 if any diffs found, 2 on usage errors (too few arguments or a missing input file).
|
|
17
|
+
#
|
|
18
|
+
# Wipe patterns detected in the summary:
|
|
19
|
+
# llvm.memset, volatile, explicit_bzero, sodium_memzero, OPENSSL_cleanse,
|
|
20
|
+
# SecureZeroMemory, memset_s, store i8 0, store i64 0, store i32 0
|
|
21
|
+
|
|
22
|
+
# Print the one-line invocation synopsis to stderr.
usage() {
  printf 'Usage: %s <baseline.ll> <file2.ll> [<file3.ll> ...]\n' "$0" >&2
}
|
|
25
|
+
|
|
26
|
+
# At least two inputs are required; anything less is a usage error (exit 2).
if (($# < 2)); then
  usage
  exit 2
fi

# Every argument must name an existing regular file (exit 2 otherwise).
for f in "$@"; do
  [[ -f "$f" ]] && continue
  echo "Missing file: $f" >&2
  exit 2
done
|
|
37
|
+
|
|
38
|
+
# Normalize LLVM IR read from stdin and write the result to stdout.
#
# Only obviously noisy lines are stripped; instruction text is not rewritten:
#   - everything from the first ';' to end of line (IR comments),
#   - lines that are empty or whitespace-only after comment stripping,
#   - the source_filename / target datalayout / target triple headers,
#   - named metadata such as "!llvm.ident = ...",
#   - debug-info metadata nodes. In textual IR these are written as
#     "!<N> = [distinct ]!DI...(", so they are matched in that numbered form;
#     a bare leading "!DI" is still removed as before. Leaving them in would
#     keep debug-only text (e.g. DW_TAG_volatile_type) in the normalized
#     output.
#
# [[:space:]] is used instead of \s because \s is not part of POSIX ERE.
norm() {
  sed -E \
    -e 's/;.*$//' \
    -e '/^[[:space:]]*$/d' \
    -e '/^source_filename = /d' \
    -e '/^target datalayout = /d' \
    -e '/^target triple = /d' \
    -e '/^!llvm\./d' \
    -e '/^![0-9]+ = (distinct )?!DI/d' \
    -e '/^!DI/d'
}
|
|
55
|
+
|
|
56
|
+
# Succeed (status 0) when the file named by $1 contains at least one of the
# zeroization markers listed below; otherwise return grep's non-zero status.
has_wipe_pattern() {
  local -a markers=(
    'llvm\.memset'
    'volatile'
    'explicit_bzero'
    'sodium_memzero'
    'OPENSSL_cleanse'
    'SecureZeroMemory'
    'memset_s'
    'store i8 0'
    'store i64 0'
    'store i32 0'
  )
  # Join the markers with '|' to form a single ERE alternation.
  local IFS='|'
  local regex="${markers[*]}"
  grep -qE "$regex" "$1"
}
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
# Write a normalized copy of every input into a private temp directory.
# NORMFILES[k] is the normalized counterpart of FILES[k]; the directory is
# removed when the script exits.
# ---------------------------------------------------------------------------
FILES=("$@")
NUM_FILES=${#FILES[@]}

TMPDIR_BASE="$(mktemp -d -t za-ir-XXXXXX)"
trap 'rm -rf "$TMPDIR_BASE"' EXIT

NORMFILES=()
for ((i = 0; i < NUM_FILES; i++)); do
  NORMFILES[i]="$TMPDIR_BASE/norm_${i}.ll"
  norm <"${FILES[i]}" >"${NORMFILES[i]}"
done
|
|
78
|
+
|
|
79
|
+
# ---------------------------------------------------------------------------
# Exactly two inputs: print one unified diff and exit with diff's status
# (0 = identical, 1 = different). A diff failure (status 2) is reported on
# stderr and mapped to exit 1. No wipe summary is printed in this mode.
# ---------------------------------------------------------------------------
if ((NUM_FILES == 2)); then
  diff_rc=0
  diff -u "${NORMFILES[0]}" "${NORMFILES[1]}" || diff_rc=$?
  case "$diff_rc" in
    2)
      echo "diff_ir.sh: diff failed (internal error)" >&2
      exit 1
      ;;
    *)
      exit "$diff_rc"
      ;;
  esac
fi
|
|
91
|
+
|
|
92
|
+
# ---------------------------------------------------------------------------
# Multi-file mode: pairwise diffs between adjacent files + wipe summary.
#
# any_diff becomes 1 as soon as one adjacent pair differs. diff's exit status
# is captured explicitly so that a diff failure (status >= 2) is reported on
# stderr and aborts with exit 1, the same way the two-file mode handles it,
# instead of being silently counted as "files differ".
# ---------------------------------------------------------------------------
any_diff=0

for ((i = 0; i < NUM_FILES - 1; i++)); do
  j=$((i + 1))
  A_LABEL="$(basename "${FILES[$i]}")"
  B_LABEL="$(basename "${FILES[$j]}")"
  echo "=== DIFF File $((i + 1)) ($A_LABEL) vs File $((j + 1)) ($B_LABEL) ==="
  pair_rc=0
  diff -u "${NORMFILES[$i]}" "${NORMFILES[$j]}" || pair_rc=$?
  if [[ $pair_rc -ge 2 ]]; then
    echo "diff_ir.sh: diff failed (internal error)" >&2
    exit 1
  elif [[ $pair_rc -eq 1 ]]; then
    any_diff=1
  fi
  echo ""
done
|
|
107
|
+
|
|
108
|
+
# ---------------------------------------------------------------------------
# Wipe pattern summary: identify first file where wipe disappears.
#
# A "disappearance" is only reported for a file that lacks wipe patterns
# AFTER some earlier file contained them. A baseline that never had a wipe
# is not evidence that an optimization removed one, so that case gets its
# own message instead of the OPTIMIZED_AWAY_ZEROIZE line.
# ---------------------------------------------------------------------------
echo "=== WIPE PATTERN SUMMARY ==="
first_absent=-1 # index of the first file where a previously seen wipe is gone
seen_present=0  # 1 once any file has shown a wipe pattern
seen_absent=0   # 1 once any file has lacked a wipe pattern
for i in "${!NORMFILES[@]}"; do
  LABEL="$(basename "${FILES[$i]}")"
  if has_wipe_pattern "${NORMFILES[$i]}"; then
    echo " File $((i + 1)) ($LABEL): WIPE PRESENT"
    seen_present=1
  else
    echo " File $((i + 1)) ($LABEL): WIPE ABSENT"
    seen_absent=1
    if [[ $first_absent -eq -1 && $seen_present -eq 1 ]]; then
      first_absent=$i
    fi
  fi
done

echo ""
if [[ $first_absent -ne -1 ]]; then
  LABEL="$(basename "${FILES[$first_absent]}")"
  echo " First disappearance at File $((first_absent + 1)) ($LABEL)."
  echo " Evidence: OPTIMIZED_AWAY_ZEROIZE — wipe present at lower opt level(s) but absent here."
elif [[ $seen_absent -eq 0 ]]; then
  echo " Wipe patterns present at all opt levels analyzed."
elif [[ $seen_present -eq 0 ]]; then
  echo " No wipe patterns found at any opt level analyzed."
else
  echo " Wipe patterns absent at the lowest opt level(s) but present afterwards; no disappearance detected."
fi

# Exit status reflects the pairwise diffs: 0 = all identical, 1 = some differ.
exit "$any_diff"
|