@vigolium/piolium 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +117 -0
  3. package/agents/access-auditor.md +300 -0
  4. package/agents/assumption-breaker.md +154 -0
  5. package/agents/attack-designer.md +116 -0
  6. package/agents/code-scanner.md +139 -0
  7. package/agents/concurrency-auditor.md +238 -0
  8. package/agents/confirm-writer.md +257 -0
  9. package/agents/context-reviewer.md +274 -0
  10. package/agents/cross-verifier.md +165 -0
  11. package/agents/cve-scout.md +381 -0
  12. package/agents/env-builder.md +282 -0
  13. package/agents/env-profiler.md +205 -0
  14. package/agents/evidence-collector.md +140 -0
  15. package/agents/finding-grader.md +142 -0
  16. package/agents/finding-writer.md +148 -0
  17. package/agents/flow-tracer.md +106 -0
  18. package/agents/goal-backtracer.md +146 -0
  19. package/agents/history-miner.md +467 -0
  20. package/agents/independent-verifier.md +118 -0
  21. package/agents/intent-mapper.md +183 -0
  22. package/agents/longshot-collector.md +128 -0
  23. package/agents/longshot-prober.md +126 -0
  24. package/agents/patch-auditor.md +73 -0
  25. package/agents/poc-author.md +124 -0
  26. package/agents/poc-runner.md +194 -0
  27. package/agents/probe-lead.md +269 -0
  28. package/agents/red-challenger.md +101 -0
  29. package/agents/report-composer.md +208 -0
  30. package/agents/review-adjudicator.md +216 -0
  31. package/agents/spec-auditor.md +155 -0
  32. package/agents/taint-tracer.md +265 -0
  33. package/agents/test-locator.md +209 -0
  34. package/agents/threat-modeler.md +132 -0
  35. package/agents/variant-scanner.md +108 -0
  36. package/agents/variant-spotter.md +110 -0
  37. package/bin/piolium.mjs +376 -0
  38. package/extensions/piolium/_vendor/yaml.bundle.d.mts +6 -0
  39. package/extensions/piolium/_vendor/yaml.bundle.mjs +139 -0
  40. package/extensions/piolium/agent-runner.ts +322 -0
  41. package/extensions/piolium/agents.ts +266 -0
  42. package/extensions/piolium/audit-state.ts +522 -0
  43. package/extensions/piolium/bundled-resources.ts +97 -0
  44. package/extensions/piolium/candidate-scan.ts +966 -0
  45. package/extensions/piolium/command-target.ts +177 -0
  46. package/extensions/piolium/console-stream.ts +57 -0
  47. package/extensions/piolium/export-results.ts +380 -0
  48. package/extensions/piolium/findings.ts +448 -0
  49. package/extensions/piolium/heartbeat.ts +182 -0
  50. package/extensions/piolium/help.ts +234 -0
  51. package/extensions/piolium/index.ts +1865 -0
  52. package/extensions/piolium/longshot.ts +530 -0
  53. package/extensions/piolium/matcher-suggestions.ts +196 -0
  54. package/extensions/piolium/matcher-utils.ts +83 -0
  55. package/extensions/piolium/modes/balanced.ts +750 -0
  56. package/extensions/piolium/modes/confirm-bootstrap.ts +186 -0
  57. package/extensions/piolium/modes/confirm.ts +697 -0
  58. package/extensions/piolium/modes/deep.ts +917 -0
  59. package/extensions/piolium/modes/diff.ts +177 -0
  60. package/extensions/piolium/modes/lite.ts +540 -0
  61. package/extensions/piolium/modes/longshot.ts +595 -0
  62. package/extensions/piolium/modes/merge.ts +204 -0
  63. package/extensions/piolium/modes/phase-runner.ts +267 -0
  64. package/extensions/piolium/modes/reinvest.ts +546 -0
  65. package/extensions/piolium/modes/revisit.ts +279 -0
  66. package/extensions/piolium/modes.ts +48 -0
  67. package/extensions/piolium/phase-labels.ts +123 -0
  68. package/extensions/piolium/phase-status-strip.ts +92 -0
  69. package/extensions/piolium/prompt-prefix-editor.ts +39 -0
  70. package/extensions/piolium/providers/anthropic-vertex.ts +836 -0
  71. package/extensions/piolium/recon.ts +409 -0
  72. package/extensions/piolium/result-stats.ts +105 -0
  73. package/extensions/piolium/retry.ts +120 -0
  74. package/extensions/piolium/scheduler.ts +212 -0
  75. package/extensions/piolium/secrets.ts +368 -0
  76. package/extensions/piolium/tools/web-tools.ts +148 -0
  77. package/package.json +77 -0
  78. package/skills/agentic-actions-auditor/SKILL.md +327 -0
  79. package/skills/agentic-actions-auditor/references/action-profiles.md +186 -0
  80. package/skills/agentic-actions-auditor/references/cross-file-resolution.md +209 -0
  81. package/skills/agentic-actions-auditor/references/foundations.md +94 -0
  82. package/skills/agentic-actions-auditor/references/vector-a-env-var-intermediary.md +77 -0
  83. package/skills/agentic-actions-auditor/references/vector-b-direct-expression-injection.md +83 -0
  84. package/skills/agentic-actions-auditor/references/vector-c-cli-data-fetch.md +83 -0
  85. package/skills/agentic-actions-auditor/references/vector-d-pr-target-checkout.md +88 -0
  86. package/skills/agentic-actions-auditor/references/vector-e-error-log-injection.md +88 -0
  87. package/skills/agentic-actions-auditor/references/vector-f-subshell-expansion.md +82 -0
  88. package/skills/agentic-actions-auditor/references/vector-g-eval-of-ai-output.md +91 -0
  89. package/skills/agentic-actions-auditor/references/vector-h-dangerous-sandbox-configs.md +102 -0
  90. package/skills/agentic-actions-auditor/references/vector-i-wildcard-allowlists.md +88 -0
  91. package/skills/audit/SKILL.md +562 -0
  92. package/skills/audit/assets/icon.svg +7 -0
  93. package/skills/audit/hooks/scripts/validate_phase_output.py +550 -0
  94. package/skills/audit/references/adversarial-review.md +148 -0
  95. package/skills/audit/references/architecture-aware-sast.md +306 -0
  96. package/skills/audit/references/audit-workflow.md +737 -0
  97. package/skills/audit/references/chamber-protocol.md +384 -0
  98. package/skills/audit/references/creative-attack-modes.md +221 -0
  99. package/skills/audit/references/deep-analysis.md +273 -0
  100. package/skills/audit/references/domain-attack-playbooks.md +1129 -0
  101. package/skills/audit/references/knowledge-base-template.md +513 -0
  102. package/skills/audit/references/real-env-validation.md +191 -0
  103. package/skills/audit/references/report-templates.md +417 -0
  104. package/skills/audit/references/triage-and-prereqs.md +134 -0
  105. package/skills/audit/scripts/consolidate_drafts.py +554 -0
  106. package/skills/audit/scripts/partition_findings.py +152 -0
  107. package/skills/audit/scripts/rg-hotspots.sh +121 -0
  108. package/skills/audit/scripts/stamp_file_state.py +349 -0
  109. package/skills/code-reviewer/SKILL.md +65 -0
  110. package/skills/codeql/SKILL.md +281 -0
  111. package/skills/codeql/references/build-fixes.md +90 -0
  112. package/skills/codeql/references/diagnostic-query-templates.md +339 -0
  113. package/skills/codeql/references/extension-yaml-format.md +209 -0
  114. package/skills/codeql/references/important-only-suite.md +153 -0
  115. package/skills/codeql/references/language-details.md +207 -0
  116. package/skills/codeql/references/macos-arm64e-workaround.md +179 -0
  117. package/skills/codeql/references/performance-tuning.md +111 -0
  118. package/skills/codeql/references/quality-assessment.md +172 -0
  119. package/skills/codeql/references/ruleset-catalog.md +63 -0
  120. package/skills/codeql/references/run-all-suite.md +92 -0
  121. package/skills/codeql/references/sarif-processing.md +79 -0
  122. package/skills/codeql/references/threat-models.md +51 -0
  123. package/skills/codeql/workflows/build-database.md +280 -0
  124. package/skills/codeql/workflows/create-data-extensions.md +261 -0
  125. package/skills/codeql/workflows/run-analysis.md +301 -0
  126. package/skills/differential-review/SKILL.md +220 -0
  127. package/skills/differential-review/adversarial.md +203 -0
  128. package/skills/differential-review/methodology.md +234 -0
  129. package/skills/differential-review/patterns.md +300 -0
  130. package/skills/differential-review/reporting.md +369 -0
  131. package/skills/fp-check/SKILL.md +125 -0
  132. package/skills/fp-check/references/bug-class-verification.md +114 -0
  133. package/skills/fp-check/references/deep-verification.md +143 -0
  134. package/skills/fp-check/references/evidence-templates.md +91 -0
  135. package/skills/fp-check/references/false-positive-patterns.md +115 -0
  136. package/skills/fp-check/references/gate-reviews.md +27 -0
  137. package/skills/fp-check/references/standard-verification.md +78 -0
  138. package/skills/insecure-defaults/SKILL.md +117 -0
  139. package/skills/insecure-defaults/references/examples.md +409 -0
  140. package/skills/last30days/SKILL.md +444 -0
  141. package/skills/sarif-parsing/SKILL.md +483 -0
  142. package/skills/sarif-parsing/resources/jq-queries.md +162 -0
  143. package/skills/sarif-parsing/resources/sarif_helpers.py +331 -0
  144. package/skills/security-threat-model/LICENSE.txt +201 -0
  145. package/skills/security-threat-model/SKILL.md +81 -0
  146. package/skills/security-threat-model/agents/openai.yaml +4 -0
  147. package/skills/security-threat-model/references/prompt-template.md +255 -0
  148. package/skills/security-threat-model/references/security-controls-and-assets.md +32 -0
  149. package/skills/semgrep/SKILL.md +212 -0
  150. package/skills/semgrep/references/rulesets.md +162 -0
  151. package/skills/semgrep/references/scan-modes.md +110 -0
  152. package/skills/semgrep/references/scanner-task-prompt.md +140 -0
  153. package/skills/semgrep/scripts/merge_sarif.py +203 -0
  154. package/skills/semgrep/workflows/scan-workflow.md +311 -0
  155. package/skills/semgrep-rule-creator/SKILL.md +168 -0
  156. package/skills/semgrep-rule-creator/references/quick-reference.md +202 -0
  157. package/skills/semgrep-rule-creator/references/workflow.md +240 -0
  158. package/skills/semgrep-rule-variant-creator/SKILL.md +205 -0
  159. package/skills/semgrep-rule-variant-creator/references/applicability-analysis.md +250 -0
  160. package/skills/semgrep-rule-variant-creator/references/language-syntax-guide.md +324 -0
  161. package/skills/semgrep-rule-variant-creator/references/workflow.md +518 -0
  162. package/skills/sharp-edges/SKILL.md +292 -0
  163. package/skills/sharp-edges/references/auth-patterns.md +252 -0
  164. package/skills/sharp-edges/references/case-studies.md +274 -0
  165. package/skills/sharp-edges/references/config-patterns.md +333 -0
  166. package/skills/sharp-edges/references/crypto-apis.md +190 -0
  167. package/skills/sharp-edges/references/lang-c.md +205 -0
  168. package/skills/sharp-edges/references/lang-csharp.md +285 -0
  169. package/skills/sharp-edges/references/lang-go.md +270 -0
  170. package/skills/sharp-edges/references/lang-java.md +263 -0
  171. package/skills/sharp-edges/references/lang-javascript.md +269 -0
  172. package/skills/sharp-edges/references/lang-kotlin.md +265 -0
  173. package/skills/sharp-edges/references/lang-php.md +245 -0
  174. package/skills/sharp-edges/references/lang-python.md +274 -0
  175. package/skills/sharp-edges/references/lang-ruby.md +273 -0
  176. package/skills/sharp-edges/references/lang-rust.md +272 -0
  177. package/skills/sharp-edges/references/lang-swift.md +287 -0
  178. package/skills/sharp-edges/references/language-specific.md +588 -0
  179. package/skills/spec-to-code-compliance/SKILL.md +357 -0
  180. package/skills/spec-to-code-compliance/resources/COMPLETENESS_CHECKLIST.md +69 -0
  181. package/skills/spec-to-code-compliance/resources/IR_EXAMPLES.md +417 -0
  182. package/skills/spec-to-code-compliance/resources/OUTPUT_REQUIREMENTS.md +105 -0
  183. package/skills/supply-chain-risk-auditor/SKILL.md +67 -0
  184. package/skills/supply-chain-risk-auditor/resources/results-template.md +41 -0
  185. package/skills/variant-analysis/METHODOLOGY.md +327 -0
  186. package/skills/variant-analysis/SKILL.md +142 -0
  187. package/skills/variant-analysis/resources/codeql/cpp.ql +119 -0
  188. package/skills/variant-analysis/resources/codeql/go.ql +69 -0
  189. package/skills/variant-analysis/resources/codeql/java.ql +71 -0
  190. package/skills/variant-analysis/resources/codeql/javascript.ql +63 -0
  191. package/skills/variant-analysis/resources/codeql/python.ql +80 -0
  192. package/skills/variant-analysis/resources/semgrep/cpp.yaml +98 -0
  193. package/skills/variant-analysis/resources/semgrep/go.yaml +63 -0
  194. package/skills/variant-analysis/resources/semgrep/java.yaml +61 -0
  195. package/skills/variant-analysis/resources/semgrep/javascript.yaml +60 -0
  196. package/skills/variant-analysis/resources/semgrep/python.yaml +72 -0
  197. package/skills/variant-analysis/resources/variant-report-template.md +75 -0
  198. package/skills/vuln-report/SKILL.md +137 -0
  199. package/skills/vuln-report/agents/openai.yaml +4 -0
  200. package/skills/vuln-report/references/report-template.md +135 -0
  201. package/skills/wooyun-legacy/SKILL.md +367 -0
  202. package/skills/wooyun-legacy/references/bank-penetration.md +222 -0
  203. package/skills/wooyun-legacy/references/checklists/command-execution-checklist.md +119 -0
  204. package/skills/wooyun-legacy/references/checklists/csrf-checklist.md +74 -0
  205. package/skills/wooyun-legacy/references/checklists/file-upload-checklist.md +108 -0
  206. package/skills/wooyun-legacy/references/checklists/info-disclosure-checklist.md +114 -0
  207. package/skills/wooyun-legacy/references/checklists/logic-flaws-checklist.md +95 -0
  208. package/skills/wooyun-legacy/references/checklists/misconfig-checklist.md +124 -0
  209. package/skills/wooyun-legacy/references/checklists/path-traversal-checklist.md +87 -0
  210. package/skills/wooyun-legacy/references/checklists/rce-checklist.md +93 -0
  211. package/skills/wooyun-legacy/references/checklists/sql-injection-checklist.md +97 -0
  212. package/skills/wooyun-legacy/references/checklists/ssrf-checklist.md +99 -0
  213. package/skills/wooyun-legacy/references/checklists/unauthorized-access-checklist.md +89 -0
  214. package/skills/wooyun-legacy/references/checklists/weak-password-checklist.md +115 -0
  215. package/skills/wooyun-legacy/references/checklists/xss-checklist.md +103 -0
  216. package/skills/wooyun-legacy/references/checklists/xxe-checklist.md +130 -0
  217. package/skills/wooyun-legacy/references/info-disclosure.md +975 -0
  218. package/skills/wooyun-legacy/references/logic-flaws.md +721 -0
  219. package/skills/wooyun-legacy/references/path-traversal.md +1191 -0
  220. package/skills/wooyun-legacy/references/telecom-penetration.md +156 -0
  221. package/skills/wooyun-legacy/references/unauthorized-access.md +980 -0
  222. package/skills/wooyun-legacy/references/xss.md +746 -0
  223. package/skills/zeroize-audit/SKILL.md +371 -0
  224. package/skills/zeroize-audit/configs/c.yaml +21 -0
  225. package/skills/zeroize-audit/configs/default.yaml +128 -0
  226. package/skills/zeroize-audit/configs/rust.yaml +83 -0
  227. package/skills/zeroize-audit/prompts/report_template.md +238 -0
  228. package/skills/zeroize-audit/prompts/system.md +163 -0
  229. package/skills/zeroize-audit/prompts/task.md +97 -0
  230. package/skills/zeroize-audit/references/compile-commands.md +231 -0
  231. package/skills/zeroize-audit/references/detection-strategy.md +191 -0
  232. package/skills/zeroize-audit/references/ir-analysis.md +252 -0
  233. package/skills/zeroize-audit/references/mcp-analysis.md +221 -0
  234. package/skills/zeroize-audit/references/poc-generation.md +470 -0
  235. package/skills/zeroize-audit/references/rust-zeroization-patterns.md +867 -0
  236. package/skills/zeroize-audit/schemas/input.json +83 -0
  237. package/skills/zeroize-audit/schemas/output.json +140 -0
  238. package/skills/zeroize-audit/tools/analyze_asm.sh +202 -0
  239. package/skills/zeroize-audit/tools/analyze_cfg.py +381 -0
  240. package/skills/zeroize-audit/tools/analyze_heap.sh +211 -0
  241. package/skills/zeroize-audit/tools/analyze_ir_semantic.py +429 -0
  242. package/skills/zeroize-audit/tools/diff_ir.sh +135 -0
  243. package/skills/zeroize-audit/tools/diff_rust_mir.sh +189 -0
  244. package/skills/zeroize-audit/tools/emit_asm.sh +67 -0
  245. package/skills/zeroize-audit/tools/emit_ir.sh +77 -0
  246. package/skills/zeroize-audit/tools/emit_rust_asm.sh +178 -0
  247. package/skills/zeroize-audit/tools/emit_rust_ir.sh +150 -0
  248. package/skills/zeroize-audit/tools/emit_rust_mir.sh +158 -0
  249. package/skills/zeroize-audit/tools/extract_compile_flags.py +284 -0
  250. package/skills/zeroize-audit/tools/generate_poc.py +1329 -0
  251. package/skills/zeroize-audit/tools/mcp/apply_confidence_gates.py +113 -0
  252. package/skills/zeroize-audit/tools/mcp/check_mcp.sh +68 -0
  253. package/skills/zeroize-audit/tools/mcp/normalize_mcp_evidence.py +125 -0
  254. package/skills/zeroize-audit/tools/scripts/check_llvm_patterns.py +481 -0
  255. package/skills/zeroize-audit/tools/scripts/check_mir_patterns.py +554 -0
  256. package/skills/zeroize-audit/tools/scripts/check_rust_asm.py +424 -0
  257. package/skills/zeroize-audit/tools/scripts/check_rust_asm_aarch64.py +300 -0
  258. package/skills/zeroize-audit/tools/scripts/check_rust_asm_x86.py +283 -0
  259. package/skills/zeroize-audit/tools/scripts/find_dangerous_apis.py +375 -0
  260. package/skills/zeroize-audit/tools/scripts/semantic_audit.py +923 -0
  261. package/skills/zeroize-audit/tools/track_dataflow.sh +196 -0
  262. package/skills/zeroize-audit/tools/validate_rust_toolchain.sh +298 -0
  263. package/skills/zeroize-audit/workflows/phase-0-preflight.md +150 -0
  264. package/skills/zeroize-audit/workflows/phase-1-source-analysis.md +144 -0
  265. package/skills/zeroize-audit/workflows/phase-2-compiler-analysis.md +139 -0
  266. package/skills/zeroize-audit/workflows/phase-3-interim-report.md +46 -0
  267. package/skills/zeroize-audit/workflows/phase-4-poc-generation.md +46 -0
  268. package/skills/zeroize-audit/workflows/phase-5-poc-validation.md +136 -0
  269. package/skills/zeroize-audit/workflows/phase-6-final-report.md +44 -0
  270. package/skills/zeroize-audit/workflows/phase-7-test-generation.md +42 -0
  271. package/themes/piolium-srcery.json +94 -0
@@ -0,0 +1,429 @@
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.11"
4
+ # dependencies = []
5
+ # ///
6
+ """
7
+ Semantic LLVM IR analyzer for zeroization detection.
8
+
9
+ This tool parses LLVM IR structurally (not just regex) to detect:
10
+ - Memory operations in SSA form (mem2reg output)
11
+ - Loop-unrolled zeroization patterns
12
+ - Complex optimization transformations
13
+ - Store/load chains that affect zeroization
14
+ """
15
+
16
+ import argparse
17
+ import json
18
+ import re
19
+ import sys
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+
23
+
24
+ @dataclass
25
+ class IRInstruction:
26
+ """Represents an LLVM IR instruction."""
27
+
28
+ line_num: int
29
+ opcode: str
30
+ operands: list[str]
31
+ result: str | None
32
+ raw_line: str
33
+ metadata: dict[str, str] = field(default_factory=dict)
34
+
35
+
36
@dataclass
class BasicBlock:
    """A labelled run of instructions within a function, plus its CFG edges."""

    # Block label as written in the IR (without the trailing colon).
    label: str
    # Instructions in source order.
    instructions: list[IRInstruction]
    # Labels of blocks this block can branch to.
    successors: list[str] = field(default_factory=list)
    # Labels of blocks that branch to this block.
    predecessors: list[str] = field(default_factory=list)
44
+
45
+
46
@dataclass
class Function:
    """A function definition parsed from LLVM IR."""

    # Function name without the leading "@".
    name: str
    # Basic blocks keyed by label, in the order they were encountered.
    basic_blocks: dict[str, BasicBlock]
    # Label of the first block seen in the function, or None if it has no blocks.
    entry_block: str | None = None
    # Raw text of each parameter from the definition's parameter list.
    arguments: list[str] = field(default_factory=list)
54
+
55
+
56
class SemanticIRAnalyzer:
    """Line-oriented structural analyzer for textual LLVM IR (``.ll``) files.

    ``parse_ir`` builds ``self.functions`` (name -> ``Function``); the
    ``detect_*`` / ``analyze_*`` methods then inspect a parsed function for
    zeroization-related instructions. The parser is a heuristic line scanner,
    not a full LLVM IR grammar.
    """

    # Label given to an entry block that has no explicit label in the IR text.
    # The entry block can never be a branch target, so the name is only cosmetic.
    _IMPLICIT_ENTRY_LABEL = "entry"

    # Markers that may precede the `call` opcode (e.g. `tail call void @f()`).
    _CALL_MARKERS = frozenset({"tail", "musttail", "notail"})
    # Opcodes that transfer control to a callee.
    _CALL_OPCODES = frozenset({"call", "invoke"})
    # Opcodes whose text names successor blocks via `label %name`.
    _BRANCHING_OPCODES = frozenset({"br", "switch", "indirectbr", "invoke", "callbr"})
    # Substrings identifying dedicated secure-wipe routines.
    _SECURE_WIPE_NAMES = ("explicit_bzero", "OPENSSL_cleanse", "sodium_memzero", "zeroize")

    # An LLVM identifier body: either a quoted string or [-a-zA-Z$._0-9]+.
    _IDENT = r'(?:"[^"]*"|[-\w$.]+)'
    _LABEL_RE = re.compile(rf"^({_IDENT}):")
    _GLOBAL_RE = re.compile(rf"@({_IDENT})")
    _CALLEE_RE = re.compile(rf"@({_IDENT})\s*\(")
    _INDIRECT_CALLEE_RE = re.compile(rf"%{_IDENT}\s*\(")
    _ASSIGN_RE = re.compile(rf"^(%{_IDENT})\s*=\s*(.*)$")
    _BRANCH_TARGET_RE = re.compile(rf"\blabel\s+%({_IDENT})")
    # Stored value is an integer zero of any width, or a zeroinitializer.
    _ZERO_VALUE_RE = re.compile(r"(?:\bi\d+\s+0|\bzeroinitializer)$")
    # `store volatile ...` / `store atomic volatile ...` (keyword position only).
    _VOLATILE_STORE_RE = re.compile(r"^store\s+(?:atomic\s+)?volatile\b")

    def __init__(self, ir_file: Path, config: dict):
        """Remember the IR path and configuration; nothing is read until ``parse_ir``."""
        self.ir_file = ir_file
        self.config = config
        self.functions: dict[str, Function] = {}
        self.current_function: Function | None = None
        self.current_block: BasicBlock | None = None

    @staticmethod
    def _unquote(name: str) -> str:
        """Strip one pair of surrounding double quotes from an identifier, if present."""
        if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
            return name[1:-1]
        return name

    def parse_ir(self) -> None:
        """Parse the IR file into ``self.functions``.

        Only ``define`` bodies are parsed; everything outside a function is
        ignored. A function is registered when its closing ``}`` is seen.

        Raises:
            OSError: if the IR file cannot be opened or read.
        """
        with open(self.ir_file) as f:
            lines = f.readlines()

        # Pieces of a `switch` whose case table spans several lines, and the
        # line number on which that switch started.
        pending: list[str] = []
        pending_line = 0

        for line_num, raw in enumerate(lines, start=1):
            line = raw.strip()

            # Skip blank lines and whole-line comments.
            if not line or line.startswith(";"):
                continue

            if line.startswith("define "):
                pending = []
                self._parse_function_def(line)
                continue

            if self.current_function is None:
                continue

            if line == "}":
                pending = []
                self._finish_function()
                continue

            inst_line = line_num
            if pending:
                # Keep collecting the case table until its closing bracket.
                pending.append(line)
                if "]" not in line:
                    continue
                line, inst_line = " ".join(pending), pending_line
                pending = []
            else:
                label_match = self._LABEL_RE.match(line)
                if label_match:
                    self._open_block(self._unquote(label_match.group(1)))
                    continue
                if line.startswith("switch ") and "[" in line and "]" not in line:
                    pending, pending_line = [line], line_num
                    continue

            # The entry block may be unlabelled in the IR text; open an implicit
            # one so its instructions are not lost.
            if self.current_block is None:
                self._open_block(self._IMPLICIT_ENTRY_LABEL)

            inst = self._parse_instruction(line, inst_line)
            if inst:
                self.current_block.instructions.append(inst)
                self._update_control_flow(inst)

    def _open_block(self, label: str) -> None:
        """Start a new basic block in the current function and make it current."""
        block = BasicBlock(label=label, instructions=[])
        self.current_function.basic_blocks[label] = block
        if not self.current_function.entry_block:
            self.current_function.entry_block = label
        self.current_block = block

    def _finish_function(self) -> None:
        """Link predecessors, register the current function, and reset parser state."""
        func = self.current_function
        blocks = func.basic_blocks

        # Predecessors are derived once all blocks exist so that forward
        # branches (to blocks defined later in the text) are linked too.
        for block in blocks.values():
            block.predecessors = []
        for block in blocks.values():
            for target in dict.fromkeys(block.successors):
                successor = blocks.get(target)
                if successor is not None:
                    successor.predecessors.append(block.label)

        self.functions[func.name] = func
        self.current_function = None
        self.current_block = None

    def _parse_function_def(self, line: str) -> None:
        """Start a new ``Function`` from a ``define`` line.

        The name is the ``@identifier`` directly followed by ``(``; the
        parameter list is the balanced parenthesised text after it. If no name
        can be found, no function becomes current and the body is skipped.
        """
        self.current_function = None
        self.current_block = None

        match = self._CALLEE_RE.search(line)
        if not match:
            return

        func = Function(name=self._unquote(match.group(1)), basic_blocks={})

        # Scan for the parenthesis that closes the parameter list, honouring
        # nesting such as `dereferenceable(8)` or function-pointer types.
        open_idx = match.end() - 1
        depth = 0
        for idx in range(open_idx, len(line)):
            char = line[idx]
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                if depth == 0:
                    func.arguments = self._parse_operands(line[open_idx + 1 : idx])
                    break

        self.current_function = func

    def _parse_instruction(self, line: str, line_num: int) -> IRInstruction | None:
        """Split one instruction line into result, opcode and operands.

        Returns ``None`` when the line has no opcode token. A ``tail`` /
        ``musttail`` / ``notail`` marker in front of ``call`` is folded away so
        the opcode is reported as ``call``.
        """
        result = None
        rest = line

        # Only a leading `%name =` is an assignment; an `=` elsewhere on the
        # line (e.g. inside a string or metadata) is not.
        assignment = self._ASSIGN_RE.match(line)
        if assignment:
            result = assignment.group(1)
            rest = assignment.group(2)

        tokens = rest.split(None, 1)
        if not tokens:
            return None

        opcode = tokens[0]
        operands_str = tokens[1] if len(tokens) > 1 else ""

        if opcode in self._CALL_MARKERS:
            after_marker = operands_str.split(None, 1)
            if after_marker and after_marker[0] == "call":
                opcode = "call"
                operands_str = after_marker[1] if len(after_marker) > 1 else ""

        return IRInstruction(
            line_num=line_num,
            opcode=opcode,
            operands=self._parse_operands(operands_str),
            result=result,
            raw_line=line,
        )

    def _parse_operands(self, operands_str: str) -> list[str]:
        """Split operand text on top-level commas.

        Commas nested inside ``()``, ``[]``, ``{}``, ``<>`` or a double-quoted
        string do not split. Each piece is stripped; empty pieces are dropped.
        """
        operands = []
        current: list[str] = []
        depth = 0
        in_string = False

        for char in operands_str:
            if char == '"':
                in_string = not in_string
            elif not in_string:
                if char in "([{<":
                    depth += 1
                elif char in ")]}>":
                    # Clamp so a stray closer cannot disable splitting for the rest.
                    depth = max(depth - 1, 0)
                elif char == "," and depth == 0:
                    piece = "".join(current).strip()
                    if piece:
                        operands.append(piece)
                    current = []
                    continue
            current.append(char)

        piece = "".join(current).strip()
        if piece:
            operands.append(piece)

        return operands

    def _update_control_flow(self, inst: IRInstruction) -> None:
        """Record the successor labels named by a branching instruction.

        Every ``label %name`` occurrence in the instruction text is appended to
        the current block's successors. Predecessor lists are filled in when the
        function is closed. Other opcodes (including ``ret``) add no edges.
        """
        if not self.current_block:
            return
        if inst.opcode not in self._BRANCHING_OPCODES:
            return

        targets = [
            self._unquote(name) for name in self._BRANCH_TARGET_RE.findall(inst.raw_line)
        ]
        self.current_block.successors.extend(targets)

    def find_memory_operations(self, func: Function) -> dict[str, list[IRInstruction]]:
        """Bucket a function's instructions by memory-operation kind.

        Returns a dict with keys ``store``, ``load``, ``memset``, ``memcpy`` and
        ``call``; ``call`` holds only calls to known secure-wipe routines.
        """
        mem_ops = {"store": [], "load": [], "memset": [], "memcpy": [], "call": []}

        for bb in func.basic_blocks.values():
            for inst in bb.instructions:
                if inst.opcode in ("store", "load"):
                    mem_ops[inst.opcode].append(inst)
                elif inst.opcode in self._CALL_OPCODES:
                    call_target = self._extract_call_target(inst)
                    # "memset"/"memcpy" also match the llvm.mem* intrinsics.
                    if "memset" in call_target:
                        mem_ops["memset"].append(inst)
                    elif "memcpy" in call_target:
                        mem_ops["memcpy"].append(inst)
                    elif any(fn in call_target for fn in self._SECURE_WIPE_NAMES):
                        mem_ops["call"].append(inst)

        return mem_ops

    def _extract_call_target(self, inst: IRInstruction) -> str:
        """Return the callee name of a call, or ``""`` if none can be determined.

        Preference order per operand: an ``@name`` directly followed by ``(``
        (direct call); nothing for a ``%value(`` callee (indirect call, so a
        global passed as an argument is not mistaken for the callee); otherwise
        the first ``@name`` present (e.g. a callee wrapped in a constant cast).
        """
        for op in inst.operands:
            direct = self._CALLEE_RE.search(op)
            if direct:
                return self._unquote(direct.group(1))
            if self._INDIRECT_CALLEE_RE.search(op):
                return ""
            fallback = self._GLOBAL_RE.search(op)
            if fallback:
                return self._unquote(fallback.group(1))
        return ""

    def detect_loop_unrolled_wipes(self, func: Function) -> list[dict]:
        """Flag blocks that look like an unrolled zeroing loop.

        A block is reported when it contains at least four stores of a zero
        value (integer zero of any width, or ``zeroinitializer``) whose address
        operands differ only in their digits, e.g.::

            store i8 0, i8* %ptr.0
            store i8 0, i8* %ptr.1
            store i8 0, i8* %ptr.2
            ...
        """
        findings = []

        for bb_label, bb in func.basic_blocks.items():
            zero_stores = [
                inst
                for inst in bb.instructions
                if inst.opcode == "store"
                and inst.operands
                and self._ZERO_VALUE_RE.search(inst.operands[0])
            ]

            if len(zero_stores) < 4:
                continue

            addresses = [self._extract_store_address(inst) for inst in zero_stores]
            if self._are_sequential_addresses(addresses):
                findings.append(
                    {
                        "type": "LOOP_UNROLLED_WIPE",
                        "block": bb_label,
                        "count": len(zero_stores),
                        "first_line": zero_stores[0].line_num,
                        "evidence": (
                            f"Found {len(zero_stores)} consecutive zero stores"
                            " (likely unrolled loop)"
                        ),
                    }
                )

        return findings

    def _extract_store_address(self, inst: IRInstruction) -> str:
        """Return the pointer operand of a store (its second operand), or ``""``."""
        # store <type> <value>, <type> <pointer>
        if len(inst.operands) >= 2:
            return inst.operands[1]
        return ""

    def _are_sequential_addresses(self, addresses: list[str]) -> bool:
        """Heuristic: True if all addresses are equal once digits are removed.

        For example ``%ptr.0``, ``%ptr.1``, ``%ptr.2``. Fewer than two addresses
        never count as sequential.
        """
        if len(addresses) < 2:
            return False

        base_pattern = re.sub(r"\d+", "", addresses[0])
        return all(re.sub(r"\d+", "", addr) == base_pattern for addr in addresses[1:])

    def detect_volatile_stores(self, func: Function) -> list[IRInstruction]:
        """Return the function's ``store volatile`` instructions.

        Only the ``volatile`` keyword in its syntactic position after ``store``
        (optionally after ``atomic``) counts; an operand whose name merely
        contains "volatile" does not.
        """
        return [
            inst
            for bb in func.basic_blocks.values()
            for inst in bb.instructions
            if inst.opcode == "store" and self._VOLATILE_STORE_RE.match(inst.raw_line)
        ]

    def analyze_mem2reg_output(self, func: Function) -> dict:
        """Summarise SSA-form indicators for a function.

        Counts ``phi`` instructions and other instructions that assign a
        ``%`` result. ``has_mem2reg`` is a heuristic: True when at least one
        phi node is present.
        """
        phi_count = 0
        register_ops = 0

        for bb in func.basic_blocks.values():
            for inst in bb.instructions:
                if inst.opcode == "phi":
                    phi_count += 1
                elif inst.result and inst.result.startswith("%"):
                    register_ops += 1

        return {
            "phi_count": phi_count,
            "register_ops": register_ops,
            "has_mem2reg": phi_count > 0,
        }

    def analyze_function(self, func_name: str) -> dict:
        """Run every detector on one parsed function and return a JSON-ready report.

        Returns ``{"error": ...}`` when ``func_name`` was not parsed.
        ``has_zeroization`` is True when the function contains a memset call, a
        secure-wipe call, or a volatile store.
        """
        if func_name not in self.functions:
            return {"error": f"Function {func_name} not found"}

        func = self.functions[func_name]

        mem_ops = self.find_memory_operations(func)
        loop_unrolled = self.detect_loop_unrolled_wipes(func)
        volatile_stores = self.detect_volatile_stores(func)
        mem2reg_info = self.analyze_mem2reg_output(func)

        has_wipe = (
            len(mem_ops["memset"]) > 0 or len(mem_ops["call"]) > 0 or len(volatile_stores) > 0
        )

        return {
            "function": func_name,
            "basic_blocks": len(func.basic_blocks),
            "memory_operations": {
                "stores": len(mem_ops["store"]),
                "loads": len(mem_ops["load"]),
                "memset_calls": len(mem_ops["memset"]),
                "secure_wipe_calls": len(mem_ops["call"]),
                "volatile_stores": len(volatile_stores),
            },
            "patterns": {
                "loop_unrolled_wipes": loop_unrolled,
                "has_volatile_stores": len(volatile_stores) > 0,
            },
            "ssa_analysis": mem2reg_info,
            "has_zeroization": has_wipe,
            "wipe_instructions": [
                {"line": inst.line_num, "type": "memset", "raw": inst.raw_line}
                for inst in mem_ops["memset"]
            ]
            + [
                {"line": inst.line_num, "type": "secure_call", "raw": inst.raw_line}
                for inst in mem_ops["call"]
            ]
            + [
                {"line": inst.line_num, "type": "volatile_store", "raw": inst.raw_line}
                for inst in volatile_stores
            ],
        }
383
+
384
+
385
def main():
    """Command-line entry point: parse an IR file and write the analysis as JSON.

    Exits with status 1 if the IR file cannot be read. The parent directory
    of ``--out`` is created when missing.
    """
    cli = argparse.ArgumentParser(description="Semantic LLVM IR analyzer")
    cli.add_argument("--ir", required=True, help="LLVM IR file (.ll)")
    cli.add_argument("--function", help="Specific function to analyze (default: all)")
    cli.add_argument("--config", help="Configuration YAML file")
    cli.add_argument("--out", required=True, help="Output JSON file")
    opts = cli.parse_args()

    # --config is accepted but not loaded; the analyzer always gets an empty config.
    analyzer = SemanticIRAnalyzer(Path(opts.ir), {})
    try:
        analyzer.parse_ir()
    except OSError as e:
        print(f"Error: cannot read IR file {opts.ir}: {e}", file=sys.stderr)
        sys.exit(1)

    # Either the single requested function or every parsed function.
    targets = [opts.function] if opts.function else list(analyzer.functions)
    report = {
        "ir_file": opts.ir,
        "functions_found": len(analyzer.functions),
        "analyses": [analyzer.analyze_function(name) for name in targets],
    }

    destination = Path(opts.out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w") as f:
        json.dump(report, f, indent=2)

    print(f"OK: semantic IR analysis written to {opts.out}")
426
+
427
+
428
+ if __name__ == "__main__":
429
+ main()
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ # Normalize and diff LLVM IR across one or more optimization levels.
5
+ #
6
+ # Usage (two-file, backward-compatible):
7
+ # diff_ir.sh <O0.ll> <O2.ll>
8
+ #
9
+ # Usage (multi-level — recommended):
10
+ # diff_ir.sh <O0.ll> <O1.ll> <O2.ll> [<O3.ll> ...]
11
+ #
12
+ # Output:
13
+ # - Prints a unified diff for each pair of adjacent files.
14
+ # - For 3+ files, also prints a WIPE PATTERN SUMMARY identifying the first
15
+ # optimization level at which zeroization patterns disappear.
16
+ # - Returns exit code 0 if all files are identical, 1 if any diffs found (or diff itself fails), and 2 on usage errors or missing input files.
17
+ #
18
+ # Wipe patterns detected in the summary:
19
+ # llvm.memset, volatile, explicit_bzero, sodium_memzero, OPENSSL_cleanse,
20
+ # SecureZeroMemory, memset_s, store i8 0, store i64 0, store i32 0
21
+
22
usage() {
  # Print the one-line synopsis to stderr. The exit status is left to the
  # caller, so this can be reused from any error path.
  printf 'Usage: %s <baseline.ll> <file2.ll> [<file3.ll> ...]\n' "$0" >&2
}
25
+
26
# A baseline plus at least one file to compare against is required.
if (( $# < 2 )); then
  usage
  exit 2
fi

# Every argument must name an existing regular file; stop at the first one
# that does not.
for path in "$@"; do
  [[ -f "$path" ]] && continue
  echo "Missing file: $path" >&2
  exit 2
done
37
+
38
norm() {
  # Filter LLVM IR from stdin to stdout, dropping text that changes often
  # between builds:
  #   - everything from the first ';' on a line (IR comments),
  #   - lines that are empty or whitespace-only (including lines emptied by
  #     the comment rule above),
  #   - the source_filename / target datalayout / target triple headers,
  #   - lines that start with "!llvm." or "!DI" (metadata noise).
  # Keep it simple and safe: lines are only stripped, never rewritten.
  #
  # [[:space:]] is used instead of \s because \s is a GNU extension that POSIX
  # ERE leaves undefined; a non-GNU sed may not treat it as whitespace.
  # The single ^!DI rule replaces the former per-kind rules (!DIFile,
  # !DILocation, ...), all of which it already matched.
  sed -E \
    -e 's/;.*$//' \
    -e '/^[[:space:]]*$/d' \
    -e '/^source_filename = /d' \
    -e '/^target datalayout = /d' \
    -e '/^target triple = /d' \
    -e '/^!llvm\./d' \
    -e '/^!DI/d'
}
55
+
56
has_wipe_pattern() {
  # Succeed (status 0) when file $1 has at least one line matching any of the
  # zeroization markers below; otherwise return grep's non-zero status.
  local -a markers=(
    'llvm\.memset'
    'volatile'
    'explicit_bzero'
    'sodium_memzero'
    'OPENSSL_cleanse'
    'SecureZeroMemory'
    'memset_s'
    'store i8 0'
    'store i64 0'
    'store i32 0'
  )
  local alternation
  # Join the markers with '|' into a single extended-regex alternation.
  alternation="$(IFS='|'; printf '%s' "${markers[*]}")"
  grep -qE "$alternation" "$1"
}
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Normalize all input files into temp files.
65
+ # ---------------------------------------------------------------------------
66
# Keep the original paths (used later for labels) and their count.
FILES=("$@")
NUM_FILES=$#

# Scratch directory for the normalized copies; removed on any exit path.
TMPDIR_BASE="$(mktemp -d -t za-ir-XXXXXX)"
trap 'rm -rf "$TMPDIR_BASE"' EXIT

# NORMFILES[k] is the normalized counterpart of FILES[k].
NORMFILES=()
for ((idx = 0; idx < NUM_FILES; idx++)); do
  NORMFILES+=("$TMPDIR_BASE/norm_${idx}.ll")
  norm <"${FILES[idx]}" >"${NORMFILES[idx]}"
done
78
+
79
+ # ---------------------------------------------------------------------------
80
+ # Two-file mode: backward-compatible, single diff, no summary.
81
+ # ---------------------------------------------------------------------------
82
if (( NUM_FILES == 2 )); then
  # Capture diff's status without tripping `set -e`.
  diff_rc=0
  diff -u "${NORMFILES[0]}" "${NORMFILES[1]}" || diff_rc=$?
  case "$diff_rc" in
    2)
      # Status 2 means diff itself failed; report it and exit 1.
      echo "diff_ir.sh: diff failed (internal error)" >&2
      exit 1
      ;;
    *)
      # Any other status (0 = identical, 1 = differences) is passed through.
      exit "$diff_rc"
      ;;
  esac
fi
91
+
92
+ # ---------------------------------------------------------------------------
93
+ # Multi-file mode: pairwise diffs between adjacent files + wipe summary.
94
+ # ---------------------------------------------------------------------------
95
any_diff=0

# Print a unified diff for every pair of adjacent normalized files.
# Globals:
#   NUM_FILES, FILES, NORMFILES (read); any_diff (set to 1 on any non-zero diff status)
# Outputs:
#   A "=== DIFF ... ===" header, the diff, and a blank line per pair on stdout.
#   When diff exits with status >= 2 (diff itself failed rather than finding
#   differences), the same diagnostic two-file mode uses is written to stderr
#   so that a failure is not mistaken for an ordinary difference.
diff_adjacent_pairs() {
  local i j a_label b_label rc
  for ((i = 0; i < NUM_FILES - 1; i++)); do
    j=$((i + 1))
    a_label="$(basename "${FILES[$i]}")"
    b_label="$(basename "${FILES[$j]}")"
    echo "=== DIFF File $((i + 1)) ($a_label) vs File $((j + 1)) ($b_label) ==="
    # Capture the status explicitly so `set -e` does not abort on differences.
    rc=0
    diff -u "${NORMFILES[$i]}" "${NORMFILES[$j]}" || rc=$?
    if [[ $rc -ne 0 ]]; then
      any_diff=1
    fi
    if [[ $rc -ge 2 ]]; then
      echo "diff_ir.sh: diff failed (internal error)" >&2
    fi
    echo ""
  done
}

diff_adjacent_pairs
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # Wipe pattern summary: identify first file where wipe disappears.
110
+ # ---------------------------------------------------------------------------
111
# Print the per-file wipe status and, if applicable, where the wipe vanished.
# Globals:
#   FILES, NORMFILES (read)
# Outputs:
#   One PRESENT/ABSENT line per file on stdout, then a verdict.
# A disappearance is reported only for the first file that lacks wipe patterns
# AFTER an earlier file contained them. Previously a baseline that already had
# no wipe pattern was reported as "wipe present at lower opt level(s) but
# absent here", which is false when nothing was present before it.
print_wipe_summary() {
  local i label
  local seen_present=0 any_absent=0 first_gone=-1

  echo "=== WIPE PATTERN SUMMARY ==="
  for i in "${!NORMFILES[@]}"; do
    label="$(basename "${FILES[$i]}")"
    if has_wipe_pattern "${NORMFILES[$i]}"; then
      echo " File $((i + 1)) ($label): WIPE PRESENT"
      seen_present=1
    else
      echo " File $((i + 1)) ($label): WIPE ABSENT"
      any_absent=1
      # Only count it as a disappearance if a wipe was seen in an earlier file.
      if [[ $first_gone -eq -1 && $seen_present -eq 1 ]]; then
        first_gone=$i
      fi
    fi
  done

  echo ""
  if [[ $first_gone -ne -1 ]]; then
    label="$(basename "${FILES[$first_gone]}")"
    echo " First disappearance at File $((first_gone + 1)) ($label)."
    echo " Evidence: OPTIMIZED_AWAY_ZEROIZE — wipe present at lower opt level(s) but absent here."
  elif [[ $any_absent -eq 1 ]]; then
    echo " No wipe disappearance detected: wipe patterns were never present before the file(s) marked ABSENT."
  else
    echo " Wipe patterns present at all opt levels analyzed."
  fi
}

print_wipe_summary
134
+
135
# Exit with the flag set by the pairwise diffs: 0 if none reported a
# difference, 1 otherwise.
exit "${any_diff}"