@vigolium/piolium 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +117 -0
- package/agents/access-auditor.md +300 -0
- package/agents/assumption-breaker.md +154 -0
- package/agents/attack-designer.md +116 -0
- package/agents/code-scanner.md +139 -0
- package/agents/concurrency-auditor.md +238 -0
- package/agents/confirm-writer.md +257 -0
- package/agents/context-reviewer.md +274 -0
- package/agents/cross-verifier.md +165 -0
- package/agents/cve-scout.md +381 -0
- package/agents/env-builder.md +282 -0
- package/agents/env-profiler.md +205 -0
- package/agents/evidence-collector.md +140 -0
- package/agents/finding-grader.md +142 -0
- package/agents/finding-writer.md +148 -0
- package/agents/flow-tracer.md +106 -0
- package/agents/goal-backtracer.md +146 -0
- package/agents/history-miner.md +467 -0
- package/agents/independent-verifier.md +118 -0
- package/agents/intent-mapper.md +183 -0
- package/agents/longshot-collector.md +128 -0
- package/agents/longshot-prober.md +126 -0
- package/agents/patch-auditor.md +73 -0
- package/agents/poc-author.md +124 -0
- package/agents/poc-runner.md +194 -0
- package/agents/probe-lead.md +269 -0
- package/agents/red-challenger.md +101 -0
- package/agents/report-composer.md +208 -0
- package/agents/review-adjudicator.md +216 -0
- package/agents/spec-auditor.md +155 -0
- package/agents/taint-tracer.md +265 -0
- package/agents/test-locator.md +209 -0
- package/agents/threat-modeler.md +132 -0
- package/agents/variant-scanner.md +108 -0
- package/agents/variant-spotter.md +110 -0
- package/bin/piolium.mjs +376 -0
- package/extensions/piolium/_vendor/yaml.bundle.d.mts +6 -0
- package/extensions/piolium/_vendor/yaml.bundle.mjs +139 -0
- package/extensions/piolium/agent-runner.ts +322 -0
- package/extensions/piolium/agents.ts +266 -0
- package/extensions/piolium/audit-state.ts +522 -0
- package/extensions/piolium/bundled-resources.ts +97 -0
- package/extensions/piolium/candidate-scan.ts +966 -0
- package/extensions/piolium/command-target.ts +177 -0
- package/extensions/piolium/console-stream.ts +57 -0
- package/extensions/piolium/export-results.ts +380 -0
- package/extensions/piolium/findings.ts +448 -0
- package/extensions/piolium/heartbeat.ts +182 -0
- package/extensions/piolium/help.ts +234 -0
- package/extensions/piolium/index.ts +1865 -0
- package/extensions/piolium/longshot.ts +530 -0
- package/extensions/piolium/matcher-suggestions.ts +196 -0
- package/extensions/piolium/matcher-utils.ts +83 -0
- package/extensions/piolium/modes/balanced.ts +750 -0
- package/extensions/piolium/modes/confirm-bootstrap.ts +186 -0
- package/extensions/piolium/modes/confirm.ts +697 -0
- package/extensions/piolium/modes/deep.ts +917 -0
- package/extensions/piolium/modes/diff.ts +177 -0
- package/extensions/piolium/modes/lite.ts +540 -0
- package/extensions/piolium/modes/longshot.ts +595 -0
- package/extensions/piolium/modes/merge.ts +204 -0
- package/extensions/piolium/modes/phase-runner.ts +267 -0
- package/extensions/piolium/modes/reinvest.ts +546 -0
- package/extensions/piolium/modes/revisit.ts +279 -0
- package/extensions/piolium/modes.ts +48 -0
- package/extensions/piolium/phase-labels.ts +123 -0
- package/extensions/piolium/phase-status-strip.ts +92 -0
- package/extensions/piolium/prompt-prefix-editor.ts +39 -0
- package/extensions/piolium/providers/anthropic-vertex.ts +836 -0
- package/extensions/piolium/recon.ts +409 -0
- package/extensions/piolium/result-stats.ts +105 -0
- package/extensions/piolium/retry.ts +120 -0
- package/extensions/piolium/scheduler.ts +212 -0
- package/extensions/piolium/secrets.ts +368 -0
- package/extensions/piolium/tools/web-tools.ts +148 -0
- package/package.json +77 -0
- package/skills/agentic-actions-auditor/SKILL.md +327 -0
- package/skills/agentic-actions-auditor/references/action-profiles.md +186 -0
- package/skills/agentic-actions-auditor/references/cross-file-resolution.md +209 -0
- package/skills/agentic-actions-auditor/references/foundations.md +94 -0
- package/skills/agentic-actions-auditor/references/vector-a-env-var-intermediary.md +77 -0
- package/skills/agentic-actions-auditor/references/vector-b-direct-expression-injection.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-c-cli-data-fetch.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-d-pr-target-checkout.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-e-error-log-injection.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-f-subshell-expansion.md +82 -0
- package/skills/agentic-actions-auditor/references/vector-g-eval-of-ai-output.md +91 -0
- package/skills/agentic-actions-auditor/references/vector-h-dangerous-sandbox-configs.md +102 -0
- package/skills/agentic-actions-auditor/references/vector-i-wildcard-allowlists.md +88 -0
- package/skills/audit/SKILL.md +562 -0
- package/skills/audit/assets/icon.svg +7 -0
- package/skills/audit/hooks/scripts/validate_phase_output.py +550 -0
- package/skills/audit/references/adversarial-review.md +148 -0
- package/skills/audit/references/architecture-aware-sast.md +306 -0
- package/skills/audit/references/audit-workflow.md +737 -0
- package/skills/audit/references/chamber-protocol.md +384 -0
- package/skills/audit/references/creative-attack-modes.md +221 -0
- package/skills/audit/references/deep-analysis.md +273 -0
- package/skills/audit/references/domain-attack-playbooks.md +1129 -0
- package/skills/audit/references/knowledge-base-template.md +513 -0
- package/skills/audit/references/real-env-validation.md +191 -0
- package/skills/audit/references/report-templates.md +417 -0
- package/skills/audit/references/triage-and-prereqs.md +134 -0
- package/skills/audit/scripts/consolidate_drafts.py +554 -0
- package/skills/audit/scripts/partition_findings.py +152 -0
- package/skills/audit/scripts/rg-hotspots.sh +121 -0
- package/skills/audit/scripts/stamp_file_state.py +349 -0
- package/skills/code-reviewer/SKILL.md +65 -0
- package/skills/codeql/SKILL.md +281 -0
- package/skills/codeql/references/build-fixes.md +90 -0
- package/skills/codeql/references/diagnostic-query-templates.md +339 -0
- package/skills/codeql/references/extension-yaml-format.md +209 -0
- package/skills/codeql/references/important-only-suite.md +153 -0
- package/skills/codeql/references/language-details.md +207 -0
- package/skills/codeql/references/macos-arm64e-workaround.md +179 -0
- package/skills/codeql/references/performance-tuning.md +111 -0
- package/skills/codeql/references/quality-assessment.md +172 -0
- package/skills/codeql/references/ruleset-catalog.md +63 -0
- package/skills/codeql/references/run-all-suite.md +92 -0
- package/skills/codeql/references/sarif-processing.md +79 -0
- package/skills/codeql/references/threat-models.md +51 -0
- package/skills/codeql/workflows/build-database.md +280 -0
- package/skills/codeql/workflows/create-data-extensions.md +261 -0
- package/skills/codeql/workflows/run-analysis.md +301 -0
- package/skills/differential-review/SKILL.md +220 -0
- package/skills/differential-review/adversarial.md +203 -0
- package/skills/differential-review/methodology.md +234 -0
- package/skills/differential-review/patterns.md +300 -0
- package/skills/differential-review/reporting.md +369 -0
- package/skills/fp-check/SKILL.md +125 -0
- package/skills/fp-check/references/bug-class-verification.md +114 -0
- package/skills/fp-check/references/deep-verification.md +143 -0
- package/skills/fp-check/references/evidence-templates.md +91 -0
- package/skills/fp-check/references/false-positive-patterns.md +115 -0
- package/skills/fp-check/references/gate-reviews.md +27 -0
- package/skills/fp-check/references/standard-verification.md +78 -0
- package/skills/insecure-defaults/SKILL.md +117 -0
- package/skills/insecure-defaults/references/examples.md +409 -0
- package/skills/last30days/SKILL.md +444 -0
- package/skills/sarif-parsing/SKILL.md +483 -0
- package/skills/sarif-parsing/resources/jq-queries.md +162 -0
- package/skills/sarif-parsing/resources/sarif_helpers.py +331 -0
- package/skills/security-threat-model/LICENSE.txt +201 -0
- package/skills/security-threat-model/SKILL.md +81 -0
- package/skills/security-threat-model/agents/openai.yaml +4 -0
- package/skills/security-threat-model/references/prompt-template.md +255 -0
- package/skills/security-threat-model/references/security-controls-and-assets.md +32 -0
- package/skills/semgrep/SKILL.md +212 -0
- package/skills/semgrep/references/rulesets.md +162 -0
- package/skills/semgrep/references/scan-modes.md +110 -0
- package/skills/semgrep/references/scanner-task-prompt.md +140 -0
- package/skills/semgrep/scripts/merge_sarif.py +203 -0
- package/skills/semgrep/workflows/scan-workflow.md +311 -0
- package/skills/semgrep-rule-creator/SKILL.md +168 -0
- package/skills/semgrep-rule-creator/references/quick-reference.md +202 -0
- package/skills/semgrep-rule-creator/references/workflow.md +240 -0
- package/skills/semgrep-rule-variant-creator/SKILL.md +205 -0
- package/skills/semgrep-rule-variant-creator/references/applicability-analysis.md +250 -0
- package/skills/semgrep-rule-variant-creator/references/language-syntax-guide.md +324 -0
- package/skills/semgrep-rule-variant-creator/references/workflow.md +518 -0
- package/skills/sharp-edges/SKILL.md +292 -0
- package/skills/sharp-edges/references/auth-patterns.md +252 -0
- package/skills/sharp-edges/references/case-studies.md +274 -0
- package/skills/sharp-edges/references/config-patterns.md +333 -0
- package/skills/sharp-edges/references/crypto-apis.md +190 -0
- package/skills/sharp-edges/references/lang-c.md +205 -0
- package/skills/sharp-edges/references/lang-csharp.md +285 -0
- package/skills/sharp-edges/references/lang-go.md +270 -0
- package/skills/sharp-edges/references/lang-java.md +263 -0
- package/skills/sharp-edges/references/lang-javascript.md +269 -0
- package/skills/sharp-edges/references/lang-kotlin.md +265 -0
- package/skills/sharp-edges/references/lang-php.md +245 -0
- package/skills/sharp-edges/references/lang-python.md +274 -0
- package/skills/sharp-edges/references/lang-ruby.md +273 -0
- package/skills/sharp-edges/references/lang-rust.md +272 -0
- package/skills/sharp-edges/references/lang-swift.md +287 -0
- package/skills/sharp-edges/references/language-specific.md +588 -0
- package/skills/spec-to-code-compliance/SKILL.md +357 -0
- package/skills/spec-to-code-compliance/resources/COMPLETENESS_CHECKLIST.md +69 -0
- package/skills/spec-to-code-compliance/resources/IR_EXAMPLES.md +417 -0
- package/skills/spec-to-code-compliance/resources/OUTPUT_REQUIREMENTS.md +105 -0
- package/skills/supply-chain-risk-auditor/SKILL.md +67 -0
- package/skills/supply-chain-risk-auditor/resources/results-template.md +41 -0
- package/skills/variant-analysis/METHODOLOGY.md +327 -0
- package/skills/variant-analysis/SKILL.md +142 -0
- package/skills/variant-analysis/resources/codeql/cpp.ql +119 -0
- package/skills/variant-analysis/resources/codeql/go.ql +69 -0
- package/skills/variant-analysis/resources/codeql/java.ql +71 -0
- package/skills/variant-analysis/resources/codeql/javascript.ql +63 -0
- package/skills/variant-analysis/resources/codeql/python.ql +80 -0
- package/skills/variant-analysis/resources/semgrep/cpp.yaml +98 -0
- package/skills/variant-analysis/resources/semgrep/go.yaml +63 -0
- package/skills/variant-analysis/resources/semgrep/java.yaml +61 -0
- package/skills/variant-analysis/resources/semgrep/javascript.yaml +60 -0
- package/skills/variant-analysis/resources/semgrep/python.yaml +72 -0
- package/skills/variant-analysis/resources/variant-report-template.md +75 -0
- package/skills/vuln-report/SKILL.md +137 -0
- package/skills/vuln-report/agents/openai.yaml +4 -0
- package/skills/vuln-report/references/report-template.md +135 -0
- package/skills/wooyun-legacy/SKILL.md +367 -0
- package/skills/wooyun-legacy/references/bank-penetration.md +222 -0
- package/skills/wooyun-legacy/references/checklists/command-execution-checklist.md +119 -0
- package/skills/wooyun-legacy/references/checklists/csrf-checklist.md +74 -0
- package/skills/wooyun-legacy/references/checklists/file-upload-checklist.md +108 -0
- package/skills/wooyun-legacy/references/checklists/info-disclosure-checklist.md +114 -0
- package/skills/wooyun-legacy/references/checklists/logic-flaws-checklist.md +95 -0
- package/skills/wooyun-legacy/references/checklists/misconfig-checklist.md +124 -0
- package/skills/wooyun-legacy/references/checklists/path-traversal-checklist.md +87 -0
- package/skills/wooyun-legacy/references/checklists/rce-checklist.md +93 -0
- package/skills/wooyun-legacy/references/checklists/sql-injection-checklist.md +97 -0
- package/skills/wooyun-legacy/references/checklists/ssrf-checklist.md +99 -0
- package/skills/wooyun-legacy/references/checklists/unauthorized-access-checklist.md +89 -0
- package/skills/wooyun-legacy/references/checklists/weak-password-checklist.md +115 -0
- package/skills/wooyun-legacy/references/checklists/xss-checklist.md +103 -0
- package/skills/wooyun-legacy/references/checklists/xxe-checklist.md +130 -0
- package/skills/wooyun-legacy/references/info-disclosure.md +975 -0
- package/skills/wooyun-legacy/references/logic-flaws.md +721 -0
- package/skills/wooyun-legacy/references/path-traversal.md +1191 -0
- package/skills/wooyun-legacy/references/telecom-penetration.md +156 -0
- package/skills/wooyun-legacy/references/unauthorized-access.md +980 -0
- package/skills/wooyun-legacy/references/xss.md +746 -0
- package/skills/zeroize-audit/SKILL.md +371 -0
- package/skills/zeroize-audit/configs/c.yaml +21 -0
- package/skills/zeroize-audit/configs/default.yaml +128 -0
- package/skills/zeroize-audit/configs/rust.yaml +83 -0
- package/skills/zeroize-audit/prompts/report_template.md +238 -0
- package/skills/zeroize-audit/prompts/system.md +163 -0
- package/skills/zeroize-audit/prompts/task.md +97 -0
- package/skills/zeroize-audit/references/compile-commands.md +231 -0
- package/skills/zeroize-audit/references/detection-strategy.md +191 -0
- package/skills/zeroize-audit/references/ir-analysis.md +252 -0
- package/skills/zeroize-audit/references/mcp-analysis.md +221 -0
- package/skills/zeroize-audit/references/poc-generation.md +470 -0
- package/skills/zeroize-audit/references/rust-zeroization-patterns.md +867 -0
- package/skills/zeroize-audit/schemas/input.json +83 -0
- package/skills/zeroize-audit/schemas/output.json +140 -0
- package/skills/zeroize-audit/tools/analyze_asm.sh +202 -0
- package/skills/zeroize-audit/tools/analyze_cfg.py +381 -0
- package/skills/zeroize-audit/tools/analyze_heap.sh +211 -0
- package/skills/zeroize-audit/tools/analyze_ir_semantic.py +429 -0
- package/skills/zeroize-audit/tools/diff_ir.sh +135 -0
- package/skills/zeroize-audit/tools/diff_rust_mir.sh +189 -0
- package/skills/zeroize-audit/tools/emit_asm.sh +67 -0
- package/skills/zeroize-audit/tools/emit_ir.sh +77 -0
- package/skills/zeroize-audit/tools/emit_rust_asm.sh +178 -0
- package/skills/zeroize-audit/tools/emit_rust_ir.sh +150 -0
- package/skills/zeroize-audit/tools/emit_rust_mir.sh +158 -0
- package/skills/zeroize-audit/tools/extract_compile_flags.py +284 -0
- package/skills/zeroize-audit/tools/generate_poc.py +1329 -0
- package/skills/zeroize-audit/tools/mcp/apply_confidence_gates.py +113 -0
- package/skills/zeroize-audit/tools/mcp/check_mcp.sh +68 -0
- package/skills/zeroize-audit/tools/mcp/normalize_mcp_evidence.py +125 -0
- package/skills/zeroize-audit/tools/scripts/check_llvm_patterns.py +481 -0
- package/skills/zeroize-audit/tools/scripts/check_mir_patterns.py +554 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm.py +424 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_aarch64.py +300 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_x86.py +283 -0
- package/skills/zeroize-audit/tools/scripts/find_dangerous_apis.py +375 -0
- package/skills/zeroize-audit/tools/scripts/semantic_audit.py +923 -0
- package/skills/zeroize-audit/tools/track_dataflow.sh +196 -0
- package/skills/zeroize-audit/tools/validate_rust_toolchain.sh +298 -0
- package/skills/zeroize-audit/workflows/phase-0-preflight.md +150 -0
- package/skills/zeroize-audit/workflows/phase-1-source-analysis.md +144 -0
- package/skills/zeroize-audit/workflows/phase-2-compiler-analysis.md +139 -0
- package/skills/zeroize-audit/workflows/phase-3-interim-report.md +46 -0
- package/skills/zeroize-audit/workflows/phase-4-poc-generation.md +46 -0
- package/skills/zeroize-audit/workflows/phase-5-poc-validation.md +136 -0
- package/skills/zeroize-audit/workflows/phase-6-final-report.md +44 -0
- package/skills/zeroize-audit/workflows/phase-7-test-generation.md +42 -0
- package/themes/piolium-srcery.json +94 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# Language-Specific Guidance
|
|
2
|
+
|
|
3
|
+
## No Build Required
|
|
4
|
+
|
|
5
|
+
### Python
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
codeql database create codeql.db --language=python --source-root=.
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
**Framework Support:**
|
|
12
|
+
- Django, Flask, FastAPI: Built-in models
|
|
13
|
+
- Tornado, Pyramid: Partial support
|
|
14
|
+
- Custom frameworks: May need data extensions
|
|
15
|
+
|
|
16
|
+
**Common Issues:**
|
|
17
|
+
| Issue | Fix |
|
|
18
|
+
|-------|-----|
|
|
19
|
+
| Missing Django models | Ensure `settings.py` is at expected location |
|
|
20
|
+
| Virtual env included | Use `paths-ignore` in config |
|
|
21
|
+
| Type stubs missing | Install `types-*` packages before extraction |
|
|
22
|
+
|
|
23
|
+
### JavaScript/TypeScript
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
codeql database create codeql.db --language=javascript --source-root=.
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
**Framework Support:**
|
|
30
|
+
- React, Vue, Angular: Built-in models
|
|
31
|
+
- Express, Koa, Fastify: HTTP source/sink models
|
|
32
|
+
- Next.js, Nuxt: Partial SSR support
|
|
33
|
+
|
|
34
|
+
**Common Issues:**
|
|
35
|
+
| Issue | Fix |
|
|
36
|
+
|-------|-----|
|
|
37
|
+
| node_modules bloat | Already excluded by default |
|
|
38
|
+
| TypeScript not parsed | Ensure `tsconfig.json` is valid |
|
|
39
|
+
| Monorepo issues | Use `--source-root` for specific package |
|
|
40
|
+
|
|
41
|
+
### Go
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
codeql database create codeql.db --language=go --source-root=.
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**Framework Support:**
|
|
48
|
+
- net/http, Gin, Echo, Chi: Built-in models
|
|
49
|
+
- gRPC: Partial support
|
|
50
|
+
- Custom routers: May need data extensions
|
|
51
|
+
|
|
52
|
+
**Common Issues:**
|
|
53
|
+
| Issue | Fix |
|
|
54
|
+
|-------|-----|
|
|
55
|
+
| Missing dependencies | Run `go mod download` first |
|
|
56
|
+
| Vendor directory | CodeQL handles automatically |
|
|
57
|
+
| CGO code | Requires `--command='go build'` with CGO enabled |
|
|
58
|
+
|
|
59
|
+
### Ruby
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
codeql database create codeql.db --language=ruby --source-root=.
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
**Framework Support:**
|
|
66
|
+
- Rails: Full support (controllers, models, views)
|
|
67
|
+
- Sinatra: Built-in support
|
|
68
|
+
- Hanami: Partial support
|
|
69
|
+
|
|
70
|
+
**Common Issues:**
|
|
71
|
+
| Issue | Fix |
|
|
72
|
+
|-------|-----|
|
|
73
|
+
| Bundler issues | Run `bundle install` first |
|
|
74
|
+
| Rails engines | May need multiple database passes |
|
|
75
|
+
|
|
76
|
+
## Build Required
|
|
77
|
+
|
|
78
|
+
### C/C++
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Make
|
|
82
|
+
codeql database create codeql.db --language=cpp --command='make -j8'
|
|
83
|
+
|
|
84
|
+
# CMake
|
|
85
|
+
codeql database create codeql.db --language=cpp \
|
|
86
|
+
--source-root=/path/to/src \
|
|
87
|
+
--command='cmake --build build'
|
|
88
|
+
|
|
89
|
+
# Ninja
|
|
90
|
+
codeql database create codeql.db --language=cpp \
|
|
91
|
+
--command='ninja -C build'
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Build System Tips:**
|
|
95
|
+
| Build System | Command |
|
|
96
|
+
|--------------|---------|
|
|
97
|
+
| Make | `make clean && make -j$(nproc)` |
|
|
98
|
+
| CMake | `cmake -B build && cmake --build build` |
|
|
99
|
+
| Meson | `meson setup build && ninja -C build` |
|
|
100
|
+
| Bazel | `bazel build //...` |
|
|
101
|
+
|
|
102
|
+
**Common Issues:**
|
|
103
|
+
| Issue | Fix |
|
|
104
|
+
|-------|-----|
|
|
105
|
+
| Partial extraction | Ensure `make clean` before CodeQL build |
|
|
106
|
+
| Header-only libraries | Use `--extractor-option cpp_trap_headers=true` |
|
|
107
|
+
| Cross-compilation | Set `CODEQL_EXTRACTOR_CPP_TARGET_ARCH` |
|
|
108
|
+
|
|
109
|
+
### Java/Kotlin
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
# Gradle
|
|
113
|
+
codeql database create codeql.db --language=java --command='./gradlew build -x test'
|
|
114
|
+
|
|
115
|
+
# Maven
|
|
116
|
+
codeql database create codeql.db --language=java --command='mvn compile -DskipTests'
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**Framework Support:**
|
|
120
|
+
- Spring Boot: Full support
|
|
121
|
+
- Jakarta EE: Built-in models
|
|
122
|
+
- Android: Requires Android SDK
|
|
123
|
+
|
|
124
|
+
**Common Issues:**
|
|
125
|
+
| Issue | Fix |
|
|
126
|
+
|-------|-----|
|
|
127
|
+
| Missing dependencies | Run `./gradlew dependencies` first |
|
|
128
|
+
| Kotlin mixed projects | Use `--language=java` (covers both) |
|
|
129
|
+
| Annotation processors | Ensure they run during CodeQL build |
|
|
130
|
+
|
|
131
|
+
### Rust
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
codeql database create codeql.db --language=rust --command='cargo build'
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**Common Issues:**
|
|
138
|
+
| Issue | Fix |
|
|
139
|
+
|-------|-----|
|
|
140
|
+
| Proc macros | May require special handling |
|
|
141
|
+
| Workspace projects | Use `--source-root` for specific crate |
|
|
142
|
+
| Build script failures | Ensure native dependencies are available |
|
|
143
|
+
|
|
144
|
+
### C#
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
# .NET Core
|
|
148
|
+
codeql database create codeql.db --language=csharp --command='dotnet build'
|
|
149
|
+
|
|
150
|
+
# MSBuild
|
|
151
|
+
codeql database create codeql.db --language=csharp --command='msbuild /t:rebuild'
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Framework Support:**
|
|
155
|
+
- ASP.NET Core: Full support
|
|
156
|
+
- Entity Framework: Database query models
|
|
157
|
+
- Blazor: Partial support
|
|
158
|
+
|
|
159
|
+
**Common Issues:**
|
|
160
|
+
| Issue | Fix |
|
|
161
|
+
|-------|-----|
|
|
162
|
+
| NuGet restore | Run `dotnet restore` first |
|
|
163
|
+
| Multiple solutions | Specify solution file in command |
|
|
164
|
+
|
|
165
|
+
### Swift
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
# Xcode project
|
|
169
|
+
codeql database create codeql.db --language=swift \
|
|
170
|
+
--command='xcodebuild -project MyApp.xcodeproj -scheme MyApp build'
|
|
171
|
+
|
|
172
|
+
# Swift Package Manager
|
|
173
|
+
codeql database create codeql.db --language=swift --command='swift build'
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
**Requirements:**
|
|
177
|
+
- macOS only
|
|
178
|
+
- Xcode Command Line Tools
|
|
179
|
+
|
|
180
|
+
**Common Issues:**
|
|
181
|
+
| Issue | Fix |
|
|
182
|
+
|-------|-----|
|
|
183
|
+
| Code signing | Add `CODE_SIGN_IDENTITY=- CODE_SIGNING_REQUIRED=NO` |
|
|
184
|
+
| Simulator target | Add `-sdk iphonesimulator` |
|
|
185
|
+
|
|
186
|
+
## Extractor Options
|
|
187
|
+
|
|
188
|
+
Set via environment variables: `CODEQL_EXTRACTOR_<LANG>_OPTION_<NAME>=<VALUE>`
|
|
189
|
+
|
|
190
|
+
### C/C++ Options
|
|
191
|
+
|
|
192
|
+
| Option | Description |
|
|
193
|
+
|--------|-------------|
|
|
194
|
+
| `trap_headers=true` | Include header file analysis |
|
|
195
|
+
| `target_arch=x86_64` | Target architecture |
|
|
196
|
+
|
|
197
|
+
### Java Options
|
|
198
|
+
|
|
199
|
+
| Option | Description |
|
|
200
|
+
|--------|-------------|
|
|
201
|
+
| `jdk_version=17` | JDK version for analysis |
|
|
202
|
+
|
|
203
|
+
### Python Options
|
|
204
|
+
|
|
205
|
+
| Option | Description |
|
|
206
|
+
|--------|-------------|
|
|
207
|
+
| `python_executable=/path/to/python` | Specific Python interpreter |
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# macOS arm64e Workaround
|
|
2
|
+
|
|
3
|
+
Methods for building CodeQL databases on macOS Apple Silicon when the `arm64e`/`arm64` architecture mismatch causes SIGKILL (exit code 137) during build tracing.
|
|
4
|
+
|
|
5
|
+
**Use when `IS_MACOS_ARM64E=true`** (detected in build-database workflow Step 2a). These replace Methods 1 and 2 on affected systems.
|
|
6
|
+
|
|
7
|
+
The strategy is to use Homebrew-installed tools (plain `arm64`, not `arm64e`) so `libtrace.dylib` can be injected successfully. Try sub-methods in order:
|
|
8
|
+
|
|
9
|
+
## Sub-method 2m-a: Homebrew clang/gcc with multi-step tracing
|
|
10
|
+
|
|
11
|
+
Trace only the compiler invocations individually, avoiding system tools (`/usr/bin/ar`, `/bin/mkdir`) that would be killed. This requires a multi-step build: init → trace each compiler call → finalize.
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Sub-method 2m-a: build a C/C++ CodeQL database by tracing only the compiler
# invocations, one at a time, using a Homebrew-installed compiler.
#
# Reads:   DB_NAME (database path), LOG_FILE (log that output is appended to).
# Calls:   log_step / log_result / log_cmd — provided by the surrounding
#          workflow, not defined in this snippet.
# Sets:    BREW_CC, BREW_CC_ARCH, MAKE_CMD, COMPILE_CMDS, TRACE_OK.
# Shell:   bash (uses [[ ]], here-strings and PIPESTATUS).
log_step "METHOD 2m-a: macOS arm64 — Homebrew compiler with multi-step tracing"

# 1. Find Homebrew C/C++ compiler (arm64, not arm64e)
BREW_CC=""
if [ -x "/opt/homebrew/opt/llvm/bin/clang" ]; then
  # Prefer Homebrew clang
  BREW_CC="/opt/homebrew/opt/llvm/bin/clang"
elif command -v gcc-14 >/dev/null 2>&1; then
  # Try Homebrew GCC (e.g. gcc-14, gcc-13)
  BREW_CC="$(command -v gcc-14)"
elif command -v gcc-13 >/dev/null 2>&1; then
  BREW_CC="$(command -v gcc-13)"
fi

if [ -z "$BREW_CC" ]; then
  log_result "No Homebrew C/C++ compiler found — skipping 2m-a"
  # Fall through to 2m-b
else
  # Verify it's arm64 (not arm64e)
  BREW_CC_ARCH=$(lipo -archs "$BREW_CC" 2>/dev/null)
  if [[ "$BREW_CC_ARCH" == *"arm64e"* ]]; then
    log_result "Homebrew compiler is arm64e — skipping 2m-a"
  else
    log_step "Using Homebrew compiler: $BREW_CC (arch: $BREW_CC_ARCH)"

    # 2. Run the build normally (without tracing) to create build dirs and artifacts
    # Use Homebrew make (gmake) if available, otherwise system make outside tracer
    if command -v gmake >/dev/null 2>&1; then
      MAKE_CMD="gmake"
    else
      MAKE_CMD="make"
    fi
    # A failing `clean` (e.g. no such target) is deliberately ignored.
    $MAKE_CMD clean 2>/dev/null || true
    $MAKE_CMD CC="$BREW_CC" 2>&1 | tee -a "$LOG_FILE"

    # 3. Extract compiler commands from the Makefile / build system
    # Use make's dry-run mode to get the exact compiler invocations
    $MAKE_CMD clean 2>/dev/null || true
    # Keep only lines that start with the chosen compiler and contain `-c`
    # (compile steps), then strip leading whitespace.
    COMPILE_CMDS=$($MAKE_CMD CC="$BREW_CC" --dry-run 2>/dev/null \
      | grep -E "^\s*$BREW_CC\b.*\s-c\s" \
      | sed 's/^[[:space:]]*//')

    if [ -z "$COMPILE_CMDS" ]; then
      log_result "Could not extract compile commands from dry-run — skipping 2m-a"
    else
      # 4. Init database
      codeql database init "$DB_NAME" --language=cpp --source-root=. --overwrite 2>&1 \
        | tee -a "$LOG_FILE"

      # 5. Ensure build directories exist (outside tracer — avoids arm64e mkdir)
      $MAKE_CMD clean 2>/dev/null || true
      # Parse -o flags to find output dirs, or just create common dirs
      echo "$COMPILE_CMDS" | sed -n 's/.*-o[[:space:]]\{1,\}\([^[:space:]]\{1,\}\).*/\1/p' | xargs -I{} dirname {} \
        | sort -u | xargs mkdir -p 2>/dev/null || true

      # 6. Trace each compiler invocation individually
      TRACE_OK=true
      while IFS= read -r cmd; do
        [ -z "$cmd" ] && continue
        log_cmd "codeql database trace-command $DB_NAME -- $cmd"
        # $cmd is intentionally unquoted: it holds a whole compiler command
        # line that must be word-split into separate arguments.
        codeql database trace-command "$DB_NAME" -- $cmd 2>&1 | tee -a "$LOG_FILE"
        # The pipeline's own status is tee's; PIPESTATUS[0] is codeql's, which
        # is the one that tells us whether the trace actually succeeded.
        if [ "${PIPESTATUS[0]}" -ne 0 ]; then
          log_result "FAILED on: $cmd"
          TRACE_OK=false
          break
        fi
      done <<< "$COMPILE_CMDS"

      if $TRACE_OK; then
        # 7. Finalize
        codeql database finalize "$DB_NAME" 2>&1 | tee -a "$LOG_FILE"
        # Success is judged by whether the database resolves, not by the
        # finalize pipeline's exit status.
        if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then
          log_result "SUCCESS (macOS arm64 multi-step)"
          # Done — skip to Step 4
        else
          log_result "FAILED (finalize failed)"
        fi
      fi
    fi
  fi
fi
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Sub-method 2m-b: Rosetta x86_64 emulation
|
|
97
|
+
|
|
98
|
+
Force the entire CodeQL pipeline to run under Rosetta, which uses the `x86_64` slice of both `libtrace.dylib` and system tools — no `arm64e` mismatch.
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
# Sub-method 2m-b: run `codeql database create` under `arch -x86_64` (Rosetta).
#
# Reads:   DB_NAME (database path), CODEQL_LANG (CodeQL language id),
#          LOG_FILE (log that output is appended to).
# Calls:   log_step / log_result / log_cmd — provided by the surrounding
#          workflow, not defined in this snippet.
# Sets:    BUILD_CMD, CMD.
log_step "METHOD 2m-b: macOS arm64 — Rosetta x86_64 emulation"

# Check if Rosetta is available
if ! arch -x86_64 /usr/bin/true 2>/dev/null; then
  log_result "Rosetta not available — skipping 2m-b"
else
  # Placeholder: substitute the project's real build command before running.
  BUILD_CMD="<BUILD_CMD>"  # e.g. "make clean && make -j4"
  # CMD is only a human-readable rendering for the log; the command actually
  # executed is spelled out below with each argument quoted.
  CMD="arch -x86_64 codeql database create $DB_NAME --language=$CODEQL_LANG --source-root=. --command='$BUILD_CMD' --overwrite"
  log_cmd "$CMD"

  # Quoted so a database path or language value containing whitespace is
  # passed as a single argument.
  arch -x86_64 codeql database create "$DB_NAME" --language="$CODEQL_LANG" --source-root=. \
    --command="$BUILD_CMD" --overwrite 2>&1 | tee -a "$LOG_FILE"

  # Success is judged by whether the database resolves, not by the pipeline's
  # exit status (which would be tee's).
  if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then
    log_result "SUCCESS (Rosetta x86_64)"
  else
    log_result "FAILED (Rosetta)"
  fi
fi
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Sub-method 2m-c: System compiler (direct attempt)
|
|
123
|
+
|
|
124
|
+
As a verification step, try the standard autobuild with the system compiler. This will likely fail with exit code 137 on affected systems, but confirms the arm64e issue is the cause.
|
|
125
|
+
|
|
126
|
+
> **This sub-method is optional.** Skip it if arm64e incompatibility was already confirmed in Step 2a.
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Sub-method 2m-c: try a plain `codeql database create` with the system
# toolchain and classify the outcome by exit code.
#
# Reads:   DB_NAME (database path), CODEQL_LANG (CodeQL language id),
#          LOG_FILE (log that output is appended to).
# Calls:   log_step / log_result / log_cmd — provided by the surrounding
#          workflow, not defined in this snippet.
# Sets:    CMD, EXIT_CODE.
# Shell:   bash (uses PIPESTATUS).
log_step "METHOD 2m-c: System compiler (expected to fail on arm64e)"
# CMD is only a human-readable rendering for the log; the command actually
# executed is spelled out below with each argument quoted.
CMD="codeql database create $DB_NAME --language=$CODEQL_LANG --source-root=. --overwrite"
log_cmd "$CMD"

codeql database create "$DB_NAME" --language="$CODEQL_LANG" --source-root=. --overwrite 2>&1 \
  | tee -a "$LOG_FILE"

# `$?` here would be tee's exit status (almost always 0), which would make the
# 137/134 branch below unreachable. PIPESTATUS[0] is codeql's own status.
EXIT_CODE=${PIPESTATUS[0]}
if [ "$EXIT_CODE" -eq 137 ] || [ "$EXIT_CODE" -eq 134 ]; then
  log_result "FAILED: exit code $EXIT_CODE confirms arm64e/libtrace incompatibility"
elif codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then
  log_result "SUCCESS (unexpected — system compiler worked)"
else
  log_result "FAILED (exit code: $EXIT_CODE)"
fi
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Sub-method 2m-d: Ask user
|
|
146
|
+
|
|
147
|
+
If all macOS workarounds fail, present options:
|
|
148
|
+
|
|
149
|
+
```
|
|
150
|
+
AskUserQuestion:
|
|
151
|
+
header: "macOS Build"
|
|
152
|
+
question: "Build tracing failed due to macOS arm64e incompatibility. How to proceed?"
|
|
153
|
+
multiSelect: false
|
|
154
|
+
options:
|
|
155
|
+
- label: "Use build-mode=none (Recommended)"
|
|
156
|
+
description: "Source-level analysis only. Misses some interprocedural data flow but catches most C/C++ vulnerabilities (format strings, buffer overflows, unsafe functions)."
|
|
157
|
+
- label: "Install arm64 tools and retry"
|
|
158
|
+
description: "Run: brew install llvm make — then retry with Homebrew toolchain"
|
|
159
|
+
- label: "Install Rosetta and retry"
|
|
160
|
+
description: "Run: softwareupdate --install-rosetta — then retry under x86_64 emulation"
|
|
161
|
+
- label: "Abort"
|
|
162
|
+
description: "Stop database creation"
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
**If "Use build-mode=none":** Proceed to Method 4.
|
|
166
|
+
|
|
167
|
+
**If "Install arm64 tools and retry":**
|
|
168
|
+
```bash
|
|
169
|
+
log_step "Installing Homebrew arm64 toolchain"
|
|
170
|
+
brew install llvm make 2>&1 | tee -a "$LOG_FILE"
|
|
171
|
+
# Retry Sub-method 2m-a
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**If "Install Rosetta and retry":**
|
|
175
|
+
```bash
|
|
176
|
+
log_step "Installing Rosetta"
|
|
177
|
+
softwareupdate --install-rosetta --agree-to-license 2>&1 | tee -a "$LOG_FILE"
|
|
178
|
+
# Retry Sub-method 2m-b
|
|
179
|
+
```
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# Performance Tuning
|
|
2
|
+
|
|
3
|
+
## Memory Configuration
|
|
4
|
+
|
|
5
|
+
### CODEQL_RAM Environment Variable
|
|
6
|
+
|
|
7
|
+
Control maximum heap memory (in MB):
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# 48GB for large codebases
|
|
11
|
+
CODEQL_RAM=48000 codeql database analyze codeql.db ...
|
|
12
|
+
|
|
13
|
+
# 16GB for medium codebases
|
|
14
|
+
CODEQL_RAM=16000 codeql database analyze codeql.db ...
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
**Guidelines:**
|
|
18
|
+
| Codebase Size | Recommended RAM |
|
|
19
|
+
|---------------|-----------------|
|
|
20
|
+
| Small (<100K LOC) | 4-8 GB |
|
|
21
|
+
| Medium (100K-1M LOC) | 8-16 GB |
|
|
22
|
+
| Large (1M+ LOC) | 32-64 GB |
|
|
23
|
+
|
|
24
|
+
## Thread Configuration
|
|
25
|
+
|
|
26
|
+
### Analysis Threads
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Use all available cores
|
|
30
|
+
codeql database analyze codeql.db --threads=0 ...
|
|
31
|
+
|
|
32
|
+
# Use specific number
|
|
33
|
+
codeql database analyze codeql.db --threads=8 ...
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
**Note:** `--threads=0` uses all available cores. On shared machines, use an explicit thread count instead.
|
|
37
|
+
|
|
38
|
+
## Query-Level Timeouts
|
|
39
|
+
|
|
40
|
+
Prevent individual queries from running indefinitely:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Set per-query timeout (in seconds)
|
|
44
|
+
codeql database analyze codeql.db --timeout=600 ...
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
A 10-minute timeout (`600`, since `--timeout` is specified in seconds) catches runaway queries without killing legitimate complex analysis. Taint-tracking queries on large codebases may need longer.
|
|
48
|
+
|
|
49
|
+
## Evaluator Diagnostics
|
|
50
|
+
|
|
51
|
+
When analysis is slow, use `--evaluator-log` to identify which queries consume the most time:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
codeql database analyze codeql.db \
|
|
55
|
+
--evaluator-log=evaluator.log \
|
|
56
|
+
--format=sarif-latest \
|
|
57
|
+
--output=results.sarif \
|
|
58
|
+
-- codeql/python-queries:codeql-suites/python-security-extended.qls
|
|
59
|
+
|
|
60
|
+
# Summarize the log
|
|
61
|
+
codeql generate log-summary evaluator.log --format=text
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
The summary shows per-query timing and tuple counts. Queries producing millions of tuples are likely the bottleneck.
|
|
65
|
+
|
|
66
|
+
## Disk Space
|
|
67
|
+
|
|
68
|
+
| Phase | Typical Size | Notes |
|
|
69
|
+
|-------|-------------|-------|
|
|
70
|
+
| Database creation | 2-10x source size | Compiled languages are larger due to build tracing |
|
|
71
|
+
| Analysis cache | 1-5 GB | Stored in database directory |
|
|
72
|
+
| SARIF output | 1-50 MB | Depends on finding count |
|
|
73
|
+
|
|
74
|
+
Check available space before starting:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
df -h .
|
|
78
|
+
du -sh codeql_*.db 2>/dev/null
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Caching Behavior
|
|
82
|
+
|
|
83
|
+
CodeQL caches query evaluation results inside the database directory. Subsequent runs of the same queries skip re-evaluation.
|
|
84
|
+
|
|
85
|
+
| Scenario | Cache Effect |
|
|
86
|
+
|----------|-------------|
|
|
87
|
+
| Re-run same packs | Fast — uses cached results |
|
|
88
|
+
| Add new query pack | Only new queries evaluate |
|
|
89
|
+
| `codeql database cleanup` | Clears cache — forces full re-evaluation |
|
|
90
|
+
| `--rerun` flag | Ignores cache for this run |
|
|
91
|
+
|
|
92
|
+
**When to clear cache:**
|
|
93
|
+
- After deploying new data extensions (cache may hold stale results)
|
|
94
|
+
- When investigating unexpected zero-finding results
|
|
95
|
+
- Before benchmark comparisons (ensures consistent timing)
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# Clear evaluation cache
|
|
99
|
+
codeql database cleanup codeql_1.db
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Troubleshooting Performance
|
|
103
|
+
|
|
104
|
+
| Symptom | Likely Cause | Solution |
|
|
105
|
+
|---------|--------------|----------|
|
|
106
|
+
| OOM during analysis | Not enough RAM | Increase `CODEQL_RAM` |
|
|
107
|
+
| Slow database creation | Complex build | Use `--threads`, simplify build |
|
|
108
|
+
| Slow query execution | Large codebase | Reduce query scope, add RAM |
|
|
109
|
+
| Database too large | Too many files | Use exclusion config (`codeql-config.yml` with `paths-ignore`) |
|
|
110
|
+
| Single query hangs | Runaway evaluation | Use `--timeout` and check `--evaluator-log` |
|
|
111
|
+
| Repeated runs still slow | Cache not used | Check that you're using the same database path |
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# Quality Assessment
|
|
2
|
+
|
|
3
|
+
How to assess and improve CodeQL database quality after a successful build.
|
|
4
|
+
|
|
5
|
+
## Collect Metrics
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
log_step "Assessing database quality"
|
|
9
|
+
|
|
10
|
+
# 1. Baseline lines of code and file list (most reliable metric)
|
|
11
|
+
codeql database print-baseline -- "$DB_NAME"
|
|
12
|
+
BASELINE_LOC=$(python3 -c "
|
|
13
|
+
import json
|
|
14
|
+
with open('$DB_NAME/baseline-info.json') as f:
|
|
15
|
+
d = json.load(f)
|
|
16
|
+
for lang, info in d['languages'].items():
|
|
17
|
+
print(f'{lang}: {info[\"linesOfCode\"]} LoC, {len(info[\"files\"])} files')
|
|
18
|
+
")
|
|
19
|
+
echo "$BASELINE_LOC"
|
|
20
|
+
log_result "Baseline: $BASELINE_LOC"
|
|
21
|
+
|
|
22
|
+
# 2. Source archive file count
|
|
23
|
+
SRC_FILE_COUNT=$(unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null | wc -l)
|
|
24
|
+
echo "Files in source archive: $SRC_FILE_COUNT"
|
|
25
|
+
|
|
26
|
+
# 3. Extraction errors from extractor diagnostics
|
|
27
|
+
EXTRACTOR_ERRORS=$(find "$DB_NAME/diagnostic/extractors" -name '*.jsonl' \
|
|
28
|
+
-exec cat {} + 2>/dev/null | grep -c '^{' 2>/dev/null || true)
|
|
29
|
+
EXTRACTOR_ERRORS=${EXTRACTOR_ERRORS:-0}
|
|
30
|
+
echo "Extractor errors: $EXTRACTOR_ERRORS"
|
|
31
|
+
|
|
32
|
+
# 4. Export diagnostics summary (experimental but useful)
|
|
33
|
+
DIAG_TEXT=$(codeql database export-diagnostics --format=text -- "$DB_NAME" 2>/dev/null || true)
|
|
34
|
+
if [ -n "$DIAG_TEXT" ]; then
|
|
35
|
+
echo "Diagnostics: $DIAG_TEXT"
|
|
36
|
+
fi
|
|
37
|
+
|
|
38
|
+
# 5. Check database is finalized
|
|
39
|
+
FINALIZED=$(grep '^finalised:' "$DB_NAME/codeql-database.yml" 2>/dev/null \
|
|
40
|
+
| awk '{print $2}')
|
|
41
|
+
echo "Finalized: $FINALIZED"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Compare Against Expected Source
|
|
45
|
+
|
|
46
|
+
Estimate the expected source file count from the working directory and compare.
|
|
47
|
+
|
|
48
|
+
> **Compiled languages (C/C++, Java, C#):** The source archive (`src.zip`) includes system headers and SDK files alongside project source files. For C/C++, this can inflate the archive count 10-20x (e.g., 111 archive files for 5 project source files). Compare against **project-relative files only** by filtering the archive listing.
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# Count source files in the project (adjust extensions per language)
|
|
52
|
+
EXPECTED=$(fd -t f -e c -e cpp -e h -e hpp -e java -e kt -e py -e js -e ts \
|
|
53
|
+
--exclude 'codeql_*.db' --exclude node_modules --exclude vendor --exclude .git . \
|
|
54
|
+
2>/dev/null | wc -l)
|
|
55
|
+
echo "Expected source files: $EXPECTED"
|
|
56
|
+
|
|
57
|
+
# Count PROJECT files in source archive (exclude system/SDK paths)
|
|
58
|
+
PROJECT_SRC_COUNT=$(unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null \
|
|
59
|
+
| grep -v -E '^(Library/|usr/|System/|opt/|Applications/)' | wc -l)
|
|
60
|
+
echo "Project files in source archive: $PROJECT_SRC_COUNT"
|
|
61
|
+
echo "Total files in source archive: $SRC_FILE_COUNT (includes system headers for compiled langs)"
|
|
62
|
+
|
|
63
|
+
# Baseline LOC from database metadata (most reliable single metric)
|
|
64
|
+
DB_LOC=$(grep '^baselineLinesOfCode:' "$DB_NAME/codeql-database.yml" \
|
|
65
|
+
| awk '{print $2}')
|
|
66
|
+
echo "Baseline LoC: $DB_LOC"
|
|
67
|
+
|
|
68
|
+
# Error ratio — use project file count for compiled langs, total for interpreted
|
|
69
|
+
if [ "$PROJECT_SRC_COUNT" -gt 0 ]; then
|
|
70
|
+
ERROR_RATIO=$(python3 -c "print(f'{$EXTRACTOR_ERRORS/$PROJECT_SRC_COUNT*100:.1f}%')")
|
|
71
|
+
else
|
|
72
|
+
ERROR_RATIO="N/A (no files)"
|
|
73
|
+
fi
|
|
74
|
+
echo "Error ratio: $ERROR_RATIO ($EXTRACTOR_ERRORS errors / $PROJECT_SRC_COUNT project files)"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Log Assessment
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
log_step "Quality assessment results"
|
|
81
|
+
log_result "Baseline LoC: $DB_LOC"
|
|
82
|
+
log_result "Project source files: $PROJECT_SRC_COUNT (expected: ~$EXPECTED)"
|
|
83
|
+
log_result "Total archive files: $SRC_FILE_COUNT (includes system headers for compiled langs)"
|
|
84
|
+
log_result "Extractor errors: $EXTRACTOR_ERRORS (ratio: $ERROR_RATIO)"
|
|
85
|
+
log_result "Finalized: $FINALIZED"
|
|
86
|
+
|
|
87
|
+
# Sample extracted project files (exclude system paths)
|
|
88
|
+
unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null \
|
|
89
|
+
| grep -v -E '^(Library/|usr/|System/|opt/|Applications/)' \
|
|
90
|
+
| head -20 >> "$LOG_FILE"
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Quality Criteria
|
|
94
|
+
|
|
95
|
+
| Metric | Source | Good | Poor |
|
|
96
|
+
|--------|--------|------|------|
|
|
97
|
+
| Baseline LoC | `print-baseline` / `baseline-info.json` | > 0, proportional to project size | 0 or far below expected |
|
|
98
|
+
| Project source files | `src.zip` (filtered) | Close to expected source file count | 0 or < 50% of expected |
|
|
99
|
+
| Extractor errors | `diagnostic/extractors/*.jsonl` | 0 or < 5% of project files | > 5% of project files |
|
|
100
|
+
| Finalized | `codeql-database.yml` | `true` | `false` (incomplete build) |
|
|
101
|
+
| Key directories | `src.zip` listing | Application code directories present | Missing `src/main`, `lib/`, `app/` etc. |
|
|
102
|
+
| "No source code seen" | build log | Absent | Present (cached build — compiled languages) |
|
|
103
|
+
|
|
104
|
+
**Interpreting archive file counts for compiled languages:** C/C++ databases include system headers (e.g., `<stdio.h>`, SDK headers) in `src.zip`. A project with 5 source files may have 100+ files in the archive. Always filter to project-relative paths when comparing against expected counts. Use `baselineLinesOfCode` as the primary quality indicator.
|
|
105
|
+
|
|
106
|
+
**Interpreting extractor errors and baseline LoC:** A small number of extractor errors is normal and does not significantly impact analysis. However, if `baselineLinesOfCode` is 0 or the source archive contains no files, the database is empty — likely a cached build (compiled languages) or a wrong `--source-root`.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Improve Quality (if poor)
|
|
111
|
+
|
|
112
|
+
Try these improvements, re-assess after each. **Log all improvements:**
|
|
113
|
+
|
|
114
|
+
### 1. Adjust source root
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
log_step "Quality improvement: adjust source root"
|
|
118
|
+
NEW_ROOT="./src" # or detected subdirectory
|
|
119
|
+
# For interpreted: add --codescanning-config=codeql-config.yml
|
|
120
|
+
# For compiled: omit config flag
|
|
121
|
+
log_cmd "codeql database create $DB_NAME --language=$CODEQL_LANG --source-root=$NEW_ROOT --overwrite"
|
|
122
|
+
codeql database create "$DB_NAME" --language="$CODEQL_LANG" --source-root="$NEW_ROOT" --overwrite
|
|
123
|
+
log_result "Changed source-root to: $NEW_ROOT"
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### 2. Fix "no source code seen" (cached build - compiled languages only)
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
log_step "Quality improvement: force rebuild (cached build detected)"
|
|
130
|
+
log_cmd "make clean && rebuild"
|
|
131
|
+
make clean && codeql database create "$DB_NAME" --language="$CODEQL_LANG" --overwrite
|
|
132
|
+
log_result "Forced clean rebuild"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### 3. Install type stubs / dependencies
|
|
136
|
+
|
|
137
|
+
> **Note:** These install into the *target project's* environment to improve CodeQL extraction quality.
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
log_step "Quality improvement: install type stubs/additional deps"
|
|
141
|
+
|
|
142
|
+
# Python type stubs — install into target project's environment
|
|
143
|
+
STUBS_INSTALLED=""
|
|
144
|
+
for stub in types-requests types-PyYAML types-redis; do
|
|
145
|
+
if pip install "$stub" 2>/dev/null; then
|
|
146
|
+
STUBS_INSTALLED="$STUBS_INSTALLED $stub"
|
|
147
|
+
fi
|
|
148
|
+
done
|
|
149
|
+
log_result "Installed type stubs:$STUBS_INSTALLED"
|
|
150
|
+
|
|
151
|
+
# Additional project dependencies
|
|
152
|
+
log_cmd "pip install -e ."
|
|
153
|
+
pip install -e . 2>&1 | tee -a "$LOG_FILE"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### 4. Adjust extractor options
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
log_step "Quality improvement: adjust extractor options"
|
|
160
|
+
|
|
161
|
+
# C/C++: Include headers
|
|
162
|
+
export CODEQL_EXTRACTOR_CPP_OPTION_TRAP_HEADERS=true
|
|
163
|
+
log_result "Set CODEQL_EXTRACTOR_CPP_OPTION_TRAP_HEADERS=true"
|
|
164
|
+
|
|
165
|
+
# Java: Specific JDK version
|
|
166
|
+
export CODEQL_EXTRACTOR_JAVA_OPTION_JDK_VERSION=17
|
|
167
|
+
log_result "Set CODEQL_EXTRACTOR_JAVA_OPTION_JDK_VERSION=17"
|
|
168
|
+
|
|
169
|
+
# Then rebuild with current method
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**After each improvement:** Re-assess quality. If no further improvement is possible, move on to the next build method.
|