windows-exe-decompiler-mcp-server 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/CODEX_INSTALLATION.md +69 -0
  2. package/COPILOT_INSTALLATION.md +77 -0
  3. package/LICENSE +21 -0
  4. package/README.md +314 -0
  5. package/bin/windows-exe-decompiler-mcp-server.js +3 -0
  6. package/dist/analysis-provenance.d.ts +184 -0
  7. package/dist/analysis-provenance.js +74 -0
  8. package/dist/analysis-task-runner.d.ts +31 -0
  9. package/dist/analysis-task-runner.js +160 -0
  10. package/dist/artifact-inventory.d.ts +23 -0
  11. package/dist/artifact-inventory.js +175 -0
  12. package/dist/cache-manager.d.ts +128 -0
  13. package/dist/cache-manager.js +454 -0
  14. package/dist/confidence-semantics.d.ts +66 -0
  15. package/dist/confidence-semantics.js +122 -0
  16. package/dist/config.d.ts +335 -0
  17. package/dist/config.js +193 -0
  18. package/dist/database.d.ts +227 -0
  19. package/dist/database.js +601 -0
  20. package/dist/decompiler-worker.d.ts +441 -0
  21. package/dist/decompiler-worker.js +1962 -0
  22. package/dist/dynamic-trace.d.ts +95 -0
  23. package/dist/dynamic-trace.js +629 -0
  24. package/dist/env-validator.d.ts +15 -0
  25. package/dist/env-validator.js +249 -0
  26. package/dist/error-handler.d.ts +28 -0
  27. package/dist/error-handler.example.d.ts +22 -0
  28. package/dist/error-handler.example.js +141 -0
  29. package/dist/error-handler.js +139 -0
  30. package/dist/ghidra-analysis-status.d.ts +49 -0
  31. package/dist/ghidra-analysis-status.js +178 -0
  32. package/dist/ghidra-config.d.ts +134 -0
  33. package/dist/ghidra-config.js +464 -0
  34. package/dist/index.d.ts +9 -0
  35. package/dist/index.js +200 -0
  36. package/dist/job-queue.d.ts +169 -0
  37. package/dist/job-queue.js +407 -0
  38. package/dist/logger.d.ts +106 -0
  39. package/dist/logger.js +176 -0
  40. package/dist/policy-guard.d.ts +115 -0
  41. package/dist/policy-guard.js +243 -0
  42. package/dist/process-output.d.ts +15 -0
  43. package/dist/process-output.js +90 -0
  44. package/dist/prompts/function-explanation-review.d.ts +5 -0
  45. package/dist/prompts/function-explanation-review.js +64 -0
  46. package/dist/prompts/semantic-name-review.d.ts +5 -0
  47. package/dist/prompts/semantic-name-review.js +63 -0
  48. package/dist/runtime-correlation.d.ts +34 -0
  49. package/dist/runtime-correlation.js +279 -0
  50. package/dist/runtime-paths.d.ts +3 -0
  51. package/dist/runtime-paths.js +11 -0
  52. package/dist/selection-diff.d.ts +667 -0
  53. package/dist/selection-diff.js +53 -0
  54. package/dist/semantic-name-suggestion-artifacts.d.ts +116 -0
  55. package/dist/semantic-name-suggestion-artifacts.js +314 -0
  56. package/dist/server.d.ts +129 -0
  57. package/dist/server.js +578 -0
  58. package/dist/tools/artifact-read.d.ts +235 -0
  59. package/dist/tools/artifact-read.js +317 -0
  60. package/dist/tools/artifacts-diff.d.ts +728 -0
  61. package/dist/tools/artifacts-diff.js +304 -0
  62. package/dist/tools/artifacts-list.d.ts +515 -0
  63. package/dist/tools/artifacts-list.js +389 -0
  64. package/dist/tools/attack-map.d.ts +290 -0
  65. package/dist/tools/attack-map.js +519 -0
  66. package/dist/tools/cache-observability.d.ts +4 -0
  67. package/dist/tools/cache-observability.js +36 -0
  68. package/dist/tools/code-function-cfg.d.ts +50 -0
  69. package/dist/tools/code-function-cfg.js +102 -0
  70. package/dist/tools/code-function-decompile.d.ts +55 -0
  71. package/dist/tools/code-function-decompile.js +103 -0
  72. package/dist/tools/code-function-disassemble.d.ts +43 -0
  73. package/dist/tools/code-function-disassemble.js +185 -0
  74. package/dist/tools/code-function-explain-apply.d.ts +255 -0
  75. package/dist/tools/code-function-explain-apply.js +225 -0
  76. package/dist/tools/code-function-explain-prepare.d.ts +535 -0
  77. package/dist/tools/code-function-explain-prepare.js +276 -0
  78. package/dist/tools/code-function-explain-review.d.ts +397 -0
  79. package/dist/tools/code-function-explain-review.js +589 -0
  80. package/dist/tools/code-function-rename-apply.d.ts +248 -0
  81. package/dist/tools/code-function-rename-apply.js +220 -0
  82. package/dist/tools/code-function-rename-prepare.d.ts +506 -0
  83. package/dist/tools/code-function-rename-prepare.js +279 -0
  84. package/dist/tools/code-function-rename-review.d.ts +574 -0
  85. package/dist/tools/code-function-rename-review.js +761 -0
  86. package/dist/tools/code-functions-list.d.ts +37 -0
  87. package/dist/tools/code-functions-list.js +91 -0
  88. package/dist/tools/code-functions-rank.d.ts +34 -0
  89. package/dist/tools/code-functions-rank.js +90 -0
  90. package/dist/tools/code-functions-reconstruct.d.ts +2725 -0
  91. package/dist/tools/code-functions-reconstruct.js +2807 -0
  92. package/dist/tools/code-functions-search.d.ts +39 -0
  93. package/dist/tools/code-functions-search.js +90 -0
  94. package/dist/tools/code-reconstruct-export.d.ts +1212 -0
  95. package/dist/tools/code-reconstruct-export.js +4002 -0
  96. package/dist/tools/code-reconstruct-plan.d.ts +274 -0
  97. package/dist/tools/code-reconstruct-plan.js +342 -0
  98. package/dist/tools/dotnet-metadata-extract.d.ts +541 -0
  99. package/dist/tools/dotnet-metadata-extract.js +355 -0
  100. package/dist/tools/dotnet-reconstruct-export.d.ts +567 -0
  101. package/dist/tools/dotnet-reconstruct-export.js +1151 -0
  102. package/dist/tools/dotnet-types-list.d.ts +325 -0
  103. package/dist/tools/dotnet-types-list.js +201 -0
  104. package/dist/tools/dynamic-dependencies.d.ts +115 -0
  105. package/dist/tools/dynamic-dependencies.js +213 -0
  106. package/dist/tools/dynamic-memory-import.d.ts +10 -0
  107. package/dist/tools/dynamic-memory-import.js +567 -0
  108. package/dist/tools/dynamic-trace-import.d.ts +10 -0
  109. package/dist/tools/dynamic-trace-import.js +235 -0
  110. package/dist/tools/entrypoint-fallback-disasm.d.ts +30 -0
  111. package/dist/tools/entrypoint-fallback-disasm.js +89 -0
  112. package/dist/tools/ghidra-analyze.d.ts +88 -0
  113. package/dist/tools/ghidra-analyze.js +208 -0
  114. package/dist/tools/ghidra-health.d.ts +37 -0
  115. package/dist/tools/ghidra-health.js +212 -0
  116. package/dist/tools/ioc-export.d.ts +209 -0
  117. package/dist/tools/ioc-export.js +542 -0
  118. package/dist/tools/packer-detect.d.ts +165 -0
  119. package/dist/tools/packer-detect.js +284 -0
  120. package/dist/tools/pe-exports-extract.d.ts +175 -0
  121. package/dist/tools/pe-exports-extract.js +253 -0
  122. package/dist/tools/pe-fingerprint.d.ts +234 -0
  123. package/dist/tools/pe-fingerprint.js +269 -0
  124. package/dist/tools/pe-imports-extract.d.ts +105 -0
  125. package/dist/tools/pe-imports-extract.js +245 -0
  126. package/dist/tools/report-generate.d.ts +157 -0
  127. package/dist/tools/report-generate.js +457 -0
  128. package/dist/tools/report-summarize.d.ts +2131 -0
  129. package/dist/tools/report-summarize.js +596 -0
  130. package/dist/tools/runtime-detect.d.ts +135 -0
  131. package/dist/tools/runtime-detect.js +247 -0
  132. package/dist/tools/sample-ingest.d.ts +94 -0
  133. package/dist/tools/sample-ingest.js +327 -0
  134. package/dist/tools/sample-profile-get.d.ts +183 -0
  135. package/dist/tools/sample-profile-get.js +121 -0
  136. package/dist/tools/sandbox-execute.d.ts +441 -0
  137. package/dist/tools/sandbox-execute.js +392 -0
  138. package/dist/tools/strings-extract.d.ts +375 -0
  139. package/dist/tools/strings-extract.js +314 -0
  140. package/dist/tools/strings-floss-decode.d.ts +143 -0
  141. package/dist/tools/strings-floss-decode.js +259 -0
  142. package/dist/tools/system-health.d.ts +434 -0
  143. package/dist/tools/system-health.js +446 -0
  144. package/dist/tools/task-cancel.d.ts +21 -0
  145. package/dist/tools/task-cancel.js +70 -0
  146. package/dist/tools/task-status.d.ts +27 -0
  147. package/dist/tools/task-status.js +106 -0
  148. package/dist/tools/task-sweep.d.ts +22 -0
  149. package/dist/tools/task-sweep.js +77 -0
  150. package/dist/tools/tool-help.d.ts +340 -0
  151. package/dist/tools/tool-help.js +261 -0
  152. package/dist/tools/yara-scan.d.ts +554 -0
  153. package/dist/tools/yara-scan.js +313 -0
  154. package/dist/types.d.ts +266 -0
  155. package/dist/types.js +41 -0
  156. package/dist/worker-pool.d.ts +204 -0
  157. package/dist/worker-pool.js +650 -0
  158. package/dist/workflows/deep-static.d.ts +104 -0
  159. package/dist/workflows/deep-static.js +276 -0
  160. package/dist/workflows/function-explanation-review.d.ts +655 -0
  161. package/dist/workflows/function-explanation-review.js +440 -0
  162. package/dist/workflows/reconstruct.d.ts +2053 -0
  163. package/dist/workflows/reconstruct.js +666 -0
  164. package/dist/workflows/semantic-name-review.d.ts +2418 -0
  165. package/dist/workflows/semantic-name-review.js +521 -0
  166. package/dist/workflows/triage.d.ts +659 -0
  167. package/dist/workflows/triage.js +1374 -0
  168. package/dist/workspace-manager.d.ts +150 -0
  169. package/dist/workspace-manager.js +411 -0
  170. package/ghidra_scripts/DecompileFunction.java +487 -0
  171. package/ghidra_scripts/DecompileFunction.py +150 -0
  172. package/ghidra_scripts/ExtractCFG.java +256 -0
  173. package/ghidra_scripts/ExtractCFG.py +233 -0
  174. package/ghidra_scripts/ExtractFunctions.java +442 -0
  175. package/ghidra_scripts/ExtractFunctions.py +101 -0
  176. package/ghidra_scripts/README.md +125 -0
  177. package/ghidra_scripts/SearchFunctionReferences.java +380 -0
  178. package/helpers/DotNetMetadataProbe/DotNetMetadataProbe.csproj +9 -0
  179. package/helpers/DotNetMetadataProbe/Program.cs +566 -0
  180. package/install-to-codex.ps1 +178 -0
  181. package/install-to-copilot.ps1 +303 -0
  182. package/package.json +101 -0
  183. package/requirements.txt +9 -0
  184. package/workers/requirements-dynamic.txt +11 -0
  185. package/workers/requirements.txt +8 -0
  186. package/workers/speakeasy_compat.py +175 -0
  187. package/workers/static_worker.py +5183 -0
  188. package/workers/yara_rules/default.yar +33 -0
  189. package/workers/yara_rules/malware_families.yar +93 -0
  190. package/workers/yara_rules/packers.yar +80 -0
@@ -0,0 +1,1962 @@
1
+ /**
2
+ * Decompiler Worker - Ghidra Headless integration
3
+ *
4
+ * Implements requirements 8.1-8.6:
5
+ * - Creates isolated Ghidra project spaces
6
+ * - Executes Ghidra Headless analysis
7
+ * - Extracts function lists
8
+ * - Stores results in database
9
+ * - Handles timeouts and failures
10
+ */
11
+ import { spawn } from 'child_process';
12
+ import path from 'path';
13
+ import fs from 'fs';
14
+ import { createHash, randomUUID } from 'crypto';
15
+ import { logger } from './logger.js';
16
+ import { ghidraConfig, createGhidraProject, buildProcessInvocation, } from './ghidra-config.js';
17
+ import { findBestGhidraAnalysis, getGhidraCapabilityStatus, isGhidraCapabilityReady, parseGhidraAnalysisMetadata, } from './ghidra-analysis-status.js';
18
+ import { buildRawCommandLine, decodeProcessStreams, } from './process-output.js';
19
/**
 * Error describing a failed, timed-out, or cancelled Ghidra process run.
 *
 * Besides the usual message it carries a short error code and the
 * diagnostics object that was captured for the run.
 */
export class GhidraProcessError extends Error {
    errorCode;
    diagnostics;
    /**
     * @param {string} message - Human-readable failure description.
     * @param {object} diagnostics - Captured process diagnostics, stored as-is.
     * @param {string} errorCode - Short code identifying the failure kind.
     */
    constructor(message, diagnostics, errorCode) {
        super(message);
        Object.assign(this, {
            name: 'GhidraProcessError',
            errorCode,
            diagnostics,
        });
    }
}
29
/**
 * Error describing Ghidra output that could not be parsed.
 *
 * Keeps the diagnostics object supplied by the caller so the failing output
 * can be inspected later.
 */
export class GhidraOutputParseError extends Error {
    diagnostics;
    /**
     * @param {string} message - Human-readable parse failure description.
     * @param {object} diagnostics - Diagnostics for the run, stored as-is.
     */
    constructor(message, diagnostics) {
        super(message);
        Object.assign(this, {
            name: 'GhidraOutputParseError',
            diagnostics,
        });
    }
}
37
/**
 * Return the diagnostics attached to a Ghidra error, if there are any.
 *
 * @param {unknown} error - Any thrown value.
 * @returns {object|undefined} The `diagnostics` property when `error` is a
 *   GhidraProcessError or GhidraOutputParseError; otherwise undefined.
 */
export function getGhidraDiagnostics(error) {
    const carriesDiagnostics = [GhidraProcessError, GhidraOutputParseError]
        .some((errorType) => error instanceof errorType);
    return carriesDiagnostics ? error.diagnostics : undefined;
}
43
/**
 * Produce a short, printable snippet from process output.
 *
 * NUL characters are removed and surrounding whitespace is trimmed before the
 * length check.
 *
 * @param {string|null|undefined} value - Raw text.
 * @param {number} [limit=240] - Maximum number of characters kept.
 * @returns {string|null} The cleaned text, cut to `limit` characters and
 *   suffixed with "..." when it was longer; null when nothing is left.
 */
function truncateDiagnosticText(value, limit = 240) {
    const cleaned = value ? value.replace(/\0/g, '').trim() : '';
    if (cleaned === '') {
        return null;
    }
    return cleaned.length > limit ? `${cleaned.slice(0, limit)}...` : cleaned;
}
56
/**
 * Classify a Ghidra failure into a structured descriptor.
 *
 * The error message and any attached stdout/stderr/spawn_error text are
 * joined into one corpus and checked against known failure signatures in a
 * fixed order; the first rule that matches decides the result.
 *
 * @param {unknown} error - The thrown value. Diagnostics are read from it when
 *   it is a GhidraProcessError or GhidraOutputParseError.
 * @param {string} [stage] - Optional label; echoed in the result and, when
 *   set, prefixed to the summary as `"<stage>: "`.
 * @returns {{code: string, category: string, stage: (string|undefined),
 *   summary: string, remediation_hints: string[], evidence: string[]}|undefined}
 *   The descriptor, or undefined when there is neither a message nor
 *   diagnostics to classify.
 */
export function normalizeGhidraError(error, stage) {
    const diagnostics = getGhidraDiagnostics(error);
    const message = error instanceof Error ? error.message : String(error);
    if (!message && !diagnostics) {
        return undefined;
    }
    const corpus = [message, diagnostics?.stderr, diagnostics?.stdout, diagnostics?.spawn_error]
        .filter((item) => typeof item === 'string' && item.length > 0)
        .join('\n');
    // Truncate each stream once and reuse the snippet.
    const stderrSnippet = truncateDiagnosticText(diagnostics?.stderr);
    const stdoutSnippet = truncateDiagnosticText(diagnostics?.stdout);
    // Only a numeric exit code is meaningful; null/undefined means "no exit code".
    const hasExitCode = typeof diagnostics?.exit_code === 'number';
    const evidence = [
        diagnostics?.raw_cmd ? `raw_cmd=${diagnostics.raw_cmd}` : '',
        hasExitCode ? `exit_code=${diagnostics.exit_code}` : '',
        diagnostics?.spawn_error ? `spawn_error=${diagnostics.spawn_error}` : '',
        stderrSnippet ? `stderr=${stderrSnippet}` : '',
        stdoutSnippet ? `stdout=${stdoutSnippet}` : '',
    ].filter((item) => item.length > 0);
    const describe = (code, category, summary, remediationHints) => ({
        code,
        category,
        stage,
        summary: stage ? `${stage}: ${summary}` : summary,
        remediation_hints: remediationHints,
        evidence,
    });
    if (diagnostics?.cancelled || /E_CANCELLED/i.test(corpus)) {
        return describe('cancelled', 'user', 'Ghidra task was cancelled before completion.', [
            'Re-run the tool if cancellation was accidental.',
            'Use task.status to confirm whether a queued or running job was cancelled.',
        ]);
    }
    if (diagnostics?.timed_out || /E_TIMEOUT|timed?\s*out/i.test(corpus)) {
        return describe('timeout', 'transient', 'Ghidra execution timed out before producing a complete result.', [
            'Increase the timeout for the current tool or queued job.',
            'Retry after reducing topk/include_xrefs/include_cfg scope if the sample is large.',
        ]);
    }
    if (/unable to lock project|lockexception/i.test(corpus)) {
        return describe('project_lock', 'transient', 'Ghidra project lock prevented the script from acquiring the project workspace.', [
            'Wait for the other Ghidra process to release the project lock, then retry.',
            'Avoid running multiple decompile/CFG/export operations against the same project concurrently.',
        ]);
    }
    if (/spawn.*EINVAL/i.test(corpus) || /EINVAL/i.test(diagnostics?.spawn_error || '')) {
        return describe('spawn_einval', 'configuration', 'Ghidra process could not be spawned due to Windows batch/script invocation mismatch (EINVAL).', [
            'Ensure GHIDRA_PATH/GHIDRA_INSTALL_DIR points to a valid Ghidra installation root.',
            'Prefer launching analyzeHeadless through the configured batch-wrapper path instead of hand-crafted shell quoting.',
            'Avoid broken quoting or partially expanded PATH entries when Ghidra lives under a path with spaces.',
        ]);
    }
    if (/Ghidra was not started with PyGhidra|Python is not available/i.test(corpus)) {
        return describe('pyghidra_unavailable', 'environment', 'PyGhidra is unavailable in the active environment, so Python post-scripts cannot run.', [
            'Use the Java post-script fallback if available.',
            'Install/configure the Python environment bundled for PyGhidra if Python post-scripts are required.',
        ]);
    }
    if (/require is not defined/i.test(corpus)) {
        return describe('script_runtime_require_undefined', 'script_output', 'The Ghidra-side script used a runtime that does not support require().', [
            'Do not use Node-style require() inside Ghidra post-scripts.',
            'Port the script to Java/Ghidra APIs or bundle dependencies explicitly for the target runtime.',
        ]);
    }
    if (/No JSON output found/i.test(corpus)) {
        return describe('missing_json_output', 'script_output', 'The Ghidra script exited without emitting the expected JSON payload.', [
            'Inspect stderr/stdout snippets to see whether the post-script crashed before printing JSON.',
            'Check for project-lock, PyGhidra, or script-runtime errors in the attached diagnostics.',
        ]);
    }
    const isSpawnError = error instanceof GhidraProcessError && error.errorCode === 'E_SPAWN';
    if (diagnostics?.spawn_error || isSpawnError) {
        return describe('spawn_failure', 'environment', 'The Ghidra process failed to start.', [
            'Verify the configured analyzeHeadless executable exists and is executable.',
            'Check PATH/GHIDRA_PATH/GHIDRA_INSTALL_DIR and Windows shell quoting for the current installation path.',
        ]);
    }
    // Require an actual numeric, non-zero exit code. A missing exit code used
    // to pass the old `!== null && !== 0` test and was reported as a process
    // failure even though no exit status was known.
    if (hasExitCode && diagnostics.exit_code !== 0) {
        return describe('ghidra_process_failure', 'process', 'Ghidra exited with a non-zero status.', [
            'Inspect stderr and raw_cmd to identify the failing post-script or analyzeHeadless phase.',
            'Retry with ghidra.health or a narrower tool scope if the failure is isolated to one capability.',
        ]);
    }
    return describe('unknown', 'process', 'Ghidra reported an unclassified failure.', [
        'Inspect the attached diagnostics and retry with ghidra.health for an end-to-end probe.',
    ]);
}
202
+ /**
203
+ * Decompiler Worker class
204
+ * Manages Ghidra Headless execution and result processing
205
+ */
206
+ export class DecompilerWorker {
207
+ database;
208
+ workspaceManager;
209
+ constructor(database, workspaceManager) {
210
+ this.database = database;
211
+ this.workspaceManager = workspaceManager;
212
+ }
213
+ async delay(ms) {
214
+ await new Promise((resolve) => setTimeout(resolve, ms));
215
+ }
216
+ isProjectLockFailure(error) {
217
+ const diagnostics = getGhidraDiagnostics(error);
218
+ const corpus = [
219
+ error instanceof Error ? error.message : String(error),
220
+ diagnostics?.stdout,
221
+ diagnostics?.stderr,
222
+ diagnostics?.spawn_error,
223
+ ]
224
+ .filter((value) => typeof value === 'string' && value.length > 0)
225
+ .join('\n');
226
+ return /unable to lock project|lockexception/i.test(corpus);
227
+ }
228
+ async runWithProjectLockRetry(operationLabel, operation, context, attempts = 5, initialDelayMs = 1500) {
229
+ let delayMs = initialDelayMs;
230
+ let lastError;
231
+ for (let attempt = 1; attempt <= attempts; attempt += 1) {
232
+ try {
233
+ return await operation();
234
+ }
235
+ catch (error) {
236
+ lastError = error;
237
+ if (!this.isProjectLockFailure(error) || attempt >= attempts) {
238
+ throw error;
239
+ }
240
+ logger.warn({
241
+ ...context,
242
+ attempt,
243
+ attempts,
244
+ retry_delay_ms: delayMs,
245
+ error: error instanceof Error ? error.message : String(error),
246
+ }, `${operationLabel} hit a transient Ghidra project lock; retrying`);
247
+ await this.delay(delayMs);
248
+ delayMs *= 2;
249
+ }
250
+ }
251
+ throw lastError instanceof Error ? lastError : new Error(`${operationLabel} failed`);
252
+ }
253
+ /**
254
+ * Resolve sample file path in workspace/original.
255
+ * Prefer legacy "sample.exe" name, then fall back to first regular file.
256
+ */
257
+ resolveSamplePath(originalDir) {
258
+ const legacyPath = path.join(originalDir, 'sample.exe');
259
+ if (fs.existsSync(legacyPath)) {
260
+ return legacyPath;
261
+ }
262
+ if (!fs.existsSync(originalDir)) {
263
+ throw new Error(`Sample directory not found: ${originalDir}`);
264
+ }
265
+ const files = fs
266
+ .readdirSync(originalDir, { withFileTypes: true })
267
+ .filter(entry => entry.isFile())
268
+ .map(entry => entry.name)
269
+ .sort((a, b) => a.localeCompare(b));
270
+ if (files.length === 0) {
271
+ throw new Error(`Sample file not found in workspace: ${originalDir}`);
272
+ }
273
+ return path.join(originalDir, files[0]);
274
+ }
275
+ /**
276
+ * Spawn Ghidra process with Windows batch-script compatibility.
277
+ * On Windows, spawning .bat/.cmd directly can throw EINVAL; route through
278
+ * buildProcessInvocation() so batch scripts run via explicit cmd.exe quoting.
279
+ */
280
+ spawnGhidraProcess(invocation, cwd) {
281
+ return spawn(invocation.command, invocation.args, {
282
+ cwd,
283
+ env: {
284
+ ...process.env,
285
+ },
286
+ windowsHide: true,
287
+ windowsVerbatimArguments: invocation.windowsVerbatimArguments === true,
288
+ });
289
+ }
290
+ buildProcessDiagnostics(invocation, cwd, decoded, exitCode, signal, timedOut, cancelled, spawnError) {
291
+ return {
292
+ raw_cmd: buildRawCommandLine(invocation.command, invocation.args),
293
+ command: invocation.command,
294
+ args: [...invocation.args],
295
+ cwd,
296
+ exit_code: exitCode,
297
+ signal,
298
+ timed_out: timedOut,
299
+ cancelled,
300
+ stdout: decoded.stdout.text,
301
+ stderr: decoded.stderr.text,
302
+ stdout_encoding: decoded.stdout.encoding,
303
+ stderr_encoding: decoded.stderr.encoding,
304
+ spawn_error: spawnError,
305
+ };
306
+ }
307
+ async runGhidraCommand(command, args, cwd, timeoutMs, abortSignal, timeoutMessage, failureMessage) {
308
+ const invocation = buildProcessInvocation(command, args);
309
+ return new Promise((resolve, reject) => {
310
+ if (abortSignal?.aborted) {
311
+ const diagnostics = this.buildProcessDiagnostics(invocation, cwd, decodeProcessStreams(Buffer.alloc(0), Buffer.alloc(0)), null, null, false, true);
312
+ reject(new GhidraProcessError('E_CANCELLED: Ghidra command cancelled before process start', diagnostics, 'E_CANCELLED'));
313
+ return;
314
+ }
315
+ const stdoutChunks = [];
316
+ const stderrChunks = [];
317
+ let timedOut = false;
318
+ let cancelled = false;
319
+ let settled = false;
320
+ const childProcess = this.spawnGhidraProcess(invocation, cwd);
321
+ let onAbort;
322
+ const settle = (fn) => {
323
+ if (settled) {
324
+ return;
325
+ }
326
+ settled = true;
327
+ if (abortSignal && onAbort) {
328
+ abortSignal.removeEventListener('abort', onAbort);
329
+ }
330
+ fn();
331
+ };
332
+ const timeoutTimer = setTimeout(() => {
333
+ timedOut = true;
334
+ childProcess.kill('SIGTERM');
335
+ // Force kill after 5 seconds if still running
336
+ setTimeout(() => {
337
+ if (!childProcess.killed) {
338
+ childProcess.kill('SIGKILL');
339
+ }
340
+ }, 5000);
341
+ }, timeoutMs);
342
+ onAbort = () => {
343
+ cancelled = true;
344
+ childProcess.kill('SIGTERM');
345
+ setTimeout(() => {
346
+ if (!childProcess.killed) {
347
+ childProcess.kill('SIGKILL');
348
+ }
349
+ }, 5000);
350
+ };
351
+ if (abortSignal) {
352
+ abortSignal.addEventListener('abort', onAbort);
353
+ }
354
+ childProcess.stdout?.on('data', (data) => {
355
+ stdoutChunks.push(Buffer.from(data));
356
+ });
357
+ childProcess.stderr?.on('data', (data) => {
358
+ stderrChunks.push(Buffer.from(data));
359
+ });
360
+ childProcess.on('close', (code, signal) => {
361
+ settle(() => {
362
+ clearTimeout(timeoutTimer);
363
+ const decoded = decodeProcessStreams(Buffer.concat(stdoutChunks), Buffer.concat(stderrChunks));
364
+ const diagnostics = this.buildProcessDiagnostics(invocation, cwd, decoded, code, signal, timedOut, cancelled);
365
+ if (timedOut) {
366
+ reject(new GhidraProcessError(timeoutMessage, diagnostics, 'E_TIMEOUT'));
367
+ return;
368
+ }
369
+ if (cancelled) {
370
+ reject(new GhidraProcessError('E_CANCELLED: Ghidra command cancelled by user', diagnostics, 'E_CANCELLED'));
371
+ return;
372
+ }
373
+ if (code !== 0) {
374
+ reject(new GhidraProcessError(`${failureMessage} with exit code ${code}`, diagnostics, 'E_GHIDRA_PROCESS'));
375
+ return;
376
+ }
377
+ resolve({
378
+ stdout: decoded.stdout.text,
379
+ stderr: decoded.stderr.text,
380
+ diagnostics,
381
+ });
382
+ });
383
+ });
384
+ childProcess.on('error', (error) => {
385
+ settle(() => {
386
+ clearTimeout(timeoutTimer);
387
+ const decoded = decodeProcessStreams(Buffer.concat(stdoutChunks), Buffer.concat(stderrChunks));
388
+ const diagnostics = this.buildProcessDiagnostics(invocation, cwd, decoded, null, null, timedOut, cancelled, error.message);
389
+ reject(new GhidraProcessError(`Failed to spawn Ghidra process: ${error.message}`, diagnostics, 'E_SPAWN'));
390
+ });
391
+ });
392
+ });
393
+ }
394
+ buildAnalyzeBaseArgs(projectPath, projectKey) {
395
+ return [projectPath, projectKey];
396
+ }
397
+ buildAnalysisArgs(projectPath, projectKey, samplePath, options) {
398
+ const timeout = options.timeout || 300000; // Default 5 minutes
399
+ const maxCpu = options.maxCpu || '4';
400
+ return [
401
+ ...this.buildAnalyzeBaseArgs(projectPath, projectKey),
402
+ '-import',
403
+ samplePath,
404
+ '-max-cpu',
405
+ maxCpu,
406
+ '-analysisTimeoutPerFile',
407
+ String(Math.floor(timeout / 1000)),
408
+ ];
409
+ }
410
+ buildExtractFunctionsArgs(projectPath, projectKey, samplePath, scriptName) {
411
+ return [
412
+ ...this.buildAnalyzeBaseArgs(projectPath, projectKey),
413
+ '-process',
414
+ path.basename(samplePath),
415
+ '-scriptPath',
416
+ ghidraConfig.scriptsDir,
417
+ '-postScript',
418
+ scriptName,
419
+ '-noanalysis',
420
+ ];
421
+ }
422
+ async executeMainAnalysis(projectPath, projectKey, samplePath, options) {
423
+ const timeout = options.timeout || 300000; // Default 5 minutes
424
+ const command = ghidraConfig.analyzeHeadlessPath;
425
+ const args = this.buildAnalysisArgs(projectPath, projectKey, samplePath, options);
426
+ logger.debug({
427
+ command,
428
+ args,
429
+ timeout,
430
+ }, 'Executing Ghidra Headless analysis phase');
431
+ try {
432
+ return await this.runGhidraCommand(command, args, projectPath, timeout, options.abortSignal, `E_TIMEOUT: Ghidra analysis exceeded timeout of ${timeout}ms`, 'Ghidra analysis failed');
433
+ }
434
+ catch (error) {
435
+ if (error instanceof GhidraProcessError) {
436
+ logger.error({
437
+ error_code: error.errorCode,
438
+ raw_cmd: error.diagnostics.raw_cmd,
439
+ exit_code: error.diagnostics.exit_code,
440
+ timed_out: error.diagnostics.timed_out,
441
+ stderr: error.diagnostics.stderr.substring(0, 1000),
442
+ stderr_encoding: error.diagnostics.stderr_encoding,
443
+ }, 'Ghidra analysis phase execution failed');
444
+ }
445
+ throw error;
446
+ }
447
+ }
448
+ async executeFunctionExtractionScript(projectPath, projectKey, samplePath, scriptName, timeoutMs) {
449
+ const command = ghidraConfig.analyzeHeadlessPath;
450
+ const args = this.buildExtractFunctionsArgs(projectPath, projectKey, samplePath, scriptName);
451
+ logger.debug({
452
+ command,
453
+ args,
454
+ timeout: timeoutMs,
455
+ script: scriptName,
456
+ }, 'Executing Ghidra function extraction post-script');
457
+ return this.runGhidraCommand(command, args, projectPath, timeoutMs, undefined, `E_TIMEOUT: Function extraction (${scriptName}) exceeded timeout of ${timeoutMs}ms`, `Function extraction (${scriptName}) failed`);
458
+ }
459
+ async tryExtractFunctionsWithFallback(projectPath, projectKey, samplePath, timeoutMs) {
460
+ const warnings = [];
461
+ const attempts = [];
462
+ const primaryScript = 'ExtractFunctions.java';
463
+ const fallbackScript = 'ExtractFunctions.py';
464
+ const runAndParse = async (scriptName) => {
465
+ let output;
466
+ try {
467
+ output = await this.executeFunctionExtractionScript(projectPath, projectKey, samplePath, scriptName, timeoutMs);
468
+ }
469
+ catch (error) {
470
+ const diagnostics = error instanceof GhidraProcessError || error instanceof GhidraOutputParseError
471
+ ? error.diagnostics
472
+ : undefined;
473
+ attempts.push({
474
+ script: scriptName,
475
+ diagnostics,
476
+ error: error instanceof Error ? error.message : String(error),
477
+ });
478
+ throw error;
479
+ }
480
+ try {
481
+ const parsed = this.parseGhidraOutput(output.stdout, output.stderr, output.diagnostics);
482
+ attempts.push({
483
+ script: scriptName,
484
+ stdout: output.stdout,
485
+ stderr: output.stderr,
486
+ diagnostics: output.diagnostics,
487
+ });
488
+ return parsed;
489
+ }
490
+ catch (parseError) {
491
+ const parseMessage = parseError instanceof Error ? parseError.message : String(parseError);
492
+ const diagnostics = parseError instanceof GhidraOutputParseError
493
+ ? parseError.diagnostics
494
+ : output.diagnostics;
495
+ attempts.push({
496
+ script: scriptName,
497
+ stdout: output.stdout,
498
+ stderr: output.stderr,
499
+ diagnostics,
500
+ parse_error: parseMessage,
501
+ error: parseMessage,
502
+ });
503
+ throw parseError;
504
+ }
505
+ };
506
+ const scriptOrder = [primaryScript, fallbackScript];
507
+ for (const scriptName of scriptOrder) {
508
+ try {
509
+ const parsed = await runAndParse(scriptName);
510
+ if (!parsed) {
511
+ continue;
512
+ }
513
+ if (scriptName === fallbackScript) {
514
+ warnings.push(`${primaryScript} failed in current Ghidra runtime. ` +
515
+ `Falling back to ${fallbackScript}.`);
516
+ }
517
+ return {
518
+ output: parsed,
519
+ warnings,
520
+ scriptUsed: scriptName,
521
+ attempts,
522
+ };
523
+ }
524
+ catch (scriptError) {
525
+ const reason = scriptError instanceof Error ? scriptError.message : String(scriptError);
526
+ if (scriptName === primaryScript) {
527
+ warnings.push(`${primaryScript} failed in current Ghidra runtime. ` +
528
+ `Falling back to ${fallbackScript}.`);
529
+ continue;
530
+ }
531
+ warnings.push(`Function extraction failed with ${scriptName}: ${reason}`);
532
+ }
533
+ }
534
+ const fallbackAttempt = attempts.find((item) => item.script === fallbackScript);
535
+ if (fallbackAttempt?.error) {
536
+ warnings.push(`Fallback ${fallbackScript} extraction failed: ${fallbackAttempt.error}`);
537
+ }
538
+ return {
539
+ warnings,
540
+ attempts,
541
+ };
542
+ }
543
+ selectProbeTarget(functions) {
544
+ const preferred = functions.find((item) => item.is_entry_point && !item.is_external) ||
545
+ functions.find((item) => !item.is_external && !item.is_thunk) ||
546
+ functions.find((item) => !item.is_external) ||
547
+ functions[0];
548
+ return preferred?.address;
549
+ }
550
+ buildCapabilityReadyStatus(target, warnings) {
551
+ return {
552
+ available: true,
553
+ status: 'ready',
554
+ target,
555
+ checked_at: new Date().toISOString(),
556
+ warnings: warnings && warnings.length > 0 ? warnings : undefined,
557
+ };
558
+ }
559
+ buildCapabilityFailureStatus(capability, target, error) {
560
+ const diagnostics = getGhidraDiagnostics(error);
561
+ const message = error instanceof Error ? error.message : String(error);
562
+ const warnings = [];
563
+ if (diagnostics?.stderr) {
564
+ warnings.push(this.buildOutputSnippet(diagnostics.stderr, 600));
565
+ }
566
+ else if (diagnostics?.stdout) {
567
+ warnings.push(this.buildOutputSnippet(diagnostics.stdout, 600));
568
+ }
569
+ return {
570
+ available: false,
571
+ status: 'degraded',
572
+ reason: capability === 'decompile'
573
+ ? `Decompile probe failed: ${message}`
574
+ : `CFG probe failed: ${message}`,
575
+ target,
576
+ checked_at: new Date().toISOString(),
577
+ warnings: warnings.length > 0 ? warnings : undefined,
578
+ details: diagnostics
579
+ ? {
580
+ raw_cmd: diagnostics.raw_cmd,
581
+ exit_code: diagnostics.exit_code,
582
+ timed_out: diagnostics.timed_out,
583
+ cancelled: diagnostics.cancelled,
584
+ spawn_error: diagnostics.spawn_error,
585
+ }
586
+ : undefined,
587
+ };
588
+ }
589
+ async probeCapability(capability, projectPath, projectKey, samplePath, target, timeoutMs) {
590
+ try {
591
+ const output = capability === 'decompile'
592
+ ? await this.executeDecompileScript(projectPath, projectKey, samplePath, target, false, timeoutMs)
593
+ : await this.executeCFGScript(projectPath, projectKey, samplePath, target, timeoutMs);
594
+ if (capability === 'decompile') {
595
+ const parsed = this.parseDecompileOutput(output.stdout, output.stderr);
596
+ if ('error' in parsed) {
597
+ throw new Error(parsed.error);
598
+ }
599
+ }
600
+ else {
601
+ const parsed = this.parseCFGOutput(output.stdout, output.stderr);
602
+ if ('error' in parsed) {
603
+ throw new Error(parsed.error);
604
+ }
605
+ }
606
+ return {
607
+ status: this.buildCapabilityReadyStatus(target),
608
+ output,
609
+ };
610
+ }
611
+ catch (error) {
612
+ return {
613
+ status: this.buildCapabilityFailureStatus(capability, target, error),
614
+ };
615
+ }
616
+ }
617
+ resolveAnalysisProject(analysis) {
618
+ const metadata = parseGhidraAnalysisMetadata(analysis.output_json);
619
+ const projectPath = typeof metadata.project_path === 'string' ? metadata.project_path : '';
620
+ const projectKey = typeof metadata.project_key === 'string' ? metadata.project_key : '';
621
+ if (!projectPath || !projectKey) {
622
+ throw new Error(`Ghidra analysis ${analysis.id} has no reusable project metadata for downstream scripts.`);
623
+ }
624
+ return {
625
+ analysis,
626
+ projectPath,
627
+ projectKey,
628
+ };
629
+ }
630
+ resolveGhidraAnalysisForCapability(sampleId, capability) {
631
+ const analyses = this.database.findAnalysesBySample(sampleId);
632
+ const selected = findBestGhidraAnalysis(analyses, capability);
633
+ if (!selected) {
634
+ const capabilityLabel = capability === 'function_index'
635
+ ? 'function index'
636
+ : capability === 'decompile'
637
+ ? 'decompile'
638
+ : 'cfg';
639
+ throw new Error(`No Ghidra analysis with ${capabilityLabel} readiness found for sample: ${sampleId}. Please run ghidra.analyze first.`);
640
+ }
641
+ const readiness = getGhidraCapabilityStatus(selected, capability);
642
+ if (!isGhidraCapabilityReady(selected, capability)) {
643
+ const reason = readiness.reason ? ` ${readiness.reason}` : '';
644
+ throw new Error(`Ghidra ${capability} is not ready for sample: ${sampleId}.${reason}`.trim());
645
+ }
646
+ const project = this.resolveAnalysisProject(selected);
647
+ return {
648
+ ...project,
649
+ readiness,
650
+ };
651
+ }
652
+ /**
653
+ * Analyze a sample with Ghidra Headless
654
+ *
655
+ * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
656
+ *
657
+ * @param sampleId - Sample identifier
658
+ * @param options - Ghidra analysis options
659
+ * @returns Analysis result with function count and project path
660
+ */
661
+ async analyze(sampleId, options = {}) {
662
+ // Check if Ghidra is configured
663
+ if (!ghidraConfig.isValid) {
664
+ throw new Error('Ghidra is not properly configured. Please set GHIDRA_PATH or GHIDRA_INSTALL_DIR environment variable.');
665
+ }
666
+ const startTime = Date.now();
667
+ // 1. Create analysis record (Requirement 8.2)
668
+ const analysisId = options.analysisId || randomUUID();
669
+ this.database.insertAnalysis({
670
+ id: analysisId,
671
+ sample_id: sampleId,
672
+ stage: 'ghidra',
673
+ backend: 'ghidra',
674
+ status: 'running',
675
+ started_at: new Date().toISOString(),
676
+ finished_at: null,
677
+ output_json: null,
678
+ metrics_json: null
679
+ });
680
+ logger.info({
681
+ analysisId,
682
+ sampleId,
683
+ options
684
+ }, 'Starting Ghidra analysis');
685
+ try {
686
+ // 2. Get sample information
687
+ const sample = this.database.findSample(sampleId);
688
+ if (!sample) {
689
+ throw new Error(`Sample not found: ${sampleId}`);
690
+ }
691
+ // 3. Get workspace paths
692
+ const workspace = await this.workspaceManager.getWorkspace(sampleId);
693
+ const samplePath = this.resolveSamplePath(workspace.original);
694
+ // Verify sample file exists
695
+ if (!fs.existsSync(samplePath)) {
696
+ throw new Error(`Sample file not found: ${samplePath}`);
697
+ }
698
+ // 4. Create isolated Ghidra project space (Requirement 8.1, 8.7)
699
+ const { projectPath, projectKey } = createGhidraProject(workspace.ghidra, options.projectKey);
700
+ logger.debug({
701
+ projectPath,
702
+ projectKey,
703
+ sampleId
704
+ }, 'Created Ghidra project');
705
+ // 5. Execute main Ghidra analysis/import phase
706
+ await this.executeMainAnalysis(projectPath, projectKey, samplePath, options);
707
+ // 6. Execute post-processing function extraction with fallback chain.
708
+ // If extraction fails but main analysis succeeded, persist partial_success.
709
+ const extraction = await this.tryExtractFunctionsWithFallback(projectPath, projectKey, samplePath, options.timeout || 300000);
710
+ const analysisOutput = extraction.output;
711
+ const extractionWarnings = extraction.warnings || [];
712
+ const extractionAttempts = extraction.attempts || [];
713
+ const probeTarget = analysisOutput
714
+ ? this.selectProbeTarget(analysisOutput.functions)
715
+ : undefined;
716
+ let decompileProbe;
717
+ let cfgProbe;
718
+ if (analysisOutput) {
719
+ // 7. Store functions to database (Requirement 8.4)
720
+ await this.storeFunctions(sampleId, analysisOutput.functions);
721
+ // 8. Store analysis artifact
722
+ const artifactId = randomUUID();
723
+ const artifactPath = `ghidra/functions_${projectKey}.json`;
724
+ const artifactFullPath = path.join(workspace.root, artifactPath);
725
+ // Ensure directory exists
726
+ const artifactDir = path.dirname(artifactFullPath);
727
+ if (!fs.existsSync(artifactDir)) {
728
+ fs.mkdirSync(artifactDir, { recursive: true });
729
+ }
730
+ // Write artifact
731
+ fs.writeFileSync(artifactFullPath, JSON.stringify(analysisOutput, null, 2));
732
+ // Compute artifact SHA256
733
+ const artifactSha256 = createHash('sha256')
734
+ .update(JSON.stringify(analysisOutput))
735
+ .digest('hex');
736
+ // Insert artifact record
737
+ this.database.insertArtifact({
738
+ id: artifactId,
739
+ sample_id: sampleId,
740
+ type: 'ghidra_functions',
741
+ path: artifactPath,
742
+ sha256: artifactSha256,
743
+ mime: 'application/json',
744
+ created_at: new Date().toISOString()
745
+ });
746
+ if (probeTarget) {
747
+ const probeTimeoutMs = Math.max(5000, Math.min(15000, Math.floor((options.timeout || 300000) / 6)));
748
+ decompileProbe = await this.probeCapability('decompile', projectPath, projectKey, samplePath, probeTarget, probeTimeoutMs);
749
+ cfgProbe = await this.probeCapability('cfg', projectPath, projectKey, samplePath, probeTarget, probeTimeoutMs);
750
+ }
751
+ }
752
+ // 9. Update analysis status
753
+ const elapsedMs = Date.now() - startTime;
754
+ const status = analysisOutput ? 'done' : 'partial_success';
755
+ const functionCount = analysisOutput?.function_count || 0;
756
+ const functionIndexReady = Boolean(analysisOutput && functionCount > 0);
757
+ const readiness = {
758
+ function_index: {
759
+ available: functionIndexReady,
760
+ status: functionIndexReady ? 'ready' : 'missing',
761
+ checked_at: new Date().toISOString(),
762
+ warnings: extractionWarnings.length > 0 ? extractionWarnings : undefined,
763
+ },
764
+ decompile: functionIndexReady
765
+ ? decompileProbe?.status ||
766
+ {
767
+ available: false,
768
+ status: 'missing',
769
+ reason: 'No decompile probe target was available from extracted functions.',
770
+ checked_at: new Date().toISOString(),
771
+ }
772
+ : {
773
+ available: false,
774
+ status: 'missing',
775
+ reason: 'Function index is unavailable, so decompile readiness was not probed.',
776
+ checked_at: new Date().toISOString(),
777
+ },
778
+ cfg: functionIndexReady
779
+ ? cfgProbe?.status ||
780
+ {
781
+ available: false,
782
+ status: 'missing',
783
+ reason: 'No CFG probe target was available from extracted functions.',
784
+ checked_at: new Date().toISOString(),
785
+ }
786
+ : {
787
+ available: false,
788
+ status: 'missing',
789
+ reason: 'Function index is unavailable, so CFG readiness was not probed.',
790
+ checked_at: new Date().toISOString(),
791
+ },
792
+ };
793
+ this.database.updateAnalysis(analysisId, {
794
+ status,
795
+ finished_at: new Date().toISOString(),
796
+ output_json: JSON.stringify({
797
+ function_count: functionCount,
798
+ project_path: projectPath,
799
+ project_key: projectKey,
800
+ readiness,
801
+ function_extraction: {
802
+ status: analysisOutput ? 'success' : 'failed',
803
+ script_used: extraction.scriptUsed,
804
+ warnings: extractionWarnings,
805
+ attempts: extractionAttempts,
806
+ },
807
+ end_to_end_probe: {
808
+ target: probeTarget,
809
+ decompile: decompileProbe?.status,
810
+ cfg: cfgProbe?.status,
811
+ checked_at: new Date().toISOString(),
812
+ },
813
+ }),
814
+ metrics_json: JSON.stringify({
815
+ elapsed_ms: elapsedMs,
816
+ function_count: functionCount
817
+ })
818
+ });
819
+ if (status === 'done') {
820
+ logger.info({
821
+ analysisId,
822
+ sampleId,
823
+ functionCount,
824
+ elapsedMs,
825
+ function_extraction_script: extraction.scriptUsed,
826
+ readiness,
827
+ }, 'Ghidra analysis completed successfully');
828
+ }
829
+ else {
830
+ logger.warn({
831
+ analysisId,
832
+ sampleId,
833
+ elapsedMs,
834
+ function_extraction_warnings: extractionWarnings,
835
+ readiness,
836
+ }, 'Ghidra analysis completed with partial_success (function extraction failed)');
837
+ }
838
+ return {
839
+ analysisId,
840
+ backend: 'ghidra',
841
+ functionCount,
842
+ projectPath,
843
+ status,
844
+ warnings: extractionWarnings.length > 0 ? extractionWarnings : undefined,
845
+ readiness,
846
+ };
847
+ }
848
+ catch (error) {
849
+ // Handle failure (Requirement 8.5, 8.6)
850
+ const errorMessage = error instanceof Error ? error.message : String(error);
851
+ const diagnostics = getGhidraDiagnostics(error);
852
+ const elapsedMs = Date.now() - startTime;
853
+ logger.error({
854
+ analysisId,
855
+ sampleId,
856
+ error: errorMessage,
857
+ ghidra_diagnostics: diagnostics,
858
+ elapsedMs
859
+ }, 'Ghidra analysis failed');
860
+ // Update analysis status to failed
861
+ const cancelled = error instanceof GhidraProcessError && error.errorCode === 'E_CANCELLED';
862
+ this.database.updateAnalysis(analysisId, {
863
+ status: cancelled ? 'cancelled' : 'failed',
864
+ finished_at: new Date().toISOString(),
865
+ output_json: JSON.stringify({
866
+ error: errorMessage,
867
+ ghidra_diagnostics: diagnostics
868
+ }),
869
+ metrics_json: JSON.stringify({
870
+ elapsed_ms: elapsedMs
871
+ })
872
+ });
873
+ throw error;
874
+ }
875
+ }
876
+ /**
877
+ * List functions from the functions table
878
+ *
879
+ * Requirements: 9.1
880
+ *
881
+ * @param sampleId - Sample identifier
882
+ * @param limit - Optional limit on number of functions to return
883
+ * @returns Array of function information
884
+ */
885
+ async listFunctions(sampleId, limit) {
886
+ logger.debug({ sampleId, limit }, 'Listing functions');
887
+ // Query functions from database
888
+ const dbFunctions = this.database.findFunctions(sampleId);
889
+ if (dbFunctions.length === 0) {
890
+ logger.warn({ sampleId }, 'No functions found for sample');
891
+ return [];
892
+ }
893
+ // Convert database functions to FunctionInfo format
894
+ const functionInfos = dbFunctions.map(func => ({
895
+ name: func.name || 'unknown',
896
+ address: func.address,
897
+ size: func.size || 0,
898
+ callers: func.caller_count || 0,
899
+ callees: func.callee_count || 0
900
+ }));
901
+ // Apply limit if specified
902
+ if (limit !== undefined && limit > 0) {
903
+ return functionInfos.slice(0, limit);
904
+ }
905
+ logger.info({
906
+ sampleId,
907
+ functionCount: functionInfos.length
908
+ }, 'Functions listed successfully');
909
+ return functionInfos;
910
+ }
911
+ /**
912
+ * Rank functions by interest score
913
+ *
914
+ * Requirements: 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8
915
+ *
916
+ * Scoring rules:
917
+ * - Large functions (> 1000 bytes): +10 points (Requirement 9.3)
918
+ * - High caller count (> 10): +5 * log(callers) points (Requirement 9.4)
919
+ * - Calls sensitive APIs: +15 points (Requirement 9.5)
920
+ * - Entry point or exported: +20 points (Requirement 9.6)
921
+ *
922
+ * @param sampleId - Sample identifier
923
+ * @param topK - Number of top functions to return (default: 20)
924
+ * @returns Array of ranked functions with scores and reasons
925
+ */
926
+ async rankFunctions(sampleId, topK = 20) {
927
+ logger.debug({ sampleId, topK }, 'Ranking functions');
928
+ // 1. Get all functions from database
929
+ const functions = this.database.findFunctions(sampleId);
930
+ if (functions.length === 0) {
931
+ logger.warn({ sampleId }, 'No functions found for ranking');
932
+ return [];
933
+ }
934
+ // 2. Define sensitive APIs (Requirement 9.5)
935
+ const sensitiveAPIs = [
936
+ 'CreateProcess',
937
+ 'CreateProcessA',
938
+ 'CreateProcessW',
939
+ 'WriteFile',
940
+ 'WriteFileEx',
941
+ 'RegSetValue',
942
+ 'RegSetValueEx',
943
+ 'RegSetValueExA',
944
+ 'RegSetValueExW',
945
+ 'InternetOpen',
946
+ 'InternetOpenA',
947
+ 'InternetOpenW',
948
+ 'InternetConnect',
949
+ 'HttpOpenRequest',
950
+ 'HttpSendRequest',
951
+ 'URLDownloadToFile',
952
+ 'WinExec',
953
+ 'ShellExecute',
954
+ 'ShellExecuteA',
955
+ 'ShellExecuteW',
956
+ 'VirtualAlloc',
957
+ 'VirtualAllocEx',
958
+ 'CreateRemoteThread',
959
+ 'WriteProcessMemory',
960
+ 'SetWindowsHookEx',
961
+ 'GetProcAddress',
962
+ 'LoadLibrary',
963
+ 'LoadLibraryA',
964
+ 'LoadLibraryW'
965
+ ];
966
+ const dynamicResolverAPIs = new Set(['GetProcAddress', 'LoadLibrary', 'LoadLibraryA', 'LoadLibraryW']);
967
+ const dynamicResolverLookup = new Set(Array.from(dynamicResolverAPIs, (item) => item.toLowerCase()));
968
+ const normalizeApiName = (value) => value.toLowerCase();
969
+ // 3. Calculate score for each function
970
+ const rankedFunctions = functions.map(func => {
971
+ let score = 0.0;
972
+ const reasons = [];
973
+ const xrefSummary = [];
974
+ // Rule 1: Large function (> 1000 bytes) - Requirement 9.3
975
+ if (func.size && func.size > 1000) {
976
+ score += 10.0;
977
+ reasons.push('large_function');
978
+ }
979
+ // Rule 2: High caller count (> 10) - Requirement 9.4
980
+ if (func.caller_count && func.caller_count > 10) {
981
+ const callerScore = 5.0 * Math.log(func.caller_count);
982
+ score += callerScore;
983
+ reasons.push('high_callers');
984
+ }
985
+ // Rule 3: Calls sensitive APIs - Requirement 9.5
986
+ if (func.callees) {
987
+ try {
988
+ const callees = JSON.parse(func.callees);
989
+ const matchedAPIs = callees.filter(callee => sensitiveAPIs.some(api => callee.includes(api)));
990
+ const normalizedMatched = new Set(matchedAPIs.map(normalizeApiName));
991
+ const hasDynamicResolver = Array.from(dynamicResolverAPIs).some((api) => normalizedMatched.has(api.toLowerCase()));
992
+ if (matchedAPIs.length > 0) {
993
+ score += 15.0;
994
+ matchedAPIs.forEach(api => {
995
+ reasons.push(`calls_sensitive_api:${api}`);
996
+ const provenance = dynamicResolverLookup.has(normalizeApiName(api))
997
+ ? 'dynamic_resolution_api'
998
+ : hasDynamicResolver
999
+ ? 'dynamic_resolution_helper'
1000
+ : 'static_named_call';
1001
+ const confidence = provenance === 'dynamic_resolution_api'
1002
+ ? 0.9
1003
+ : provenance === 'dynamic_resolution_helper'
1004
+ ? 0.62
1005
+ : 0.78;
1006
+ const evidence = provenance === 'dynamic_resolution_helper'
1007
+ ? [
1008
+ `callee:${api}`,
1009
+ ...matchedAPIs
1010
+ .filter((item) => dynamicResolverLookup.has(normalizeApiName(item)))
1011
+ .slice(0, 2)
1012
+ .map((item) => `resolver:${item}`),
1013
+ ]
1014
+ : [`callee:${api}`];
1015
+ xrefSummary.push({
1016
+ api,
1017
+ provenance,
1018
+ confidence,
1019
+ evidence,
1020
+ });
1021
+ });
1022
+ }
1023
+ }
1024
+ catch (error) {
1025
+ logger.warn({
1026
+ sampleId,
1027
+ address: func.address,
1028
+ error: error instanceof Error ? error.message : String(error)
1029
+ }, 'Failed to parse callees JSON');
1030
+ }
1031
+ }
1032
+ // Rule 4: Entry point or exported function - Requirement 9.6
1033
+ if (func.is_entry_point === 1 || func.is_exported === 1) {
1034
+ score += 20.0;
1035
+ if (func.is_entry_point === 1) {
1036
+ reasons.push('entry_point');
1037
+ }
1038
+ if (func.is_exported === 1) {
1039
+ reasons.push('exported');
1040
+ }
1041
+ }
1042
+ return {
1043
+ address: func.address,
1044
+ name: func.name || 'unknown',
1045
+ score,
1046
+ reasons,
1047
+ xref_summary: xrefSummary.length > 0 ? xrefSummary : undefined,
1048
+ };
1049
+ });
1050
+ // 4. Sort by score descending
1051
+ rankedFunctions.sort((a, b) => b.score - a.score);
1052
+ // 5. Update functions table with scores and tags (Requirement 9.7)
1053
+ for (const rankedFunc of rankedFunctions) {
1054
+ this.database.updateFunction(sampleId, rankedFunc.address, {
1055
+ score: rankedFunc.score,
1056
+ tags: JSON.stringify(rankedFunc.reasons)
1057
+ });
1058
+ }
1059
+ // 6. Return top K functions (Requirement 9.8)
1060
+ const topFunctions = rankedFunctions.slice(0, topK);
1061
+ logger.info({
1062
+ sampleId,
1063
+ totalFunctions: functions.length,
1064
+ topK,
1065
+ topScore: topFunctions[0]?.score || 0
1066
+ }, 'Functions ranked successfully');
1067
+ return topFunctions;
1068
+ }
1069
+ async searchFunctions(sampleId, options) {
1070
+ const apiQuery = options.apiQuery?.trim() || '';
1071
+ const stringQuery = options.stringQuery?.trim() || '';
1072
+ const limit = Math.max(1, options.limit || 20);
1073
+ if (!apiQuery && !stringQuery) {
1074
+ throw new Error('At least one of apiQuery or stringQuery must be provided.');
1075
+ }
1076
+ const sample = this.database.findSample(sampleId);
1077
+ if (!sample) {
1078
+ throw new Error(`Sample not found: ${sampleId}`);
1079
+ }
1080
+ if (!stringQuery) {
1081
+ try {
1082
+ if (ghidraConfig.isValid) {
1083
+ return await this.searchFunctionsWithGhidra(sampleId, apiQuery, '', limit, options.timeout || 30000);
1084
+ }
1085
+ }
1086
+ catch (error) {
1087
+ logger.warn({
1088
+ sampleId,
1089
+ apiQuery,
1090
+ error: error instanceof Error ? error.message : String(error),
1091
+ }, 'Falling back to function-index API search after Ghidra search failure');
1092
+ }
1093
+ return this.searchFunctionsFromIndex(sampleId, apiQuery, limit);
1094
+ }
1095
+ if (!ghidraConfig.isValid) {
1096
+ throw new Error('Ghidra is required for string-to-function reverse lookup. Please set GHIDRA_PATH or GHIDRA_INSTALL_DIR and run ghidra.analyze first.');
1097
+ }
1098
+ return this.searchFunctionsWithGhidra(sampleId, apiQuery, stringQuery, limit, options.timeout || 30000);
1099
+ }
1100
+ /**
1101
+ * Decompile a specific function
1102
+ *
1103
+ * Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6
1104
+ *
1105
+ * @param sampleId - Sample identifier
1106
+ * @param addressOrSymbol - Function address (hex string) or symbol name
1107
+ * @param includeXrefs - Whether to include cross-references (default: false)
1108
+ * @param timeout - Timeout in milliseconds (default: 30000)
1109
+ * @returns Decompiled function with pseudocode, callers, callees, and optional xrefs
1110
+ */
1111
+ async decompileFunction(sampleId, addressOrSymbol, includeXrefs = false, timeout = 30000) {
1112
+ // Check if Ghidra is configured
1113
+ if (!ghidraConfig.isValid) {
1114
+ throw new Error('Ghidra is not properly configured. Please set GHIDRA_PATH or GHIDRA_INSTALL_DIR environment variable.');
1115
+ }
1116
+ logger.debug({
1117
+ sampleId,
1118
+ addressOrSymbol,
1119
+ includeXrefs,
1120
+ timeout
1121
+ }, 'Decompiling function');
1122
+ // 1. Validate that the sample has been analyzed
1123
+ const sample = this.database.findSample(sampleId);
1124
+ if (!sample) {
1125
+ throw new Error(`Sample not found: ${sampleId}`);
1126
+ }
1127
+ const resolved = this.resolveGhidraAnalysisForCapability(sampleId, 'decompile');
1128
+ // 2. Get workspace and project paths
1129
+ const workspace = await this.workspaceManager.getWorkspace(sampleId);
1130
+ const samplePath = this.resolveSamplePath(workspace.original);
1131
+ // Verify sample file exists
1132
+ if (!fs.existsSync(samplePath)) {
1133
+ throw new Error(`Sample file not found: ${samplePath}`);
1134
+ }
1135
+ // 3. Reuse the Ghidra project from the capability-ready analysis
1136
+ const { projectPath, projectKey } = resolved;
1137
+ // 4. Execute DecompileFunction.py script
1138
+ try {
1139
+ const result = await this.runWithProjectLockRetry('Function decompilation', async () => {
1140
+ const output = await this.executeDecompileScript(projectPath, projectKey, samplePath, addressOrSymbol, includeXrefs, timeout);
1141
+ const parsed = this.parseDecompileOutput(output.stdout, output.stderr, output.diagnostics);
1142
+ if ('error' in parsed) {
1143
+ throw parsed.diagnostics
1144
+ ? new GhidraOutputParseError(parsed.error, parsed.diagnostics)
1145
+ : new Error(parsed.error);
1146
+ }
1147
+ return parsed;
1148
+ }, {
1149
+ sampleId,
1150
+ addressOrSymbol,
1151
+ includeXrefs,
1152
+ });
1153
+ logger.info({
1154
+ sampleId,
1155
+ function: result.function,
1156
+ address: result.address,
1157
+ pseudocodeLength: result.pseudocode?.length || 0,
1158
+ callerCount: result.callers?.length || 0,
1159
+ calleeCount: result.callees?.length || 0,
1160
+ xrefCount: result.xrefs?.length || 0
1161
+ }, 'Function decompiled successfully');
1162
+ return result;
1163
+ }
1164
+ catch (error) {
1165
+ const errorMessage = error instanceof Error ? error.message : String(error);
1166
+ const diagnostics = getGhidraDiagnostics(error);
1167
+ logger.error({
1168
+ sampleId,
1169
+ addressOrSymbol,
1170
+ error: errorMessage,
1171
+ ghidra_diagnostics: diagnostics
1172
+ }, 'Function decompilation failed');
1173
+ throw error;
1174
+ }
1175
+ }
1176
+ buildOutputSnippet(output, limit = 1200) {
1177
+ const normalized = output.replace(/\0/g, '').trim();
1178
+ if (normalized.length === 0) {
1179
+ return '(empty)';
1180
+ }
1181
+ if (normalized.length <= limit) {
1182
+ return normalized;
1183
+ }
1184
+ const truncated = normalized.slice(0, limit);
1185
+ return `${truncated}... (truncated ${normalized.length - limit} chars)`;
1186
+ }
1187
+ buildSyntheticDiagnostics(stdout, stderr) {
1188
+ return {
1189
+ raw_cmd: 'unknown',
1190
+ command: 'unknown',
1191
+ args: [],
1192
+ cwd: process.cwd(),
1193
+ exit_code: 0,
1194
+ signal: null,
1195
+ timed_out: false,
1196
+ cancelled: false,
1197
+ stdout,
1198
+ stderr,
1199
+ stdout_encoding: 'utf-8',
1200
+ stderr_encoding: 'utf-8',
1201
+ };
1202
+ }
1203
+ buildNoJsonOutputMessage(stage, stdout, stderr, diagnostics) {
1204
+ const rawCommand = diagnostics?.raw_cmd ? `raw_cmd=${diagnostics.raw_cmd}` : undefined;
1205
+ const lockHint = /unable to lock project|lockexception/i.test(`${stdout}\n${stderr}`)
1206
+ ? 'signal_hint=project_lock_detected'
1207
+ : undefined;
1208
+ return [
1209
+ `${stage}: No JSON output found`,
1210
+ rawCommand,
1211
+ lockHint,
1212
+ `stdout_snippet=${this.buildOutputSnippet(stdout)}`,
1213
+ `stderr_snippet=${this.buildOutputSnippet(stderr)}`,
1214
+ ]
1215
+ .filter((value) => Boolean(value))
1216
+ .join(' | ');
1217
+ }
1218
+ normalizeNamedAddressList(raw) {
1219
+ if (!Array.isArray(raw)) {
1220
+ return [];
1221
+ }
1222
+ const normalized = [];
1223
+ const seen = new Set();
1224
+ for (const item of raw) {
1225
+ if (!item || typeof item !== 'object') {
1226
+ continue;
1227
+ }
1228
+ const typed = item;
1229
+ const address = typeof typed.address === 'string' ? typed.address : '';
1230
+ const name = typeof typed.name === 'string' ? typed.name : '';
1231
+ if (!address && !name) {
1232
+ continue;
1233
+ }
1234
+ const key = `${address}|${name}`;
1235
+ if (seen.has(key)) {
1236
+ continue;
1237
+ }
1238
+ seen.add(key);
1239
+ normalized.push({ address, name });
1240
+ }
1241
+ return normalized;
1242
+ }
1243
+ normalizeStringArray(raw) {
1244
+ if (!Array.isArray(raw)) {
1245
+ return [];
1246
+ }
1247
+ return Array.from(new Set(raw.filter((item) => typeof item === 'string' && item.trim().length > 0)));
1248
+ }
1249
+ normalizeFunctionRelationships(raw) {
1250
+ if (!Array.isArray(raw)) {
1251
+ return [];
1252
+ }
1253
+ const normalized = [];
1254
+ const seen = new Set();
1255
+ for (const item of raw) {
1256
+ if (!item || typeof item !== 'object') {
1257
+ continue;
1258
+ }
1259
+ const typed = item;
1260
+ const address = typeof typed.address === 'string' ? typed.address : '';
1261
+ const name = typeof typed.name === 'string' ? typed.name : '';
1262
+ const relationTypes = this.normalizeStringArray(typed.relation_types);
1263
+ const referenceTypes = this.normalizeStringArray(typed.reference_types);
1264
+ const referenceAddresses = this.normalizeStringArray(typed.reference_addresses);
1265
+ const targetAddresses = this.normalizeStringArray(typed.target_addresses);
1266
+ const resolvedBy = typeof typed.resolved_by === 'string' ? typed.resolved_by : undefined;
1267
+ const isExact = typeof typed.is_exact === 'boolean' ? typed.is_exact : undefined;
1268
+ if (!address && !name && relationTypes.length === 0) {
1269
+ continue;
1270
+ }
1271
+ const key = `${address}|${name}|${relationTypes.join(',')}|${referenceTypes.join(',')}`;
1272
+ if (seen.has(key)) {
1273
+ continue;
1274
+ }
1275
+ seen.add(key);
1276
+ normalized.push({
1277
+ address,
1278
+ name,
1279
+ relation_types: relationTypes,
1280
+ reference_types: referenceTypes,
1281
+ reference_addresses: referenceAddresses,
1282
+ target_addresses: targetAddresses.length > 0 ? targetAddresses : undefined,
1283
+ resolved_by: resolvedBy,
1284
+ is_exact: isExact,
1285
+ });
1286
+ }
1287
+ return normalized;
1288
+ }
1289
+ normalizeCrossReferences(raw) {
1290
+ if (!Array.isArray(raw)) {
1291
+ return [];
1292
+ }
1293
+ const normalized = [];
1294
+ for (const item of raw) {
1295
+ if (!item || typeof item !== 'object') {
1296
+ continue;
1297
+ }
1298
+ const typed = item;
1299
+ if (typeof typed.from_address !== 'string' || typeof typed.type !== 'string') {
1300
+ continue;
1301
+ }
1302
+ normalized.push({
1303
+ from_address: typed.from_address,
1304
+ type: typed.type,
1305
+ is_call: Boolean(typed.is_call),
1306
+ is_data: Boolean(typed.is_data),
1307
+ from_function: typeof typed.from_function === 'string' ? typed.from_function : undefined,
1308
+ });
1309
+ }
1310
+ return normalized;
1311
+ }
1312
+ normalizeGhidraFunction(raw) {
1313
+ if (!raw || typeof raw !== 'object') {
1314
+ return null;
1315
+ }
1316
+ const typed = raw;
1317
+ const callers = this.normalizeNamedAddressList(typed.callers);
1318
+ const callees = this.normalizeNamedAddressList(typed.callees);
1319
+ const callerRelationships = this.normalizeFunctionRelationships(typed.caller_relationships);
1320
+ const calleeRelationships = this.normalizeFunctionRelationships(typed.callee_relationships);
1321
+ if (typeof typed.address !== 'string' || typeof typed.name !== 'string') {
1322
+ return null;
1323
+ }
1324
+ return {
1325
+ address: typed.address,
1326
+ name: typed.name,
1327
+ size: Number(typed.size || 0),
1328
+ is_thunk: Boolean(typed.is_thunk),
1329
+ is_external: Boolean(typed.is_external),
1330
+ calling_convention: typeof typed.calling_convention === 'string' ? typed.calling_convention : 'unknown',
1331
+ signature: typeof typed.signature === 'string' ? typed.signature : '',
1332
+ callers,
1333
+ caller_count: Number(typed.caller_count || callers.length || callerRelationships.length || 0),
1334
+ callees,
1335
+ callee_count: Number(typed.callee_count || callees.length || calleeRelationships.length || 0),
1336
+ caller_relationships: callerRelationships.length > 0 ? callerRelationships : undefined,
1337
+ callee_relationships: calleeRelationships.length > 0 ? calleeRelationships : undefined,
1338
+ is_entry_point: Boolean(typed.is_entry_point),
1339
+ is_exported: Boolean(typed.is_exported),
1340
+ };
1341
+ }
1342
+ normalizeDecompiledFunction(raw) {
1343
+ const typed = raw;
1344
+ const callers = this.normalizeNamedAddressList(typed.callers);
1345
+ const callees = this.normalizeNamedAddressList(typed.callees);
1346
+ const callerRelationships = this.normalizeFunctionRelationships(typed.caller_relationships);
1347
+ const calleeRelationships = this.normalizeFunctionRelationships(typed.callee_relationships);
1348
+ const xrefs = this.normalizeCrossReferences(typed.xrefs);
1349
+ return {
1350
+ function: typeof typed.function === 'string' ? typed.function : 'unknown',
1351
+ address: typeof typed.address === 'string' ? typed.address : '',
1352
+ pseudocode: typeof typed.pseudocode === 'string' ? typed.pseudocode : '',
1353
+ callers,
1354
+ callees,
1355
+ caller_relationships: callerRelationships.length > 0 ? callerRelationships : undefined,
1356
+ callee_relationships: calleeRelationships.length > 0 ? calleeRelationships : undefined,
1357
+ xrefs: xrefs.length > 0 ? xrefs : undefined,
1358
+ };
1359
+ }
1360
+ /**
1361
+ * Parse Ghidra output JSON
1362
+ *
1363
+ * Requirements: 8.3
1364
+ *
1365
+ * @param output - Ghidra stdout output
1366
+ * @returns Parsed analysis output
1367
+ */
1368
+ parseGhidraOutput(output, stderr, diagnostics) {
1369
+ try {
1370
+ // Extract JSON from output (Ghidra may output other text before/after JSON)
1371
+ const jsonMatch = output.match(/\{[\s\S]*"functions"[\s\S]*\}/);
1372
+ if (!jsonMatch) {
1373
+ throw new GhidraOutputParseError(this.buildNoJsonOutputMessage('ghidra.analyze', output, stderr, diagnostics), diagnostics || this.buildSyntheticDiagnostics(output, stderr));
1374
+ }
1375
+ const parsed = JSON.parse(jsonMatch[0]);
1376
+ // Validate required fields
1377
+ if (!parsed.functions || !Array.isArray(parsed.functions)) {
1378
+ throw new Error('Invalid Ghidra output: missing functions array');
1379
+ }
1380
+ const functions = parsed.functions
1381
+ .map((item) => this.normalizeGhidraFunction(item))
1382
+ .filter((item) => Boolean(item));
1383
+ const normalized = {
1384
+ program_name: typeof parsed.program_name === 'string' ? parsed.program_name : 'unknown',
1385
+ program_path: typeof parsed.program_path === 'string' ? parsed.program_path : '',
1386
+ function_count: Number(parsed.function_count || functions.length),
1387
+ functions,
1388
+ };
1389
+ logger.debug({
1390
+ functionCount: normalized.function_count,
1391
+ programName: normalized.program_name
1392
+ }, 'Parsed Ghidra output');
1393
+ return normalized;
1394
+ }
1395
+ catch (error) {
1396
+ if (error instanceof GhidraOutputParseError) {
1397
+ throw error;
1398
+ }
1399
+ const errorMessage = error instanceof Error ? error.message : String(error);
1400
+ logger.error({
1401
+ error: errorMessage,
1402
+ outputPreview: output.substring(0, 500),
1403
+ stderrPreview: stderr.substring(0, 500),
1404
+ }, 'Failed to parse Ghidra output');
1405
+ throw new GhidraOutputParseError(`Failed to parse Ghidra output: ${errorMessage}`, diagnostics || this.buildSyntheticDiagnostics(output, stderr));
1406
+ }
1407
+ }
1408
+ /**
1409
+ * Execute DecompileFunction.py script
1410
+ *
1411
+ * Requirements: 10.1, 10.2, 10.6 (timeout handling)
1412
+ *
1413
+ * @param projectPath - Ghidra project directory path
1414
+ * @param projectKey - Unique project key
1415
+ * @param samplePath - Path to sample file
1416
+ * @param addressOrSymbol - Function address or symbol name
1417
+ * @param includeXrefs - Whether to include cross-references
1418
+ * @param timeout - Timeout in milliseconds
1419
+ * @returns Ghidra output (stdout)
1420
+ */
1421
+ async executeDecompileScript(projectPath, projectKey, samplePath, addressOrSymbol, includeXrefs, timeout) {
1422
+ const scriptOrder = ['DecompileFunction.java'];
1423
+ let lastError;
1424
+ for (const scriptName of scriptOrder) {
1425
+ const command = ghidraConfig.analyzeHeadlessPath;
1426
+ const args = [
1427
+ projectPath,
1428
+ projectKey,
1429
+ '-process', path.basename(samplePath),
1430
+ '-readOnly',
1431
+ '-scriptPath', ghidraConfig.scriptsDir,
1432
+ '-postScript', scriptName, addressOrSymbol, String(includeXrefs),
1433
+ '-noanalysis'
1434
+ ];
1435
+ logger.debug({
1436
+ command,
1437
+ args,
1438
+ timeout,
1439
+ script: scriptName,
1440
+ }, 'Executing function decompilation post-script');
1441
+ try {
1442
+ return await this.runGhidraCommand(command, args, projectPath, timeout, undefined, `E_TIMEOUT: Function decompilation exceeded timeout of ${timeout}ms`, `Function decompilation failed (${scriptName})`);
1443
+ }
1444
+ catch (error) {
1445
+ lastError = error;
1446
+ const diagnostics = getGhidraDiagnostics(error);
1447
+ logger.warn({
1448
+ script: scriptName,
1449
+ error: error instanceof Error ? error.message : String(error),
1450
+ ghidra_diagnostics: diagnostics,
1451
+ }, 'Function decompilation script attempt failed');
1452
+ }
1453
+ }
1454
+ throw lastError instanceof Error
1455
+ ? lastError
1456
+ : new Error('Function decompilation failed for all configured post-scripts.');
1457
+ }
1458
+ /**
1459
+ * Parse decompile script output
1460
+ *
1461
+ * Requirements: 10.3, 10.4, 10.5
1462
+ *
1463
+ * @param output - Script stdout output
1464
+ * @returns Parsed decompiled function or error
1465
+ */
1466
+ parseDecompileOutput(output, stderr, diagnostics) {
1467
+ try {
1468
+ // Extract JSON from output
1469
+ const jsonMatch = output.match(/\{[\s\S]*\}/);
1470
+ if (!jsonMatch) {
1471
+ throw new GhidraOutputParseError(this.buildNoJsonOutputMessage('code.function.decompile', output, stderr, diagnostics), diagnostics || this.buildSyntheticDiagnostics(output, stderr));
1472
+ }
1473
+ const parsed = JSON.parse(jsonMatch[0]);
1474
+ // Check for error in the result
1475
+ if (parsed.error) {
1476
+ return {
1477
+ error: typeof parsed.error === 'string' ? parsed.error : String(parsed.error),
1478
+ diagnostics: diagnostics || this.buildSyntheticDiagnostics(output, stderr),
1479
+ };
1480
+ }
1481
+ // Validate required fields
1482
+ if (!parsed.function || !parsed.address || !parsed.pseudocode) {
1483
+ throw new Error('Invalid decompile output: missing required fields');
1484
+ }
1485
+ const normalized = this.normalizeDecompiledFunction(parsed);
1486
+ logger.debug({
1487
+ function: normalized.function,
1488
+ address: normalized.address,
1489
+ pseudocodeLength: normalized.pseudocode.length,
1490
+ callerRelationships: normalized.caller_relationships?.length || 0,
1491
+ calleeRelationships: normalized.callee_relationships?.length || 0,
1492
+ }, 'Parsed decompile output');
1493
+ return normalized;
1494
+ }
1495
+ catch (error) {
1496
+ const errorMessage = error instanceof Error ? error.message : String(error);
1497
+ const normalizedDiagnostics = diagnostics || getGhidraDiagnostics(error) || this.buildSyntheticDiagnostics(output, stderr);
1498
+ logger.error({
1499
+ error: errorMessage,
1500
+ outputPreview: output.substring(0, 500),
1501
+ stderrPreview: stderr.substring(0, 500),
1502
+ }, 'Failed to parse decompile output');
1503
+ return {
1504
+ error: `Failed to parse decompile output: ${errorMessage}`,
1505
+ diagnostics: normalizedDiagnostics,
1506
+ };
1507
+ }
1508
+ }
1509
+ /**
1510
+ * Get control flow graph for a function
1511
+ *
1512
+ * Requirements: 11.1, 11.2, 11.3, 11.4, 11.5
1513
+ *
1514
+ * @param sampleId - Sample identifier
1515
+ * @param addressOrSymbol - Function address (hex string) or symbol name
1516
+ * @param timeout - Timeout in milliseconds (default: 30000)
1517
+ * @returns Control flow graph with nodes and edges
1518
+ */
1519
+ async getFunctionCFG(sampleId, addressOrSymbol, timeout = 30000) {
1520
+ // Check if Ghidra is configured
1521
+ if (!ghidraConfig.isValid) {
1522
+ throw new Error('Ghidra is not properly configured. Please set GHIDRA_PATH or GHIDRA_INSTALL_DIR environment variable.');
1523
+ }
1524
+ logger.debug({
1525
+ sampleId,
1526
+ addressOrSymbol,
1527
+ timeout
1528
+ }, 'Extracting function CFG');
1529
+ // 1. Validate that the sample has been analyzed
1530
+ const sample = this.database.findSample(sampleId);
1531
+ if (!sample) {
1532
+ throw new Error(`Sample not found: ${sampleId}`);
1533
+ }
1534
+ const resolved = this.resolveGhidraAnalysisForCapability(sampleId, 'cfg');
1535
+ // 2. Get workspace and project paths
1536
+ const workspace = await this.workspaceManager.getWorkspace(sampleId);
1537
+ const samplePath = this.resolveSamplePath(workspace.original);
1538
+ // Verify sample file exists
1539
+ if (!fs.existsSync(samplePath)) {
1540
+ throw new Error(`Sample file not found: ${samplePath}`);
1541
+ }
1542
+ // 3. Reuse the Ghidra project from the capability-ready analysis
1543
+ const { projectPath, projectKey } = resolved;
1544
+ // 4. Execute ExtractCFG.py script
1545
+ try {
1546
+ const result = await this.runWithProjectLockRetry('Function CFG extraction', async () => {
1547
+ const output = await this.executeCFGScript(projectPath, projectKey, samplePath, addressOrSymbol, timeout);
1548
+ const parsed = this.parseCFGOutput(output.stdout, output.stderr, output.diagnostics);
1549
+ if ('error' in parsed) {
1550
+ throw parsed.diagnostics
1551
+ ? new GhidraOutputParseError(parsed.error, parsed.diagnostics)
1552
+ : new Error(parsed.error);
1553
+ }
1554
+ return parsed;
1555
+ }, {
1556
+ sampleId,
1557
+ addressOrSymbol,
1558
+ });
1559
+ logger.info({
1560
+ sampleId,
1561
+ function: result.function,
1562
+ address: result.address,
1563
+ nodeCount: result.nodes?.length || 0,
1564
+ edgeCount: result.edges?.length || 0
1565
+ }, 'Function CFG extracted successfully');
1566
+ return result;
1567
+ }
1568
+ catch (error) {
1569
+ const errorMessage = error instanceof Error ? error.message : String(error);
1570
+ const diagnostics = getGhidraDiagnostics(error);
1571
+ logger.error({
1572
+ sampleId,
1573
+ addressOrSymbol,
1574
+ error: errorMessage,
1575
+ ghidra_diagnostics: diagnostics
1576
+ }, 'Function CFG extraction failed');
1577
+ throw error;
1578
+ }
1579
+ }
1580
+ /**
1581
+ * Execute ExtractCFG.py script
1582
+ *
1583
+ * Requirements: 11.1
1584
+ *
1585
+ * @param projectPath - Ghidra project directory path
1586
+ * @param projectKey - Unique project key
1587
+ * @param samplePath - Path to sample file
1588
+ * @param addressOrSymbol - Function address or symbol name
1589
+ * @param timeout - Timeout in milliseconds
1590
+ * @returns Ghidra output (stdout)
1591
+ */
1592
+ async executeCFGScript(projectPath, projectKey, samplePath, addressOrSymbol, timeout) {
1593
+ const scriptOrder = ['ExtractCFG.java'];
1594
+ let lastError;
1595
+ for (const scriptName of scriptOrder) {
1596
+ const command = ghidraConfig.analyzeHeadlessPath;
1597
+ const args = [
1598
+ projectPath,
1599
+ projectKey,
1600
+ '-process', path.basename(samplePath),
1601
+ '-readOnly',
1602
+ '-scriptPath', ghidraConfig.scriptsDir,
1603
+ '-postScript', scriptName, addressOrSymbol,
1604
+ '-noanalysis'
1605
+ ];
1606
+ logger.debug({
1607
+ command,
1608
+ args,
1609
+ timeout,
1610
+ script: scriptName,
1611
+ }, 'Executing CFG extraction post-script');
1612
+ try {
1613
+ return await this.runGhidraCommand(command, args, projectPath, timeout, undefined, `E_TIMEOUT: CFG extraction exceeded timeout of ${timeout}ms`, `CFG extraction failed (${scriptName})`);
1614
+ }
1615
+ catch (error) {
1616
+ lastError = error;
1617
+ const diagnostics = getGhidraDiagnostics(error);
1618
+ logger.warn({
1619
+ script: scriptName,
1620
+ error: error instanceof Error ? error.message : String(error),
1621
+ ghidra_diagnostics: diagnostics,
1622
+ }, 'CFG extraction script attempt failed');
1623
+ }
1624
+ }
1625
+ throw lastError instanceof Error
1626
+ ? lastError
1627
+ : new Error('CFG extraction failed for all configured post-scripts.');
1628
+ }
1629
+ async searchFunctionsWithGhidra(sampleId, apiQuery, stringQuery, limit, timeout) {
1630
+ const resolved = this.resolveGhidraAnalysisForCapability(sampleId, 'function_index');
1631
+ const workspace = await this.workspaceManager.getWorkspace(sampleId);
1632
+ const samplePath = this.resolveSamplePath(workspace.original);
1633
+ if (!fs.existsSync(samplePath)) {
1634
+ throw new Error(`Sample file not found: ${samplePath}`);
1635
+ }
1636
+ return this.runWithProjectLockRetry('Function reference search', async () => {
1637
+ const output = await this.executeSearchScript(resolved.projectPath, resolved.projectKey, samplePath, apiQuery, stringQuery, limit, timeout);
1638
+ const result = this.parseSearchOutput(output.stdout, output.stderr, output.diagnostics);
1639
+ if ('error' in result) {
1640
+ throw result.diagnostics
1641
+ ? new GhidraOutputParseError(result.error, result.diagnostics)
1642
+ : new Error(result.error);
1643
+ }
1644
+ return result;
1645
+ }, {
1646
+ sampleId,
1647
+ apiQuery,
1648
+ stringQuery,
1649
+ });
1650
+ }
1651
+ searchFunctionsFromIndex(sampleId, apiQuery, limit) {
1652
+ const needle = apiQuery.toLowerCase();
1653
+ const matches = this.database
1654
+ .findFunctions(sampleId)
1655
+ .reduce((acc, func) => {
1656
+ const callees = this.parseFunctionCallees(func.callees);
1657
+ const apiMatches = callees.filter((callee) => callee.toLowerCase().includes(needle));
1658
+ if (apiMatches.length === 0) {
1659
+ return acc;
1660
+ }
1661
+ acc.push({
1662
+ function: func.name || 'unknown',
1663
+ address: func.address,
1664
+ caller_count: func.caller_count ?? 0,
1665
+ callee_count: func.callee_count ?? 0,
1666
+ api_matches: apiMatches,
1667
+ match_types: ['api_call_index'],
1668
+ });
1669
+ return acc;
1670
+ }, [])
1671
+ .sort((left, right) => {
1672
+ const leftScore = (left.api_matches?.length ?? 0) * 10 + left.caller_count;
1673
+ const rightScore = (right.api_matches?.length ?? 0) * 10 + right.caller_count;
1674
+ return rightScore - leftScore;
1675
+ })
1676
+ .slice(0, limit);
1677
+ return {
1678
+ query: {
1679
+ api: apiQuery,
1680
+ limit,
1681
+ },
1682
+ matches,
1683
+ count: matches.length,
1684
+ };
1685
+ }
1686
+ parseFunctionCallees(raw) {
1687
+ if (!raw) {
1688
+ return [];
1689
+ }
1690
+ try {
1691
+ const parsed = JSON.parse(raw);
1692
+ if (!Array.isArray(parsed)) {
1693
+ return [];
1694
+ }
1695
+ return parsed.filter((item) => typeof item === 'string');
1696
+ }
1697
+ catch {
1698
+ return [];
1699
+ }
1700
+ }
1701
+ async executeSearchScript(projectPath, projectKey, samplePath, apiQuery, stringQuery, limit, timeout) {
1702
+ const command = ghidraConfig.analyzeHeadlessPath;
1703
+ const args = [
1704
+ projectPath,
1705
+ projectKey,
1706
+ '-process', path.basename(samplePath),
1707
+ '-readOnly',
1708
+ '-scriptPath', ghidraConfig.scriptsDir,
1709
+ '-postScript', 'SearchFunctionReferences.java', apiQuery || '-', stringQuery || '-', String(limit),
1710
+ '-noanalysis',
1711
+ ];
1712
+ logger.debug({
1713
+ command,
1714
+ args,
1715
+ timeout,
1716
+ }, 'Executing function reference search post-script');
1717
+ return this.runGhidraCommand(command, args, projectPath, timeout, undefined, `E_TIMEOUT: Function search exceeded timeout of ${timeout}ms`, 'Function search failed');
1718
+ }
1719
+ parseSearchOutput(output, stderr, diagnostics) {
1720
+ try {
1721
+ const jsonMatch = output.match(/\{[\s\S]*\}/);
1722
+ if (!jsonMatch) {
1723
+ throw new GhidraOutputParseError(this.buildNoJsonOutputMessage('code.functions.search', output, stderr, diagnostics), diagnostics || this.buildSyntheticDiagnostics(output, stderr));
1724
+ }
1725
+ const parsed = JSON.parse(jsonMatch[0]);
1726
+ if (parsed.error) {
1727
+ return { error: String(parsed.error) };
1728
+ }
1729
+ if (!parsed.query || !parsed.matches || !Array.isArray(parsed.matches)) {
1730
+ throw new Error('Invalid function search output: missing query or matches');
1731
+ }
1732
+ const matches = [];
1733
+ for (const rawMatch of parsed.matches) {
1734
+ if (!rawMatch || typeof rawMatch !== 'object') {
1735
+ continue;
1736
+ }
1737
+ const match = rawMatch;
1738
+ const apiMatches = Array.isArray(match.api_matches)
1739
+ ? match.api_matches.filter((item) => typeof item === 'string')
1740
+ : [];
1741
+ const stringMatches = [];
1742
+ if (Array.isArray(match.string_matches)) {
1743
+ for (const item of match.string_matches) {
1744
+ if (!item || typeof item !== 'object') {
1745
+ continue;
1746
+ }
1747
+ const typed = item;
1748
+ const value = typeof typed.value === 'string' ? typed.value : '';
1749
+ if (!value) {
1750
+ continue;
1751
+ }
1752
+ const normalized = { value };
1753
+ if (typeof typed.data_address === 'string') {
1754
+ normalized.data_address = typed.data_address;
1755
+ }
1756
+ if (typeof typed.referenced_from === 'string') {
1757
+ normalized.referenced_from = typed.referenced_from;
1758
+ }
1759
+ stringMatches.push(normalized);
1760
+ }
1761
+ }
1762
+ const matchTypes = Array.isArray(match.match_types)
1763
+ ? match.match_types.filter((item) => item === 'api_call' || item === 'string_reference' || item === 'api_call_index')
1764
+ : [];
1765
+ const normalizedMatchTypes = Array.from(new Set([
1766
+ ...matchTypes,
1767
+ ...(apiMatches.length > 0 ? ['api_call'] : []),
1768
+ ...(stringMatches.length > 0 ? ['string_reference'] : []),
1769
+ ]));
1770
+ matches.push({
1771
+ function: typeof match.function === 'string' ? match.function : 'unknown',
1772
+ address: typeof match.address === 'string' ? match.address : '',
1773
+ caller_count: Number(match.caller_count || 0),
1774
+ callee_count: Number(match.callee_count || 0),
1775
+ api_matches: apiMatches,
1776
+ string_matches: stringMatches,
1777
+ match_types: normalizedMatchTypes,
1778
+ });
1779
+ }
1780
+ const normalizedResult = {
1781
+ query: {
1782
+ api: typeof parsed.query?.api === 'string'
1783
+ ? String(parsed.query.api)
1784
+ : undefined,
1785
+ string: typeof parsed.query?.string === 'string'
1786
+ ? String(parsed.query.string)
1787
+ : undefined,
1788
+ limit: Number(parsed.query?.limit || matches.length || 0),
1789
+ },
1790
+ matches,
1791
+ count: typeof parsed.count === 'number'
1792
+ ? parsed.count
1793
+ : matches.length,
1794
+ };
1795
+ return normalizedResult;
1796
+ }
1797
+ catch (error) {
1798
+ const errorMessage = error instanceof Error ? error.message : String(error);
1799
+ const normalizedDiagnostics = diagnostics || getGhidraDiagnostics(error) || this.buildSyntheticDiagnostics(output, stderr);
1800
+ logger.error({
1801
+ error: errorMessage,
1802
+ outputPreview: output.substring(0, 500),
1803
+ stderrPreview: stderr.substring(0, 500),
1804
+ }, 'Failed to parse function search output');
1805
+ return {
1806
+ error: `Failed to parse function search output: ${errorMessage}`,
1807
+ diagnostics: normalizedDiagnostics,
1808
+ };
1809
+ }
1810
+ }
1811
+ /**
1812
+ * Parse CFG script output
1813
+ *
1814
+ * Requirements: 11.2, 11.3, 11.4, 11.5
1815
+ *
1816
+ * @param output - Script stdout output
1817
+ * @returns Parsed control flow graph or error
1818
+ */
1819
+ parseCFGOutput(output, stderr, diagnostics) {
1820
+ try {
1821
+ // Extract JSON from output
1822
+ const jsonMatch = output.match(/\{[\s\S]*\}/);
1823
+ if (!jsonMatch) {
1824
+ throw new GhidraOutputParseError(this.buildNoJsonOutputMessage('code.function.cfg', output, stderr, diagnostics), diagnostics || this.buildSyntheticDiagnostics(output, stderr));
1825
+ }
1826
+ const parsed = JSON.parse(jsonMatch[0]);
1827
+ // Check for error in the result
1828
+ if (parsed.error) {
1829
+ return {
1830
+ error: parsed.error,
1831
+ diagnostics: diagnostics || this.buildSyntheticDiagnostics(output, stderr),
1832
+ };
1833
+ }
1834
+ // Validate required fields
1835
+ if (!parsed.function || !parsed.address || !parsed.nodes || !parsed.edges) {
1836
+ throw new Error('Invalid CFG output: missing required fields');
1837
+ }
1838
+ logger.debug({
1839
+ function: parsed.function,
1840
+ address: parsed.address,
1841
+ nodeCount: parsed.nodes.length,
1842
+ edgeCount: parsed.edges.length
1843
+ }, 'Parsed CFG output');
1844
+ return parsed;
1845
+ }
1846
+ catch (error) {
1847
+ const errorMessage = error instanceof Error ? error.message : String(error);
1848
+ const normalizedDiagnostics = diagnostics || getGhidraDiagnostics(error) || this.buildSyntheticDiagnostics(output, stderr);
1849
+ logger.error({
1850
+ error: errorMessage,
1851
+ outputPreview: output.substring(0, 500),
1852
+ stderrPreview: stderr.substring(0, 500),
1853
+ }, 'Failed to parse CFG output');
1854
+ return {
1855
+ error: `Failed to parse CFG output: ${errorMessage}`,
1856
+ diagnostics: normalizedDiagnostics,
1857
+ };
1858
+ }
1859
+ }
1860
+ /**
1861
+ * Store functions to database
1862
+ *
1863
+ * Requirements: 8.4
1864
+ *
1865
+ * @param sampleId - Sample identifier
1866
+ * @param functions - Array of functions from Ghidra
1867
+ */
1868
+ async storeFunctions(sampleId, functions) {
1869
+ if (functions.length === 0) {
1870
+ logger.warn({ sampleId }, 'No functions to store');
1871
+ return;
1872
+ }
1873
+ logger.debug({
1874
+ sampleId,
1875
+ functionCount: functions.length
1876
+ }, 'Storing functions to database');
1877
+ // Convert Ghidra functions to database format
1878
+ const dbFunctions = functions.map(func => {
1879
+ const calleeNames = Array.from(new Set([
1880
+ ...func.callees.map(c => c.name).filter((name) => typeof name === 'string' && name.length > 0),
1881
+ ...(func.callee_relationships || [])
1882
+ .map((relationship) => relationship.name)
1883
+ .filter((name) => typeof name === 'string' && name.length > 0),
1884
+ ]));
1885
+ return {
1886
+ sample_id: sampleId,
1887
+ address: func.address,
1888
+ name: func.name,
1889
+ size: func.size,
1890
+ score: 0.0, // Will be calculated by rankFunctions later
1891
+ tags: JSON.stringify([]), // Will be populated by rankFunctions
1892
+ summary: null,
1893
+ caller_count: Math.max(func.caller_count, func.callers.length, func.caller_relationships?.length || 0),
1894
+ callee_count: Math.max(func.callee_count, func.callees.length, func.callee_relationships?.length || 0),
1895
+ is_entry_point: func.is_entry_point ? 1 : 0,
1896
+ is_exported: func.is_exported ? 1 : 0,
1897
+ callees: JSON.stringify(calleeNames)
1898
+ };
1899
+ });
1900
+ // Use batch insert for better performance
1901
+ this.database.insertFunctionsBatch(dbFunctions);
1902
+ logger.info({
1903
+ sampleId,
1904
+ functionCount: dbFunctions.length
1905
+ }, 'Functions stored successfully');
1906
+ }
1907
+ /**
1908
+ * Create a job result from analysis result
1909
+ * Helper method for job queue integration
1910
+ *
1911
+ * @param analysisResult - Analysis result
1912
+ * @param elapsedMs - Elapsed time in milliseconds
1913
+ * @returns Job result
1914
+ */
1915
+ createJobResult(analysisResult, elapsedMs) {
1916
+ return {
1917
+ jobId: analysisResult.analysisId,
1918
+ ok: true,
1919
+ data: analysisResult,
1920
+ errors: [],
1921
+ warnings: [],
1922
+ artifacts: [],
1923
+ metrics: {
1924
+ elapsedMs,
1925
+ peakRssMb: 0 // TODO: Implement memory tracking
1926
+ }
1927
+ };
1928
+ }
1929
+ /**
1930
+ * Create a job result from error
1931
+ * Helper method for job queue integration
1932
+ *
1933
+ * @param jobId - Job identifier
1934
+ * @param error - Error that occurred
1935
+ * @param elapsedMs - Elapsed time in milliseconds
1936
+ * @returns Job result
1937
+ */
1938
+ createErrorJobResult(jobId, error, elapsedMs) {
1939
+ return {
1940
+ jobId,
1941
+ ok: false,
1942
+ errors: [error.message],
1943
+ warnings: [],
1944
+ artifacts: [],
1945
+ metrics: {
1946
+ elapsedMs,
1947
+ peakRssMb: 0
1948
+ }
1949
+ };
1950
+ }
1951
+ }
1952
/**
 * Factory for DecompilerWorker instances.
 *
 * @param database - Database manager handed to the worker
 * @param workspaceManager - Workspace manager handed to the worker
 * @returns A newly constructed decompiler worker
 */
export function createDecompilerWorker(database, workspaceManager) {
    const worker = new DecompilerWorker(database, workspaceManager);
    return worker;
}
1962
+ //# sourceMappingURL=decompiler-worker.js.map