windows-exe-decompiler-mcp-server 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/CODEX_INSTALLATION.md +69 -0
  2. package/COPILOT_INSTALLATION.md +77 -0
  3. package/LICENSE +21 -0
  4. package/README.md +314 -0
  5. package/bin/windows-exe-decompiler-mcp-server.js +3 -0
  6. package/dist/analysis-provenance.d.ts +184 -0
  7. package/dist/analysis-provenance.js +74 -0
  8. package/dist/analysis-task-runner.d.ts +31 -0
  9. package/dist/analysis-task-runner.js +160 -0
  10. package/dist/artifact-inventory.d.ts +23 -0
  11. package/dist/artifact-inventory.js +175 -0
  12. package/dist/cache-manager.d.ts +128 -0
  13. package/dist/cache-manager.js +454 -0
  14. package/dist/confidence-semantics.d.ts +66 -0
  15. package/dist/confidence-semantics.js +122 -0
  16. package/dist/config.d.ts +335 -0
  17. package/dist/config.js +193 -0
  18. package/dist/database.d.ts +227 -0
  19. package/dist/database.js +601 -0
  20. package/dist/decompiler-worker.d.ts +441 -0
  21. package/dist/decompiler-worker.js +1962 -0
  22. package/dist/dynamic-trace.d.ts +95 -0
  23. package/dist/dynamic-trace.js +629 -0
  24. package/dist/env-validator.d.ts +15 -0
  25. package/dist/env-validator.js +249 -0
  26. package/dist/error-handler.d.ts +28 -0
  27. package/dist/error-handler.example.d.ts +22 -0
  28. package/dist/error-handler.example.js +141 -0
  29. package/dist/error-handler.js +139 -0
  30. package/dist/ghidra-analysis-status.d.ts +49 -0
  31. package/dist/ghidra-analysis-status.js +178 -0
  32. package/dist/ghidra-config.d.ts +134 -0
  33. package/dist/ghidra-config.js +464 -0
  34. package/dist/index.d.ts +9 -0
  35. package/dist/index.js +200 -0
  36. package/dist/job-queue.d.ts +169 -0
  37. package/dist/job-queue.js +407 -0
  38. package/dist/logger.d.ts +106 -0
  39. package/dist/logger.js +176 -0
  40. package/dist/policy-guard.d.ts +115 -0
  41. package/dist/policy-guard.js +243 -0
  42. package/dist/process-output.d.ts +15 -0
  43. package/dist/process-output.js +90 -0
  44. package/dist/prompts/function-explanation-review.d.ts +5 -0
  45. package/dist/prompts/function-explanation-review.js +64 -0
  46. package/dist/prompts/semantic-name-review.d.ts +5 -0
  47. package/dist/prompts/semantic-name-review.js +63 -0
  48. package/dist/runtime-correlation.d.ts +34 -0
  49. package/dist/runtime-correlation.js +279 -0
  50. package/dist/runtime-paths.d.ts +3 -0
  51. package/dist/runtime-paths.js +11 -0
  52. package/dist/selection-diff.d.ts +667 -0
  53. package/dist/selection-diff.js +53 -0
  54. package/dist/semantic-name-suggestion-artifacts.d.ts +116 -0
  55. package/dist/semantic-name-suggestion-artifacts.js +314 -0
  56. package/dist/server.d.ts +129 -0
  57. package/dist/server.js +578 -0
  58. package/dist/tools/artifact-read.d.ts +235 -0
  59. package/dist/tools/artifact-read.js +317 -0
  60. package/dist/tools/artifacts-diff.d.ts +728 -0
  61. package/dist/tools/artifacts-diff.js +304 -0
  62. package/dist/tools/artifacts-list.d.ts +515 -0
  63. package/dist/tools/artifacts-list.js +389 -0
  64. package/dist/tools/attack-map.d.ts +290 -0
  65. package/dist/tools/attack-map.js +519 -0
  66. package/dist/tools/cache-observability.d.ts +4 -0
  67. package/dist/tools/cache-observability.js +36 -0
  68. package/dist/tools/code-function-cfg.d.ts +50 -0
  69. package/dist/tools/code-function-cfg.js +102 -0
  70. package/dist/tools/code-function-decompile.d.ts +55 -0
  71. package/dist/tools/code-function-decompile.js +103 -0
  72. package/dist/tools/code-function-disassemble.d.ts +43 -0
  73. package/dist/tools/code-function-disassemble.js +185 -0
  74. package/dist/tools/code-function-explain-apply.d.ts +255 -0
  75. package/dist/tools/code-function-explain-apply.js +225 -0
  76. package/dist/tools/code-function-explain-prepare.d.ts +535 -0
  77. package/dist/tools/code-function-explain-prepare.js +276 -0
  78. package/dist/tools/code-function-explain-review.d.ts +397 -0
  79. package/dist/tools/code-function-explain-review.js +589 -0
  80. package/dist/tools/code-function-rename-apply.d.ts +248 -0
  81. package/dist/tools/code-function-rename-apply.js +220 -0
  82. package/dist/tools/code-function-rename-prepare.d.ts +506 -0
  83. package/dist/tools/code-function-rename-prepare.js +279 -0
  84. package/dist/tools/code-function-rename-review.d.ts +574 -0
  85. package/dist/tools/code-function-rename-review.js +761 -0
  86. package/dist/tools/code-functions-list.d.ts +37 -0
  87. package/dist/tools/code-functions-list.js +91 -0
  88. package/dist/tools/code-functions-rank.d.ts +34 -0
  89. package/dist/tools/code-functions-rank.js +90 -0
  90. package/dist/tools/code-functions-reconstruct.d.ts +2725 -0
  91. package/dist/tools/code-functions-reconstruct.js +2807 -0
  92. package/dist/tools/code-functions-search.d.ts +39 -0
  93. package/dist/tools/code-functions-search.js +90 -0
  94. package/dist/tools/code-reconstruct-export.d.ts +1212 -0
  95. package/dist/tools/code-reconstruct-export.js +4002 -0
  96. package/dist/tools/code-reconstruct-plan.d.ts +274 -0
  97. package/dist/tools/code-reconstruct-plan.js +342 -0
  98. package/dist/tools/dotnet-metadata-extract.d.ts +541 -0
  99. package/dist/tools/dotnet-metadata-extract.js +355 -0
  100. package/dist/tools/dotnet-reconstruct-export.d.ts +567 -0
  101. package/dist/tools/dotnet-reconstruct-export.js +1151 -0
  102. package/dist/tools/dotnet-types-list.d.ts +325 -0
  103. package/dist/tools/dotnet-types-list.js +201 -0
  104. package/dist/tools/dynamic-dependencies.d.ts +115 -0
  105. package/dist/tools/dynamic-dependencies.js +213 -0
  106. package/dist/tools/dynamic-memory-import.d.ts +10 -0
  107. package/dist/tools/dynamic-memory-import.js +567 -0
  108. package/dist/tools/dynamic-trace-import.d.ts +10 -0
  109. package/dist/tools/dynamic-trace-import.js +235 -0
  110. package/dist/tools/entrypoint-fallback-disasm.d.ts +30 -0
  111. package/dist/tools/entrypoint-fallback-disasm.js +89 -0
  112. package/dist/tools/ghidra-analyze.d.ts +88 -0
  113. package/dist/tools/ghidra-analyze.js +208 -0
  114. package/dist/tools/ghidra-health.d.ts +37 -0
  115. package/dist/tools/ghidra-health.js +212 -0
  116. package/dist/tools/ioc-export.d.ts +209 -0
  117. package/dist/tools/ioc-export.js +542 -0
  118. package/dist/tools/packer-detect.d.ts +165 -0
  119. package/dist/tools/packer-detect.js +284 -0
  120. package/dist/tools/pe-exports-extract.d.ts +175 -0
  121. package/dist/tools/pe-exports-extract.js +253 -0
  122. package/dist/tools/pe-fingerprint.d.ts +234 -0
  123. package/dist/tools/pe-fingerprint.js +269 -0
  124. package/dist/tools/pe-imports-extract.d.ts +105 -0
  125. package/dist/tools/pe-imports-extract.js +245 -0
  126. package/dist/tools/report-generate.d.ts +157 -0
  127. package/dist/tools/report-generate.js +457 -0
  128. package/dist/tools/report-summarize.d.ts +2131 -0
  129. package/dist/tools/report-summarize.js +596 -0
  130. package/dist/tools/runtime-detect.d.ts +135 -0
  131. package/dist/tools/runtime-detect.js +247 -0
  132. package/dist/tools/sample-ingest.d.ts +94 -0
  133. package/dist/tools/sample-ingest.js +327 -0
  134. package/dist/tools/sample-profile-get.d.ts +183 -0
  135. package/dist/tools/sample-profile-get.js +121 -0
  136. package/dist/tools/sandbox-execute.d.ts +441 -0
  137. package/dist/tools/sandbox-execute.js +392 -0
  138. package/dist/tools/strings-extract.d.ts +375 -0
  139. package/dist/tools/strings-extract.js +314 -0
  140. package/dist/tools/strings-floss-decode.d.ts +143 -0
  141. package/dist/tools/strings-floss-decode.js +259 -0
  142. package/dist/tools/system-health.d.ts +434 -0
  143. package/dist/tools/system-health.js +446 -0
  144. package/dist/tools/task-cancel.d.ts +21 -0
  145. package/dist/tools/task-cancel.js +70 -0
  146. package/dist/tools/task-status.d.ts +27 -0
  147. package/dist/tools/task-status.js +106 -0
  148. package/dist/tools/task-sweep.d.ts +22 -0
  149. package/dist/tools/task-sweep.js +77 -0
  150. package/dist/tools/tool-help.d.ts +340 -0
  151. package/dist/tools/tool-help.js +261 -0
  152. package/dist/tools/yara-scan.d.ts +554 -0
  153. package/dist/tools/yara-scan.js +313 -0
  154. package/dist/types.d.ts +266 -0
  155. package/dist/types.js +41 -0
  156. package/dist/worker-pool.d.ts +204 -0
  157. package/dist/worker-pool.js +650 -0
  158. package/dist/workflows/deep-static.d.ts +104 -0
  159. package/dist/workflows/deep-static.js +276 -0
  160. package/dist/workflows/function-explanation-review.d.ts +655 -0
  161. package/dist/workflows/function-explanation-review.js +440 -0
  162. package/dist/workflows/reconstruct.d.ts +2053 -0
  163. package/dist/workflows/reconstruct.js +666 -0
  164. package/dist/workflows/semantic-name-review.d.ts +2418 -0
  165. package/dist/workflows/semantic-name-review.js +521 -0
  166. package/dist/workflows/triage.d.ts +659 -0
  167. package/dist/workflows/triage.js +1374 -0
  168. package/dist/workspace-manager.d.ts +150 -0
  169. package/dist/workspace-manager.js +411 -0
  170. package/ghidra_scripts/DecompileFunction.java +487 -0
  171. package/ghidra_scripts/DecompileFunction.py +150 -0
  172. package/ghidra_scripts/ExtractCFG.java +256 -0
  173. package/ghidra_scripts/ExtractCFG.py +233 -0
  174. package/ghidra_scripts/ExtractFunctions.java +442 -0
  175. package/ghidra_scripts/ExtractFunctions.py +101 -0
  176. package/ghidra_scripts/README.md +125 -0
  177. package/ghidra_scripts/SearchFunctionReferences.java +380 -0
  178. package/helpers/DotNetMetadataProbe/DotNetMetadataProbe.csproj +9 -0
  179. package/helpers/DotNetMetadataProbe/Program.cs +566 -0
  180. package/install-to-codex.ps1 +178 -0
  181. package/install-to-copilot.ps1 +303 -0
  182. package/package.json +101 -0
  183. package/requirements.txt +9 -0
  184. package/workers/requirements-dynamic.txt +11 -0
  185. package/workers/requirements.txt +8 -0
  186. package/workers/speakeasy_compat.py +175 -0
  187. package/workers/static_worker.py +5183 -0
  188. package/workers/yara_rules/default.yar +33 -0
  189. package/workers/yara_rules/malware_families.yar +93 -0
  190. package/workers/yara_rules/packers.yar +80 -0
@@ -0,0 +1,2807 @@
1
+ /**
2
+ * code.functions.reconstruct tool implementation
3
+ * Function-level semantic reconstruction by combining decompile + CFG + assembly evidence.
4
+ */
5
+ import { z } from 'zod';
6
+ import fs from 'fs';
7
+ import path from 'path';
8
+ import { DecompilerWorker, } from '../decompiler-worker.js';
9
+ import { findBestGhidraAnalysis } from '../ghidra-analysis-status.js';
10
+ import { ghidraConfig } from '../ghidra-config.js';
11
+ import { generateCacheKey } from '../cache-manager.js';
12
+ import { lookupCachedResult, formatCacheWarning } from './cache-observability.js';
13
+ import { runEntrypointFallbackDisasm } from './entrypoint-fallback-disasm.js';
14
+ import { loadDynamicTraceEvidence } from '../dynamic-trace.js';
15
+ import { createStringsExtractHandler } from './strings-extract.js';
16
+ import { correlateFunctionWithRuntimeEvidence, extractSensitiveApisFromReasons, } from '../runtime-correlation.js';
17
+ import { findSemanticNameSuggestion, loadSemanticNameSuggestionIndex, SEMANTIC_NAME_SUGGESTIONS_ARTIFACT_TYPE, } from '../semantic-name-suggestion-artifacts.js';
18
+ import { ConfidenceSemanticsSchema, buildNamingConfidenceSemantics, buildReconstructionConfidenceSemantics, buildRuntimeConfidenceSemantics, } from '../confidence-semantics.js';
19
+ import { AnalysisProvenanceSchema, buildRuntimeArtifactProvenance, buildSemanticArtifactProvenance, } from '../analysis-provenance.js';
20
+ const TOOL_NAME = 'code.functions.reconstruct';
21
+ const TOOL_VERSION = '0.2.13';
22
+ const CACHE_TTL_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
23
+ export const CodeFunctionsReconstructInputSchema = z.object({
24
+ sample_id: z.string().describe('Sample ID (format: sha256:<hex>)'),
25
+ address: z.string().optional().describe('Specific function address (hex)'),
26
+ symbol: z.string().optional().describe('Specific function symbol'),
27
+ topk: z
28
+ .number()
29
+ .int()
30
+ .min(1)
31
+ .max(20)
32
+ .default(3)
33
+ .describe('When address/symbol not provided, reconstruct top-K ranked functions'),
34
+ include_xrefs: z
35
+ .boolean()
36
+ .default(false)
37
+ .describe('Include xrefs when calling function decompile'),
38
+ max_pseudocode_lines: z
39
+ .number()
40
+ .int()
41
+ .min(20)
42
+ .max(300)
43
+ .default(120)
44
+ .describe('Maximum pseudocode lines in source-like snippet'),
45
+ max_assembly_lines: z
46
+ .number()
47
+ .int()
48
+ .min(10)
49
+ .max(240)
50
+ .default(80)
51
+ .describe('Maximum assembly lines in assembly excerpt'),
52
+ timeout: z
53
+ .number()
54
+ .int()
55
+ .min(5)
56
+ .max(300)
57
+ .default(30)
58
+ .describe('Per-function timeout in seconds'),
59
+ evidence_scope: z
60
+ .enum(['all', 'latest', 'session'])
61
+ .default('all')
62
+ .describe('Runtime evidence scope: all artifacts, only the latest artifact window, or a specific session selector'),
63
+ evidence_session_tag: z
64
+ .string()
65
+ .optional()
66
+ .describe('Optional runtime evidence session selector used when evidence_scope=session or to narrow all/latest results'),
67
+ semantic_scope: z
68
+ .enum(['all', 'latest', 'session'])
69
+ .default('all')
70
+ .describe('Semantic review artifact scope: all artifacts, only the latest semantic artifact window, or a specific semantic review session'),
71
+ semantic_session_tag: z
72
+ .string()
73
+ .optional()
74
+ .describe('Optional semantic review session selector used when semantic_scope=session or to narrow all/latest results'),
75
+ })
76
+ .refine((value) => value.evidence_scope !== 'session' || Boolean(value.evidence_session_tag?.trim()), {
77
+ message: 'evidence_session_tag is required when evidence_scope=session',
78
+ path: ['evidence_session_tag'],
79
+ })
80
+ .refine((value) => value.semantic_scope !== 'session' || Boolean(value.semantic_session_tag?.trim()), {
81
+ message: 'semantic_session_tag is required when semantic_scope=session',
82
+ path: ['semantic_session_tag'],
83
+ });
84
+ const FunctionXrefSignalSchema = z.object({
85
+ api: z.string(),
86
+ provenance: z.enum([
87
+ 'static_named_call',
88
+ 'dynamic_resolution_api',
89
+ 'dynamic_resolution_helper',
90
+ 'global_string_hint',
91
+ 'unknown',
92
+ ]),
93
+ confidence: z.number().min(0).max(1),
94
+ evidence: z.array(z.string()),
95
+ });
96
+ const FunctionRelationshipEntrySchema = z.object({
97
+ target: z.string(),
98
+ relation_types: z.array(z.string()),
99
+ reference_types: z.array(z.string()),
100
+ resolved_by: z.string().nullable(),
101
+ is_exact: z.boolean().nullable(),
102
+ });
103
+ const FunctionRuntimeContextSchema = z
104
+ .object({
105
+ corroborated_apis: z.array(z.string()),
106
+ corroborated_stages: z.array(z.string()),
107
+ notes: z.array(z.string()),
108
+ confidence: z.number().min(0).max(1),
109
+ executed: z.boolean().optional(),
110
+ evidence_sources: z.array(z.string()).optional(),
111
+ source_names: z.array(z.string()).optional(),
112
+ artifact_count: z.number().int().nonnegative().optional(),
113
+ executed_artifact_count: z.number().int().nonnegative().optional(),
114
+ matched_memory_regions: z.array(z.string()).optional(),
115
+ suggested_modules: z.array(z.string()).optional(),
116
+ matched_by: z.array(z.string()).optional(),
117
+ provenance_layers: z.array(z.string()).optional(),
118
+ latest_artifact_at: z.string().nullable().optional(),
119
+ scope_note: z.string().optional(),
120
+ })
121
+ .nullable()
122
+ .optional();
123
+ const FunctionCFGShapeSchema = z.object({
124
+ node_count: z.number().int().nonnegative(),
125
+ edge_count: z.number().int().nonnegative(),
126
+ has_loop: z.boolean(),
127
+ has_branching: z.boolean(),
128
+ block_types: z.array(z.string()),
129
+ entry_block_type: z.string().nullable(),
130
+ });
131
+ const FunctionParameterRoleSchema = z.object({
132
+ slot: z.string(),
133
+ role: z.string(),
134
+ inferred_type: z.string(),
135
+ confidence: z.number().min(0).max(1),
136
+ evidence: z.array(z.string()),
137
+ });
138
+ const FunctionStateRoleSchema = z.object({
139
+ state_key: z.string(),
140
+ role: z.string(),
141
+ confidence: z.number().min(0).max(1),
142
+ evidence: z.array(z.string()),
143
+ });
144
+ const FunctionStructFieldSchema = z.object({
145
+ name: z.string(),
146
+ inferred_type: z.string(),
147
+ source_slot: z.string().nullable().optional(),
148
+ });
149
+ const FunctionStructInferenceSchema = z.object({
150
+ semantic_name: z.string(),
151
+ rewrite_type_name: z.string().nullable().optional(),
152
+ kind: z.enum(['request', 'result', 'context', 'table', 'session']),
153
+ confidence: z.number().min(0).max(1),
154
+ fields: z.array(FunctionStructFieldSchema),
155
+ evidence: z.array(z.string()),
156
+ });
157
+ const FunctionSemanticEvidenceSchema = z.object({
158
+ semantic_summary: z.string(),
159
+ xref_signals: z.array(FunctionXrefSignalSchema),
160
+ call_relationships: z.object({
161
+ callers: z.array(FunctionRelationshipEntrySchema),
162
+ callees: z.array(FunctionRelationshipEntrySchema),
163
+ }),
164
+ runtime_context: FunctionRuntimeContextSchema,
165
+ string_hints: z.array(z.string()),
166
+ pseudocode_excerpt: z.string(),
167
+ cfg_shape: FunctionCFGShapeSchema,
168
+ parameter_roles: z.array(FunctionParameterRoleSchema),
169
+ state_roles: z.array(FunctionStateRoleSchema),
170
+ struct_inference: z.array(FunctionStructInferenceSchema),
171
+ });
172
+ const FunctionNameResolutionSchema = z.object({
173
+ rule_based_name: z.string().nullable(),
174
+ llm_suggested_name: z.string().nullable(),
175
+ llm_confidence: z.number().min(0).max(1).nullable(),
176
+ llm_why: z.string().nullable(),
177
+ required_assumptions: z.array(z.string()),
178
+ evidence_used: z.array(z.string()),
179
+ validated_name: z.string().nullable(),
180
+ resolution_source: z.enum(['rule', 'llm', 'hybrid', 'unresolved']),
181
+ unresolved_semantic_name: z.boolean(),
182
+ });
183
+ const ReconstructedFunctionSchema = z.object({
184
+ target: z.string(),
185
+ function: z.string(),
186
+ address: z.string(),
187
+ rank_score: z.number().nullable(),
188
+ rank_reasons: z.array(z.string()),
189
+ suggested_name: z.string().nullable().optional(),
190
+ suggested_role: z.string().nullable().optional(),
191
+ rename_confidence: z.number().min(0).max(1).nullable().optional(),
192
+ rename_evidence: z.array(z.string()).optional(),
193
+ semantic_summary: z.string(),
194
+ xref_signals: z.array(FunctionXrefSignalSchema),
195
+ call_context: z.object({
196
+ callers: z.array(z.string()),
197
+ callees: z.array(z.string()),
198
+ }),
199
+ call_relationships: z.object({
200
+ callers: z.array(FunctionRelationshipEntrySchema),
201
+ callees: z.array(FunctionRelationshipEntrySchema),
202
+ }),
203
+ runtime_context: FunctionRuntimeContextSchema,
204
+ parameter_roles: z.array(FunctionParameterRoleSchema).optional(),
205
+ state_roles: z.array(FunctionStateRoleSchema).optional(),
206
+ struct_inference: z.array(FunctionStructInferenceSchema).optional(),
207
+ semantic_evidence: FunctionSemanticEvidenceSchema.optional(),
208
+ name_resolution: FunctionNameResolutionSchema.optional(),
209
+ confidence_profile: ConfidenceSemanticsSchema.optional(),
210
+ runtime_confidence_profile: ConfidenceSemanticsSchema.nullable().optional(),
211
+ naming_confidence_profile: ConfidenceSemanticsSchema.optional(),
212
+ confidence: z.number().min(0).max(1),
213
+ confidence_breakdown: z.object({
214
+ decompile: z.number().min(0).max(1),
215
+ cfg: z.number().min(0).max(1),
216
+ assembly: z.number().min(0).max(1),
217
+ context: z.number().min(0).max(1),
218
+ }),
219
+ gaps: z.array(z.string()),
220
+ evidence: z.object({
221
+ pseudocode_lines: z.number().int().nonnegative(),
222
+ cfg_nodes: z.number().int().nonnegative(),
223
+ cfg_edges: z.number().int().nonnegative(),
224
+ instruction_count: z.number().int().nonnegative(),
225
+ caller_count: z.number().int().nonnegative(),
226
+ callee_count: z.number().int().nonnegative(),
227
+ }),
228
+ behavior_tags: z.array(z.string()),
229
+ source_like_snippet: z.string(),
230
+ assembly_excerpt: z.string(),
231
+ });
232
+ export const CodeFunctionsReconstructOutputSchema = z.object({
233
+ ok: z.boolean(),
234
+ data: z
235
+ .object({
236
+ sample_id: z.string(),
237
+ mode: z.enum(['single', 'topk']),
238
+ requested_count: z.number().int().nonnegative(),
239
+ reconstructed_count: z.number().int().nonnegative(),
240
+ overall_confidence: z.number().min(0).max(1),
241
+ provenance: AnalysisProvenanceSchema,
242
+ confidence_map: z.array(z.object({
243
+ function: z.string(),
244
+ address: z.string(),
245
+ confidence: z.number().min(0).max(1),
246
+ gaps: z.array(z.string()),
247
+ })),
248
+ functions: z.array(ReconstructedFunctionSchema),
249
+ })
250
+ .optional(),
251
+ warnings: z.array(z.string()).optional(),
252
+ errors: z.array(z.string()).optional(),
253
+ metrics: z
254
+ .object({
255
+ elapsed_ms: z.number(),
256
+ tool: z.string(),
257
+ cached: z.boolean().optional(),
258
+ cache_key: z.string().optional(),
259
+ cache_tier: z.string().optional(),
260
+ cache_created_at: z.string().optional(),
261
+ cache_expires_at: z.string().optional(),
262
+ cache_hit_at: z.string().optional(),
263
+ })
264
+ .optional(),
265
+ });
266
+ export const codeFunctionsReconstructToolDefinition = {
267
+ name: TOOL_NAME,
268
+ description: 'Reconstruct function-level semantics by combining decompile, CFG, and assembly evidence with confidence and unresolved gaps.',
269
+ inputSchema: CodeFunctionsReconstructInputSchema,
270
+ outputSchema: CodeFunctionsReconstructOutputSchema,
271
+ };
272
+ const SENSITIVE_API_SUMMARY_PATTERNS = [
273
+ 'WriteProcessMemory',
274
+ 'ReadProcessMemory',
275
+ 'CreateRemoteThread',
276
+ 'SetThreadContext',
277
+ 'ResumeThread',
278
+ 'OpenProcess',
279
+ 'CreateProcessA',
280
+ 'CreateProcessW',
281
+ 'WinExec',
282
+ 'ShellExecuteA',
283
+ 'ShellExecuteW',
284
+ 'VirtualAllocEx',
285
+ 'VirtualAlloc',
286
+ 'GetProcAddress',
287
+ 'LoadLibraryA',
288
+ 'LoadLibraryW',
289
+ 'LoadLibraryExA',
290
+ 'LoadLibraryExW',
291
+ 'InternetOpenA',
292
+ 'InternetOpenW',
293
+ 'InternetConnectA',
294
+ 'InternetConnectW',
295
+ 'HttpSendRequestA',
296
+ 'HttpSendRequestW',
297
+ 'RegOpenKeyExA',
298
+ 'RegOpenKeyExW',
299
+ 'RegSetValueExA',
300
+ 'RegSetValueExW',
301
+ 'CreateFileA',
302
+ 'CreateFileW',
303
+ 'WriteFile',
304
+ 'ReadFile',
305
+ 'DeleteFileA',
306
+ 'DeleteFileW',
307
+ 'BCryptEncrypt',
308
+ 'BCryptDecrypt',
309
+ 'IsDebuggerPresent',
310
+ 'CheckRemoteDebuggerPresent',
311
+ 'NtQueryInformationProcess',
312
+ 'NtQuerySystemInformation',
313
+ ];
314
+ const KNOWN_LIBRARY_SYMBOL_NAMES = new Set([
315
+ 'memcpy',
316
+ 'memcmp',
317
+ 'memset',
318
+ 'strlen',
319
+ 'strcmp',
320
+ 'strncmp',
321
+ 'strcpy',
322
+ 'strncpy',
323
+ 'strcat',
324
+ 'strncat',
325
+ 'malloc',
326
+ 'calloc',
327
+ 'realloc',
328
+ 'free',
329
+ 'qsort',
330
+ 'bsearch',
331
+ ]);
332
+ const LINKED_SUGGESTION_PRIORITY_PREFIXES = [
333
+ 'resolve_',
334
+ 'prepare_',
335
+ 'transfer_',
336
+ 'query_',
337
+ 'scan_',
338
+ 'dispatch_',
339
+ 'read_',
340
+ 'write_',
341
+ 'inspect_',
342
+ 'collect_',
343
+ 'build_',
344
+ 'finalize_',
345
+ ];
346
+ const SEMANTIC_STOPWORDS = new Set([
347
+ 'this',
348
+ 'that',
349
+ 'with',
350
+ 'from',
351
+ 'into',
352
+ 'then',
353
+ 'void',
354
+ 'code',
355
+ 'true',
356
+ 'false',
357
+ 'return',
358
+ 'call',
359
+ 'calls',
360
+ 'function',
361
+ 'likely',
362
+ 'after',
363
+ 'before',
364
+ 'using',
365
+ 'through',
366
+ 'stage',
367
+ ]);
368
/**
 * Constrain a number to the inclusive range [min, max].
 *
 * @param {number} value - Number to constrain.
 * @param {number} min - Lower bound; wins over `max` if the bounds are inverted.
 * @param {number} max - Upper bound.
 * @returns {number} `value` limited to the range.
 */
function clamp(value, min, max) {
    const cappedAbove = Math.min(max, value);
    return Math.max(min, cappedAbove);
}
371
/**
 * Remove empty entries and duplicates from a list, keeping first-seen order.
 *
 * An entry is kept only when its `length` is greater than zero.
 *
 * @param {Array<string>} values - Entries to filter.
 * @returns {Array<string>} New array of distinct, non-empty entries.
 */
function dedupe(values) {
    const nonEmpty = values.filter((entry) => entry.length > 0);
    return [...new Set(nonEmpty)];
}
374
/**
 * Keep the first item seen for each distinct key, preserving input order.
 *
 * @param {Iterable<*>} items - Items to filter.
 * @param {function(*): *} keyFn - Derives the uniqueness key for an item.
 * @returns {Array<*>} New array with one item per key.
 */
function uniqBy(items, keyFn) {
    // A Map keeps insertion order, so values() yields first occurrences in input order.
    const firstByKey = new Map();
    for (const item of items) {
        const key = keyFn(item);
        if (!firstByKey.has(key)) {
            firstByKey.set(key, item);
        }
    }
    return Array.from(firstByKey.values());
}
387
/**
 * Build a display label for an entry carrying optional `name` and `address`.
 *
 * @param {{name?: string, address?: string}} item - Entry to label.
 * @returns {string} `name@address` when both are present, otherwise whichever
 *   one is present, otherwise the literal 'unknown'.
 */
function buildNamedAddressLabel(item) {
    const { name, address } = item;
    if (!name) {
        return address || 'unknown';
    }
    return address ? `${name}@${address}` : name;
}
393
/**
 * Build the target label for a caller/callee relationship record.
 *
 * @param {{name?: string, address?: string}} relationship - Relationship record.
 * @returns {string} `name@address` when both are present, otherwise whichever
 *   one is present, otherwise the literal 'unknown'.
 */
function buildRelationshipTargetLabel(relationship) {
    const name = relationship.name;
    const address = relationship.address;
    const hasBoth = Boolean(name && address);
    return hasBoth ? `${name}@${address}` : name || address || 'unknown';
}
399
/**
 * Condense a relationship record into the summary shape used in tool output.
 *
 * Each type list is de-duplicated and capped at four entries. A missing
 * `resolved_by` becomes null, and `is_exact` is passed through only when it is
 * a real boolean (null otherwise).
 *
 * @param {object} relationship - Relationship record from the decompile result.
 * @returns {{target: string, relation_types: string[], reference_types: string[],
 *   resolved_by: (string|null), is_exact: (boolean|null)}} Summary entry.
 */
function buildRelationshipSummaryEntry(relationship) {
    const MAX_TYPES = 4;
    const {
        relation_types: relationTypes,
        reference_types: referenceTypes,
        resolved_by: resolvedBy,
        is_exact: isExact,
    } = relationship;
    return {
        target: buildRelationshipTargetLabel(relationship),
        relation_types: dedupe(relationTypes || []).slice(0, MAX_TYPES),
        reference_types: dedupe(referenceTypes || []).slice(0, MAX_TYPES),
        resolved_by: resolvedBy || null,
        is_exact: typeof isExact === 'boolean' ? isExact : null,
    };
}
408
/**
 * Merge recovered relationship records with the plain caller/callee lists of a
 * decompile result into summary entries.
 *
 * Relationship records always come first and are always kept. A plain
 * caller/callee is added (with empty type lists and `is_exact: true`) only
 * when no relationship record already covers the same target label. Entries
 * are then de-duplicated on their full content and capped at 6 callers and
 * 8 callees.
 *
 * @param {object|null|undefined} decompiled - Decompile result; may be absent.
 * @returns {{callers: object[], callees: object[]}} Summary entries per direction.
 */
function buildRelationshipContext(decompiled) {
    const entryKey = (item) => {
        const exactness = item.is_exact === null ? 'unknown' : item.is_exact ? 'exact' : 'heuristic';
        return `${item.target}|${item.relation_types.join(',')}|${item.reference_types.join(',')}|${item.resolved_by || ''}|${exactness}`;
    };
    const buildEntries = (relationships, directEntries, limit) => {
        const relationshipEntries = (relationships || []).map((relationship) => buildRelationshipSummaryEntry(relationship));
        const coveredTargets = new Set(relationshipEntries.map((item) => item.target));
        // Only direct entries are checked against coveredTargets. Applying the
        // check to the merged list would also discard relationship records that
        // carry no relation_types, dropping their target from the output entirely.
        const uncoveredDirectEntries = directEntries
            .map((item) => ({
                target: buildNamedAddressLabel(item),
                relation_types: [],
                reference_types: [],
                resolved_by: null,
                is_exact: true,
            }))
            .filter((item) => !coveredTargets.has(item.target));
        const entries = [...relationshipEntries, ...uncoveredDirectEntries].filter((item) => item.target.length > 0);
        return uniqBy(entries, entryKey).slice(0, limit);
    };
    return {
        callers: buildEntries(decompiled?.caller_relationships, decompiled?.callers || [], 6),
        callees: buildEntries(decompiled?.callee_relationships, decompiled?.callees || [], 8),
    };
}
429
/**
 * Render a relationship summary entry as a single label.
 *
 * The annotations are the entry's relation types, reference types, a
 * `resolved_by=<value>` note when present, and 'heuristic' when `is_exact` is
 * explicitly false. They are de-duplicated and joined with '; '.
 *
 * @param {object} entry - Relationship summary entry.
 * @returns {string} `target [a; b]`, or just the target when there is nothing to annotate.
 */
function formatRelationshipEntry(entry) {
    const annotations = [...(entry.relation_types || []), ...(entry.reference_types || [])];
    if (entry.resolved_by) {
        annotations.push(`resolved_by=${entry.resolved_by}`);
    }
    if (entry.is_exact === false) {
        annotations.push('heuristic');
    }
    // dedupe also strips empty entries, so no further filtering is needed.
    const details = dedupe(annotations);
    return details.length === 0 ? entry.target : `${entry.target} [${details.join('; ')}]`;
}
441
/**
 * Produce a one-sentence summary of the most informative relationships.
 *
 * Only entries that carry a relation type, a reference type, or a
 * `resolved_by` value are considered. At most one caller and two callees are
 * picked, and the combined list is cut to two insights.
 *
 * @param {{callers: object[], callees: object[]}} relationships - Relationship context.
 * @returns {string|null} Summary sentence, or null when nothing qualifies.
 */
function summarizeRelationshipInsights(relationships) {
    const isInformative = (item) => item.relation_types.length > 0 || item.reference_types.length > 0 || item.resolved_by;
    const describe = (role, items, limit) => items
        .filter(isInformative)
        .slice(0, limit)
        .map((item) => `${role} ${formatRelationshipEntry(item)}`);
    const insights = [
        ...describe('caller', relationships.callers, 1),
        ...describe('callee', relationships.callees, 2),
    ].slice(0, 2);
    if (insights.length === 0) {
        return null;
    }
    return `relationship recovery links this routine to ${insights.join(' and ')}`;
}
457
/**
 * Split pseudocode text into lines, dropping trailing whitespace and blank lines.
 *
 * Leading indentation is preserved. Both LF and CRLF line endings are accepted.
 *
 * @param {string|null|undefined} pseudocode - Raw pseudocode text.
 * @returns {string[]} Non-empty lines; an empty array for empty or missing input.
 */
function parsePseudocodeLines(pseudocode) {
    if (!pseudocode) {
        return [];
    }
    const kept = [];
    for (const rawLine of pseudocode.split(/\r?\n/)) {
        const line = rawLine.trimEnd();
        if (line.length > 0) {
            kept.push(line);
        }
    }
    return kept;
}
466
/**
 * Build a bounded assembly excerpt from a control-flow graph.
 *
 * Each block contributes a `; block <id> (<type>) @ <address>` header, its
 * instructions, and a blank separator line, until `maxLines` lines have been
 * emitted. When any header or instruction had to be left out, the last emitted
 * line is replaced with '; ...truncated' so the excerpt stays within
 * `maxLines`.
 *
 * @param {{nodes?: Array<{id: *, type: *, address: *, instructions?: string[]}>}|null|undefined} cfg
 *   Control-flow graph; may be absent.
 * @param {number} maxLines - Maximum number of excerpt lines.
 * @returns {{excerpt: string, instructionCount: number}} The excerpt text and
 *   the total number of instructions across ALL blocks, including blocks that
 *   did not fit into the excerpt.
 */
function extractAssemblyFromCFG(cfg, maxLines) {
    const nodes = cfg?.nodes ?? [];
    if (nodes.length === 0) {
        return {
            excerpt: '; assembly unavailable (missing CFG)',
            instructionCount: 0,
        };
    }
    const lines = [];
    let instructionCount = 0;
    let truncated = false;
    for (const node of nodes) {
        const instructions = Array.isArray(node.instructions) ? node.instructions : [];
        // Count every instruction, even once the excerpt is full, so the
        // reported size does not depend on the excerpt limit.
        instructionCount += instructions.length;
        if (lines.length >= maxLines) {
            truncated = true;
            continue;
        }
        lines.push(`; block ${node.id} (${node.type}) @ ${node.address}`);
        for (const instruction of instructions) {
            if (lines.length < maxLines) {
                lines.push(instruction);
            }
            else {
                truncated = true;
            }
        }
        // A dropped separator line is not content loss, so it does not set `truncated`.
        if (lines.length < maxLines) {
            lines.push('');
        }
    }
    // Mark truncation only when something was really omitted; an excerpt that
    // exactly fills the budget keeps its last instruction.
    if (truncated && lines.length > 0) {
        lines[lines.length - 1] = '; ...truncated';
    }
    return {
        excerpt: lines.join('\n'),
        instructionCount,
    };
}
498
/**
 * List the evidence gaps for one reconstructed function.
 *
 * Possible labels, in emission order: 'missing_pseudocode', 'missing_cfg' or
 * 'limited_control_flow_visibility', 'limited_call_context',
 * 'snippet_truncated', 'unresolved_data_symbols',
 * 'unresolved_function_symbols', 'missing_all_primary_evidence'.
 *
 * @param {string[]} pseudocodeLines - Parsed pseudocode lines.
 * @param {{nodes?: Array}|null|undefined} cfg - Control-flow graph; may be absent.
 * @param {object|null|undefined} decompiled - Decompile result; may be absent
 *   and may lack caller/callee arrays.
 * @param {number} maxPseudocodeLines - Snippet line budget.
 * @returns {string[]} Gap labels, each at most once.
 */
function collectGaps(pseudocodeLines, cfg, decompiled, maxPseudocodeLines) {
    const gaps = [];
    const pseudocode = pseudocodeLines.join('\n');
    // callers/callees are optional on the decompile result (other helpers in
    // this file read them as `decompiled?.callers || []`), so guard the
    // property access as well as the object itself.
    const callerCount = Math.max(decompiled?.callers?.length || 0, decompiled?.caller_relationships?.length || 0);
    const calleeCount = Math.max(decompiled?.callees?.length || 0, decompiled?.callee_relationships?.length || 0);
    const cfgNodeCount = cfg?.nodes?.length || 0;
    if (!decompiled || pseudocodeLines.length === 0) {
        gaps.push('missing_pseudocode');
    }
    if (cfgNodeCount === 0) {
        gaps.push('missing_cfg');
    }
    else if (cfgNodeCount <= 1) {
        gaps.push('limited_control_flow_visibility');
    }
    if (decompiled && callerCount === 0 && calleeCount === 0) {
        gaps.push('limited_call_context');
    }
    if (pseudocodeLines.length > maxPseudocodeLines) {
        gaps.push('snippet_truncated');
    }
    if (/\bDAT_[0-9a-f]+\b/i.test(pseudocode) || /\bundefined\d*\b/i.test(pseudocode)) {
        gaps.push('unresolved_data_symbols');
    }
    if (/\bFUN_[0-9a-f]+\b/i.test(pseudocode)) {
        gaps.push('unresolved_function_symbols');
    }
    if (!decompiled && !cfg) {
        gaps.push('missing_all_primary_evidence');
    }
    // Every label above is pushed by exactly one branch, so the list is
    // already free of duplicates and empty entries.
    return gaps;
}
529
/**
 * Infer coarse behavior tags by matching well-known API and keyword patterns.
 *
 * The search corpus is the pseudocode, the assembly excerpt, and the names and
 * `resolved_by` values of callers, callees, and recovered relationships.
 * Matching is case-insensitive and anchored on word boundaries.
 *
 * @param {object|null|undefined} decompiled - Decompile result; may be absent.
 * @param {string} assembly - Assembly excerpt text.
 * @returns {string[]} Matching tags, in the fixed order of the checks table.
 */
function inferBehaviorTags(decompiled, assembly) {
    const relationshipCorpus = [
        ...(decompiled?.callers || []).map((item) => item.name),
        ...(decompiled?.callees || []).map((item) => item.name),
        ...(decompiled?.caller_relationships || []).flatMap((item) => [item.name, item.resolved_by]),
        ...(decompiled?.callee_relationships || []).flatMap((item) => [item.name, item.resolved_by]),
    ]
        .filter((item) => typeof item === 'string' && item.length > 0)
        .join('\n');
    const corpus = `${decompiled?.pseudocode || ''}\n${assembly}\n${relationshipCorpus}`;
    // Every pattern ends in \b, so a bare API stem can never match a longer
    // real name (e.g. `BCrypt` vs `BCryptEncrypt`). Stems therefore spell out
    // their Ex / A / W suffixes or end in \w* to absorb the rest of the name.
    const checks = [
        {
            tag: 'process_injection',
            regex: /\b(WriteProcessMemory|CreateRemoteThread|VirtualAllocEx|NtWriteVirtualMemory)\b/i,
        },
        { tag: 'process_spawn', regex: /\b(CreateProcess(?:A|W)?|WinExec|ShellExecute(?:Ex)?(?:A|W)?)\b/i },
        {
            tag: 'networking',
            regex: /\b(InternetOpen(?:A|W)?|InternetConnect(?:A|W)?|HttpSendRequest(?:A|W)?|WinHttp\w*|socket|connect|WSAStartup|send|recv|bind|listen|accept)\b/i,
        },
        { tag: 'file_io', regex: /\b(CreateFile(?:A|W)?|WriteFile|ReadFile|DeleteFile(?:A|W)?)\b/i },
        { tag: 'registry', regex: /\b(Reg(?:SetValue|OpenKey|CreateKey)(?:Ex)?(?:A|W)?)\b/i },
        { tag: 'crypto', regex: /\b(CryptAcquire\w*|CryptEncrypt|CryptDecrypt|BCrypt\w*)\b/i },
        {
            tag: 'anti_debug',
            regex: /\b(IsDebuggerPresent|CheckRemoteDebuggerPresent|NtQueryInformationProcess)\b/i,
        },
        {
            tag: 'service_control',
            regex: /\b(CreateService(?:A|W)?|StartService(?:A|W)?|OpenSCManager(?:A|W)?|ControlService|RegisterServiceCtrlHandler(?:A|W)?)\b/i,
        },
        {
            tag: 'com_activation',
            regex: /\b(CoCreateInstance|QueryInterface|(?:Co)?RegisterClassObject|DllGetClassObject|IID_\w*|CLSID_\w*)\b/i,
        },
        {
            tag: 'dll_lifecycle',
            regex: /\b(DllMain|DllRegisterServer|DllUnregisterServer|DllInstall|DLL_PROCESS_ATTACH|DLL_THREAD_ATTACH)\b/i,
        },
        {
            tag: 'export_dispatch',
            regex: /\b(export|ordinal|forwarder|DllGetClassObject|DllCanUnloadNow)\b/i,
        },
        {
            tag: 'plugin_callback',
            regex: /\b(callback|plugin|host interface|event sink|notification handler)\b/i,
        },
    ];
    return checks.filter((item) => item.regex.test(corpus)).map((item) => item.tag);
}
579
/**
 * Produces short caller and callee label lists for a decompiled function.
 *
 * Plain caller/callee entries are labelled with buildNamedAddressLabel and
 * dropped when the label equals a relationship target; relationship entries
 * are then appended via formatRelationshipEntry. Each list is passed through
 * dedupe and truncated (6 callers, 8 callees).
 *
 * @param {object|null|undefined} decompiled - Decompiler record with optional
 *   `callers` / `callees` arrays.
 * @returns {{callers: string[], callees: string[]}}
 */
function buildCallContext(decompiled) {
  const { callers: callerRelations, callees: calleeRelations } = buildRelationshipContext(decompiled);

  // Shared merge step for both directions of the call graph.
  const mergeLabels = (plainEntries, relations, limit) => {
    const relationTargets = new Set(relations.map((relation) => relation.target));
    const plainLabels = (plainEntries || [])
      .map((entry) => buildNamedAddressLabel(entry))
      .filter((label) => !relationTargets.has(label));
    const relationLabels = relations.map((relation) => formatRelationshipEntry(relation));
    return dedupe([...plainLabels, ...relationLabels]).slice(0, limit);
  };

  return {
    callers: mergeLabels(decompiled?.callers, callerRelations, 6),
    callees: mergeLabels(decompiled?.callees, calleeRelations, 8),
  };
}
597
/**
 * Normalises a callee name into an API-name candidate, or rejects it.
 *
 * Rejected inputs (result `null`): anything that is not a string, names
 * shorter than three characters after trimming, auto-generated labels of the
 * form FUN_/LAB_/DAT_/UNK_/sub_ + hex, bare `0x…` addresses, and names that
 * contain no ASCII letter.
 *
 * @param {unknown} raw - Candidate name.
 * @returns {string|null} The trimmed name, or null when it is not usable.
 */
function normalizeCalleeApiCandidate(raw) {
  // Non-string values (numbers, objects) would otherwise throw on .trim().
  if (typeof raw !== 'string') {
    return null;
  }
  const candidate = raw.trim();
  if (candidate.length < 3) {
    return null;
  }
  if (/^(FUN|LAB|DAT|UNK|sub)_[0-9a-f]+$/i.test(candidate)) {
    return null;
  }
  if (/^0x[0-9a-f]+$/i.test(candidate)) {
    return null;
  }
  if (!/[A-Za-z]/.test(candidate)) {
    return null;
  }
  return candidate;
}
616
/**
 * Classifies how a callee relationship was established.
 *
 * The API name, relation types, reference types and `resolved_by` value are
 * joined into one lower-cased string that is searched for marker substrings.
 * Because the API name is part of that string, markers found inside the name
 * itself also count.
 *
 * @param {object} relationship - Reads `relation_types`, `reference_types`
 *   and `resolved_by` (all optional).
 * @param {string} api - Callee API name.
 * @returns {'dynamic_resolution_api'|'dynamic_resolution_helper'|'global_string_hint'|'static_named_call'|'unknown'}
 */
function inferRelationshipProvenance(relationship, api) {
  const haystack = [
    api,
    ...(relationship.relation_types || []),
    ...(relationship.reference_types || []),
    relationship.resolved_by || '',
  ]
    .join(' ')
    .toLowerCase();
  const mentions = (needle) => haystack.includes(needle);

  // The resolver APIs themselves always win over any other marker.
  const isResolverApi = /^GetProcAddress$/i.test(api) || /^LoadLibrary/i.test(api);
  if (isResolverApi) {
    return 'dynamic_resolution_api';
  }
  if (mentions('dynamic') || mentions('getprocaddress') || mentions('loadlibrary')) {
    return 'dynamic_resolution_helper';
  }
  const looksLikeDataReference = mentions('data') && mentions('body_reference_hint');
  if (mentions('string') || looksLikeDataReference) {
    return 'global_string_hint';
  }
  if (mentions('direct_call') || mentions('tail_jump_hint') || mentions('call')) {
    return 'static_named_call';
  }
  return 'unknown';
}
645
/**
 * Flattens a relationship record into prefixed evidence strings
 * (`relation:`, `reference:`, `ref_addr:`, `resolved_by:`, `is_exact:`).
 * At most the first two reference addresses are included. Absent
 * `resolved_by` / non-boolean `is_exact` contribute an empty string; the list
 * is handed to dedupe before being returned.
 *
 * @param {object} relationship - Relationship record.
 * @returns {string[]} Result of dedupe over the collected entries.
 */
function buildRelationshipEvidence(relationship) {
  const entries = [];
  for (const relationType of relationship.relation_types || []) {
    entries.push(`relation:${relationType}`);
  }
  for (const referenceType of relationship.reference_types || []) {
    entries.push(`reference:${referenceType}`);
  }
  for (const address of (relationship.reference_addresses || []).slice(0, 2)) {
    entries.push(`ref_addr:${address}`);
  }
  entries.push(relationship.resolved_by ? `resolved_by:${relationship.resolved_by}` : '');
  if (typeof relationship.is_exact === 'boolean') {
    entries.push(relationship.is_exact ? 'is_exact:true' : 'is_exact:false');
  } else {
    entries.push('');
  }
  return dedupe(entries);
}
656
/**
 * Collects API cross-reference signals for a function from four sources:
 * the target's existing `xrefSummary`, `calls_sensitive_api:` rank reasons,
 * decompiler callee names / callee relationships, and whole-word text matches
 * of SENSITIVE_API_SUMMARY_PATTERNS in the pseudocode and assembly excerpt.
 *
 * Signals are de-duplicated on (lower-cased api, provenance), ordered by
 * descending confidence then API name, and limited to eight entries.
 *
 * @param {object} target - Reads optional `xrefSummary` and `rankReasons`.
 * @param {object|null|undefined} decompiled - Reads optional `callees`,
 *   `callee_relationships` and `pseudocode`.
 * @param {string|null|undefined} assemblyExcerpt - Assembly text to scan.
 * @returns {Array<{api: string, provenance: string, confidence: number, evidence: string[]}>}
 */
function collectXrefSignals(target, decompiled, assemblyExcerpt) {
  const signals = [];
  const pushSignal = (signal) => {
    signals.push({
      api: signal.api,
      provenance: signal.provenance,
      confidence: clamp(signal.confidence, 0, 1),
      evidence: dedupe(signal.evidence),
    });
  };
  // Loop-invariant: confidence assigned to relationship-derived signals.
  const confidenceByProvenance = {
    static_named_call: 0.63,
    dynamic_resolution_api: 0.76,
    dynamic_resolution_helper: 0.68,
    global_string_hint: 0.46,
    unknown: 0.4,
  };
  for (const signal of target.xrefSummary || []) {
    pushSignal(signal);
  }
  // Guarded like xrefSummary so a target without rank reasons does not throw.
  for (const reason of target.rankReasons || []) {
    const match = /^calls_sensitive_api:(.+)$/i.exec(reason);
    if (!match) {
      continue;
    }
    pushSignal({
      api: match[1],
      provenance: 'static_named_call',
      confidence: 0.6,
      evidence: [`rank_reason:${match[1]}`],
    });
  }
  for (const callee of decompiled?.callees || []) {
    const api = normalizeCalleeApiCandidate(callee.name);
    if (!api) {
      continue;
    }
    pushSignal({
      api,
      provenance: 'static_named_call',
      confidence: 0.55,
      evidence: [`callee:${api}`],
    });
  }
  for (const relationship of decompiled?.callee_relationships || []) {
    const api = normalizeCalleeApiCandidate(relationship.name) ||
      normalizeCalleeApiCandidate(relationship.resolved_by);
    if (!api) {
      continue;
    }
    const provenance = inferRelationshipProvenance(relationship, api);
    pushSignal({
      api,
      provenance,
      confidence: confidenceByProvenance[provenance],
      evidence: buildRelationshipEvidence(relationship),
    });
  }
  const textCorpus = `${decompiled?.pseudocode || ''}\n${assemblyExcerpt ?? ''}`;
  for (const api of SENSITIVE_API_SUMMARY_PATTERNS) {
    // Escape regex metacharacters so the pattern is matched literally.
    const escapedApi = api.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const matcher = new RegExp(`\\b${escapedApi}\\b`, 'i');
    if (!matcher.test(textCorpus)) {
      continue;
    }
    const provenance = /^GetProcAddress$|^LoadLibrary/i.test(api) ? 'dynamic_resolution_api' : 'static_named_call';
    pushSignal({
      api,
      provenance,
      confidence: provenance === 'dynamic_resolution_api' ? 0.72 : 0.58,
      evidence: [`text_match:${api}`],
    });
  }
  return uniqBy(signals, (item) => `${item.api.toLowerCase()}|${item.provenance}`)
    .sort((a, b) => b.confidence - a.confidence || a.api.localeCompare(b.api))
    .slice(0, 8);
}
732
/**
 * Maps a behaviour tag to a short human-readable phrase.
 *
 * @param {string} tag - Behaviour tag such as `file_io`.
 * @returns {string} The phrase for a known tag; for any other tag, the tag
 *   itself with underscores replaced by spaces.
 */
function describeBehaviorTag(tag) {
  const mapping = {
    process_injection: 'remote process injection',
    process_spawn: 'process creation or command execution',
    networking: 'network communication',
    file_io: 'file system operations',
    registry: 'registry access',
    crypto: 'cryptographic processing',
    anti_debug: 'anti-analysis checks',
    service_control: 'service control logic',
    com_activation: 'COM activation or interface brokering',
    dll_lifecycle: 'DLL entrypoint or registration handling',
    export_dispatch: 'DLL export dispatch or host-facing command routing',
    plugin_callback: 'plugin or callback-driven host integration',
  };
  // Own-property check: a tag like "constructor" must not resolve to an
  // inherited Object.prototype member.
  if (Object.prototype.hasOwnProperty.call(mapping, tag)) {
    return mapping[tag];
  }
  return tag.replace(/_/g, ' ');
}
749
/**
 * Proposes a descriptive name and role for a function from its API signals,
 * runtime stages, behaviour tags and surrounding text.
 *
 * Rules are evaluated top to bottom and the first match wins. The returned
 * confidence is the rule's base value, plus 0.06 when
 * `runtimeContext.executed` is truthy, plus 0.03 per evidence entry beyond
 * the first (capped at 0.12), clamped to [0.35, 0.98]. Names contained in
 * KNOWN_LIBRARY_SYMBOL_NAMES never receive a suggestion.
 *
 * @param {string} functionName - Current function name.
 * @param {string[]} behaviorTags - Behaviour tags for the function.
 * @param {Array<{api: string}>} xrefSignals - API signals; only `api` is read.
 * @param {{callers: string[], callees: string[]}} callContext
 * @param {{callers: object[], callees: object[]}} relationshipContext - Reads
 *   `target` and `relation_types` of each entry.
 * @param {string[]} gaps - Evidence gaps.
 * @param {string[]} rankReasons - Ranking reasons.
 * @param {string} semanticSummary - Free-text summary.
 * @param {string} additionalEvidenceText - Extra free text.
 * @param {object} [runtimeContext] - Reads optional `corroborated_stages`,
 *   `notes` and `executed`.
 * @returns {{suggested_name: string|null, suggested_role: string|null, rename_confidence: number, rename_evidence: string[]}}
 */
function buildRenameSuggestion(functionName, behaviorTags, xrefSignals, callContext, relationshipContext, gaps, rankReasons, semanticSummary, additionalEvidenceText, runtimeContext) {
  const normalizedFunctionName = functionName.trim().toLowerCase();
  if (KNOWN_LIBRARY_SYMBOL_NAMES.has(normalizedFunctionName)) {
    return {
      suggested_name: null,
      suggested_role: null,
      rename_confidence: 0,
      rename_evidence: [],
    };
  }
  const apiSet = new Set(xrefSignals.map((item) => item.api.toLowerCase()).filter((item) => item.length > 0));
  const stageSet = new Set((runtimeContext?.corroborated_stages || []).map((item) => item.toLowerCase()));
  const tagSet = new Set(behaviorTags.map((item) => item.toLowerCase()));
  const textCorpus = [
    functionName,
    semanticSummary,
    additionalEvidenceText,
    ...rankReasons,
    ...callContext.callers,
    ...callContext.callees,
    ...relationshipContext.callers.map((item) => item.target),
    ...relationshipContext.callees.map((item) => item.target),
    ...relationshipContext.callers.flatMap((item) => item.relation_types || []),
    ...relationshipContext.callees.flatMap((item) => item.relation_types || []),
    ...gaps,
    ...(runtimeContext?.notes || []),
  ]
    .join('\n')
    .toLowerCase();
  const evidence = [];
  const hasApi = (...apis) => apis.some((api) => apiSet.has(api.toLowerCase()));
  const hasStage = (...stages) => stages.some((stage) => stageSet.has(stage.toLowerCase()));
  const hasTag = (...tags) => tags.some((tag) => tagSet.has(tag.toLowerCase()));
  const textHas = (pattern) => pattern.test(textCorpus);
  const callerCount = callContext.callers.length;
  const calleeCount = callContext.callees.length;
  // NOTE(review): `\bthunk_fun_\b` cannot match names like `thunk_fun_00401000`
  // (no word boundary after the trailing underscore). Left as-is because
  // enabling it would change which functions are classified as thunks.
  const hasTailJumpHint = functionName.toLowerCase().startsWith('thunk_') ||
    textHas(/\btail_jump_hint\b|\bunconditional_jump\b|\bthunk_fun_\b/i);
  const finalize = (suggestedName, suggestedRole, baseConfidence, matchedEvidence) => ({
    suggested_name: suggestedName,
    suggested_role: suggestedRole,
    rename_confidence: clamp(baseConfidence +
      (runtimeContext?.executed ? 0.06 : 0) +
      Math.min((matchedEvidence.length - 1) * 0.03, 0.12), 0.35, 0.98),
    rename_evidence: dedupe(matchedEvidence).slice(0, 6),
  });
  if (hasTailJumpHint && calleeCount >= 1) {
    if (functionName.toLowerCase().startsWith('thunk_')) {
      evidence.push('name:thunk');
    }
    if (textHas(/\btail_jump_hint\b/i)) {
      evidence.push('relation:tail_jump_hint');
    }
    if (calleeCount === 1) {
      evidence.push('callee_count:1');
    }
    return finalize('tailcall_dispatch_thunk', 'Thin forwarding thunk that jumps into a resolved callee or dispatch target.', 0.72, evidence);
  }
  if (hasApi('ReadProcessMemory') || hasStage('read_remote_memory')) {
    // Record only the triggers that actually fired: the API evidence must not
    // be claimed when the match came from the runtime stage alone.
    if (hasApi('ReadProcessMemory')) {
      evidence.push('api:ReadProcessMemory');
    }
    if (hasStage('read_remote_memory')) {
      evidence.push('stage:read_remote_memory');
    }
    if (hasStage('prepare_remote_process_access')) {
      evidence.push('stage:prepare_remote_process_access');
    }
    return finalize('read_remote_memory', 'Reads remote process memory after preparing a target process handle.', 0.82, evidence);
  }
  if (hasApi('WriteProcessMemory')) {
    evidence.push('api:WriteProcessMemory');
    if (hasApi('VirtualAllocEx')) {
      evidence.push('api:VirtualAllocEx');
    }
    if (hasStage('prepare_remote_process_access')) {
      evidence.push('stage:prepare_remote_process_access');
    }
    return finalize('write_remote_memory', 'Writes payload or control data into a remote process address space.', 0.86, evidence);
  }
  if (hasApi('SetThreadContext', 'ResumeThread', 'CreateRemoteThread') ||
    hasStage('transfer_remote_execution', 'resume_remote_thread')) {
    if (hasApi('SetThreadContext')) {
      evidence.push('api:SetThreadContext');
    }
    if (hasApi('ResumeThread')) {
      evidence.push('api:ResumeThread');
    }
    if (hasApi('CreateRemoteThread')) {
      evidence.push('api:CreateRemoteThread');
    }
    if (hasStage('transfer_remote_execution')) {
      evidence.push('stage:transfer_remote_execution');
    }
    return finalize('transfer_remote_execution', 'Transfers execution into a prepared remote process or thread context.', 0.84, evidence);
  }
  if (hasApi('GetProcAddress', 'LoadLibraryA', 'LoadLibraryW', 'LoadLibraryExA', 'LoadLibraryExW') ||
    hasStage('resolve_dynamic_apis')) {
    if (hasApi('GetProcAddress')) {
      evidence.push('api:GetProcAddress');
    }
    if (hasApi('LoadLibraryA', 'LoadLibraryW', 'LoadLibraryExA', 'LoadLibraryExW')) {
      evidence.push('api:LoadLibrary*');
    }
    if (hasStage('resolve_dynamic_apis')) {
      evidence.push('stage:resolve_dynamic_apis');
    }
    return finalize('resolve_dynamic_apis', 'Builds or refreshes runtime API resolver state before later actions.', 0.8, evidence);
  }
  if (hasApi('OpenProcess', 'CreateProcessA', 'CreateProcessW') ||
    hasStage('prepare_remote_process_access', 'spawn_remote_target') ||
    (hasTag('process_spawn', 'process_injection') && textHas(/\b(openprocess|createprocess|remote process)\b/i))) {
    if (hasApi('OpenProcess')) {
      evidence.push('api:OpenProcess');
    }
    if (hasApi('CreateProcessA', 'CreateProcessW')) {
      evidence.push('api:CreateProcess*');
    }
    if (hasStage('prepare_remote_process_access')) {
      evidence.push('stage:prepare_remote_process_access');
    }
    return finalize('prepare_remote_process_access', 'Prepares a target process, launch context, or access token for later memory operations.', 0.78, evidence);
  }
  // NOTE(review): in the alternations below `\b` binds only to the first and
  // last alternative (e.g. `table` matches inside "executable"); kept
  // unchanged pending confirmation of the intended matching.
  const mentionsProcessSnapshot = textHas(/\bpeb|remote process snapshot|process information\b/i);
  if (hasApi('NtQueryInformationProcess') ||
    hasStage('inspect_process_context') ||
    mentionsProcessSnapshot) {
    // As above: attribute the match to whichever trigger(s) really fired.
    if (hasApi('NtQueryInformationProcess')) {
      evidence.push('api:NtQueryInformationProcess');
    }
    if (hasStage('inspect_process_context')) {
      evidence.push('stage:inspect_process_context');
    }
    if (mentionsProcessSnapshot) {
      evidence.push('text:process_snapshot');
    }
    return finalize('query_remote_process_snapshot', 'Collects remote process state such as handles, PEB-adjacent metadata, or integrity flags.', 0.77, evidence);
  }
  if (hasApi('NtQuerySystemInformation') ||
    textHas(/\bcode integrity|kernel_code_integrity_status_raw\b/i)) {
    if (hasApi('NtQuerySystemInformation')) {
      evidence.push('api:NtQuerySystemInformation');
    }
    if (textHas(/\bcode integrity|kernel_code_integrity_status_raw\b/i)) {
      evidence.push('text:code_integrity');
    }
    return finalize('query_code_integrity_state', 'Queries system-level integrity or anti-analysis state before operator actions.', 0.79, evidence);
  }
  if (textHas(/\bpacker|protector|vmprotect|themida|upx|entry point in non-first section\b/i)) {
    evidence.push('text:packer_detection');
    return finalize('scan_packer_signatures', 'Scans PE layout and signature indicators for common packers or protectors.', 0.83, evidence);
  }
  if (hasApi('CreateFileA', 'CreateFileW', 'ReadFile', 'WriteFile', 'DeleteFileA', 'DeleteFileW') ||
    hasTag('file_io')) {
    if (hasApi('CreateFileA', 'CreateFileW')) {
      evidence.push('api:CreateFile*');
    }
    if (hasApi('ReadFile', 'WriteFile', 'DeleteFileA', 'DeleteFileW')) {
      evidence.push('api:file_io');
    }
    return finalize('dispatch_file_operations', 'Coordinates file-system access such as opening, reading, writing, or deleting files.', 0.73, evidence);
  }
  if (rankReasons.includes('high_callers') &&
    callerCount >= 4 &&
    calleeCount >= 2 &&
    gaps.includes('unresolved_function_symbols')) {
    evidence.push('rank_reason:high_callers');
    evidence.push(`caller_count:${callerCount}`);
    evidence.push(`callee_count:${calleeCount}`);
    evidence.push('gap:unresolved_function_symbols');
    return finalize('dispatch_shared_routine', 'Shared high-fan-in dispatcher that fans out into several subordinate routines.', 0.66, evidence);
  }
  if (textHas(/\bdispatch|table|capability\b/i) || rankReasons.includes('entry_point')) {
    if (textHas(/\bdispatch\b/i)) {
      evidence.push('text:dispatch');
    }
    if (textHas(/\bcapability\b/i)) {
      evidence.push('text:capability');
    }
    if (rankReasons.includes('entry_point')) {
      evidence.push('rank_reason:entry_point');
    }
    return finalize('dispatch_module_capabilities', 'Acts as a dispatcher that routes execution into subordinate capability handlers.', 0.62, evidence);
  }
  return {
    suggested_name: null,
    suggested_role: null,
    rename_confidence: 0,
    rename_evidence: [],
  };
}
926
/**
 * Rewrites the `// suggested_name=` header of a source-like snippet.
 *
 * Any existing lines starting with `// suggested_name=` are removed. When the
 * suggestion carries a name, a fresh header line is placed first; otherwise
 * the remaining lines are returned unchanged (joined with `\n`).
 *
 * @param {string} sourceLikeSnippet - Snippet text (LF or CRLF line endings).
 * @param {object|null|undefined} suggestion - Rename suggestion; reads
 *   `suggested_name`, `suggested_role`, `rename_confidence`, `rename_evidence`.
 * @returns {string} Snippet with the header refreshed or stripped.
 */
function withSuggestedNameHeader(sourceLikeSnippet, suggestion) {
  const retained = [];
  for (const line of sourceLikeSnippet.split(/\r?\n/)) {
    if (!line.startsWith('// suggested_name=')) {
      retained.push(line);
    }
  }
  if (!suggestion?.suggested_name) {
    return retained.join('\n');
  }
  const role = suggestion.suggested_role || 'unknown';
  const confidence = suggestion.rename_confidence.toFixed(2);
  const evidence = suggestion.rename_evidence.join(', ') || 'none';
  const header = `// suggested_name=${suggestion.suggested_name} role=${role} rename_confidence=${confidence} evidence=${evidence}`;
  return [header, ...retained].join('\n');
}
938
/**
 * Summarises the shape of a snippet's body after dropping `//` comment lines.
 *
 * @param {string} sourceLikeSnippet - Snippet text (LF or CRLF line endings).
 * @returns {{pseudocode: string, is_void_return_stub: boolean, constant_return: number|null, has_trap_tail: boolean}}
 *   `pseudocode` is the comment-free text; `is_void_return_stub` is true when
 *   the whitespace-collapsed text ends in `{ return; }`; `constant_return` is
 *   the first integer literal returned, if any; `has_trap_tail` is true when
 *   `swi(3)` or one of the trap/abort keywords occurs.
 */
function extractSnippetBodyShape(sourceLikeSnippet) {
  const codeLines = sourceLikeSnippet.split(/\r?\n/).filter((line) => !line.startsWith('//'));
  const pseudocode = codeLines.join('\n').trim();
  const compact = pseudocode.replace(/\s+/g, ' ').trim();

  let constantReturn = null;
  const returnLiteral = /\breturn\s+(-?\d+)\s*;/.exec(compact);
  if (returnLiteral) {
    const parsed = Number(returnLiteral[1]);
    // Literals too large to be finite are reported as "no constant".
    if (Number.isFinite(parsed)) {
      constantReturn = parsed;
    }
  }

  const endsInBareReturn = /\{\s*return;\s*\}\s*$/i.test(compact);
  const mentionsTrap = /\bswi\s*\(\s*3\s*\)|\b(__debugbreak|debugbreak|trap|abort|unreachable)\b/i.test(compact);
  return {
    pseudocode,
    is_void_return_stub: endsInBareReturn,
    constant_return: constantReturn,
    has_trap_tail: mentionsTrap,
  };
}
954
/**
 * Collapses every whitespace run to a single space, trims the ends and
 * truncates the result.
 *
 * @param {string} value - Raw hint text.
 * @param {number} [maxLength=120] - End index passed to `slice`.
 * @returns {string} The normalised hint.
 */
function normalizeSemanticHint(value, maxLength = 120) {
  const singleSpaced = value.replace(/\s+/g, ' ');
  const trimmed = singleSpaced.trim();
  return trimmed.slice(0, maxLength);
}
957
/**
 * Splits text into lower-case tokens on anything other than `a-z`, `0-9` and
 * `_`, keeps tokens of at least four characters that are not in
 * SEMANTIC_STOPWORDS, and passes the list through dedupe.
 *
 * @param {string} value - Text to tokenise.
 * @returns {string[]} Result of dedupe over the kept tokens.
 */
function tokenizeSemanticText(value) {
  const kept = [];
  for (const fragment of value.toLowerCase().split(/[^a-z0-9_]+/)) {
    const token = fragment.trim();
    if (token.length < 4 || SEMANTIC_STOPWORDS.has(token)) {
      continue;
    }
    kept.push(token);
  }
  return dedupe(kept);
}
964
/**
 * Summarises a control-flow graph.
 *
 * @param {{nodes?: Array<{type: string}>, edges?: Array<{from: *, to: *}>}|null|undefined} cfg
 *   Graph to summarise; a missing graph, or missing `nodes` / `edges`, is
 *   treated as empty.
 * @returns {{node_count: number, edge_count: number, has_loop: boolean, has_branching: boolean, block_types: string[], entry_block_type: string|null}}
 *   `has_loop` is true only when some edge has identical `from` and `to`
 *   (cycles spanning several blocks are not detected here). `has_branching`
 *   is true when any `from` value appears on more than one edge.
 *   `entry_block_type` is the type of the first node typed `entry`, else of
 *   the first node, else null.
 */
function buildCFGShape(cfg) {
  // Normalise once so a partially populated cfg cannot throw further down.
  const nodes = cfg?.nodes || [];
  const edges = cfg?.edges || [];
  const blockTypes = dedupe(nodes.map((node) => node.type));
  const entryBlockType = nodes.find((node) => node.type === 'entry')?.type || nodes[0]?.type || null;
  const loopEdges = edges.filter((edge) => edge.from === edge.to);
  const outgoingCount = new Map();
  for (const edge of edges) {
    outgoingCount.set(edge.from, (outgoingCount.get(edge.from) || 0) + 1);
  }
  return {
    node_count: nodes.length,
    edge_count: edges.length,
    has_loop: loopEdges.length > 0,
    has_branching: Array.from(outgoingCount.values()).some((count) => count > 1),
    block_types: blockTypes,
    entry_block_type: entryBlockType,
  };
}
981
/**
 * Returns the leading non-comment lines of a snippet as one trimmed string.
 *
 * @param {string} sourceLikeSnippet - Snippet text (LF or CRLF line endings).
 * @param {number} [maxLines=10] - End index passed to `slice` over the
 *   lines that do not start with `//`.
 * @returns {string} The selected lines joined with `\n` and trimmed.
 */
function buildPseudocodeExcerpt(sourceLikeSnippet, maxLines = 10) {
  const isCodeLine = (line) => !line.startsWith('//');
  const codeLines = sourceLikeSnippet.split(/\r?\n/).filter(isCodeLine);
  const excerpt = codeLines.slice(0, maxLines).join('\n');
  return excerpt.trim();
}
988
/**
 * Picks up to six sample strings that look most relevant to a function.
 *
 * Candidates come from `sampleStrings.top_high_value` (base score 3) and from
 * every string in `sampleStrings.context_windows` (base score 1 + the window
 * score capped at 6). Each normalised candidate gains points for containing
 * keywords derived from the function name, behaviour tags, API signals,
 * runtime context and summary, for appearing in the summary/snippet text, and
 * for mentioning packer or memory/loader API terms. Hints are ordered by
 * descending score, then alphabetically, with duplicates removed.
 *
 * @param {object|null|undefined} sampleStrings - String inventory; a falsy
 *   value yields an empty result.
 * @param {string} functionName
 * @param {string[]} behaviorTags
 * @param {Array<{api: string}>} xrefSignals
 * @param {object} [runtimeContext] - Reads optional `corroborated_stages` and
 *   `corroborated_apis`.
 * @param {string} semanticSummary
 * @param {string} sourceLikeSnippet
 * @returns {string[]} At most six hint strings.
 */
function buildFunctionStringHints(sampleStrings, functionName, behaviorTags, xrefSignals, runtimeContext, semanticSummary, sourceLikeSnippet) {
  if (!sampleStrings) {
    return [];
  }
  const stages = runtimeContext?.corroborated_stages || [];
  const runtimeApis = runtimeContext?.corroborated_apis || [];
  const keywords = new Set([
    ...tokenizeSemanticText(functionName),
    ...behaviorTags.map((tag) => tag.toLowerCase()),
    ...xrefSignals.flatMap((signal) => tokenizeSemanticText(signal.api)),
    ...stages.map((stage) => stage.toLowerCase()),
    ...runtimeApis.flatMap((api) => tokenizeSemanticText(api)),
    ...tokenizeSemanticText(semanticSummary),
  ]);
  const evidenceCorpus = `${semanticSummary}\n${sourceLikeSnippet}`.toLowerCase();
  const highValueTerms = /(packer|protector|entry point|section|vmprotect|themida|upx|readprocessmemory|writeprocessmemory|getprocaddress|loadlibrary)/i;

  // Scores one normalised hint, starting from the source-specific base score.
  const scoreHint = (hint, baseScore) => {
    const lowered = hint.toLowerCase();
    let score = baseScore;
    keywords.forEach((keyword) => {
      if (lowered.includes(keyword)) {
        score += 2;
      }
    });
    if (xrefSignals.some((signal) => lowered.includes(signal.api.toLowerCase()))) {
      score += 3;
    }
    if (stages.some((stage) => lowered.includes(stage.toLowerCase()))) {
      score += 3;
    }
    if (runtimeApis.some((api) => lowered.includes(api.toLowerCase()))) {
      score += 2;
    }
    if (evidenceCorpus.includes(lowered)) {
      score += 1;
    }
    if (highValueTerms.test(hint)) {
      score += 2;
    }
    return score;
  };

  const candidates = [];
  const consider = (rawValue, baseScore) => {
    const hint = normalizeSemanticHint(rawValue);
    if (hint) {
      candidates.push({ hint, score: scoreHint(hint, baseScore) });
    }
  };
  for (const entry of sampleStrings.top_high_value || []) {
    consider(entry.string, 3);
  }
  for (const contextWindow of sampleStrings.context_windows || []) {
    const windowBase = 1 + Math.min(contextWindow.score, 6);
    for (const entry of contextWindow.strings || []) {
      consider(entry.string, windowBase);
    }
  }

  candidates.sort((a, b) => b.score - a.score || a.hint.localeCompare(b.hint));
  const uniqueHints = [...new Set(candidates.map((candidate) => candidate.hint))];
  return uniqueHints.slice(0, 6);
}
1045
/**
 * Guesses parameter roles for a function from behaviour tags, API signals,
 * runtime context, string hints and free text.
 *
 * All inputs are joined into one lower-cased corpus; a set of capability
 * flags is derived from tags and corpus substrings; each enabled capability
 * then contributes a fixed list of role guesses. A (slot, role) pair is
 * emitted once only, the first occurrence winning, so the same slot may
 * carry several different roles.
 *
 * @param {string[]} behaviorTags
 * @param {Array<{api: string, provenance: string}>} xrefSignals
 * @param {object} [runtimeContext] - Reads optional `corroborated_apis` and
 *   `corroborated_stages`.
 * @param {string[]} stringHints
 * @param {string} semanticSummary
 * @param {string} sourceLikeSnippet
 * @returns {Array<{slot: string, role: string, inferred_type: string, confidence: number, evidence: string[]}>}
 */
function inferParameterRoles(behaviorTags, xrefSignals, runtimeContext, stringHints, semanticSummary, sourceLikeSnippet) {
  const stages = runtimeContext?.corroborated_stages || [];
  const corpus = [
    semanticSummary,
    sourceLikeSnippet,
    ...stringHints,
    ...xrefSignals.map((signal) => `${signal.api} ${signal.provenance}`),
    ...(runtimeContext?.corroborated_apis || []),
    ...stages,
    ...behaviorTags,
  ]
    .join('\n')
    .toLowerCase();
  const tagged = (...tags) => tags.some((tag) => behaviorTags.includes(tag));
  const inCorpus = (pattern) => pattern.test(corpus);

  // Capability flags.
  const hasProcessOps = tagged('process_injection', 'process_spawn', 'anti_debug') ||
    inCorpus(/(writeprocessmemory|readprocessmemory|openprocess|createremotethread|setthreadcontext|resumethread|createprocessw|createprocessa)/i) ||
    stages.includes('prepare_remote_process_access');
  const hasDynamicResolver = inCorpus(/getprocaddress|loadlibrary|resolve_dynamic_apis|dynamic api/i);
  const hasFileOps = tagged('file_io') ||
    inCorpus(/(createfile|readfile|writefile|deletefile|copyfile|findfirstfile|findnextfile)/i) ||
    stages.includes('file_operations');
  const hasRegistryOps = tagged('registry') ||
    inCorpus(/(regopenkey|regsetvalue|regqueryvalue|registry_operations)/i);
  const hasPackerScan = tagged('packer_detection') ||
    inCorpus(/(packer|protector|entry point in non-first section|vmprotect|themida|upx)/i);
  const hasNetworkOps = tagged('networking') ||
    inCorpus(/(internetopen|internetconnect|httpsendrequest|winhttp|socket|connect|send|recv|bind|listen|accept|wsastartup|urlmon|webrequest)/i);
  const hasServiceOps = tagged('service_control') ||
    inCorpus(/(createservice|startservice|openscmanager|controlservice|registerservicectrlhandler|service_main|service control)/i);
  const hasComOps = tagged('com_activation') ||
    inCorpus(/(cocreateinstance|queryinterface|registerclassobject|dllgetclassobject|clsid_|iid_|class factory|com activation)/i);
  const hasDllEntry = tagged('dll_lifecycle') ||
    inCorpus(/(dllmain|dllregisterserver|dllunregisterserver|dllinstall|dllcanunloadnow|dll_process_attach|dll_thread_attach|reason code)/i);
  const hasExportDispatch = tagged('export_dispatch') ||
    inCorpus(/(export|ordinal|forwarder|dispatch exported|host-facing command|dllgetclassobject|dllcanunloadnow)/i);
  const hasCliHints = inCorpus(/(usage:|--help|\/\?|command|subcommand|detect|scan|dump|inject)/i);

  // Each group: [enabled, [[slot, role, inferredType, confidence, evidence], ...]].
  // Group order and row order determine which (slot, role) pair wins.
  const roleGroups = [
    [hasProcessOps, [
      ['string_arg_0', 'target_process_selector', 'const char *', 0.78, [
        'behavior:process_injection_or_spawn',
        'runtime_stage:prepare_remote_process_access',
      ]],
      ['string_arg_1', 'launch_command_line', 'const char *', 0.67, [
        'api:CreateProcessW/CreateProcessA',
        'summary:spawn_or_remote_execution_context',
      ]],
      ['pointer_arg_0', 'payload_buffer', 'void *', 0.73, [
        'api:WriteProcessMemory/ReadProcessMemory',
        'summary:remote_memory_transfer',
      ]],
      ['handle_arg_0', 'process_handle', 'HANDLE', 0.82, [
        'api:OpenProcess',
        'runtime_stage:prepare_remote_process_access',
      ]],
      ['handle_arg_1', 'thread_handle', 'HANDLE', 0.71, [
        'api:ResumeThread/SetThreadContext/CreateRemoteThread',
        'summary:execution_transfer',
      ]],
      ['scalar_arg_0', 'operation_mode_flags', 'uint64_t', 0.55, ['summary:mode_flags']],
    ]],
    [hasDynamicResolver, [
      ['string_arg_0', hasProcessOps ? 'target_process_selector' : 'module_name_hint', 'const char *', hasProcessOps ? 0.78 : 0.76, [
        'api:GetProcAddress/LoadLibrary*',
      ]],
      ['string_arg_1', 'api_name_hint', 'const char *', 0.74, [
        'api:GetProcAddress',
        'summary:dynamic_resolution',
      ]],
      ['handle_arg_0', hasProcessOps ? 'process_handle' : 'module_handle_hint', hasProcessOps ? 'HANDLE' : 'HMODULE', hasProcessOps ? 0.82 : 0.58, [
        'api:LoadLibrary*/GetProcAddress',
      ]],
    ]],
    [hasFileOps, [
      ['string_arg_0', 'primary_path', 'const char *', 0.77, ['api:CreateFile*/DeleteFile*/CopyFile*']],
      ['string_arg_1', 'secondary_path_or_pattern', 'const char *', 0.61, ['api:CopyFile*/FindFirstFile*']],
      ['pointer_arg_0', 'buffer_view', 'void *', 0.66, ['api:ReadFile/WriteFile']],
      ['handle_arg_0', 'file_handle', 'HANDLE', 0.74, ['api:CreateFile*/ReadFile/WriteFile']],
      ['scalar_arg_0', 'file_operation_flags', 'uint64_t', 0.52, ['summary:file_operation_mode']],
    ]],
    [hasRegistryOps, [
      ['string_arg_0', 'registry_path', 'const char *', 0.75, ['api:RegOpenKey*/RegCreateKey*']],
      ['string_arg_1', 'registry_value_name', 'const char *', 0.63, ['api:RegSetValue*/RegQueryValue*']],
      ['pointer_arg_0', 'registry_value_buffer', 'void *', 0.59, ['api:RegSetValue*/RegQueryValue*']],
      ['handle_arg_0', 'registry_key_handle', 'HKEY', 0.72, ['api:RegOpenKey*/RegCreateKey*']],
    ]],
    [hasNetworkOps, [
      ['string_arg_0', 'remote_host_or_url', 'const char *', 0.78, ['api:InternetConnect*/WinHttp*/socket/connect']],
      ['string_arg_1', 'request_path_or_header', 'const char *', 0.63, ['api:HttpSendRequest*/send']],
      ['pointer_arg_0', 'network_buffer', 'void *', 0.69, ['api:send/recv/HttpSendRequest*']],
      ['handle_arg_0', 'socket_or_request_handle', 'uintptr_t', 0.76, ['api:InternetOpen*/InternetConnect*/socket']],
      ['scalar_arg_0', 'network_option_flags', 'uint64_t', 0.57, ['summary:network_configuration_or_mode']],
    ]],
    [hasServiceOps, [
      ['string_arg_0', 'service_name', 'const char *', 0.77, ['api:CreateService*/OpenService*']],
      ['string_arg_1', 'service_display_name_or_command', 'const char *', 0.62, ['api:CreateService*/StartService*']],
      ['handle_arg_0', 'service_manager_or_service_handle', 'SC_HANDLE', 0.73, ['api:OpenSCManager/CreateService/OpenService']],
      ['scalar_arg_0', 'service_control_code', 'uint32_t', 0.58, ['api:ControlService/RegisterServiceCtrlHandler']],
    ]],
    [hasComOps, [
      ['string_arg_0', 'class_or_interface_identifier', 'const char *', 0.64, ['api:CoCreateInstance/QueryInterface']],
      ['pointer_arg_0', 'interface_or_object_pointer', 'void **', 0.71, ['api:QueryInterface/DllGetClassObject']],
      ['scalar_arg_0', 'class_context_flags', 'uint32_t', 0.56, ['api:CoCreateInstance']],
    ]],
    [hasDllEntry, [
      ['handle_arg_0', 'module_instance', 'HMODULE', 0.79, ['summary:dll_entrypoint_or_registration']],
      ['scalar_arg_0', 'dll_reason_code', 'uint32_t', 0.74, ['summary:dll_process_or_thread_attach']],
      ['pointer_arg_0', 'reserved_context', 'void *', 0.61, ['summary:dll_reserved_context']],
    ]],
    [hasExportDispatch, [
      ['string_arg_0', 'exported_command_name_or_dispatch_key', 'const char *', 0.6, ['summary:export_dispatch_or_forwarder_selection']],
      ['pointer_arg_0', 'export_argument_block', 'void *', 0.57, ['summary:export_dispatch_argument_block']],
      ['scalar_arg_0', 'ordinal_or_dispatch_flags', 'uint32_t', 0.55, ['summary:export_ordinal_or_dispatch_mode']],
    ]],
    [hasPackerScan, [
      ['pointer_arg_0', hasFileOps ? 'buffer_view' : 'image_view', 'void *', hasFileOps ? 0.66 : 0.81, ['summary:packer_or_pe_layout_scan']],
      ['string_arg_0', hasCliHints ? 'command_hint' : 'section_name_hint', 'const char *', 0.56, ['strings:packer/protector/help_text']],
      ['scalar_arg_0', 'scan_mode_flags', 'uint64_t', 0.58, ['summary:heuristic_scan_mode']],
    ]],
    [hasCliHints, [
      ['string_arg_0', 'command_verb_or_primary_text', 'const char *', 0.54, ['strings:cli_or_help_text']],
    ]],
  ];

  const roles = [];
  for (const [enabled, rows] of roleGroups) {
    if (!enabled) {
      continue;
    }
    for (const [slot, role, inferredType, confidence, evidence] of rows) {
      const alreadyEmitted = roles.some((item) => item.slot === slot && item.role === role);
      if (alreadyEmitted) {
        continue;
      }
      roles.push({
        slot,
        role,
        inferred_type: inferredType,
        confidence: clamp(confidence, 0, 1),
        evidence: dedupe(evidence),
      });
    }
  }
  return roles;
}
1242
+ function inferStateRoles(behaviorTags, xrefSignals, runtimeContext, stringHints, semanticSummary, sourceLikeSnippet) {
1243
+ const roles = [];
1244
+ const corpus = [
1245
+ semanticSummary,
1246
+ sourceLikeSnippet,
1247
+ ...stringHints,
1248
+ ...xrefSignals.map((item) => item.api),
1249
+ ...(runtimeContext?.corroborated_stages || []),
1250
+ ...(runtimeContext?.corroborated_apis || []),
1251
+ ...behaviorTags,
1252
+ ]
1253
+ .join('\n')
1254
+ .toLowerCase();
1255
+ const addRole = (stateKey, role, confidence, evidence) => {
1256
+ if (roles.some((item) => item.state_key === stateKey)) {
1257
+ return;
1258
+ }
1259
+ roles.push({
1260
+ state_key: stateKey,
1261
+ role,
1262
+ confidence: clamp(confidence, 0, 1),
1263
+ evidence: dedupe(evidence),
1264
+ });
1265
+ };
1266
+ if (/getprocaddress|loadlibrary|resolve_dynamic_apis/i.test(corpus)) {
1267
+ addRole('dynamic_api_table', 'Caches dynamically resolved imports or late-bound API pointers.', 0.84, [
1268
+ 'api:GetProcAddress/LoadLibrary*',
1269
+ ]);
1270
+ }
1271
+ if (/createfile|readfile|writefile|deletefile|copyfile|file_operations/i.test(corpus)) {
1272
+ addRole('file_api_table', 'Tracks file-system capability pointers or file-operation state.', 0.74, [
1273
+ 'api:CreateFile*/ReadFile/WriteFile',
1274
+ ]);
1275
+ }
1276
+ if (/regopenkey|regsetvalue|regqueryvalue|registry_operations/i.test(corpus)) {
1277
+ addRole('registry_api_table', 'Tracks registry capability pointers or key/value update state.', 0.74, [
1278
+ 'api:RegOpenKey*/RegSetValue*',
1279
+ ]);
1280
+ }
1281
+ if (/ntqueryinformationprocess|ntquerysysteminformation|isdebuggerpresent|code integrity|anti_analysis_checks/i.test(corpus)) {
1282
+ addRole('process_probe', 'Accumulates anti-analysis probes and remote-process environment observations.', 0.79, [
1283
+ 'api:NtQueryInformationProcess/NtQuerySystemInformation',
1284
+ ]);
1285
+ }
1286
+ if (/writeprocessmemory|readprocessmemory|setthreadcontext|resumethread|createprocess/i.test(corpus)) {
1287
+ addRole('execution_transfer_result', 'Stores the currently selected process-transfer stage and observed status.', 0.77, [
1288
+ 'api:WriteProcessMemory/SetThreadContext/ResumeThread/CreateProcess*',
1289
+ ]);
1290
+ }
1291
+ if (/packer|protector|upx|themida|vmprotect|entry point in non-first section/i.test(corpus)) {
1292
+ addRole('packer_heuristics', 'Accumulates packer heuristics, matched signatures, and section-layout findings.', 0.83, [
1293
+ 'strings:packer/protector',
1294
+ ]);
1295
+ }
1296
+ if (/usage:|--help|command|subcommand|detect|scan|dump|inject/i.test(corpus)) {
1297
+ addRole('cli_model', 'Captures recovered command verbs, help banners, and command summaries.', 0.63, [
1298
+ 'strings:cli_or_help_text',
1299
+ ]);
1300
+ }
1301
+ if (/dispatch|capability/i.test(corpus)) {
1302
+ addRole('dispatch_plan', 'Stores intermediate routing decisions between capability-specific handlers.', 0.61, [
1303
+ 'summary:dispatch_or_capability_routing',
1304
+ ]);
1305
+ }
1306
+ if (/internetopen|internetconnect|httpsendrequest|winhttp|socket|connect|send|recv|bind|listen|accept|networking/i.test(corpus)) {
1307
+ addRole('network_session', 'Tracks socket or HTTP request state, buffers, and remote endpoint intent.', 0.76, [
1308
+ 'api:InternetConnect*/HttpSendRequest*/socket',
1309
+ ]);
1310
+ }
1311
+ if (/createservice|startservice|openscmanager|controlservice|registerservicectrlhandler|service_main/i.test(corpus)) {
1312
+ addRole('service_control_state', 'Tracks service manager handles, lifecycle commands, and SCM-facing status.', 0.75, [
1313
+ 'api:CreateService/OpenSCManager/ControlService',
1314
+ ]);
1315
+ }
1316
+ if (/cocreateinstance|queryinterface|registerclassobject|dllgetclassobject|clsid_|iid_|class factory/i.test(corpus)) {
1317
+ addRole('com_class_factory', 'Tracks COM class/object activation flow and interface handoff state.', 0.73, [
1318
+ 'api:CoCreateInstance/QueryInterface/DllGetClassObject',
1319
+ ]);
1320
+ }
1321
+ if (/dllmain|dllregisterserver|dllunregisterserver|dllinstall|dllcanunloadnow|dll_process_attach|dll_thread_attach/i.test(corpus)) {
1322
+ addRole('dll_entry_state', 'Tracks DLL entrypoint reasons, registration lifecycle, or attach/detach state.', 0.72, [
1323
+ 'summary:dll_lifecycle',
1324
+ ]);
1325
+ }
1326
+ if (/export|ordinal|forwarder|dispatch exported|host-facing command|dllgetclassobject|dllcanunloadnow/i.test(corpus)) {
1327
+ addRole('export_dispatch_table', 'Tracks export ordinals, forwarders, and host-facing dispatch routing.', 0.67, [
1328
+ 'summary:export_dispatch',
1329
+ ]);
1330
+ }
1331
+ return roles;
1332
+ }
1333
/**
 * Proposes struct layouts implied by the recovered parameter and state roles.
 *
 * Every rule in the table fires when one of its triggering parameter roles or
 * state keys is present; each firing rule contributes one struct descriptor,
 * in table order. When at least one state role exists, a trailing
 * `runtime_context` aggregate is appended with one field per recognised
 * state key.
 *
 * @param {Array<{slot: string, role: string}>} parameterRoles Recovered parameter roles.
 * @param {Array<{state_key: string}>} stateRoles Recovered state roles.
 * @returns {Array<object>} Struct descriptors, unique by `semantic_name`.
 */
function inferStructInference(parameterRoles, stateRoles) {
    const stateSeen = (key) => stateRoles.some((entry) => entry.state_key === key);
    const roleSeen = (slot, role) => parameterRoles.some((entry) => entry.slot === slot && entry.role === role);
    // Fields without a backing argument slot must not carry a `source_slot` key at all.
    const field = (name, inferredType, sourceSlot) => sourceSlot === undefined
        ? { name, inferred_type: inferredType }
        : { name, inferred_type: inferredType, source_slot: sourceSlot };

    const inferred = [];
    const register = (descriptor) => {
        const duplicate = inferred.some((known) => known.semantic_name === descriptor.semantic_name);
        if (!duplicate) {
            inferred.push(descriptor);
        }
    };

    const rules = [
        {
            applies: roleSeen('string_arg_0', 'target_process_selector') ||
                roleSeen('pointer_arg_0', 'payload_buffer') ||
                roleSeen('handle_arg_0', 'process_handle'),
            struct: {
                semantic_name: 'remote_process_request',
                rewrite_type_name: 'AkRemoteProcessRequest',
                kind: 'request',
                confidence: 0.82,
                fields: [
                    field('target_selector', 'const char *', 'string_arg_0'),
                    field('launch_command_line', 'const char *', 'string_arg_1'),
                    field('payload_view', 'void *', 'pointer_arg_0'),
                    field('process_handle', 'HANDLE', 'handle_arg_0'),
                    field('thread_handle', 'HANDLE', 'handle_arg_1'),
                    field('mode_flags', 'uint64_t', 'scalar_arg_0'),
                ],
                evidence: ['parameter_roles:target_process_selector/payload_buffer/process_handle'],
            },
        },
        {
            applies: stateSeen('execution_transfer_result'),
            struct: {
                semantic_name: 'execution_transfer_result',
                rewrite_type_name: 'AkExecutionTransferResult',
                kind: 'result',
                confidence: 0.78,
                fields: [
                    field('status_code', 'int'),
                    field('stage_name', 'const char *'),
                    field('detail', 'const char *'),
                    field('transfer_mode', 'const char *'),
                    field('observed_value', 'uint64_t'),
                ],
                evidence: ['state_roles:execution_transfer_result'],
            },
        },
        {
            applies: stateSeen('dispatch_plan'),
            struct: {
                semantic_name: 'capability_dispatch_plan',
                rewrite_type_name: 'AkCapabilityDispatchPlan',
                kind: 'session',
                confidence: 0.66,
                fields: [
                    field('request', 'dispatch_request'),
                    field('result', 'dispatch_result'),
                ],
                evidence: ['state_roles:dispatch_plan'],
            },
        },
        {
            applies: roleSeen('pointer_arg_0', 'image_view') ||
                roleSeen('string_arg_0', 'section_name_hint') ||
                stateSeen('packer_heuristics'),
            struct: {
                semantic_name: 'packer_scan_session',
                rewrite_type_name: 'AkPackerScanSession',
                kind: 'session',
                confidence: 0.79,
                fields: [
                    field('request', 'packer_scan_request'),
                    field('result', 'packer_scan_result'),
                ],
                evidence: ['parameter_roles:image_view/section_name_hint', 'state_roles:packer_heuristics'],
            },
        },
        {
            applies: stateSeen('dynamic_api_table'),
            struct: {
                semantic_name: 'api_resolution_table',
                rewrite_type_name: 'AkResolvedApiTable',
                kind: 'table',
                confidence: 0.77,
                fields: [
                    field('ready', 'int'),
                    field('role', 'const char *'),
                    field('apis', 'const char *[8]'),
                    field('api_count', 'int'),
                ],
                evidence: ['state_roles:dynamic_api_table'],
            },
        },
        {
            applies: roleSeen('string_arg_0', 'remote_host_or_url') ||
                roleSeen('pointer_arg_0', 'network_buffer') ||
                stateSeen('network_session'),
            struct: {
                semantic_name: 'network_request_context',
                rewrite_type_name: 'AkNetworkRequestContext',
                kind: 'request',
                confidence: 0.75,
                fields: [
                    field('remote_host_or_url', 'const char *', 'string_arg_0'),
                    field('request_path_or_header', 'const char *', 'string_arg_1'),
                    field('buffer_view', 'void *', 'pointer_arg_0'),
                    field('request_handle', 'uintptr_t', 'handle_arg_0'),
                    field('option_flags', 'uint64_t', 'scalar_arg_0'),
                ],
                evidence: ['parameter_roles:remote_host_or_url/network_buffer', 'state_roles:network_session'],
            },
        },
        {
            applies: roleSeen('string_arg_0', 'service_name') ||
                roleSeen('handle_arg_0', 'service_manager_or_service_handle') ||
                stateSeen('service_control_state'),
            struct: {
                semantic_name: 'service_control_context',
                rewrite_type_name: 'AkServiceControlContext',
                kind: 'context',
                confidence: 0.74,
                fields: [
                    field('service_name', 'const char *', 'string_arg_0'),
                    field('display_name_or_command', 'const char *', 'string_arg_1'),
                    field('service_handle', 'SC_HANDLE', 'handle_arg_0'),
                    field('control_code', 'uint32_t', 'scalar_arg_0'),
                ],
                evidence: ['parameter_roles:service_name/service_manager_or_service_handle', 'state_roles:service_control_state'],
            },
        },
        {
            applies: roleSeen('string_arg_0', 'class_or_interface_identifier') ||
                roleSeen('pointer_arg_0', 'interface_or_object_pointer') ||
                stateSeen('com_class_factory'),
            struct: {
                semantic_name: 'com_activation_context',
                rewrite_type_name: 'AkComActivationContext',
                kind: 'context',
                confidence: 0.72,
                fields: [
                    field('class_or_interface_id', 'const char *', 'string_arg_0'),
                    field('object_pointer', 'void **', 'pointer_arg_0'),
                    field('class_context', 'uint32_t', 'scalar_arg_0'),
                ],
                evidence: ['parameter_roles:class_or_interface_identifier/interface_or_object_pointer', 'state_roles:com_class_factory'],
            },
        },
        {
            applies: roleSeen('handle_arg_0', 'module_instance') ||
                roleSeen('scalar_arg_0', 'dll_reason_code') ||
                stateSeen('dll_entry_state'),
            struct: {
                semantic_name: 'dll_entry_context',
                rewrite_type_name: 'AkDllEntryContext',
                kind: 'context',
                confidence: 0.74,
                fields: [
                    field('module_instance', 'HMODULE', 'handle_arg_0'),
                    field('reason_code', 'uint32_t', 'scalar_arg_0'),
                    field('reserved_context', 'void *', 'pointer_arg_0'),
                ],
                evidence: ['parameter_roles:module_instance/dll_reason_code', 'state_roles:dll_entry_state'],
            },
        },
        {
            applies: roleSeen('string_arg_0', 'exported_command_name_or_dispatch_key') ||
                roleSeen('pointer_arg_0', 'export_argument_block') ||
                stateSeen('export_dispatch_table'),
            struct: {
                semantic_name: 'export_dispatch_table',
                rewrite_type_name: 'AkExportDispatchTable',
                kind: 'table',
                confidence: 0.68,
                fields: [
                    field('dispatch_key', 'const char *', 'string_arg_0'),
                    field('argument_block', 'void *', 'pointer_arg_0'),
                    field('ordinal_or_flags', 'uint32_t', 'scalar_arg_0'),
                ],
                evidence: ['parameter_roles:exported_command_name_or_dispatch_key/export_argument_block', 'state_roles:export_dispatch_table'],
            },
        },
    ];
    for (const rule of rules) {
        if (rule.applies) {
            register(rule.struct);
        }
    }

    if (stateRoles.length > 0) {
        // [state key, field name, field type] — one optional aggregate member per state key.
        const aggregateMembers = [
            ['dynamic_api_table', 'dynamic_apis', 'api_resolution_table'],
            ['file_api_table', 'file_apis', 'api_resolution_table'],
            ['registry_api_table', 'registry_apis', 'api_resolution_table'],
            ['process_probe', 'process_probe', 'process_probe_state'],
            ['network_session', 'network_session', 'network_request_context'],
            ['service_control_state', 'service_control', 'service_control_context'],
            ['com_class_factory', 'com_activation', 'com_activation_context'],
            ['dll_entry_state', 'dll_entry', 'dll_entry_context'],
            ['export_dispatch_table', 'exports', 'export_dispatch_table'],
            ['packer_heuristics', 'packer_heuristics', 'packer_heuristics'],
            ['cli_model', 'cli', 'cli_model'],
        ];
        const runtimeFields = aggregateMembers
            .filter(([stateKey]) => stateSeen(stateKey))
            .map(([, name, inferredType]) => field(name, inferredType));
        register({
            semantic_name: 'runtime_context',
            rewrite_type_name: 'AkRuntimeContext',
            kind: 'context',
            // Confidence grows with the number of state roles, bounded to [0.55, 0.88].
            confidence: clamp(0.55 + stateRoles.length * 0.05, 0.55, 0.88),
            fields: runtimeFields,
            evidence: stateRoles.map((entry) => `state_roles:${entry.state_key}`),
        });
    }
    return inferred;
}
1563
/**
 * Renders up to six parameter roles as `slot=>role<type>` entries joined by `; `.
 *
 * @param {Array<{slot: string, role: string, inferred_type: string}>} parameterRoles
 * @returns {string} The compact summary, or `'none'` when the list is empty.
 */
function summarizeParameterRoles(parameterRoles) {
    const shown = parameterRoles.length === 0 ? null : parameterRoles.slice(0, 6);
    if (shown === null) {
        return 'none';
    }
    const rendered = shown.map((entry) => `${entry.slot}=>${entry.role}<${entry.inferred_type}>`);
    return rendered.join('; ');
}
1572
/**
 * Renders up to six state roles as `state_key=>role` entries joined by `; `.
 *
 * @param {Array<{state_key: string, role: string}>} stateRoles
 * @returns {string} The compact summary, or `'none'` when the list is empty.
 */
function summarizeStateRoles(stateRoles) {
    const shown = stateRoles.length === 0 ? null : stateRoles.slice(0, 6);
    if (shown === null) {
        return 'none';
    }
    const rendered = shown.map((entry) => `${entry.state_key}=>${entry.role}`);
    return rendered.join('; ');
}
1581
/**
 * Renders up to four inferred structs as `semantic_name=>rewrite_type_name`
 * entries joined by `; `; the `=>type` part is omitted when no rewrite type
 * name is present.
 *
 * @param {Array<{semantic_name: string, rewrite_type_name?: string}>} structInference
 * @returns {string} The compact summary, or `'none'` when the list is empty.
 */
function summarizeStructInference(structInference) {
    const shown = structInference.length === 0 ? null : structInference.slice(0, 4);
    if (shown === null) {
        return 'none';
    }
    const rendered = shown.map((entry) => {
        const typeSuffix = entry.rewrite_type_name ? `=>${entry.rewrite_type_name}` : '';
        return `${entry.semantic_name}${typeSuffix}`;
    });
    return rendered.join('; ');
}
1590
/**
 * Rule-based fallback that proposes a semantic function name from an evidence pack.
 *
 * The summary, pseudocode excerpt, string hints, xref API names and any
 * runtime-corroborated stages/APIs are merged into one lower-cased corpus.
 * The first matching family below decides the suggestion:
 *   1. packer / protector / PE-layout strings
 *   2. remote memory write or execution-transfer APIs
 *   3. remote process inspection APIs
 *   4. file-system APIs
 *   5. tiny, heavily reused routine (shape only)
 *
 * @param {object} evidencePack Evidence for one function; reads `semantic_summary`,
 *   `pseudocode_excerpt`, `string_hints`, `xref_signals[].api`, `runtime_context`,
 *   `cfg_shape.node_count` and `call_relationships`.
 * @returns {{candidate_name: string, confidence: number, why: string,
 *   required_assumptions: string[], evidence_used: string[]} | null}
 *   The suggestion, or `null` when no rule applies.
 */
export function buildDefaultSemanticNameSuggestion(evidencePack) {
    // Win32 imports usually appear with an `Ex` and/or ANSI/wide (`A`/`W`)
    // suffix, e.g. CreateFileW or CopyFileExW. The gates therefore accept those
    // suffixes; a bare trailing `\b` would reject them and skip the rule even
    // though the per-API evidence filters below match them.
    const remoteTransferGate = /\b(?:writeprocessmemory|setthreadcontext|resumethread|createremotethread(?:ex)?|virtualallocex(?:numa)?)\b/i;
    const remoteInspectGate = /\b(?:openprocess|readprocessmemory|ntqueryinformationprocess|remote process)\b/i;
    const fileApiGate = /\b(?:createfile|readfile|writefile|deletefile|copyfile|movefile)(?:ex)?[aw]?\b/i;

    const xrefApis = evidencePack.xref_signals.map((item) => item.api);
    const textCorpus = [
        evidencePack.semantic_summary,
        evidencePack.pseudocode_excerpt,
        ...evidencePack.string_hints,
        ...xrefApis,
        ...(evidencePack.runtime_context?.corroborated_stages || []),
        ...(evidencePack.runtime_context?.corroborated_apis || []),
    ]
        .join('\n')
        .toLowerCase();
    const evidenceUsed = [];
    const assumptions = [];
    const pushEvidence = (value) => {
        if (value && !evidenceUsed.includes(value)) {
            evidenceUsed.push(value);
        }
    };
    // Records every xref API whose name matches the family's (unanchored) pattern.
    const pushMatchingApis = (pattern) => {
        for (const api of xrefApis) {
            if (pattern.test(api)) {
                pushEvidence(`api:${api}`);
            }
        }
    };

    if (/(packer|protector|entry point in non-first section|section entropy|vmprotect|themida|upx)/i.test(textCorpus)) {
        for (const hint of evidencePack.string_hints.filter((value) => /(packer|protector|entry point|vmprotect|themida|upx)/i.test(value))) {
            pushEvidence(`string_hint:${hint}`);
        }
        if (evidencePack.cfg_shape.node_count >= 20) {
            pushEvidence(`cfg_nodes:${evidencePack.cfg_shape.node_count}`);
        }
        assumptions.push('Assumes PE layout and packer heuristics dominate this routine over generic helper duties.');
        return {
            candidate_name: 'scan_pe_layout_or_sections',
            confidence: clamp(0.62 + Math.min(evidenceUsed.length * 0.04, 0.14), 0.55, 0.82),
            why: 'Evidence clusters around packer/protector strings and PE layout style checks.',
            required_assumptions: assumptions,
            evidence_used: evidenceUsed,
        };
    }
    if (remoteTransferGate.test(textCorpus)) {
        pushMatchingApis(/writeprocessmemory|setthreadcontext|resumethread|createremotethread|virtualallocex/i);
        assumptions.push('Assumes remote-process mutation is the primary goal rather than a supporting capability table build.');
        return {
            candidate_name: 'orchestrate_remote_memory_transfer',
            confidence: clamp(0.6 + Math.min(evidenceUsed.length * 0.05, 0.18), 0.56, 0.84),
            why: 'Cross-evidence suggests remote memory write or execution-transfer behavior.',
            required_assumptions: assumptions,
            evidence_used: evidenceUsed,
        };
    }
    if (remoteInspectGate.test(textCorpus)) {
        pushMatchingApis(/openprocess|readprocessmemory|ntqueryinformationprocess/i);
        assumptions.push('Assumes the routine is inspecting or preparing remote process state rather than only dispatching.');
        return {
            candidate_name: 'inspect_remote_process_state',
            confidence: clamp(0.58 + Math.min(evidenceUsed.length * 0.05, 0.16), 0.54, 0.8),
            why: 'Observed APIs and summary both point to remote process state collection or access preparation.',
            required_assumptions: assumptions,
            evidence_used: evidenceUsed,
        };
    }
    if (fileApiGate.test(textCorpus)) {
        pushMatchingApis(/createfile|readfile|writefile|deletefile|copyfile|movefile/i);
        assumptions.push('Assumes recovered file APIs are part of a file-materialization path, not incidental support code.');
        return {
            candidate_name: 'prepare_file_artifact_state',
            confidence: clamp(0.57 + Math.min(evidenceUsed.length * 0.05, 0.15), 0.53, 0.78),
            why: 'API and string evidence both lean toward file or artifact staging behavior.',
            required_assumptions: assumptions,
            evidence_used: evidenceUsed,
        };
    }
    // Shape-only fallback: at most 3 CFG nodes, at least 8 callers, at most 2 callees.
    if (evidencePack.cfg_shape.node_count <= 3 &&
        evidencePack.call_relationships.callers.length >= 8 &&
        evidencePack.call_relationships.callees.length <= 2) {
        pushEvidence(`cfg_nodes:${evidencePack.cfg_shape.node_count}`);
        pushEvidence(`caller_count:${evidencePack.call_relationships.callers.length}`);
        assumptions.push('Assumes the routine is a shared helper or control-flow utility because it is tiny and heavily reused.');
        return {
            candidate_name: 'shared_control_flow_helper',
            confidence: 0.56,
            why: 'Shape suggests a small heavily-reused helper, but semantics remain broad.',
            required_assumptions: assumptions,
            evidence_used: evidenceUsed,
        };
    }
    return null;
}
1685
/**
 * Replaces the `// name_resolution=` header line of a snippet.
 *
 * All existing lines starting with `// name_resolution=` are removed. When
 * `nameResolution` is provided a freshly rendered header becomes the first
 * line; otherwise the stripped snippet is returned as is. Output lines are
 * always joined with `\n`.
 *
 * @param {string} sourceLikeSnippet Snippet text (LF or CRLF separated).
 * @param {object|null|undefined} nameResolution Resolution record to render.
 * @returns {string} The snippet with an up-to-date (or removed) header.
 */
function withNameResolutionHeader(sourceLikeSnippet, nameResolution) {
    const headerPrefix = '// name_resolution=';
    const keptLines = [];
    for (const line of sourceLikeSnippet.split(/\r?\n/)) {
        if (!line.startsWith(headerPrefix)) {
            keptLines.push(line);
        }
    }
    if (nameResolution) {
        const source = nameResolution.resolution_source;
        const rule = nameResolution.rule_based_name || 'none';
        const llm = nameResolution.llm_suggested_name || 'none';
        const validated = nameResolution.validated_name || 'none';
        const unresolved = nameResolution.unresolved_semantic_name ? 'yes' : 'no';
        keptLines.unshift(`// name_resolution=source:${source} rule:${rule} llm:${llm} validated:${validated} unresolved:${unresolved}`);
    }
    return keptLines.join('\n');
}
1697
/**
 * Combines the rule-based name, an optional externally supplied suggestion and
 * an optional on-demand suggester into one name-resolution record.
 *
 * Precedence:
 *   - A rule-based name (`func.suggested_name`) always wins as the validated name.
 *   - An external suggestion with a `normalized_candidate_name` is used as the
 *     LLM suggestion when present.
 *   - Otherwise, and only when there is no rule-based name,
 *     `semanticNameSuggester` is awaited with the assembled evidence pack.
 *   - An LLM suggestion is accepted as the validated name only when its
 *     confidence is at least 0.62.
 *
 * @param {object} func Function record being resolved.
 * @param {object|null|undefined} externalSuggestion Previously stored suggestion, if any.
 * @param {(evidencePack: object) => (object|null|Promise<object|null>)} semanticNameSuggester
 *   Fallback suggester invoked with the evidence pack.
 * @returns {Promise<{nameResolution: object, finalSuggestion: object}>}
 */
async function finalizeLayeredNameResolution(func, externalSuggestion, semanticNameSuggester) {
    const ruleBasedName = func.suggested_name || null;
    const evidencePack = {
        function_name: func.function,
        address: func.address,
        semantic_summary: func.semantic_summary,
        xref_signals: func.xref_signals,
        call_relationships: func.call_relationships,
        runtime_context: func.runtime_context || undefined,
        string_hints: func.semantic_evidence?.string_hints || [],
        pseudocode_excerpt: func.semantic_evidence?.pseudocode_excerpt || buildPseudocodeExcerpt(func.source_like_snippet),
        // Without recorded CFG evidence, fall back to the raw node/edge counts.
        cfg_shape: func.semantic_evidence?.cfg_shape || {
            node_count: func.evidence.cfg_nodes,
            edge_count: func.evidence.cfg_edges,
            has_loop: false,
            has_branching: func.evidence.cfg_edges > func.evidence.cfg_nodes,
            block_types: [],
            entry_block_type: null,
        },
        parameter_roles: func.parameter_roles || func.semantic_evidence?.parameter_roles || [],
        state_roles: func.state_roles || func.semantic_evidence?.state_roles || [],
        struct_inference: func.struct_inference || func.semantic_evidence?.struct_inference || [],
    };
    let llmSuggestion = null;
    if (externalSuggestion?.normalized_candidate_name) {
        llmSuggestion = {
            candidate_name: externalSuggestion.normalized_candidate_name,
            confidence: clamp(externalSuggestion.confidence, 0, 1),
            why: externalSuggestion.why,
            required_assumptions: externalSuggestion.required_assumptions,
            evidence_used: dedupe([
                // Tolerate stored suggestions that carry no evidence list.
                ...(externalSuggestion.evidence_used || []),
                ...(externalSuggestion.client_name ? [`client:${externalSuggestion.client_name}`] : []),
                ...(externalSuggestion.model_name ? [`model:${externalSuggestion.model_name}`] : []),
                `artifact:${externalSuggestion.artifact_id}`,
            ]),
        };
    }
    else if (!ruleBasedName) {
        llmSuggestion = await semanticNameSuggester(evidencePack);
    }
    const validatedName = ruleBasedName ||
        (llmSuggestion && llmSuggestion.confidence >= 0.62 ? llmSuggestion.candidate_name : null);
    const resolutionSource = ruleBasedName && llmSuggestion
        ? 'hybrid'
        : ruleBasedName
            ? 'rule'
            : validatedName
                ? 'llm'
                : 'unresolved';
    const nameResolution = {
        rule_based_name: ruleBasedName,
        llm_suggested_name: llmSuggestion?.candidate_name || null,
        // `??` keeps a legitimate confidence of 0 instead of collapsing it to null.
        llm_confidence: llmSuggestion?.confidence ?? null,
        llm_why: llmSuggestion?.why || null,
        required_assumptions: llmSuggestion?.required_assumptions || [],
        evidence_used: llmSuggestion?.evidence_used || [],
        validated_name: validatedName,
        resolution_source: resolutionSource,
        unresolved_semantic_name: !validatedName,
    };
    // The rule-based fields are authoritative only when the rule name was validated.
    const finalSuggestion = validatedName
        ? {
            suggested_name: validatedName,
            suggested_role: ruleBasedName === validatedName
                ? func.suggested_role || llmSuggestion?.why || null
                : llmSuggestion?.why || func.suggested_role || null,
            rename_confidence: ruleBasedName === validatedName
                ? Number(func.rename_confidence || 0)
                : Number(llmSuggestion?.confidence || 0),
            rename_evidence: ruleBasedName === validatedName
                ? func.rename_evidence || []
                : llmSuggestion?.evidence_used || [],
        }
        : {
            suggested_name: null,
            suggested_role: null,
            rename_confidence: 0,
            rename_evidence: [],
        };
    return {
        nameResolution,
        finalSuggestion,
    };
}
1782
/**
 * Splits a call-graph label into its name and address parts.
 *
 * The name is the leading run of characters before the first `@` or `[`,
 * trimmed. The address is the hex token directly after an `@`, returned
 * lower-cased and without a `0x` prefix. Either part is `null` when absent.
 *
 * @param {string} label Label such as `name@0x401000`.
 * @returns {{name: string|null, address: string|null}}
 */
function extractLinkedLabelToken(label) {
    const [, rawAddress] = label.match(/@((?:0x)?[0-9a-f]+)\b/i) || [];
    const [, rawName] = label.match(/^([^@[]+)/) || [];
    const name = rawName === undefined ? null : rawName.trim();
    const address = rawAddress === undefined
        ? null
        : rawAddress.replace(/^0x/i, '').toLowerCase();
    return { name, address };
}
1790
/**
 * Collects the suggested names of already-renamed functions that appear among
 * this function's callers and callees.
 *
 * Labels are taken from `call_context` and from the `target` of each
 * `call_relationships` entry. Each label is resolved by address first and by
 * (case-insensitive) original function name second.
 *
 * @param {object} func Function whose neighbours are inspected.
 * @param {Iterable<object>} renamedFunctions Candidate functions; entries
 *   without a `suggested_name` are ignored.
 * @returns {string[]} De-duplicated suggested names of linked functions.
 */
function buildLinkedSuggestedNames(func, renamedFunctions) {
    const normalizeAddress = (value) => value.replace(/^0x/i, '').toLowerCase();
    const nameByAddress = new Map();
    const nameByFunction = new Map();
    for (const candidate of renamedFunctions) {
        if (candidate.suggested_name) {
            nameByAddress.set(normalizeAddress(candidate.address), candidate.suggested_name);
            nameByFunction.set(candidate.function.toLowerCase(), candidate.suggested_name);
        }
    }

    const relationshipCallers = (func.call_relationships?.callers || []).map((edge) => edge.target);
    const relationshipCallees = (func.call_relationships?.callees || []).map((edge) => edge.target);
    const labels = [
        ...(func.call_context?.callers || []),
        ...(func.call_context?.callees || []),
        ...relationshipCallers,
        ...relationshipCallees,
    ];

    const resolved = [];
    for (const label of labels) {
        const { name, address } = extractLinkedLabelToken(label);
        // An address hit takes priority over a name hit.
        if (address && nameByAddress.has(address)) {
            resolved.push(nameByAddress.get(address));
        }
        else if (name && nameByFunction.has(name.toLowerCase())) {
            resolved.push(nameByFunction.get(name.toLowerCase()));
        }
    }
    return dedupe(resolved);
}
1818
/**
 * Scores a linked suggested name; higher scores are preferred.
 *
 * A handful of exact names have fixed scores. Otherwise a name starting with
 * an entry of `LINKED_SUGGESTION_PRIORITY_PREFIXES` scores `90 - index`,
 * `shared_*` / `*_stub` names score 20, and everything else scores 40.
 * Matching ignores case and surrounding whitespace.
 *
 * @param {string} name Suggested name to score.
 * @returns {number} Priority score.
 */
function scoreLinkedSuggestedName(name) {
    const key = name.trim().toLowerCase();
    const pinnedScores = new Map([
        ['resolve_dynamic_apis', 120],
        ['prepare_remote_process_access', 112],
        ['transfer_remote_execution', 108],
        ['query_remote_process_snapshot', 104],
        ['query_code_integrity_state', 102],
        ['scan_packer_signatures', 98],
    ]);
    if (pinnedScores.has(key)) {
        return pinnedScores.get(key);
    }
    const rank = LINKED_SUGGESTION_PRIORITY_PREFIXES.findIndex((prefix) => key.startsWith(prefix));
    if (rank >= 0) {
        return 90 - rank;
    }
    const isGenericName = key.startsWith('shared_') || key.endsWith('_stub');
    return isGenericName ? 20 : 40;
}
1847
/**
 * Chooses the most informative linked name to derive a refined name from.
 *
 * Blank names, `shared_*` names and names ending in `_helper`, `_guard` or
 * `_stub` are discarded. The remaining names are ordered by descending
 * `scoreLinkedSuggestedName`, with ties broken by `localeCompare`.
 *
 * @param {string[]} linkedSuggestedNames Candidate linked names.
 * @returns {string|null} The preferred name, or `null` when none qualifies.
 */
function pickPreferredLinkedSuggestedName(linkedSuggestedNames) {
    const derivedSuffixes = ['_helper', '_guard', '_stub'];
    const isUsable = (name) => {
        const key = name.trim().toLowerCase();
        if (key === '') {
            return false;
        }
        const looksDerived = derivedSuffixes.some((suffix) => key.endsWith(suffix));
        return !looksDerived && !key.startsWith('shared_');
    };
    const byPreference = (left, right) => {
        const scoreGap = scoreLinkedSuggestedName(right) - scoreLinkedSuggestedName(left);
        return scoreGap !== 0 ? scoreGap : left.localeCompare(right);
    };
    const ranked = dedupe(linkedSuggestedNames).filter(isUsable);
    if (ranked.length === 0) {
        return null;
    }
    ranked.sort(byPreference);
    return ranked[0];
}
1874
/**
 * Appends `_<suffix>` to a name unless the name already ends with it.
 *
 * @param {string} baseName Name to extend.
 * @param {string} suffix Suffix without the leading underscore.
 * @returns {string} The suffixed name.
 */
function appendSemanticSuffix(baseName, suffix) {
    const tail = `_${suffix}`;
    if (baseName.endsWith(tail)) {
        return baseName;
    }
    return `${baseName}${tail}`;
}
1877
/**
 * Derives a helper/guard rename suggestion from the preferred linked name.
 *
 * Three cases produce a suggestion, checked in this order: the linked name is
 * exactly `resolve_dynamic_apis`; it starts with `dispatch_`; or it starts
 * with one of `LINKED_SUGGESTION_PRIORITY_PREFIXES`. Anything else yields
 * `null`.
 *
 * @param {string[]} linkedSuggestedNames Suggested names of linked functions.
 * @param {string} kind `'helper'` selects the helper wording; any other value
 *   selects the guard wording and is also used as the name suffix.
 * @returns {{suggested_name: string, suggested_role: string,
 *   rename_confidence: number, rename_evidence: string[]} | null}
 */
function buildLinkedRefinedSuggestion(linkedSuggestedNames, kind) {
    const anchor = pickPreferredLinkedSuggestedName(linkedSuggestedNames);
    if (!anchor) {
        return null;
    }
    const isHelper = kind === 'helper';
    // Builds the result, picking the helper or guard variant of role and confidence.
    const describe = (suggestedName, roles, confidences) => ({
        suggested_name: suggestedName,
        suggested_role: isHelper ? roles.helper : roles.guard,
        rename_confidence: isHelper ? confidences.helper : confidences.guard,
        rename_evidence: [`linked_caller:${anchor}`],
    });

    if (anchor === 'resolve_dynamic_apis') {
        return describe(appendSemanticSuffix(anchor, kind), {
            helper: 'Trivial helper reached from the dynamic API resolution path.',
            guard: 'Small guard routine used to gate the dynamic API resolution path.',
        }, { helper: 0.68, guard: 0.67 });
    }
    if (anchor.startsWith('dispatch_')) {
        return describe(isHelper ? 'dispatch_guard_stub' : 'dispatch_false_guard', {
            helper: 'Small guard or bookkeeping stub reached from a dispatch path.',
            guard: 'Small guard routine that returns a branch value for dispatch callers.',
        }, { helper: 0.66, guard: 0.66 });
    }
    const onPriorityPath = LINKED_SUGGESTION_PRIORITY_PREFIXES.some((prefix) => anchor.startsWith(prefix));
    if (!onPriorityPath) {
        return null;
    }
    return describe(appendSemanticSuffix(anchor, kind), {
        helper: 'Small helper routine attached to a named operational path.',
        guard: 'Small guard routine attached to a named operational path.',
    }, { helper: 0.64, guard: 0.63 });
}
1914
/**
 * Proposes a name for a tiny, still-unnamed function from its body shape and
 * the names already given to its neighbours.
 *
 * Functions that already have a suggested name, or whose original name is in
 * `KNOWN_LIBRARY_SYMBOL_NAMES`, are left alone. Otherwise four single-node
 * shapes are recognised, in order:
 *   - call followed by a trap (exactly one callee, at least 4 callers)
 *   - void return stub (no callees, at least 4 callers)
 *   - constant `0` return (at least 3 callers)
 *   - constant `1` return (at least 3 callers)
 * The last three prefer a name derived from a linked function and fall back
 * to a generic `shared_*` name.
 *
 * @param {object} func Function record to name.
 * @param {object[]} renamedFunctions All function records, used to find linked names.
 * @returns {{suggested_name: string, suggested_role: string,
 *   rename_confidence: number, rename_evidence: string[]} | null}
 */
function deriveRefinedRenameSuggestion(func, renamedFunctions) {
    if (func.suggested_name || KNOWN_LIBRARY_SYMBOL_NAMES.has(func.function.trim().toLowerCase())) {
        return null;
    }
    const shape = extractSnippetBodyShape(func.source_like_snippet);
    const linkedNames = buildLinkedSuggestedNames(func, renamedFunctions);
    // Evidence reads stay lazy so they only happen once a shape check reaches them.
    const isSingleNode = () => func.evidence.cfg_nodes <= 1;
    const callerTag = () => `caller_count:${func.evidence.caller_count}`;

    const isTrapStub = shape.has_trap_tail &&
        isSingleNode() &&
        func.evidence.callee_count === 1 &&
        func.evidence.caller_count >= 4;
    if (isTrapStub) {
        return {
            suggested_name: 'call_then_trap_stub',
            suggested_role: 'Calls a single callee and then immediately traps or reaches an unreachable edge.',
            rename_confidence: 0.61,
            rename_evidence: [
                'body:trap_after_call',
                callerTag(),
                `callee_count:${func.evidence.callee_count}`,
            ],
        };
    }

    const isVoidStub = shape.is_void_return_stub &&
        isSingleNode() &&
        func.evidence.callee_count === 0 &&
        func.evidence.caller_count >= 4;
    if (isVoidStub) {
        const viaHelper = buildLinkedRefinedSuggestion(linkedNames, 'helper');
        if (!viaHelper) {
            return {
                suggested_name: 'shared_noop_stub',
                suggested_role: 'Tiny shared stub with no observable side effects beyond returning.',
                rename_confidence: 0.58,
                rename_evidence: [
                    'body:void_return_stub',
                    callerTag(),
                    `cfg_nodes:${func.evidence.cfg_nodes}`,
                ],
            };
        }
        // The matching body shape corroborates the linked name, so confidence is raised.
        return {
            ...viaHelper,
            rename_confidence: clamp(viaHelper.rename_confidence + 0.08, 0, 0.95),
            rename_evidence: dedupe([
                ...viaHelper.rename_evidence,
                'body:void_return_stub',
                callerTag(),
            ]),
        };
    }

    const returnsZero = shape.constant_return === 0 &&
        isSingleNode() &&
        func.evidence.caller_count >= 3;
    if (returnsZero) {
        const viaGuard = buildLinkedRefinedSuggestion(linkedNames, 'guard');
        if (!viaGuard?.suggested_name) {
            return {
                suggested_name: 'shared_false_guard',
                suggested_role: 'Small guard-like helper that returns a constant zero/false result.',
                rename_confidence: 0.56,
                rename_evidence: ['body:return_0', callerTag()],
            };
        }
        const isDispatchGuard = viaGuard.suggested_name === 'dispatch_false_guard';
        return {
            suggested_name: viaGuard.suggested_name,
            suggested_role: isDispatchGuard
                ? 'Small guard routine that returns a false/zero branch value for dispatch callers.'
                : 'Small guard-like helper attached to a named operational path that returns a false/zero branch value.',
            rename_confidence: 0.67,
            rename_evidence: dedupe([
                ...viaGuard.rename_evidence,
                'body:return_0',
                callerTag(),
            ]),
        };
    }

    const returnsOne = shape.constant_return === 1 &&
        isSingleNode() &&
        func.evidence.caller_count >= 3;
    if (returnsOne) {
        const viaGuard = buildLinkedRefinedSuggestion(linkedNames, 'guard');
        if (!viaGuard?.suggested_name) {
            return {
                suggested_name: 'shared_true_guard',
                suggested_role: 'Small guard-like helper that returns a constant true/success result.',
                rename_confidence: 0.56,
                rename_evidence: ['body:return_1', callerTag()],
            };
        }
        // The dispatch guard name encodes the returned value, so flip it for a 1-return.
        const guardName = viaGuard.suggested_name === 'dispatch_false_guard'
            ? 'dispatch_true_guard'
            : viaGuard.suggested_name;
        return {
            suggested_name: guardName,
            suggested_role: guardName === 'dispatch_true_guard'
                ? 'Small guard routine that returns a true/success branch value for dispatch callers.'
                : 'Small guard-like helper attached to a named operational path that returns success.',
            rename_confidence: clamp(viaGuard.rename_confidence + 0.01, 0, 0.95),
            rename_evidence: dedupe([
                ...viaGuard.rename_evidence,
                'body:return_1',
                callerTag(),
            ]),
        };
    }
    return null;
}
2021
/**
 * Applies `deriveRefinedRenameSuggestion` to every function record.
 *
 * Records that receive a refined suggestion are returned as copies carrying
 * the new name, role, confidence and evidence, with the snippet header
 * rewritten by `withSuggestedNameHeader`. All other records are returned
 * as the same object.
 *
 * @param {object[]} functions Function records; also used as the pool of linked names.
 * @returns {object[]} Records in the original order.
 */
function refineRenameSuggestions(functions) {
    const applyRefinement = (entry) => {
        const refinement = deriveRefinedRenameSuggestion(entry, functions);
        if (!refinement?.suggested_name) {
            return entry;
        }
        const overrides = {
            suggested_name: refinement.suggested_name,
            suggested_role: refinement.suggested_role,
            rename_confidence: refinement.rename_confidence,
            rename_evidence: refinement.rename_evidence,
            source_like_snippet: withSuggestedNameHeader(entry.source_like_snippet, refinement),
        };
        return { ...entry, ...overrides };
    };
    return functions.map(applyRefinement);
}
2037
/**
 * Composes a one-line, human-readable summary for a reconstructed function.
 *
 * Clauses are collected in a fixed order (behaviour/API lead-in, call
 * context, relationship insights, analysis gaps, parameter roles, state
 * roles, struct inference, runtime corroboration), joined with `'; '` and
 * terminated with a period.
 *
 * @param {string} functionName - Name used when nothing else is known.
 * @param {string[]} behaviorTags - Behaviour tags; the first two are described.
 * @param {Array<{api: string}>} xrefSignals - Cross-reference signals.
 * @param {{callers: string[], callees: string[]}} callContext
 * @param {object} relationshipContext - Passed to `summarizeRelationshipInsights`.
 * @param {string[]} gaps - Gap identifiers; only three kinds are reported here.
 * @param {string[]} rankReasons - Ranking reasons (checked for `'entry_point'`).
 * @param {Array<{role: string}>} parameterRoles
 * @param {Array<{state_key: string}>} stateRoles
 * @param {Array<{semantic_name: string}>} structInference
 * @param {object} [runtimeContext] - Optional runtime correlation record.
 * @returns {string} The assembled summary sentence.
 */
function buildSemanticSummary(functionName, behaviorTags, xrefSignals, callContext, relationshipContext, gaps, rankReasons, parameterRoles, stateRoles, structInference, runtimeContext) {
    const reportedGapKinds = ['missing_cfg', 'unresolved_function_symbols', 'unresolved_data_symbols'];
    const clauses = [];
    const leadingApis = xrefSignals.slice(0, 3).map((signal) => signal.api);

    // Lead-in: prefer behaviour tags, then raw API names, then a generic note.
    if (behaviorTags.length > 0) {
        const tagDescriptions = behaviorTags.slice(0, 2).map(describeBehaviorTag);
        const viaClause = leadingApis.length > 0 ? ` via ${leadingApis.join(', ')}` : '';
        clauses.push(`Likely handles ${tagDescriptions.join(' and ')}${viaClause}`);
    }
    else if (xrefSignals.length > 0) {
        const apiPair = xrefSignals
            .slice(0, 2)
            .map((signal) => signal.api)
            .join(' and ');
        clauses.push(`Likely coordinates API-facing behavior around ${apiPair}`);
    }
    else {
        clauses.push(`Partial semantic recovery for ${functionName}`);
    }

    // Call context is only spelled out when the function is not an entry point.
    if (rankReasons.includes('entry_point')) {
        clauses.push('appears to be an entry or dispatch point');
    }
    else {
        const callClauses = [];
        if (callContext.callers.length > 0) {
            callClauses.push(`called by ${callContext.callers.slice(0, 2).join(', ')}`);
        }
        if (callContext.callees.length > 0) {
            callClauses.push(`invokes ${callContext.callees.slice(0, 3).join(', ')}`);
        }
        if (callClauses.length > 0) {
            clauses.push(callClauses.join(' and '));
        }
    }

    const relationshipClause = summarizeRelationshipInsights(relationshipContext);
    if (relationshipClause) {
        clauses.push(relationshipClause);
    }

    const reportedGaps = gaps.filter((gap) => reportedGapKinds.includes(gap));
    if (reportedGaps.length > 0) {
        clauses.push(`analysis gaps remain: ${reportedGaps.join(', ')}`);
    }

    if (parameterRoles.length > 0) {
        const roleNames = parameterRoles.slice(0, 3).map((entry) => entry.role);
        clauses.push(`expected inputs resemble ${roleNames.join(', ')}`);
    }
    if (stateRoles.length > 0) {
        const stateKeys = stateRoles.slice(0, 2).map((entry) => entry.state_key);
        clauses.push(`likely maintains ${stateKeys.join(' and ')} state`);
    }
    if (structInference.length > 0) {
        const structNames = structInference.slice(0, 2).map((entry) => entry.semantic_name);
        clauses.push(`recovered data contracts suggest ${structNames.join(' and ')}`);
    }

    // Runtime details are only reported when at least one API or stage was corroborated.
    const hasRuntimeCorroboration = Boolean(runtimeContext) &&
        (runtimeContext.corroborated_apis.length > 0 || runtimeContext.corroborated_stages.length > 0);
    if (hasRuntimeCorroboration) {
        const runtimeClauses = [];
        // Optional list fields may be absent on the record; treat them as empty.
        const listOrEmpty = (value) => value || [];
        if (runtimeContext.corroborated_apis.length > 0) {
            runtimeClauses.push(`runtime corroborates ${runtimeContext.corroborated_apis.slice(0, 3).join(', ')}`);
        }
        if (runtimeContext.corroborated_stages.length > 0) {
            runtimeClauses.push(`observed runtime stages include ${runtimeContext.corroborated_stages.slice(0, 2).join(', ')}`);
        }
        if (runtimeContext.executed) {
            runtimeClauses.push('evidence includes executed runtime trace');
        }
        const evidenceSources = listOrEmpty(runtimeContext.evidence_sources);
        if (evidenceSources.length > 0) {
            runtimeClauses.push(`runtime sources=${evidenceSources.slice(0, 3).join(', ')}`);
        }
        const sourceNames = listOrEmpty(runtimeContext.source_names);
        if (sourceNames.length > 0) {
            runtimeClauses.push(`runtime names=${sourceNames.slice(0, 3).join(', ')}`);
        }
        const provenanceLayers = listOrEmpty(runtimeContext.provenance_layers);
        if (provenanceLayers.length > 0) {
            runtimeClauses.push(`runtime layers=${provenanceLayers.slice(0, 3).join(', ')}`);
        }
        const memoryRegions = listOrEmpty(runtimeContext.matched_memory_regions);
        if (memoryRegions.length > 0) {
            runtimeClauses.push(`memory regions include ${memoryRegions.slice(0, 2).join(', ')}`);
        }
        if (runtimeContext.scope_note) {
            runtimeClauses.push(runtimeContext.scope_note);
        }
        const suggestedModules = listOrEmpty(runtimeContext.suggested_modules);
        if (suggestedModules.length > 0) {
            runtimeClauses.push(`suggested modules=${suggestedModules.slice(0, 3).join(', ')}`);
        }
        clauses.push(runtimeClauses.join(' and '));
    }

    return `${clauses.join('; ')}.`;
}
2128
/**
 * Scores how much evidence backs a reconstructed function.
 *
 * The score is the sum of four components, clamped to [0, 1]:
 *  - decompile: 0.35 when pseudocode exists, +0.1 for more than 20 lines,
 *    +0.05 when any caller/callee information is present;
 *  - cfg: 0.2 when the CFG has nodes, +0.08 for more than 3 nodes,
 *    +0.07 for more than 3 edges;
 *  - assembly: 0.08 when instructions exist, +0.05 for more than 30;
 *  - context: derived from the rank score (0.05..0.2) and boosted by the
 *    runtime confidence (capped at 0.28).
 *
 * @param {object|null|undefined} decompiled - Decompiler result, if any.
 * @param {object|null|undefined} cfg - Control-flow graph result, if any.
 * @param {number} instructionCount - Number of assembly instructions available.
 * @param {number|null|undefined} rankScore - Ranking score; ignored unless it
 *   is a finite number.
 * @param {number|null|undefined} runtimeConfidence - Runtime correlation
 *   confidence; ignored unless positive.
 * @returns {{confidence: number, breakdown: {decompile: number, cfg: number, assembly: number, context: number}}}
 */
function computeConfidence(decompiled, cfg, instructionCount, rankScore, runtimeConfidence) {
    const pseudocodeLines = parsePseudocodeLines(decompiled?.pseudocode);
    const breakdown = {
        decompile: 0,
        cfg: 0,
        assembly: 0,
        context: 0,
    };
    if (decompiled && pseudocodeLines.length > 0) {
        breakdown.decompile = 0.35;
        if (pseudocodeLines.length > 20) {
            breakdown.decompile += 0.1;
        }
        // Any of the four relationship lists may be missing on partial results.
        const relationCount = (decompiled.callers?.length || 0) +
            (decompiled.callees?.length || 0) +
            (decompiled.caller_relationships?.length || 0) +
            (decompiled.callee_relationships?.length || 0);
        if (relationCount > 0) {
            breakdown.decompile += 0.05;
        }
    }
    const cfgNodeCount = cfg?.nodes?.length || 0;
    const cfgEdgeCount = cfg?.edges?.length || 0;
    if (cfgNodeCount > 0) {
        breakdown.cfg = 0.2;
        if (cfgNodeCount > 3) {
            breakdown.cfg += 0.08;
        }
        if (cfgEdgeCount > 3) {
            breakdown.cfg += 0.07;
        }
    }
    if (instructionCount > 0) {
        breakdown.assembly = 0.08;
        if (instructionCount > 30) {
            breakdown.assembly += 0.05;
        }
    }
    // Only a finite number is a usable rank score. A `!== null` check would let
    // `undefined`/NaN through and poison the whole score with NaN.
    if (Number.isFinite(rankScore)) {
        breakdown.context = clamp(0.05 + rankScore / 100, 0.05, 0.2);
    }
    if (runtimeConfidence && runtimeConfidence > 0) {
        breakdown.context = clamp(breakdown.context + runtimeConfidence * 0.08, 0.05, 0.28);
    }
    const confidence = breakdown.decompile + breakdown.cfg + breakdown.assembly + breakdown.context;
    return {
        confidence: clamp(confidence, 0, 1),
        breakdown,
    };
}
2176
/**
 * Renders the annotated, source-like text for one reconstructed function.
 *
 * The output is a block of `//` comment lines (header, summary and whichever
 * optional sections have data) followed by at most `maxPseudocodeLines`
 * pseudocode lines, all joined with newlines. When no pseudocode is
 * available a placeholder comment is emitted instead.
 *
 * @param {string} functionName
 * @param {number} confidence - Printed with two decimals.
 * @param {string[]} gaps
 * @param {string[]} pseudocodeLines
 * @param {number} maxPseudocodeLines
 * @param {string} semanticSummary
 * @param {Array<{api: string, provenance: string, confidence: number}>} xrefSignals
 * @param {{callers: string[], callees: string[]}} callContext
 * @param {{callers: object[], callees: object[]}} relationshipContext
 * @param {string[]} rankReasons
 * @param {object[]} parameterRoles
 * @param {object[]} stateRoles
 * @param {object[]} structInference
 * @param {object} [runtimeContext] - Optional runtime correlation record.
 * @returns {string} The rendered snippet.
 */
function buildSourceLikeSnippet(functionName, confidence, gaps, pseudocodeLines, maxPseudocodeLines, semanticSummary, xrefSignals, callContext, relationshipContext, rankReasons, parameterRoles, stateRoles, structInference, runtimeContext) {
    // Joins a possibly-missing list with ', '; an empty result falls back to `fallback`.
    const joinOr = (items, fallback) => (items || []).join(', ') || fallback;
    const annotations = [
        `// function=${functionName} confidence=${confidence.toFixed(2)} gaps=${gaps.length > 0 ? gaps.join(',') : 'none'}`,
        `// summary=${semanticSummary}`,
    ];

    if (xrefSignals.length > 0) {
        const xrefText = xrefSignals
            .slice(0, 4)
            .map((signal) => `${signal.api}[${signal.provenance},${signal.confidence.toFixed(2)}]`)
            .join('; ');
        annotations.push(`// xrefs=${xrefText}`);
    }

    if (callContext.callers.length > 0 || callContext.callees.length > 0) {
        const callerText = callContext.callers.join(', ') || 'none';
        const calleeText = callContext.callees.join(', ') || 'none';
        annotations.push(`// callers=${callerText} | callees=${calleeText}`);
    }

    if (relationshipContext.callers.length > 0 || relationshipContext.callees.length > 0) {
        const callerHints = relationshipContext.callers
            .map((entry) => formatRelationshipEntry(entry))
            .join(' || ') || 'none';
        const calleeHints = relationshipContext.callees
            .map((entry) => formatRelationshipEntry(entry))
            .join(' || ') || 'none';
        annotations.push(`// relationship_hints=callers:${callerHints} | callees:${calleeHints}`);
    }

    if (rankReasons.length > 0) {
        annotations.push(`// rank_reasons=${rankReasons.slice(0, 5).join(', ')}`);
    }
    if (parameterRoles.length > 0) {
        annotations.push(`// parameter_roles=${summarizeParameterRoles(parameterRoles)}`);
    }
    if (stateRoles.length > 0) {
        annotations.push(`// state_roles=${summarizeStateRoles(stateRoles)}`);
    }
    if (structInference.length > 0) {
        annotations.push(`// struct_inference=${summarizeStructInference(structInference)}`);
    }

    // Runtime section: shown when any API, stage or memory region matched.
    const showRuntime = Boolean(runtimeContext) &&
        (runtimeContext.corroborated_apis.length > 0 ||
            runtimeContext.corroborated_stages.length > 0 ||
            (runtimeContext.matched_memory_regions || []).length > 0);
    if (showRuntime) {
        const evidenceFields = [
            `apis:${runtimeContext.corroborated_apis.join(', ') || 'none'}`,
            `stages:${runtimeContext.corroborated_stages.join(', ') || 'none'}`,
            `regions:${joinOr(runtimeContext.matched_memory_regions, 'none')}`,
            `modules:${joinOr(runtimeContext.suggested_modules, 'none')}`,
            `confidence:${runtimeContext.confidence.toFixed(2)}`,
            `executed:${runtimeContext.executed ? 'yes' : 'no'}`,
            `sources:${joinOr(runtimeContext.evidence_sources, 'unknown')}`,
            `names:${joinOr(runtimeContext.source_names, 'unknown')}`,
            `layers:${joinOr(runtimeContext.provenance_layers, 'unknown')}`,
            `latest:${runtimeContext.latest_artifact_at || 'unknown'}`,
            `matched_by:${joinOr(runtimeContext.matched_by, 'unknown')}`,
            `artifacts:${runtimeContext.executed_artifact_count || 0}/${runtimeContext.artifact_count || 0}`,
        ];
        annotations.push(`// runtime_evidence=${evidenceFields.join(' | ')}`);
        if (runtimeContext.notes.length > 0) {
            annotations.push(`// runtime_notes=${runtimeContext.notes.join(' || ')}`);
        }
        if (runtimeContext.scope_note) {
            annotations.push(`// runtime_scope=${runtimeContext.scope_note}`);
        }
    }

    const excerpt = pseudocodeLines.slice(0, maxPseudocodeLines);
    const bodyLines = excerpt.length === 0
        ? ['// pseudocode unavailable; inspect CFG/assembly manually']
        : excerpt;
    return [...annotations, ...bodyLines].join('\n');
}
2225
/**
 * Converts any thrown value into a non-throwing, human-readable string for
 * use in warning messages.
 *
 * Resolution order:
 *  1. `Error` instances: the message, or the error name when the message is
 *     empty (so warnings never end in a blank).
 *  2. Error-like objects carrying a non-empty string `message`.
 *  3. `String(value)` when that yields something informative.
 *  4. For objects whose string form is empty, `[object Object]` or throws
 *     (e.g. null-prototype objects): a JSON rendering when possible.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} A description of the error; this function never throws
 *   for plain values.
 */
function normalizeError(error) {
    if (error instanceof Error) {
        return error.message || error.name || 'Error';
    }
    const isObject = error !== null && typeof error === 'object';
    if (isObject && typeof error.message === 'string' && error.message.length > 0) {
        return error.message;
    }
    let text;
    try {
        text = String(error);
    }
    catch {
        // Objects without a usable toString/valueOf cannot be coerced.
        text = '';
    }
    if (!isObject || (text !== '' && text !== '[object Object]')) {
        return text;
    }
    try {
        const serialized = JSON.stringify(error);
        if (typeof serialized === 'string') {
            return serialized;
        }
    }
    catch {
        // Circular structures and the like: fall through to the generic tag.
    }
    return text || Object.prototype.toString.call(error);
}
2231
/**
 * Builds a single placeholder "function" record for a sample that has no
 * function-level analysis results.
 *
 * Three outcomes are possible, from least to most informative:
 *  - the workspace cannot be resolved: `fallback_without_workspace`;
 *  - the workspace has no readable file in `workspace.original`:
 *    `fallback_without_sample_file`;
 *  - otherwise: printable ASCII runs (6+ characters) from the first 2 MiB of
 *    the first file (by `localeCompare` order) are used to infer behaviour
 *    tags and to list up to eight hint strings: `fallback_static_summary`.
 *
 * Only the leading 2 MiB of the sample is read from disk. Filesystem errors
 * while listing or reading the sample are reported as the "sample file
 * unavailable" outcome instead of being thrown.
 *
 * @param {{getWorkspace: (sampleId: string) => Promise<{original: string}>}} workspaceManager
 * @param {string} sampleId - Sample whose workspace should be inspected.
 * @param {string} targetLabel - Value copied into the record's `target` field.
 * @returns {Promise<object>} The degraded function record.
 */
async function buildDegradedFallbackFunction(workspaceManager, sampleId, targetLabel) {
    const SCAN_WINDOW_BYTES = 2 * 1024 * 1024;
    const MAX_STRING_RUNS = 500;
    const MAX_TAGS_OR_HINTS = 8;

    // Shared skeleton for all three outcomes; only the listed fields vary.
    const makeRecord = ({ rankReason, semanticSummary, confidence, gaps, behaviorTags, snippetLines }) => ({
        target: targetLabel,
        function: 'degraded_static_summary',
        address: 'unknown',
        rank_score: null,
        rank_reasons: [rankReason],
        semantic_summary: semanticSummary,
        xref_signals: [],
        call_context: {
            callers: [],
            callees: [],
        },
        call_relationships: {
            callers: [],
            callees: [],
        },
        confidence,
        confidence_breakdown: {
            decompile: 0,
            cfg: 0,
            assembly: 0,
            context: confidence,
        },
        gaps,
        evidence: {
            pseudocode_lines: 0,
            cfg_nodes: 0,
            cfg_edges: 0,
            instruction_count: 0,
            caller_count: 0,
            callee_count: 0,
        },
        behavior_tags: behaviorTags,
        source_like_snippet: snippetLines.join('\n'),
        assembly_excerpt: '; assembly unavailable in degraded fallback mode',
    });

    const sampleFileUnavailable = () => makeRecord({
        rankReason: 'fallback_without_sample_file',
        semanticSummary: 'Degraded static summary only; workspace exists but the original sample file is unavailable.',
        confidence: 0.12,
        gaps: ['missing_ghidra_analysis', 'sample_file_unavailable', 'missing_all_primary_evidence'],
        behaviorTags: [],
        snippetLines: [
            '// degraded fallback: ghidra artifacts unavailable',
            '// sample file missing in workspace.original',
            '// next step: run ghidra.analyze and retry',
        ],
    });

    // Reads at most `maxBytes` from the start of the file without loading the rest.
    const readLeadingBytes = (filePath, maxBytes) => {
        const fd = fs.openSync(filePath, 'r');
        try {
            const wanted = Math.min(fs.fstatSync(fd).size, maxBytes);
            const buffer = Buffer.alloc(wanted);
            let filled = 0;
            while (filled < wanted) {
                const count = fs.readSync(fd, buffer, filled, wanted - filled, filled);
                if (count === 0) {
                    break;
                }
                filled += count;
            }
            return buffer.subarray(0, filled);
        }
        finally {
            fs.closeSync(fd);
        }
    };

    let workspace;
    try {
        workspace = await workspaceManager.getWorkspace(sampleId);
    }
    catch {
        return makeRecord({
            rankReason: 'fallback_without_workspace',
            semanticSummary: 'Degraded static summary only; no Ghidra workspace was available for deeper recovery.',
            confidence: 0.1,
            gaps: ['missing_ghidra_analysis', 'workspace_unavailable', 'missing_all_primary_evidence'],
            behaviorTags: [],
            snippetLines: [
                '// degraded fallback: ghidra artifacts unavailable',
                '// workspace not found for this sample',
                '// next step: run sample.ingest (if needed) and ghidra.analyze',
            ],
        });
    }

    let files;
    try {
        files = fs
            .readdirSync(workspace.original, { withFileTypes: true })
            .filter((entry) => entry.isFile())
            .map((entry) => entry.name)
            .sort((a, b) => a.localeCompare(b));
    }
    catch {
        // A missing or unreadable directory is equivalent to "no sample file".
        files = [];
    }
    if (files.length === 0) {
        return sampleFileUnavailable();
    }

    const samplePath = path.join(workspace.original, files[0]);
    let scanWindow;
    try {
        scanWindow = readLeadingBytes(samplePath, SCAN_WINDOW_BYTES);
    }
    catch {
        // The file vanished or cannot be read: still answer with a degraded record.
        return sampleFileUnavailable();
    }

    // Printable-ASCII runs never contain '\n', so joining them with newlines is lossless.
    const printableRuns = (scanWindow.toString('latin1').match(/[ -~]{6,}/g) || []).slice(0, MAX_STRING_RUNS);
    const asciiCorpus = printableRuns.join('\n');
    const behaviorTags = inferBehaviorTags(undefined, asciiCorpus).slice(0, MAX_TAGS_OR_HINTS);
    const trimmedRuns = printableRuns
        .map((line) => line.trim())
        .filter((line) => line.length > 0);
    // A Set keeps first-occurrence order, matching an indexOf-based dedupe.
    const topHints = [...new Set(trimmedRuns)].slice(0, MAX_TAGS_OR_HINTS);
    const hasTags = behaviorTags.length > 0;

    return makeRecord({
        rankReason: 'fallback_static_summary',
        semanticSummary: hasTags
            ? `Static-only fallback suggests ${behaviorTags.map(describeBehaviorTag).join(' and ')}.`
            : 'Static-only fallback summary; run ghidra.analyze for function-level semantics.',
        confidence: hasTags ? 0.24 : 0.16,
        gaps: ['missing_ghidra_analysis', 'missing_pseudocode', 'missing_cfg'],
        behaviorTags,
        snippetLines: [
            '// degraded fallback: ghidra function artifacts unavailable',
            `// sample_path=${samplePath}`,
            `// inferred_behaviors=${hasTags ? behaviorTags.join(',') : 'none'}`,
            '// hint_strings:',
            ...topHints.map((line) => `// ${line}`),
            '// next step: run ghidra.analyze to unlock function-level pseudocode/cfg',
        ],
    });
}
2387
+ export function createCodeFunctionsReconstructHandler(workspaceManager, database, cacheManager, dependencies) {
2388
+ const decompilerWorker = new DecompilerWorker(database, workspaceManager);
2389
+ const stringsExtractHandler = createStringsExtractHandler(workspaceManager, database, cacheManager);
2390
+ const rankFunctions = dependencies?.rankFunctions ||
2391
+ ((sampleId, topK) => decompilerWorker.rankFunctions(sampleId, topK));
2392
+ const decompileFunction = dependencies?.decompileFunction ||
2393
+ ((sampleId, addressOrSymbol, includeXrefs, timeoutMs) => decompilerWorker.decompileFunction(sampleId, addressOrSymbol, includeXrefs, timeoutMs));
2394
+ const getFunctionCFG = dependencies?.getFunctionCFG ||
2395
+ ((sampleId, addressOrSymbol, timeoutMs) => decompilerWorker.getFunctionCFG(sampleId, addressOrSymbol, timeoutMs));
2396
+ const runtimeEvidenceLoader = dependencies?.runtimeEvidenceLoader ||
2397
+ ((sampleId, options) => loadDynamicTraceEvidence(workspaceManager, database, sampleId, {
2398
+ evidenceScope: options?.evidenceScope,
2399
+ sessionTag: options?.sessionTag,
2400
+ }));
2401
+ const stringEvidenceLoader = dependencies?.stringEvidenceLoader ||
2402
+ (async (sampleId) => {
2403
+ const response = await stringsExtractHandler({
2404
+ sample_id: sampleId,
2405
+ max_strings: 120,
2406
+ max_context_windows: 8,
2407
+ max_string_length: 160,
2408
+ category_filter: 'all',
2409
+ });
2410
+ const responseData = response.data;
2411
+ if (!response.ok || !responseData?.summary) {
2412
+ return null;
2413
+ }
2414
+ return {
2415
+ top_high_value: responseData.summary.top_high_value || [],
2416
+ context_windows: responseData.summary.context_windows || [],
2417
+ };
2418
+ });
2419
+ const semanticNameSuggester = dependencies?.semanticNameSuggester ||
2420
+ (async () => null);
2421
+ const externalSemanticSuggestionLoader = dependencies?.externalSemanticSuggestionLoader ||
2422
+ ((sampleId, options) => loadSemanticNameSuggestionIndex(workspaceManager, database, sampleId, options));
2423
+ return async (args) => {
2424
+ const startTime = Date.now();
2425
+ try {
2426
+ const input = CodeFunctionsReconstructInputSchema.parse(args);
2427
+ const sample = database.findSample(input.sample_id);
2428
+ if (!sample) {
2429
+ return {
2430
+ ok: false,
2431
+ errors: [`Sample not found: ${input.sample_id}`],
2432
+ };
2433
+ }
2434
+ const completedGhidraAnalysis = findBestGhidraAnalysis(database.findAnalysesBySample(input.sample_id), 'function_index');
2435
+ const analysisMarker = completedGhidraAnalysis?.finished_at || completedGhidraAnalysis?.id || 'none';
2436
+ const runtimeArtifacts = [
2437
+ ...database.findArtifactsByType(input.sample_id, 'dynamic_trace_json'),
2438
+ ...database.findArtifactsByType(input.sample_id, 'sandbox_trace_json'),
2439
+ ];
2440
+ const runtimeMarker = runtimeArtifacts.length > 0
2441
+ ? runtimeArtifacts.map((item) => `${item.type}:${item.sha256}`).sort().join('|')
2442
+ : 'none';
2443
+ const semanticNameArtifacts = database.findArtifactsByType(input.sample_id, SEMANTIC_NAME_SUGGESTIONS_ARTIFACT_TYPE);
2444
+ const semanticNameMarker = semanticNameArtifacts.length > 0
2445
+ ? semanticNameArtifacts.map((item) => `${item.id}:${item.sha256}`).sort().join('|')
2446
+ : 'none';
2447
+ const mode = input.address || input.symbol ? 'single' : 'topk';
2448
+ const cacheKey = generateCacheKey({
2449
+ sampleSha256: sample.sha256,
2450
+ toolName: TOOL_NAME,
2451
+ toolVersion: TOOL_VERSION,
2452
+ args: {
2453
+ mode,
2454
+ address: input.address || null,
2455
+ symbol: input.symbol || null,
2456
+ topk: input.topk,
2457
+ include_xrefs: input.include_xrefs,
2458
+ max_pseudocode_lines: input.max_pseudocode_lines,
2459
+ max_assembly_lines: input.max_assembly_lines,
2460
+ timeout: input.timeout,
2461
+ evidence_scope: input.evidence_scope,
2462
+ evidence_session_tag: input.evidence_session_tag || null,
2463
+ semantic_scope: input.semantic_scope,
2464
+ semantic_session_tag: input.semantic_session_tag || null,
2465
+ analysis_marker: analysisMarker,
2466
+ runtime_marker: runtimeMarker,
2467
+ semantic_name_marker: semanticNameMarker,
2468
+ ghidra_valid: ghidraConfig.isValid,
2469
+ ghidra_install_dir: ghidraConfig.installDir || 'none',
2470
+ ghidra_version: ghidraConfig.version || 'unknown',
2471
+ },
2472
+ });
2473
+ const cachedLookup = await lookupCachedResult(cacheManager, cacheKey);
2474
+ if (cachedLookup) {
2475
+ return {
2476
+ ok: true,
2477
+ data: cachedLookup.data,
2478
+ warnings: ['Result from cache', formatCacheWarning(cachedLookup.metadata)],
2479
+ metrics: {
2480
+ elapsed_ms: Date.now() - startTime,
2481
+ tool: TOOL_NAME,
2482
+ cached: true,
2483
+ cache_key: cachedLookup.metadata.key,
2484
+ cache_tier: cachedLookup.metadata.tier,
2485
+ cache_created_at: cachedLookup.metadata.createdAt,
2486
+ cache_expires_at: cachedLookup.metadata.expiresAt,
2487
+ cache_hit_at: cachedLookup.metadata.fetchedAt,
2488
+ },
2489
+ };
2490
+ }
2491
+ const dynamicEvidence = await runtimeEvidenceLoader(input.sample_id, {
2492
+ evidenceScope: input.evidence_scope,
2493
+ sessionTag: input.evidence_session_tag,
2494
+ });
2495
+ const externalSemanticSuggestions = await externalSemanticSuggestionLoader(input.sample_id, {
2496
+ scope: input.semantic_scope,
2497
+ sessionTag: input.semantic_session_tag,
2498
+ });
2499
+ const provenance = {
2500
+ runtime: buildRuntimeArtifactProvenance(dynamicEvidence, input.evidence_scope, input.evidence_session_tag),
2501
+ semantic_names: buildSemanticArtifactProvenance('semantic naming artifacts', externalSemanticSuggestions, input.semantic_scope, input.semantic_session_tag),
2502
+ };
2503
+ let targets = [];
2504
+ if (mode === 'single') {
2505
+ targets = [
2506
+ {
2507
+ target: input.address || input.symbol || '',
2508
+ rankScore: null,
2509
+ rankReasons: [],
2510
+ xrefSummary: [],
2511
+ },
2512
+ ];
2513
+ }
2514
+ else {
2515
+ const ranked = await rankFunctions(input.sample_id, input.topk);
2516
+ targets = ranked.map((item) => ({
2517
+ target: item.address,
2518
+ rankScore: item.score,
2519
+ rankReasons: item.reasons || [],
2520
+ xrefSummary: item.xref_summary || [],
2521
+ }));
2522
+ }
2523
+ if (targets.length === 0) {
2524
+ const fallbackTarget = input.address || input.symbol || `topk:${input.topk}`;
2525
+ const fallbackFunction = await buildDegradedFallbackFunction(workspaceManager, input.sample_id, fallbackTarget);
2526
+ const fallbackOutput = {
2527
+ sample_id: input.sample_id,
2528
+ mode,
2529
+ requested_count: mode === 'single' ? 1 : input.topk,
2530
+ reconstructed_count: 1,
2531
+ overall_confidence: fallbackFunction.confidence,
2532
+ provenance,
2533
+ confidence_map: [
2534
+ {
2535
+ function: fallbackFunction.function,
2536
+ address: fallbackFunction.address,
2537
+ confidence: fallbackFunction.confidence,
2538
+ gaps: fallbackFunction.gaps,
2539
+ },
2540
+ ],
2541
+ functions: [fallbackFunction],
2542
+ };
2543
+ await cacheManager.setCachedResult(cacheKey, fallbackOutput, CACHE_TTL_MS, sample.sha256);
2544
+ return {
2545
+ ok: true,
2546
+ data: fallbackOutput,
2547
+ warnings: [
2548
+ `No candidate functions available; returned degraded fallback summary. Run ghidra.analyze for full function-level reconstruction on ${input.sample_id}.`,
2549
+ ],
2550
+ metrics: {
2551
+ elapsed_ms: Date.now() - startTime,
2552
+ tool: TOOL_NAME,
2553
+ },
2554
+ };
2555
+ }
2556
+ const warnings = [];
2557
+ const timeoutMs = input.timeout * 1000;
2558
+ let sampleStringEvidence = null;
2559
+ try {
2560
+ sampleStringEvidence = await stringEvidenceLoader(input.sample_id);
2561
+ }
2562
+ catch (error) {
2563
+ warnings.push(`String evidence unavailable: ${normalizeError(error)}`);
2564
+ }
2565
+ const reconstructedFunctions = [];
2566
+ let fallbackDisasm = null;
2567
+ let fallbackDisasmAttempted = false;
2568
+ const resolveFallbackDisasm = async () => {
2569
+ if (fallbackDisasm) {
2570
+ return fallbackDisasm;
2571
+ }
2572
+ if (fallbackDisasmAttempted) {
2573
+ return null;
2574
+ }
2575
+ fallbackDisasmAttempted = true;
2576
+ try {
2577
+ const workspace = await workspaceManager.getWorkspace(input.sample_id);
2578
+ const fallbackFile = fs
2579
+ .readdirSync(workspace.original, { withFileTypes: true })
2580
+ .filter((entry) => entry.isFile())
2581
+ .map((entry) => entry.name)
2582
+ .sort((a, b) => a.localeCompare(b))[0];
2583
+ if (!fallbackFile) {
2584
+ warnings.push('fallback disassembly unavailable: sample file missing in workspace.original');
2585
+ return null;
2586
+ }
2587
+ const samplePath = path.join(workspace.original, fallbackFile);
2588
+ fallbackDisasm = await runEntrypointFallbackDisasm(samplePath, {
2589
+ max_instructions: 140,
2590
+ max_bytes: 1536,
2591
+ });
2592
+ return fallbackDisasm;
2593
+ }
2594
+ catch (error) {
2595
+ warnings.push(`fallback disassembly failed: ${normalizeError(error)}`);
2596
+ return null;
2597
+ }
2598
+ };
2599
+ for (const target of targets) {
2600
+ let decompiled;
2601
+ let cfg;
2602
+ try {
2603
+ decompiled = await decompileFunction(input.sample_id, target.target, input.include_xrefs, timeoutMs);
2604
+ }
2605
+ catch (error) {
2606
+ warnings.push(`decompile failed for ${target.target}: ${normalizeError(error)}`);
2607
+ }
2608
+ try {
2609
+ cfg = await getFunctionCFG(input.sample_id, target.target, timeoutMs);
2610
+ }
2611
+ catch (error) {
2612
+ warnings.push(`cfg failed for ${target.target}: ${normalizeError(error)}`);
2613
+ }
2614
+ const pseudocodeLines = parsePseudocodeLines(decompiled?.pseudocode);
2615
+ let assembly = extractAssemblyFromCFG(cfg, input.max_assembly_lines);
2616
+ let fallbackUsedForTarget = false;
2617
+ let fallbackAddress;
2618
+ if (!decompiled && !cfg && assembly.instructionCount === 0) {
2619
+ const fallback = await resolveFallbackDisasm();
2620
+ if (fallback) {
2621
+ fallbackUsedForTarget = true;
2622
+ fallbackAddress = fallback.result.address;
2623
+ assembly = {
2624
+ excerpt: fallback.result.assembly,
2625
+ instructionCount: fallback.result.instruction_count,
2626
+ };
2627
+ warnings.push(`fallback disassembly used for ${target.target} (${fallback.result.backend}/${fallback.result.parser}, section=${fallback.result.entry_section})`);
2628
+ if (Array.isArray(fallback.warnings) && fallback.warnings.length > 0) {
2629
+ warnings.push(...fallback.warnings.map((item) => `fallback note: ${item}`));
2630
+ }
2631
+ }
2632
+ }
2633
+ const gaps = dedupe([
2634
+ ...collectGaps(pseudocodeLines, cfg, decompiled, input.max_pseudocode_lines),
2635
+ ...(fallbackUsedForTarget ? ['ghidra_unavailable_fallback_disasm'] : []),
2636
+ ]);
2637
+ const functionName = decompiled?.function || (fallbackUsedForTarget ? 'entrypoint_fallback' : target.target);
2638
+ const functionAddress = decompiled?.address ||
2639
+ cfg?.address ||
2640
+ fallbackAddress ||
2641
+ (target.target.startsWith('0x') ? target.target : 'unknown');
2642
+ const behaviorTags = inferBehaviorTags(decompiled, assembly.excerpt);
2643
+ const relationshipContext = buildRelationshipContext(decompiled);
2644
+ const callContext = buildCallContext(decompiled);
2645
+ const xrefSignals = collectXrefSignals(target, decompiled, assembly.excerpt);
2646
+ const runtimeContext = correlateFunctionWithRuntimeEvidence({
2647
+ functionName,
2648
+ behaviorTags,
2649
+ xrefApis: [
2650
+ ...xrefSignals.map((item) => item.api),
2651
+ ...extractSensitiveApisFromReasons(target.rankReasons),
2652
+ ],
2653
+ rankReasons: target.rankReasons,
2654
+ semanticSummary: decompiled?.pseudocode || assembly.excerpt,
2655
+ callTargets: [...callContext.callers, ...callContext.callees],
2656
+ }, dynamicEvidence);
2657
+ const draftSemanticSummary = buildSemanticSummary(functionName, behaviorTags, xrefSignals, callContext, relationshipContext, gaps, target.rankReasons, [], [], [], runtimeContext);
2658
+ const cfgShape = buildCFGShape(cfg || undefined);
2659
+ const renameSuggestion = buildRenameSuggestion(functionName, behaviorTags, xrefSignals, callContext, relationshipContext, gaps, target.rankReasons, draftSemanticSummary, `${decompiled?.pseudocode || ''}\n${assembly.excerpt}`, runtimeContext);
2660
+ const confidence = computeConfidence(decompiled, cfg, assembly.instructionCount, target.rankScore, runtimeContext?.confidence);
2661
+ const draftSourceLikeSnippet = buildSourceLikeSnippet(functionName, confidence.confidence, gaps, pseudocodeLines, input.max_pseudocode_lines, draftSemanticSummary, xrefSignals, callContext, relationshipContext, target.rankReasons, [], [], [], runtimeContext);
2662
+ const functionStringHints = buildFunctionStringHints(sampleStringEvidence, functionName, behaviorTags, xrefSignals, runtimeContext, draftSemanticSummary, draftSourceLikeSnippet);
2663
+ const parameterRoles = inferParameterRoles(behaviorTags, xrefSignals, runtimeContext, functionStringHints, draftSemanticSummary, draftSourceLikeSnippet);
2664
+ const stateRoles = inferStateRoles(behaviorTags, xrefSignals, runtimeContext, functionStringHints, draftSemanticSummary, draftSourceLikeSnippet);
2665
+ const structInference = inferStructInference(parameterRoles, stateRoles);
2666
+ const semanticSummary = buildSemanticSummary(functionName, behaviorTags, xrefSignals, callContext, relationshipContext, gaps, target.rankReasons, parameterRoles, stateRoles, structInference, runtimeContext);
2667
+ const sourceLikeSnippet = buildSourceLikeSnippet(functionName, confidence.confidence, gaps, pseudocodeLines, input.max_pseudocode_lines, semanticSummary, xrefSignals, callContext, relationshipContext, target.rankReasons, parameterRoles, stateRoles, structInference, runtimeContext);
2668
+ const semanticEvidence = {
2669
+ semantic_summary: semanticSummary,
2670
+ xref_signals: xrefSignals,
2671
+ call_relationships: relationshipContext,
2672
+ runtime_context: runtimeContext || null,
2673
+ string_hints: functionStringHints,
2674
+ pseudocode_excerpt: buildPseudocodeExcerpt(sourceLikeSnippet),
2675
+ cfg_shape: cfgShape,
2676
+ parameter_roles: parameterRoles,
2677
+ state_roles: stateRoles,
2678
+ struct_inference: structInference,
2679
+ };
2680
+ const enrichedSourceLikeSnippet = withSuggestedNameHeader(sourceLikeSnippet, renameSuggestion);
2681
+ reconstructedFunctions.push({
2682
+ target: target.target,
2683
+ function: functionName,
2684
+ address: functionAddress,
2685
+ rank_score: target.rankScore,
2686
+ rank_reasons: target.rankReasons,
2687
+ suggested_name: renameSuggestion.suggested_name,
2688
+ suggested_role: renameSuggestion.suggested_role,
2689
+ rename_confidence: renameSuggestion.suggested_name
2690
+ ? renameSuggestion.rename_confidence
2691
+ : null,
2692
+ rename_evidence: renameSuggestion.rename_evidence,
2693
+ semantic_summary: semanticSummary,
2694
+ xref_signals: xrefSignals,
2695
+ call_context: callContext,
2696
+ call_relationships: relationshipContext,
2697
+ runtime_context: runtimeContext,
2698
+ parameter_roles: parameterRoles,
2699
+ state_roles: stateRoles,
2700
+ struct_inference: structInference,
2701
+ semantic_evidence: semanticEvidence,
2702
+ confidence_profile: buildReconstructionConfidenceSemantics({
2703
+ score: confidence.confidence,
2704
+ breakdown: confidence.breakdown,
2705
+ runtimeConfidence: runtimeContext?.confidence,
2706
+ }),
2707
+ runtime_confidence_profile: buildRuntimeConfidenceSemantics({
2708
+ score: runtimeContext?.confidence,
2709
+ matchedApis: runtimeContext?.corroborated_apis,
2710
+ matchedStages: runtimeContext?.corroborated_stages,
2711
+ matchedMemoryRegions: runtimeContext?.matched_memory_regions,
2712
+ executed: runtimeContext?.executed,
2713
+ evidenceSources: runtimeContext?.evidence_sources,
2714
+ }),
2715
+ naming_confidence_profile: buildNamingConfidenceSemantics({
2716
+ resolutionSource: renameSuggestion.suggested_name ? 'rule' : 'unresolved',
2717
+ renameConfidence: renameSuggestion.suggested_name
2718
+ ? renameSuggestion.rename_confidence
2719
+ : null,
2720
+ ruleBasedName: renameSuggestion.suggested_name,
2721
+ validatedName: renameSuggestion.suggested_name,
2722
+ }),
2723
+ confidence: confidence.confidence,
2724
+ confidence_breakdown: confidence.breakdown,
2725
+ gaps,
2726
+ evidence: {
2727
+ pseudocode_lines: pseudocodeLines.length,
2728
+ cfg_nodes: cfg?.nodes.length || 0,
2729
+ cfg_edges: cfg?.edges.length || 0,
2730
+ instruction_count: assembly.instructionCount,
2731
+ caller_count: Math.max(decompiled?.callers.length || 0, decompiled?.caller_relationships?.length || 0),
2732
+ callee_count: Math.max(decompiled?.callees.length || 0, decompiled?.callee_relationships?.length || 0),
2733
+ },
2734
+ behavior_tags: behaviorTags,
2735
+ source_like_snippet: enrichedSourceLikeSnippet,
2736
+ assembly_excerpt: assembly.excerpt,
2737
+ });
2738
+ }
2739
+ const refinedFunctions = refineRenameSuggestions(reconstructedFunctions);
2740
+ const layeredFunctions = [];
2741
+ for (const func of refinedFunctions) {
2742
+ const externalSuggestion = findSemanticNameSuggestion(externalSemanticSuggestions, func.address, func.function);
2743
+ const { nameResolution, finalSuggestion } = await finalizeLayeredNameResolution(func, externalSuggestion, semanticNameSuggester);
2744
+ const suggestionAppliedSnippet = withSuggestedNameHeader(func.source_like_snippet, finalSuggestion);
2745
+ layeredFunctions.push({
2746
+ ...func,
2747
+ suggested_name: finalSuggestion.suggested_name,
2748
+ suggested_role: finalSuggestion.suggested_role,
2749
+ rename_confidence: finalSuggestion.suggested_name
2750
+ ? finalSuggestion.rename_confidence
2751
+ : null,
2752
+ rename_evidence: finalSuggestion.rename_evidence,
2753
+ name_resolution: nameResolution,
2754
+ naming_confidence_profile: buildNamingConfidenceSemantics({
2755
+ resolutionSource: nameResolution.resolution_source,
2756
+ renameConfidence: finalSuggestion.suggested_name
2757
+ ? finalSuggestion.rename_confidence
2758
+ : null,
2759
+ llmConfidence: nameResolution.llm_confidence,
2760
+ ruleBasedName: nameResolution.rule_based_name,
2761
+ validatedName: nameResolution.validated_name,
2762
+ }),
2763
+ source_like_snippet: withNameResolutionHeader(suggestionAppliedSnippet, nameResolution),
2764
+ });
2765
+ }
2766
+ layeredFunctions.sort((a, b) => b.confidence - a.confidence);
2767
+ const overallConfidence = layeredFunctions.reduce((sum, item) => sum + item.confidence, 0) /
2768
+ layeredFunctions.length;
2769
+ const outputData = {
2770
+ sample_id: input.sample_id,
2771
+ mode,
2772
+ requested_count: targets.length,
2773
+ reconstructed_count: layeredFunctions.length,
2774
+ overall_confidence: clamp(overallConfidence, 0, 1),
2775
+ provenance,
2776
+ confidence_map: layeredFunctions.map((item) => ({
2777
+ function: item.function,
2778
+ address: item.address,
2779
+ confidence: item.confidence,
2780
+ gaps: item.gaps,
2781
+ })),
2782
+ functions: layeredFunctions,
2783
+ };
2784
+ await cacheManager.setCachedResult(cacheKey, outputData, CACHE_TTL_MS, sample.sha256);
2785
+ return {
2786
+ ok: true,
2787
+ data: outputData,
2788
+ warnings: warnings.length > 0 ? warnings : undefined,
2789
+ metrics: {
2790
+ elapsed_ms: Date.now() - startTime,
2791
+ tool: TOOL_NAME,
2792
+ },
2793
+ };
2794
+ }
2795
+ catch (error) {
2796
+ return {
2797
+ ok: false,
2798
+ errors: [normalizeError(error)],
2799
+ metrics: {
2800
+ elapsed_ms: Date.now() - startTime,
2801
+ tool: TOOL_NAME,
2802
+ },
2803
+ };
2804
+ }
2805
+ };
2806
+ }
2807
+ //# sourceMappingURL=code-functions-reconstruct.js.map