ida-pro-mcp-xjoker 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. ida_pro_mcp/__init__.py +0 -0
  2. ida_pro_mcp/__main__.py +6 -0
  3. ida_pro_mcp/ida_mcp/__init__.py +68 -0
  4. ida_pro_mcp/ida_mcp/api_analysis.py +1296 -0
  5. ida_pro_mcp/ida_mcp/api_core.py +337 -0
  6. ida_pro_mcp/ida_mcp/api_debug.py +617 -0
  7. ida_pro_mcp/ida_mcp/api_memory.py +304 -0
  8. ida_pro_mcp/ida_mcp/api_modify.py +406 -0
  9. ida_pro_mcp/ida_mcp/api_python.py +179 -0
  10. ida_pro_mcp/ida_mcp/api_resources.py +295 -0
  11. ida_pro_mcp/ida_mcp/api_stack.py +167 -0
  12. ida_pro_mcp/ida_mcp/api_types.py +480 -0
  13. ida_pro_mcp/ida_mcp/auth.py +166 -0
  14. ida_pro_mcp/ida_mcp/cache.py +232 -0
  15. ida_pro_mcp/ida_mcp/config.py +228 -0
  16. ida_pro_mcp/ida_mcp/framework.py +547 -0
  17. ida_pro_mcp/ida_mcp/http.py +859 -0
  18. ida_pro_mcp/ida_mcp/port_utils.py +104 -0
  19. ida_pro_mcp/ida_mcp/rpc.py +187 -0
  20. ida_pro_mcp/ida_mcp/server_manager.py +339 -0
  21. ida_pro_mcp/ida_mcp/sync.py +233 -0
  22. ida_pro_mcp/ida_mcp/tests/__init__.py +14 -0
  23. ida_pro_mcp/ida_mcp/tests/test_api_analysis.py +336 -0
  24. ida_pro_mcp/ida_mcp/tests/test_api_core.py +237 -0
  25. ida_pro_mcp/ida_mcp/tests/test_api_memory.py +207 -0
  26. ida_pro_mcp/ida_mcp/tests/test_api_modify.py +123 -0
  27. ida_pro_mcp/ida_mcp/tests/test_api_resources.py +199 -0
  28. ida_pro_mcp/ida_mcp/tests/test_api_stack.py +77 -0
  29. ida_pro_mcp/ida_mcp/tests/test_api_types.py +249 -0
  30. ida_pro_mcp/ida_mcp/ui.py +357 -0
  31. ida_pro_mcp/ida_mcp/utils.py +1186 -0
  32. ida_pro_mcp/ida_mcp/zeromcp/__init__.py +5 -0
  33. ida_pro_mcp/ida_mcp/zeromcp/jsonrpc.py +384 -0
  34. ida_pro_mcp/ida_mcp/zeromcp/mcp.py +883 -0
  35. ida_pro_mcp/ida_mcp.py +186 -0
  36. ida_pro_mcp/idalib_server.py +354 -0
  37. ida_pro_mcp/idalib_session_manager.py +259 -0
  38. ida_pro_mcp/server.py +1060 -0
  39. ida_pro_mcp/test.py +170 -0
  40. ida_pro_mcp_xjoker-1.0.1.dist-info/METADATA +405 -0
  41. ida_pro_mcp_xjoker-1.0.1.dist-info/RECORD +45 -0
  42. ida_pro_mcp_xjoker-1.0.1.dist-info/WHEEL +5 -0
  43. ida_pro_mcp_xjoker-1.0.1.dist-info/entry_points.txt +4 -0
  44. ida_pro_mcp_xjoker-1.0.1.dist-info/licenses/LICENSE +21 -0
  45. ida_pro_mcp_xjoker-1.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1296 @@
1
+ from itertools import islice
2
+ import struct
3
+ from typing import Annotated, Optional
4
+ import ida_lines
5
+ import ida_funcs
6
+ import idaapi
7
+ import idautils
8
+ import ida_typeinf
9
+ import ida_nalt
10
+ import ida_bytes
11
+ import ida_ida
12
+ import ida_idaapi
13
+ import ida_xref
14
+ import ida_ua
15
+ import ida_name
16
+ from .rpc import tool
17
+ from .sync import idasync, tool_timeout
18
+ from .cache import decompile_cache, xrefs_cache
19
+ from .utils import (
20
+ parse_address,
21
+ normalize_list_input,
22
+ get_function,
23
+ get_prototype,
24
+ get_stack_frame_variables_internal,
25
+ decompile_function_safe,
26
+ get_assembly_lines,
27
+ get_all_xrefs,
28
+ get_all_comments,
29
+ Argument,
30
+ DisassemblyFunction,
31
+ Xref,
32
+ BasicBlock,
33
+ StructFieldQuery,
34
+ )
35
+
36
+ # ============================================================================
37
+ # Instruction Helpers
38
+ # ============================================================================
39
+
40
# Upper bound on how many bytes to step backwards from a raw byte match when
# looking for the start of the instruction that contains it.
# NOTE(review): 15 presumably mirrors the maximum x86 instruction length - confirm.
_IMM_SCAN_BACK_MAX = 15
41
+
42
+
43
def _decode_insn_at(ea: int) -> ida_ua.insn_t | None:
    """Decode the instruction at ``ea``.

    Returns the filled-in ``insn_t``, or ``None`` when ``ida_ua.decode_insn``
    returns 0 for that address.
    """
    decoded = ida_ua.insn_t()
    length = ida_ua.decode_insn(decoded, ea)
    return decoded if length != 0 else None
48
+
49
+
50
def _next_head(ea: int, end_ea: int) -> int:
    """Return ``ida_bytes.next_head(ea, end_ea)`` unchanged.

    Callers in this module compare the result against ``idaapi.BADADDR`` to
    detect that there is nothing further to visit before ``end_ea``.
    """
    following = ida_bytes.next_head(ea, end_ea)
    return following
52
+
53
+
54
def _operand_value(insn: ida_ua.insn_t, i: int) -> int | None:
    """Return the comparable numeric value of operand ``i`` of ``insn``.

    * ``o_void`` operands yield ``None``.
    * ``o_mem`` / ``o_far`` / ``o_near`` operands yield ``op.addr``.
    * every other operand type yields ``op.value``.
    """
    operand = insn.ops[i]
    kind = operand.type
    if kind == ida_ua.o_void:
        return None
    address_kinds = (ida_ua.o_mem, ida_ua.o_far, ida_ua.o_near)
    return operand.addr if kind in address_kinds else operand.value
61
+
62
+
63
def _operand_type(insn: ida_ua.insn_t, i: int) -> int:
    """Return the ``type`` field of operand ``i`` of ``insn``."""
    operand = insn.ops[i]
    return operand.type
65
+
66
+
67
def _insn_mnem(insn: ida_ua.insn_t) -> str:
    """Return the lower-cased canonical mnemonic of ``insn``.

    Any exception raised while fetching or lower-casing the mnemonic is
    swallowed and an empty string is returned instead.
    """
    try:
        mnemonic = insn.get_canon_mnem()
        lowered = mnemonic.lower()
    except Exception:
        return ""
    return lowered
72
+
73
+
74
+ def _value_to_le_bytes(value: int) -> tuple[bytes, int, int] | None:
75
+ if value < 0:
76
+ if value >= -0x80000000:
77
+ size = 4
78
+ value &= 0xFFFFFFFF
79
+ elif value >= -0x8000000000000000:
80
+ size = 8
81
+ value &= 0xFFFFFFFFFFFFFFFF
82
+ else:
83
+ return None
84
+ else:
85
+ if value <= 0xFFFFFFFF:
86
+ size = 4
87
+ elif value <= 0xFFFFFFFFFFFFFFFF:
88
+ size = 8
89
+ else:
90
+ return None
91
+
92
+ fmt = "<I" if size == 4 else "<Q"
93
+ return struct.pack(fmt, value), size, value
94
+
95
+
96
def _value_candidates_for_immediate(value: int) -> list[tuple[int, int, bytes]]:
    """Build the byte patterns an immediate ``value`` may be encoded as.

    For each supported width (4 bytes, then 8 bytes) a candidate
    ``(masked_value, size, little_endian_bytes)`` is produced when ``value``
    fits that width under either its signed or its unsigned interpretation.

    Accepting the unsigned range matters for values with the top bit set:
    ``0xDEADBEEF`` must yield the 4-byte pattern ``EF BE AD DE``. Previously
    only the signed range was accepted, so such values were searched solely
    in their zero-extended 8-byte form, and values above
    ``0x7FFFFFFFFFFFFFFF`` produced no candidate at all.

    Returns an empty list when ``value`` fits neither width.
    """
    candidates: list[tuple[int, int, bytes]] = []
    for size, fmt in ((4, "<I"), (8, "<Q")):
        bits = 8 * size
        # Signed lower bound, unsigned upper bound.
        if not (-(1 << (bits - 1)) <= value < (1 << bits)):
            continue
        masked = value & ((1 << bits) - 1)
        candidates.append((masked, size, struct.pack(fmt, masked)))
    return candidates
115
+
116
+
117
def _resolve_immediate_insn_start(
    match_ea: int,
    value: int,
    seg_start: int,
    alt_value: int | None = None,
) -> int | None:
    """Find the start of the instruction whose immediate sits at ``match_ea``.

    Candidate start addresses are tried from ``match_ea`` backwards, at most
    ``_IMM_SCAN_BACK_MAX`` bytes and never before ``seg_start``. A candidate is
    accepted when it decodes to an instruction that covers ``match_ea`` and has
    an ``o_imm`` operand equal to ``value`` (or to ``alt_value`` when given).
    If that operand reports a non-zero ``offb``, the operand bytes must also
    begin exactly at ``match_ea``.

    Returns the accepted start address, or ``None`` when no candidate fits.
    """
    lowest = max(seg_start, match_ea - _IMM_SCAN_BACK_MAX)
    candidate = match_ea
    while candidate >= lowest:
        insn = _decode_insn_at(candidate)
        if insn is not None and candidate <= match_ea < candidate + insn.size:
            for idx in range(8):
                kind = _operand_type(insn, idx)
                if kind == ida_ua.o_void:
                    # Operands are packed; the first void one ends the list.
                    break
                if kind != ida_ua.o_imm:
                    continue
                imm = _operand_value(insn, idx)
                if imm is None:
                    continue
                same_value = imm == value or (
                    alt_value is not None and imm == alt_value
                )
                if not same_value:
                    continue
                offb = getattr(insn.ops[idx], "offb", 0)
                if not offb or candidate + offb == match_ea:
                    return candidate
        candidate -= 1
    return None
146
+
147
+
148
+ # ============================================================================
149
+ # Code Analysis & Decompilation
150
+ # ============================================================================
151
+
152
+
153
@tool
@idasync
@tool_timeout(90.0)
def decompile(
    addr: Annotated[str, "Function address to decompile"],
) -> dict:
    """Decompile function to pseudocode"""
    # Returns {"addr", "code"} on success and {"addr", "code": None, "error"}
    # on failure; "addr" always echoes the caller's own spelling.
    try:
        start = parse_address(addr)

        # The cache is keyed by the resolved address, so different spellings of
        # the same location ("main", "0x401000", ...) share one entry.
        cache_key = hex(start)
        cached = decompile_cache.get(cache_key)
        if cached is not None:
            # Hand back a copy carrying *this* request's address string: the
            # stored dict holds the spelling of whichever request populated the
            # cache, and returning it directly would also let a caller mutate
            # the cached object.
            return {**cached, "addr": addr}

        code = decompile_function_safe(start)
        if code is None:
            # Failures are never cached so a later retry can succeed.
            return {"addr": addr, "code": None, "error": "Decompilation failed"}

        result = {"addr": addr, "code": code}
        decompile_cache.set(cache_key, dict(result))
        return result
    except Exception as e:
        return {"addr": addr, "code": None, "error": str(e)}
182
+
183
+
184
@tool
@idasync
@tool_timeout(90.0)
def disasm(
    addr: Annotated[str, "Function address to disassemble"],
    max_instructions: Annotated[
        int, "Max instructions per function (default: 5000, max: 50000)"
    ] = 5000,
    offset: Annotated[int, "Skip first N instructions (default: 0)"] = 0,
    include_total: Annotated[
        bool, "Compute total instruction count (default: false)"
    ] = False,
) -> dict:
    """Disassemble function to assembly instructions"""
    # Page sizes outside 1..50000 fall back to the hard cap; a negative offset
    # is treated as 0.
    if not (0 < max_instructions <= 50000):
        max_instructions = 50000
    if offset < 0:
        offset = 0

    try:
        start = parse_address(addr)
        func = idaapi.get_func(start)

        seg = idaapi.getseg(start)
        if not seg:
            return {
                "addr": addr,
                "asm": None,
                "error": "No segment found",
                "cursor": {"done": True},
            }
        segment_name = idaapi.get_segm_name(seg)

        # The header reports the requested address in both cases, not the
        # start of the enclosing function.
        header_addr = start
        if func:
            func_name: str = ida_funcs.get_func_name(func.start_ea) or "<unnamed>"
        else:
            func_name = "<no function>"

        lines: list[str] = []
        counters = {"seen": 0, "total": 0}
        more = False

        def _visit(ea: int) -> bool:
            """Account for one instruction; return False to stop iterating."""
            nonlocal more
            if include_total:
                counters["total"] += 1
            still_skipping = counters["seen"] < offset
            counters["seen"] += 1
            if still_skipping:
                return True
            if len(lines) < max_instructions:
                raw = ida_lines.generate_disasm_line(ea, 0)
                text = ida_lines.tag_remove(raw) if raw else ""
                lines.append(f"{ea:x} {text}")
                return True
            # Page is full: keep walking only if a total count was requested.
            more = True
            return include_total

        if func:
            # Walk the function's items, ignoring those before the requested address.
            for item_ea in idautils.FuncItems(func.start_ea):
                if item_ea == idaapi.BADADDR or item_ea < start:
                    continue
                if not _visit(item_ea):
                    break
        else:
            # No function: walk heads sequentially until the segment ends or
            # something fails to decode.
            cursor = start
            while cursor < seg.end_ea and cursor != idaapi.BADADDR:
                if _decode_insn_at(cursor) is None:
                    break
                if not _visit(cursor):
                    break
                cursor = _next_head(cursor, seg.end_ea)

        total_count = counters["total"]
        if include_total and not more:
            more = total_count > offset + max_instructions

        header = f"{func_name} ({segment_name} @ {hex(header_addr)}):"
        lines_str = header + "\n" + "\n".join(lines) if lines else header

        rettype = None
        args: Optional[list[Argument]] = None
        stack_frame = None

        if func:
            tif = ida_typeinf.tinfo_t()
            if ida_nalt.get_tinfo(tif, func.start_ea) and tif.is_func():
                ftd = ida_typeinf.func_type_data_t()
                if tif.get_func_details(ftd):
                    rettype = str(ftd.rettype)
                    args = [
                        Argument(name=(param.name or f"arg{pos}"), type=str(param.type))
                        for pos, param in enumerate(ftd)
                    ]
            stack_frame = get_stack_frame_variables_internal(func.start_ea, False)

        out: DisassemblyFunction = {
            "name": func_name,
            "start_ea": hex(header_addr),
            "lines": lines_str,
        }
        # Optional sections are only attached when they carry information.
        if stack_frame:
            out["stack_frame"] = stack_frame
        if rettype:
            out["return_type"] = rettype
        if args is not None:
            out["arguments"] = args

        cursor_info = {"next": offset + max_instructions} if more else {"done": True}
        return {
            "addr": addr,
            "asm": out,
            "instruction_count": len(lines),
            "total_instructions": total_count if include_total else None,
            "cursor": cursor_info,
        }
    except Exception as e:
        return {
            "addr": addr,
            "asm": None,
            "error": str(e),
            "cursor": {"done": True},
        }
322
+
323
+
324
+ # ============================================================================
325
+ # Cross-Reference Analysis
326
+ # ============================================================================
327
+
328
+
329
@tool
@idasync
def xrefs_to(
    addrs: Annotated[list[str] | str, "Addresses to find cross-references to"],
    limit: Annotated[int, "Max xrefs per address (default: 100, max: 1000)"] = 100,
) -> list[dict]:
    """Get cross-references to specified addresses"""
    # One result dict per input address: {"addr", "xrefs", "more"} on success,
    # {"addr", "xrefs": None, "error"} on failure.
    addrs = normalize_list_input(addrs)

    # Out-of-range limits fall back to the hard cap.
    if limit <= 0 or limit > 1000:
        limit = 1000

    results = []

    for addr in addrs:
        # The limit is part of the key because it changes the cached payload.
        cache_key = f"xrefs:{addr}:{limit}"
        cached = xrefs_cache.get(cache_key)
        if cached is not None:
            results.append(cached)
            continue

        try:
            target = parse_address(addr)
            xrefs = []
            more = False
            for xref in idautils.XrefsTo(target):
                if len(xrefs) >= limit:
                    more = True
                    break
                xrefs.append(
                    Xref(
                        addr=hex(xref.frm),
                        type="code" if xref.iscode else "data",
                        fn=get_function(xref.frm, raise_error=False),
                    )
                )
        except Exception as e:
            # Failures are deliberately not cached: an address that cannot be
            # resolved now may resolve on a later call, and a cached error
            # would keep masking that.
            results.append({"addr": addr, "xrefs": None, "error": str(e)})
            continue

        result = {"addr": addr, "xrefs": xrefs, "more": more}
        xrefs_cache.set(cache_key, result)
        results.append(result)

    return results
374
+
375
+
376
@tool
@idasync
def xrefs_to_field(queries: list[StructFieldQuery] | StructFieldQuery) -> list[dict]:
    """Get cross-references to structure fields"""

    def _failed(struct, field, reason: str) -> dict:
        """Build the result entry used for every failure path."""
        return {"struct": struct, "field": field, "xrefs": [], "error": reason}

    # A single query dict is accepted as shorthand for a one-element list.
    if isinstance(queries, dict):
        queries = [queries]

    til = ida_typeinf.get_idati()
    if not til:
        return [
            _failed(q.get("struct"), q.get("field"), "Failed to retrieve type library")
            for q in queries
        ]

    results = []
    for query in queries:
        struct_name = query.get("struct", "")
        field_name = query.get("field", "")

        try:
            tif = ida_typeinf.tinfo_t()
            found_struct = tif.get_named_type(
                til, struct_name, ida_typeinf.BTF_STRUCT, True, False
            )
            if not found_struct:
                results.append(
                    _failed(struct_name, field_name, f"Struct '{struct_name}' not found")
                )
                continue

            idx = ida_typeinf.get_udm_by_fullname(None, struct_name + "." + field_name)
            if idx == -1:
                results.append(
                    _failed(
                        struct_name,
                        field_name,
                        f"Field '{field_name}' not found in '{struct_name}'",
                    )
                )
                continue

            tid = tif.get_udm_tid(idx)
            if tid == ida_idaapi.BADADDR:
                results.append(_failed(struct_name, field_name, "Unable to get tid"))
                continue

            # Cross-references are looked up against the member's tid.
            field_xrefs = [
                Xref(
                    addr=hex(ref.frm),
                    type="code" if ref.iscode else "data",
                    fn=get_function(ref.frm, raise_error=False),
                )
                for ref in idautils.XrefsTo(tid)
            ]
            results.append(
                {"struct": struct_name, "field": field_name, "xrefs": field_xrefs}
            )
        except Exception as e:
            results.append(_failed(struct_name, field_name, str(e)))

    return results
461
+
462
+
463
+ # ============================================================================
464
+ # Call Graph Analysis
465
+ # ============================================================================
466
+
467
+
468
@tool
@idasync
def callees(
    addrs: Annotated[list[str] | str, "Function addresses to get callees for"],
    limit: Annotated[int, "Max callees per function (default: 200, max: 500)"] = 200,
) -> list[dict]:
    """Get functions called by the specified functions"""
    addrs = normalize_list_input(addrs)

    # Out-of-range limits fall back to the hard cap.
    if not (0 < limit <= 500):
        limit = 500

    results = []

    for fn_addr in addrs:
        try:
            func_start = parse_address(fn_addr)
            func = idaapi.get_func(func_start)
            if not func:
                results.append(
                    {"addr": fn_addr, "callees": None, "error": "No function found"}
                )
                continue

            func_end = func.end_ea
            found = {}  # target address -> callee entry; keeps first-seen order
            more = False
            cursor = func_start  # scanning starts at the requested address
            while cursor < func_end:
                if len(found) >= limit:
                    more = True
                    break

                insn = _decode_insn_at(cursor)
                if insn is not None and insn.itype in (
                    idaapi.NN_call,
                    idaapi.NN_callfi,
                    idaapi.NN_callni,
                ):
                    first_op = insn.ops[0]
                    if first_op.type in (ida_ua.o_mem, ida_ua.o_near, ida_ua.o_far):
                        target = first_op.addr
                    elif first_op.type == ida_ua.o_imm:
                        target = first_op.value
                    else:
                        # Other operand kinds carry no static target here.
                        target = None

                    if target is not None and target not in found:
                        kind = (
                            "internal"
                            if idaapi.get_func(target) is not None
                            else "external"
                        )
                        name = ida_name.get_name(target)
                        if name is not None:
                            found[target] = {
                                "addr": hex(target),
                                "name": name,
                                "type": kind,
                            }

                # Undecodable addresses are simply stepped over.
                cursor = _next_head(cursor, func_end)
                if cursor == idaapi.BADADDR:
                    break

            results.append(
                {
                    "addr": fn_addr,
                    "callees": list(found.values()),
                    "more": more,
                }
            )
        except Exception as e:
            results.append({"addr": fn_addr, "callees": None, "error": str(e)})

    return results
543
+
544
+
545
+ # ============================================================================
546
+ # Pattern Matching & Signature Tools
547
+ # ============================================================================
548
+
549
+
550
@tool
@idasync
def find_bytes(
    patterns: Annotated[
        list[str] | str, "Byte patterns to search for (e.g. '48 8B ?? ??')"
    ],
    limit: Annotated[int, "Max matches per pattern (default: 1000, max: 10000)"] = 1000,
    offset: Annotated[int, "Skip first N matches (default: 0)"] = 0,
) -> list[dict]:
    """Search for byte patterns in the binary (supports wildcards with ??)"""
    # One result dict per pattern: {"pattern", "matches", "n", "cursor"}.
    # An "error" key is added only when parsing or searching failed, so
    # successful results are unchanged.
    patterns = normalize_list_input(patterns)

    # Enforce max limit
    if limit <= 0 or limit > 10000:
        limit = 10000
    # A negative offset skips nothing but would corrupt the "next" cursor.
    if offset < 0:
        offset = 0

    results = []
    for pattern in patterns:
        matches = []
        skipped = 0
        more = False
        error = None
        try:
            # Parse the pattern
            compiled = ida_bytes.compiled_binpat_vec_t()
            err = ida_bytes.parse_binpat_str(
                compiled, ida_ida.inf_get_min_ea(), pattern, 16
            )
            # NOTE(review): a truthy parse result is treated as failure, as the
            # original code did - confirm against the IDAPython version in use.
            if err:
                results.append(
                    {
                        "pattern": pattern,
                        "matches": [],
                        "n": 0,
                        "cursor": {"done": True},
                        "error": (
                            err
                            if isinstance(err, str)
                            else "Failed to parse byte pattern"
                        ),
                    }
                )
                continue

            # Search with early exit
            ea = ida_ida.inf_get_min_ea()
            max_ea = ida_ida.inf_get_max_ea()
            while ea != idaapi.BADADDR:
                ea = ida_bytes.bin_search(
                    ea, max_ea, compiled, ida_bytes.BIN_SEARCH_FORWARD
                )
                if ea == idaapi.BADADDR:
                    break
                if skipped < offset:
                    skipped += 1
                else:
                    matches.append(hex(ea))
                    if len(matches) >= limit:
                        # Page is full: probe once more to learn whether a
                        # further page exists.
                        next_ea = ida_bytes.bin_search(
                            ea + 1, max_ea, compiled, ida_bytes.BIN_SEARCH_FORWARD
                        )
                        more = next_ea != idaapi.BADADDR
                        break
                # Resume one byte further so overlapping matches are found.
                ea += 1
        except Exception as e:
            # Keep whatever was collected, but tell the caller the search did
            # not complete instead of silently returning a partial list.
            error = str(e)

        entry = {
            "pattern": pattern,
            "matches": matches,
            "n": len(matches),
            "cursor": {"next": offset + limit} if more else {"done": True},
        }
        if error is not None:
            entry["error"] = error
        results.append(entry)
    return results
620
+
621
+
622
+ # ============================================================================
623
+ # Control Flow Analysis
624
+ # ============================================================================
625
+
626
+
627
@tool
@idasync
def basic_blocks(
    addrs: Annotated[list[str] | str, "Function addresses to get basic blocks for"],
    max_blocks: Annotated[
        int, "Max basic blocks per function (default: 1000, max: 10000)"
    ] = 1000,
    offset: Annotated[int, "Skip first N blocks (default: 0)"] = 0,
) -> list[dict]:
    """Get control flow graph basic blocks for functions"""
    # One result dict per input address, carrying a page of blocks plus a
    # "cursor" that is either {"next": <offset>} or {"done": True}.
    addrs = normalize_list_input(addrs)

    # Enforce max limit
    if max_blocks <= 0 or max_blocks > 10000:
        max_blocks = 10000
    # A negative offset would be read by the slice below as "count from the
    # end" and return the wrong page; treat it as 0.
    if offset < 0:
        offset = 0

    results = []
    for fn_addr in addrs:
        try:
            ea = parse_address(fn_addr)
            func = idaapi.get_func(ea)
            if not func:
                results.append(
                    {
                        "addr": fn_addr,
                        "error": "Function not found",
                        "blocks": [],
                        "cursor": {"done": True},
                    }
                )
                continue

            all_blocks = [
                BasicBlock(
                    start=hex(block.start_ea),
                    end=hex(block.end_ea),
                    size=block.end_ea - block.start_ea,
                    type=block.type,
                    successors=[hex(succ.start_ea) for succ in block.succs()],
                    predecessors=[hex(pred.start_ea) for pred in block.preds()],
                )
                for block in idaapi.FlowChart(func)
            ]

            # Apply pagination
            total_blocks = len(all_blocks)
            page_end = offset + max_blocks
            blocks = all_blocks[offset:page_end]
            more = page_end < total_blocks

            results.append(
                {
                    "addr": fn_addr,
                    "blocks": blocks,
                    "count": len(blocks),
                    "total_blocks": total_blocks,
                    "cursor": {"next": page_end} if more else {"done": True},
                    "error": None,
                }
            )
        except Exception as e:
            results.append(
                {
                    "addr": fn_addr,
                    "error": str(e),
                    "blocks": [],
                    "cursor": {"done": True},
                }
            )
    return results
701
+
702
+
703
+ # ============================================================================
704
+ # Search Operations
705
+ # ============================================================================
706
+
707
+
708
def _find_page_entry(
    query,
    matches: list[str],
    offset: int,
    limit: int,
    more: bool,
    error: str | None = None,
) -> dict:
    """Build one per-target result entry in the shape `find` returns."""
    return {
        "query": query,
        "matches": matches,
        "count": len(matches),
        "cursor": {"next": offset + limit} if more else {"done": True},
        "error": error,
    }


def _find_string_matches(pattern, limit: int, offset: int) -> dict:
    """Raw byte search for the UTF-8 encoding of ``pattern`` across the binary."""
    query = str(pattern)
    needle = query.encode("utf-8")
    if not needle:
        return _find_page_entry(query, [], offset, limit, False, "Empty pattern")

    matches: list[str] = []
    skipped = 0
    more = False
    error = None
    try:
        ea = ida_ida.inf_get_min_ea()
        max_ea = ida_ida.inf_get_max_ea()
        mask = b"\xff" * len(needle)
        flags = ida_bytes.BIN_SEARCH_FORWARD | ida_bytes.BIN_SEARCH_NOSHOW
        while ea != idaapi.BADADDR:
            ea = ida_bytes.bin_search(ea, max_ea, needle, mask, len(needle), flags)
            if ea == idaapi.BADADDR:
                break
            if skipped < offset:
                skipped += 1
            else:
                matches.append(hex(ea))
                if len(matches) >= limit:
                    # Page is full: probe once more to learn whether another
                    # page exists.
                    next_ea = ida_bytes.bin_search(
                        ea + 1, max_ea, needle, mask, len(needle), flags
                    )
                    more = next_ea != idaapi.BADADDR
                    break
            # Resume one byte further so overlapping matches are found.
            ea += 1
    except Exception as e:
        # Keep the partial result but report that the search did not complete.
        error = str(e)

    return _find_page_entry(query, matches, offset, limit, more, error)


def _find_immediate_matches(target, limit: int, offset: int) -> dict:
    """Find instructions in executable segments using ``target`` as an immediate."""
    value = target
    if isinstance(value, str):
        try:
            value = int(value, 0)
        except ValueError:
            # Previously an unparsable string silently became 0 and the search
            # returned matches for an immediate nobody asked for.
            return _find_page_entry(
                target, [], offset, limit, False, f"Invalid immediate value: {target!r}"
            )

    matches: list[str] = []
    skipped = 0
    more = False
    error = None
    try:
        candidates = _value_candidates_for_immediate(value)
        if not candidates:
            return _find_page_entry(
                value, [], offset, limit, False, "Immediate out of range"
            )

        # The same instruction can be hit by several byte patterns; report it once.
        seen_insn = set()
        for seg_ea in idautils.Segments():
            seg = idaapi.getseg(seg_ea)
            if not seg or not (seg.perm & idaapi.SEGPERM_EXEC):
                continue
            for normalized, size, pattern_bytes in candidates:
                ea = seg.start_ea
                while ea != idaapi.BADADDR and ea < seg.end_ea:
                    ea = ida_bytes.bin_search(
                        ea,
                        seg.end_ea,
                        pattern_bytes,
                        b"\xff" * size,
                        size,
                        ida_bytes.BIN_SEARCH_FORWARD,
                    )
                    if ea == idaapi.BADADDR:
                        break

                    # A raw byte hit only counts when it is really the
                    # immediate operand of a decodable instruction.
                    insn_start = _resolve_immediate_insn_start(
                        ea, value, seg.start_ea, normalized
                    )
                    if insn_start is not None and insn_start not in seen_insn:
                        seen_insn.add(insn_start)
                        if skipped < offset:
                            skipped += 1
                        else:
                            matches.append(hex(insn_start))
                            if len(matches) >= limit:
                                more = True
                                break

                    ea += 1

                if more:
                    break
            if more:
                break
    except Exception as e:
        # Keep the partial result but report that the search did not complete.
        error = str(e)

    return _find_page_entry(value, matches, offset, limit, more, error)


def _find_ref_matches(target_str, limit: int, offset: int, code: bool) -> dict:
    """List code (``code=True``) or data references to the address ``target_str``."""
    query = str(target_str)
    try:
        target = parse_address(query)
        refs = idautils.CodeRefsTo(target, 0) if code else idautils.DataRefsTo(target)
        # Skip `offset` items and take one extra to learn whether more remain.
        page = [hex(ref) for ref in islice(refs, offset, offset + limit + 1)]
        more = len(page) > limit
        return _find_page_entry(query, page[:limit], offset, limit, more)
    except Exception as e:
        return _find_page_entry(query, [], offset, limit, False, str(e))


@tool
@idasync
def find(
    type: Annotated[
        str, "Search type: 'string', 'immediate', 'data_ref', or 'code_ref'"
    ],
    targets: Annotated[
        list[str | int] | str | int, "Search targets (strings, integers, or addresses)"
    ],
    limit: Annotated[int, "Max matches per target (default: 1000, max: 10000)"] = 1000,
    offset: Annotated[int, "Skip first N matches (default: 0)"] = 0,
) -> list[dict]:
    """Search for patterns in the binary (strings, immediate values, or references)"""
    # Returns one entry per target with the keys "query", "matches", "count",
    # "cursor" and "error" ("error" is None when the search completed).
    if not isinstance(targets, list):
        targets = [targets]

    # Enforce max limit to prevent token overflow
    if limit <= 0 or limit > 10000:
        limit = 10000
    # islice rejects negative start values and the cursor arithmetic assumes a
    # non-negative offset, so clamp it.
    if offset < 0:
        offset = 0

    if type == "string":
        return [_find_string_matches(t, limit, offset) for t in targets]
    if type == "immediate":
        return [_find_immediate_matches(t, limit, offset) for t in targets]
    if type == "data_ref":
        return [_find_ref_matches(t, limit, offset, code=False) for t in targets]
    if type == "code_ref":
        return [_find_ref_matches(t, limit, offset, code=True) for t in targets]

    return [
        _find_page_entry(None, [], offset, limit, False, f"Unknown search type: {type}")
    ]
946
+
947
+
948
def _resolve_insn_scan_ranges(
    pattern: dict, allow_broad: bool
) -> tuple[list[tuple[int, int]], str | None]:
    """Translate the scope keys of ``pattern`` into address ranges to scan.

    Scope keys are honoured in this order: ``func``, then ``segment``, then
    ``start``/``end``. Without any of them, all executable segments are
    returned, but only when ``allow_broad`` is true.

    Returns ``(ranges, None)`` on success or ``([], message)`` on failure,
    where each range is a ``(start_ea, end_ea)`` pair.
    """
    func_addr = pattern.get("func")
    segment_name = pattern.get("segment")
    start_s = pattern.get("start")
    end_s = pattern.get("end")

    exec_segments = [
        seg
        for seg in map(idaapi.getseg, idautils.Segments())
        if seg and (seg.perm & idaapi.SEGPERM_EXEC)
    ]

    # Scope 1: a single function.
    if func_addr is not None:
        try:
            func = idaapi.get_func(parse_address(func_addr))
            if func:
                return [(func.start_ea, func.end_ea)], None
            return [], f"Function not found at {func_addr}"
        except Exception as exc:
            return [], str(exc)

    # Scope 2: one executable segment, selected by name.
    if segment_name is not None:
        named = next(
            (seg for seg in exec_segments if idaapi.get_segm_name(seg) == segment_name),
            None,
        )
        if named is None:
            return [], f"Executable segment not found: {segment_name}"
        return [(named.start_ea, named.end_ea)], None

    # Scope 3: an explicit start (and optional end) address.
    if start_s is not None or end_s is not None:
        if start_s is None:
            return [], "start is required when end is set"
        try:
            start_ea = parse_address(start_s)
            end_ea = None if end_s is None else parse_address(end_s)
        except Exception as exc:
            return [], str(exc)

        if not exec_segments:
            return [], "No executable segments found"

        if end_ea is None:
            # Open-ended request: scan to the end of the segment holding start.
            owner = idaapi.getseg(start_ea)
            if not (owner and (owner.perm & idaapi.SEGPERM_EXEC)):
                return [], "start address not in executable segment"
            end_ea = owner.end_ea

        if end_ea <= start_ea:
            return [], "end must be greater than start"

        # Clip the requested window against every executable segment.
        clipped = (
            (max(seg.start_ea, start_ea), min(seg.end_ea, end_ea))
            for seg in exec_segments
        )
        ranges = [(low, high) for low, high in clipped if high > low]
        if not ranges:
            return [], "No executable ranges within start/end"
        return ranges, None

    # No scope given at all.
    if not allow_broad:
        return [], "Scope required: set func/segment/start/end or allow_broad=true"
    if not exec_segments:
        return [], "No executable segments found"
    return [(seg.start_ea, seg.end_ea) for seg in exec_segments], None
1018
+
1019
+
1020
def _scan_insn_ranges(
    ranges: list[tuple[int, int]],
    mnem: str,
    op0_val: int | None,
    op1_val: int | None,
    op2_val: int | None,
    any_val: int | None,
    limit: int,
    offset: int,
    max_scan_insns: int,
) -> tuple[list[str], bool, int, bool, int | None]:
    """Scan address ranges for instructions matching the given filters.

    Each ``(start, end)`` pair in ``ranges`` is walked head by head while the
    cursor is below ``end``. An address counts as a match when it decodes,
    its mnemonic equals ``mnem`` (skipped when ``mnem`` is empty), every
    non-``None`` ``opN_val`` equals the value of operand ``N``, and, when
    ``any_val`` is given, at least one operand before the first ``o_void``
    slot (at most eight are inspected) has that value.

    The first ``offset`` matches are discarded. Collection stops as soon as
    one match beyond ``limit`` is seen; that extra match is dropped and
    reported through the ``more`` flag.

    Returns a tuple ``(matches, more, scanned, truncated, next_start)``:

    * ``matches``    - hex strings of the matching addresses.
    * ``more``       - ``True`` when a match beyond ``limit`` was found.
    * ``scanned``    - number of addresses visited, including ones that
      failed to decode.
    * ``truncated``  - ``True`` when ``max_scan_insns`` stopped the scan.
    * ``next_start`` - address the scan stopped at when truncated, else
      ``None``.
    """
    positional_filters = (op0_val, op1_val, op2_val)

    hits: list[str] = []
    n_skipped = 0
    n_scanned = 0
    has_more = False
    hit_scan_cap = False
    resume_ea: int | None = None

    for range_start, range_end in ranges:
        cursor = range_start
        while cursor < range_end:
            # Enforce the scan budget before touching the next address so the
            # caller can resume exactly where the scan stopped.
            if n_scanned >= max_scan_insns:
                hit_scan_cap = True
                resume_ea = cursor
                break
            n_scanned += 1

            insn = _decode_insn_at(cursor)
            # Undecodable addresses and mnemonic mismatches are rejected
            # before any operand is inspected.
            is_hit = insn is not None and not (mnem and _insn_mnem(insn) != mnem)

            if is_hit:
                for idx, wanted in enumerate(positional_filters):
                    if wanted is not None and _operand_value(insn, idx) != wanted:
                        is_hit = False

            if is_hit and any_val is not None:
                is_hit = False
                for idx in range(8):
                    if _operand_type(insn, idx) == ida_ua.o_void:
                        break
                    if _operand_value(insn, idx) == any_val:
                        is_hit = True
                        break

            if is_hit:
                if n_skipped < offset:
                    n_skipped += 1
                else:
                    hits.append(hex(cursor))
                    if len(hits) > limit:
                        # One match past the limit proves there is more;
                        # drop the surplus and stop.
                        has_more = True
                        del hits[limit:]
                        break

            # Single advance point shared by the match and no-match paths.
            cursor = _next_head(cursor, range_end)
            if cursor == idaapi.BADADDR:
                break

        if has_more or hit_scan_cap:
            break

    return hits, has_more, n_scanned, hit_scan_cap, resume_ea
1098
+
1099
+
1100
+ # ============================================================================
1101
+ # Export Operations
1102
+ # ============================================================================
1103
+
1104
+
1105
# Per-address failures (unparseable address, no function at the address, or
# any exception raised by a helper) are recorded as {"addr", "error"} entries
# instead of aborting the whole export. Those entries carry no "prototype"
# key, so they only appear in the "json"-style output.
# Any `format` value other than "c_header" or "prototypes" falls through to
# the final return, which is labelled "json"; the asm/code/xrefs fields are
# only collected when `format` is exactly "json".
@tool
@idasync
def export_funcs(
    addrs: Annotated[list[str] | str, "Function addresses to export"],
    format: Annotated[
        str, "Export format: json (default), c_header, or prototypes"
    ] = "json",
) -> dict:
    """Export function data in various formats"""
    # NOTE(review): the docstring is presumably surfaced by @tool as the tool
    # description, so it is kept verbatim - confirm before rewording it.
    wants_details = format == "json"
    exported = []

    for addr in normalize_list_input(addrs):
        try:
            ea = parse_address(addr)
            func = idaapi.get_func(ea)
            if func:
                entry = {
                    "addr": addr,
                    "name": ida_funcs.get_func_name(func.start_ea),
                    "prototype": get_prototype(func),
                    "size": hex(func.end_ea - func.start_ea),
                    "comments": get_all_comments(ea),
                }
                if wants_details:
                    # Disassembly, pseudocode and xrefs are only gathered for
                    # the full JSON export.
                    entry["asm"] = get_assembly_lines(ea)
                    entry["code"] = decompile_function_safe(ea)
                    entry["xrefs"] = get_all_xrefs(ea)
                exported.append(entry)
            else:
                exported.append({"addr": addr, "error": "Function not found"})
        except Exception as e:
            exported.append({"addr": addr, "error": str(e)})

    if format == "c_header":
        # One declaration per function that has a non-empty prototype.
        header_lines = ["// Auto-generated by IDA Pro MCP", ""]
        header_lines.extend(
            f"{item['prototype']};" for item in exported if item.get("prototype")
        )
        return {"format": "c_header", "content": "\n".join(header_lines)}

    if format == "prototypes":
        return {
            "format": "prototypes",
            "functions": [
                {"name": item.get("name"), "prototype": item["prototype"]}
                for item in exported
                if item.get("prototype")
            ],
        }

    return {"format": "json", "functions": exported}
1162
+
1163
+
1164
+ # ============================================================================
1165
+ # Graph Operations
1166
+ # ============================================================================
1167
+
1168
+
1169
# Result shape: one dict per root with "root", "nodes", "edges", the effective
# limits, "truncated"/"limit_reason" (global node or edge budget exhausted),
# "per_func_capped" (at least one function had edges dropped because of
# max_edges_per_func) and "error". Failures for a root are reported in its
# "error" field with empty "nodes"/"edges" rather than raised.
@tool
@idasync
def callgraph(
    roots: Annotated[
        list[str] | str, "Root function addresses to start call graph traversal from"
    ],
    max_depth: Annotated[int, "Maximum depth for call graph traversal"] = 5,
    max_nodes: Annotated[
        int, "Max nodes across the graph (default: 1000, max: 100000)"
    ] = 1000,
    max_edges: Annotated[
        int, "Max edges across the graph (default: 5000, max: 200000)"
    ] = 5000,
    max_edges_per_func: Annotated[
        int, "Max edges per function (default: 200, max: 5000)"
    ] = 200,
) -> list[dict]:
    """Build call graph starting from root functions"""
    # NOTE(review): the docstring is presumably surfaced by @tool as the tool
    # description, so it is kept verbatim - confirm before rewording it.
    roots = normalize_list_input(roots)

    # Clamp the limits: a negative depth means "root only"; non-positive or
    # oversized budgets fall back to the documented maxima.
    if max_depth < 0:
        max_depth = 0
    if max_nodes <= 0 or max_nodes > 100000:
        max_nodes = 100000
    if max_edges <= 0 or max_edges > 200000:
        max_edges = 200000
    if max_edges_per_func <= 0 or max_edges_per_func > 5000:
        max_edges_per_func = 5000
    results = []

    for root in roots:
        try:
            ea = parse_address(root)
            func = idaapi.get_func(ea)
            if not func:
                results.append(
                    {
                        "root": root,
                        "error": "Function not found",
                        "nodes": [],
                        "edges": [],
                    }
                )
                continue

            nodes = {}
            edges = []
            visited = set()
            truncated = False
            per_func_capped = False
            limit_reason = None

            def hit_limit(reason: str):
                """Record that a graph-wide budget was exhausted."""
                nonlocal truncated, limit_reason
                truncated = True
                limit_reason = reason

            def traverse(addr, depth):
                """Depth-first walk over callees, honouring all budgets."""
                nonlocal per_func_capped
                if truncated:
                    return
                if depth > max_depth or addr in visited:
                    return
                if len(nodes) >= max_nodes:
                    hit_limit("nodes")
                    return
                visited.add(addr)

                f = idaapi.get_func(addr)
                if not f:
                    return

                func_name = ida_funcs.get_func_name(f.start_ea)
                nodes[hex(addr)] = {
                    "addr": hex(addr),
                    "name": func_name,
                    "depth": depth,
                }

                # Get callees
                edges_added = 0
                capped_here = False
                for item_ea in idautils.FuncItems(f.start_ea):
                    if truncated or capped_here:
                        break
                    for xref in idautils.CodeRefsFrom(item_ea, 0):
                        if truncated:
                            break
                        callee_func = idaapi.get_func(xref)
                        if not callee_func:
                            continue
                        callee_ea = callee_func.start_ea
                        # CodeRefsFrom(..., 0) reports jump refs as well as
                        # call refs, so ordinary branches inside this
                        # function would show up as self-"calls" and eat the
                        # per-function edge budget. Only a reference to the
                        # function's own entry point is real recursion.
                        if callee_ea == f.start_ea and xref != callee_ea:
                            continue
                        if edges_added >= max_edges_per_func:
                            # Flag the cap only when an edge is actually
                            # being dropped, so the caller can trust it.
                            per_func_capped = True
                            capped_here = True
                            break
                        if len(edges) >= max_edges:
                            hit_limit("edges")
                            break
                        edges.append(
                            {
                                "from": hex(addr),
                                "to": hex(callee_ea),
                                "type": "call",
                            }
                        )
                        edges_added += 1
                        traverse(callee_ea, depth + 1)

            # Start from the function entry even when the caller passed an
            # address inside the function; otherwise the same function could
            # appear under two different node addresses.
            traverse(func.start_ea, 0)

            results.append(
                {
                    "root": root,
                    "nodes": list(nodes.values()),
                    "edges": edges,
                    "max_depth": max_depth,
                    "truncated": truncated,
                    "limit_reason": limit_reason,
                    "max_nodes": max_nodes,
                    "max_edges": max_edges,
                    "max_edges_per_func": max_edges_per_func,
                    "per_func_capped": per_func_capped,
                    "error": None,
                }
            )

        except Exception as e:
            results.append({"root": root, "error": str(e), "nodes": [], "edges": []})

    return results