PyPI - ida-code - Versions diffs - 0.2.1__py3-none-any.whl - Mend

ida-code 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

ida_code/__init__.py +2 -0
ida_code/_search_utils.py +33 -0
ida_code/comments.py +191 -0
ida_code/config.py +9 -0
ida_code/doc_search.py +255 -0
ida_code/example_search.py +570 -0
ida_code/executor.py +145 -0
ida_code/guidelines.py +370 -0
ida_code/macho.py +67 -0
ida_code/prompts.py +176 -0
ida_code/server.py +1011 -0
ida_code/session.py +293 -0
ida_code/snapshots.py +110 -0
ida_code/structures.py +227 -0
ida_code/undo.py +102 -0
ida_code/variables.py +206 -0
ida_code-0.2.1.dist-info/METADATA +167 -0
ida_code-0.2.1.dist-info/RECORD +21 -0
ida_code-0.2.1.dist-info/WHEEL +4 -0
ida_code-0.2.1.dist-info/entry_points.txt +2 -0
ida_code-0.2.1.dist-info/licenses/LICENSE +21 -0

ida_code/guidelines.py ADDED Viewed

@@ -0,0 +1,370 @@
+"""Coding guidelines and templates for IDAPython development."""
+_STANDALONE_SCRIPT = """\
+# Standalone idalib Script
+## Overview
+A standalone script uses idalib to analyze binaries outside the IDA GUI.
+It runs as a normal Python program and loads IDA's analysis engine in-process
+via the `idapro` package. Use this when you need batch processing, CI
+integration, or headless analysis.
+## Template
+```python
+#!/usr/bin/env python3
+\"\"\"Standalone idalib analysis script.\"\"\"
+import os
+import sys
+from pathlib import Path
+# --- idalib bootstrap (must happen before any ida_* imports) ---
+IDA_DIR = os.environ.get("IDA_INSTALL_DIR")
+if not IDA_DIR:
+    print("Error: IDA_INSTALL_DIR env is not set")
+    sys.exit(1)
+IDA_DIR = Path(IDA_DIR)
+sys.path.insert(0, str(IDA_DIR / "idalib" / "python"))
+os.environ.setdefault("IDADIR", str(IDA_DIR))
+import idapro  # Must be first — before any ida_* modules
+# --- Now safe to import ida_* ---
+import ida_funcs
+import ida_bytes
+import ida_name
+import idautils
+import idc
+# import ida_hexrays  # Only if Hex-Rays decompiler is available
+def analyze(binary_path: str) -> None:
+    \"\"\"Main analysis logic.\"\"\"
+    rc = idapro.open_database(binary_path, True)  # True = wait for auto-analysis
+    if rc != 0:
+        print(f"Error: open_database returned {rc}")
+        return
+    try:
+        # --- Your analysis code here ---
+        for ea in idautils.Functions():
+            name = ida_funcs.get_func_name(ea)
+            print(f"{ea:#x}  {name}")
+    finally:
+        idapro.close_database()
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print(f"Usage: {sys.argv[0]} <binary>")
+        sys.exit(1)
+    analyze(sys.argv[1])
+```
+## Key Constraints
+1. **Import order** — `import idapro` MUST come before any `ida_*` imports.
+   Violating this causes segfaults or ImportError.
+2. **Single database** — idalib supports only one open database at a time.
+   Close before opening another.
+3. **Single thread** — All `ida_*` calls must come from the thread that opened
+   the database.
+4. **auto_analysis** — Pass `True` to `open_database` to wait for IDA's
+   auto-analysis to finish. Pass `False` for pre-analyzed databases (.i64/.idb).
+5. **Cleanup** — Always call `idapro.close_database()` in a `try/finally` block.
+## Common Patterns
+### Iterate functions
+```python
+for ea in idautils.Functions():
+    func = ida_funcs.get_func(ea)
+    name = ida_funcs.get_func_name(ea)
+    size = func.size()
+```
+### Read bytes
+```python
+data = ida_bytes.get_bytes(ea, size)
+```
+### Cross-references to an address
+```python
+for xref in idautils.XrefsTo(ea):
+    print(f"  referenced from {xref.frm:#x}")
+```
+### Decompile (requires Hex-Rays)
+```python
+import ida_hexrays
+cfunc = ida_hexrays.decompile(ea)
+if cfunc:
+    print(cfunc)
+```
+"""
+_PLUGIN = """\
+# IDA Plugin
+## Overview
+An IDA plugin is loaded inside the IDA GUI. It subclasses `idaapi.plugin_t`
+and is placed in IDA's `plugins/` directory. Plugins can add menu items,
+register hotkeys, hook into events, and extend the UI.
+## Template
+```python
+\"\"\"My IDA Plugin — brief description.\"\"\"
+import idaapi
+import ida_kernwin
+class MyPlugin(idaapi.plugin_t):
+    flags = idaapi.PLUGIN_PROC   # Loaded with the processor module, stays resident
+    comment = "Brief description"
+    help = "Extended help text"
+    wanted_name = "My Plugin"     # Shown in Edit > Plugins
+    wanted_hotkey = "Ctrl-Alt-M"  # Hotkey to trigger run()
+    def init(self):
+        \"\"\"Called when IDA loads the plugin. Return PLUGIN_KEEP to stay loaded.\"\"\"
+        print(f"[{self.wanted_name}] Loaded")
+        return idaapi.PLUGIN_KEEP
+    def run(self, arg):
+        \"\"\"Called when the user activates the plugin (hotkey or menu).\"\"\"
+        print(f"[{self.wanted_name}] Running")
+    def term(self):
+        \"\"\"Called when IDA shuts down or unloads the plugin.\"\"\"
+        print(f"[{self.wanted_name}] Unloaded")
+def PLUGIN_ENTRY():
+    \"\"\"Required entry point — IDA calls this to instantiate the plugin.\"\"\"
+    return MyPlugin()
+```
+## Plugin Flags
+| Flag | Meaning |
+|------|---------|
+| `PLUGIN_PROC` | Load when a processor module is loaded (most common) |
+| `PLUGIN_FIX`  | Load at startup, never unload |
+| `PLUGIN_HIDE` | Don't show in the Plugins menu |
+| `PLUGIN_UNL`  | Unload after each `run()` call |
+| `PLUGIN_MULTI` | Can have multiple instances (IDA 7.4+) |
+## init() Return Values
+| Return | Meaning |
+|--------|---------|
+| `PLUGIN_KEEP` | Keep the plugin loaded |
+| `PLUGIN_OK`   | Keep loaded, can be unloaded by IDA if needed |
+| `PLUGIN_SKIP` | Do not load (wrong file type, missing dependency, etc.) |
+## Adding Menu Items (Action-Based)
+```python
+class MyActionHandler(idaapi.action_handler_t):
+    def activate(self, ctx):
+        print("Action triggered!")
+        return 1
+    def update(self, ctx):
+        return idaapi.AST_ENABLE_ALWAYS
+# In init():
+action_desc = idaapi.action_desc_t(
+    "my_plugin:my_action",   # Unique action name
+    "My Action",              # Display text
+    MyActionHandler(),        # Handler instance
+    "Ctrl-Shift-M",          # Hotkey (optional)
+    "Tooltip text",           # Tooltip (optional)
+)
+idaapi.register_action(action_desc)
+idaapi.attach_action_to_menu(
+    "Edit/Plugins/",          # Menu path
+    "my_plugin:my_action",    # Action name
+    idaapi.SETMENU_APP,
+)
+# In term():
+idaapi.unregister_action("my_plugin:my_action")
+```
+## Hooks
+### UI Hooks
+```python
+class MyUIHooks(ida_kernwin.UI_Hooks):
+    def finish_populating_widget_popup(self, widget, popup_handle, ctx):
+        idaapi.attach_action_to_popup(
+            widget, popup_handle, "my_plugin:my_action",
+        )
+# In init():  hooks = MyUIHooks(); hooks.hook()
+# In term():  hooks.unhook()
+```
+### IDB Hooks (database events)
+```python
+import ida_idp
+class MyIDBHooks(ida_idp.IDB_Hooks):
+    def auto_empty_finally(self):
+        # Called when auto-analysis completes
+        return 0
+# In init():  hooks = MyIDBHooks(); hooks.hook()
+# In term():  hooks.unhook()
+```
+## Key Constraints
+1. **`PLUGIN_ENTRY()`** — Required top-level function. IDA calls it to get the
+   plugin instance.
+2. **File location** — Place the `.py` file in `$IDA_DIR/plugins/` for auto-load,
+   or use `ida_loader.load_plugin(path)` for dynamic loading.
+3. **GUI thread** — All UI operations must run on IDA's main thread. Use
+   `idaapi.execute_sync()` if calling from another thread.
+4. **init() gating** — Return `PLUGIN_SKIP` if the plugin doesn't apply to the
+   current database (e.g., wrong architecture).
+5. **Cleanup in term()** — Unhook all hooks, unregister all actions, free
+   resources. Failing to do so causes crashes on exit or reload.
+"""
+_IDAPYTHON_SCRIPT = """\
+# IDAPython Script (In-GUI)
+## Overview
+A classic IDAPython script runs inside the IDA GUI via File > Script File
+(Alt-F7) or the output window's Python console. All `ida_*` modules are
+already available — no bootstrap needed. Use this for quick analysis tasks,
+one-off automation, and interactive exploration.
+## Template
+```python
+\"\"\"IDAPython script — brief description.
+Run via File > Script File (Alt-F7) or the Python console.
+\"\"\"
+import ida_funcs
+import ida_bytes
+import ida_name
+import idautils
+import idc
+import ida_kernwin
+def main():
+    \"\"\"Main script logic.\"\"\"
+    ea = ida_kernwin.get_screen_ea()  # Current cursor address
+    func = ida_funcs.get_func(ea)
+    if not func:
+        print(f"No function at {ea:#x}")
+        return
+    name = ida_funcs.get_func_name(func.start_ea)
+    print(f"Current function: {name} ({func.start_ea:#x} - {func.end_ea:#x})")
+    # --- Your analysis code here ---
+    for head in idautils.Heads(func.start_ea, func.end_ea):
+        disasm = idc.GetDisasm(head)
+        print(f"  {head:#x}  {disasm}")
+if __name__ == "__main__":
+    main()
+```
+## Key Differences from Standalone Scripts
+- **No bootstrap** — `ida_*` modules are pre-loaded by IDA. No `import idapro`,
+  no `sys.path` manipulation, no `open_database`/`close_database`.
+- **Database is already open** — the script operates on whichever database the
+  user has open in IDA.
+- **GUI available** — `ida_kernwin` functions work: dialogs, choosers, forms,
+  `get_screen_ea()`, etc.
+- **Output goes to IDA's Output window** — `print()` writes there, not to a
+  terminal.
+## Common Patterns
+### Get current cursor position
+```python
+ea = ida_kernwin.get_screen_ea()
+```
+### Ask the user for input
+```python
+val = ida_kernwin.ask_str("default", 0, "Enter a value:")
+ea = ida_kernwin.ask_addr(0, "Enter an address:")
+```
+### Show a chooser (list dialog)
+```python
+class MyChooser(ida_kernwin.Choose):
+    def __init__(self, items):
+        super().__init__("Title", [["Address", 16], ["Name", 30]])
+        self.items = items
+    def OnGetSize(self):
+        return len(self.items)
+    def OnGetLine(self, n):
+        return self.items[n]
+chooser = MyChooser([
+    [f"{ea:#x}", name]
+    for ea, name in my_results
+])
+chooser.Show()
+```
+### Color an address
+```python
+idc.set_color(ea, idc.CIC_ITEM, 0x00FF00)  # Green
+```
+### Add a comment
+```python
+idc.set_cmt(ea, "my comment", 0)       # Regular comment
+idc.set_cmt(ea, "my comment", 1)       # Repeatable comment
+```
+### Iterate all strings
+```python
+import ida_bytes
+for s in idautils.Strings():
+    print(f"{s.ea:#x}  {ida_bytes.get_strlit_contents(s.ea, s.length, s.strtype)}")
+```
+"""
+# Registry of guideline documents: maps each target name to its markdown text.
+# get() looks targets up here and list_targets() returns these keys.
+_GUIDELINES: dict[str, str] = {
+    "standalone_script": _STANDALONE_SCRIPT,
+    "plugin": _PLUGIN,
+    "idapython_script": _IDAPYTHON_SCRIPT,
+}
+def get(target: str) -> str:
+    """Look up the guideline document registered under *target*.
+    Returns the text stored in ``_GUIDELINES`` for *target*.
+    Raises ``KeyError`` naming the unknown target and listing every
+    registered target name when *target* has no entry.
+    """
+    guideline = _GUIDELINES.get(target)
+    if guideline is not None:
+        return guideline
+    # Unknown target: include the valid names so the caller can correct it.
+    known_targets = ", ".join(_GUIDELINES)
+    raise KeyError(f"Unknown target {target!r}. Available: {known_targets}")
+def list_targets() -> list[str]:
+    """List the name of every registered guideline target.
+    The names are the keys of ``_GUIDELINES``, in the mapping's own order.
+    """
+    return list(_GUIDELINES.keys())

ida_code/macho.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Fat Mach-O architecture listing and slice extraction using LIEF."""
+from __future__ import annotations
+import logging
+import lief
+log = logging.getLogger(__name__)
+def _arch_name(binary: lief.MachO.Binary) -> str:
+    """Derive a human-friendly architecture name from a Mach-O binary header.
+    The name is the lower-cased ``cpu_type`` enum name (e.g. ``"x86_64"``),
+    except that an ARM64 slice whose CPU subtype is 2 is reported as
+    ``"arm64e"``.
+    """
+    name = binary.header.cpu_type.name.lower()
+    # The top byte of cpu_subtype holds capability bits (CPU_SUBTYPE_MASK in
+    # <mach/machine.h>), such as the pointer-authentication ABI flag that makes
+    # an arm64e slice read 0x80000002. Mask it off so such slices still match.
+    subtype = int(binary.header.cpu_subtype) & 0x00FFFFFF
+    # ARM64 subtype 2 is arm64e (pointer authentication).
+    if name == "arm64" and subtype == 2:
+        return "arm64e"
+    return name
+def list_architectures(path: str) -> list[str]:
+    """Name every architecture slice found in the Mach-O file at *path*.
+    Produces e.g. ["x86_64", "arm64e"], one entry per slice as named by
+    ``_arch_name``. An empty list is returned when LIEF's parser yields
+    ``None`` or when the parsed container holds at most one slice.
+    """
+    universal = lief.MachO.parse(path)
+    # A container with a single slice is treated like a thin binary, so it is
+    # reported the same way as a file that could not be parsed at all.
+    if universal is None or len(universal) <= 1:
+        return []
+    return list(map(_arch_name, universal))
+def extract_slice(path: str, arch: str) -> str:
+    """Write one architecture slice of *path* to ``{path}.{arch}``.
+    Returns the path of the file that was written.
+    Raises ``ValueError`` when LIEF's parser yields ``None`` or at most one
+    slice for *path*, or when no slice matches *arch*.
+    """
+    universal = lief.MachO.parse(path)
+    if universal is None or len(universal) <= 1:
+        raise ValueError(f"Not a fat Mach-O: {path}")
+    # An exact name match ends the scan immediately; until one is found,
+    # remember the most recent slice whose bare cpu_type name equals *arch*.
+    selected = None
+    fallback = None
+    for candidate in universal:
+        if _arch_name(candidate) == arch:
+            selected = candidate
+            break
+        if candidate.header.cpu_type.name.lower() == arch:
+            fallback = candidate
+    selected = selected or fallback
+    if selected is None:
+        names = [_arch_name(candidate) for candidate in universal]
+        raise ValueError(
+            f"Architecture '{arch}' not found. Available: {names}"
+        )
+    destination = f"{path}.{arch}"
+    log.info("Extracting %s slice from %s -> %s", arch, path, destination)
+    selected.write(destination)
+    return destination

ida_code/prompts.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""MCP prompt templates for common IDA workflows."""
+from ida_code import guidelines
+_REVERSE_ENGINEER = """\
+# Reverse Engineering Workflow
+A structured approach to analyzing an unknown binary using ida-code MCP tools.
+## Phase 1: Reconnaissance
+Start by opening the binary and gathering high-level information.
+1. **Open the binary** — Use `open_database` with the path to the binary. \
+For fat (universal) Mach-O binaries, call `list_architectures` first to discover \
+available slices, then pass the desired `arch` to `open_database`.
+2. **Survey the database** — Call `get_database_info` to see the processor type, \
+bitness, segments, and entry points.
+3. **List functions** — Use `list_functions` to browse the function table. Start \
+with a small `limit` to get an overview, then paginate or use `name_filter`.
+4. **Enumerate strings** — Use `get_strings` to list strings in the database. \
+It searches both ASCII and UTF-16 strings. Use `name_filter` to search for \
+specific content and `min_length` to filter out noise.
+5. **Check imports/exports** — Use `get_imports` to list imported functions \
+grouped by module, and `get_exports` to list exported symbols.
+## Phase 2: Triage
+Prioritize which functions to analyze first.
+- **Name-based filtering** — Use `list_functions` with `name_filter` to find \
+functions related to security (`auth`, `crypt`, `hash`, `verify`, `sign`, \
+`key`), parsing (`parse`, `decode`, `deserialize`, `read`, `load`), networking \
+(`send`, `recv`, `connect`, `socket`, `http`), or file I/O (`open`, `write`, \
+`fopen`, `mmap`).
+- **Size-based prioritization** — Large functions often contain the most logic. \
+Sort by size to find the most complex code.
+- **String cross-references** — Interesting strings (error messages, format \
+strings, URLs, file paths) often lead to important code. Use `get_xrefs_to` \
+with a string's address to find which functions reference it.
+## Phase 3: Deep Analysis
+Dive into individual functions.
+1. **Decompile** — Use `decompile` with the function name or address. Read the \
+pseudocode to understand the logic.
+2. **Cross-reference tracing** — Use `get_xrefs_to` to find callers of a \
+function ("who calls this?") and `get_xrefs_from` to find callees ("what does \
+this call?"). The xref type field distinguishes calls, jumps, and data references.
+3. **Disassembly** — Use `get_disassembly` for instruction-level detail when the \
+decompiler output is unclear or for analyzing data sections.
+4. **Structure recovery** — When you identify structured data, use \
+`create_structure` to define it and `set_variable` to apply the type to variables.
+## Phase 4: Annotation
+Document your findings directly in the database.
+- **Rename functions** — Use `rename_function` to give meaningful names to \
+auto-named functions (e.g., rename `sub_3f08` to `parse_header`).
+- **Retype functions** — Use `retype_function` to fix function signatures \
+(e.g., `"int __fastcall(struct header *hdr, size_t len)"`).
+- **Rename variables** — Use `set_variable` to give meaningful names to local \
+and global variables (e.g., rename `v12` to `buffer_size`).
+- **Retype variables** — Use `set_variable` with `new_type` to apply correct C \
+types (e.g., `"struct my_header *"`).
+- **Add comments** — Use `set_comment` to annotate key addresses with your \
+findings. Use `function` type for function-level summaries, `regular` for \
+inline notes.
+- **Define structures** — Use `create_structure` and `edit_structure` to build \
+type definitions that match the binary's data layouts.
+## Phase 5: Iteration
+Reverse engineering is iterative — each pass reveals more.
+1. **Re-decompile** — After renaming and retyping, call `decompile` again. The \
+pseudocode will be dramatically more readable with proper names and types.
+2. **Verify with disassembly** — Use `get_disassembly` to confirm the decompiler's \
+interpretation matches the actual instructions.
+3. **Expand scope** — Follow cross-references to related functions and repeat \
+the analysis cycle.
+## Best Practices
+- **Prefer dedicated tools over `execute`** — Use `get_strings`, `get_imports`, \
+`get_exports`, `get_xrefs_to`, `get_xrefs_from`, `rename_function`, and \
+`retype_function` instead of writing IDAPython boilerplate via `execute`. They \
+return structured data, handle errors, and are faster to use.
+- **Use `execute` for custom analysis** — The `execute` tool gives you full \
+IDAPython access. Write custom scripts for pattern matching, data extraction, \
+or anything the dedicated tools don't cover.
+- **Search docs and examples** — Use `search_docs` to look up unfamiliar IDA \
+APIs. Use `search_examples` to find working IDAPython code patterns — it indexes \
+125 official examples with metadata, API usage, and source code.
+- **Snapshot before bulk changes** — Call `create_snapshot` before renaming or \
+retyping many symbols. Use `restore_snapshot` to roll back if something goes wrong.
+- **Work incrementally** — Rename and retype a few variables, re-decompile, \
+verify, then continue. Small batches are easier to validate.
+- **Namespace persistence** — Variables and functions defined via `execute` \
+persist across calls. Build up helper functions incrementally.
+- **Close when done** — Call `close_database` when analysis is complete to free \
+resources and save the database.
+"""
+_SCRIPT_BEST_PRACTICES = """\
+## IDAPython Best Practices
+### Error Handling
+- Always check return values: `ida_funcs.get_func()` returns `None` if no \
+function exists at the address.
+- Wrap `ida_hexrays.decompile()` in try/except — it raises \
+`DecompilationFailure` for functions the decompiler can't handle.
+- Call `ida_hexrays.init_hexrays_plugin()` before using any Hex-Rays APIs \
+and check the return value.
+### Performance
+- Cache `ida_name.get_name_ea()` lookups — name resolution is not free.
+- Use `ida_bytes.get_bytes(ea, size)` for bulk reads instead of reading \
+byte-by-byte with `ida_bytes.get_byte()`.
+- Prefer `idautils.Functions()`, `idautils.Heads()`, `idautils.XrefsTo()` \
+iterators over manual linked-list traversal.
+### Naming Conventions
+- `ea` — effective address (an integer, not a pointer)
+- `pfn` — pointer to `func_t` (from `ida_funcs.get_func()`)
+- `cfunc` — `cfunc_t` object (from `ida_hexrays.decompile()`)
+- `tif` — `tinfo_t` object (type information)
+- `ti` — `ida_typeinf` module
+### Common Pitfalls
+- **`idc` vs `ida_funcs`** — `idc` functions are thin wrappers with less \
+control. Prefer `ida_funcs`, `ida_bytes`, etc. for new code.
+- **String encoding** — `ida_bytes.get_strlit_contents()` returns `bytes`, \
+not `str`. Decode with `.decode('utf-8', errors='replace')` if needed.
+- **Address arithmetic** — Addresses are plain integers. Use `& 0xFFFFFFFF` \
+(32-bit) or `& 0xFFFFFFFFFFFFFFFF` (64-bit) to handle overflow, or better, \
+use the database's bitness from `ida_ida.inf_get_app_bitness()`.
+- **Segment boundaries** — Don't assume contiguous address space. Check \
+segment membership with `ida_segment.getseg(ea)` before accessing data.
+### MCP Tool Usage for Testing
+- Use `execute` to test script snippets interactively before assembling \
+the final script.
+- The execution namespace persists — define helpers in one call and use \
+them in the next.
+- Use `search_docs` and `search_examples` to find API patterns.
+"""
+def reverse_engineer() -> str:
+    """Return a comprehensive reverse engineering workflow guide.
+    The guide is the module-level ``_REVERSE_ENGINEER`` markdown text,
+    returned unchanged.
+    """
+    return _REVERSE_ENGINEER
+def create_script(target: str, description: str | None = None) -> str:
+    """Return coding guidelines for the given script type plus best practices.
+    *target* is one of: ``standalone_script``, ``plugin``, ``idapython_script``.
+    *description* is an optional description of what the script should do;
+    when non-empty it is appended under a ``## Task`` heading.
+    Raises ``ValueError`` listing the available targets if *target* is unknown.
+    """
+    try:
+        text = guidelines.get(target)
+    except KeyError:
+        available = ", ".join(guidelines.list_targets())
+        # The KeyError adds nothing beyond this message, so suppress it
+        # rather than report a second error "during handling" of the first.
+        raise ValueError(
+            f"Unknown target {target!r}. Available targets: {available}"
+        ) from None
+    parts = [text, _SCRIPT_BEST_PRACTICES]
+    if description:
+        parts.append(f"\n## Task\n\n{description}\n")
+    return "\n".join(parts)