ida-code 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {ida_code-0.2.3 → ida_code-0.2.4}/CHANGELOG.md +11 -0
  2. {ida_code-0.2.3 → ida_code-0.2.4}/PKG-INFO +3 -3
  3. {ida_code-0.2.3 → ida_code-0.2.4}/README.md +2 -2
  4. {ida_code-0.2.3 → ida_code-0.2.4}/pyproject.toml +1 -1
  5. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/code_search.py +1 -1
  6. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/doc_search.py +45 -24
  7. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/prompts.py +14 -4
  8. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/server.py +71 -23
  9. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_doc_search.py +17 -12
  10. {ida_code-0.2.3 → ida_code-0.2.4}/.gitignore +0 -0
  11. {ida_code-0.2.3 → ida_code-0.2.4}/LICENSE +0 -0
  12. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/__init__.py +0 -0
  13. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/_search_utils.py +0 -0
  14. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/comments.py +0 -0
  15. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/config.py +0 -0
  16. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/executor.py +0 -0
  17. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/guidelines.py +0 -0
  18. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/ida_thread.py +0 -0
  19. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/macho.py +0 -0
  20. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/session.py +0 -0
  21. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/snapshots.py +0 -0
  22. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/structures.py +0 -0
  23. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/undo.py +0 -0
  24. {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/variables.py +0 -0
  25. {ida_code-0.2.3 → ida_code-0.2.4}/tests/__init__.py +0 -0
  26. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_code_search.py +0 -0
  27. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_comments.py +0 -0
  28. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_e2e.py +0 -0
  29. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_executor.py +0 -0
  30. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_ida_thread.py +0 -0
  31. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_macho.py +0 -0
  32. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_search_utils.py +0 -0
  33. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_session.py +0 -0
  34. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_structures.py +0 -0
  35. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_undo.py +0 -0
  36. {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_variables.py +0 -0
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
6
6
 
7
+ ## [0.2.4] - 2026-05-19
8
+
9
+ ### Added
10
+
11
+ - **`get_guideline` tool** — tool-form companion to the `guidelines://*` resources. Same content, but tool listings are read more reliably than resource listings by cold LLM clients.
12
+
13
+ ### Changed
14
+
15
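+ - `search_docs` snippet cap is now word-based (`max_snippet_length` → `max_snippet_words`, default 25); callers that passed `max_snippet_length` by keyword must switch to the new name. The word-based cap avoids mid-word truncation and lines up better with token cost.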
16
+ - `search_docs` / `search_code` docstrings and the server `instructions` paragraph were rewritten to steer tool discovery — `search_code` for API lookups, `search_docs` narrowed to HTML prose, both pointing at `get_guideline` / `get_source`.
17
+
7
18
  ## [0.2.3] - 2026-05-10
8
19
 
9
20
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ida-code
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: MCP server for AI-assisted IDAPython scripting via idalib
5
5
  Project-URL: Homepage, https://github.com/Dil4rd/ida-code
6
6
  Project-URL: Repository, https://github.com/Dil4rd/ida-code
@@ -76,7 +76,7 @@ For other MCP clients, run the server directly:
76
76
  IDA_INSTALL_DIR=/opt/ida-pro-9.3 ida-code # stdio transport
77
77
  ```
78
78
 
79
- ## Tools (36)
79
+ ## Tools (37)
80
80
 
81
81
  Full parameter docs live in each tool's docstring — surfaced automatically to MCP clients via `tools/list`.
82
82
 
@@ -90,7 +90,7 @@ Full parameter docs live in each tool's docstring — surfaced automatically to
90
90
  | Snapshots | `list_snapshots`, `create_snapshot`, `restore_snapshot`, `delete_snapshot` |
91
91
  | Undo/redo | `get_undo_status`, `perform_undo`, `perform_redo` |
92
92
  | Inventory | `get_strings`, `get_imports`, `get_exports` |
93
- | Search | `search_docs`, `search_code`, `get_source` |
93
+ | Search | `search_docs`, `search_code`, `get_source`, `get_guideline` |
94
94
 
95
95
  ## Resources & prompts
96
96
 
@@ -47,7 +47,7 @@ For other MCP clients, run the server directly:
47
47
  IDA_INSTALL_DIR=/opt/ida-pro-9.3 ida-code # stdio transport
48
48
  ```
49
49
 
50
- ## Tools (36)
50
+ ## Tools (37)
51
51
 
52
52
  Full parameter docs live in each tool's docstring — surfaced automatically to MCP clients via `tools/list`.
53
53
 
@@ -61,7 +61,7 @@ Full parameter docs live in each tool's docstring — surfaced automatically to
61
61
  | Snapshots | `list_snapshots`, `create_snapshot`, `restore_snapshot`, `delete_snapshot` |
62
62
  | Undo/redo | `get_undo_status`, `perform_undo`, `perform_redo` |
63
63
  | Inventory | `get_strings`, `get_imports`, `get_exports` |
64
- | Search | `search_docs`, `search_code`, `get_source` |
64
+ | Search | `search_docs`, `search_code`, `get_source`, `get_guideline` |
65
65
 
66
66
  ## Resources & prompts
67
67
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ida-code"
3
- version = "0.2.3"
3
+ version = "0.2.4"
4
4
  description = "MCP server for AI-assisted IDAPython scripting via idalib"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -725,7 +725,7 @@ def search(
725
725
 
726
726
  if include_docs:
727
727
  from ida_code.doc_search import search as _search_docs
728
- doc_res = _search_docs(query, max_results=2, max_snippet_length=200, include_examples=False)
728
+ doc_res = _search_docs(query, max_results=2, max_snippet_words=30, include_examples=False)
729
729
  if doc_res.get("results"):
730
730
  out["related_docs"] = doc_res["results"]
731
731
 
@@ -61,7 +61,7 @@ def _ensure_indexes():
61
61
  def search(
62
62
  query: str,
63
63
  max_results: int = 5,
64
- max_snippet_length: int = 150,
64
+ max_snippet_words: int = 25,
65
65
  include_examples: bool = True,
66
66
  ) -> dict:
67
67
  """Search IDA HTML documentation. Returns a structured dict.
@@ -83,7 +83,7 @@ def search(
83
83
  for title, text, location in _html_docs:
84
84
  score = _score(terms, title, text)
85
85
  if score > 0:
86
- snippet = _excerpt(text, terms, max_len=max_snippet_length)
86
+ snippet = _excerpt(text, terms, max_words=max_snippet_words)
87
87
  results.append((score, title, snippet, f"docs: {location}"))
88
88
 
89
89
  results.sort(key=lambda r: r[0], reverse=True)
@@ -138,31 +138,52 @@ def _score(terms: list[str], title: str, text: str) -> float:
138
138
  return total
139
139
 
140
140
 
141
- def _excerpt(text: str, terms: list[str], max_len: int = 300) -> str:
142
- """Extract a snippet of *text* around the first matching term."""
141
+ def _excerpt(text: str, terms: list[str], max_words: int = 25) -> str:
142
+ """Extract a word-bounded snippet of *text* around the first matching term.
143
+
144
+ Words are tokens produced by ``str.split()`` (whitespace-separated).
145
+ The snippet is at most ``max_words`` words plus optional ``"..."``
146
+ ellipses on each side when the window doesn't cover the whole text.
147
+ Cutting on word boundaries avoids mid-word truncation that char-based
148
+ capping produces, and the unit aligns more closely with token cost
149
+ than character count does.
150
+ """
143
151
  if not text:
144
152
  return ""
145
153
 
154
+ words = text.split()
155
+ if not words:
156
+ return ""
157
+
146
158
  lowered = text.lower()
147
- best_pos = -1
159
+ best_char = -1
148
160
  for term in terms:
149
161
  pos = lowered.find(term)
150
- if pos != -1 and (best_pos == -1 or pos < best_pos):
151
- best_pos = pos
152
-
153
- if best_pos == -1:
154
- snippet = text[:max_len]
155
- else:
156
- half = max_len // 2
157
- start = max(0, best_pos - half)
158
- end = min(len(text), start + max_len)
159
- if end - start < max_len:
160
- start = max(0, end - max_len)
161
- snippet = text[start:end]
162
- if start > 0:
163
- snippet = "..." + snippet
164
- if end < len(text):
165
- snippet = snippet + "..."
166
-
167
- # Collapse whitespace for readability.
168
- return " ".join(snippet.split())
162
+ if pos != -1 and (best_char == -1 or pos < best_char):
163
+ best_char = pos
164
+
165
+ # Locate which word index the best-match character lands in.
166
+ match_word_idx = 0
167
+ if best_char != -1:
168
+ # Re-tokenise on the same boundaries: walk words, tracking the
169
+ # original text offset by re-finding each word from the cursor.
170
+ cursor = 0
171
+ for i, w in enumerate(words):
172
+ cursor = text.find(w, cursor)
173
+ if cursor == -1 or cursor > best_char:
174
+ break
175
+ match_word_idx = i
176
+ cursor += len(w)
177
+
178
+ half = max_words // 2
179
+ start = max(0, match_word_idx - half)
180
+ end = min(len(words), start + max_words)
181
+ if end - start < max_words:
182
+ start = max(0, end - max_words)
183
+
184
+ snippet = " ".join(words[start:end])
185
+ if start > 0:
186
+ snippet = "..." + snippet
187
+ if end < len(words):
188
+ snippet = snippet + "..."
189
+ return snippet
@@ -91,9 +91,15 @@ return structured data, handle errors, and are faster to use.
91
91
  - **Use `execute` for custom analysis** — The `execute` tool gives you full \
92
92
  IDAPython access. Write custom scripts for pattern matching, data extraction, \
93
93
  or anything the dedicated tools don't cover.
94
- - **Search docs and examples** — Use `search_docs` to look up unfamiliar IDA \
95
- APIs. Use `search_examples` to find working IDAPython code patterns it indexes \
96
- 125 official examples with metadata, API usage, and source code.
94
+ - **Search docs and code** — Use `search_docs` for IDA HTML documentation. \
95
+ Use `search_code` to find Python source library API definitions and \
96
+ working example scripts in one query. Library entries show `def` signatures \
97
+ + docstrings; example entries cover the in-IDA `python/examples` and the \
98
+ standalone-idalib `idalib/examples` corpora. For "everything about func X", \
99
+ a single `search_code("X")` call returns the API definition, example uses, \
100
+ and cross-linked HTML docs. When a snippet is truncated, the result carries \
101
+ `snippet_start_line` + `total_lines`; pass the same `file` to `get_source` \
102
+ to fetch additional lines (sandboxed to the indexed corpora).
97
103
  - **Snapshot before bulk changes** — Call `create_snapshot` before renaming or \
98
104
  retyping many symbols. Use `restore_snapshot` to roll back if something goes wrong.
99
105
  - **Work incrementally** — Rename and retype a few variables, re-decompile, \
@@ -146,7 +152,11 @@ segment membership with `ida_segment.getseg(ea)` before accessing data.
146
152
  the final script.
147
153
  - The execution namespace persists — define helpers in one call and use \
148
154
  them in the next.
149
- - Use `search_docs` and `search_examples` to find API patterns.
155
+ - Use `search_docs` for IDA HTML documentation. Use `search_code` for \
156
+ Python source (API definitions + examples); set `docstring_only=True` \
157
+ when searching by intent ("function that opens a database") rather than \
158
+ identifier name. When a snippet is truncated, follow up with `get_source` \
159
+ to fetch the rest from the same `file`.
150
160
  """
151
161
 
152
162
 
@@ -36,7 +36,14 @@ mcp = FastMCP(
36
36
  "annotate code, and run IDAPython scripts.\n\n"
37
37
  "Typical workflow: open_database → list_functions → decompile → "
38
38
  "annotate (rename_function, set_comment, set_variable) → iterate.\n\n"
39
- "Only one database can be open at a time. Most tools require an open database."
39
+ "Only one database can be open at a time. Most tools require an open database.\n\n"
40
+ "Discovery: `guidelines://standalone_script`, `guidelines://plugin`, and "
41
+ "`guidelines://idapython_script` resources hold code templates and Hex-Rays "
42
+ "coding conventions — read whichever matches your task before writing. The "
43
+ "`reverse_engineer` and `create_script` prompts walk through full workflows. "
44
+ "For Python API signatures, idapro module, or example scripts use `search_code` "
45
+ "(then `get_source` to fetch more lines from any file it returns); "
46
+ "`search_docs` is HTML prose only (user-guide, developer-guide)."
40
47
  ),
41
48
  )
42
49
 
@@ -85,6 +92,11 @@ async def open_database(
85
92
  *arch* selects a specific architecture slice from a fat (universal) Mach-O
86
93
  binary (e.g. "arm64e", "x86_64"). Use list_architectures to discover
87
94
  available slices. Ignored for non-fat binaries.
95
+
96
+ Writing a standalone idalib script that calls this? First call
97
+ ``get_guideline("standalone_script")`` for the bootstrap template
98
+ (sys.path / IDADIR setup) and Hex-Rays coding conventions — those aren't
99
+ in this docstring.
88
100
  """
89
101
  open_path, original_path = session._prepare_open(path, arch, overwrite)
90
102
  return await on_ida_thread(
@@ -337,30 +349,32 @@ async def list_functions(offset: int = 0, limit: int = 50, name_filter: str = ""
337
349
  return await on_ida_thread(_impl)
338
350
 
339
351
 
340
- @mcp.tool
341
- def search_docs(
342
- query: str,
343
- max_results: int = 5,
344
- max_snippet_length: int = 150,
345
- include_examples: bool = True,
346
- ) -> dict:
347
- """Look up IDA HTML documentation. No database needs to be open.
352
+ GuidelineTarget = Literal["standalone_script", "plugin", "idapython_script"]
348
353
 
349
- Use this to find prose explanations, user-guide / developer-guide
350
- chapters, and conceptual context.
351
354
 
352
- For Python source — library API signatures (``ida_*.py``, ``idapro``)
353
- and example scripts use ``search_code`` instead.
354
-
355
- Uses word-boundary matching: "set" matches "set_name" but not "reset".
356
-
357
- When *include_examples* is True (default), also returns up to 2 matching
358
- example scripts in the ``related_examples`` key (cross-linked from
359
- ``search_code`` with ``kind="example"``).
360
-
361
- *max_snippet_length* caps each snippet (default 150 chars).
355
+ @mcp.tool
356
+ def get_guideline(target: GuidelineTarget) -> str:
357
+ """Return the coding guideline for an IDA script type. No database needed.
358
+
359
+ Read this BEFORE writing any IDA Python code. Covers the bootstrap
360
+ template, key constraints (import order, single-thread, single-database),
361
+ Hex-Rays coding conventions (avoid `idc.py`/`idaapi`/`from X import Y`,
362
+ double-quote strings), and the search_code / search_docs / get_source
363
+ workflow for finding APIs and examples.
364
+
365
+ Targets:
366
+ - ``standalone_script`` — uses idalib outside IDA. sys.path setup,
367
+ ``import idapro`` first, ``open_database`` / ``close_database``.
368
+ - ``plugin`` — IDA plugin loaded inside the GUI. Subclass of
369
+ ``idaapi.plugin_t``, ``PLUGIN_ENTRY`` factory, hooks, actions.
370
+ - ``idapython_script`` — classic IDAPython script run via File >
371
+ Script File or the Python console. No bootstrap needed.
372
+
373
+ Identical content is also available as the MCP resource
374
+ ``guidelines://<target>``; the tool form is offered because tool listings
375
+ are read more reliably than resource listings by most MCP clients.
362
376
  """
363
- return _search_docs(query, max_results, max_snippet_length, include_examples)
377
+ return _guidelines.get(target)
364
378
 
365
379
 
366
380
  CodeKind = Literal["library", "example", ""]
@@ -379,7 +393,12 @@ def search_code(
379
393
  max_snippet_line_chars: int = 200,
380
394
  include_docs: bool = True,
381
395
  ) -> dict:
382
- """Find Python source — library APIs and/or example scripts. No database needs to be open.
396
+ """Find Python source — API signatures, idapro module, and example scripts.
397
+
398
+ **Primary tool for "what's the signature of X?" or "show me code that does Y"**
399
+ queries — covers `ida_*.py`, `idautils.py`, `idc.py`, the standalone idalib
400
+ `idapro` package, plus all in-IDA and idalib example scripts. No database
401
+ needs to be open.
383
402
 
384
403
  Unified search over:
385
404
 
@@ -436,6 +455,35 @@ def search_code(
436
455
  )
437
456
 
438
457
 
458
+ @mcp.tool
459
+ def search_docs(
460
+ query: str,
461
+ max_results: int = 5,
462
+ max_snippet_words: int = 25,
463
+ include_examples: bool = True,
464
+ ) -> dict:
465
+ """Look up IDA *HTML prose* documentation (user-guide / developer-guide).
466
+
467
+ **For Python API signatures, idapro module, or example scripts, use
468
+ `search_code` instead** — this tool only indexes the HTML docs, not
469
+ Python source. No database needs to be open.
470
+
471
+ Use this for conceptual context and chapter-style explanations:
472
+ "what is auto-analysis", "how does the structure editor work", etc.
473
+
474
+ Uses word-boundary matching: "set" matches "set_name" but not "reset".
475
+
476
+ When *include_examples* is True (default), also returns up to 2 matching
477
+ example scripts in the ``related_examples`` key (cross-linked from
478
+ ``search_code`` with ``kind="example"``).
479
+
480
+ *max_snippet_words* caps each snippet at this many whitespace-separated
481
+ words (default 25). Word-based cap avoids mid-word truncation and aligns
482
+ more closely with LLM token cost than character cap.
483
+ """
484
+ return _search_docs(query, max_results, max_snippet_words, include_examples)
485
+
486
+
439
487
  @mcp.tool
440
488
  def get_source(file: str, start_line: int = 1, line_count: int = 200) -> dict:
441
489
  """Read a slice of a Python file from the indexed corpora. No database needed.
@@ -82,33 +82,38 @@ class TestScore:
82
82
 
83
83
  class TestExcerpt:
84
84
  def test_short_text_returned_fully(self):
85
- result = _excerpt("hello world", ["hello"], max_len=300)
85
+ result = _excerpt("hello world", ["hello"], max_words=10)
86
86
  assert "hello world" in result
87
87
 
88
88
  def test_excerpt_around_match(self):
89
- text = "A" * 200 + " TARGET " + "B" * 200
90
- result = _excerpt(text, ["target"], max_len=100)
89
+ text = " ".join(["before"] * 30 + ["TARGET"] + ["after"] * 30)
90
+ result = _excerpt(text, ["target"], max_words=10)
91
91
  assert "TARGET" in result
92
- assert len(result) < 200 # much shorter than original
92
+ assert len(result.split()) <= 11 # ellipses are glued to the edge words, so expect at most 10 tokens; 11 leaves one of slack
93
93
 
94
94
  def test_ellipsis_at_start(self):
95
- text = "A" * 200 + "match" + "B" * 200
96
- result = _excerpt(text, ["match"], max_len=100)
95
+ text = " ".join(["before"] * 50 + ["MATCH"] + ["after"] * 50)
96
+ result = _excerpt(text, ["match"], max_words=5)
97
97
  assert result.startswith("...")
98
98
 
99
99
  def test_ellipsis_at_end(self):
100
- text = "A" * 200 + "match" + "B" * 200
101
- result = _excerpt(text, ["match"], max_len=100)
100
+ text = " ".join(["before"] * 50 + ["MATCH"] + ["after"] * 50)
101
+ result = _excerpt(text, ["match"], max_words=5)
102
102
  assert result.endswith("...")
103
103
 
104
104
  def test_no_match_returns_beginning(self):
105
105
  text = "start of text and more"
106
- result = _excerpt(text, ["nonexistent"], max_len=300)
106
+ result = _excerpt(text, ["nonexistent"], max_words=50)
107
107
  assert "start" in result
108
108
 
109
- def test_whitespace_collapsed(self):
110
- result = _excerpt("foo \n\n bar", ["foo"], max_len=300)
111
- assert " " not in result
109
+ def test_word_boundary_no_midword_cut(self):
110
+ """Truncation lands on whole words, never mid-word."""
111
+ text = " ".join(["alphabet"] * 100)
112
+ result = _excerpt(text, ["alphabet"], max_words=5)
113
+ # Strip ellipses, every remaining word should be the full "alphabet"
114
+ body = result.replace("...", "").strip()
115
+ for word in body.split():
116
+ assert word == "alphabet", f"got partial word: {word!r}"
112
117
 
113
118
 
114
119
  class TestSearchCrossLinking:
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes