ida-code 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ida_code/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # Import session first to ensure idapro is loaded before any other ida_* imports.
2
+ from ida_code import session # noqa: F401
ida_code/_search_utils.py ADDED
@@ -0,0 +1,33 @@
1
+ """Shared search utilities for doc_search and example_search.
2
+
3
+ Provides word-boundary-aware matching: "set" matches "set_name" and
4
+ "ida_name.set_name" but not "reset" or "offset".
5
+
6
+ Boundaries are: start-of-string, underscore, dot, whitespace.
7
+ """
8
+
9
+ import re
10
+ from functools import lru_cache
11
+
12
+
13
+ @lru_cache(maxsize=128)
14
+ def _boundary_pattern(term: str) -> re.Pattern:
15
+ """Regex that matches term at an underscore/dot/whitespace boundary."""
16
+ escaped = re.escape(term)
17
+ return re.compile(rf"(?:^|[_.\s]){escaped}", re.IGNORECASE)
18
+
19
+
20
def term_matches(term: str, text: str) -> bool:
    """Check if *term* appears in *text* at a word boundary.

    Boundaries are: start-of-string, underscore, dot, whitespace.
    A cheap case-insensitive substring test rejects most non-matches
    before any regex work happens.

    Dotted terms (e.g. "ida_funcs.get_func") are matched by plain
    substring, since the dot already makes them specific.
    """
    if term not in text.lower():
        return False
    if "." in term:
        # Dotted term: the substring hit above is specific enough.
        return True
    # Require a boundary immediately before the term (re caches the pattern).
    boundary_re = re.compile(rf"(?:^|[_.\s]){re.escape(term)}", re.IGNORECASE)
    return boundary_re.search(text) is not None
ida_code/comments.py ADDED
@@ -0,0 +1,191 @@
1
+ """Comment management (regular, repeatable, function, anterior, posterior)."""
2
+
3
+ import logging
4
+
5
+ from fastmcp.exceptions import ToolError
6
+
7
+ from ida_code import session
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
# The comment kinds accepted by get_comment/set_comment/delete_comment.
_COMMENT_TYPES = {"regular", "repeatable", "function", "anterior", "posterior"}
12
+
13
+
14
def _validate_comment_type(comment_type: str, allow_empty: bool = False) -> None:
    """Raise ToolError if *comment_type* is not a recognized kind.

    With allow_empty=True, the empty string is also accepted (callers use
    it to mean "all comment types").
    """
    if comment_type in _COMMENT_TYPES:
        return
    if allow_empty and comment_type == "":
        return
    allowed = ", ".join(sorted(_COMMENT_TYPES))
    if allow_empty:
        allowed += ', or "" for all types'
    raise ToolError(
        f"Invalid comment_type '{comment_type}'. Must be one of: {allowed}"
    )
25
+
26
+
27
def _get_func(ea: int):
    """Resolve an address to its func_t; raise ToolError if outside any function."""
    import ida_funcs

    func = ida_funcs.get_func(ea)
    if func is not None:
        return func
    raise ToolError(f"Address {ea:#x} is not within a recognized function.")
35
+
36
+
37
def _get_anterior(ea: int) -> str:
    """Collect all anterior extra comment lines at *ea* as one newline-joined string."""
    import ida_lines

    collected: list[str] = []
    index = 0
    # Extra comments are stored one line per index; stop at the first gap.
    while (text := ida_lines.get_extra_cmt(ea, ida_lines.E_PREV + index)) is not None:
        collected.append(text)
        index += 1
    return "\n".join(collected)
50
+
51
+
52
def _get_posterior(ea: int) -> str:
    """Collect all posterior extra comment lines at *ea* as one newline-joined string."""
    import ida_lines

    collected: list[str] = []
    index = 0
    # Extra comments are stored one line per index; stop at the first gap.
    while (text := ida_lines.get_extra_cmt(ea, ida_lines.E_NEXT + index)) is not None:
        collected.append(text)
        index += 1
    return "\n".join(collected)
65
+
66
+
67
def get_comment(ea: int, comment_type: str = "") -> dict:
    """Get comment(s) at an address.

    When *comment_type* is empty, returns all non-empty comment types.
    When a specific type is given, returns just that type.
    """
    session.require_open()
    _validate_comment_type(comment_type, allow_empty=True)

    import idc
    import ida_funcs

    if not comment_type:
        # Gather every comment kind, keeping only the non-empty ones.
        found: dict = {"address": f"{ea:#x}"}

        for key, value in (
            ("regular", idc.get_cmt(ea, 0) or ""),
            ("repeatable", idc.get_cmt(ea, 1) or ""),
        ):
            if value:
                found[key] = value

        func = ida_funcs.get_func(ea)
        if func is not None:
            func_cmt = ida_funcs.get_func_cmt(func, 0) or ""
            if func_cmt:
                found["function"] = func_cmt

        for key, value in (
            ("anterior", _get_anterior(ea)),
            ("posterior", _get_posterior(ea)),
        ):
            if value:
                found[key] = value

        return found

    # A specific type was requested; fetch just that one.
    if comment_type == "regular":
        comment = idc.get_cmt(ea, 0) or ""
    elif comment_type == "repeatable":
        comment = idc.get_cmt(ea, 1) or ""
    elif comment_type == "function":
        comment = ida_funcs.get_func_cmt(_get_func(ea), 0) or ""
    elif comment_type == "anterior":
        comment = _get_anterior(ea)
    else:  # posterior
        comment = _get_posterior(ea)

    return {"address": f"{ea:#x}", "comment_type": comment_type, "comment": comment}
120
+
121
+
122
def set_comment(ea: int, comment: str, comment_type: str = "regular") -> dict:
    """Set a comment at an address.

    For anterior/posterior comments, *comment* may contain newlines; each
    line is stored in its own extra-comment slot.  Slots left over from a
    previous, longer comment are cleared so stale text does not linger.
    """
    session.require_open()
    _validate_comment_type(comment_type)

    import idc
    import ida_funcs
    import ida_lines

    if comment_type == "regular":
        idc.set_cmt(ea, comment, 0)
    elif comment_type == "repeatable":
        idc.set_cmt(ea, comment, 1)
    elif comment_type == "function":
        pfn = _get_func(ea)
        ida_funcs.set_func_cmt(pfn, comment, 0)
    else:  # anterior / posterior extra comments
        base = ida_lines.E_PREV if comment_type == "anterior" else ida_lines.E_NEXT
        lines = comment.split("\n")
        for i, line in enumerate(lines):
            ida_lines.update_extra_cmt(ea, base + i, line)
        # Bug fix: writing fewer lines than were previously stored used to
        # leave the stale tail of the old comment in place.  Delete any
        # remaining slots past the new line count.
        idx = len(lines)
        while ida_lines.get_extra_cmt(ea, base + idx) is not None:
            ida_lines.del_extra_cmt(ea, base + idx)
            idx += 1

    log.info("Set %s comment at %#x", comment_type, ea)
    return {
        "address": f"{ea:#x}",
        "comment_type": comment_type,
        "comment": comment,
        "status": "updated",
    }
154
+
155
+
156
def delete_comment(ea: int, comment_type: str = "regular") -> dict:
    """Delete a comment at an address."""
    session.require_open()
    _validate_comment_type(comment_type)

    import idc
    import ida_funcs
    import ida_lines

    if comment_type == "regular":
        idc.set_cmt(ea, "", 0)
    elif comment_type == "repeatable":
        idc.set_cmt(ea, "", 1)
    elif comment_type == "function":
        ida_funcs.set_func_cmt(_get_func(ea), "", 0)
    else:  # anterior / posterior extra comments
        base = ida_lines.E_PREV if comment_type == "anterior" else ida_lines.E_NEXT
        # Count the existing lines first, then delete every index.
        total = 0
        while ida_lines.get_extra_cmt(ea, base + total) is not None:
            total += 1
        for offset in range(total):
            ida_lines.del_extra_cmt(ea, base + offset)

    log.info("Deleted %s comment at %#x", comment_type, ea)
    return {
        "address": f"{ea:#x}",
        "comment_type": comment_type,
        "status": "deleted",
    }
ida_code/config.py ADDED
@@ -0,0 +1,9 @@
1
"""Environment-driven configuration for the ida_code package."""

import os
from pathlib import Path

# Root of the IDA Pro installation; override via the IDA_INSTALL_DIR env var.
IDA_INSTALL_DIR = Path(os.environ.get("IDA_INSTALL_DIR", "/opt/ida-pro-9.2"))
# Locations derived from the install root.
IDA_DOCS_DIR = IDA_INSTALL_DIR / "docs"
IDA_PYTHON_DIR = IDA_INSTALL_DIR / "python"
IDA_EXAMPLES_DIR = IDA_PYTHON_DIR / "examples"
# Logging verbosity; uppercased so e.g. "debug" also works.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper()
# NOTE(review): presumably a bearer token for MCP clients, empty meaning
# no auth — confirm against the server setup code.
MCP_AUTH_TOKEN = os.environ.get("MCP_AUTH_TOKEN", "")
ida_code/doc_search.py ADDED
@@ -0,0 +1,255 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ from html.parser import HTMLParser
5
+ from pathlib import Path
6
+
7
+ from ida_code._search_utils import term_matches
8
+ from ida_code.config import IDA_DOCS_DIR, IDA_PYTHON_DIR
9
+
10
+ log = logging.getLogger(__name__)
11
+
12
# Lazily-built indexes; None until _ensure_indexes() populates them.
_html_docs: list[tuple[str, str, str]] | None = None  # (title, clean_text, location)
_py_chunks: list[tuple[str, str, str]] | None = None  # (name, body, source_file)
15
+
16
+
17
+ class _HTMLStripper(HTMLParser):
18
+ """Strip HTML tags, keep text content."""
19
+
20
+ def __init__(self):
21
+ super().__init__()
22
+ self._parts: list[str] = []
23
+
24
+ def handle_data(self, data: str):
25
+ self._parts.append(data)
26
+
27
+ def get_text(self) -> str:
28
+ return "".join(self._parts)
29
+
30
+
31
def _strip_html(html: str) -> str:
    """Convert an HTML fragment to plain text by dropping all tags."""
    stripper = _HTMLStripper()
    stripper.feed(html)
    return stripper.get_text()
35
+
36
+
37
def _load_html_docs() -> list[tuple[str, str, str]]:
    """Load the docs search index into (title, plain_text, location) tuples.

    Entries that have neither a title nor any text are skipped.
    """
    index_path = IDA_DOCS_DIR / "search" / "search_index.json"
    with open(index_path) as fh:
        index = json.load(fh)

    docs: list[tuple[str, str, str]] = []
    for entry in index["docs"]:
        title = entry.get("title", "")
        body = _strip_html(entry.get("text", ""))
        location = entry.get("location", "")
        if title or body:
            docs.append((title, body, location))
    return docs
50
+
51
+
52
def _load_py_chunks() -> list[tuple[str, str, str]]:
    """Index the ida_*.py API modules (plus idautils/idc) into search chunks."""
    collected: list[tuple[str, str, str]] = []
    for module_path in sorted(IDA_PYTHON_DIR.glob("ida_*.py")):
        _parse_py_file(module_path, collected)
    # idautils.py and idc.py do not match the ida_* glob; add them explicitly.
    for extra in ("idautils.py", "idc.py"):
        extra_path = IDA_PYTHON_DIR / extra
        if extra_path.exists():
            _parse_py_file(extra_path, collected)
    return collected
62
+
63
+
64
+ def _parse_py_file(path: Path, chunks: list[tuple[str, str, str]]):
65
+ """Split a Python file into chunks at top-level def/class boundaries."""
66
+ source_name = path.name
67
+ try:
68
+ lines = path.read_text(errors="replace").splitlines()
69
+ except OSError:
70
+ return
71
+
72
+ # Find lines that start a new def or class at the top level (no indentation
73
+ # or class-level indentation for methods).
74
+ boundary_pattern = re.compile(r"^(def |class )")
75
+ boundaries: list[int] = []
76
+ for i, line in enumerate(lines):
77
+ if boundary_pattern.match(line):
78
+ boundaries.append(i)
79
+
80
+ if not boundaries:
81
+ return
82
+
83
+ for idx, start in enumerate(boundaries):
84
+ end = boundaries[idx + 1] if idx + 1 < len(boundaries) else len(lines)
85
+ block = lines[start:end]
86
+
87
+ # Extract name from the first line.
88
+ first_line = block[0]
89
+ m = re.match(r"(?:def|class)\s+(\w+)", first_line)
90
+ if not m:
91
+ continue
92
+ name = m.group(1)
93
+
94
+ # Skip SWIG internals.
95
+ if name.startswith("_swig"):
96
+ continue
97
+
98
+ # Keep a reasonable amount of context: signature + docstring + a few lines.
99
+ body = "\n".join(block[:40])
100
+ chunks.append((name, body, source_name))
101
+
102
+
103
def _ensure_indexes():
    """Build the module-level search indexes on first use.

    Idempotent: each index is loaded at most once and cached in the
    module globals _html_docs / _py_chunks.
    """
    global _html_docs, _py_chunks
    if _html_docs is None:
        log.info("Loading HTML docs index from %s", IDA_DOCS_DIR)
        _html_docs = _load_html_docs()
        log.info("Loaded %d HTML doc entries", len(_html_docs))
    if _py_chunks is None:
        log.info("Loading Python API chunks from %s", IDA_PYTHON_DIR)
        _py_chunks = _load_py_chunks()
        log.info("Loaded %d Python API chunks", len(_py_chunks))
113
+
114
+
115
def search(
    query: str,
    max_results: int = 5,
    max_snippet_length: int = 150,
    include_examples: bool = True,
) -> dict:
    """Search IDA docs and Python API sources. Returns structured dict."""
    _ensure_indexes()

    terms = query.lower().split()
    if not terms:
        return {"query": query, "results": []}
    log.debug("Searching for terms: %s", terms)

    hits: list[tuple[float, str, str, str]] = []  # (score, title, snippet, source)

    # HTML documentation pages.
    for title, text, location in _html_docs:
        doc_score = _score(terms, title, text)
        if doc_score > 0:
            snippet = _excerpt(text, terms, max_len=max_snippet_length)
            hits.append((doc_score, title, snippet, f"docs: {location}"))

    # Python API chunks (the chunk name carries extra weight in scoring).
    for name, body, source_file in _py_chunks:
        api_score = _score_py(terms, name, body)
        if api_score > 0:
            snippet = _excerpt(body, terms, max_len=max_snippet_length)
            hits.append((api_score, name, snippet, f"python: {source_file}"))

    # Keep only the best-scoring entries.
    top = sorted(hits, key=lambda h: h[0], reverse=True)[:max_results]

    result = {
        "query": query,
        "results": [
            {"source": source, "title": title, "snippet": snippet, "score": score}
            for score, title, snippet, source in top
        ],
    }

    # Cross-link: append matching examples if available.
    if include_examples:
        # NOTE(review): local import — presumably avoids an import cycle
        # with example_search; confirm.
        from ida_code.example_search import search as _search_examples

        examples = _search_examples(query, max_results=2, max_snippet_lines=5)
        if examples["results"]:
            result["related_examples"] = examples["results"]

    return result
166
+
167
+
168
def _score(terms: list[str], title: str, text: str) -> float:
    """Score a document against search terms with field weighting.

    A title match is worth 4.0 and a body match 1.0 per term; when every
    term of a multi-term query matches somewhere, the total gets a 1.5x
    bonus.
    """
    title_lower = title.lower()
    text_lower = text.lower()

    total = 0.0
    covered = 0
    for term in terms:
        if term_matches(term, title_lower):
            weight = 4.0  # a title hit dominates any body hit
        elif term_matches(term, text_lower):
            weight = 1.0
        else:
            continue
        covered += 1
        total += weight

    # Reward documents that cover the whole query.
    if len(terms) > 1 and covered == len(terms):
        total *= 1.5

    return total
200
+
201
+
202
def _score_py(terms: list[str], name: str, body: str) -> float:
    """Score a Python API chunk with name-weighted scoring.

    A name match is worth 5.0 (it IS the API definition) and a body match
    1.0 per term; covering every term of a multi-term query earns a 1.5x
    bonus.
    """
    name_lower = name.lower()
    body_lower = body.lower()

    total = 0.0
    covered = 0
    for term in terms:
        if term_matches(term, name_lower):
            weight = 5.0  # a name hit dominates any body hit
        elif term_matches(term, body_lower):
            weight = 1.0
        else:
            continue
        covered += 1
        total += weight

    if len(terms) > 1 and covered == len(terms):
        total *= 1.5

    return total
232
+
233
+
234
+ def _excerpt(text: str, terms: list[str], max_len: int = 300) -> str:
235
+ """Extract a snippet around the first matching term."""
236
+ text_lower = text.lower()
237
+ best_pos = len(text)
238
+ for t in terms:
239
+ # Use simple substring find for excerpt positioning
240
+ pos = text_lower.find(t)
241
+ if pos != -1 and pos < best_pos:
242
+ best_pos = pos
243
+
244
+ start = max(0, best_pos - 50)
245
+ end = start + max_len
246
+ snippet = text[start:end].strip()
247
+
248
+ if start > 0:
249
+ snippet = "..." + snippet
250
+ if end < len(text):
251
+ snippet = snippet + "..."
252
+
253
+ # Collapse whitespace.
254
+ snippet = re.sub(r"\s+", " ", snippet)
255
+ return snippet