ida-code 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ida_code/__init__.py +2 -0
- ida_code/_search_utils.py +33 -0
- ida_code/comments.py +191 -0
- ida_code/config.py +9 -0
- ida_code/doc_search.py +255 -0
- ida_code/example_search.py +570 -0
- ida_code/executor.py +145 -0
- ida_code/guidelines.py +370 -0
- ida_code/macho.py +67 -0
- ida_code/prompts.py +176 -0
- ida_code/server.py +1011 -0
- ida_code/session.py +293 -0
- ida_code/snapshots.py +110 -0
- ida_code/structures.py +227 -0
- ida_code/undo.py +102 -0
- ida_code/variables.py +206 -0
- ida_code-0.2.1.dist-info/METADATA +167 -0
- ida_code-0.2.1.dist-info/RECORD +21 -0
- ida_code-0.2.1.dist-info/WHEEL +4 -0
- ida_code-0.2.1.dist-info/entry_points.txt +2 -0
- ida_code-0.2.1.dist-info/licenses/LICENSE +21 -0
ida_code/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Shared search utilities for doc_search and example_search.
|
|
2
|
+
|
|
3
|
+
Provides word-boundary-aware matching: "set" matches "set_name" and
|
|
4
|
+
"ida_name.set_name" but not "reset" or "offset".
|
|
5
|
+
|
|
6
|
+
Boundaries are: start-of-string, underscore, dot, whitespace.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from functools import lru_cache
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@lru_cache(maxsize=128)
def _boundary_pattern(term: str) -> re.Pattern:
    """Regex that matches term at an underscore/dot/whitespace boundary."""
    escaped = re.escape(term)
    return re.compile(rf"(?:^|[_.\s]){escaped}", re.IGNORECASE)


def term_matches(term: str, text: str) -> bool:
    """Check if term appears in text at a word boundary.

    Boundaries are: start-of-string, underscore, dot, whitespace.
    Fast path: rejects via substring check before running regex.

    Dotted terms (e.g. "ida_funcs.get_func") use plain substring
    matching since the dot is already specific enough.
    """
    # Normalize the term so the fast substring path is case-insensitive,
    # matching the IGNORECASE regex.  Previously a mixed-case term could
    # be rejected here even though the regex would have matched.
    term = term.lower()
    if term not in text.lower():
        return False
    if "." in term:
        return True  # dotted terms: substring match is sufficient
    return _boundary_pattern(term).search(text) is not None
|
ida_code/comments.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""Comment management (regular, repeatable, function, anterior, posterior)."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from fastmcp.exceptions import ToolError
|
|
6
|
+
|
|
7
|
+
from ida_code import session
|
|
8
|
+
|
|
9
|
+
log = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
_COMMENT_TYPES = {"regular", "repeatable", "function", "anterior", "posterior"}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _validate_comment_type(comment_type: str, allow_empty: bool = False) -> None:
    """Raise ToolError if comment_type is not recognized."""
    # The empty string is a wildcard ("all types") only where the caller
    # explicitly opts in.
    if comment_type in _COMMENT_TYPES or (allow_empty and comment_type == ""):
        return
    allowed = ", ".join(sorted(_COMMENT_TYPES))
    if allow_empty:
        allowed += ', or "" for all types'
    raise ToolError(
        f"Invalid comment_type '{comment_type}'. Must be one of: {allowed}"
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _get_func(ea: int):
    """Resolve address to func_t, raise ToolError if not in a function."""
    import ida_funcs

    func = ida_funcs.get_func(ea)
    if func is not None:
        return func
    raise ToolError(f"Address {ea:#x} is not within a recognized function.")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _get_anterior(ea: int) -> str:
    """Collect all anterior extra comment lines at ea."""
    import ida_lines

    collected: list[str] = []
    offset = 0
    # Extra comment slots are contiguous; the first None terminates the run.
    while (line := ida_lines.get_extra_cmt(ea, ida_lines.E_PREV + offset)) is not None:
        collected.append(line)
        offset += 1
    return "\n".join(collected)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _get_posterior(ea: int) -> str:
    """Collect all posterior extra comment lines at ea."""
    import ida_lines

    collected: list[str] = []
    offset = 0
    # Extra comment slots are contiguous; the first None terminates the run.
    while (line := ida_lines.get_extra_cmt(ea, ida_lines.E_NEXT + offset)) is not None:
        collected.append(line)
        offset += 1
    return "\n".join(collected)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_comment(ea: int, comment_type: str = "") -> dict:
    """Get comment(s) at an address.

    When *comment_type* is empty, returns all non-empty comment types.
    When a specific type is given, returns just that type.

    Args:
        ea: Effective address to inspect.
        comment_type: One of "regular", "repeatable", "function",
            "anterior", "posterior", or "" for all types.

    Returns:
        Dict with hex "address" plus either one key per non-empty comment
        type ("" mode) or "comment_type"/"comment" keys (specific mode).

    Raises:
        ToolError: If comment_type is invalid, no database is open, or
            (for "function") the address is not inside a function.
    """
    session.require_open()
    _validate_comment_type(comment_type, allow_empty=True)

    # IDA modules are imported lazily so this module can be imported
    # outside a running IDA process.
    import idc
    import ida_funcs

    if comment_type == "":
        # Aggregate mode: only non-empty comment types appear as keys.
        result: dict = {"address": f"{ea:#x}"}

        regular = idc.get_cmt(ea, 0) or ""  # 0 = non-repeatable comment
        if regular:
            result["regular"] = regular

        repeatable = idc.get_cmt(ea, 1) or ""  # 1 = repeatable comment
        if repeatable:
            result["repeatable"] = repeatable

        # Function comments exist only when ea falls inside a function.
        pfn = ida_funcs.get_func(ea)
        if pfn is not None:
            func_cmt = ida_funcs.get_func_cmt(pfn, 0) or ""
            if func_cmt:
                result["function"] = func_cmt

        anterior = _get_anterior(ea)
        if anterior:
            result["anterior"] = anterior

        posterior = _get_posterior(ea)
        if posterior:
            result["posterior"] = posterior

        return result

    # Specific type requested.
    if comment_type == "regular":
        comment = idc.get_cmt(ea, 0) or ""
    elif comment_type == "repeatable":
        comment = idc.get_cmt(ea, 1) or ""
    elif comment_type == "function":
        pfn = _get_func(ea)
        comment = ida_funcs.get_func_cmt(pfn, 0) or ""
    elif comment_type == "anterior":
        comment = _get_anterior(ea)
    else:  # posterior
        comment = _get_posterior(ea)

    return {"address": f"{ea:#x}", "comment_type": comment_type, "comment": comment}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def set_comment(ea: int, comment: str, comment_type: str = "regular") -> dict:
    """Set a comment at an address.

    Args:
        ea: Effective address to annotate.
        comment: New comment text; for anterior/posterior comments each
            newline becomes one extra comment line.
        comment_type: One of "regular", "repeatable", "function",
            "anterior", "posterior".

    Returns:
        Dict echoing the address, type, and comment with status "updated".

    Raises:
        ToolError: If comment_type is invalid, no database is open, or
            (for "function") the address is not inside a function.
    """
    session.require_open()
    _validate_comment_type(comment_type)

    # IDA modules are imported lazily so this module can be imported
    # outside a running IDA process.
    import idc
    import ida_funcs
    import ida_lines

    if comment_type == "regular":
        idc.set_cmt(ea, comment, 0)
    elif comment_type == "repeatable":
        idc.set_cmt(ea, comment, 1)
    elif comment_type == "function":
        pfn = _get_func(ea)
        ida_funcs.set_func_cmt(pfn, comment, 0)
    else:  # anterior / posterior extra-line comments
        base = ida_lines.E_PREV if comment_type == "anterior" else ida_lines.E_NEXT
        lines = comment.split("\n")
        # Count pre-existing extra lines so surplus ones can be cleared:
        # previously, replacing a 5-line comment with a 2-line one left the
        # old lines 3-5 visible.
        old_count = 0
        while ida_lines.get_extra_cmt(ea, base + old_count) is not None:
            old_count += 1
        for i, line in enumerate(lines):
            ida_lines.update_extra_cmt(ea, base + i, line)
        for i in range(len(lines), old_count):
            ida_lines.del_extra_cmt(ea, base + i)

    log.info("Set %s comment at %#x", comment_type, ea)
    return {
        "address": f"{ea:#x}",
        "comment_type": comment_type,
        "comment": comment,
        "status": "updated",
    }
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def delete_comment(ea: int, comment_type: str = "regular") -> dict:
    """Delete a comment at an address."""
    session.require_open()
    _validate_comment_type(comment_type)

    import idc
    import ida_funcs
    import ida_lines

    if comment_type == "regular":
        idc.set_cmt(ea, "", 0)
    elif comment_type == "repeatable":
        idc.set_cmt(ea, "", 1)
    elif comment_type == "function":
        ida_funcs.set_func_cmt(_get_func(ea), "", 0)
    else:
        # Anterior/posterior: count the existing extra lines first, then
        # remove each slot.
        base = ida_lines.E_PREV if comment_type == "anterior" else ida_lines.E_NEXT
        total = 0
        while ida_lines.get_extra_cmt(ea, base + total) is not None:
            total += 1
        for offset in range(total):
            ida_lines.del_extra_cmt(ea, base + offset)

    log.info("Deleted %s comment at %#x", comment_type, ea)
    return {
        "address": f"{ea:#x}",
        "comment_type": comment_type,
        "status": "deleted",
    }
|
ida_code/config.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import os
from pathlib import Path

# Root of the IDA Pro installation; override via the IDA_INSTALL_DIR env var.
IDA_INSTALL_DIR = Path(os.environ.get("IDA_INSTALL_DIR", "/opt/ida-pro-9.2"))
# HTML documentation shipped with IDA (consumed by doc_search).
IDA_DOCS_DIR = IDA_INSTALL_DIR / "docs"
# Bundled IDAPython sources (consumed by doc_search for API chunking).
IDA_PYTHON_DIR = IDA_INSTALL_DIR / "python"
# Example scripts directory — presumably consumed by example_search; confirm.
IDA_EXAMPLES_DIR = IDA_PYTHON_DIR / "examples"
# Logging verbosity (upper-cased logging level name); defaults to WARNING.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper()
# Auth token for MCP clients; NOTE(review): empty string appears to mean
# "no auth configured" — confirm how the server treats it.
MCP_AUTH_TOKEN = os.environ.get("MCP_AUTH_TOKEN", "")
|
ida_code/doc_search.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
from html.parser import HTMLParser
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ida_code._search_utils import term_matches
|
|
8
|
+
from ida_code.config import IDA_DOCS_DIR, IDA_PYTHON_DIR
|
|
9
|
+
|
|
10
|
+
log = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
# Lazily-built indexes.
|
|
13
|
+
_html_docs: list[tuple[str, str, str]] | None = None # (title, clean_text, location)
|
|
14
|
+
_py_chunks: list[tuple[str, str, str]] | None = None # (name, body, source_file)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class _HTMLStripper(HTMLParser):
|
|
18
|
+
"""Strip HTML tags, keep text content."""
|
|
19
|
+
|
|
20
|
+
def __init__(self):
|
|
21
|
+
super().__init__()
|
|
22
|
+
self._parts: list[str] = []
|
|
23
|
+
|
|
24
|
+
def handle_data(self, data: str):
|
|
25
|
+
self._parts.append(data)
|
|
26
|
+
|
|
27
|
+
def get_text(self) -> str:
|
|
28
|
+
return "".join(self._parts)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _strip_html(html: str) -> str:
    """Return only the text content of *html*, with all tags removed."""
    stripper = _HTMLStripper()
    stripper.feed(html)
    return stripper.get_text()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _load_html_docs() -> list[tuple[str, str, str]]:
    """Load the docs search index into (title, clean_text, location) tuples.

    Reads ``search/search_index.json`` under IDA_DOCS_DIR, strips HTML from
    each entry's text, and drops entries that have neither title nor text.

    Raises:
        OSError / json.JSONDecodeError: If the index is missing or invalid.
    """
    index_path = IDA_DOCS_DIR / "search" / "search_index.json"
    # Explicit UTF-8: the JSON index must not depend on the platform's
    # default encoding (e.g. cp1252 on Windows).
    data = json.loads(index_path.read_text(encoding="utf-8"))

    entries = []
    for doc in data["docs"]:
        title = doc.get("title", "")
        text = _strip_html(doc.get("text", ""))
        location = doc.get("location", "")
        if title or text:
            entries.append((title, text, location))
    return entries
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _load_py_chunks() -> list[tuple[str, str, str]]:
    """Collect (name, body, source_file) chunks from the IDAPython sources."""
    collected: list[tuple[str, str, str]] = []
    # Extra top-level modules that don't match the ida_*.py glob.
    extras = (IDA_PYTHON_DIR / "idautils.py", IDA_PYTHON_DIR / "idc.py")
    targets = sorted(IDA_PYTHON_DIR.glob("ida_*.py"))
    targets.extend(p for p in extras if p.exists())
    for target in targets:
        _parse_py_file(target, collected)
    return collected
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _parse_py_file(path: Path, chunks: list[tuple[str, str, str]]):
|
|
65
|
+
"""Split a Python file into chunks at top-level def/class boundaries."""
|
|
66
|
+
source_name = path.name
|
|
67
|
+
try:
|
|
68
|
+
lines = path.read_text(errors="replace").splitlines()
|
|
69
|
+
except OSError:
|
|
70
|
+
return
|
|
71
|
+
|
|
72
|
+
# Find lines that start a new def or class at the top level (no indentation
|
|
73
|
+
# or class-level indentation for methods).
|
|
74
|
+
boundary_pattern = re.compile(r"^(def |class )")
|
|
75
|
+
boundaries: list[int] = []
|
|
76
|
+
for i, line in enumerate(lines):
|
|
77
|
+
if boundary_pattern.match(line):
|
|
78
|
+
boundaries.append(i)
|
|
79
|
+
|
|
80
|
+
if not boundaries:
|
|
81
|
+
return
|
|
82
|
+
|
|
83
|
+
for idx, start in enumerate(boundaries):
|
|
84
|
+
end = boundaries[idx + 1] if idx + 1 < len(boundaries) else len(lines)
|
|
85
|
+
block = lines[start:end]
|
|
86
|
+
|
|
87
|
+
# Extract name from the first line.
|
|
88
|
+
first_line = block[0]
|
|
89
|
+
m = re.match(r"(?:def|class)\s+(\w+)", first_line)
|
|
90
|
+
if not m:
|
|
91
|
+
continue
|
|
92
|
+
name = m.group(1)
|
|
93
|
+
|
|
94
|
+
# Skip SWIG internals.
|
|
95
|
+
if name.startswith("_swig"):
|
|
96
|
+
continue
|
|
97
|
+
|
|
98
|
+
# Keep a reasonable amount of context: signature + docstring + a few lines.
|
|
99
|
+
body = "\n".join(block[:40])
|
|
100
|
+
chunks.append((name, body, source_name))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _ensure_indexes():
    """Build the module-level search indexes on first use (lazy, idempotent).

    Populates the module globals ``_html_docs`` and ``_py_chunks`` at most
    once each; later calls are no-ops.  Plain check-then-assign on globals,
    so concurrent first calls could build an index twice (wasteful but
    harmless since the result is the same).
    """
    global _html_docs, _py_chunks
    if _html_docs is None:
        log.info("Loading HTML docs index from %s", IDA_DOCS_DIR)
        _html_docs = _load_html_docs()
        log.info("Loaded %d HTML doc entries", len(_html_docs))
    if _py_chunks is None:
        log.info("Loading Python API chunks from %s", IDA_PYTHON_DIR)
        _py_chunks = _load_py_chunks()
        log.info("Loaded %d Python API chunks", len(_py_chunks))
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def search(
    query: str,
    max_results: int = 5,
    max_snippet_length: int = 150,
    include_examples: bool = True,
) -> dict:
    """Search IDA docs and Python API sources. Returns structured dict.

    Args:
        query: Whitespace-separated search terms, matched case-insensitively.
        max_results: Maximum number of doc/API results to return.
        max_snippet_length: Maximum snippet length in characters.
        include_examples: Also attach up to 2 matching example-script hits
            under a "related_examples" key.

    Returns:
        {"query": ..., "results": [{"source", "title", "snippet", "score"},
        ...]} sorted by descending score, plus optional "related_examples".
    """
    _ensure_indexes()

    terms = query.lower().split()
    if not terms:
        # Empty/whitespace-only query: nothing to match.
        return {"query": query, "results": []}
    log.debug("Searching for terms: %s", terms)

    results: list[tuple[float, str, str, str]] = []  # (score, title, snippet, source)

    # Search HTML docs.
    for title, text, location in _html_docs:
        score = _score(terms, title, text)
        if score > 0:
            snippet = _excerpt(text, terms, max_len=max_snippet_length)
            results.append((score, title, snippet, f"docs: {location}"))

    # Search Python API chunks (name field gets higher weight).
    for name, body, source_file in _py_chunks:
        score = _score_py(terms, name, body)
        if score > 0:
            snippet = _excerpt(body, terms, max_len=max_snippet_length)
            results.append((score, name, snippet, f"python: {source_file}"))

    # Sort by score descending.
    results.sort(key=lambda r: r[0], reverse=True)
    results = results[:max_results]

    result = {
        "query": query,
        "results": [
            {"source": source, "title": title, "snippet": snippet, "score": score}
            for score, title, snippet, source in results
        ],
    }

    # Cross-link: append matching examples if available.
    # NOTE(review): imported lazily here rather than at module top — likely
    # to avoid an import cycle with example_search; confirm.
    if include_examples:
        from ida_code.example_search import search as _search_examples

        ex_results = _search_examples(query, max_results=2, max_snippet_lines=5)
        if ex_results["results"]:
            result["related_examples"] = ex_results["results"]

    return result
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _score(terms: list[str], title: str, text: str) -> float:
    """Score a document against search terms with field weighting.

    Title matches score 4.0, body matches score 1.0 (a term matching both
    counts once, at the higher weight).
    All-terms-match bonus: 1.5x multiplier.
    """
    title_l = title.lower()
    text_l = text.lower()

    total = 0.0
    hits = 0
    for term in terms:
        if term_matches(term, title_l):
            weight = 4.0  # title match (high value)
        elif term_matches(term, text_l):
            weight = 1.0  # body match (lower value)
        else:
            continue
        hits += 1
        total += weight

    # All-terms-match bonus for multi-term queries.
    if len(terms) > 1 and hits == len(terms):
        total *= 1.5

    return total
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _score_py(terms: list[str], name: str, body: str) -> float:
    """Score a Python API chunk with name-weighted scoring.

    Name matches score 5.0, body matches score 1.0 (a term matching both
    counts once, at the higher weight).
    """
    name_l = name.lower()
    body_l = body.lower()

    total = 0.0
    hits = 0
    for term in terms:
        if term_matches(term, name_l):
            weight = 5.0  # name match — this IS the API definition
        elif term_matches(term, body_l):
            weight = 1.0
        else:
            continue
        hits += 1
        total += weight

    # All-terms-match bonus for multi-term queries.
    if len(terms) > 1 and hits == len(terms):
        total *= 1.5

    return total
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _excerpt(text: str, terms: list[str], max_len: int = 300) -> str:
|
|
235
|
+
"""Extract a snippet around the first matching term."""
|
|
236
|
+
text_lower = text.lower()
|
|
237
|
+
best_pos = len(text)
|
|
238
|
+
for t in terms:
|
|
239
|
+
# Use simple substring find for excerpt positioning
|
|
240
|
+
pos = text_lower.find(t)
|
|
241
|
+
if pos != -1 and pos < best_pos:
|
|
242
|
+
best_pos = pos
|
|
243
|
+
|
|
244
|
+
start = max(0, best_pos - 50)
|
|
245
|
+
end = start + max_len
|
|
246
|
+
snippet = text[start:end].strip()
|
|
247
|
+
|
|
248
|
+
if start > 0:
|
|
249
|
+
snippet = "..." + snippet
|
|
250
|
+
if end < len(text):
|
|
251
|
+
snippet = snippet + "..."
|
|
252
|
+
|
|
253
|
+
# Collapse whitespace.
|
|
254
|
+
snippet = re.sub(r"\s+", " ", snippet)
|
|
255
|
+
return snippet
|