ida-code 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ida_code-0.2.3 → ida_code-0.2.4}/CHANGELOG.md +11 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/PKG-INFO +3 -3
- {ida_code-0.2.3 → ida_code-0.2.4}/README.md +2 -2
- {ida_code-0.2.3 → ida_code-0.2.4}/pyproject.toml +1 -1
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/code_search.py +1 -1
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/doc_search.py +45 -24
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/prompts.py +14 -4
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/server.py +71 -23
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_doc_search.py +17 -12
- {ida_code-0.2.3 → ida_code-0.2.4}/.gitignore +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/LICENSE +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/__init__.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/_search_utils.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/comments.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/config.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/executor.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/guidelines.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/ida_thread.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/macho.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/session.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/snapshots.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/structures.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/undo.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/src/ida_code/variables.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/__init__.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_code_search.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_comments.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_e2e.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_executor.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_ida_thread.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_macho.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_search_utils.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_session.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_structures.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_undo.py +0 -0
- {ida_code-0.2.3 → ida_code-0.2.4}/tests/test_variables.py +0 -0
|
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|
6
6
|
|
|
7
|
+
## [0.2.4] - 2026-05-19
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- **`get_guideline` tool** — tool-form companion to the `guidelines://*` resources. Same content, but tool listings are read more reliably than resource listings by cold LLM clients.
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- `search_docs` snippet cap is word-based (`max_snippet_length` → `max_snippet_words`, default 25). Avoids mid-word truncation and lines up better with token cost.
|
|
16
|
+
- `search_docs` / `search_code` docstrings and the server `instructions` paragraph rewritten as discovery steers — `search_code` for API lookups, `search_docs` narrowed to HTML prose, both pointing at `get_guideline` / `get_source`.
|
|
17
|
+
|
|
7
18
|
## [0.2.3] - 2026-05-10
|
|
8
19
|
|
|
9
20
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ida-code
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: MCP server for AI-assisted IDAPython scripting via idalib
|
|
5
5
|
Project-URL: Homepage, https://github.com/Dil4rd/ida-code
|
|
6
6
|
Project-URL: Repository, https://github.com/Dil4rd/ida-code
|
|
@@ -76,7 +76,7 @@ For other MCP clients, run the server directly:
|
|
|
76
76
|
IDA_INSTALL_DIR=/opt/ida-pro-9.3 ida-code # stdio transport
|
|
77
77
|
```
|
|
78
78
|
|
|
79
|
-
## Tools (36)
|
|
79
|
+
## Tools (37)
|
|
80
80
|
|
|
81
81
|
Full parameter docs live in each tool's docstring — surfaced automatically to MCP clients via `tools/list`.
|
|
82
82
|
|
|
@@ -90,7 +90,7 @@ Full parameter docs live in each tool's docstring — surfaced automatically to
|
|
|
90
90
|
| Snapshots | `list_snapshots`, `create_snapshot`, `restore_snapshot`, `delete_snapshot` |
|
|
91
91
|
| Undo/redo | `get_undo_status`, `perform_undo`, `perform_redo` |
|
|
92
92
|
| Inventory | `get_strings`, `get_imports`, `get_exports` |
|
|
93
|
-
| Search | `search_docs`, `search_code`, `get_source` |
|
|
93
|
+
| Search | `search_docs`, `search_code`, `get_source`, `get_guideline` |
|
|
94
94
|
|
|
95
95
|
## Resources & prompts
|
|
96
96
|
|
|
@@ -47,7 +47,7 @@ For other MCP clients, run the server directly:
|
|
|
47
47
|
IDA_INSTALL_DIR=/opt/ida-pro-9.3 ida-code # stdio transport
|
|
48
48
|
```
|
|
49
49
|
|
|
50
|
-
## Tools (36)
|
|
50
|
+
## Tools (37)
|
|
51
51
|
|
|
52
52
|
Full parameter docs live in each tool's docstring — surfaced automatically to MCP clients via `tools/list`.
|
|
53
53
|
|
|
@@ -61,7 +61,7 @@ Full parameter docs live in each tool's docstring — surfaced automatically to
|
|
|
61
61
|
| Snapshots | `list_snapshots`, `create_snapshot`, `restore_snapshot`, `delete_snapshot` |
|
|
62
62
|
| Undo/redo | `get_undo_status`, `perform_undo`, `perform_redo` |
|
|
63
63
|
| Inventory | `get_strings`, `get_imports`, `get_exports` |
|
|
64
|
-
| Search | `search_docs`, `search_code`, `get_source` |
|
|
64
|
+
| Search | `search_docs`, `search_code`, `get_source`, `get_guideline` |
|
|
65
65
|
|
|
66
66
|
## Resources & prompts
|
|
67
67
|
|
|
@@ -725,7 +725,7 @@ def search(
|
|
|
725
725
|
|
|
726
726
|
if include_docs:
|
|
727
727
|
from ida_code.doc_search import search as _search_docs
|
|
728
|
-
doc_res = _search_docs(query, max_results=2,
|
|
728
|
+
doc_res = _search_docs(query, max_results=2, max_snippet_words=30, include_examples=False)
|
|
729
729
|
if doc_res.get("results"):
|
|
730
730
|
out["related_docs"] = doc_res["results"]
|
|
731
731
|
|
|
@@ -61,7 +61,7 @@ def _ensure_indexes():
|
|
|
61
61
|
def search(
|
|
62
62
|
query: str,
|
|
63
63
|
max_results: int = 5,
|
|
64
|
-
|
|
64
|
+
max_snippet_words: int = 25,
|
|
65
65
|
include_examples: bool = True,
|
|
66
66
|
) -> dict:
|
|
67
67
|
"""Search IDA HTML documentation. Returns a structured dict.
|
|
@@ -83,7 +83,7 @@ def search(
|
|
|
83
83
|
for title, text, location in _html_docs:
|
|
84
84
|
score = _score(terms, title, text)
|
|
85
85
|
if score > 0:
|
|
86
|
-
snippet = _excerpt(text, terms,
|
|
86
|
+
snippet = _excerpt(text, terms, max_words=max_snippet_words)
|
|
87
87
|
results.append((score, title, snippet, f"docs: {location}"))
|
|
88
88
|
|
|
89
89
|
results.sort(key=lambda r: r[0], reverse=True)
|
|
@@ -138,31 +138,52 @@ def _score(terms: list[str], title: str, text: str) -> float:
|
|
|
138
138
|
return total
|
|
139
139
|
|
|
140
140
|
|
|
141
|
-
def _excerpt(text: str, terms: list[str],
|
|
142
|
-
"""Extract a snippet of *text* around the first matching term.
|
|
141
|
+
def _excerpt(text: str, terms: list[str], max_words: int = 25) -> str:
|
|
142
|
+
"""Extract a word-bounded snippet of *text* around the first matching term.
|
|
143
|
+
|
|
144
|
+
Words are tokens produced by ``str.split()`` (whitespace-separated).
|
|
145
|
+
The snippet is at most ``max_words`` words plus optional ``"..."``
|
|
146
|
+
ellipses on each side when the window doesn't cover the whole text.
|
|
147
|
+
Cutting on word boundaries avoids mid-word truncation that char-based
|
|
148
|
+
capping produces, and the unit aligns more closely with token cost
|
|
149
|
+
than character count does.
|
|
150
|
+
"""
|
|
143
151
|
if not text:
|
|
144
152
|
return ""
|
|
145
153
|
|
|
154
|
+
words = text.split()
|
|
155
|
+
if not words:
|
|
156
|
+
return ""
|
|
157
|
+
|
|
146
158
|
lowered = text.lower()
|
|
147
|
-
|
|
159
|
+
best_char = -1
|
|
148
160
|
for term in terms:
|
|
149
161
|
pos = lowered.find(term)
|
|
150
|
-
if pos != -1 and (
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
162
|
+
if pos != -1 and (best_char == -1 or pos < best_char):
|
|
163
|
+
best_char = pos
|
|
164
|
+
|
|
165
|
+
# Locate which word index the best-match character lands in.
|
|
166
|
+
match_word_idx = 0
|
|
167
|
+
if best_char != -1:
|
|
168
|
+
# Re-tokenise on the same boundaries: walk words, tracking the
|
|
169
|
+
# original text offset by re-finding each word from the cursor.
|
|
170
|
+
cursor = 0
|
|
171
|
+
for i, w in enumerate(words):
|
|
172
|
+
cursor = text.find(w, cursor)
|
|
173
|
+
if cursor == -1 or cursor > best_char:
|
|
174
|
+
break
|
|
175
|
+
match_word_idx = i
|
|
176
|
+
cursor += len(w)
|
|
177
|
+
|
|
178
|
+
half = max_words // 2
|
|
179
|
+
start = max(0, match_word_idx - half)
|
|
180
|
+
end = min(len(words), start + max_words)
|
|
181
|
+
if end - start < max_words:
|
|
182
|
+
start = max(0, end - max_words)
|
|
183
|
+
|
|
184
|
+
snippet = " ".join(words[start:end])
|
|
185
|
+
if start > 0:
|
|
186
|
+
snippet = "..." + snippet
|
|
187
|
+
if end < len(words):
|
|
188
|
+
snippet = snippet + "..."
|
|
189
|
+
return snippet
|
|
@@ -91,9 +91,15 @@ return structured data, handle errors, and are faster to use.
|
|
|
91
91
|
- **Use `execute` for custom analysis** — The `execute` tool gives you full \
|
|
92
92
|
IDAPython access. Write custom scripts for pattern matching, data extraction, \
|
|
93
93
|
or anything the dedicated tools don't cover.
|
|
94
|
-
- **Search docs and
|
|
95
|
-
|
|
96
|
-
|
|
94
|
+
- **Search docs and code** — Use `search_docs` for IDA HTML documentation. \
|
|
95
|
+
Use `search_code` to find Python source — library API definitions and \
|
|
96
|
+
working example scripts in one query. Library entries show `def` signatures \
|
|
97
|
+
+ docstrings; example entries cover the in-IDA `python/examples` and the \
|
|
98
|
+
standalone-idalib `idalib/examples` corpora. For "everything about func X", \
|
|
99
|
+
a single `search_code("X")` call returns the API definition, example uses, \
|
|
100
|
+
and cross-linked HTML docs. When a snippet is truncated, the result carries \
|
|
101
|
+
`snippet_start_line` + `total_lines`; pass the same `file` to `get_source` \
|
|
102
|
+
to fetch additional lines (sandboxed to the indexed corpora).
|
|
97
103
|
- **Snapshot before bulk changes** — Call `create_snapshot` before renaming or \
|
|
98
104
|
retyping many symbols. Use `restore_snapshot` to roll back if something goes wrong.
|
|
99
105
|
- **Work incrementally** — Rename and retype a few variables, re-decompile, \
|
|
@@ -146,7 +152,11 @@ segment membership with `ida_segment.getseg(ea)` before accessing data.
|
|
|
146
152
|
the final script.
|
|
147
153
|
- The execution namespace persists — define helpers in one call and use \
|
|
148
154
|
them in the next.
|
|
149
|
-
- Use `search_docs`
|
|
155
|
+
- Use `search_docs` for IDA HTML documentation. Use `search_code` for \
|
|
156
|
+
Python source (API definitions + examples); set `docstring_only=True` \
|
|
157
|
+
when searching by intent ("function that opens a database") rather than \
|
|
158
|
+
identifier name. When a snippet is truncated, follow up with `get_source` \
|
|
159
|
+
to fetch the rest from the same `file`.
|
|
150
160
|
"""
|
|
151
161
|
|
|
152
162
|
|
|
@@ -36,7 +36,14 @@ mcp = FastMCP(
|
|
|
36
36
|
"annotate code, and run IDAPython scripts.\n\n"
|
|
37
37
|
"Typical workflow: open_database → list_functions → decompile → "
|
|
38
38
|
"annotate (rename_function, set_comment, set_variable) → iterate.\n\n"
|
|
39
|
-
"Only one database can be open at a time. Most tools require an open database."
|
|
39
|
+
"Only one database can be open at a time. Most tools require an open database.\n\n"
|
|
40
|
+
"Discovery: `guidelines://standalone_script`, `guidelines://plugin`, and "
|
|
41
|
+
"`guidelines://idapython_script` resources hold code templates and Hex-Rays "
|
|
42
|
+
"coding conventions — read whichever matches your task before writing. The "
|
|
43
|
+
"`reverse_engineer` and `create_script` prompts walk through full workflows. "
|
|
44
|
+
"For Python API signatures, idapro module, or example scripts use `search_code` "
|
|
45
|
+
"(then `get_source` to fetch more lines from any file it returns); "
|
|
46
|
+
"`search_docs` is HTML prose only (user-guide, developer-guide)."
|
|
40
47
|
),
|
|
41
48
|
)
|
|
42
49
|
|
|
@@ -85,6 +92,11 @@ async def open_database(
|
|
|
85
92
|
*arch* selects a specific architecture slice from a fat (universal) Mach-O
|
|
86
93
|
binary (e.g. "arm64e", "x86_64"). Use list_architectures to discover
|
|
87
94
|
available slices. Ignored for non-fat binaries.
|
|
95
|
+
|
|
96
|
+
Writing a standalone idalib script that calls this? First call
|
|
97
|
+
``get_guideline("standalone_script")`` for the bootstrap template
|
|
98
|
+
(sys.path / IDADIR setup) and Hex-Rays coding conventions — those aren't
|
|
99
|
+
in this docstring.
|
|
88
100
|
"""
|
|
89
101
|
open_path, original_path = session._prepare_open(path, arch, overwrite)
|
|
90
102
|
return await on_ida_thread(
|
|
@@ -337,30 +349,32 @@ async def list_functions(offset: int = 0, limit: int = 50, name_filter: str = ""
|
|
|
337
349
|
return await on_ida_thread(_impl)
|
|
338
350
|
|
|
339
351
|
|
|
340
|
-
|
|
341
|
-
def search_docs(
|
|
342
|
-
query: str,
|
|
343
|
-
max_results: int = 5,
|
|
344
|
-
max_snippet_length: int = 150,
|
|
345
|
-
include_examples: bool = True,
|
|
346
|
-
) -> dict:
|
|
347
|
-
"""Look up IDA HTML documentation. No database needs to be open.
|
|
352
|
+
GuidelineTarget = Literal["standalone_script", "plugin", "idapython_script"]
|
|
348
353
|
|
|
349
|
-
Use this to find prose explanations, user-guide / developer-guide
|
|
350
|
-
chapters, and conceptual context.
|
|
351
354
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
355
|
+
@mcp.tool
|
|
356
|
+
def get_guideline(target: GuidelineTarget) -> str:
|
|
357
|
+
"""Return the coding guideline for an IDA script type. No database needed.
|
|
358
|
+
|
|
359
|
+
Read this BEFORE writing any IDA Python code. Covers the bootstrap
|
|
360
|
+
template, key constraints (import order, single-thread, single-database),
|
|
361
|
+
Hex-Rays coding conventions (avoid `idc.py`/`idaapi`/`from X import Y`,
|
|
362
|
+
double-quote strings), and the search_code / search_docs / get_source
|
|
363
|
+
workflow for finding APIs and examples.
|
|
364
|
+
|
|
365
|
+
Targets:
|
|
366
|
+
- ``standalone_script`` — uses idalib outside IDA. sys.path setup,
|
|
367
|
+
``import idapro`` first, ``open_database`` / ``close_database``.
|
|
368
|
+
- ``plugin`` — IDA plugin loaded inside the GUI. Subclass of
|
|
369
|
+
``idaapi.plugin_t``, ``PLUGIN_ENTRY`` factory, hooks, actions.
|
|
370
|
+
- ``idapython_script`` — classic IDAPython script run via File >
|
|
371
|
+
Script File or the Python console. No bootstrap needed.
|
|
372
|
+
|
|
373
|
+
Identical content is also available as the MCP resource
|
|
374
|
+
``guidelines://<target>``; the tool form is offered because tool listings
|
|
375
|
+
are read more reliably than resource listings by most MCP clients.
|
|
362
376
|
"""
|
|
363
|
-
return _search_docs(query, max_results, max_snippet_length, include_examples)
|
|
377
|
+
return _guidelines.get(target)
|
|
364
378
|
|
|
365
379
|
|
|
366
380
|
CodeKind = Literal["library", "example", ""]
|
|
@@ -379,7 +393,12 @@ def search_code(
|
|
|
379
393
|
max_snippet_line_chars: int = 200,
|
|
380
394
|
include_docs: bool = True,
|
|
381
395
|
) -> dict:
|
|
382
|
-
"""Find Python source —
|
|
396
|
+
"""Find Python source — API signatures, idapro module, and example scripts.
|
|
397
|
+
|
|
398
|
+
**Primary tool for "what's the signature of X?" or "show me code that does Y"**
|
|
399
|
+
queries — covers `ida_*.py`, `idautils.py`, `idc.py`, the standalone idalib
|
|
400
|
+
`idapro` package, plus all in-IDA and idalib example scripts. No database
|
|
401
|
+
needs to be open.
|
|
383
402
|
|
|
384
403
|
Unified search over:
|
|
385
404
|
|
|
@@ -436,6 +455,35 @@ def search_code(
|
|
|
436
455
|
)
|
|
437
456
|
|
|
438
457
|
|
|
458
|
+
@mcp.tool
|
|
459
|
+
def search_docs(
|
|
460
|
+
query: str,
|
|
461
|
+
max_results: int = 5,
|
|
462
|
+
max_snippet_words: int = 25,
|
|
463
|
+
include_examples: bool = True,
|
|
464
|
+
) -> dict:
|
|
465
|
+
"""Look up IDA *HTML prose* documentation (user-guide / developer-guide).
|
|
466
|
+
|
|
467
|
+
**For Python API signatures, idapro module, or example scripts, use
|
|
468
|
+
`search_code` instead** — this tool only indexes the HTML docs, not
|
|
469
|
+
Python source. No database needs to be open.
|
|
470
|
+
|
|
471
|
+
Use this for conceptual context and chapter-style explanations:
|
|
472
|
+
"what is auto-analysis", "how does the structure editor work", etc.
|
|
473
|
+
|
|
474
|
+
Uses word-boundary matching: "set" matches "set_name" but not "reset".
|
|
475
|
+
|
|
476
|
+
When *include_examples* is True (default), also returns up to 2 matching
|
|
477
|
+
example scripts in the ``related_examples`` key (cross-linked from
|
|
478
|
+
``search_code`` with ``kind="example"``).
|
|
479
|
+
|
|
480
|
+
*max_snippet_words* caps each snippet at this many whitespace-separated
|
|
481
|
+
words (default 25). Word-based cap avoids mid-word truncation and aligns
|
|
482
|
+
more closely with LLM token cost than character cap.
|
|
483
|
+
"""
|
|
484
|
+
return _search_docs(query, max_results, max_snippet_words, include_examples)
|
|
485
|
+
|
|
486
|
+
|
|
439
487
|
@mcp.tool
|
|
440
488
|
def get_source(file: str, start_line: int = 1, line_count: int = 200) -> dict:
|
|
441
489
|
"""Read a slice of a Python file from the indexed corpora. No database needed.
|
|
@@ -82,33 +82,38 @@ class TestScore:
|
|
|
82
82
|
|
|
83
83
|
class TestExcerpt:
|
|
84
84
|
def test_short_text_returned_fully(self):
|
|
85
|
-
result = _excerpt("hello world", ["hello"],
|
|
85
|
+
result = _excerpt("hello world", ["hello"], max_words=10)
|
|
86
86
|
assert "hello world" in result
|
|
87
87
|
|
|
88
88
|
def test_excerpt_around_match(self):
|
|
89
|
-
text = "
|
|
90
|
-
result = _excerpt(text, ["target"],
|
|
89
|
+
text = " ".join(["before"] * 30 + ["TARGET"] + ["after"] * 30)
|
|
90
|
+
result = _excerpt(text, ["target"], max_words=10)
|
|
91
91
|
assert "TARGET" in result
|
|
92
|
-
assert len(result)
|
|
92
|
+
assert len(result.split()) <= 11 # 10 words + maybe ellipsis tokens
|
|
93
93
|
|
|
94
94
|
def test_ellipsis_at_start(self):
|
|
95
|
-
text = "
|
|
96
|
-
result = _excerpt(text, ["match"],
|
|
95
|
+
text = " ".join(["before"] * 50 + ["MATCH"] + ["after"] * 50)
|
|
96
|
+
result = _excerpt(text, ["match"], max_words=5)
|
|
97
97
|
assert result.startswith("...")
|
|
98
98
|
|
|
99
99
|
def test_ellipsis_at_end(self):
|
|
100
|
-
text = "
|
|
101
|
-
result = _excerpt(text, ["match"],
|
|
100
|
+
text = " ".join(["before"] * 50 + ["MATCH"] + ["after"] * 50)
|
|
101
|
+
result = _excerpt(text, ["match"], max_words=5)
|
|
102
102
|
assert result.endswith("...")
|
|
103
103
|
|
|
104
104
|
def test_no_match_returns_beginning(self):
|
|
105
105
|
text = "start of text and more"
|
|
106
|
-
result = _excerpt(text, ["nonexistent"],
|
|
106
|
+
result = _excerpt(text, ["nonexistent"], max_words=50)
|
|
107
107
|
assert "start" in result
|
|
108
108
|
|
|
109
|
-
def
|
|
110
|
-
|
|
111
|
-
|
|
109
|
+
def test_word_boundary_no_midword_cut(self):
|
|
110
|
+
"""Truncation lands on whole words, never mid-word."""
|
|
111
|
+
text = " ".join(["alphabet"] * 100)
|
|
112
|
+
result = _excerpt(text, ["alphabet"], max_words=5)
|
|
113
|
+
# Strip ellipses, every remaining word should be the full "alphabet"
|
|
114
|
+
body = result.replace("...", "").strip()
|
|
115
|
+
for word in body.split():
|
|
116
|
+
assert word == "alphabet", f"got partial word: {word!r}"
|
|
112
117
|
|
|
113
118
|
|
|
114
119
|
class TestSearchCrossLinking:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|