code-memory 1.0.10__tar.gz → 1.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_memory-1.0.10 → code_memory-1.0.12}/.github/workflows/release-binaries.yml +1 -1
- {code_memory-1.0.10 → code_memory-1.0.12}/PKG-INFO +1 -1
- {code_memory-1.0.10 → code_memory-1.0.12}/parser.py +87 -28
- {code_memory-1.0.10 → code_memory-1.0.12}/pyproject.toml +1 -1
- {code_memory-1.0.10 → code_memory-1.0.12}/queries.py +170 -15
- {code_memory-1.0.10 → code_memory-1.0.12}/server.py +37 -2
- {code_memory-1.0.10 → code_memory-1.0.12}/.github/workflows/ci.yml +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/.github/workflows/publish.yml +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/.gitignore +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/.python-version +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/CHANGELOG.md +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/CONTRIBUTING.md +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/LICENSE +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/Makefile +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/README.md +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/code-memory.spec +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/db.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/doc_parser.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/errors.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/git_search.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/hooks/hook-sentence_transformers.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/hooks/hook-sqlite_vec.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/hooks/hook-tree_sitter.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/hooks/hook-tree_sitter_languages.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/logging_config.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/prompts/milestone_1.xml +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/prompts/milestone_2.xml +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/prompts/milestone_3.xml +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/prompts/milestone_4.xml +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/prompts/milestone_5.xml +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/prompts/milestone_6.xml +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/tests/__init__.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/tests/conftest.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/tests/test_errors.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/tests/test_logging.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/tests/test_tools.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/tests/test_validation.py +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/uv.lock +0 -0
- {code_memory-1.0.10 → code_memory-1.0.12}/validation.py +0 -0
|
@@ -53,7 +53,7 @@ jobs:
|
|
|
53
53
|
|
|
54
54
|
- name: Download embedding model (cache for offline use)
|
|
55
55
|
run: |
|
|
56
|
-
python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('jinaai/jina-code-embeddings-
|
|
56
|
+
python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('jinaai/jina-code-embeddings-0.5b', trust_remote_code=True)"
|
|
57
57
|
|
|
58
58
|
- name: Build binary with PyInstaller (Linux/Windows)
|
|
59
59
|
if: matrix.os != 'macos-latest' || matrix.target == 'macos-x86_64'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-memory
|
|
3
|
-
Version: 1.0.10
|
|
3
|
+
Version: 1.0.12
|
|
4
4
|
Summary: A deterministic, high-precision code intelligence MCP server
|
|
5
5
|
Project-URL: Homepage, https://github.com/kapillamba4/code-memory
|
|
6
6
|
Project-URL: Documentation, https://github.com/kapillamba4/code-memory#readme
|
|
@@ -47,22 +47,79 @@ def _load_gitignore_spec(root_dir: str) -> pathspec.PathSpec | None:
|
|
|
47
47
|
return None
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
"""
|
|
56
|
-
|
|
57
|
-
|
|
50
|
+
class GitignoreMatcher:
|
|
51
|
+
"""Manages .gitignore matching with support for nested .gitignore files.
|
|
52
|
+
|
|
53
|
+
Git reads all .gitignore files in the directory tree, not just the root.
|
|
54
|
+
Each nested .gitignore applies patterns relative to its own directory.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(self, root_dir: str):
|
|
58
|
+
self.root_dir = root_dir
|
|
59
|
+
self._specs: dict[str, pathspec.PathSpec] = {}
|
|
58
60
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
61
|
+
# Load root .gitignore if it exists
|
|
62
|
+
root_spec = _load_gitignore_spec(root_dir)
|
|
63
|
+
if root_spec:
|
|
64
|
+
self._specs["."] = root_spec
|
|
65
|
+
|
|
66
|
+
def _load_spec_for_dir(self, abs_dir: str, rel_dir: str) -> None:
|
|
67
|
+
"""Load .gitignore for a directory if not already loaded."""
|
|
68
|
+
if rel_dir in self._specs:
|
|
69
|
+
return
|
|
70
|
+
|
|
71
|
+
spec = _load_gitignore_spec(abs_dir)
|
|
72
|
+
if spec:
|
|
73
|
+
self._specs[rel_dir] = spec
|
|
74
|
+
|
|
75
|
+
def _get_parent_specs(self, rel_path: str) -> list[tuple[str, pathspec.PathSpec]]:
|
|
76
|
+
"""Get all applicable gitignore specs for a given path.
|
|
77
|
+
|
|
78
|
+
Returns list of (base_dir, spec) tuples for specs that apply to this path.
|
|
79
|
+
"""
|
|
80
|
+
result = []
|
|
81
|
+
path_parts = rel_path.replace("\\", "/").split("/")
|
|
82
|
+
|
|
83
|
+
# Check all ancestor directories that have .gitignore files
|
|
84
|
+
for base_dir, spec in self._specs.items():
|
|
85
|
+
if base_dir == ".":
|
|
86
|
+
# Root spec applies to everything
|
|
87
|
+
result.append((base_dir, spec))
|
|
88
|
+
else:
|
|
89
|
+
# Nested spec only applies if path is under that directory
|
|
90
|
+
base_parts = base_dir.replace("\\", "/").split("/")
|
|
91
|
+
if path_parts[:len(base_parts)] == base_parts:
|
|
92
|
+
result.append((base_dir, spec))
|
|
93
|
+
|
|
94
|
+
return result
|
|
95
|
+
|
|
96
|
+
def should_skip(self, rel_path: str, is_dir: bool) -> bool:
|
|
97
|
+
"""Check if a path should be skipped based on all applicable .gitignore patterns."""
|
|
98
|
+
# Normalize path separators for matching
|
|
99
|
+
rel_path = rel_path.replace("\\", "/")
|
|
100
|
+
|
|
101
|
+
for base_dir, spec in self._get_parent_specs(rel_path):
|
|
102
|
+
# For nested gitignores, compute path relative to that gitignore's directory
|
|
103
|
+
if base_dir == ".":
|
|
104
|
+
check_path = rel_path
|
|
105
|
+
else:
|
|
106
|
+
base_prefix = base_dir.replace("\\", "/") + "/"
|
|
107
|
+
if rel_path.startswith(base_prefix):
|
|
108
|
+
check_path = rel_path[len(base_prefix):]
|
|
109
|
+
else:
|
|
110
|
+
continue
|
|
64
111
|
|
|
65
|
-
|
|
112
|
+
# Check both the path as-is and with trailing slash for directories
|
|
113
|
+
if spec.match_file(check_path):
|
|
114
|
+
return True
|
|
115
|
+
if is_dir and spec.match_file(check_path + "/"):
|
|
116
|
+
return True
|
|
117
|
+
|
|
118
|
+
return False
|
|
119
|
+
|
|
120
|
+
def check_dir_for_gitignore(self, abs_dir: str, rel_dir: str) -> None:
|
|
121
|
+
"""Check if directory contains a .gitignore and load it."""
|
|
122
|
+
self._load_spec_for_dir(abs_dir, rel_dir)
|
|
66
123
|
|
|
67
124
|
# ── File extensions we consider "source code" ─────────────────────────
|
|
68
125
|
_SOURCE_EXTENSIONS = frozenset({
|
|
@@ -397,8 +454,9 @@ def index_file(filepath: str, db) -> dict:
|
|
|
397
454
|
def index_directory(dirpath: str, db) -> list[dict]:
|
|
398
455
|
"""Recursively index all source files under *dirpath*.
|
|
399
456
|
|
|
400
|
-
Skips directories in ``_SKIP_DIRS``, files matching ``.gitignore`` patterns
|
|
401
|
-
and unchanged files. Indexes any file
|
|
457
|
+
Skips directories in ``_SKIP_DIRS``, files matching ``.gitignore`` patterns
|
|
458
|
+
(including nested .gitignore files), and unchanged files. Indexes any file
|
|
459
|
+
with a recognised source-code extension.
|
|
402
460
|
|
|
403
461
|
Args:
|
|
404
462
|
dirpath: Root directory to scan.
|
|
@@ -413,32 +471,33 @@ def index_directory(dirpath: str, db) -> list[dict]:
|
|
|
413
471
|
dirpath = os.path.abspath(dirpath)
|
|
414
472
|
total_start = time.perf_counter()
|
|
415
473
|
|
|
416
|
-
#
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
logger.debug("Loaded .gitignore patterns from %s", dirpath)
|
|
474
|
+
# Initialize gitignore matcher (supports nested .gitignore files)
|
|
475
|
+
gitignore = GitignoreMatcher(dirpath)
|
|
476
|
+
logger.debug("Initialized gitignore matcher for %s", dirpath)
|
|
420
477
|
|
|
421
478
|
for root, dirs, files in os.walk(dirpath, topdown=True):
|
|
422
479
|
rel_root = os.path.relpath(root, dirpath)
|
|
423
480
|
|
|
481
|
+
# Check for .gitignore in current directory and load it
|
|
482
|
+
if rel_root != ".":
|
|
483
|
+
gitignore.check_dir_for_gitignore(root, rel_root)
|
|
484
|
+
|
|
424
485
|
# Prune skipped directories in-place (always-skip + gitignore)
|
|
425
486
|
def _should_keep_dir(d: str) -> bool:
|
|
426
487
|
if d in _SKIP_DIRS or d.endswith(".egg-info"):
|
|
427
488
|
return False
|
|
428
|
-
if
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
return False
|
|
489
|
+
rel_path = os.path.join(rel_root, d) if rel_root != "." else d
|
|
490
|
+
if gitignore.should_skip(rel_path, is_dir=True):
|
|
491
|
+
return False
|
|
432
492
|
return True
|
|
433
493
|
|
|
434
494
|
dirs[:] = [d for d in dirs if _should_keep_dir(d)]
|
|
435
495
|
|
|
436
496
|
for fname in sorted(files):
|
|
437
497
|
# Skip files matching .gitignore patterns
|
|
438
|
-
if
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
continue
|
|
498
|
+
rel_path = os.path.join(rel_root, fname) if rel_root != "." else fname
|
|
499
|
+
if gitignore.should_skip(rel_path, is_dir=False):
|
|
500
|
+
continue
|
|
442
501
|
|
|
443
502
|
ext = os.path.splitext(fname)[1].lower()
|
|
444
503
|
# Accept files with known extensions, or files with a
|
|
@@ -156,7 +156,7 @@ def hybrid_search(query: str, db, top_k: int = 10) -> list[dict]:
|
|
|
156
156
|
# ---------------------------------------------------------------------------
|
|
157
157
|
|
|
158
158
|
|
|
159
|
-
def find_definition(symbol_name: str, db) -> list[dict]:
|
|
159
|
+
def find_definition(symbol_name: str, db, include_context: bool = True) -> list[dict]:
|
|
160
160
|
"""Find where *symbol_name* is defined using hybrid search.
|
|
161
161
|
|
|
162
162
|
Post-filters for exact name matches first; falls back to top hybrid
|
|
@@ -165,22 +165,89 @@ def find_definition(symbol_name: str, db) -> list[dict]:
|
|
|
165
165
|
Args:
|
|
166
166
|
symbol_name: The name of the symbol to find.
|
|
167
167
|
db: An open ``sqlite3.Connection``.
|
|
168
|
+
include_context: If True, include docstrings and parent symbol info.
|
|
168
169
|
|
|
169
170
|
Returns:
|
|
170
|
-
A list of result dicts.
|
|
171
|
+
A list of result dicts with enriched information.
|
|
171
172
|
"""
|
|
172
173
|
results = hybrid_search(symbol_name, db, top_k=20)
|
|
173
174
|
|
|
174
175
|
# Exact-match filter (case-sensitive)
|
|
175
176
|
exact = [r for r in results if r["name"] == symbol_name]
|
|
176
|
-
if exact:
|
|
177
|
-
return exact
|
|
177
|
+
matched = exact if exact else results[:5]
|
|
178
178
|
|
|
179
|
-
|
|
180
|
-
|
|
179
|
+
if not include_context:
|
|
180
|
+
return matched
|
|
181
181
|
|
|
182
|
+
# Enrich results with docstrings and parent information
|
|
183
|
+
enriched = []
|
|
184
|
+
for r in matched:
|
|
185
|
+
symbol_id = r.get("symbol_id") or _get_symbol_id(r["name"], r["file_path"], db)
|
|
186
|
+
enriched_result = {
|
|
187
|
+
**r,
|
|
188
|
+
"docstring": None,
|
|
189
|
+
"parent": None,
|
|
190
|
+
"signature": _extract_signature(r.get("source_text", "")),
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
# Get parent symbol
|
|
194
|
+
if symbol_id:
|
|
195
|
+
parent_row = db.execute(
|
|
196
|
+
"""
|
|
197
|
+
SELECT p.name, p.kind
|
|
198
|
+
FROM symbols s
|
|
199
|
+
LEFT JOIN symbols p ON p.id = s.parent_symbol_id
|
|
200
|
+
WHERE s.id = ?
|
|
201
|
+
""",
|
|
202
|
+
(symbol_id,),
|
|
203
|
+
).fetchone()
|
|
204
|
+
if parent_row and parent_row[0]:
|
|
205
|
+
enriched_result["parent"] = {"name": parent_row[0], "kind": parent_row[1]}
|
|
206
|
+
|
|
207
|
+
# Get docstring from doc_chunks
|
|
208
|
+
doc_row = db.execute(
|
|
209
|
+
"""
|
|
210
|
+
SELECT dc.content
|
|
211
|
+
FROM doc_chunks dc
|
|
212
|
+
JOIN doc_files df ON df.id = dc.doc_file_id
|
|
213
|
+
WHERE df.path = ? AND dc.line_start <= ? AND dc.line_end >= ?
|
|
214
|
+
AND df.doc_type = 'docstring'
|
|
215
|
+
LIMIT 1
|
|
216
|
+
""",
|
|
217
|
+
(r["file_path"], r["line_start"], r["line_start"]),
|
|
218
|
+
).fetchone()
|
|
219
|
+
if doc_row:
|
|
220
|
+
enriched_result["docstring"] = doc_row[0]
|
|
221
|
+
|
|
222
|
+
enriched.append(enriched_result)
|
|
182
223
|
|
|
183
|
-
|
|
224
|
+
return enriched
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _get_symbol_id(name: str, file_path: str, db) -> int | None:
|
|
228
|
+
"""Get symbol ID by name and file path."""
|
|
229
|
+
row = db.execute(
|
|
230
|
+
"SELECT id FROM symbols WHERE name = ? AND file_id = (SELECT id FROM files WHERE path = ?)",
|
|
231
|
+
(name, file_path),
|
|
232
|
+
).fetchone()
|
|
233
|
+
return row[0] if row else None
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _extract_signature(source_text: str) -> str | None:
|
|
237
|
+
"""Extract the function/class signature from source text."""
|
|
238
|
+
if not source_text:
|
|
239
|
+
return None
|
|
240
|
+
lines = source_text.strip().split("\n")
|
|
241
|
+
if not lines:
|
|
242
|
+
return None
|
|
243
|
+
# Return first meaningful line (signature)
|
|
244
|
+
first_line = lines[0].strip()
|
|
245
|
+
if len(first_line) > 100:
|
|
246
|
+
return first_line[:100] + "..."
|
|
247
|
+
return first_line if first_line else None
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def find_references(symbol_name: str, db, include_context: bool = True) -> list[dict]:
|
|
184
251
|
"""Find all cross-references to *symbol_name*.
|
|
185
252
|
|
|
186
253
|
Queries the ``references_`` table for exact matches.
|
|
@@ -188,9 +255,10 @@ def find_references(symbol_name: str, db) -> list[dict]:
|
|
|
188
255
|
Args:
|
|
189
256
|
symbol_name: The name of the symbol to find references for.
|
|
190
257
|
db: An open ``sqlite3.Connection``.
|
|
258
|
+
include_context: If True, include source context and containing symbol.
|
|
191
259
|
|
|
192
260
|
Returns:
|
|
193
|
-
A list of dicts with
|
|
261
|
+
A list of dicts with enriched reference information.
|
|
194
262
|
"""
|
|
195
263
|
rows = db.execute(
|
|
196
264
|
"""
|
|
@@ -203,10 +271,51 @@ def find_references(symbol_name: str, db) -> list[dict]:
|
|
|
203
271
|
(symbol_name,),
|
|
204
272
|
).fetchall()
|
|
205
273
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
274
|
+
if not include_context:
|
|
275
|
+
return [
|
|
276
|
+
{"symbol_name": r[0], "file_path": r[1], "line_number": r[2]}
|
|
277
|
+
for r in rows
|
|
278
|
+
]
|
|
279
|
+
|
|
280
|
+
# Enrich with context
|
|
281
|
+
enriched = []
|
|
282
|
+
for r in rows:
|
|
283
|
+
ref = {
|
|
284
|
+
"symbol_name": r[0],
|
|
285
|
+
"file_path": r[1],
|
|
286
|
+
"line_number": r[2],
|
|
287
|
+
"source_line": None,
|
|
288
|
+
"containing_symbol": None,
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
# Get the source line at this reference
|
|
292
|
+
try:
|
|
293
|
+
with open(r[1], "r") as f:
|
|
294
|
+
lines = f.readlines()
|
|
295
|
+
if 0 < r[2] <= len(lines):
|
|
296
|
+
ref["source_line"] = lines[r[2] - 1].strip()
|
|
297
|
+
except Exception:
|
|
298
|
+
pass
|
|
299
|
+
|
|
300
|
+
# Find containing symbol
|
|
301
|
+
containing = db.execute(
|
|
302
|
+
"""
|
|
303
|
+
SELECT s.name, s.kind
|
|
304
|
+
FROM symbols s
|
|
305
|
+
JOIN files f ON f.id = s.file_id
|
|
306
|
+
WHERE f.path = ?
|
|
307
|
+
AND s.line_start <= ? AND s.line_end >= ?
|
|
308
|
+
ORDER BY (s.line_end - s.line_start)
|
|
309
|
+
LIMIT 1
|
|
310
|
+
""",
|
|
311
|
+
(r[1], r[2], r[2]),
|
|
312
|
+
).fetchone()
|
|
313
|
+
if containing:
|
|
314
|
+
ref["containing_symbol"] = {"name": containing[0], "kind": containing[1]}
|
|
315
|
+
|
|
316
|
+
enriched.append(ref)
|
|
317
|
+
|
|
318
|
+
return enriched
|
|
210
319
|
|
|
211
320
|
|
|
212
321
|
def get_file_structure(file_path: str, db) -> list[dict]:
|
|
@@ -449,7 +558,7 @@ def _add_context_chunks(results: list[dict], db) -> list[dict]:
|
|
|
449
558
|
# ---------------------------------------------------------------------------
|
|
450
559
|
|
|
451
560
|
|
|
452
|
-
def discover_topic(topic_query: str, db, top_k: int = 15) -> list[dict]:
|
|
561
|
+
def discover_topic(topic_query: str, db, top_k: int = 15, include_snippets: bool = True) -> list[dict]:
|
|
453
562
|
"""Discover files and code related to a high-level topic or feature.
|
|
454
563
|
|
|
455
564
|
This function performs broad semantic search across both code symbols
|
|
@@ -465,6 +574,7 @@ def discover_topic(topic_query: str, db, top_k: int = 15) -> list[dict]:
|
|
|
465
574
|
Examples: "authentication", "workout tracking", "email notifications"
|
|
466
575
|
db: An open ``sqlite3.Connection``.
|
|
467
576
|
top_k: Maximum number of files to return (default 15).
|
|
577
|
+
include_snippets: If True, include code snippets for top symbols.
|
|
468
578
|
|
|
469
579
|
Returns:
|
|
470
580
|
A list of file-level results, each containing:
|
|
@@ -472,7 +582,9 @@ def discover_topic(topic_query: str, db, top_k: int = 15) -> list[dict]:
|
|
|
472
582
|
- relevance_score: Combined semantic relevance score
|
|
473
583
|
- matched_symbols: List of symbol names that matched the topic
|
|
474
584
|
- matched_docs: List of doc section titles that matched
|
|
585
|
+
- symbol_kinds: Types of symbols found (function, class, etc.)
|
|
475
586
|
- summary: Brief description of what in this file is relevant
|
|
587
|
+
- top_snippets: Code snippets from top-matching symbols (if include_snippets)
|
|
476
588
|
"""
|
|
477
589
|
# Run parallel searches on both code symbols and documentation
|
|
478
590
|
code_results = hybrid_search(topic_query, db, top_k=50)
|
|
@@ -492,10 +604,19 @@ def discover_topic(topic_query: str, db, top_k: int = 15) -> list[dict]:
|
|
|
492
604
|
"matched_symbols": [],
|
|
493
605
|
"matched_docs": [],
|
|
494
606
|
"symbol_kinds": set(),
|
|
607
|
+
"symbol_details": [], # Store full details for snippets
|
|
495
608
|
}
|
|
496
609
|
file_aggregates[fp]["relevance_score"] += r.get("score", 0.5)
|
|
497
610
|
file_aggregates[fp]["matched_symbols"].append(r.get("name", ""))
|
|
498
611
|
file_aggregates[fp]["symbol_kinds"].add(r.get("kind", ""))
|
|
612
|
+
file_aggregates[fp]["symbol_details"].append({
|
|
613
|
+
"name": r.get("name"),
|
|
614
|
+
"kind": r.get("kind"),
|
|
615
|
+
"line_start": r.get("line_start"),
|
|
616
|
+
"line_end": r.get("line_end"),
|
|
617
|
+
"source_text": r.get("source_text"),
|
|
618
|
+
"score": r.get("score"),
|
|
619
|
+
})
|
|
499
620
|
|
|
500
621
|
for r in doc_results:
|
|
501
622
|
fp = r.get("source_file", "")
|
|
@@ -508,6 +629,7 @@ def discover_topic(topic_query: str, db, top_k: int = 15) -> list[dict]:
|
|
|
508
629
|
"matched_symbols": [],
|
|
509
630
|
"matched_docs": [],
|
|
510
631
|
"symbol_kinds": set(),
|
|
632
|
+
"symbol_details": [],
|
|
511
633
|
}
|
|
512
634
|
file_aggregates[fp]["relevance_score"] += r.get("score", 0.5)
|
|
513
635
|
section = r.get("section_title", "")
|
|
@@ -531,13 +653,46 @@ def discover_topic(topic_query: str, db, top_k: int = 15) -> list[dict]:
|
|
|
531
653
|
|
|
532
654
|
kinds = ", ".join(k for k in item["symbol_kinds"] if k)
|
|
533
655
|
|
|
534
|
-
|
|
656
|
+
result = {
|
|
535
657
|
"file_path": item["file_path"],
|
|
536
658
|
"relevance_score": round(item["relevance_score"], 4),
|
|
537
659
|
"matched_symbols": item["matched_symbols"][:10],
|
|
538
660
|
"matched_docs": item["matched_docs"][:5],
|
|
539
661
|
"symbol_kinds": kinds,
|
|
540
662
|
"summary": f"Contains {kinds}: {symbol_summary}" if kinds else f"Related symbols: {symbol_summary}",
|
|
541
|
-
}
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
# Add top snippets if requested
|
|
666
|
+
if include_snippets and item["symbol_details"]:
|
|
667
|
+
# Sort by score and take top 2
|
|
668
|
+
top_symbols = sorted(
|
|
669
|
+
item["symbol_details"],
|
|
670
|
+
key=lambda x: x.get("score", 0) or 0,
|
|
671
|
+
reverse=True
|
|
672
|
+
)[:2]
|
|
673
|
+
result["top_snippets"] = [
|
|
674
|
+
{
|
|
675
|
+
"name": s["name"],
|
|
676
|
+
"kind": s["kind"],
|
|
677
|
+
"line_range": f"{s['line_start']}-{s['line_end']}",
|
|
678
|
+
"code": _truncate_code(s.get("source_text", ""), max_lines=15),
|
|
679
|
+
}
|
|
680
|
+
for s in top_symbols if s.get("source_text")
|
|
681
|
+
]
|
|
682
|
+
|
|
683
|
+
results.append(result)
|
|
542
684
|
|
|
543
685
|
return results
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
def _truncate_code(source_text: str, max_lines: int = 15, max_chars: int = 500) -> str:
|
|
689
|
+
"""Truncate source code to a reasonable preview size."""
|
|
690
|
+
if not source_text:
|
|
691
|
+
return ""
|
|
692
|
+
lines = source_text.strip().split("\n")
|
|
693
|
+
if len(lines) <= max_lines and len(source_text) <= max_chars:
|
|
694
|
+
return source_text.strip()
|
|
695
|
+
truncated = "\n".join(lines[:max_lines])
|
|
696
|
+
if len(truncated) > max_chars:
|
|
697
|
+
truncated = truncated[:max_chars]
|
|
698
|
+
return truncated + "\n// ... (truncated)"
|
|
@@ -41,6 +41,15 @@ if the codebase has not been indexed. Always check if indexing is needed:
|
|
|
41
41
|
2. THEN: Use search_code, search_docs, or search_history to find information
|
|
42
42
|
3. RE-INDEX: If you modify files or haven't indexed recently, run index_codebase again
|
|
43
43
|
|
|
44
|
+
TOOL SELECTION - USE THESE INSTEAD OF grep/glob/find:
|
|
45
|
+
|
|
46
|
+
When you would normally use grep, rg, find, or glob, use search_code instead:
|
|
47
|
+
- "grep -r pattern" → search_code(query="pattern", search_type="topic_discovery")
|
|
48
|
+
- "find . -name '*.py' | xargs grep 'class X'" → search_code(query="X", search_type="definition")
|
|
49
|
+
- "Show me files related to auth" → search_code(query="auth", search_type="topic_discovery")
|
|
50
|
+
|
|
51
|
+
search_code provides SEMANTIC understanding - it finds related concepts, not just text matches.
|
|
52
|
+
|
|
44
53
|
When to use each tool:
|
|
45
54
|
- search_code: THE PREFERRED tool for finding code. Use "topic_discovery" for feature/domain searches (e.g., "workout related files"), "definition" for specific symbols, "references" for usages.
|
|
46
55
|
- search_docs: Understanding architecture, reading documentation/READMEs
|
|
@@ -125,7 +134,9 @@ def search_code(
|
|
|
125
134
|
search_type: Literal["topic_discovery", "definition", "references", "file_structure"],
|
|
126
135
|
directory: str,
|
|
127
136
|
) -> dict:
|
|
128
|
-
"""
|
|
137
|
+
"""USE THIS INSTEAD OF grep/glob/find for ANY code search. This tool provides SEMANTIC code understanding - it finds related concepts, not just text matches.
|
|
138
|
+
|
|
139
|
+
STOP: Before using grep, rg, find, or glob, use this tool instead. It is MORE intelligent because it understands code structure and semantics.
|
|
129
140
|
|
|
130
141
|
PREREQUISITE: This tool requires indexing. If results are empty or you haven't indexed this session, call index_codebase(directory) first.
|
|
131
142
|
|
|
@@ -169,6 +180,12 @@ def search_code(
|
|
|
169
180
|
- "references": "send_email", "validate_token"
|
|
170
181
|
- "file_structure": "src/services/auth.py"
|
|
171
182
|
|
|
183
|
+
INSTEAD OF GREP EXAMPLES:
|
|
184
|
+
- Instead of: grep -r "auth" . → Use: search_code(query="auth", search_type="topic_discovery")
|
|
185
|
+
- Instead of: grep -r "class User" → Use: search_code(query="User", search_type="definition")
|
|
186
|
+
- Instead of: grep -r "import.*auth" → Use: search_code(query="auth", search_type="references")
|
|
187
|
+
- Instead of: find . -name "*.py" | xargs grep "login" → Use: search_code(query="login", search_type="topic_discovery")
|
|
188
|
+
|
|
172
189
|
Do NOT use this tool for:
|
|
173
190
|
- Reading full file contents (use your built-in file reader)
|
|
174
191
|
- Git history queries (use search_history)
|
|
@@ -183,7 +200,25 @@ def search_code(
|
|
|
183
200
|
directory: Path to the project directory to search.
|
|
184
201
|
|
|
185
202
|
Returns:
|
|
186
|
-
Dict with status, search_type, query, and results array.
|
|
203
|
+
Dict with status, search_type, query, and results array.
|
|
204
|
+
|
|
205
|
+
For topic_discovery, each result includes:
|
|
206
|
+
- file_path, relevance_score, matched_symbols, matched_docs, symbol_kinds, summary
|
|
207
|
+
- top_snippets: Code snippets from top-matching symbols
|
|
208
|
+
|
|
209
|
+
For definition, each result includes:
|
|
210
|
+
- name, kind, file_path, line_start, line_end, source_text, score
|
|
211
|
+
- docstring: Extracted docstring (if available)
|
|
212
|
+
- parent: {name, kind} of containing class/module
|
|
213
|
+
- signature: First line of the symbol (function signature or class declaration)
|
|
214
|
+
|
|
215
|
+
For references, each result includes:
|
|
216
|
+
- symbol_name, file_path, line_number
|
|
217
|
+
- source_line: The actual line of code with the reference
|
|
218
|
+
- containing_symbol: {name, kind} of the function/class containing this reference
|
|
219
|
+
|
|
220
|
+
For file_structure, each result includes:
|
|
221
|
+
- name, kind, line_start, line_end, parent
|
|
187
222
|
"""
|
|
188
223
|
with logging_config.ToolLogger("search_code", query=query, search_type=search_type) as log:
|
|
189
224
|
try:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|