code-explore-by-sql 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_explore_by_sql-0.1.0.dist-info/METADATA +205 -0
- code_explore_by_sql-0.1.0.dist-info/RECORD +29 -0
- code_explore_by_sql-0.1.0.dist-info/WHEEL +4 -0
- code_explore_by_sql-0.1.0.dist-info/entry_points.txt +3 -0
- code_explore_by_sql-0.1.0.dist-info/licenses/LICENSE +21 -0
- code_source_sql/__init__.py +9 -0
- code_source_sql/__main__.py +5 -0
- code_source_sql/bracket_scanner.py +385 -0
- code_source_sql/build_db.py +284 -0
- code_source_sql/code_block_summary.py +522 -0
- code_source_sql/configs.py +402 -0
- code_source_sql/db.py +625 -0
- code_source_sql/edge_extractor.py +183 -0
- code_source_sql/languages/__init__.py +31 -0
- code_source_sql/languages/c.py +118 -0
- code_source_sql/languages/cpp.py +106 -0
- code_source_sql/languages/csharp.py +103 -0
- code_source_sql/languages/glsl.py +162 -0
- code_source_sql/languages/go.py +91 -0
- code_source_sql/languages/hlsl.py +155 -0
- code_source_sql/languages/java.py +98 -0
- code_source_sql/languages/javascript.py +215 -0
- code_source_sql/languages/kotlin.py +108 -0
- code_source_sql/languages/python.py +105 -0
- code_source_sql/languages/rust.py +91 -0
- code_source_sql/languages/swift.py +116 -0
- code_source_sql/server.py +264 -0
- code_source_sql/symbol_analyzer.py +487 -0
- code_source_sql/unreal_rules.py +163 -0
code_source_sql/db.py
ADDED
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
"""Database schema and query functions — three-table architecture per plan.md.
|
|
2
|
+
|
|
3
|
+
Tables:
|
|
4
|
+
file_content — raw source files with FTS5 trigram index
|
|
5
|
+
symbol_index — qualified names with decoration metadata, line ranges
|
|
6
|
+
strict_edges — deterministic edges only (inheritance, type_dependency, static_call, rpc_routing)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import sqlite3
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from .edge_extractor import StrictEdge
|
|
17
|
+
from .symbol_analyzer import ExtraSymbol, SymbolDef
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _resolve_lang_fw(language: str) -> tuple[Any, Any]:
    """Map a language name to its ``(LanguageConfig, FrameworkConfig)`` pair.

    The language config is looked up with ``configs.get_language``.  The
    framework is the Unreal one for ``"cpp"`` (kept for backward compatibility
    with existing databases) and the generic one for every other language.
    """
    from .configs import get_language, make_generic_framework

    language_config = get_language(language)
    if language != "cpp":
        return language_config, make_generic_framework()

    # Imported lazily so the Unreal rules are only loaded when needed.
    from .unreal_rules import make_unreal_framework

    return language_config, make_unreal_framework()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def connect(db_path: str | Path) -> sqlite3.Connection:
    """Open the SQLite database at *db_path*, creating parent directories.

    The returned connection produces ``sqlite3.Row`` rows and has WAL
    journaling, ``synchronous=NORMAL`` and foreign-key enforcement enabled.
    """
    target = Path(db_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(target)
    connection.row_factory = sqlite3.Row
    for pragma in (
        "PRAGMA journal_mode=WAL",
        "PRAGMA synchronous=NORMAL",
        "PRAGMA foreign_keys=ON",
    ):
        connection.execute(pragma)
    return connection
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def initialize_schema(conn: sqlite3.Connection) -> None:
    """Create the three-table schema if missing, then migrate older databases.

    Creates ``file_content`` (plus its external-content FTS5 trigram index and
    the insert/delete/update triggers that keep the index in sync),
    ``symbol_index`` and ``strict_edges`` with their indexes.  Every statement
    uses ``IF NOT EXISTS``, so calling this repeatedly is safe.

    Afterwards a fixed list of ``ALTER TABLE`` migrations is attempted.  A
    migration that was already applied makes SQLite raise
    ``sqlite3.OperationalError`` (duplicate column / no such column); only
    that error is ignored, so unrelated failures are no longer hidden.  The
    function ends by committing.
    """
    conn.executescript("""
        -- Table 1: File_Content_FTS
        CREATE TABLE IF NOT EXISTS file_content (
            file_id INTEGER PRIMARY KEY AUTOINCREMENT,
            module_name TEXT,
            file_path TEXT NOT NULL UNIQUE,
            content TEXT NOT NULL,
            content_hash TEXT,
            language TEXT NOT NULL DEFAULT 'cpp'
        );

        CREATE VIRTUAL TABLE IF NOT EXISTS file_content_fts USING fts5(
            file_path,
            module_name,
            content,
            content=file_content,
            content_rowid=file_id,
            tokenize="trigram"
        );

        CREATE TRIGGER IF NOT EXISTS fc_ai AFTER INSERT ON file_content BEGIN
            INSERT INTO file_content_fts(rowid, file_path, module_name, content)
            VALUES (new.file_id, new.file_path, new.module_name, new.content);
        END;

        CREATE TRIGGER IF NOT EXISTS fc_ad AFTER DELETE ON file_content BEGIN
            INSERT INTO file_content_fts(file_content_fts, rowid, file_path, module_name, content)
            VALUES ('delete', old.file_id, old.file_path, old.module_name, old.content);
        END;

        CREATE TRIGGER IF NOT EXISTS fc_au AFTER UPDATE ON file_content BEGIN
            INSERT INTO file_content_fts(file_content_fts, rowid, file_path, module_name, content)
            VALUES ('delete', old.file_id, old.file_path, old.module_name, old.content);
            INSERT INTO file_content_fts(rowid, file_path, module_name, content)
            VALUES (new.file_id, new.file_path, new.module_name, new.content);
        END;

        -- Table 2: Symbol_Index
        CREATE TABLE IF NOT EXISTS symbol_index (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            qualified_name TEXT NOT NULL,
            block_type TEXT NOT NULL,
            file_id INTEGER NOT NULL REFERENCES file_content(file_id) ON DELETE CASCADE,
            start_line INTEGER NOT NULL,
            end_line INTEGER NOT NULL,
            decoration_meta TEXT,
            parent_class TEXT,
            signature TEXT,
            inheritance_base TEXT,
            language TEXT NOT NULL DEFAULT 'cpp'
        );

        CREATE INDEX IF NOT EXISTS idx_sym_qn ON symbol_index(qualified_name);
        CREATE INDEX IF NOT EXISTS idx_sym_file ON symbol_index(file_id);
        CREATE INDEX IF NOT EXISTS idx_sym_type ON symbol_index(block_type);
        CREATE INDEX IF NOT EXISTS idx_sym_file_line ON symbol_index(file_id, start_line);

        -- Table 3: Strict_Edges
        CREATE TABLE IF NOT EXISTS strict_edges (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            source_qn TEXT NOT NULL,
            target_qn TEXT NOT NULL,
            edge_type TEXT NOT NULL,
            language TEXT NOT NULL DEFAULT 'cpp'
        );

        CREATE INDEX IF NOT EXISTS idx_edge_source ON strict_edges(source_qn);
        CREATE INDEX IF NOT EXISTS idx_edge_target ON strict_edges(target_qn);
        CREATE INDEX IF NOT EXISTS idx_edge_type ON strict_edges(edge_type);
    """)

    # Migrations for existing databases.  Each one is attempted blindly and
    # fails with OperationalError when it has already been applied.
    migrations = (
        "ALTER TABLE file_content ADD COLUMN language TEXT NOT NULL DEFAULT 'cpp'",
        "ALTER TABLE symbol_index ADD COLUMN language TEXT NOT NULL DEFAULT 'cpp'",
        "ALTER TABLE strict_edges ADD COLUMN language TEXT NOT NULL DEFAULT 'cpp'",
        "ALTER TABLE symbol_index RENAME COLUMN ue_meta TO decoration_meta",
    )
    for statement in migrations:
        try:
            conn.execute(statement)
        except sqlite3.OperationalError:
            # Column already exists / legacy column absent: nothing to do.
            continue

    conn.commit()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ── Write functions ──────────────────────────────────────────────────
|
|
139
|
+
|
|
140
|
+
def upsert_file(conn: sqlite3.Connection, file_path: str, module_name: str | None,
                content: str, content_hash: str | None = None,
                language: str = "cpp") -> int:
    """Insert or update the ``file_content`` row for *file_path*.

    Before the row is written, data previously derived from the file is
    purged: first the ``strict_edges`` rows whose ``source_qn`` is a symbol of
    this file, then the file's ``symbol_index`` rows.

    Args:
        conn: Open database connection.
        file_path: Unique path identifying the file.
        module_name: Module the file belongs to, or ``None``.
        content: Full text of the file.
        content_hash: Optional hash of *content*.
        language: Language tag stored with the file.

    Returns:
        The ``file_id`` of the inserted or updated row.
    """
    # Edges must go first: this DELETE finds its targets through
    # symbol_index, so running it after the symbols are removed would match
    # nothing and leave stale edges behind.
    conn.execute(
        "DELETE FROM strict_edges WHERE source_qn IN "
        "(SELECT qualified_name FROM symbol_index WHERE file_id = "
        "(SELECT file_id FROM file_content WHERE file_path = ?))",
        (file_path,),
    )
    conn.execute(
        "DELETE FROM symbol_index WHERE file_id = "
        "(SELECT file_id FROM file_content WHERE file_path = ?)",
        (file_path,),
    )
    conn.execute(
        """INSERT INTO file_content(file_path, module_name, content, content_hash, language)
           VALUES (?, ?, ?, ?, ?)
           ON CONFLICT(file_path) DO UPDATE SET
               module_name=excluded.module_name,
               content=excluded.content,
               content_hash=excluded.content_hash,
               language=excluded.language""",
        (file_path, module_name, content, content_hash, language),
    )
    row = conn.execute(
        "SELECT file_id FROM file_content WHERE file_path = ?", (file_path,)
    ).fetchone()
    # Positional access works with both sqlite3.Row and plain tuple rows.
    return row[0]
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def insert_symbols(conn: sqlite3.Connection, symbols: list[SymbolDef]) -> None:
    """Bulk-insert *symbols* into ``symbol_index``.

    ``decoration_meta`` is stored as JSON text, or as NULL when it is empty
    or missing.  Nothing is executed for an empty list.
    """
    if not symbols:
        return

    def as_row(sym: SymbolDef) -> tuple:
        meta_json = json.dumps(sym.decoration_meta) if sym.decoration_meta else None
        return (
            sym.qualified_name,
            sym.block_type,
            sym.file_id,
            sym.start_line,
            sym.end_line,
            meta_json,
            sym.parent_class,
            sym.signature,
            sym.inheritance_base,
            sym.language,
        )

    statement = """INSERT INTO symbol_index
           (qualified_name, block_type, file_id, start_line, end_line,
            decoration_meta, parent_class, signature, inheritance_base, language)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    conn.executemany(statement, [as_row(sym) for sym in symbols])
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def insert_extra_symbols(conn: sqlite3.Connection, extras: list[ExtraSymbol]) -> None:
    """Bulk-insert *extras* into ``symbol_index``.

    Only name, block type, file id, line range, signature and language are
    written; the remaining columns are left at their defaults.  Nothing is
    executed for an empty list.
    """
    if not extras:
        return

    statement = """INSERT INTO symbol_index
           (qualified_name, block_type, file_id, start_line, end_line, signature, language)
           VALUES (?, ?, ?, ?, ?, ?, ?)"""
    parameter_rows = []
    for extra in extras:
        parameter_rows.append((
            extra.qualified_name,
            extra.block_type,
            extra.file_id,
            extra.start_line,
            extra.end_line,
            extra.signature,
            extra.language,
        ))
    conn.executemany(statement, parameter_rows)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def insert_edges(conn: sqlite3.Connection, edges: list[StrictEdge]) -> None:
    """Bulk-insert *edges* into ``strict_edges``; an empty list does nothing."""
    if not edges:
        return

    parameter_rows = []
    for edge in edges:
        parameter_rows.append(
            (edge.source_qn, edge.target_qn, edge.edge_type, edge.language)
        )
    conn.executemany(
        "INSERT INTO strict_edges(source_qn, target_qn, edge_type, language) VALUES (?, ?, ?, ?)",
        parameter_rows,
    )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def delete_file(conn: sqlite3.Connection, file_path: str) -> None:
    """Delete the ``file_content`` row stored under *file_path*, if any."""
    parameters = (file_path,)
    conn.execute("DELETE FROM file_content WHERE file_path = ?", parameters)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def commit(conn: sqlite3.Connection) -> None:
    """Commit the transaction currently open on *conn*."""
    conn.commit()
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ── Signal enrichment helpers ────────────────────────────────────────
|
|
223
|
+
|
|
224
|
+
def _normalize_expand_item(expand_item: list[str] | None) -> list[str]:
    """Clean up a list of expansion terms.

    Each entry is converted to ``str`` and stripped.  Entries shorter than
    three characters are dropped, and case-insensitive duplicates keep only
    their first occurrence.  ``None`` or an empty list yields ``[]``.
    """
    if not expand_item:
        return []

    # Dicts preserve insertion order, so the first spelling of each
    # lower-cased key wins and the original ordering is retained.
    first_by_key: dict[str, str] = {}
    for raw in expand_item:
        text = str(raw).strip()
        if len(text) < 3:
            continue
        first_by_key.setdefault(text.lower(), text)
    return list(first_by_key.values())
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _signal_hits(text: str, expand_item: list[str]) -> list[str]:
    """Return the entries of *expand_item* that occur in *text*, ignoring case.

    Matching is plain substring containment; the input order is preserved.
    """
    haystack = text.lower()

    def occurs(term: str) -> bool:
        return term.lower() in haystack

    return [term for term in expand_item if occurs(term)]
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# ── Read_Symbol query ────────────────────────────────────────────────
|
|
244
|
+
|
|
245
|
+
def read_symbol(
    conn: sqlite3.Connection,
    qualified_name: str,
    view: str = "full",
    expand_item: list[str] | None = None,
) -> list[dict[str, Any]]:
    """Look up symbols by qualified_name. Returns minimal identity + code.

    Resolution happens in three stages:

    1. Exact match on ``qualified_name``.
    2. If nothing was found, or only "weak" blocks (``namespace`` /
       ``macro_def``) matched, each language present in ``symbol_index`` is
       asked, through its framework's ``resolve_type_prefixes``, for
       alternative names; the first alternative with an exact match wins.
    3. If there are still no rows, a substring match on the qualified name.

    Args:
        conn: Open database connection (rows must support access by name).
        qualified_name: Name to look up.
        view: View passed to ``_apply_view``; ``"meta"`` omits code entirely.
        expand_item: Optional terms used only to rank the matches: entries
            whose name/path/signature/code contain more terms come first.

    Returns:
        A list of dicts with ``qn``, ``type``, ``file`` and ``range``.  Only
        the first (best) entry additionally carries ``code``, and only when
        *view* is not ``"meta"``.  The list is empty when nothing matches.
    """
    expand_item = _normalize_expand_item(expand_item)

    select_template = (
        "SELECT si.*, fc.file_path, fc.content "
        "FROM symbol_index si "
        "JOIN file_content fc ON fc.file_id = si.file_id "
        "WHERE si.qualified_name {predicate} "
        "ORDER BY si.file_id, si.start_line"
    )
    exact_sql = select_template.format(predicate="= ?")
    partial_sql = select_template.format(predicate="LIKE ? ESCAPE '\\'")

    # 1. Exact match
    rows = conn.execute(exact_sql, (qualified_name,)).fetchall()

    # 2. Type prefix resolution
    weak_types = frozenset({"namespace", "macro_def"})
    if not rows or all(r["block_type"] in weak_types for r in rows):
        db_langs = [
            r["language"]
            for r in conn.execute("SELECT DISTINCT language FROM symbol_index").fetchall()
        ]
        # Tracked separately from `rows`: `rows` may already hold weak
        # matches, and testing it would stop the search after the first
        # language even though no prefixed candidate had been found.
        prefix_rows: list[Any] = []
        for lang_name in db_langs:
            _, framework = _resolve_lang_fw(lang_name)
            if not framework.resolve_type_prefixes:
                continue
            for candidate in framework.resolve_type_prefixes(qualified_name):
                prefix_rows = conn.execute(exact_sql, (candidate,)).fetchall()
                if prefix_rows:
                    break
            if prefix_rows:
                break
        if prefix_rows:
            rows = prefix_rows

    # 3. Partial match
    if not rows:
        # Escape LIKE wildcards so '_' and '%' in identifiers match literally.
        escaped = (
            qualified_name
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        rows = conn.execute(partial_sql, (f"%{escaped}%",)).fetchall()

    if not rows:
        return []

    # Ranking data is kept beside the public dict so nothing has to be
    # stripped from the output afterwards.
    candidates = []
    for r in rows:
        start = r["start_line"]
        end = r["end_line"]
        code = "\n".join(r["content"].split("\n")[start - 1 : end])
        qn = r["qualified_name"]

        # Signal text for ranking only
        signal_text = "\n".join([qn, r["file_path"] or "", r["signature"] or "", code])
        candidates.append({
            "hit_count": len(_signal_hits(signal_text, expand_item)),
            "code_raw": code,
            "language": r["language"],
            "public": {
                "qn": qn,
                "type": r["block_type"],
                "file": r["file_path"],
                "range": [start, end],
            },
        })

    if expand_item:
        # Stable sort: ties keep the (file_id, start_line) order from SQL.
        candidates.sort(key=lambda entry: entry["hit_count"], reverse=True)

    # Apply view to best match only
    if view != "meta":
        best = candidates[0]
        lang, fw = _resolve_lang_fw(best["language"])
        best["public"]["code"] = _apply_view(
            best["code_raw"], view,
            block_type=best["public"]["type"],
            qualified_name=best["public"]["qn"],
            lang=lang, fw=fw,
            max_lines=80 if view == "signature" else 0,
        )

    return [entry["public"] for entry in candidates]
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _apply_view(
    code: str,
    view: str,
    *,
    block_type: str | None = None,
    qualified_name: str | None = None,
    child_symbols: list[dict] | None = None,
    lang: Any | None = None,
    fw: Any | None = None,
    max_lines: int = 0,
) -> str:
    """Forward to ``code_block_summary.apply_view`` with the same arguments.

    The import is done at call time rather than at module import.
    """
    from .code_block_summary import apply_view

    options = {
        "block_type": block_type,
        "qualified_name": qualified_name,
        "child_symbols": child_symbols,
        "lang": lang,
        "fw": fw,
        "max_lines": max_lines,
    }
    return apply_view(code, view, **options)
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
# ── Search_FTS query ─────────────────────────────────────────────────
|
|
374
|
+
|
|
375
|
+
def _fts_ranked(
    conn: sqlite3.Connection,
    fts_query: str,
    path_filter: str,
    expand_item: list[str],
    limit: int,
) -> list[dict[str, Any]]:
    """Execute FTS5 ranked query, return list of {fts_rowid, rank}.

    Rows are ordered by ``bm25`` rank.  More rows than *limit* are fetched:
    ``limit * 5`` with a path filter (``* 8`` when *expand_item* is
    non-empty) and ``limit * 3`` without one (``* 6`` with *expand_item*).

    Args:
        conn: Open database connection (rows must be convertible with ``dict``).
        fts_query: Expression handed to ``MATCH`` as-is.
        path_filter: When non-empty, keep only files whose ``module_name``
            equals it or whose ``file_path`` lies below that directory.  A
            trailing ``/`` is accepted, and ``%`` / ``_`` match literally.
        expand_item: Normalised expansion terms; only their presence is used.
        limit: Base number of results requested by the caller.

    Raises:
        sqlite3.Error: If SQLite rejects *fts_query*.
    """
    select_ranked = (
        "SELECT file_content_fts.rowid AS fts_rowid, bm25(file_content_fts) AS rank "
        "FROM file_content_fts "
    )

    if path_filter:
        fetch_limit = limit * (8 if expand_item else 5)
        # Normalise to exactly one trailing slash; otherwise "src/" would
        # become "src//%" and match nothing.
        directory = path_filter.rstrip("/") + "/"
        like_pattern = (
            directory
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        ) + "%"
        cursor = conn.execute(
            select_ranked
            + "JOIN file_content fc ON fc.file_id = file_content_fts.rowid "
            "WHERE file_content_fts MATCH ? "
            "AND (fc.module_name = ? OR fc.file_path LIKE ? ESCAPE '\\') "
            "ORDER BY rank LIMIT ?",
            (fts_query, path_filter, like_pattern, fetch_limit),
        )
    else:
        cursor = conn.execute(
            select_ranked
            + "WHERE file_content_fts MATCH ? "
            "ORDER BY rank LIMIT ?",
            (fts_query, limit * (6 if expand_item else 3)),
        )
    return [dict(r) for r in cursor.fetchall()]
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _fts5_escape(query: str) -> str:
    """Turn free text into a safe FTS5 ``MATCH`` expression.

    Every run of three or more ASCII letters, digits or underscores becomes
    a quoted term, and the terms are joined with ``AND``.  If the text has no
    such run but is at least three characters long once stripped, the whole
    stripped text is used as a single quoted term.  Otherwise the empty
    phrase ``""`` is returned.

    Double quotes inside a term are doubled, which is how FTS5 escapes them
    in a quoted string; this only matters for the whole-text fallback.
    """
    import re

    words = re.findall(r'[A-Za-z0-9_]{3,}', query)
    if not words:
        stripped = query.strip()
        if len(stripped) < 3:
            return '""'
        words = [stripped]
    return " AND ".join('"' + w.replace('"', '""') + '"' for w in words)
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def search_fts(
    conn: sqlite3.Connection,
    keyword: str = "",
    path_filter: str = "",
    expand_item: list[str] | None = None,
    raw_query: str = "",
    limit: int = 30,
) -> list[dict[str, Any]]:
    """FTS5 search returning matching lines located inside code blocks.

    Query modes (mutually exclusive — use one):
      - keyword: auto-escaped AND of tokens. Simple, safe.
      - raw_query: passed directly to FTS5 MATCH. Supports column filters,
        OR, NOT, and all FTS5 syntax.  Takes precedence if both are given.

    FTS5 columns: file_path, module_name, content (all trigram).
    All terms must be ≥3 characters.

    Args:
        conn: Open database connection (rows must support access by name).
        keyword: Free-text keywords; also used to pick hit lines in each file.
        path_filter: Optional module name or directory prefix.
        expand_item: Optional terms used only to rank the results.
        raw_query: Raw FTS5 expression.
        limit: Maximum number of results returned.

    Returns:
        A list of dicts with ``file`` and ``line`` plus either ``block`` and
        ``block_type`` (enclosing symbol) or ``range`` (a suggested context
        window when no symbol encloses the line).  Empty when no query was
        given, when SQLite rejects the query, or when nothing matches.
    """
    if raw_query:
        fts_query = raw_query
    elif keyword:
        fts_query = _fts5_escape(keyword)
    else:
        return []
    expand_item = _normalize_expand_item(expand_item)

    # Execute FTS query — a malformed raw_query surfaces as a sqlite3 error.
    try:
        ranked = _fts_ranked(conn, fts_query, path_filter, expand_item, limit)
    except sqlite3.Error:
        return []
    if not ranked:
        return []

    results = []
    # NOTE(review): with raw_query only, `words` is empty and `all()` over no
    # words is true, so the first five lines of every matched file are
    # reported as hits.  Kept as-is; confirm whether that is intended.
    words = keyword.lower().split()

    for r in ranked:
        file_id = r["fts_rowid"]
        row = conn.execute(
            "SELECT file_path, module_name, content FROM file_content WHERE file_id = ?",
            (file_id,),
        ).fetchone()
        if not row:
            continue

        lines = row["content"].split("\n")
        hit_lines = []

        for i, line in enumerate(lines):
            line_lower = line.lower()
            if all(w in line_lower for w in words):
                hit_lines.append(i + 1)  # 1-based

        for hit_line in hit_lines[:5]:  # Max 5 hits per file
            block_info = _find_enclosing_symbol(conn, file_id, hit_line)

            result = {
                "file": row["file_path"],
                "line": hit_line,
            }
            if block_info:
                result["block"] = block_info["qualified_name"]
                result["block_type"] = block_info["block_type"]
            else:
                # No enclosing symbol — suggest a context window for read_file_range
                total_lines = len(lines)
                ctx_start = max(1, hit_line - 5)
                ctx_end = min(total_lines, hit_line + 25)
                result["range"] = [ctx_start, ctx_end]

            if expand_item:
                signal_text = "\n".join([
                    row["file_path"] or "",
                    row["module_name"] or "",
                    result.get("block", ""),
                    row["content"],
                ])
                hits = _signal_hits(signal_text, expand_item)
                if hits:
                    result["_expand_item_hits"] = hits  # internal ranking only

            results.append(result)

        # Checked once per file, so the list may briefly exceed `limit`;
        # the final slice below enforces the cap.
        if len(results) >= limit:
            break

    if expand_item:
        results.sort(key=lambda item: len(item.get("_expand_item_hits", [])), reverse=True)

    # Strip internal ranking field before returning
    for item in results:
        item.pop("_expand_item_hits", None)

    return results[:limit]
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def _find_enclosing_symbol(
    conn: sqlite3.Connection, file_id: int, line: int
) -> dict[str, Any] | None:
    """Return the narrowest symbol in *file_id* whose range contains *line*.

    "Narrowest" means the smallest ``end_line - start_line``.  The result has
    ``qualified_name``, ``block_type``, ``start_line`` and ``end_line``;
    ``None`` is returned when no symbol contains the line.
    """
    query = """SELECT qualified_name, block_type, start_line, end_line
           FROM symbol_index
           WHERE file_id = ? AND start_line <= ? AND end_line >= ?
           ORDER BY (end_line - start_line) ASC
           LIMIT 1"""
    innermost = conn.execute(query, (file_id, line, line)).fetchone()
    if not innermost:
        return None
    return dict(innermost)
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _get_language_for_symbol(conn: sqlite3.Connection, file_id: int, line: int) -> str:
    """Get language string for a symbol at a given position, defaulting to 'cpp'.

    The language is taken from one ``symbol_index`` row of *file_id* whose
    line range contains *line*; the query sets no ordering among several
    matching rows.
    """
    match = conn.execute(
        "SELECT language FROM symbol_index WHERE file_id = ? AND start_line <= ? AND end_line >= ? LIMIT 1",
        (file_id, line, line),
    ).fetchone()
    if not match:
        return "cpp"
    return match["language"]
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
# ── Directory structure ──────────────────────────────────────────────
|
|
540
|
+
|
|
541
|
+
def get_directory_structure(conn: sqlite3.Connection) -> dict[str, Any]:
    """Summarise how the indexed files are spread over modules.

    Returns a dict with ``total_files`` (number of ``file_content`` rows),
    ``total_modules`` (number of distinct ``module_name`` values) and
    ``modules``: up to 30 ``{"module_name", "file_count"}`` entries, largest
    module first.
    """
    file_total = conn.execute("SELECT COUNT(*) AS c FROM file_content").fetchone()["c"]

    module_rows = conn.execute(
        "SELECT module_name, COUNT(*) AS file_count "
        "FROM file_content GROUP BY module_name ORDER BY file_count DESC LIMIT 30"
    ).fetchall()

    module_total = conn.execute(
        "SELECT COUNT(DISTINCT module_name) AS c FROM file_content"
    ).fetchone()["c"]

    summary: dict[str, Any] = {}
    summary["total_files"] = file_total
    summary["total_modules"] = module_total
    summary["modules"] = list(map(dict, module_rows))
    return summary
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
# ── Read_File_Range query ─────────────────────────────────────────────
|
|
561
|
+
|
|
562
|
+
def _find_intersecting_symbols(
    conn: sqlite3.Connection, file_id: int, start_line: int, end_line: int,
) -> list[dict[str, Any]]:
    """List the symbols of *file_id* that overlap ``[start_line, end_line]``.

    A symbol overlaps when it starts on or before *end_line* and ends on or
    after *start_line*.  Results are plain dicts, shortest symbol first.
    """
    query = """SELECT id, qualified_name, block_type, start_line, end_line, decoration_meta,
                  parent_class, signature, inheritance_base, language
           FROM symbol_index
           WHERE file_id = ? AND start_line <= ? AND end_line >= ?
           ORDER BY (end_line - start_line) ASC"""
    cursor = conn.execute(query, (file_id, end_line, start_line))
    overlapping = []
    for record in cursor.fetchall():
        overlapping.append(dict(record))
    return overlapping
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def read_file_range(
    conn: sqlite3.Connection,
    file_path: str,
    start_line: int,
    end_line: int,
    view: str = "full",
    expand_item: list[str] | None = None,
) -> dict[str, Any] | None:
    """Read lines ``start_line..end_line`` (1-based, inclusive) of a stored file.

    Args:
        conn: Open database connection (rows must support access by name).
        file_path: Path of the file as stored in ``file_content``.
        start_line: First line to return; values below 1 are treated as 1.
        end_line: Last line to return; values past the end of the file are
            truncated by slicing, negative values yield no lines.
        view: View passed to ``_apply_view``; ``"meta"`` omits code entirely.
        expand_item: Accepted but not used by this function.

    Returns:
        ``None`` if the file is not in the database.  Otherwise a dict with
        ``file`` and ``range`` (the requested bounds, unmodified), plus
        ``symbols`` (overlapping symbols, when there are any) and ``code``
        (unless *view* is ``"meta"``).
    """
    row = conn.execute(
        "SELECT file_id, content, language FROM file_content WHERE file_path = ?",
        (file_path,),
    ).fetchone()
    if not row:
        return None

    lines = row["content"].split("\n")
    # Clamp both bounds: a non-positive start or a negative end would
    # otherwise be read as a negative slice index counted from the end of
    # the file and return the wrong lines.
    first_index = max(start_line, 1) - 1
    stop_index = max(end_line, 0)
    code = "\n".join(lines[first_index:stop_index])

    file_id = row["file_id"]
    file_language = row["language"]
    symbols_full = _find_intersecting_symbols(conn, file_id, start_line, end_line)

    # Apply view to code
    if view != "meta":
        lang, fw = _resolve_lang_fw(file_language)
        # A single overlapping symbol is passed as the block itself; several
        # are passed as child symbols instead.
        only_symbol = symbols_full[0] if len(symbols_full) == 1 else None
        code = _apply_view(
            code, view,
            block_type=only_symbol["block_type"] if only_symbol else None,
            qualified_name=only_symbol["qualified_name"] if only_symbol else None,
            child_symbols=symbols_full if len(symbols_full) > 1 else None,
            lang=lang, fw=fw,
            max_lines=80 if view == "signature" else 0,
        )

    # Simplified symbols for output
    symbols_out = [
        {"qn": s["qualified_name"], "type": s["block_type"], "range": [s["start_line"], s["end_line"]]}
        for s in symbols_full
    ]

    result: dict[str, Any] = {
        "file": file_path,
        "range": [start_line, end_line],
    }
    if symbols_out:
        result["symbols"] = symbols_out
    if view != "meta":
        result["code"] = code
    return result
|