sari-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -0
- app/config.py +240 -0
- app/db.py +932 -0
- app/dedup_queue.py +77 -0
- app/engine_registry.py +56 -0
- app/engine_runtime.py +472 -0
- app/http_server.py +204 -0
- app/indexer.py +1532 -0
- app/main.py +147 -0
- app/models.py +39 -0
- app/queue_pipeline.py +65 -0
- app/ranking.py +144 -0
- app/registry.py +172 -0
- app/search_engine.py +572 -0
- app/watcher.py +124 -0
- app/workspace.py +286 -0
- deckard/__init__.py +3 -0
- deckard/__main__.py +4 -0
- deckard/main.py +345 -0
- deckard/version.py +1 -0
- mcp/__init__.py +1 -0
- mcp/__main__.py +19 -0
- mcp/cli.py +485 -0
- mcp/daemon.py +149 -0
- mcp/proxy.py +304 -0
- mcp/registry.py +218 -0
- mcp/server.py +519 -0
- mcp/session.py +234 -0
- mcp/telemetry.py +112 -0
- mcp/test_cli.py +89 -0
- mcp/test_daemon.py +124 -0
- mcp/test_server.py +197 -0
- mcp/tools/__init__.py +14 -0
- mcp/tools/_util.py +244 -0
- mcp/tools/deckard_guide.py +32 -0
- mcp/tools/doctor.py +208 -0
- mcp/tools/get_callers.py +60 -0
- mcp/tools/get_implementations.py +60 -0
- mcp/tools/index_file.py +75 -0
- mcp/tools/list_files.py +138 -0
- mcp/tools/read_file.py +48 -0
- mcp/tools/read_symbol.py +99 -0
- mcp/tools/registry.py +212 -0
- mcp/tools/repo_candidates.py +89 -0
- mcp/tools/rescan.py +46 -0
- mcp/tools/scan_once.py +54 -0
- mcp/tools/search.py +208 -0
- mcp/tools/search_api_endpoints.py +72 -0
- mcp/tools/search_symbols.py +63 -0
- mcp/tools/status.py +135 -0
- sari/__init__.py +1 -0
- sari/__main__.py +4 -0
- sari-0.0.1.dist-info/METADATA +521 -0
- sari-0.0.1.dist-info/RECORD +58 -0
- sari-0.0.1.dist-info/WHEEL +5 -0
- sari-0.0.1.dist-info/entry_points.txt +2 -0
- sari-0.0.1.dist-info/licenses/LICENSE +21 -0
- sari-0.0.1.dist-info/top_level.txt +4 -0
app/db.py
ADDED
@@ -0,0 +1,932 @@
import sqlite3
import threading
import time
import zlib
import unicodedata
import os
from pathlib import Path
from typing import Any, Iterable, Optional, Tuple, List, Dict

# Support both `python3 app/db.py` (script mode) and package mode.
try:
    from .models import SearchHit, SearchOptions
    from .ranking import get_file_extension, glob_to_like
    from .engine_registry import get_registry
except ImportError:
    from models import SearchHit, SearchOptions
    from ranking import get_file_extension, glob_to_like
    from engine_registry import get_registry

def _compress(text: str) -> bytes:
    if not text: return b""
    return zlib.compress(text.encode("utf-8"), level=6)

def _decompress(data: Any) -> str:
    if not data: return ""
    if isinstance(data, str): return data  # legacy
    try:
        return zlib.decompress(data).decode("utf-8")
    except Exception:
        return str(data)


def _normalize_engine_text(text: str) -> str:
    if not text:
        return ""
    norm = unicodedata.normalize("NFKC", text)
    norm = norm.lower()
    norm = " ".join(norm.split())
    return norm

class LocalSearchDB:
    """SQLite + optional FTS5 backed index.

    Design goals:
    - Low IO overhead: batch writes, WAL.
    - Thread safety: separate read/write connections.
    - Safer defaults: DB stored under user cache dir by default.

    v2.3.1 enhancements:
    - File type filtering
    - Path pattern matching (glob)
    - Exclude patterns
    - Recency boost
    - Regex search mode
    """

    def __init__(self, db_path: str):
        self.db_path = db_path
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)

        # Separate connections: writer (indexer) and reader (HTTP).
        self._write = sqlite3.connect(db_path, check_same_thread=False)
        self._read = sqlite3.connect(db_path, check_same_thread=False)
        self._write.row_factory = sqlite3.Row
        self._read.row_factory = sqlite3.Row

        # Register decompression function (v2.7.0)
        self._write.create_function("deckard_decompress", 1, _decompress)
        self._read.create_function("deckard_decompress", 1, _decompress)

        self._lock = threading.Lock()

        self._read_lock = threading.Lock()

        self._apply_pragmas(self._write)
        self._apply_pragmas(self._read)

        self._fts_enabled = self._try_enable_fts(self._write)
        self._init_schema()

        # TTL Cache for stats (v2.5.1)
        self._stats_cache: dict[str, Any] = {}
        self._stats_cache_ts = 0.0
        self._stats_cache_ttl = 60.0  # 60 seconds

        self.engine = get_registry().create("sqlite", self)

    def set_engine(self, engine: Any) -> None:
        self.engine = engine

    @staticmethod
    def _apply_pragmas(conn: sqlite3.Connection) -> None:
        # conn.execute("PRAGMA foreign_keys=ON;")  # Disabled for compatibility with legacy extraction-only tests
        conn.execute("PRAGMA journal_mode=WAL;")
        conn.execute("PRAGMA synchronous=NORMAL;")
        conn.execute("PRAGMA temp_store=MEMORY;")
        conn.execute("PRAGMA busy_timeout=2000;")
        conn.execute("PRAGMA cache_size=-20000;")

    def open_writer_connection(self) -> sqlite3.Connection:
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        conn.row_factory = sqlite3.Row
        conn.create_function("deckard_decompress", 1, _decompress)
        self._apply_pragmas(conn)
        return conn

    @property
    def fts_enabled(self) -> bool:
        return self._fts_enabled

    def close(self) -> None:
        for c in (self._read, self._write):
            try:
                c.close()
            except Exception:
                pass

    # ----------------------------
    # Transaction-safe *_tx methods (no commit/rollback here)
    # ----------------------------

    def upsert_files_tx(self, cur: sqlite3.Cursor, rows: Iterable[tuple]) -> int:
        rows_list = []
        for r in rows:
            r_list = list(r)
            # Pad legacy rows (path, repo, mtime, size, content, last_seen)
            if len(r_list) < 14:
                while len(r_list) < 6:
                    r_list.append(0)
                defaults = ["none", "none", "none", "none", 0, 0, 0, 0]
                r_list.extend(defaults[: (14 - len(r_list))])
            compressed_content = _compress(r_list[4])
            rows_list.append((
                r_list[0], r_list[1], r_list[2], r_list[3], compressed_content,
                r_list[5], r_list[6], r_list[7], r_list[8], r_list[9],
                r_list[10], r_list[11], r_list[12], r_list[13]
            ))
        if not rows_list:
            return 0
        cur.executemany(
            """
            INSERT INTO files(path, repo, mtime, size, content, last_seen, parse_status, parse_reason, ast_status, ast_reason, is_binary, is_minified, sampled, content_bytes)
            VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            ON CONFLICT(path) DO UPDATE SET
                repo=excluded.repo,
                mtime=excluded.mtime,
                size=excluded.size,
                content=excluded.content,
                last_seen=excluded.last_seen,
                parse_status=excluded.parse_status,
                parse_reason=excluded.parse_reason,
                ast_status=excluded.ast_status,
                ast_reason=excluded.ast_reason,
                is_binary=excluded.is_binary,
                is_minified=excluded.is_minified,
                sampled=excluded.sampled,
                content_bytes=excluded.content_bytes
            WHERE excluded.mtime >= files.mtime;
            """,
            rows_list,
        )
        # Clear old symbols for updated paths to ensure consistency (v2.8.0)
        cur.executemany("DELETE FROM symbols WHERE path = ?", [(r[0],) for r in rows_list])
        return len(rows_list)

    def upsert_symbols_tx(self, cur: sqlite3.Cursor, symbols: Iterable[tuple]) -> int:
        if hasattr(symbols, "symbols"):
            symbols_list = list(getattr(symbols, "symbols"))
        else:
            symbols_list = list(symbols)
        if not symbols_list:
            return 0
        normalized = []
        for s in symbols_list:
            if len(s) == 7:
                normalized.append(s + ("{}", ""))
            elif len(s) == 9:
                normalized.append(s)
            else:
                tmp = list(s) + [""] * (9 - len(s))
                normalized.append(tuple(tmp[:9]))
        symbols_list = normalized
        paths = {s[0] for s in symbols_list}
        cur.executemany("DELETE FROM symbols WHERE path = ?", [(p,) for p in paths])
        cur.executemany(
            """
            INSERT INTO symbols(path, name, kind, line, end_line, content, parent_name, metadata, docstring)
            VALUES(?,?,?,?,?,?,?,?,?)
            """,
            symbols_list,
        )
        return len(symbols_list)

    def upsert_relations_tx(self, cur: sqlite3.Cursor, relations: Iterable[tuple[str, str, str, str, str, int]]) -> int:
        rels_list = list(relations)
        if not rels_list:
            return 0
        paths = {r[0] for r in rels_list}
        cur.executemany("DELETE FROM symbol_relations WHERE from_path = ?", [(p,) for p in paths])
        cur.executemany(
            """
            INSERT INTO symbol_relations(from_path, from_symbol, to_path, to_symbol, rel_type, line)
            VALUES(?,?,?,?,?,?)
            """,
            rels_list,
        )
        return len(rels_list)

    def delete_path_tx(self, cur: sqlite3.Cursor, path: str) -> None:
        # Explicit delete order: relations -> symbols -> files (no FK/cascade dependency)
        cur.execute("DELETE FROM symbol_relations WHERE from_path = ? OR to_path = ?", (path, path))
        cur.execute("DELETE FROM symbols WHERE path = ?", (path,))
        cur.execute("DELETE FROM files WHERE path = ?", (path,))

    def purge_legacy_paths(self, prefix: str = "root-") -> int:
        """
        Remove legacy file paths that don't match the new root_id/rel format.
        New format: root-<hash>/relative/path
        """
        with self._lock:
            cur = self._write.cursor()
            cur.execute("BEGIN")
            rows = cur.execute(
                "SELECT path FROM files WHERE path NOT LIKE ?",
                (f"{prefix}%/%",),
            ).fetchall()
            paths = [r[0] for r in rows]
            for p in paths:
                self.delete_path_tx(cur, p)
            self._write.commit()
            return len(paths)

    def update_last_seen_tx(self, cur: sqlite3.Cursor, paths: Iterable[str], timestamp: int) -> int:
        paths_list = list(paths)
        if not paths_list:
            return 0
        cur.executemany(
            "UPDATE files SET last_seen=? WHERE path=?",
            [(timestamp, p) for p in paths_list],
        )
        return len(paths_list)

    def upsert_repo_meta_tx(self, cur: sqlite3.Cursor, repo_name: str, tags: str = "", domain: str = "", description: str = "", priority: int = 0) -> None:
        cur.execute(
            """
            INSERT OR REPLACE INTO repo_meta (repo_name, tags, domain, description, priority)
            VALUES (?, ?, ?, ?, ?)
            """,
            (repo_name, tags, domain, description, priority)
        )

    def get_unseen_paths(self, timestamp_limit: int) -> list[str]:
        with self._read_lock:
            rows = self._read.execute(
                "SELECT path FROM files WHERE last_seen < ?",
                (timestamp_limit,),
            ).fetchall()
        return [str(r["path"]) for r in rows]

    def _try_enable_fts(self, conn: sqlite3.Connection) -> bool:
        try:
            conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS __fts_test USING fts5(x)")
            conn.execute("DROP TABLE IF EXISTS __fts_test")
            return True
        except Exception:
            return False

    def _init_schema(self) -> None:
        with self._lock:
            cur = self._write.cursor()
            cur.execute(
                """
                CREATE TABLE IF NOT EXISTS files (
                    path TEXT PRIMARY KEY,
                    repo TEXT NOT NULL,
                    mtime INTEGER NOT NULL,
                    size INTEGER NOT NULL,
                    content BLOB NOT NULL,
                    last_seen INTEGER DEFAULT 0,
                    parse_status TEXT NOT NULL DEFAULT 'none',
                    parse_reason TEXT NOT NULL DEFAULT 'none',
                    ast_status TEXT NOT NULL DEFAULT 'none',
                    ast_reason TEXT NOT NULL DEFAULT 'none',
                    is_binary INTEGER NOT NULL DEFAULT 0,
                    is_minified INTEGER NOT NULL DEFAULT 0,
                    sampled INTEGER NOT NULL DEFAULT 0,
                    content_bytes INTEGER NOT NULL DEFAULT 0
                );
                """
            )


            cur.execute(
                """
                CREATE TABLE IF NOT EXISTS repo_meta (
                    repo_name TEXT PRIMARY KEY,
                    tags TEXT,
                    domain TEXT,
                    description TEXT,
                    priority INTEGER DEFAULT 0
                );
                """
            )

            # v2.6.0: Symbols table for code intelligence
            cur.execute(
                """
                CREATE TABLE IF NOT EXISTS symbols (
                    path TEXT NOT NULL,
                    name TEXT NOT NULL,
                    kind TEXT NOT NULL,
                    line INTEGER NOT NULL,
                    end_line INTEGER NOT NULL,
                    content TEXT NOT NULL,
                    parent_name TEXT DEFAULT '',
                    metadata TEXT DEFAULT '{}',
                    docstring TEXT DEFAULT '',
                    FOREIGN KEY(path) REFERENCES files(path) ON DELETE CASCADE
                );
                """
            )
            # v2.7.0/v2.9.0: Migration for symbols table
            try:
                cur.execute("ALTER TABLE symbols ADD COLUMN end_line INTEGER DEFAULT 0")
            except sqlite3.OperationalError: pass
            try:
                cur.execute("ALTER TABLE symbols ADD COLUMN parent_name TEXT DEFAULT ''")
            except sqlite3.OperationalError: pass
            try:
                cur.execute("ALTER TABLE symbols ADD COLUMN metadata TEXT DEFAULT '{}'")
            except sqlite3.OperationalError: pass
            try:
                cur.execute("ALTER TABLE symbols ADD COLUMN docstring TEXT DEFAULT ''")
            except sqlite3.OperationalError: pass

            # v2.9.0: Symbol Relations table
            cur.execute(
                """
                CREATE TABLE IF NOT EXISTS symbol_relations (
                    from_path TEXT NOT NULL,
                    from_symbol TEXT NOT NULL,
                    to_path TEXT NOT NULL,
                    to_symbol TEXT NOT NULL,
                    rel_type TEXT NOT NULL, -- 'calls', 'implements', 'extends'
                    line INTEGER NOT NULL,
                    FOREIGN KEY(from_path) REFERENCES files(path) ON DELETE CASCADE
                );
                """
            )

            cur.execute("CREATE INDEX IF NOT EXISTS idx_symbols_path ON symbols(path);")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_relations_from ON symbol_relations(from_symbol);")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_relations_to ON symbol_relations(to_symbol);")

            # Index for efficient filtering
            cur.execute("CREATE INDEX IF NOT EXISTS idx_files_repo ON files(repo);")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_files_mtime ON files(mtime DESC);")

            # v2.5.3: Migration for existing users
            try:
                cur.execute("ALTER TABLE files ADD COLUMN last_seen INTEGER DEFAULT 0")
                self._write.commit()
            except sqlite3.OperationalError:
                # Column already exists or table doesn't exist yet
                pass

            # v2.10.0: 3-stage collection columns
            for stmt in [
                "ALTER TABLE files ADD COLUMN parse_status TEXT NOT NULL DEFAULT 'none'",
                "ALTER TABLE files ADD COLUMN parse_reason TEXT NOT NULL DEFAULT 'none'",
                "ALTER TABLE files ADD COLUMN ast_status TEXT NOT NULL DEFAULT 'none'",
                "ALTER TABLE files ADD COLUMN ast_reason TEXT NOT NULL DEFAULT 'none'",
                "ALTER TABLE files ADD COLUMN is_binary INTEGER NOT NULL DEFAULT 0",
                "ALTER TABLE files ADD COLUMN is_minified INTEGER NOT NULL DEFAULT 0",
                "ALTER TABLE files ADD COLUMN sampled INTEGER NOT NULL DEFAULT 0",
                "ALTER TABLE files ADD COLUMN content_bytes INTEGER NOT NULL DEFAULT 0",
            ]:
                try:
                    cur.execute(stmt)
                except sqlite3.OperationalError:
                    pass

            cur.execute("CREATE INDEX IF NOT EXISTS idx_files_last_seen ON files(last_seen);")

            # v2.7.0: Compressed content storage with FTS support via VIEW
            cur.execute(
                """
                CREATE VIEW IF NOT EXISTS files_view AS
                SELECT rowid, path, repo, deckard_decompress(content) AS content
                FROM files;
                """
            )

            if self._fts_enabled:
                # Drop old FTS if it exists to ensure new schema (v2.7.0)
                # But only if it's not already using the VIEW to avoid unnecessary drops.
                # For safety in this update, we'll try to migrate.
                try:
                    cur.execute("CREATE VIRTUAL TABLE IF NOT EXISTS files_fts USING fts5(path, repo, content, content='files_view', content_rowid='rowid')")
                except sqlite3.OperationalError:
                    # If already exists with different schema, we might need to drop/recreate.
                    # This is a one-time migration cost.
                    cur.execute("DROP TABLE IF EXISTS files_fts")
                    cur.execute("CREATE VIRTUAL TABLE files_fts USING fts5(path, repo, content, content='files_view', content_rowid='rowid')")

                cur.execute("DROP TRIGGER IF EXISTS files_ai")
                cur.execute("DROP TRIGGER IF EXISTS files_ad")
                cur.execute("DROP TRIGGER IF EXISTS files_au")

                cur.execute(
                    """
                    CREATE TRIGGER files_ai AFTER INSERT ON files BEGIN
                        INSERT INTO files_fts(rowid, path, repo, content)
                        VALUES (new.rowid, new.path, new.repo, deckard_decompress(new.content));
                    END;
                    """
                )
                cur.execute(
                    """
                    CREATE TRIGGER files_ad AFTER DELETE ON files BEGIN
                        INSERT INTO files_fts(files_fts, rowid, path, repo, content)
                        VALUES('delete', old.rowid, old.path, old.repo, deckard_decompress(old.content));
                    END;
                    """
                )
                cur.execute(
                    """
                    CREATE TRIGGER files_au AFTER UPDATE ON files BEGIN
                        INSERT INTO files_fts(files_fts, rowid, path, repo, content)
                        VALUES('delete', old.rowid, old.path, old.repo, deckard_decompress(old.content));
                        INSERT INTO files_fts(rowid, path, repo, content)
                        VALUES (new.rowid, new.path, new.repo, deckard_decompress(new.content));
                    END;
                    """
                )

    def upsert_files(self, rows: Iterable[tuple[str, str, int, int, str, int]]) -> int:
        rows_list = list(rows)
        if not rows_list:
            return 0
        with self._lock:
            cur = self._write.cursor()
            cur.execute("BEGIN")
            count = self.upsert_files_tx(cur, rows_list)
            self._write.commit()
            return count

    def upsert_symbols(self, symbols: Iterable[tuple]) -> int:
        """Upsert detected symbols (path, name, kind, line, end_line, content, parent_name, metadata, docstring)."""
        symbols_list = list(getattr(symbols, "symbols", symbols))
        if not symbols_list:
            return 0
        with self._lock:
            cur = self._write.cursor()
            cur.execute("BEGIN")
            count = self.upsert_symbols_tx(cur, symbols_list)
            self._write.commit()
            return count

    def get_symbol_block(self, path: str, name: str) -> Optional[dict[str, Any]]:
        """Get the full content block for a specific symbol (v2.7.0)."""
        sql = """
            SELECT s.line, s.end_line, s.metadata, s.docstring, f.content
            FROM symbols s
            JOIN files f ON s.path = f.path
            WHERE s.path = ? AND s.name = ?
            ORDER BY s.line ASC
            LIMIT 1
        """
        with self._read_lock:
            row = self._read.execute(sql, (path, name)).fetchone()

        if not row:
            return None

        line_start = row["line"]
        line_end = row["end_line"]
        full_content = _decompress(row["content"])

        # Extract lines
        lines = full_content.splitlines()

        # 1-based index to 0-based
        if line_end <= 0:  # fallback if end_line not parsed
            line_end = line_start + 10

        start_idx = max(0, line_start - 1)
        end_idx = min(len(lines), line_end)

        block = "\n".join(lines[start_idx:end_idx])
        return {
            "name": name,
            "start_line": line_start,
            "end_line": line_end,
            "content": block,
            "metadata": row["metadata"],
            "docstring": row["docstring"]
        }

    def upsert_relations(self, relations: Iterable[tuple[str, str, str, str, str, int]]) -> int:
        """Upsert symbol relations (from_path, from_symbol, to_path, to_symbol, rel_type, line)."""
        rels_list = list(relations)
        if not rels_list:
            return 0
        with self._lock:
            cur = self._write.cursor()
            cur.execute("BEGIN")
            count = self.upsert_relations_tx(cur, rels_list)
            self._write.commit()
            return count

    def update_last_seen(self, paths: Iterable[str], timestamp: int) -> int:
        """Update last_seen timestamp for existing files (v2.5.3)."""
        paths_list = list(paths)
        if not paths_list:
            return 0
        with self._lock:
            cur = self._write.cursor()
            cur.execute("BEGIN")
            count = self.update_last_seen_tx(cur, paths_list, timestamp)
            self._write.commit()
            return count

    def delete_unseen_files(self, timestamp_limit: int) -> int:
        """Delete files that were not seen in the latest scan (v2.5.3)."""
        with self._lock:
            cur = self._write.cursor()
            # Cascade delete should handle symbols if FK is enabled, but sqlite default often disabled.
            # Manually delete symbols for cleanliness or rely on trigger?
            # Safest to delete manually if FKs aren't reliable.
            # Let's check keys.
            cur.execute("PRAGMA foreign_keys = ON;")

            cur.execute("DELETE FROM files WHERE last_seen < ?", (timestamp_limit,))
            count = cur.rowcount
            self._write.commit()
            return count

    def delete_files(self, paths: Iterable[str]) -> int:
        paths_list = list(paths)
        if not paths_list:
            return 0
        with self._lock:
            cur = self._write.cursor()
            cur.execute("BEGIN")
            cur.execute("PRAGMA foreign_keys = ON;")
            cur.executemany("DELETE FROM files WHERE path=?", [(p,) for p in paths_list])
            self._write.commit()
            return len(paths_list)

    def get_all_file_paths(self) -> set[str]:
        """Get all indexed file paths for deletion detection."""
        with self._read_lock:
            rows = self._read.execute("SELECT path FROM files").fetchall()
        return {r["path"] for r in rows}

    def get_file_meta(self, path: str) -> Optional[tuple[int, int]]:
        with self._read_lock:
            row = self._read.execute("SELECT mtime, size FROM files WHERE path=?", (path,)).fetchone()
        if not row:
            return None
        return int(row["mtime"]), int(row["size"])

    def get_index_status(self) -> dict[str, Any]:
        """Get index metadata for debugging/UI (v2.4.2)."""
        with self._read_lock:
            row = self._read.execute("SELECT COUNT(1) AS c, MAX(mtime) AS last_mtime FROM files").fetchone()
        count = int(row["c"]) if row and row["c"] else 0
        last_mtime = int(row["last_mtime"]) if row and row["last_mtime"] else 0

        return {
            "total_files": count,
            "last_scan_time": last_mtime,
            "db_size_bytes": Path(self.db_path).stat().st_size if Path(self.db_path).exists() else 0
        }

    def has_legacy_paths(self) -> bool:
        """Return True if DB contains non root-id paths."""
        cache_key = "legacy_paths"
        now = time.time()
        cached = self._stats_cache.get(cache_key)
        if cached is not None and (now - self._stats_cache_ts < self._stats_cache_ttl):
            return bool(cached)
        try:
            with self._read_lock:
                row = self._read.execute(
                    "SELECT 1 AS c FROM files WHERE path NOT LIKE ? LIMIT 1",
                    ("root-%/%",),
                ).fetchone()
            exists = bool(row)
            self._stats_cache[cache_key] = exists
            self._stats_cache_ts = now
            return exists
        except Exception:
            return False

    def count_files(self) -> int:
        with self._read_lock:
            row = self._read.execute("SELECT COUNT(1) AS c FROM files").fetchone()
        return int(row["c"]) if row else 0

    def clear_stats_cache(self) -> None:
        """Invalidate stats cache."""
        self._stats_cache.clear()
        self._stats_cache_ts = 0.0

    def get_repo_stats(self, force_refresh: bool = False, root_ids: Optional[list[str]] = None) -> dict[str, int]:
        """Get file counts per repo with TTL cache (v2.5.1)."""
        now = time.time()
        if root_ids:
            force_refresh = True
        if not force_refresh and (now - self._stats_cache_ts < self._stats_cache_ttl):
            cached = self._stats_cache.get("repo_stats")
            if cached is not None:
                return cached

        try:
            with self._read_lock:
                if root_ids:
                    root_clauses = " OR ".join(["path LIKE ?"] * len(root_ids))
                    sql = f"SELECT repo, COUNT(1) as c FROM files WHERE {root_clauses} GROUP BY repo"
                    params = [f"{rid}/%" for rid in root_ids]
                    rows = self._read.execute(sql, params).fetchall()
                else:
                    rows = self._read.execute("SELECT repo, COUNT(1) as c FROM files GROUP BY repo").fetchall()
            stats = {r["repo"]: r["c"] for r in rows}
            self._stats_cache["repo_stats"] = stats
            self._stats_cache_ts = now
            return stats
        except Exception:
            return {}

    def upsert_repo_meta(self, repo_name: str, tags: str = "", domain: str = "", description: str = "", priority: int = 0) -> None:
        """Upsert repository metadata (v2.4.3)."""
        with self._lock:
            cur = self._write.cursor()
            cur.execute("BEGIN")
            self.upsert_repo_meta_tx(cur, repo_name, tags, domain, description, priority)
            self._write.commit()

    def get_repo_meta(self, repo_name: str) -> Optional[dict[str, Any]]:
        """Get metadata for a specific repo."""
        with self._read_lock:
            row = self._read.execute("SELECT * FROM repo_meta WHERE repo_name = ?", (repo_name,)).fetchone()
        return dict(row) if row else None

    def get_all_repo_meta(self) -> dict[str, dict[str, Any]]:
        """Get all repo metadata as a map."""
        with self._read_lock:
            rows = self._read.execute("SELECT * FROM repo_meta").fetchall()
        return {row["repo_name"]: dict(row) for row in rows}

    def delete_file(self, path: str) -> None:
        """Delete a file and its symbols by path (v2.7.2)."""
        with self._lock:
            cur = self._write.cursor()
            cur.execute("BEGIN")
            self.delete_path_tx(cur, path)
            self._write.commit()

    def list_files(
        self,
        repo: Optional[str] = None,
        path_pattern: Optional[str] = None,
        file_types: Optional[list[str]] = None,
        include_hidden: bool = False,
        limit: int = 100,
        offset: int = 0,
        root_ids: Optional[list[str]] = None,
    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
        """List indexed files for debugging (v2.4.0)."""
        limit = min(int(limit), 500)
        offset = max(int(offset), 0)

        where_clauses = []
        params: list[Any] = []

        # 0. Root filter
        if root_ids:
            root_clauses = []
            for rid in root_ids:
                root_clauses.append("f.path LIKE ?")
                params.append(f"{rid}/%")
            if root_clauses:
                where_clauses.append("(" + " OR ".join(root_clauses) + ")")

        # 1. Repo filter
        if repo:
            where_clauses.append("f.repo = ?")
            params.append(repo)

        # 2. Hidden files filter
        if not include_hidden:
            where_clauses.append("f.path NOT LIKE '%/.%'")
            where_clauses.append("f.path NOT LIKE '.%'")

        # 3. File types filter
        if file_types:
            type_clauses = []
            for ft in file_types:
                ext = ft.lower().lstrip(".")
                type_clauses.append("f.path LIKE ?")
                params.append(f"%.{ext}")
            if type_clauses:
                where_clauses.append("(" + " OR ".join(type_clauses) + ")")

        # 4. Path pattern filter
        if path_pattern:
            sql_pattern = glob_to_like(path_pattern)
            where_clauses.append("f.path LIKE ?")
            params.append(sql_pattern)

        where = " AND ".join(where_clauses) if where_clauses else "1=1"

        sql = f"""
            SELECT f.repo AS repo,
                   f.path AS path,
                   f.mtime AS mtime,
                   f.size AS size
            FROM files f
            WHERE {where}
            ORDER BY f.repo, f.path
            LIMIT ? OFFSET ?;
        """

        # Data query params
        data_params = params + [limit, offset]

        with self._read_lock:
            rows = self._read.execute(sql, data_params).fetchall()

        files: list[dict[str, Any]] = []
        for r in rows:
            files.append({
                "repo": r["repo"],
                "path": r["path"],
                "mtime": int(r["mtime"]),
                "size": int(r["size"]),
                "file_type": get_file_extension(r["path"]),
            })

        # Count query params (no limit/offset)
        count_sql = f"SELECT COUNT(1) AS c FROM files f WHERE {where}"

        repo_where = where if where else "1=1"
        repo_sql = f"""
            SELECT repo, COUNT(1) AS file_count
            FROM files f
            WHERE {repo_where}
            GROUP BY repo
            ORDER BY file_count DESC;
        """
        with self._read_lock:
            count_res = self._read.execute(count_sql, params).fetchone()
            total = count_res["c"] if count_res else 0
            repo_rows = self._read.execute(repo_sql, params).fetchall()

        repos = [{"repo": r["repo"], "file_count": r["file_count"]} for r in repo_rows]

        meta = {
            "total": total,
            "returned": len(files),
            "offset": offset,
            "limit": limit,
            "repos": repos,
            "include_hidden": include_hidden,
        }

        return files, meta

    # ========== Helper Methods ==========

    # Delegated search logic in SearchEngine

    # ========== Main Search Methods ==========


    def search_symbols(self, query: str, repo: Optional[str] = None, limit: int = 20, root_ids: Optional[list[str]] = None) -> list[dict[str, Any]]:
        """Search for symbols by name (v2.6.0)."""
        limit = min(limit, 100)
        query = query.strip()
        if not query:
            return []

        sql = """
            SELECT s.path, s.name, s.kind, s.line, s.end_line, s.content, s.docstring, s.metadata, f.repo, f.mtime, f.size
            FROM symbols s
            JOIN files f ON s.path = f.path
            WHERE s.name LIKE ?
        """
        params = [f"%{query}%"]

        if root_ids:
            root_clauses = []
            for rid in root_ids:
                root_clauses.append("f.path LIKE ?")
                params.append(f"{rid}/%")
            if root_clauses:
                sql += " AND (" + " OR ".join(root_clauses) + ")"

        if repo:
            sql += " AND f.repo = ?"
            params.append(repo)

        sql += " ORDER BY length(s.name) ASC, s.path ASC LIMIT ?"
        params.append(limit)

        with self._read_lock:
            rows = self._read.execute(sql, params).fetchall()

        return [
            {
                "path": r["path"],
                "repo": r["repo"],
                "name": r["name"],
                "kind": r["kind"],
                "line": r["line"],
                "snippet": r["content"],
                "docstring": r["docstring"],
                "metadata": r["metadata"],
                "mtime": int(r["mtime"]),
                "size": int(r["size"])
            }
            for r in rows
        ]

    def read_file(self, path: str) -> Optional[str]:
        """Read full file content from DB (v2.6.0)."""
        with self._read_lock:
            row = self._read.execute("SELECT content FROM files WHERE path = ?", (path,)).fetchone()
        return _decompress(row["content"]) if row else None

    def iter_engine_documents(self, root_ids: list[str]) -> Iterable[Dict[str, Any]]:
        max_doc_bytes = int(os.environ.get("DECKARD_ENGINE_MAX_DOC_BYTES", "4194304") or 4194304)
        preview_bytes = int(os.environ.get("DECKARD_ENGINE_PREVIEW_BYTES", "8192") or 8192)
        head_bytes = max_doc_bytes // 2
        tail_bytes = max_doc_bytes - head_bytes
        with self._read_lock:
            if root_ids:
                clauses = " OR ".join(["path LIKE ?"] * len(root_ids))
                params = [f"{rid}/%" for rid in root_ids]
                sql = f"SELECT path, repo, mtime, size, content, parse_status FROM files WHERE {clauses}"
                rows = self._read.execute(sql, params)
            else:
                rows = self._read.execute("SELECT path, repo, mtime, size, content, parse_status FROM files")
            for r in rows:
                path = str(r["path"])
                if "/" not in path:
                    continue
                root_id, rel_path = path.split("/", 1)
                if root_ids and root_id not in root_ids:
                    continue
                path_text = f"{path} {rel_path}"
                body_text = ""
                preview = ""
                if str(r["parse_status"]) == "ok":
                    raw = _decompress(r["content"])
                    norm = _normalize_engine_text(raw)
                    if len(norm) > max_doc_bytes:
                        norm = norm[:head_bytes] + norm[-tail_bytes:]
                    body_text = norm
                    if preview_bytes > 0:
                        half = preview_bytes // 2
                        preview = raw[:half] + ("\n...\n" if len(raw) > preview_bytes else "") + raw[-half:]
                yield {
                    "doc_id": path,
                    "path": path,
                    "repo": str(r["repo"] or "__root__"),
                    "root_id": root_id,
                    "rel_path": rel_path,
                    "path_text": path_text,
                    "body_text": body_text,
                    "preview": preview,
                    "mtime": int(r["mtime"] or 0),
                    "size": int(r["size"] or 0),
                }

    def search_v2(self, opts: SearchOptions) -> Tuple[List[SearchHit], Dict[str, Any]]:
        return self.engine.search_v2(opts)

    # Compatibility shims for legacy tests (v2.7.x)
    def _search_like(self, opts: SearchOptions, terms: List[str],
                     meta: Dict[str, Any], no_slice: bool = False) -> Tuple[List[SearchHit], Dict[str, Any]]:
        return self.engine._search_like(opts, terms, meta, no_slice=no_slice)

    def _search_fts(self, opts: SearchOptions, terms: List[str],
                    meta: Dict[str, Any], no_slice: bool = False) -> Optional[Tuple[List[SearchHit], Dict[str, Any]]]:
        return self.engine._search_fts(opts, terms, meta, no_slice=no_slice)

    def search(
        self,
        q: str,
        repo: Optional[str],
        limit: int = 20,
        snippet_max_lines: int = 5,
        root_ids: Optional[list[str]] = None,
    ) -> Tuple[List[SearchHit], Dict[str, Any]]:
        opts = SearchOptions(
            query=q,
            repo=repo,
            limit=limit,
            snippet_lines=snippet_max_lines,
            root_ids=list(root_ids or []),
        )
        return self.search_v2(opts)

    def _get_enclosing_symbol(self, path: str, line_no: int) -> Optional[str]:
        """Find the nearest symbol definition above the given line (v2.6.0)."""
        # Optimized query: find symbol with max line that is <= line_no
        sql = """
            SELECT kind, name
            FROM symbols
            WHERE path = ? AND line <= ?
            ORDER BY line DESC
            LIMIT 1
        """
        with self._read_lock:
            row = self._read.execute(sql, (path, line_no)).fetchone()

        if row:
            return f"{row['kind']}: {row['name']}"
        return None

    def _is_exact_symbol(self, name: str) -> bool:
        """Check if a symbol with this exact name exists (v2.6.0)."""
        with self._read_lock:
            row = self._read.execute("SELECT 1 FROM symbols WHERE name = ? LIMIT 1", (name,)).fetchone()
        return bool(row)

    def repo_candidates(self, q: str, limit: int = 3, root_ids: Optional[list[str]] = None) -> List[Dict[str, Any]]:
        return self.engine.repo_candidates(q, limit, root_ids=root_ids or [])