sari-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. app/__init__.py +1 -0
  2. app/config.py +240 -0
  3. app/db.py +932 -0
  4. app/dedup_queue.py +77 -0
  5. app/engine_registry.py +56 -0
  6. app/engine_runtime.py +472 -0
  7. app/http_server.py +204 -0
  8. app/indexer.py +1532 -0
  9. app/main.py +147 -0
  10. app/models.py +39 -0
  11. app/queue_pipeline.py +65 -0
  12. app/ranking.py +144 -0
  13. app/registry.py +172 -0
  14. app/search_engine.py +572 -0
  15. app/watcher.py +124 -0
  16. app/workspace.py +286 -0
  17. deckard/__init__.py +3 -0
  18. deckard/__main__.py +4 -0
  19. deckard/main.py +345 -0
  20. deckard/version.py +1 -0
  21. mcp/__init__.py +1 -0
  22. mcp/__main__.py +19 -0
  23. mcp/cli.py +485 -0
  24. mcp/daemon.py +149 -0
  25. mcp/proxy.py +304 -0
  26. mcp/registry.py +218 -0
  27. mcp/server.py +519 -0
  28. mcp/session.py +234 -0
  29. mcp/telemetry.py +112 -0
  30. mcp/test_cli.py +89 -0
  31. mcp/test_daemon.py +124 -0
  32. mcp/test_server.py +197 -0
  33. mcp/tools/__init__.py +14 -0
  34. mcp/tools/_util.py +244 -0
  35. mcp/tools/deckard_guide.py +32 -0
  36. mcp/tools/doctor.py +208 -0
  37. mcp/tools/get_callers.py +60 -0
  38. mcp/tools/get_implementations.py +60 -0
  39. mcp/tools/index_file.py +75 -0
  40. mcp/tools/list_files.py +138 -0
  41. mcp/tools/read_file.py +48 -0
  42. mcp/tools/read_symbol.py +99 -0
  43. mcp/tools/registry.py +212 -0
  44. mcp/tools/repo_candidates.py +89 -0
  45. mcp/tools/rescan.py +46 -0
  46. mcp/tools/scan_once.py +54 -0
  47. mcp/tools/search.py +208 -0
  48. mcp/tools/search_api_endpoints.py +72 -0
  49. mcp/tools/search_symbols.py +63 -0
  50. mcp/tools/status.py +135 -0
  51. sari/__init__.py +1 -0
  52. sari/__main__.py +4 -0
  53. sari-0.0.1.dist-info/METADATA +521 -0
  54. sari-0.0.1.dist-info/RECORD +58 -0
  55. sari-0.0.1.dist-info/WHEEL +5 -0
  56. sari-0.0.1.dist-info/entry_points.txt +2 -0
  57. sari-0.0.1.dist-info/licenses/LICENSE +21 -0
  58. sari-0.0.1.dist-info/top_level.txt +4 -0
app/db.py ADDED
@@ -0,0 +1,932 @@
+ import sqlite3
+ import threading
+ import time
+ import zlib
+ import unicodedata
+ import os
+ from pathlib import Path
+ from typing import Any, Iterable, Optional, Tuple, List, Dict
+
+ # Support both `python3 app/db.py` (script mode) and package mode.
+ try:
+     from .models import SearchHit, SearchOptions
+     from .ranking import get_file_extension, glob_to_like
+     from .engine_registry import get_registry
+ except ImportError:
+     from models import SearchHit, SearchOptions
+     from ranking import get_file_extension, glob_to_like
+     from engine_registry import get_registry
+
+ def _compress(text: str) -> bytes:
+     if not text: return b""
+     return zlib.compress(text.encode("utf-8"), level=6)
+
+ def _decompress(data: Any) -> str:
+     if not data: return ""
+     if isinstance(data, str): return data  # legacy
+     try:
+         return zlib.decompress(data).decode("utf-8")
+     except Exception:
+         return str(data)
+
+
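The two helpers above store file content as zlib blobs while tolerating legacy rows that were written as plain text. A minimal round-trip sketch (illustrative only, using the two functions defined above):

    blob = _compress("def hello():\n    return 1\n")        # zlib-compressed bytes
    assert _decompress(blob) == "def hello():\n    return 1\n"
    assert _decompress("plain legacy row") == "plain legacy row"  # str passes through
    assert _decompress(None) == "" and _decompress(b"") == ""     # empty stays empty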
+ def _normalize_engine_text(text: str) -> str:
+     if not text:
+         return ""
+     norm = unicodedata.normalize("NFKC", text)
+     norm = norm.lower()
+     norm = " ".join(norm.split())
+     return norm
+
+ class LocalSearchDB:
+     """SQLite + optional FTS5 backed index.
+
+     Design goals:
+     - Low IO overhead: batch writes, WAL.
+     - Thread safety: separate read/write connections.
+     - Safer defaults: DB stored under user cache dir by default.
+
+     v2.3.1 enhancements:
+     - File type filtering
+     - Path pattern matching (glob)
+     - Exclude patterns
+     - Recency boost
+     - Regex search mode
+     """
+
+     def __init__(self, db_path: str):
+         self.db_path = db_path
+         Path(db_path).parent.mkdir(parents=True, exist_ok=True)
+
+         # Separate connections: writer (indexer) and reader (HTTP).
+         self._write = sqlite3.connect(db_path, check_same_thread=False)
+         self._read = sqlite3.connect(db_path, check_same_thread=False)
+         self._write.row_factory = sqlite3.Row
+         self._read.row_factory = sqlite3.Row
+
+         # Register decompression function (v2.7.0)
+         self._write.create_function("deckard_decompress", 1, _decompress)
+         self._read.create_function("deckard_decompress", 1, _decompress)
+
+         self._lock = threading.Lock()
+
+         self._read_lock = threading.Lock()
+
+         self._apply_pragmas(self._write)
+         self._apply_pragmas(self._read)
+
+         self._fts_enabled = self._try_enable_fts(self._write)
+         self._init_schema()
+
+         # TTL Cache for stats (v2.5.1)
+         self._stats_cache: dict[str, Any] = {}
+         self._stats_cache_ts = 0.0
+         self._stats_cache_ttl = 60.0  # 60 seconds
+
+         self.engine = get_registry().create("sqlite", self)
+
+     def set_engine(self, engine: Any) -> None:
+         self.engine = engine
+
+     @staticmethod
+     def _apply_pragmas(conn: sqlite3.Connection) -> None:
+         # conn.execute("PRAGMA foreign_keys=ON;")  # Disabled for compatibility with legacy extraction-only tests
+         conn.execute("PRAGMA journal_mode=WAL;")
+         conn.execute("PRAGMA synchronous=NORMAL;")
+         conn.execute("PRAGMA temp_store=MEMORY;")
+         conn.execute("PRAGMA busy_timeout=2000;")
+         conn.execute("PRAGMA cache_size=-20000;")
+
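These pragmas implement the docstring's concurrency model: WAL lets the dedicated reader connection serve queries while the writer commits, and busy_timeout bounds lock waits. A standalone sketch of the same two-connection pattern (not Deckard code; table and path are made up):

    import sqlite3, tempfile, os

    path = os.path.join(tempfile.mkdtemp(), "demo.db")
    writer = sqlite3.connect(path)
    reader = sqlite3.connect(path)
    for conn in (writer, reader):
        conn.execute("PRAGMA journal_mode=WAL;")   # readers don't block the writer
        conn.execute("PRAGMA busy_timeout=2000;")  # wait up to 2s on contention
    writer.execute("CREATE TABLE t(x)")
    writer.execute("BEGIN")
    writer.execute("INSERT INTO t VALUES (1)")
    # Under WAL the reader sees a consistent snapshot even mid-transaction.
    print(reader.execute("SELECT COUNT(*) FROM t").fetchone())  # (0,)
    writer.commit()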
+     def open_writer_connection(self) -> sqlite3.Connection:
+         conn = sqlite3.connect(self.db_path, check_same_thread=False)
+         conn.row_factory = sqlite3.Row
+         conn.create_function("deckard_decompress", 1, _decompress)
+         self._apply_pragmas(conn)
+         return conn
+
+     @property
+     def fts_enabled(self) -> bool:
+         return self._fts_enabled
+
+     def close(self) -> None:
+         for c in (self._read, self._write):
+             try:
+                 c.close()
+             except Exception:
+                 pass
+
+     # ----------------------------
+     # Transaction-safe *_tx methods (no commit/rollback here)
+     # ----------------------------
+
+     def upsert_files_tx(self, cur: sqlite3.Cursor, rows: Iterable[tuple]) -> int:
+         rows_list = []
+         for r in rows:
+             r_list = list(r)
+             # Pad legacy rows (path, repo, mtime, size, content, last_seen)
+             if len(r_list) < 14:
+                 while len(r_list) < 6:
+                     r_list.append(0)
+                 defaults = ["none", "none", "none", "none", 0, 0, 0, 0]
+                 r_list.extend(defaults[: (14 - len(r_list))])
+             compressed_content = _compress(r_list[4])
+             rows_list.append((
+                 r_list[0], r_list[1], r_list[2], r_list[3], compressed_content,
+                 r_list[5], r_list[6], r_list[7], r_list[8], r_list[9],
+                 r_list[10], r_list[11], r_list[12], r_list[13]
+             ))
+         if not rows_list:
+             return 0
+         cur.executemany(
+             """
+             INSERT INTO files(path, repo, mtime, size, content, last_seen, parse_status, parse_reason, ast_status, ast_reason, is_binary, is_minified, sampled, content_bytes)
+             VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)
+             ON CONFLICT(path) DO UPDATE SET
+                 repo=excluded.repo,
+                 mtime=excluded.mtime,
+                 size=excluded.size,
+                 content=excluded.content,
+                 last_seen=excluded.last_seen,
+                 parse_status=excluded.parse_status,
+                 parse_reason=excluded.parse_reason,
+                 ast_status=excluded.ast_status,
+                 ast_reason=excluded.ast_reason,
+                 is_binary=excluded.is_binary,
+                 is_minified=excluded.is_minified,
+                 sampled=excluded.sampled,
+                 content_bytes=excluded.content_bytes
+             WHERE excluded.mtime >= files.mtime;
+             """,
+             rows_list,
+         )
+         # Clear old symbols for updated paths to ensure consistency (v2.8.0)
+         cur.executemany("DELETE FROM symbols WHERE path = ?", [(r[0],) for r in rows_list])
+         return len(rows_list)
+
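The WHERE clause on the upsert makes writes monotonic: a conflicting row only overwrites when its mtime is at least the stored one, so a stale batch cannot clobber a newer scan. The guard in isolation (schema trimmed to three columns for illustration):

    import sqlite3

    db = sqlite3.connect(":memory:")
    db.execute("CREATE TABLE files(path TEXT PRIMARY KEY, mtime INTEGER, content TEXT)")
    up = """INSERT INTO files(path, mtime, content) VALUES(?,?,?)
            ON CONFLICT(path) DO UPDATE SET mtime=excluded.mtime, content=excluded.content
            WHERE excluded.mtime >= files.mtime"""
    db.execute(up, ("a.py", 200, "new"))
    db.execute(up, ("a.py", 100, "stale"))  # older mtime: the update clause is skipped
    print(db.execute("SELECT mtime, content FROM files").fetchone())  # (200, 'new')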
+     def upsert_symbols_tx(self, cur: sqlite3.Cursor, symbols: Iterable[tuple]) -> int:
+         if hasattr(symbols, "symbols"):
+             symbols_list = list(getattr(symbols, "symbols"))
+         else:
+             symbols_list = list(symbols)
+         if not symbols_list:
+             return 0
+         normalized = []
+         for s in symbols_list:
+             if len(s) == 7:
+                 normalized.append(s + ("{}", ""))
+             elif len(s) == 9:
+                 normalized.append(s)
+             else:
+                 tmp = list(s) + [""] * (9 - len(s))
+                 normalized.append(tuple(tmp[:9]))
+         symbols_list = normalized
+         paths = {s[0] for s in symbols_list}
+         cur.executemany("DELETE FROM symbols WHERE path = ?", [(p,) for p in paths])
+         cur.executemany(
+             """
+             INSERT INTO symbols(path, name, kind, line, end_line, content, parent_name, metadata, docstring)
+             VALUES(?,?,?,?,?,?,?,?,?)
+             """,
+             symbols_list,
+         )
+         return len(symbols_list)
+
+     def upsert_relations_tx(self, cur: sqlite3.Cursor, relations: Iterable[tuple[str, str, str, str, str, int]]) -> int:
+         rels_list = list(relations)
+         if not rels_list:
+             return 0
+         paths = {r[0] for r in rels_list}
+         cur.executemany("DELETE FROM symbol_relations WHERE from_path = ?", [(p,) for p in paths])
+         cur.executemany(
+             """
+             INSERT INTO symbol_relations(from_path, from_symbol, to_path, to_symbol, rel_type, line)
+             VALUES(?,?,?,?,?,?)
+             """,
+             rels_list,
+         )
+         return len(rels_list)
+
+     def delete_path_tx(self, cur: sqlite3.Cursor, path: str) -> None:
+         # Explicit delete order: relations -> symbols -> files (no FK/cascade dependency)
+         cur.execute("DELETE FROM symbol_relations WHERE from_path = ? OR to_path = ?", (path, path))
+         cur.execute("DELETE FROM symbols WHERE path = ?", (path,))
+         cur.execute("DELETE FROM files WHERE path = ?", (path,))
+
+     def purge_legacy_paths(self, prefix: str = "root-") -> int:
+         """
+         Remove legacy file paths that don't match the new root_id/rel format.
+         New format: root-<hash>/relative/path
+         """
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute("BEGIN")
+             rows = cur.execute(
+                 "SELECT path FROM files WHERE path NOT LIKE ?",
+                 (f"{prefix}%/%",),
+             ).fetchall()
+             paths = [r[0] for r in rows]
+             for p in paths:
+                 self.delete_path_tx(cur, p)
+             self._write.commit()
+             return len(paths)
+
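Every indexed path is expected to carry a root-id prefix, which is what the NOT LIKE probe above keys on. For example (hash segment hypothetical):

    kept   = "root-3fa9c1/app/db.py"   # root-<hash>/relative/path
    purged = "app/db.py"               # pre-root-id layout, removed by purge_legacy_paths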
+     def update_last_seen_tx(self, cur: sqlite3.Cursor, paths: Iterable[str], timestamp: int) -> int:
+         paths_list = list(paths)
+         if not paths_list:
+             return 0
+         cur.executemany(
+             "UPDATE files SET last_seen=? WHERE path=?",
+             [(timestamp, p) for p in paths_list],
+         )
+         return len(paths_list)
+
+     def upsert_repo_meta_tx(self, cur: sqlite3.Cursor, repo_name: str, tags: str = "", domain: str = "", description: str = "", priority: int = 0) -> None:
+         cur.execute(
+             """
+             INSERT OR REPLACE INTO repo_meta (repo_name, tags, domain, description, priority)
+             VALUES (?, ?, ?, ?, ?)
+             """,
+             (repo_name, tags, domain, description, priority)
+         )
+
+     def get_unseen_paths(self, timestamp_limit: int) -> list[str]:
+         with self._read_lock:
+             rows = self._read.execute(
+                 "SELECT path FROM files WHERE last_seen < ?",
+                 (timestamp_limit,),
+             ).fetchall()
+             return [str(r["path"]) for r in rows]
+
+     def _try_enable_fts(self, conn: sqlite3.Connection) -> bool:
+         try:
+             conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS __fts_test USING fts5(x)")
+             conn.execute("DROP TABLE IF EXISTS __fts_test")
+             return True
+         except Exception:
+             return False
+
+     def _init_schema(self) -> None:
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute(
+                 """
+                 CREATE TABLE IF NOT EXISTS files (
+                     path TEXT PRIMARY KEY,
+                     repo TEXT NOT NULL,
+                     mtime INTEGER NOT NULL,
+                     size INTEGER NOT NULL,
+                     content BLOB NOT NULL,
+                     last_seen INTEGER DEFAULT 0,
+                     parse_status TEXT NOT NULL DEFAULT 'none',
+                     parse_reason TEXT NOT NULL DEFAULT 'none',
+                     ast_status TEXT NOT NULL DEFAULT 'none',
+                     ast_reason TEXT NOT NULL DEFAULT 'none',
+                     is_binary INTEGER NOT NULL DEFAULT 0,
+                     is_minified INTEGER NOT NULL DEFAULT 0,
+                     sampled INTEGER NOT NULL DEFAULT 0,
+                     content_bytes INTEGER NOT NULL DEFAULT 0
+                 );
+                 """
+             )
+
+             cur.execute(
+                 """
+                 CREATE TABLE IF NOT EXISTS repo_meta (
+                     repo_name TEXT PRIMARY KEY,
+                     tags TEXT,
+                     domain TEXT,
+                     description TEXT,
+                     priority INTEGER DEFAULT 0
+                 );
+                 """
+             )
+
+             # v2.6.0: Symbols table for code intelligence
+             cur.execute(
+                 """
+                 CREATE TABLE IF NOT EXISTS symbols (
+                     path TEXT NOT NULL,
+                     name TEXT NOT NULL,
+                     kind TEXT NOT NULL,
+                     line INTEGER NOT NULL,
+                     end_line INTEGER NOT NULL,
+                     content TEXT NOT NULL,
+                     parent_name TEXT DEFAULT '',
+                     metadata TEXT DEFAULT '{}',
+                     docstring TEXT DEFAULT '',
+                     FOREIGN KEY(path) REFERENCES files(path) ON DELETE CASCADE
+                 );
+                 """
+             )
+             # v2.7.0/v2.9.0: Migration for symbols table
+             try:
+                 cur.execute("ALTER TABLE symbols ADD COLUMN end_line INTEGER DEFAULT 0")
+             except sqlite3.OperationalError: pass
+             try:
+                 cur.execute("ALTER TABLE symbols ADD COLUMN parent_name TEXT DEFAULT ''")
+             except sqlite3.OperationalError: pass
+             try:
+                 cur.execute("ALTER TABLE symbols ADD COLUMN metadata TEXT DEFAULT '{}'")
+             except sqlite3.OperationalError: pass
+             try:
+                 cur.execute("ALTER TABLE symbols ADD COLUMN docstring TEXT DEFAULT ''")
+             except sqlite3.OperationalError: pass
+
+             # v2.9.0: Symbol Relations table
+             cur.execute(
+                 """
+                 CREATE TABLE IF NOT EXISTS symbol_relations (
+                     from_path TEXT NOT NULL,
+                     from_symbol TEXT NOT NULL,
+                     to_path TEXT NOT NULL,
+                     to_symbol TEXT NOT NULL,
+                     rel_type TEXT NOT NULL, -- 'calls', 'implements', 'extends'
+                     line INTEGER NOT NULL,
+                     FOREIGN KEY(from_path) REFERENCES files(path) ON DELETE CASCADE
+                 );
+                 """
+             )
+
+             cur.execute("CREATE INDEX IF NOT EXISTS idx_symbols_path ON symbols(path);")
+             cur.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);")
+             cur.execute("CREATE INDEX IF NOT EXISTS idx_relations_from ON symbol_relations(from_symbol);")
+             cur.execute("CREATE INDEX IF NOT EXISTS idx_relations_to ON symbol_relations(to_symbol);")
+
+             # Indexes for efficient filtering
+             cur.execute("CREATE INDEX IF NOT EXISTS idx_files_repo ON files(repo);")
+             cur.execute("CREATE INDEX IF NOT EXISTS idx_files_mtime ON files(mtime DESC);")
+
+             # v2.5.3: Migration for existing users
+             try:
+                 cur.execute("ALTER TABLE files ADD COLUMN last_seen INTEGER DEFAULT 0")
+                 self._write.commit()
+             except sqlite3.OperationalError:
+                 # Column already exists or table doesn't exist yet
+                 pass
+
+             # v2.10.0: 3-stage collection columns
+             for stmt in [
+                 "ALTER TABLE files ADD COLUMN parse_status TEXT NOT NULL DEFAULT 'none'",
+                 "ALTER TABLE files ADD COLUMN parse_reason TEXT NOT NULL DEFAULT 'none'",
+                 "ALTER TABLE files ADD COLUMN ast_status TEXT NOT NULL DEFAULT 'none'",
+                 "ALTER TABLE files ADD COLUMN ast_reason TEXT NOT NULL DEFAULT 'none'",
+                 "ALTER TABLE files ADD COLUMN is_binary INTEGER NOT NULL DEFAULT 0",
+                 "ALTER TABLE files ADD COLUMN is_minified INTEGER NOT NULL DEFAULT 0",
+                 "ALTER TABLE files ADD COLUMN sampled INTEGER NOT NULL DEFAULT 0",
+                 "ALTER TABLE files ADD COLUMN content_bytes INTEGER NOT NULL DEFAULT 0",
+             ]:
+                 try:
+                     cur.execute(stmt)
+                 except sqlite3.OperationalError:
+                     pass
+
+             cur.execute("CREATE INDEX IF NOT EXISTS idx_files_last_seen ON files(last_seen);")
+
+             # v2.7.0: Compressed content storage with FTS support via VIEW
+             cur.execute(
+                 """
+                 CREATE VIEW IF NOT EXISTS files_view AS
+                 SELECT rowid, path, repo, deckard_decompress(content) AS content
+                 FROM files;
+                 """
+             )
+
+             if self._fts_enabled:
+                 # v2.7.0: ensure files_fts uses the VIEW-backed external-content schema.
+                 try:
+                     cur.execute("CREATE VIRTUAL TABLE IF NOT EXISTS files_fts USING fts5(path, repo, content, content='files_view', content_rowid='rowid')")
+                 except sqlite3.OperationalError:
+                     # An older files_fts with a different shape exists; drop and
+                     # recreate it (a one-time migration cost).
+                     cur.execute("DROP TABLE IF EXISTS files_fts")
+                     cur.execute("CREATE VIRTUAL TABLE files_fts USING fts5(path, repo, content, content='files_view', content_rowid='rowid')")
+
+                 cur.execute("DROP TRIGGER IF EXISTS files_ai")
+                 cur.execute("DROP TRIGGER IF EXISTS files_ad")
+                 cur.execute("DROP TRIGGER IF EXISTS files_au")
+
+                 cur.execute(
+                     """
+                     CREATE TRIGGER files_ai AFTER INSERT ON files BEGIN
+                         INSERT INTO files_fts(rowid, path, repo, content)
+                         VALUES (new.rowid, new.path, new.repo, deckard_decompress(new.content));
+                     END;
+                     """
+                 )
+                 cur.execute(
+                     """
+                     CREATE TRIGGER files_ad AFTER DELETE ON files BEGIN
+                         INSERT INTO files_fts(files_fts, rowid, path, repo, content)
+                         VALUES('delete', old.rowid, old.path, old.repo, deckard_decompress(old.content));
+                     END;
+                     """
+                 )
+                 cur.execute(
+                     """
+                     CREATE TRIGGER files_au AFTER UPDATE ON files BEGIN
+                         INSERT INTO files_fts(files_fts, rowid, path, repo, content)
+                         VALUES('delete', old.rowid, old.path, old.repo, deckard_decompress(old.content));
+                         INSERT INTO files_fts(rowid, path, repo, content)
+                         VALUES (new.rowid, new.path, new.repo, deckard_decompress(new.content));
+                     END;
+                     """
+                 )
+
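The block above is the core storage trick: files holds only zlib blobs, files_view decompresses them on demand through the registered SQL function, and the external-content FTS5 table plus triggers keep the full-text index in sync without storing the text twice. A condensed standalone sketch of the same pattern (illustrative names; requires an SQLite build with FTS5):

    import sqlite3, zlib

    db = sqlite3.connect(":memory:")
    db.create_function("decomp", 1, lambda b: zlib.decompress(b).decode("utf-8"))
    db.executescript("""
        CREATE TABLE docs(id INTEGER PRIMARY KEY, body BLOB);
        CREATE VIEW docs_view AS SELECT id AS rowid, decomp(body) AS body FROM docs;
        CREATE VIRTUAL TABLE docs_fts USING fts5(body, content='docs_view', content_rowid='rowid');
        CREATE TRIGGER docs_ai AFTER INSERT ON docs BEGIN
            INSERT INTO docs_fts(rowid, body) VALUES (new.id, decomp(new.body));
        END;
    """)
    db.execute("INSERT INTO docs(body) VALUES (?)", (zlib.compress(b"hello full text search"),))
    print(db.execute("SELECT rowid FROM docs_fts WHERE docs_fts MATCH 'hello'").fetchall())  # [(1,)]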
+     def upsert_files(self, rows: Iterable[tuple[str, str, int, int, str, int]]) -> int:
+         rows_list = list(rows)
+         if not rows_list:
+             return 0
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute("BEGIN")
+             count = self.upsert_files_tx(cur, rows_list)
+             self._write.commit()
+             return count
+
+     def upsert_symbols(self, symbols: Iterable[tuple]) -> int:
+         """Upsert detected symbols (path, name, kind, line, end_line, content, parent_name, metadata, docstring)."""
+         symbols_list = list(getattr(symbols, "symbols", symbols))
+         if not symbols_list:
+             return 0
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute("BEGIN")
+             count = self.upsert_symbols_tx(cur, symbols_list)
+             self._write.commit()
+             return count
+
+     def get_symbol_block(self, path: str, name: str) -> Optional[dict[str, Any]]:
+         """Get the full content block for a specific symbol (v2.7.0)."""
+         sql = """
+             SELECT s.line, s.end_line, s.metadata, s.docstring, f.content
+             FROM symbols s
+             JOIN files f ON s.path = f.path
+             WHERE s.path = ? AND s.name = ?
+             ORDER BY s.line ASC
+             LIMIT 1
+         """
+         with self._read_lock:
+             row = self._read.execute(sql, (path, name)).fetchone()
+
+         if not row:
+             return None
+
+         line_start = row["line"]
+         line_end = row["end_line"]
+         full_content = _decompress(row["content"])
+
+         # Extract lines
+         lines = full_content.splitlines()
+
+         # 1-based index to 0-based
+         if line_end <= 0:  # fallback if end_line not parsed
+             line_end = line_start + 10
+
+         start_idx = max(0, line_start - 1)
+         end_idx = min(len(lines), line_end)
+
+         block = "\n".join(lines[start_idx:end_idx])
+         return {
+             "name": name,
+             "start_line": line_start,
+             "end_line": line_end,
+             "content": block,
+             "metadata": row["metadata"],
+             "docstring": row["docstring"]
+         }
+
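Symbol line numbers are stored 1-based and inclusive, so the slice shifts the start down by one and leaves the end as-is. For a symbol spanning lines 3-5:

    lines = ["L1", "L2", "L3", "L4", "L5", "L6"]
    line_start, line_end = 3, 5  # inclusive, 1-based
    block = "\n".join(lines[max(0, line_start - 1):min(len(lines), line_end)])
    assert block == "L3\nL4\nL5"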
+     def upsert_relations(self, relations: Iterable[tuple[str, str, str, str, str, int]]) -> int:
+         """Upsert symbol relations (from_path, from_symbol, to_path, to_symbol, rel_type, line)."""
+         rels_list = list(relations)
+         if not rels_list:
+             return 0
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute("BEGIN")
+             count = self.upsert_relations_tx(cur, rels_list)
+             self._write.commit()
+             return count
+
+     def update_last_seen(self, paths: Iterable[str], timestamp: int) -> int:
+         """Update last_seen timestamp for existing files (v2.5.3)."""
+         paths_list = list(paths)
+         if not paths_list:
+             return 0
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute("BEGIN")
+             count = self.update_last_seen_tx(cur, paths_list, timestamp)
+             self._write.commit()
+             return count
+
+     def delete_unseen_files(self, timestamp_limit: int) -> int:
+         """Delete files that were not seen in the latest scan (v2.5.3)."""
+         with self._lock:
+             cur = self._write.cursor()
+             # SQLite ships with foreign keys off by default, so the symbols and
+             # relations cascades cannot be assumed; enable FKs here so the
+             # ON DELETE CASCADE clauses fire for the rows removed below.
+             cur.execute("PRAGMA foreign_keys = ON;")
+
+             cur.execute("DELETE FROM files WHERE last_seen < ?", (timestamp_limit,))
+             count = cur.rowcount
+             self._write.commit()
+             return count
+
+     def delete_files(self, paths: Iterable[str]) -> int:
+         paths_list = list(paths)
+         if not paths_list:
+             return 0
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute("BEGIN")
+             cur.execute("PRAGMA foreign_keys = ON;")
+             cur.executemany("DELETE FROM files WHERE path=?", [(p,) for p in paths_list])
+             self._write.commit()
+             return len(paths_list)
+
+     def get_all_file_paths(self) -> set[str]:
+         """Get all indexed file paths for deletion detection."""
+         with self._read_lock:
+             rows = self._read.execute("SELECT path FROM files").fetchall()
+             return {r["path"] for r in rows}
+
+     def get_file_meta(self, path: str) -> Optional[tuple[int, int]]:
+         with self._read_lock:
+             row = self._read.execute("SELECT mtime, size FROM files WHERE path=?", (path,)).fetchone()
+             if not row:
+                 return None
+             return int(row["mtime"]), int(row["size"])
+
+     def get_index_status(self) -> dict[str, Any]:
+         """Get index metadata for debugging/UI (v2.4.2)."""
+         with self._read_lock:
+             row = self._read.execute("SELECT COUNT(1) AS c, MAX(mtime) AS last_mtime FROM files").fetchone()
+             count = int(row["c"]) if row and row["c"] else 0
+             last_mtime = int(row["last_mtime"]) if row and row["last_mtime"] else 0
+
+         return {
+             "total_files": count,
+             "last_scan_time": last_mtime,
+             "db_size_bytes": Path(self.db_path).stat().st_size if Path(self.db_path).exists() else 0
+         }
+
+     def has_legacy_paths(self) -> bool:
+         """Return True if the DB contains paths without a root-id prefix."""
+         cache_key = "legacy_paths"
+         now = time.time()
+         cached = self._stats_cache.get(cache_key)
+         if cached is not None and (now - self._stats_cache_ts < self._stats_cache_ttl):
+             return bool(cached)
+         try:
+             with self._read_lock:
+                 row = self._read.execute(
+                     "SELECT 1 AS c FROM files WHERE path NOT LIKE ? LIMIT 1",
+                     ("root-%/%",),
+                 ).fetchone()
+             exists = bool(row)
+             self._stats_cache[cache_key] = exists
+             self._stats_cache_ts = now
+             return exists
+         except Exception:
+             return False
+
+     def count_files(self) -> int:
+         with self._read_lock:
+             row = self._read.execute("SELECT COUNT(1) AS c FROM files").fetchone()
+             return int(row["c"]) if row else 0
+
+     def clear_stats_cache(self) -> None:
+         """Invalidate stats cache."""
+         self._stats_cache.clear()
+         self._stats_cache_ts = 0.0
+
+     def get_repo_stats(self, force_refresh: bool = False, root_ids: Optional[list[str]] = None) -> dict[str, int]:
+         """Get file counts per repo with TTL cache (v2.5.1)."""
+         now = time.time()
+         if root_ids:
+             force_refresh = True
+         if not force_refresh and (now - self._stats_cache_ts < self._stats_cache_ttl):
+             cached = self._stats_cache.get("repo_stats")
+             if cached is not None:
+                 return cached
+
+         try:
+             with self._read_lock:
+                 if root_ids:
+                     root_clauses = " OR ".join(["path LIKE ?"] * len(root_ids))
+                     sql = f"SELECT repo, COUNT(1) as c FROM files WHERE {root_clauses} GROUP BY repo"
+                     params = [f"{rid}/%" for rid in root_ids]
+                     rows = self._read.execute(sql, params).fetchall()
+                 else:
+                     rows = self._read.execute("SELECT repo, COUNT(1) as c FROM files GROUP BY repo").fetchall()
+             stats = {r["repo"]: r["c"] for r in rows}
+             self._stats_cache["repo_stats"] = stats
+             self._stats_cache_ts = now
+             return stats
+         except Exception:
+             return {}
+
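get_repo_stats and has_legacy_paths share a single timestamp, so one refresh renews the whole cache and any write path can invalidate it via clear_stats_cache(). The TTL pattern in isolation (a minimal sketch, not the class above):

    import time

    class TTLCache:
        def __init__(self, ttl: float = 60.0):
            self._ttl, self._ts, self._data = ttl, 0.0, {}

        def get(self, key, compute):
            if key in self._data and time.time() - self._ts < self._ttl:
                return self._data[key]           # fresh enough: reuse
            value = self._data[key] = compute()  # stale or missing: recompute
            self._ts = time.time()
            return value

    cache = TTLCache()
    print(cache.get("repo_stats", lambda: {"repoA": 12}))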
+     def upsert_repo_meta(self, repo_name: str, tags: str = "", domain: str = "", description: str = "", priority: int = 0) -> None:
+         """Upsert repository metadata (v2.4.3)."""
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute("BEGIN")
+             self.upsert_repo_meta_tx(cur, repo_name, tags, domain, description, priority)
+             self._write.commit()
+
+     def get_repo_meta(self, repo_name: str) -> Optional[dict[str, Any]]:
+         """Get metadata for a specific repo."""
+         with self._read_lock:
+             row = self._read.execute("SELECT * FROM repo_meta WHERE repo_name = ?", (repo_name,)).fetchone()
+             return dict(row) if row else None
+
+     def get_all_repo_meta(self) -> dict[str, dict[str, Any]]:
+         """Get all repo metadata as a map."""
+         with self._read_lock:
+             rows = self._read.execute("SELECT * FROM repo_meta").fetchall()
+             return {row["repo_name"]: dict(row) for row in rows}
+
+     def delete_file(self, path: str) -> None:
+         """Delete a file and its symbols by path (v2.7.2)."""
+         with self._lock:
+             cur = self._write.cursor()
+             cur.execute("BEGIN")
+             self.delete_path_tx(cur, path)
+             self._write.commit()
+
+     def list_files(
+         self,
+         repo: Optional[str] = None,
+         path_pattern: Optional[str] = None,
+         file_types: Optional[list[str]] = None,
+         include_hidden: bool = False,
+         limit: int = 100,
+         offset: int = 0,
+         root_ids: Optional[list[str]] = None,
+     ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
+         """List indexed files for debugging (v2.4.0)."""
+         limit = min(int(limit), 500)
+         offset = max(int(offset), 0)
+
+         where_clauses = []
+         params: list[Any] = []
+
+         # 0. Root filter
+         if root_ids:
+             root_clauses = []
+             for rid in root_ids:
+                 root_clauses.append("f.path LIKE ?")
+                 params.append(f"{rid}/%")
+             if root_clauses:
+                 where_clauses.append("(" + " OR ".join(root_clauses) + ")")
+
+         # 1. Repo filter
+         if repo:
+             where_clauses.append("f.repo = ?")
+             params.append(repo)
+
+         # 2. Hidden files filter
+         if not include_hidden:
+             where_clauses.append("f.path NOT LIKE '%/.%'")
+             where_clauses.append("f.path NOT LIKE '.%'")
+
+         # 3. File types filter
+         if file_types:
+             type_clauses = []
+             for ft in file_types:
+                 ext = ft.lower().lstrip(".")
+                 type_clauses.append("f.path LIKE ?")
+                 params.append(f"%.{ext}")
+             if type_clauses:
+                 where_clauses.append("(" + " OR ".join(type_clauses) + ")")
+
+         # 4. Path pattern filter
+         if path_pattern:
+             sql_pattern = glob_to_like(path_pattern)
+             where_clauses.append("f.path LIKE ?")
+             params.append(sql_pattern)
+
+         where = " AND ".join(where_clauses) if where_clauses else "1=1"
+
+         sql = f"""
+             SELECT f.repo AS repo,
+                    f.path AS path,
+                    f.mtime AS mtime,
+                    f.size AS size
+             FROM files f
+             WHERE {where}
+             ORDER BY f.repo, f.path
+             LIMIT ? OFFSET ?;
+         """
+
+         # Data query params
+         data_params = params + [limit, offset]
+
+         with self._read_lock:
+             rows = self._read.execute(sql, data_params).fetchall()
+
+         files: list[dict[str, Any]] = []
+         for r in rows:
+             files.append({
+                 "repo": r["repo"],
+                 "path": r["path"],
+                 "mtime": int(r["mtime"]),
+                 "size": int(r["size"]),
+                 "file_type": get_file_extension(r["path"]),
+             })
+
+         # Count and repo breakdown reuse the same WHERE (no limit/offset params)
+         count_sql = f"SELECT COUNT(1) AS c FROM files f WHERE {where}"
+
+         repo_sql = f"""
+             SELECT repo, COUNT(1) AS file_count
+             FROM files f
+             WHERE {where}
+             GROUP BY repo
+             ORDER BY file_count DESC;
+         """
+         with self._read_lock:
+             count_res = self._read.execute(count_sql, params).fetchone()
+             total = count_res["c"] if count_res else 0
+             repo_rows = self._read.execute(repo_sql, params).fetchall()
+
+         repos = [{"repo": r["repo"], "file_count": r["file_count"]} for r in repo_rows]
+
+         meta = {
+             "total": total,
+             "returned": len(files),
+             "offset": offset,
+             "limit": limit,
+             "repos": repos,
+             "include_hidden": include_hidden,
+         }
+
+         return files, meta
+
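glob_to_like (imported from app/ranking.py) converts the user-facing glob into an SQL LIKE pattern before binding. Its exact rules are not shown in this file; a plausible sketch of the assumed translation:

    # Hypothetical stand-in for app/ranking.glob_to_like (assumed behavior).
    def glob_to_like_sketch(pattern: str) -> str:
        return pattern.replace("*", "%").replace("?", "_")

    assert glob_to_like_sketch("src/*/test?.py") == "src/%/test_.py"
    # list_files would then filter with: WHERE f.path LIKE 'src/%/test_.py'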
+     # ========== Helper Methods ==========
+
+     # Search logic is delegated to the SearchEngine (see app/search_engine.py).
+
+     # ========== Main Search Methods ==========
+
+     def search_symbols(self, query: str, repo: Optional[str] = None, limit: int = 20, root_ids: Optional[list[str]] = None) -> list[dict[str, Any]]:
+         """Search for symbols by name (v2.6.0)."""
+         limit = min(limit, 100)
+         query = query.strip()
+         if not query:
+             return []
+
+         sql = """
+             SELECT s.path, s.name, s.kind, s.line, s.end_line, s.content, s.docstring, s.metadata, f.repo, f.mtime, f.size
+             FROM symbols s
+             JOIN files f ON s.path = f.path
+             WHERE s.name LIKE ?
+         """
+         params = [f"%{query}%"]
+
+         if root_ids:
+             root_clauses = []
+             for rid in root_ids:
+                 root_clauses.append("f.path LIKE ?")
+                 params.append(f"{rid}/%")
+             if root_clauses:
+                 sql += " AND (" + " OR ".join(root_clauses) + ")"
+
+         if repo:
+             sql += " AND f.repo = ?"
+             params.append(repo)
+
+         sql += " ORDER BY length(s.name) ASC, s.path ASC LIMIT ?"
+         params.append(limit)
+
+         with self._read_lock:
+             rows = self._read.execute(sql, params).fetchall()
+
+         return [
+             {
+                 "path": r["path"],
+                 "repo": r["repo"],
+                 "name": r["name"],
+                 "kind": r["kind"],
+                 "line": r["line"],
+                 "snippet": r["content"],
+                 "docstring": r["docstring"],
+                 "metadata": r["metadata"],
+                 "mtime": int(r["mtime"]),
+                 "size": int(r["size"])
+             }
+             for r in rows
+         ]
+
+     def read_file(self, path: str) -> Optional[str]:
+         """Read full file content from DB (v2.6.0)."""
+         with self._read_lock:
+             row = self._read.execute("SELECT content FROM files WHERE path = ?", (path,)).fetchone()
+             return _decompress(row["content"]) if row else None
+
+     def iter_engine_documents(self, root_ids: list[str]) -> Iterable[Dict[str, Any]]:
+         max_doc_bytes = int(os.environ.get("DECKARD_ENGINE_MAX_DOC_BYTES", "4194304") or 4194304)
+         preview_bytes = int(os.environ.get("DECKARD_ENGINE_PREVIEW_BYTES", "8192") or 8192)
+         head_bytes = max_doc_bytes // 2
+         tail_bytes = max_doc_bytes - head_bytes
+         with self._read_lock:
+             if root_ids:
+                 clauses = " OR ".join(["path LIKE ?"] * len(root_ids))
+                 params = [f"{rid}/%" for rid in root_ids]
+                 sql = f"SELECT path, repo, mtime, size, content, parse_status FROM files WHERE {clauses}"
+                 rows = self._read.execute(sql, params)
+             else:
+                 rows = self._read.execute("SELECT path, repo, mtime, size, content, parse_status FROM files")
+             for r in rows:
+                 path = str(r["path"])
+                 if "/" not in path:
+                     continue
+                 root_id, rel_path = path.split("/", 1)
+                 if root_ids and root_id not in root_ids:
+                     continue
+                 path_text = f"{path} {rel_path}"
+                 body_text = ""
+                 preview = ""
+                 if str(r["parse_status"]) == "ok":
+                     raw = _decompress(r["content"])
+                     norm = _normalize_engine_text(raw)
+                     if len(norm) > max_doc_bytes:
+                         norm = norm[:head_bytes] + norm[-tail_bytes:]
+                     body_text = norm
+                     if preview_bytes > 0:
+                         if len(raw) > preview_bytes:
+                             half = preview_bytes // 2
+                             preview = raw[:half] + "\n...\n" + raw[-half:]
+                         else:
+                             # Short files fit in the preview whole; slicing head and
+                             # tail here would duplicate the content.
+                             preview = raw
+                 yield {
+                     "doc_id": path,
+                     "path": path,
+                     "repo": str(r["repo"] or "__root__"),
+                     "root_id": root_id,
+                     "rel_path": rel_path,
+                     "path_text": path_text,
+                     "body_text": body_text,
+                     "preview": preview,
+                     "mtime": int(r["mtime"] or 0),
+                     "size": int(r["size"] or 0),
+                 }
+
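Oversized documents are clipped to a head plus a tail so both a file's opening and its trailing definitions remain searchable; previews get the same treatment at a smaller budget. In miniature:

    max_doc_bytes = 10
    head = max_doc_bytes // 2
    tail = max_doc_bytes - head
    norm = "abcdefghijklmnopqrstuvwxyz"
    if len(norm) > max_doc_bytes:
        norm = norm[:head] + norm[-tail:]
    assert norm == "abcdevwxyz"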
+     def search_v2(self, opts: SearchOptions) -> Tuple[List[SearchHit], Dict[str, Any]]:
+         return self.engine.search_v2(opts)
+
+     # Compatibility shims for legacy tests (v2.7.x)
+     def _search_like(self, opts: SearchOptions, terms: List[str],
+                      meta: Dict[str, Any], no_slice: bool = False) -> Tuple[List[SearchHit], Dict[str, Any]]:
+         return self.engine._search_like(opts, terms, meta, no_slice=no_slice)
+
+     def _search_fts(self, opts: SearchOptions, terms: List[str],
+                     meta: Dict[str, Any], no_slice: bool = False) -> Optional[Tuple[List[SearchHit], Dict[str, Any]]]:
+         return self.engine._search_fts(opts, terms, meta, no_slice=no_slice)
+
+     def search(
+         self,
+         q: str,
+         repo: Optional[str],
+         limit: int = 20,
+         snippet_max_lines: int = 5,
+         root_ids: Optional[list[str]] = None,
+     ) -> Tuple[List[SearchHit], Dict[str, Any]]:
+         opts = SearchOptions(
+             query=q,
+             repo=repo,
+             limit=limit,
+             snippet_lines=snippet_max_lines,
+             root_ids=list(root_ids or []),
+         )
+         return self.search_v2(opts)
+
+     def _get_enclosing_symbol(self, path: str, line_no: int) -> Optional[str]:
+         """Find the nearest symbol definition above the given line (v2.6.0)."""
+         # Optimized query: find the symbol with the greatest line <= line_no
+         sql = """
+             SELECT kind, name
+             FROM symbols
+             WHERE path = ? AND line <= ?
+             ORDER BY line DESC
+             LIMIT 1
+         """
+         with self._read_lock:
+             row = self._read.execute(sql, (path, line_no)).fetchone()
+
+         if row:
+             return f"{row['kind']}: {row['name']}"
+         return None
+
+     def _is_exact_symbol(self, name: str) -> bool:
+         """Check if a symbol with this exact name exists (v2.6.0)."""
+         with self._read_lock:
+             row = self._read.execute("SELECT 1 FROM symbols WHERE name = ? LIMIT 1", (name,)).fetchone()
+             return bool(row)
+
+     def repo_candidates(self, q: str, limit: int = 3, root_ids: Optional[list[str]] = None) -> List[Dict[str, Any]]:
+         return self.engine.repo_candidates(q, limit, root_ids=root_ids or [])
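End-to-end, the class is driven by the indexer through upsert_files and queried through search. A hypothetical usage sketch (paths, repo name, and content are made up; requires the installed package so the engine registry resolves):

    from app.db import LocalSearchDB

    db = LocalSearchDB("/tmp/deckard-demo/index.db")
    # Legacy 6-tuple (path, repo, mtime, size, content, last_seen);
    # upsert_files_tx pads it to the full 14-column row.
    db.upsert_files([
        ("root-abc123/app/main.py", "demo-repo", 1700000000, 120,
         "def main():\n    pass\n", 1700000000),
    ])
    hits, meta = db.search("main", repo=None, limit=5)
    print(len(hits), meta)
    db.close()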