sari-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. app/__init__.py +1 -0
  2. app/config.py +240 -0
  3. app/db.py +932 -0
  4. app/dedup_queue.py +77 -0
  5. app/engine_registry.py +56 -0
  6. app/engine_runtime.py +472 -0
  7. app/http_server.py +204 -0
  8. app/indexer.py +1532 -0
  9. app/main.py +147 -0
  10. app/models.py +39 -0
  11. app/queue_pipeline.py +65 -0
  12. app/ranking.py +144 -0
  13. app/registry.py +172 -0
  14. app/search_engine.py +572 -0
  15. app/watcher.py +124 -0
  16. app/workspace.py +286 -0
  17. deckard/__init__.py +3 -0
  18. deckard/__main__.py +4 -0
  19. deckard/main.py +345 -0
  20. deckard/version.py +1 -0
  21. mcp/__init__.py +1 -0
  22. mcp/__main__.py +19 -0
  23. mcp/cli.py +485 -0
  24. mcp/daemon.py +149 -0
  25. mcp/proxy.py +304 -0
  26. mcp/registry.py +218 -0
  27. mcp/server.py +519 -0
  28. mcp/session.py +234 -0
  29. mcp/telemetry.py +112 -0
  30. mcp/test_cli.py +89 -0
  31. mcp/test_daemon.py +124 -0
  32. mcp/test_server.py +197 -0
  33. mcp/tools/__init__.py +14 -0
  34. mcp/tools/_util.py +244 -0
  35. mcp/tools/deckard_guide.py +32 -0
  36. mcp/tools/doctor.py +208 -0
  37. mcp/tools/get_callers.py +60 -0
  38. mcp/tools/get_implementations.py +60 -0
  39. mcp/tools/index_file.py +75 -0
  40. mcp/tools/list_files.py +138 -0
  41. mcp/tools/read_file.py +48 -0
  42. mcp/tools/read_symbol.py +99 -0
  43. mcp/tools/registry.py +212 -0
  44. mcp/tools/repo_candidates.py +89 -0
  45. mcp/tools/rescan.py +46 -0
  46. mcp/tools/scan_once.py +54 -0
  47. mcp/tools/search.py +208 -0
  48. mcp/tools/search_api_endpoints.py +72 -0
  49. mcp/tools/search_symbols.py +63 -0
  50. mcp/tools/status.py +135 -0
  51. sari/__init__.py +1 -0
  52. sari/__main__.py +4 -0
  53. sari-0.0.1.dist-info/METADATA +521 -0
  54. sari-0.0.1.dist-info/RECORD +58 -0
  55. sari-0.0.1.dist-info/WHEEL +5 -0
  56. sari-0.0.1.dist-info/entry_points.txt +2 -0
  57. sari-0.0.1.dist-info/licenses/LICENSE +21 -0
  58. sari-0.0.1.dist-info/top_level.txt +4 -0
app/search_engine.py ADDED
@@ -0,0 +1,572 @@
+import sqlite3
+import re
+import time
+import unicodedata
+from pathlib import Path
+from typing import List, Tuple, Optional, Any, Dict
+
+from .models import SearchHit, SearchOptions
+from .ranking import (
+    extract_terms, count_matches, calculate_recency_score,
+    snippet_around, get_file_extension, glob_to_like
+)
+
+class SearchEngine:
+    def __init__(self, db):
+        self.db = db
+
+    def search_v2(self, opts: SearchOptions) -> Tuple[List[SearchHit], Dict[str, Any]]:
+        """Enhanced search with Hybrid (Symbol + FTS) strategy."""
+        q = (opts.query or "").strip()
+        q = unicodedata.normalize("NFKC", q).lower()
+        q = " ".join(q.split())
+        if not q:
+            return [], {"fallback_used": False, "total_scanned": 0, "total": 0}
+
+        terms = extract_terms(q)
+        meta: Dict[str, Any] = {"fallback_used": False, "total_scanned": 0}
+
+        # Regex mode bypasses hybrid logic
+        if opts.use_regex:
+            return self._search_regex(opts, terms, meta)
+
+        # 1. Symbol Search (Priority Layer)
+        symbol_hits_data = []
+        if opts.total_mode != "approx":
+            symbol_hits_data = self.db.search_symbols(q, repo=opts.repo, limit=50, root_ids=list(opts.root_ids or []))
+
+        # Convert symbol hits to SearchHit objects
+        symbol_hits = []
+        for s in symbol_hits_data:
+            hit = SearchHit(
+                repo=s["repo"],
+                path=s["path"],
+                score=1000.0,  # Massive starting score for symbol match
+                snippet=s["snippet"],
+                mtime=s["mtime"],
+                size=s["size"],
+                match_count=1,
+                file_type=get_file_extension(s["path"]),
+                hit_reason=f"Symbol: {s['kind']} {s['name']}",
+                context_symbol=f"{s['kind']}: {s['name']}",
+                docstring=s.get("docstring", ""),
+                metadata=s.get("metadata", "{}")
+            )
+            # Recency boost if enabled
+            if opts.recency_boost:
+                hit.score = calculate_recency_score(hit.mtime, hit.score)
+            symbol_hits.append(hit)
+
+        # 2. FTS Search
+        fts_hits = []
+        # v2.7.0: Allow unicode in FTS, but fallback if non-ASCII character present
+        # as FTS tokenizers often skip emojis and special symbols.
+        has_unicode = any(ord(c) > 127 for c in q)
+        is_too_short = len(q) < 3
+
+        use_fts = self.db.fts_enabled and not is_too_short and not has_unicode
+        fts_success = False
+
+
+        if use_fts:
+            try:
+                res = self.db._search_fts(opts, terms, meta, no_slice=True)
+                if res:
+                    fts_hits, fts_meta = res
+                    meta.update(fts_meta)
+                    fts_success = True
+            except sqlite3.OperationalError:
+                pass
+
+        if not fts_success:
+            # Fallback to LIKE
+            res, like_meta = self.db._search_like(opts, terms, meta, no_slice=True)
+            fts_hits = res
+            meta.update(like_meta)
+            meta["fallback_used"] = True
+        elif not fts_hits and terms:
+            # v2.7.5: Force fallback if FTS results are suspiciously empty for non-trivial query
+            res, like_meta = self.db._search_like(opts, terms, meta, no_slice=True)
+            fts_hits = res
+            meta.update(like_meta)
+            meta["fallback_used"] = True
+
+        # 3. Merge Strategies
+        merged_map: Dict[str, SearchHit] = {}
+        for h in fts_hits:
+            merged_map[h.path] = h
+
+        for sh in symbol_hits:
+            if sh.path in merged_map:
+                existing = merged_map[sh.path]
+                existing.score += 1200.0
+                existing.hit_reason = f"{sh.hit_reason}, {existing.hit_reason}"
+                if sh.snippet.strip() not in existing.snippet:
+                    existing.snippet = f"{sh.snippet}\n...\n{existing.snippet}"
+                if sh.docstring:
+                    existing.docstring = sh.docstring
+                if sh.metadata and sh.metadata != "{}":
+                    existing.metadata = sh.metadata
+            else:
+                merged_map[sh.path] = sh
+
+        final_hits = list(merged_map.values())
+        final_hits.sort(key=lambda h: (-h.score, -h.mtime, h.path))
+
+        start = int(opts.offset)
+        end = start + int(opts.limit)
+
+        # Adjust Total Count
+        if opts.total_mode == "approx":
+            meta["total"] = -1
+        elif meta.get("total", 0) > 0:
+            meta["total"] = max(meta["total"], len(final_hits))
+        else:
+            meta["total"] = len(final_hits)
+
+        return final_hits[start:end], meta
+
+    def _search_like(self, opts: SearchOptions, terms: List[str],
+                     meta: Dict[str, Any], no_slice: bool = False) -> Tuple[List[SearchHit], Dict[str, Any]]:
+        meta["fallback_used"] = True
+        like_q = opts.query.replace("^", "^^").replace("%", "^%").replace("_", "^_")
+        # v2.7.0: Use files_view (fv) instead of files (f) for content match as it's decompressed
+        where_clauses = ["(fv.content LIKE ? ESCAPE '^' OR f.path LIKE ? ESCAPE '^' OR f.repo LIKE ? ESCAPE '^')"]
+        params: List[Any] = [f"%{like_q}%", f"%{like_q}%", f"%{like_q}%"]
+
+        filter_clauses, filter_params = self._build_filter_clauses(opts)
+        where_clauses.extend(filter_clauses)
+        params.extend(filter_params)
+
+        where = " AND ".join(where_clauses)
+        fetch_limit = (opts.offset + opts.limit) * 2
+        if fetch_limit < 100: fetch_limit = 100
+
+        sql = f"""
+            SELECT f.repo AS repo,
+                   f.path AS path,
+                   f.mtime AS mtime,
+                   f.size AS size,
+                   1.0 AS score,
+                   fv.content AS content
+            FROM files f
+            JOIN files_view fv ON f.rowid = fv.rowid
+            WHERE {where}
+            ORDER BY {"f.mtime DESC" if opts.recency_boost else "f.path"}, f.path ASC
+            LIMIT ?;
+        """
+        params.append(int(fetch_limit))
+
+        with self.db._read_lock:
+            if opts.total_mode == "exact":
+                count_sql = f"SELECT COUNT(*) as c FROM files f JOIN files_view fv ON f.rowid = fv.rowid WHERE {where}"
+                count_row = self.db._read.execute(count_sql, params[:-1]).fetchone()
+                meta["total"] = int(count_row["c"]) if count_row else 0
+            else:
+                meta["total"] = -1
+
+            rows = self.db._read.execute(sql, params).fetchall()
+
+        meta["total_mode"] = opts.total_mode
+        hits = self._process_rows(rows, opts, terms)
+        meta["total_scanned"] = len(rows)
+
+        if no_slice:
+            return hits, meta
+
+        start = opts.offset
+        end = opts.offset + opts.limit
+        return hits[start:end], meta
+
+    def _search_fts(self, opts: SearchOptions, terms: List[str],
+                    meta: Dict[str, Any], no_slice: bool = False) -> Optional[Tuple[List[SearchHit], Dict[str, Any]]]:
+        # v2.7.0: Safe FTS query escaping
+        # Wrap terms in double quotes and escape existing quotes to prevent FTS5 syntax errors
+        safe_terms = []
+        for t in terms:
+            clean_t = t.replace('"', '""')
+            if clean_t:
+                safe_terms.append(f'"{clean_t}"')
+
+        fts_query = " ".join(safe_terms)
+        if not fts_query:
+            return [], meta
+
+        where_clauses = ["files_fts MATCH ?"]
+        params: List[Any] = [fts_query]
+
+        filter_clauses, filter_params = self._build_filter_clauses(opts)
+        where_clauses.extend(filter_clauses)
+        params.extend(filter_params)
+
+        where = " AND ".join(where_clauses)
+        total_hits = 0
+        if opts.total_mode == "exact":
+            try:
+                count_sql = f"SELECT COUNT(*) as c FROM files_fts JOIN files f ON f.rowid = files_fts.rowid WHERE {where}"
+                with self.db._read_lock:
+                    count_row = self.db._read.execute(count_sql, params).fetchone()
+                total_hits = int(count_row["c"]) if count_row else 0
+            except sqlite3.OperationalError:
+                return None
+        else:
+            total_hits = -1
+
+        meta["total"] = total_hits
+        meta["total_mode"] = opts.total_mode
+        fetch_limit = 50
+
+        path_prior_sql = """
+            CASE
+                WHEN f.path LIKE 'src/%' OR f.path LIKE '%/src/%' OR f.path LIKE 'app/%' OR f.path LIKE '%/app/%' OR f.path LIKE 'core/%' OR f.path LIKE '%/core/%' THEN 0.6
+                WHEN f.path LIKE 'config/%' OR f.path LIKE '%/config/%' OR f.path LIKE 'domain/%' OR f.path LIKE '%/domain/%' OR f.path LIKE 'service/%' OR f.path LIKE '%/service/%' THEN 0.4
+                WHEN f.path LIKE 'test/%' OR f.path LIKE '%/test/%' OR f.path LIKE 'tests/%' OR f.path LIKE '%/tests/%' OR f.path LIKE 'example/%' OR f.path LIKE '%/example/%' OR f.path LIKE 'dist/%' OR f.path LIKE '%/dist/%' OR f.path LIKE 'build/%' OR f.path LIKE '%/build/%' THEN -0.7
+                ELSE 0.0
+            END
+        """
+
+        filetype_prior_sql = """
+            CASE
+                WHEN f.path LIKE '%.py' OR f.path LIKE '%.ts' OR f.path LIKE '%.go' OR f.path LIKE '%.java' OR f.path LIKE '%.kt' THEN 0.3
+                WHEN f.path LIKE '%.yaml' OR f.path LIKE '%.yml' OR f.path LIKE '%.json' THEN 0.15
+                WHEN f.path LIKE '%.lock' OR f.path LIKE '%.min.js' OR f.path LIKE '%.map' THEN -0.8
+                ELSE 0.0
+            END
+        """
+
+        sql = f"""
+            SELECT f.repo AS repo,
+                   f.path AS path,
+                   f.mtime AS mtime,
+                   f.size AS size,
+                   ( -1.0 * bm25(files_fts) + {path_prior_sql} + {filetype_prior_sql} ) AS score,
+                   f.content AS content
+            FROM files_fts
+            JOIN files f ON f.rowid = files_fts.rowid
+            WHERE {where}
+            ORDER BY score DESC
+            LIMIT ?;
+        """
+        params.append(int(fetch_limit))
+
+        with self.db._read_lock:
+            rows = self.db._read.execute(sql, params).fetchall()
+
+        hits = self._process_rows(rows, opts, terms, is_rerank=True)
+        meta["total_scanned"] = len(rows)
+
+        if no_slice:
+            return hits, meta
+
+        start = opts.offset
+        end = opts.offset + opts.limit
+        return hits[start:end], meta
+
+    def _search_regex(self, opts: SearchOptions, terms: List[str],
+                      meta: Dict[str, Any]) -> Tuple[List[SearchHit], Dict[str, Any]]:
+        meta["regex_mode"] = True
+        flags = 0 if opts.case_sensitive else re.IGNORECASE
+        try:
+            pattern = re.compile(opts.query, flags)
+        except re.error as e:
+            meta["regex_error"] = str(e)
+            return [], meta
+
+        where_clauses = ["1=1"]
+        params: List[Any] = []
+        if opts.repo:
+            where_clauses.append("f.repo = ?")
+            params.append(opts.repo)
+
+        filter_clauses, filter_params = self._build_filter_clauses(opts)
+        where_clauses.extend(filter_clauses)
+        params.extend(filter_params)
+
+        where = " AND ".join(where_clauses)
+
+        sql = f"""
+            SELECT f.repo AS repo,
+                   f.path AS path,
+                   f.mtime AS mtime,
+                   f.size AS size,
+                   fv.content AS content
+            FROM files f
+            JOIN files_view fv ON f.rowid = fv.rowid
+            WHERE {where}
+            ORDER BY {"f.mtime DESC" if opts.recency_boost else "f.path"}
+            LIMIT 5000;
+        """
+        with self.db._read_lock:
+            rows = self.db._read.execute(sql, params).fetchall()
+        meta["total_scanned"] = len(rows)
+
+        # No more manual _decompress(r["content"]) needed here as it comes from fv.content
+        hits: List[SearchHit] = []
+        for r in rows:
+            path = r["path"]
+            content = r["content"] or ""
+
+            if not self._matches_file_types(path, opts.file_types): continue
+            if not self._matches_path_pattern(path, opts.path_pattern): continue
+            if self._matches_exclude_patterns(path, opts.exclude_patterns): continue
+
+            matches = pattern.findall(content)
+            if not matches: continue
+
+            match_count = len(matches)
+            score = float(match_count)
+            if opts.recency_boost:
+                score = calculate_recency_score(int(r["mtime"]), score)
+
+            snippet = snippet_around(content, [opts.query], opts.snippet_lines, highlight=True)
+            hits.append(SearchHit(
+                repo=r["repo"], path=path, score=score, snippet=snippet,
+                mtime=int(r["mtime"]), size=int(r["size"]), match_count=match_count,
+                file_type=get_file_extension(path)
+            ))
+
+        hits.sort(key=lambda h: (-h.score, -h.mtime, h.path))
+        meta["total"] = len(hits)
+        meta["total_mode"] = "approx"
+        start = opts.offset
+        end = opts.offset + opts.limit
+        return hits[start:end], meta
+
+    def _process_rows(self, rows: list, opts: SearchOptions,
+                      terms: List[str], is_rerank: bool = False) -> List[SearchHit]:
+        hits: List[SearchHit] = []
+        all_meta = self.db.get_all_repo_meta()
+        query_terms = [t.lower() for t in terms]
+        query_raw_lower = opts.query.lower()
+
+        # v2.7.0: Local import of _decompress is no longer strictly needed if content comes from VIEW,
+        # but let's keep it as a fallback in case raw rows are passed.
+        from .db import _decompress
+
+        def_patterns = []
+        for term in query_terms:
+            if len(term) < 3: continue
+            p = re.compile(rf"(class|def|function|struct|pub\s+fn|async\s+def|interface|type)\s+{re.escape(term)}\b", re.IGNORECASE)
+            def_patterns.append(p)
+
+        for r in rows:
+            path = r["path"]
+            repo_name = r["repo"]
+            # Try to use 'content' as is (from view), fallback to decompress if it's BLOB
+            content = r["content"]
+            if isinstance(content, (bytes, bytearray)):
+                content = _decompress(content)
+            elif content is None:
+                content = ""
+
+            mtime = int(r["mtime"])
+            size = int(r["size"])
+
+            if not self._matches_file_types(path, opts.file_types): continue
+            if not self._matches_path_pattern(path, opts.path_pattern): continue
+            if self._matches_exclude_patterns(path, opts.exclude_patterns): continue
+
+            score = float(r["score"]) if r["score"] is not None else 0.0
+            reasons = []
+            path_lower = path.lower()
+            filename = path_lower.split("/")[-1]
+            file_stem = Path(filename).stem.lower()
+
+            if filename == query_raw_lower or file_stem == query_raw_lower:
+                score += 2.0
+                reasons.append("Exact filename match")
+            elif query_raw_lower in file_stem:
+                score += 1.2
+                reasons.append("Filename stem match")
+            elif path_lower.endswith(query_raw_lower):
+                score += 1.0
+                reasons.append("Path suffix match")
+
+            for pat in def_patterns:
+                if pat.search(content):
+                    score += 1.5
+                    reasons.append("Definition found")
+                    break
+
+            if len(query_terms) > 1:
+                content_lower = content.lower()
+                term_indices = []
+                all_found = True
+                for t in query_terms:
+                    idx = content_lower.find(t)
+                    if idx == -1:
+                        all_found = False
+                        break
+                    term_indices.append(idx)
+                if all_found:
+                    span = max(term_indices) - min(term_indices)
+                    if span < 100:
+                        score += 0.5
+                        reasons.append("Proximity boost")
+
+            meta_obj = all_meta.get(repo_name)
+            if meta_obj:
+                if meta_obj["priority"] > 0:
+                    score += meta_obj["priority"]
+                    reasons.append("High priority")
+                tags = meta_obj["tags"].lower().split(",")
+                domain = meta_obj["domain"].lower()
+                for term in query_terms:
+                    if term in tags or term == domain:
+                        score += 0.5
+                        reasons.append(f"Tag match ({term})")
+                        break
+
+            if any(p in path_lower for p in [".codex/", "agents.md", "gemini.md", "readme.md"]):
+                score += 0.2
+                reasons.append("Core file")
+
+            if opts.recency_boost:
+                score = calculate_recency_score(mtime, score)
+
+            match_count = count_matches(content, opts.query, False, opts.case_sensitive)
+            if opts.case_sensitive and match_count == 0: continue
+
+            # v2.7.0: Debugging fallback logic - if no matches found via count_matches, log why
+            if match_count == 0 and not opts.case_sensitive:
+                # We expect non-case-sensitive to find things if LIKE found them
+                pass
+
+            snippet = snippet_around(content, terms, opts.snippet_lines, highlight=True)
+            context_symbol = ""
+            first_line_match = re.search(r"L(\d+):", snippet)
+            if first_line_match:
+                start_line = int(first_line_match.group(1))
+                ctx = self.db._get_enclosing_symbol(path, start_line)
+                if ctx:
+                    context_symbol = ctx
+                    score += 0.2
+
+            hits.append(SearchHit(
+                repo=repo_name, path=path, score=round(score, 3), snippet=snippet,
+                mtime=mtime, size=size, match_count=match_count,
+                file_type=get_file_extension(path),
+                hit_reason=", ".join(reasons) if reasons else "Content match",
+                context_symbol=context_symbol
+            ))
+
+        hits.sort(key=lambda h: (-h.score, -h.mtime, h.path))
+        return hits
+
+    def repo_candidates(self, q: str, limit: int = 3, root_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+        q = (q or "").strip()
+        if not q: return []
+        limit = max(1, min(int(limit), 5))
+
+        if self.db.fts_enabled:
+            sql = """
+                SELECT f.repo AS repo, COUNT(1) AS c
+                FROM files_fts JOIN files f ON f.rowid = files_fts.rowid
+                WHERE files_fts MATCH ? GROUP BY f.repo ORDER BY c DESC LIMIT ?;
+            """
+            try:
+                with self.db._read_lock:
+                    rows = self.db._read.execute(sql, (q, limit)).fetchall()
+                out: List[Dict[str, Any]] = []
+                for r in rows:
+                    repo = str(r["repo"])
+                    c = int(r["c"])
+                    hits, _ = self.search_v2(SearchOptions(query=q, repo=repo, limit=1, root_ids=list(root_ids or [])))
+                    evidence = hits[0].snippet.replace("\n", " ")[:200] if hits else ""
+                    out.append({"repo": repo, "score": c, "evidence": evidence})
+                return out
+            except sqlite3.OperationalError: pass
+
+        like_q = q.replace("^", "^^").replace("%", "^%").replace("_", "^_")
+        sql = "SELECT repo, COUNT(1) AS c FROM files WHERE content LIKE ? ESCAPE '^' GROUP BY repo ORDER BY c DESC LIMIT ?;"
+        with self.db._read_lock:
+            rows = self.db._read.execute(sql, (f"%{like_q}%", limit)).fetchall()
+        out = []
+        for r in rows:
+            repo, c = str(r["repo"]), int(r["c"])
+            hits, _ = self.search_v2(SearchOptions(query=q, repo=repo, limit=1, root_ids=list(root_ids or [])))
+            evidence = hits[0].snippet.replace("\n", " ")[:200] if hits else ""
+            out.append({"repo": repo, "score": c, "evidence": evidence})
+        return out
+
+    def _build_filter_clauses(self, opts: SearchOptions) -> Tuple[List[str], List[Any]]:
+        clauses, params = [], []
+        if opts.root_ids:
+            root_clauses = []
+            for rid in opts.root_ids:
+                root_clauses.append("f.path LIKE ?")
+                params.append(f"{rid}/%")
+            if root_clauses:
+                clauses.append("(" + " OR ".join(root_clauses) + ")")
+        if opts.repo:
+            clauses.append("f.repo = ?")
+            params.append(opts.repo)
+        if opts.file_types:
+            type_clauses = []
+            for ft in opts.file_types:
+                ext = ft.lower().lstrip(".")
+                type_clauses.append("f.path LIKE ?")
+                params.append(f"%.{ext}")
+            if type_clauses: clauses.append("(" + " OR ".join(type_clauses) + ")")
+        if opts.path_pattern:
+            clauses.append("f.path LIKE ?")
+            params.append(glob_to_like(opts.path_pattern))
+        return clauses, params
+
+    def _matches_file_types(self, path: str, file_types: List[str]) -> bool:
+        if not file_types: return True
+        return get_file_extension(path) in [ft.lower().lstrip('.') for ft in file_types]
+
+    def _matches_path_pattern(self, path: str, pattern: Optional[str]) -> bool:
+        if not pattern: return True
+        import fnmatch
+
+        # Normalize slashes for consistency
+        path = path.replace("\\", "/")
+        pattern = pattern.replace("\\", "/")
+
+        # If pattern is absolute, match exactly or prefix
+        if pattern.startswith("/"):
+            if path.startswith(pattern): return True
+            return fnmatch.fnmatch(path, pattern)
+
+        # Relative pattern: match end of path or segment
+        # e.g. "src/main.py" should match "/users/.../src/main.py"
+
+        if path.endswith("/" + pattern): return True
+        if path == pattern: return True
+
+        # Check glob
+        if fnmatch.fnmatch(path, pattern): return True
+        if fnmatch.fnmatch(path, f"*/{pattern}"): return True
+        if fnmatch.fnmatch(path, f"*/{pattern}/*"): return True
+
+        # Fallback to existing loose match
+        return (fnmatch.fnmatch(path, f"**/{pattern}") or
+                fnmatch.fnmatch(path, f"{pattern}*"))
+
+    def _matches_exclude_patterns(self, path: str, patterns: List[str]) -> bool:
+        if not patterns: return False
+        import fnmatch
+        for p in patterns:
+            if p in path or fnmatch.fnmatch(path, f"*{p}*"): return True
+        return False
+
+
+class SqliteSearchEngineAdapter:
+    """Adapter for the legacy SQLite-backed SearchEngine implementation."""
+
+    def __init__(self, db):
+        self._impl = SearchEngine(db)
+
+    def search_v2(self, opts: SearchOptions):
+        return self._impl.search_v2(opts)
+
+    def repo_candidates(self, q: str, limit: int = 3, root_ids: Optional[List[str]] = None):
+        return self._impl.repo_candidates(q, limit, root_ids=root_ids)
+
+    def _search_like(self, opts: SearchOptions, terms: List[str], meta: Dict[str, Any], no_slice: bool = False):
+        return self._impl._search_like(opts, terms, meta, no_slice=no_slice)
+
+    def _search_fts(self, opts: SearchOptions, terms: List[str], meta: Dict[str, Any], no_slice: bool = False):
+        return self._impl._search_fts(opts, terms, meta, no_slice=no_slice)
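Reviewer note, not part of the released wheel: a minimal sketch of driving the hybrid search above. It assumes a db handle from app/db.py (the Database name and its constructor are hypothetical; db.py's public surface is not visible in this section) and that SearchOptions (app/models.py) exposes the fields search_v2 reads (query, repo, limit, offset, total_mode, recency_boost) with usable defaults for the rest.

import json
from app.db import Database          # hypothetical name; app/db.py is not shown here
from app.models import SearchOptions
from app.search_engine import SearchEngine

db = Database("index.sqlite3")       # assumed constructor
engine = SearchEngine(db)

# Hybrid flow: the symbol layer runs unless total_mode == "approx"; FTS runs
# for ASCII queries of 3+ characters; LIKE is the fallback on FTS errors or
# suspiciously empty FTS results.
opts = SearchOptions(
    query="DebouncedEventHandler",
    repo=None,               # None = search every indexed repo
    limit=10,
    offset=0,
    total_mode="exact",
    recency_boost=True,
)
hits, meta = engine.search_v2(opts)
for h in hits:
    print(f"{h.score:9.2f}  {h.repo}/{h.path}  [{h.hit_reason}]")
print(json.dumps({"fallback_used": meta["fallback_used"], "total": meta["total"]}))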
app/watcher.py ADDED
@@ -0,0 +1,124 @@
+
+import os
+import time
+import threading
+from dataclasses import dataclass
+from typing import Callable, Dict, List, Optional
+from threading import Timer
+
+try:
+    from watchdog.observers import Observer
+    from watchdog.events import FileSystemEventHandler
+    HAS_WATCHDOG = True
+except ImportError:
+    HAS_WATCHDOG = False
+    # Dummy classes for safe definition
+    class FileSystemEventHandler: pass
+    class Observer: pass
+
+try:
+    from .queue_pipeline import FsEvent, FsEventKind
+except Exception:
+    from queue_pipeline import FsEvent, FsEventKind
+
+class DebouncedEventHandler(FileSystemEventHandler):
+    """Handles events with debounce to prevent duplicate indexing on save."""
+    def __init__(self, callback: Callable[[FsEvent], None], debounce_seconds: float = 1.0, logger=None):
+        self.callback = callback
+        self.debounce_seconds = debounce_seconds
+        self.logger = logger
+        self._timers = {}
+        self._lock = threading.Lock()
+        self._pending_events: Dict[str, FsEvent] = {}
+
+    def on_any_event(self, event):
+        if event.is_directory:
+            return
+
+        # We care about Created, Modified, Moved, Deleted
+        # watchdog event types: 'created', 'deleted', 'modified', 'moved'
+
+        evt_kind = None
+        if event.event_type == 'created':
+            evt_kind = FsEventKind.CREATED
+        elif event.event_type == 'modified':
+            evt_kind = FsEventKind.MODIFIED
+        elif event.event_type == 'deleted':
+            evt_kind = FsEventKind.DELETED
+        elif event.event_type == 'moved':
+            evt_kind = FsEventKind.MOVED
+
+        if not evt_kind:
+            return
+
+        key = event.src_path
+        fs_event = FsEvent(kind=evt_kind, path=event.src_path,
+                           dest_path=getattr(event, 'dest_path', None),
+                           ts=time.time())
+
+        with self._lock:
+            if key in self._timers:
+                self._timers[key].cancel()
+            self._pending_events[key] = fs_event
+            t = Timer(self.debounce_seconds, self._trigger, args=[key])
+            self._timers[key] = t
+            t.start()
+
+    def _trigger(self, path: str):
+        with self._lock:
+            if path in self._timers:
+                del self._timers[path]
+            fs_event = self._pending_events.pop(path, None)
+        if not fs_event:
+            return
+        try:
+            self.callback(fs_event)
+        except Exception as e:
+            if self.logger:
+                self.logger.log_error(f"Watcher callback failed for {path}: {e}")
+
+class FileWatcher:
+    def __init__(self, paths: List[str], on_change_callback: Callable[[FsEvent], None], logger=None):
+        self.paths = paths
+        self.callback = on_change_callback
+        self.logger = logger
+        self.observer = None
+        self._running = False
+
+    def start(self):
+        if not HAS_WATCHDOG:
+            if self.logger:
+                self.logger.log_info("Watchdog not installed. Skipping real-time monitoring.")
+            return
+
+        if self._running:
+            return
+
+        self.observer = Observer()
+        handler = DebouncedEventHandler(self.callback, logger=self.logger)
+
+        started_any = False
+        for p in self.paths:
+            if os.path.exists(p):
+                try:
+                    self.observer.schedule(handler, p, recursive=True)
+                    started_any = True
+                except Exception as e:
+                    if self.logger:
+                        self.logger.log_error(f"Failed to watch path {p}: {e}")
+
+        if started_any:
+            try:
+                self.observer.start()
+                self._running = True
+                if self.logger:
+                    self.logger.log_info(f"Watcher started on: {self.paths}")
+            except Exception as e:
+                if self.logger:
+                    self.logger.log_error(f"Failed to start observer: {e}")
+
+    def stop(self):
+        if self.observer and self._running:
+            self.observer.stop()
+            self.observer.join()
+            self._running = False
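Reviewer note, again a hedged sketch rather than package code: wiring FileWatcher to a callback. The import paths follow the app package layout in the file list above; the sleep loop is a stand-in for whatever event loop the daemon actually runs. With the default 1.0 s debounce, a burst of saves to the same path collapses into a single FsEvent.

import time
from app.queue_pipeline import FsEvent
from app.watcher import FileWatcher

def on_change(evt: FsEvent) -> None:
    # Invoked at most once per path per debounce window.
    print(f"{evt.kind} -> {evt.path}")

watcher = FileWatcher(["/path/to/repo"], on_change)
watcher.start()   # logs and returns without watching if watchdog isn't installed
try:
    time.sleep(60)
finally:
    watcher.stop()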