footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,573 @@
1
+ """Search queries — keyword, FTS5, and enrichment.
2
+
3
+ All search SQL lives here. Services call these functions and handle
4
+ role-based business logic (visibility filtering, fallback orchestration).
5
+ Some SQL-level pre-filtering (e.g. exclude_hidden) is parameterised here
6
+ for query efficiency; the service decides the parameter value.
7
+ """
8
+
9
+ import sqlite3
10
+ from pathlib import Path
11
+ from typing import Optional
12
+
13
+ from footprinter.db.sql_utils import (
14
+ build_fts5_query,
15
+ build_term_conditions,
16
+ paginate,
17
+ paginated_response,
18
+ split_query_terms,
19
+ )
20
+
21
# Home directory of the current user, kept as a plain string so it can be
# substituted for a leading "~" in user-supplied folder filters.
HOME: str = str(Path.home())
22
+
23
+
24
def search_files(
    conn: sqlite3.Connection,
    query: str,
    source: str = "all",
    limit: int = 100,
    file_ext: str | None = None,
    page: int = 1,
) -> dict:
    """Search files through the ``files_fts`` FTS5 index.

    The query is matched as a single quoted phrase with a trailing ``*``
    (prefix match). Rows whose ``status`` is ``'removed'`` are excluded.

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection whose rows support access by column name.
    query : str
        Search term (minimum 2 characters; shorter queries return an empty
        page without touching the database).
    source : str
        Filter: "all", "local", or "remote". Any other value behaves like
        "all".
    limit : int
        Maximum results per page (default: 100).
    file_ext : str or None
        Filter by file extension (e.g. ".pdf"). Case-insensitive; LIKE
        wildcards in the value are escaped and matched literally.
    page : int
        1-based page number.

    Returns
    -------
    dict with keys: results (list of dicts with ``fts_score``), pagination
    """
    if len(query) < 2:
        return paginated_response("results", [], {"page": page, "limit": limit, "total": 0, "total_pages": 1})

    if source == "local":
        source_filter = "source = 'local'"
    elif source == "remote":
        source_filter = "source IN (SELECT name FROM sources WHERE source_type = 'remote')"
    else:
        source_filter = "source IS NOT NULL"

    # Inside an FTS5 string a literal double quote must be doubled; without
    # this a query containing '"' terminates the phrase early and the MATCH
    # expression becomes malformed.
    escaped_query = query.replace('"', '""')
    fts_query = f'"{escaped_query}"*'

    ext_clause = ""
    params: list = [fts_query]
    if file_ext:
        # Escape LIKE metacharacters so the extension is matched literally.
        escaped_ext = file_ext.lower().replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        ext_clause = "AND lower(file.name) LIKE ? ESCAPE '\\'"
        params.append(f"%{escaped_ext}")

    count_sql = f"""
        SELECT COUNT(*)
        FROM files file
        JOIN files_fts fts ON fts.rowid = file.id
        WHERE files_fts MATCH ?
        AND file.{source_filter}
        AND file.status != 'removed'
        {ext_clause}
    """
    fetch_sql = f"""
        SELECT file.id, file.source, file.name, file.path, file.content_type, file.size_bytes,
        file.modified_at, fts.rank as fts_rank
        FROM files file
        JOIN files_fts fts ON fts.rowid = file.id
        WHERE files_fts MATCH ?
        AND file.{source_filter}
        AND file.status != 'removed'
        {ext_clause}
        ORDER BY fts.rank
        LIMIT ? OFFSET ?
    """

    rows, pagination = paginate(conn, count_sql, fetch_sql, params, page=page, limit=limit)

    results = []
    for row in rows:
        # FTS5 rank is negative (more negative = better match); map its
        # magnitude onto a score capped at 1.0.
        fts_rank = row["fts_rank"] if row["fts_rank"] is not None else 0.0
        fts_score = min(1.0, abs(fts_rank) / 10.0)

        results.append(
            {
                "id": row["id"],
                "source": row["source"],
                "name": row["name"],
                "path": row["path"] or "",
                "content_type": row["content_type"] or "",
                "size_bytes": row["size_bytes"],
                "modified_at": row["modified_at"] or "",
                "fts_score": fts_score,
            }
        )

    return paginated_response("results", results, pagination)
115
+
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # Keyword search (extracted from search_service)
119
+ # ---------------------------------------------------------------------------
120
+
121
+
122
def search_files_keyword(
    conn: sqlite3.Connection,
    *,
    terms: list[str] = (),
    has_query: bool = False,
    project: Optional[str] = None,
    client: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    account: Optional[str] = None,
    folder: Optional[str] = None,
    mime_type: Optional[str] = None,
    limit: int = 50,
    exclude_hidden: bool = True,
) -> list[dict]:
    """Keyword search for files with optional filters.

    Every supplied filter is ANDed together. Rows with ``status = 'removed'``
    are always excluded and results are ordered by ``modified_at`` descending.

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection whose rows support access by column name.
    terms : list of str
        Search terms; only used when ``has_query`` is true, in which case
        they are turned into an FTS5 MATCH expression via
        ``build_fts5_query``. If that expression is empty no FTS filter is
        applied.
    has_query : bool
        Whether a text query was supplied.
    project, client : str or None
        Exact match on the joined project name / client name. The client is
        reached through the file's project.
    date_from, date_to : str or None
        Inclusive bounds compared against ``file.modified_at``.
    account, mime_type : str or None
        Exact match on the corresponding file column.
    folder : str or None
        Path prefix. A leading ``~`` is replaced with the home directory.
        ``%`` and ``_`` in the prefix are matched literally.
    limit : int
        Maximum number of rows returned.
    exclude_hidden : bool
        When true, rows with ``mcp_view = 'hidden'`` are filtered out in SQL.

    Returns
    -------
    list of dicts with file metadata including project/client joins.
    """
    params: list = []
    where = ["file.status != 'removed'"]
    fts_join = ""

    if has_query:
        fts5_str = build_fts5_query(list(terms))
        if fts5_str:
            fts_join = "JOIN files_fts fts ON fts.rowid = file.id"
            where.append("files_fts MATCH ?")
            params.append(fts5_str)

    if project:
        where.append("project.project_name = ?")
        params.append(project)
    if client:
        where.append("client.name = ?")
        params.append(client)
    if date_from:
        where.append("file.modified_at >= ?")
        params.append(date_from)
    if date_to:
        where.append("file.modified_at <= ?")
        params.append(date_to)
    if account:
        where.append("file.account = ?")
        params.append(account)
    if folder:
        folder_path = folder.replace("~", HOME, 1) if folder.startswith("~") else folder
        # Escape LIKE metacharacters: directory names routinely contain "_",
        # which would otherwise match any single character.
        escaped_folder = folder_path.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        where.append("file.path LIKE ? ESCAPE '\\'")
        params.append(f"{escaped_folder}%")
    if mime_type:
        where.append("file.mime_type = ?")
        params.append(mime_type)
    if exclude_hidden:
        where.append("file.mcp_view != 'hidden'")
    # LIMIT is the last placeholder in the statement, so it is bound last.
    params.append(limit)

    rows = conn.execute(
        f"""
        SELECT file.id, file.source, file.name, file.path, file.content_type,
        file.size_bytes, file.modified_at, file.account, file.mime_type,
        file.mcp_view,
        project.project_name, client.name AS client
        FROM files file
        {fts_join}
        LEFT JOIN projects project ON file.project_id = project.id
        LEFT JOIN clients client ON project.client_id = client.id
        WHERE {" AND ".join(where)}
        ORDER BY file.modified_at DESC
        LIMIT ?
        """,
        params,
    ).fetchall()

    return [
        {
            "id": r["id"],
            "source": r["source"],
            "name": r["name"],
            "path": r["path"],
            "content_type": r["content_type"],
            "size_bytes": r["size_bytes"],
            "modified_at": r["modified_at"],
            "account": r["account"],
            "mime_type": r["mime_type"],
            "project": r["project_name"],
            "client": r["client"],
            "mcp_view": r["mcp_view"],
        }
        for r in rows
    ]
212
+
213
+
214
def search_emails_keyword(
    conn: sqlite3.Connection,
    *,
    terms: list[str] = (),
    has_query: bool = False,
    project: Optional[str] = None,
    client: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    account: Optional[str] = None,
    sender: Optional[str] = None,
    days_back: Optional[int] = None,
    limit: int = 50,
    exclude_hidden: bool = True,
) -> list[dict]:
    """Keyword search for emails with optional filters.

    Every supplied filter is ANDed together. Rows with ``status = 'removed'``
    are always excluded and results are ordered by ``received_at`` descending.

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection whose rows support access by column name.
    terms : list of str
        Search terms; only used when ``has_query`` is true, in which case
        they are turned into an FTS5 MATCH expression via
        ``build_fts5_query``. If that expression is empty no FTS filter is
        applied.
    has_query : bool
        Whether a text query was supplied.
    project, client : str or None
        Exact match on the joined project name / client name.
    date_from, date_to : str or None
        Inclusive bounds compared against ``email.received_at``.
    account : str or None
        Exact match on ``email.account``.
    sender : str or None
        Substring matched against the sender address or display name.
        ``%`` and ``_`` are matched literally.
    days_back : int or None
        When positive, keep only emails received within that many days of
        the database's current time.
    limit : int
        Maximum number of rows returned.
    exclude_hidden : bool
        When true, rows with ``mcp_view = 'hidden'`` are filtered out in SQL.

    Returns
    -------
    list of dicts with email metadata including project/client joins.
    """
    params: list = []
    where: list[str] = ["email.status != 'removed'"]
    fts_join = ""

    if has_query:
        fts5_str = build_fts5_query(list(terms))
        if fts5_str:
            fts_join = "JOIN emails_fts fts ON fts.rowid = email.id"
            where.append("emails_fts MATCH ?")
            params.append(fts5_str)

    if project:
        where.append("project.project_name = ?")
        params.append(project)
    if client:
        where.append("client.name = ?")
        params.append(client)
    if date_from:
        where.append("email.received_at >= ?")
        params.append(date_from)
    if date_to:
        where.append("email.received_at <= ?")
        params.append(date_to)
    if account:
        where.append("email.account = ?")
        params.append(account)
    if sender:
        # Escape LIKE metacharacters: "_" is common in addresses and would
        # otherwise match any single character.
        escaped_sender = sender.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        sender_pattern = f"%{escaped_sender}%"
        where.append("(email.from_address LIKE ? ESCAPE '\\' OR email.from_name LIKE ? ESCAPE '\\')")
        params.extend([sender_pattern, sender_pattern])
    if days_back is not None and int(days_back) > 0:
        # Bind the datetime modifier instead of formatting it into the SQL.
        where.append("email.received_at >= datetime('now', ?)")
        params.append(f"-{int(days_back)} days")
    if exclude_hidden:
        where.append("email.mcp_view != 'hidden'")
    # LIMIT is the last placeholder in the statement, so it is bound last.
    params.append(limit)

    rows = conn.execute(
        f"""
        SELECT email.id, email.message_id, email.subject, email.from_address,
        email.from_name, email.to_addresses, email.received_at,
        email.account, email.labels, email.body_preview,
        email.mcp_view, email.mcp_read,
        project.project_name, client.name AS client_name
        FROM emails email
        {fts_join}
        LEFT JOIN projects project ON email.project_id = project.id
        LEFT JOIN clients client ON email.client_id = client.id
        WHERE {" AND ".join(where)}
        ORDER BY email.received_at DESC
        LIMIT ?
        """,
        params,
    ).fetchall()

    return [
        {
            "id": r["id"],
            "subject": r["subject"],
            # Prefer the display name; fall back to the raw address.
            "from": r["from_name"] or r["from_address"],
            "from_address": r["from_address"],
            "to": r["to_addresses"],
            "received_at": r["received_at"],
            "account": r["account"],
            "labels": r["labels"],
            "snippet": r["body_preview"],
            "project_name": r["project_name"],
            "client_name": r["client_name"],
            "mcp_view": r["mcp_view"],
            "mcp_read": r["mcp_read"],
        }
        for r in rows
    ]
304
+
305
+
306
def search_chats_keyword(
    conn: sqlite3.Connection,
    *,
    terms: list[str] = (),
    has_query: bool = False,
    project: Optional[str] = None,
    client: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    limit: int = 50,
    exclude_hidden: bool = True,
) -> list[dict]:
    """Keyword search for chats with optional filters.

    Chats whose status is ``'removed'`` or ``'merged'`` are never returned.
    When ``has_query`` is true the terms are matched against ``chat.title``
    through ``build_term_conditions``. Remaining filters are ANDed on, and
    rows come back newest-first by ``created_at``, capped at ``limit``.

    Returns list of dicts with chat metadata including project/client joins.
    """
    clauses: list[str] = ["chat.status NOT IN ('removed', 'merged')"]
    bindings: list = []

    if has_query:
        term_sql, term_args = build_term_conditions(["chat.title"], list(terms))
        clauses.append(term_sql)
        bindings.extend(term_args)

    # (SQL fragment, bound value) pairs; a falsy value disables the filter.
    optional_filters = (
        ("project.project_name = ?", project),
        ("client.name = ?", client),
        ("chat.created_at >= ?", date_from),
        ("chat.created_at <= ?", date_to),
    )
    for fragment, value in optional_filters:
        if value:
            clauses.append(fragment)
            bindings.append(value)

    if exclude_hidden:
        clauses.append("chat.mcp_view != 'hidden'")

    # The LIMIT placeholder is the final one in the statement.
    bindings.append(limit)

    statement = f"""
        SELECT chat.id, chat.external_id, chat.account, chat.title,
        chat.summary, chat.created_at, chat.modified_at,
        chat.message_count, chat.mcp_view, chat.mcp_read,
        project.project_name, client.name AS client_name
        FROM chats chat
        LEFT JOIN projects project ON chat.project_id = project.id
        LEFT JOIN clients client ON chat.client_id = client.id
        WHERE {" AND ".join(clauses)}
        ORDER BY chat.created_at DESC
        LIMIT ?
        """
    records = conn.execute(statement, bindings).fetchall()

    # Output keys coincide with the selected column names.
    output_keys = (
        "id",
        "external_id",
        "account",
        "title",
        "summary",
        "created_at",
        "message_count",
        "project_name",
        "client_name",
        "mcp_view",
        "mcp_read",
    )
    return [{key: record[key] for key in output_keys} for record in records]
378
+
379
+
380
def search_browser_keyword(
    conn: sqlite3.Connection,
    *,
    terms: list[str] = (),
    has_query: bool = False,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    limit: int = 50,
) -> list[dict]:
    """Keyword search for browser visits with optional filters.

    Visits with ``status = 'removed'`` are skipped. When ``has_query`` is
    true the terms are matched against ``url`` and ``title`` through
    ``build_term_conditions``. Rows are returned newest-first by
    ``visit_time``, capped at ``limit``.

    Returns list of dicts with visit metadata. Source-level visibility
    gating is handled by the service layer, not here.
    """
    clauses: list[str] = ["status != 'removed'"]
    bindings: list = []

    if has_query:
        term_sql, term_args = build_term_conditions(["url", "title"], list(terms))
        clauses.append(term_sql)
        bindings.extend(term_args)

    # Inclusive visit_time bounds; a falsy bound leaves that side open.
    for fragment, bound in (("visit_time >= ?", date_from), ("visit_time <= ?", date_to)):
        if bound:
            clauses.append(fragment)
            bindings.append(bound)

    # The LIMIT placeholder is the final one in the statement.
    bindings.append(limit)

    statement = f"""
        SELECT id, url, title, visit_time, browser
        FROM visits
        WHERE {" AND ".join(clauses)}
        ORDER BY visit_time DESC
        LIMIT ?
        """
    records = conn.execute(statement, bindings).fetchall()

    output_keys = ("id", "url", "title", "visit_time", "browser")
    return [{key: record[key] for key in output_keys} for record in records]
431
+
432
+
433
+ # ---------------------------------------------------------------------------
434
+ # FTS5 fallback (extracted from semantic_service)
435
+ # ---------------------------------------------------------------------------
436
+
437
+
438
def chat_fts5_fallback(
    conn: sqlite3.Connection,
    query: str,
    limit: int,
) -> list[dict]:
    """FTS5 keyword fallback for chat search.

    The query is matched against ``chats_fts`` as one quoted phrase with a
    trailing ``*`` (prefix match), best rank first. Chats whose status is
    ``'removed'`` or ``'merged'`` are skipped, the same status filter that
    ``search_chats_keyword`` applies.

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection whose rows support access by column name.
    query : str
        Raw search text. Empty or whitespace-only text yields ``[]``
        without querying the database.
    limit : int
        Maximum number of rows returned.

    Returns
    -------
    list of dicts shaped for semantic_service consumption: chat_id,
    chat_title, message_id (always None), source, created_at, snippet,
    relevance_score.
    """
    # Nothing to search for; avoid handing FTS5 an empty phrase.
    if not query or not query.strip():
        return []

    # Inside an FTS5 string a literal double quote must be doubled.
    safe_query = query.replace('"', '""')
    fts_query = f'"{safe_query}"*'

    rows = conn.execute(
        """SELECT chat.id, chat.title, chat.summary, chat.account,
        chat.created_at, chat.message_count, fts.rank as fts_rank
        FROM chats_fts fts
        JOIN chats chat ON chat.id = fts.rowid
        WHERE chats_fts MATCH ?
        AND chat.status NOT IN ('removed', 'merged')
        ORDER BY fts.rank
        LIMIT ?""",
        (fts_query, limit),
    ).fetchall()

    results = []
    for r in rows:
        # Map the magnitude of the FTS5 rank onto a score capped at 1.0.
        fts_rank = r["fts_rank"] if r["fts_rank"] is not None else 0.0
        score = round(min(1.0, abs(fts_rank) / 10.0), 3)
        results.append(
            {
                "chat_id": r["id"],
                "chat_title": r["title"],
                "message_id": None,
                "source": r["account"],
                "created_at": r["created_at"],
                "snippet": r["summary"] or "",
                "relevance_score": score,
            }
        )
    return results
479
+
480
+
481
def file_fts5_fallback(
    conn: sqlite3.Connection,
    query: str,
    limit: int,
) -> list[dict]:
    """FTS5 keyword fallback for file search.

    The query is split with ``split_query_terms`` and turned into a MATCH
    expression with ``build_fts5_query``; an empty expression yields ``[]``.
    Files with ``status = 'removed'`` are skipped. Rows are ordered by FTS5
    rank so that ``limit`` keeps the best matches rather than an arbitrary
    subset.

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection whose rows support access by column name.
    query : str
        Raw search text.
    limit : int
        Maximum number of rows returned.

    Returns
    -------
    list of dicts shaped for semantic_service consumption: id, source, name,
    path, content_type, size_bytes, modified_at, relevance_score (fixed at
    0.5), snippet, mcp_view, mcp_read.
    """
    terms = split_query_terms(query)
    match_str = build_fts5_query(terms)
    if not match_str:
        return []

    rows = conn.execute(
        "SELECT file.id, file.source, file.name, file.path, "
        "file.content_type, file.size_bytes, "
        "file.modified_at, file.mcp_view, file.mcp_read, "
        "file.content_preview "
        "FROM files file "
        "JOIN files_fts fts ON fts.rowid = file.id "
        "WHERE files_fts MATCH ? AND file.status != 'removed' "
        "ORDER BY fts.rank "
        "LIMIT ?",
        (match_str, limit),
    ).fetchall()

    results = []
    for row in rows:
        # Only expose stored content when reading is not denied; otherwise
        # fall back to a name/path label.
        if row["content_preview"] and row["mcp_read"] != "deny":
            snippet = row["content_preview"][:200]
        else:
            snippet = f"{row['name']} — {row['path']}"
        results.append(
            {
                "id": row["id"],
                "source": row["source"],
                "name": row["name"],
                "path": row["path"],
                "content_type": row["content_type"],
                "size_bytes": row["size_bytes"],
                "modified_at": row["modified_at"],
                "relevance_score": 0.5,
                "snippet": snippet,
                "mcp_view": row["mcp_view"],
                "mcp_read": row["mcp_read"],
            }
        )
    return results
531
+
532
+
533
+ # ---------------------------------------------------------------------------
534
+ # Enrichment queries (extracted from semantic_service)
535
+ # ---------------------------------------------------------------------------
536
+
537
+
538
def enrich_chat_visibility(
    conn: sqlite3.Connection,
    chat_ids: list[int],
) -> dict[int, dict]:
    """Look up visibility fields for the given chat IDs.

    IDs with no matching row are absent from the result; an empty input
    returns an empty dict without querying.

    Returns {chat_id: {id, account, mcp_view, mcp_read}} lookup dict.
    """
    if not chat_ids:
        return {}

    # One "?" placeholder per requested ID.
    placeholders = ",".join(["?"] * len(chat_ids))
    statement = f"SELECT id, account, mcp_view, mcp_read FROM chats WHERE id IN ({placeholders})"

    lookup: dict[int, dict] = {}
    for record in conn.execute(statement, chat_ids).fetchall():
        lookup[record["id"]] = dict(record)
    return lookup
554
+
555
+
556
def enrich_file_metadata(
    conn: sqlite3.Connection,
    file_ids: list[int],
) -> dict[int, dict]:
    """Look up metadata for the given file IDs, skipping removed files.

    IDs with no matching row (or whose row has ``status = 'removed'``) are
    absent from the result; an empty input returns an empty dict without
    querying.

    Returns {file_id: {id, source, name, path, ...}} lookup dict.
    """
    if not file_ids:
        return {}

    # One "?" placeholder per requested ID.
    placeholders = ",".join(["?"] * len(file_ids))
    statement = (
        "SELECT id, source, name, path, content_type, size_bytes, "
        "modified_at, mcp_view, mcp_read "
        f"FROM files WHERE id IN ({placeholders}) AND status != 'removed'"
    )

    lookup: dict[int, dict] = {}
    for record in conn.execute(statement, file_ids).fetchall():
        lookup[record["id"]] = dict(record)
    return lookup