footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,360 @@
1
+ """semantic_service — embedding search with FTS5 fallback and access control.
2
+
3
+ D2 access rule: semantic matches are content-derived, so visible items also
4
+ require mcp_read='allow' (presence in results reveals content).
5
+ """
6
+
7
+ import logging
8
+ import sqlite3
9
+ from typing import Dict, List
10
+
11
+ from footprinter.db.search import (
12
+ chat_fts5_fallback,
13
+ enrich_chat_visibility,
14
+ enrich_file_metadata,
15
+ file_fts5_fallback,
16
+ )
17
+ from footprinter.services.access_service import (
18
+ resolve_inherit_permission,
19
+ resolve_inherit_visibility,
20
+ )
21
+ from footprinter.services.roles import Role
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ _VALID_SOURCES = frozenset({"chats", "files", "all"})
26
+
27
+ _CHAT_FIELDS = {
28
+ "chat_id",
29
+ "chat_title",
30
+ "snippet",
31
+ "relevance_score",
32
+ "source",
33
+ "created_at",
34
+ "message_id",
35
+ }
36
+
37
+ _FILE_FIELDS = {
38
+ "id",
39
+ "name",
40
+ "path",
41
+ "content_type",
42
+ "size_bytes",
43
+ "modified_at",
44
+ "relevance_score",
45
+ "snippet",
46
+ }
47
+
48
+ # Search outcome: ok (vector worked), degraded (FTS5 fallback), failed (both crashed)
49
+ _OK = "ok"
50
+ _DEGRADED = "degraded"
51
+ _FAILED = "failed"
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Public API
56
+ # ---------------------------------------------------------------------------
57
+
58
+
59
def semantic_search(
    conn: sqlite3.Connection,
    query: str,
    *,
    role: Role = Role.ADMIN,
    source: str = "all",
    limit: int = 10,
) -> dict:
    """Search chats and/or files by semantic similarity.

    Args:
        conn: Open SQLite connection, passed through to the per-source searches.
        query: Free-text query. Must contain at least three characters once
            surrounding whitespace is ignored.
        role: Caller role, forwarded to the per-source searches for access
            filtering.
        source: One of ``"chats"``, ``"files"`` or ``"all"``.
        limit: Result limit forwarded to each per-source search.

    Returns:
        ``{"status": "invalid_query"}`` or ``{"status": "invalid_source"}`` on
        validation failure. Otherwise a dict with ``query``, the requested
        source keys (``chats`` and/or ``files``), ``summary``, plus
        ``suppressed`` (only when at least one result was filtered out) and
        ``note`` (only when a search step produced a note).
    """
    # Measure the stripped query so that whitespace-only or whitespace-padded
    # input cannot satisfy the three-character minimum.
    if not query or len(query.strip()) < 3:
        return {"status": "invalid_query"}

    if source not in _VALID_SOURCES:
        return {"status": "invalid_source"}

    result: dict = {"query": query}
    all_notes: list[str] = []
    summary_parts: list[str] = []
    total_suppressed = 0

    if source in ("chats", "all"):
        chats, notes, suppressed, chat_status = _search_chats(conn, query, limit, role)
        result["chats"] = chats
        all_notes.extend(notes)
        total_suppressed += suppressed
        summary_parts.append(_build_chat_summary(chats, query, status=chat_status))

    if source in ("files", "all"):
        files, notes, suppressed, file_status = _search_files(conn, query, limit, role)
        result["files"] = files
        all_notes.extend(notes)
        total_suppressed += suppressed
        summary_parts.append(_build_file_summary(files, query, status=file_status))

    if total_suppressed > 0:
        result["suppressed"] = total_suppressed

    result["summary"] = " ".join(summary_parts)

    if all_notes:
        # dict.fromkeys drops duplicate notes while keeping first-seen order.
        result["note"] = " ".join(dict.fromkeys(all_notes))

    return result
125
+
126
+
127
+ # ---------------------------------------------------------------------------
128
+ # Chat search
129
+ # ---------------------------------------------------------------------------
130
+
131
+
132
def _search_chats(
    conn: sqlite3.Connection,
    query: str,
    limit: int,
    role: Role,
) -> tuple[list[dict], list[str], int, str]:
    """Run the chat search pipeline: vector search, FTS5 fallback, enrich, filter.

    Returns a ``(results, notes, suppressed, status)`` tuple where ``status``
    is one of the module's ``_OK`` / ``_DEGRADED`` / ``_FAILED`` markers and
    ``suppressed`` counts the results removed by access filtering.
    """
    outcome = _OK
    hits: list[dict] = []

    # Vector search is attempted first; any failure (including the lazy
    # import) degrades to the keyword-based FTS5 search.
    try:
        from footprinter.semantic.vector_store import VectorStore

        hits = VectorStore.get_instance().search_chats(query=query, n_results=limit)
    except Exception as e:
        logger.warning("Vector search unavailable (%s), falling back to FTS5", e)
        outcome = _DEGRADED
        try:
            hits = chat_fts5_fallback(conn, query, limit)
        except Exception as fallback_err:
            logger.warning("Chat FTS5 fallback failed: %s", fallback_err)
            return [], ["Chat search failed — try footprinter_search"], 0, _FAILED

    # Attach visibility/permission columns looked up from the database.
    known_ids = [hit.get("chat_id") for hit in hits if hit.get("chat_id")]
    visibility_by_id = enrich_chat_visibility(conn, known_ids) if known_ids else {}

    for hit in hits:
        chat_id = hit.get("chat_id")
        row = visibility_by_id.get(chat_id)
        hit["id"] = chat_id
        if row:
            hit["account"] = row["account"]
            hit["mcp_view"] = row["mcp_view"]
            hit["mcp_read"] = row["mcp_read"]
        else:
            # No database row: default to hidden with no read permission.
            hit["account"] = ""
            hit["mcp_view"] = "hidden"
            hit["mcp_read"] = None

    if role.sees_all:
        kept = hits
    else:
        # D2: presence in semantic results reveals content — exclude anything
        # not both visible AND allowed. Fail-closed on null/missing values.
        kept = []
        for hit in hits:
            if resolve_inherit_visibility(hit.get("mcp_view")) != "visible":
                continue
            if resolve_inherit_permission(hit.get("mcp_read")) != "allow":
                continue
            kept.append(hit)
    suppressed = len(hits) - len(kept)

    # Visible results are reduced to presentation fields; others pass through.
    # NOTE(review): filtering uses the *resolved* visibility, but this check
    # uses the raw mcp_view value. If the resolver can map a raw value other
    # than "visible" to "visible", such rows are returned untrimmed — confirm
    # that is intended.
    presented = []
    for hit in kept:
        if hit.get("mcp_view") == "visible":
            presented.append(_trim_chat_result(hit))
        else:
            presented.append(hit)

    notes: list[str] = []
    if outcome == _DEGRADED:
        notes.append("Results are keyword-based (semantic search unavailable)")

    return presented, notes, suppressed, outcome
190
+
191
+
192
+ # ---------------------------------------------------------------------------
193
+ # File search
194
+ # ---------------------------------------------------------------------------
195
+
196
+
197
def _search_files(
    conn: sqlite3.Connection,
    query: str,
    limit: int,
    role: Role,
) -> tuple[list[dict], list[str], int, str]:
    """Run the file search pipeline: vector search (+ enrich) or FTS5 fallback, then filter.

    Returns a ``(results, notes, suppressed, status)`` tuple. ``results`` is
    capped at ``limit`` entries; ``suppressed`` counts the candidates removed
    by access filtering (counted before the cap is applied).
    """
    outcome = _OK
    candidates: List[Dict] = []
    missing_ids = 0

    try:
        from footprinter.semantic.vector_store import VectorStore

        # Request three times the limit: several hits may collapse into one
        # file during de-duplication below.
        raw_hits = VectorStore.get_instance().search_files(query=query, n_results=limit * 3)
    except Exception as e:
        logger.warning("Vector search unavailable (%s), falling back to FTS5", e)
        outcome = _DEGRADED
        try:
            candidates = file_fts5_fallback(conn, query, limit)
        except Exception as fallback_err:
            logger.warning("File FTS5 fallback failed: %s", fallback_err)
            return [], ["File search failed — try footprinter_search"], 0, _FAILED
    else:
        for hit in raw_hits:
            # A missing or falsy distance is treated as 0; the score is
            # 1 - distance/2, floored at 0 and rounded to three decimals.
            gap = hit.get("distance") or 0
            hit["relevance_score"] = round(max(0, 1 - (gap / 2)), 3)
            hit["snippet"] = hit.get("content_snippet", "")

        unique_hits, missing_ids = _deduplicate_by_file(raw_hits)
        if missing_ids > 0:
            logger.warning(
                "Dropped %d vector results with missing file_id",
                missing_ids,
            )

        lookup_ids = [hit["file_id"] for hit in unique_hits if hit.get("file_id")]
        if lookup_ids:
            metadata_by_id = enrich_file_metadata(conn, lookup_ids)
            for hit in unique_hits:
                row = metadata_by_id.get(hit["file_id"])
                if row:
                    hit.update(row)
            # Only hits that ended up with an "id" after enrichment are kept.
            candidates = [hit for hit in unique_hits if hit.get("id")]

    if role.sees_all:
        kept = candidates
    else:
        # D2: presence in semantic results reveals content — exclude anything
        # not both visible AND allowed. Fail-closed on null/missing values.
        kept = []
        for hit in candidates:
            if resolve_inherit_visibility(hit.get("mcp_view")) != "visible":
                continue
            if resolve_inherit_permission(hit.get("mcp_read")) != "allow":
                continue
            kept.append(hit)
    suppressed = len(candidates) - len(kept)

    # Visible results are reduced to presentation fields, then capped.
    presented = []
    for hit in kept:
        if hit.get("mcp_view") == "visible":
            presented.append(_trim_file_result(hit))
        else:
            presented.append(hit)
    presented = presented[:limit]

    notes: list[str] = []
    if outcome == _DEGRADED:
        notes.append("Results are keyword-based (semantic search unavailable)")
    if missing_ids > 0:
        notes.append(f"Dropped {missing_ids} results with missing file_id. Run --rebuild-vectors to fix.")

    return presented, notes, suppressed, outcome
267
+
268
+
269
+ # ---------------------------------------------------------------------------
270
+ # Helpers
271
+ # ---------------------------------------------------------------------------
272
+
273
+
274
def _deduplicate_by_file(results: List[Dict]) -> tuple[List[Dict], int]:
    """Collapse results to one entry per ``file_id``, keeping the best-scoring one.

    Entries whose ``file_id`` is missing or ``None`` are discarded and counted.
    On a tie the earlier entry wins. Output order follows the first appearance
    of each ``file_id``.

    Returns:
        ``(unique_results, dropped_count)``.
    """
    winners: Dict[int, Dict] = {}
    missing = 0
    for candidate in results:
        file_id = candidate.get("file_id")
        if file_id is None:
            missing += 1
        elif file_id not in winners:
            winners[file_id] = candidate
        elif candidate.get("relevance_score", 0) > winners[file_id].get("relevance_score", 0):
            # Strictly better score replaces the current winner in place.
            winners[file_id] = candidate
    return list(winners.values()), missing
287
+
288
+
289
def _trim_chat_result(result: dict) -> dict:
    """Return a new dict holding only the entries of *result* whose key is in ``_CHAT_FIELDS``."""
    trimmed = {}
    for key, value in result.items():
        if key in _CHAT_FIELDS:
            trimmed[key] = value
    return trimmed
291
+
292
+
293
def _trim_file_result(result: dict) -> dict:
    """Return a new dict holding only the entries of *result* whose key is in ``_FILE_FIELDS``."""
    trimmed = {}
    for key, value in result.items():
        if key in _FILE_FIELDS:
            trimmed[key] = value
    return trimmed
295
+
296
+
297
def _build_chat_summary(
    chats: list[dict],
    query: str,
    *,
    status: str = _OK,
) -> str:
    """Compose the one-line, human-readable summary for the chat results.

    With no results the message depends on ``status`` (failed, degraded, or a
    plain "nothing found" with tips). With results it reports the count, up to
    three titles, how many entries carry no ``chat_title``, and whether the
    match was keyword-based.
    """
    total = len(chats)

    if total == 0:
        if status == _FAILED:
            return f"Chat search failed for '{query}' — try footprinter_search for keyword matching."
        if status == _DEGRADED:
            return f"Semantic search unavailable — keyword search returned no chats for '{query}'."
        return (
            f"No chats found for '{query}'. "
            f"Tips: try different keywords, use footprinter_search "
            f"for broader keyword matching across files/emails/browser."
        )

    # Entries without a title are reported only as a count.
    titled = [chat for chat in chats if chat.get("chat_title")]
    untitled = total - len(titled)
    noun = "chat" if total == 1 else "chats"

    pieces = [f"Found {total} {noun} matching '{query}'."]
    if titled:
        top = ", ".join(repr(chat["chat_title"]) for chat in titled[:3])
        pieces.append(f" Top: {top}.")
    if untitled > 0:
        pieces.append(f" ({untitled} with restricted visibility.)")
    if status == _DEGRADED:
        pieces.append(" (keyword match — semantic search was unavailable)")
    return "".join(pieces)
328
+
329
+
330
def _build_file_summary(
    files: list[dict],
    query: str,
    *,
    status: str = _OK,
) -> str:
    """Compose the one-line, human-readable summary for the file results.

    With no results the message depends on ``status`` (failed, degraded, or a
    plain "nothing found" with tips). With results it reports the count, up to
    three names, how many entries carry no ``name``, and whether the match was
    keyword-based.
    """
    total = len(files)

    if total == 0:
        if status == _FAILED:
            return f"File search failed for '{query}' — try footprinter_search for keyword matching."
        if status == _DEGRADED:
            return f"Semantic search unavailable — keyword search returned no files for '{query}'."
        return (
            f"No files found for '{query}'. "
            f"Tips: try different keywords, use footprinter_search "
            f"for exact keyword matching across file names/paths."
        )

    # Entries without a name are reported only as a count.
    named = [entry for entry in files if entry.get("name")]
    unnamed = total - len(named)
    noun = "file" if total == 1 else "files"

    pieces = [f"Found {total} {noun} matching '{query}'."]
    if named:
        top = ", ".join(repr(entry["name"]) for entry in named[:3])
        pieces.append(f" Top: {top}.")
    if unnamed > 0:
        pieces.append(f" ({unnamed} with restricted visibility.)")
    if status == _DEGRADED:
        pieces.append(" (keyword match — semantic search was unavailable)")
    return "".join(pieces)
@@ -0,0 +1,18 @@
1
+ """Status service — visibility-aware system status aggregates."""
2
+
3
+ import sqlite3
4
+
5
+ from footprinter.db import status as db_status
6
+ from footprinter.paths import get_config_path
7
+ from footprinter.services.roles import Role
8
+
9
+
10
def get_status(conn: sqlite3.Connection, *, role: Role = Role.ADMIN) -> dict:
    """Return the system status appropriate for *role*.

    ``Role.VIEWER`` receives ``db_status.get_mcp_status(conn)``. Every other
    role receives ``db_status.get_system_status(conn, <config path>)``, with
    the config path resolved only on that branch.
    """
    return (
        db_status.get_mcp_status(conn)
        if role == Role.VIEWER
        else db_status.get_system_status(conn, get_config_path())
    )
@@ -0,0 +1,65 @@
1
+ """Visit (browser history) read service — get/list with role-based visibility filtering."""
2
+
3
+ import sqlite3
4
+
5
+ from footprinter.db import browser as db
6
+ from footprinter.services.access_service import filter_result, filter_results_list
7
+ from footprinter.services.roles import Role
8
+
9
+
10
def get(conn: sqlite3.Connection, entry_id: int, *, role: Role = Role.ADMIN) -> dict | None:
    """Look up one browser visit by ID and apply role-based filtering.

    Returns ``None`` when the visit does not exist. Roles with ``sees_all``
    get the raw record; other roles get the output of
    ``filter_result("visit", ...)``.
    """
    visit = db.get_visit(conn, entry_id)
    # A missing record and a see-everything role both return the lookup as-is.
    if visit is None or role.sees_all:
        return visit
    return filter_result("visit", visit)
18
+
19
+
20
def assign(
    conn: sqlite3.Connection,
    entry_id: int,
    *,
    role: Role = Role.ADMIN,
    project_id: int | None = None,
    client_id: int | None = None,
) -> dict | None:
    """Link a visit to a project and/or a client.

    Returns:
        A dict with ``id`` plus whichever of ``project_id`` / ``client_id``
        were supplied (not ``None``), or ``None`` when the update helper
        reports no result.

    Raises:
        PermissionError: If *role* does not have ``can_write``.
    """
    if not role.can_write:
        raise PermissionError("Role does not permit write operations")

    outcome = db.update_visit_relationships(
        conn,
        entry_id,
        project_id=project_id,
        client_id=client_id,
    )
    if outcome is None:
        return None

    # Echo back only the relationships the caller actually provided.
    supplied = {"project_id": project_id, "client_id": client_id}
    return {
        "id": entry_id,
        **{key: value for key, value in supplied.items() if value is not None},
    }
49
+
50
+
51
def list_(
    conn: sqlite3.Connection,
    *,
    role: Role = Role.ADMIN,
    limit: int = 50,
    page: int = 1,
) -> dict:
    """Return one page of browser visits with role-based filtering applied.

    For roles without ``sees_all`` the page's ``visits`` entry is replaced by
    the filtered list and a ``suppressed`` entry is added. Filtering is
    applied to the page after it has been fetched.
    """
    page_data = db.list_visits(conn, limit=limit, page=page)
    if not role.sees_all:
        visible, hidden_count = filter_results_list("visit", page_data["visits"])
        page_data["visits"] = visible
        page_data["suppressed"] = hidden_count
    return page_data
@@ -0,0 +1,194 @@
1
+ """
2
+ Source registry — runtime registry for all Footprinter data sources.
3
+
4
+ Seeded from config.yaml on database init. Provides a query/mutation API
5
+ that all other code can use to discover available sources.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import sqlite3
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ import yaml
14
+
15
+ from footprinter.paths import get_config_path
16
+
17
+
18
class ConfigError(Exception):
    """Signal that the configuration file is missing or cannot be used."""
20
+
21
+
22
def get_config(config_path: Optional[str] = None) -> dict:
    """Load the configuration mapping from a YAML file.

    Args:
        config_path: Explicit path to the config file. When omitted (or
            empty), the path returned by ``get_config_path()`` is used.

    Returns:
        The parsed top-level mapping. An empty file yields ``{}``.

    Raises:
        ConfigError: If the file does not exist, cannot be decoded as UTF-8,
            is not valid YAML, or its top-level value is not a mapping.
    """
    path = config_path or str(get_config_path())
    try:
        # Explicit encoding keeps decoding independent of the platform locale.
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        raise ConfigError(f"Config not found: {path}\nRun 'fp setup' to get started.") from None
    except (yaml.YAMLError, UnicodeDecodeError) as e:
        raise ConfigError(f"Invalid config file: {path}\n{e}") from None

    # safe_load returns None for an empty document; callers expect a dict.
    if data is None:
        return {}
    if not isinstance(data, dict):
        raise ConfigError(
            f"Invalid config file: {path}\n"
            f"Expected a mapping at the top level, got {type(data).__name__}."
        )
    return data
36
+
37
+
38
def home_path() -> str:
    """Return ``~`` expanded via ``os.path.expanduser``."""
    expanded = os.path.expanduser("~")
    return expanded
41
+
42
+
43
def remote_accounts() -> List[str]:
    """Return the account names of all remote source seeds in the config.

    A seed is included when its ``source_type`` is ``"remote"`` and it has a
    non-empty ``account``. A config with no ``source_seeds`` entry, or one set
    to null, yields an empty list.
    """
    # `or` guards cover an empty config file and a `source_seeds:` key with
    # no value, both of which would otherwise produce None here.
    config = get_config() or {}
    seeds = config.get("source_seeds") or []
    return [
        seed["account"]
        for seed in seeds
        if seed.get("source_type") == "remote" and seed.get("account")
    ]
48
+
49
+
50
class SourceRegistry:
    """Registry for data sources, backed by the ``sources`` table.

    All methods operate on the connection supplied at construction time.
    Mutating methods commit on that connection before returning.
    """

    # Shared by seed_from_config() and register_source(). INSERT OR IGNORE
    # skips the insert when it would violate a constraint, so an existing row
    # is left untouched.
    _INSERT_SQL = """
        INSERT OR IGNORE
        INTO sources (name, source_type, adapter, account, label, icon, enabled, config)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """

    def __init__(self, conn: sqlite3.Connection):
        self.conn = conn

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _insert_source(
        self,
        cursor: sqlite3.Cursor,
        name: str,
        source_type: str,
        adapter: Optional[str],
        fields: Dict[str, Any],
    ) -> bool:
        """Insert one source row (no commit); return True if a row was added."""
        config = fields.get("config")
        cursor.execute(
            self._INSERT_SQL,
            (
                name,
                source_type,
                adapter,
                fields.get("account"),
                fields.get("label"),
                fields.get("icon"),
                # A missing "enabled" field defaults to enabled.
                1 if fields.get("enabled", True) else 0,
                # A falsy config (None, {}) is stored as NULL, not as JSON.
                json.dumps(config) if config else None,
            ),
        )
        return cursor.rowcount > 0

    def _select_names(self, sql: str, params: tuple = ()) -> List[str]:
        """Run *sql* and return the first column of every row."""
        cursor = self.conn.cursor()
        cursor.execute(sql, params)
        return [row[0] for row in cursor.fetchall()]

    @staticmethod
    def _row_to_dict(cursor: sqlite3.Cursor, row) -> Dict[str, Any]:
        """Pair the cursor's column names with the values of *row*."""
        columns = [desc[0] for desc in cursor.description]
        return dict(zip(columns, row))

    # ------------------------------------------------------------------
    # Seeding
    # ------------------------------------------------------------------

    def seed_from_config(self, config_path: Optional[str] = None) -> int:
        """Seed the sources table from the config's ``source_seeds`` list.

        Uses INSERT OR IGNORE so user edits to existing rows are preserved.
        An empty config, or a missing/null ``source_seeds`` entry, seeds
        nothing. Each seed must provide ``name`` and ``source_type``; the
        ``adapter`` column is always written as NULL here.

        Returns:
            The number of rows inserted.
        """
        # `or` guards cover an empty config file and a `source_seeds:` key
        # with no value, both of which would otherwise produce None here.
        config = get_config(config_path) or {}
        seeds = config.get("source_seeds") or []

        cursor = self.conn.cursor()
        inserted = 0
        for seed in seeds:
            if self._insert_source(cursor, seed["name"], seed["source_type"], None, seed):
                inserted += 1
        self.conn.commit()
        return inserted

    # ------------------------------------------------------------------
    # Query API
    # ------------------------------------------------------------------

    def all_source_names(self) -> List[str]:
        """Return all source names, ordered by name."""
        return self._select_names("SELECT name FROM sources ORDER BY name")

    def all_sources(self) -> List[Dict[str, Any]]:
        """Return all sources as column-name → value dicts, ordered by name."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM sources ORDER BY name")
        columns = [desc[0] for desc in cursor.description]
        return [dict(zip(columns, row)) for row in cursor.fetchall()]

    def get_source(self, name: str) -> Optional[Dict[str, Any]]:
        """Return a single source by name as a dict, or None if not found."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM sources WHERE name = ?", (name,))
        row = cursor.fetchone()
        if row is None:
            return None
        return self._row_to_dict(cursor, row)

    def remote_source_names(self) -> List[str]:
        """Return names of sources with source_type='remote', ordered by name."""
        return self._select_names(
            "SELECT name FROM sources WHERE source_type = 'remote' ORDER BY name"
        )

    def file_source_names(self) -> List[str]:
        """Return names of sources with source_type='file', ordered by name."""
        return self._select_names(
            "SELECT name FROM sources WHERE source_type = 'file' ORDER BY name"
        )

    def source_label(self, name: str) -> Optional[str]:
        """Return the label for a source, or None if the source is not found."""
        source = self.get_source(name)
        return source["label"] if source else None

    def source_account(self, name: str) -> Optional[str]:
        """Return the account for a source, or None if the source is not found."""
        source = self.get_source(name)
        return source["account"] if source else None

    def is_remote_source(self, name: str) -> bool:
        """Return True if the named source exists and is a remote source."""
        source = self.get_source(name)
        return source is not None and source["source_type"] == "remote"

    # ------------------------------------------------------------------
    # Mutation API
    # ------------------------------------------------------------------

    def update_label(self, name: str, label: str) -> bool:
        """Update a source's label. Returns True if a row was updated."""
        cursor = self.conn.cursor()
        cursor.execute(
            "UPDATE sources SET label = ?, updated_at = CURRENT_TIMESTAMP WHERE name = ?",
            (label, name),
        )
        self.conn.commit()
        return cursor.rowcount > 0

    def set_enabled(self, name: str, enabled: bool) -> bool:
        """Enable or disable a source. Returns True if a row was updated."""
        cursor = self.conn.cursor()
        cursor.execute(
            "UPDATE sources SET enabled = ?, updated_at = CURRENT_TIMESTAMP WHERE name = ?",
            (1 if enabled else 0, name),
        )
        self.conn.commit()
        return cursor.rowcount > 0

    def register_source(self, name: str, source_type: str, **kwargs) -> bool:
        """Register a new source.

        Recognised keyword arguments: ``adapter``, ``account``, ``label``,
        ``icon``, ``enabled`` (default True) and ``config`` (stored as JSON).

        Returns:
            True if a row was inserted, False if the insert was ignored
            (e.g. the source already exists).
        """
        cursor = self.conn.cursor()
        inserted = self._insert_source(
            cursor,
            name,
            source_type,
            kwargs.get("adapter"),
            kwargs,
        )
        self.conn.commit()
        return inserted
@@ -0,0 +1,7 @@
1
+ """Utility modules for Footprinter."""
2
+
3
+ from .hash_utils import compute_md5, compute_sha256
4
+ from .mime import mime_to_content_type
5
+ from .time import UTC_FMT, utc_now_iso
6
+
7
+ __all__ = ["compute_md5", "compute_sha256", "mime_to_content_type", "UTC_FMT", "utc_now_iso"]