footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,8 @@
1
+ """Footprinter — digital life indexer with AI-powered context."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
def _detect_version() -> str:
    """Return the installed ``footprinter-cli`` version string.

    Falls back to the ``"0.0.0-dev"`` placeholder when the distribution
    metadata cannot be found (``PackageNotFoundError``).
    """
    try:
        return version("footprinter-cli")
    except PackageNotFoundError:
        return "0.0.0-dev"


__version__ = _detect_version()
footprinter/access.py ADDED
@@ -0,0 +1,444 @@
1
+ """Recalculation engine — scope-to-entity mapping + batch write-back.
2
+
3
+ Maps a policy scope (e.g. "global", "project:3", "folder:~/Work/") to affected
4
+ entity rows, calls the existing batch resolve functions, and writes resolved
5
+ values back to mcp_view / mcp_read columns.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import sqlite3
12
+ from collections.abc import Callable
13
+ from typing import Any
14
+
15
+ from footprinter.permissions import batch_resolve_permissions
16
+ from footprinter.visibility import batch_resolve_visibility
17
+
18
+ # Sources that indicate the resolution came from the global policy or the
19
+ # hardcoded baseline — not from any entity-specific or scope-specific policy.
20
+ # These entities should be stored as 'inherit' so changing the global policy
21
+ # takes effect at query time without re-running access resolution.
22
_INHERIT_SOURCES = frozenset({"global", "baseline"})


def _is_inherit_source(source: str) -> bool:
    """Report whether *source* ultimately resolves to global or baseline.

    Two shapes are recognised:

    * a direct source that is itself ``"global"`` or ``"baseline"``;
    * a cascade path ending in ``"(via <origin>)"`` — for example
      ``"project:3 (via global)"`` — whose ``<origin>`` is one of those two.

    Anything else yields ``False``.
    """
    if source in _INHERIT_SOURCES:
        return True

    # Cascade format: "project:3 (via global)" or "folder:30 (via baseline)".
    # Only the LAST "(via " marker counts, and the string must close with ")".
    _, marker, tail = source.rpartition("(via ")
    if not marker or not source.endswith(")"):
        return False

    # Drop the closing parenthesis to obtain the origin name.
    return tail[:-1] in _INHERIT_SOURCES
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Entity table metadata
44
+ # ---------------------------------------------------------------------------
45
+ # Each entry describes an entity type's table and capabilities.
46
+ # table: SQL table name
47
+ # has_visibility: has mcp_view column
48
+ # has_permissions: has mcp_read column
49
+ # has_status: has status column (filter WHERE status != 'removed')
50
+ # has_project_id: has project_id FK
51
+ # has_client_id: has client_id FK
52
+ # has_account: has account column
53
+ # path_column: column name for path-prefix matching (None if N/A)
54
+
55
def _entity_meta(
    table: str,
    *,
    visibility: bool,
    permissions: bool,
    status: bool,
    project_id: bool,
    client_id: bool,
    account: bool,
    path_column: str | None,
) -> dict[str, Any]:
    """Build one ENTITY_META record.

    The keyword-only flags map one-to-one onto the ``has_*`` keys of the
    record; ``path_column`` names the column used for path-prefix matching
    (``None`` when the entity type has no such column).
    """
    return {
        "table": table,
        "has_visibility": visibility,
        "has_permissions": permissions,
        "has_status": status,
        "has_project_id": project_id,
        "has_client_id": client_id,
        "has_account": account,
        "path_column": path_column,
    }


# Entity type → table name and capability flags. Insertion order matters:
# scope resolution iterates this mapping and returns results in this order.
ENTITY_META: dict[str, dict[str, Any]] = {
    "file": _entity_meta(
        "files",
        visibility=True,
        permissions=True,
        status=True,
        project_id=True,
        client_id=True,
        account=True,
        path_column="path",
    ),
    "email": _entity_meta(
        "emails",
        visibility=True,
        permissions=True,
        status=False,
        project_id=True,
        client_id=True,
        account=True,
        path_column=None,
    ),
    "chat": _entity_meta(
        "chats",
        visibility=True,
        permissions=True,
        status=True,
        project_id=True,
        client_id=True,
        account=True,
        path_column=None,
    ),
    "folder": _entity_meta(
        "folders",
        visibility=True,
        permissions=False,
        status=False,
        project_id=True,
        client_id=True,
        account=False,
        path_column="path",
    ),
    "project": _entity_meta(
        "projects",
        visibility=True,
        permissions=True,
        status=False,
        project_id=False,
        client_id=True,
        account=False,
        path_column="root_path",
    ),
    "client": _entity_meta(
        "clients",
        visibility=True,
        permissions=True,
        status=False,
        project_id=False,
        client_id=False,
        account=False,
        path_column=None,
    ),
}

# Reverse lookup used by "source:<table>" scopes: table name → entity type
# (e.g. "files" → "file").
_SOURCE_TO_ENTITY = {record["table"]: name for name, record in ENTITY_META.items()}
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Internal helpers
124
+ # ---------------------------------------------------------------------------
125
+
126
+
127
def _get_all_ids(conn: sqlite3.Connection, entity_type: str) -> list[int]:
    """Return the ``id`` of every row in *entity_type*'s table.

    For entity types flagged ``has_status`` in ``ENTITY_META``, rows whose
    status is ``'removed'`` are left out. Rows are read by column name, so
    *conn* needs a name-addressable row factory such as ``sqlite3.Row``.
    """
    meta = ENTITY_META[entity_type]
    query = f"SELECT id FROM {meta['table']}"
    if meta["has_status"]:
        query += " WHERE status != 'removed'"
    return [record["id"] for record in conn.execute(query).fetchall()]
136
+
137
+
138
def _select_scope_ids(
    conn: sqlite3.Connection,
    entity_type: str,
    condition: str,
    params: tuple,
) -> list[int]:
    """Return IDs of *entity_type* rows matching *condition*.

    ``AND status != 'removed'`` is appended for entity types flagged
    ``has_status`` in ``ENTITY_META``.
    """
    meta = ENTITY_META[entity_type]
    if meta["has_status"]:
        condition += " AND status != 'removed'"
    rows = conn.execute(f"SELECT id FROM {meta['table']} WHERE {condition}", params).fetchall()
    return [r["id"] for r in rows]


def _get_ids_for_scope(conn: sqlite3.Connection, scope: str) -> dict[str, list[int]]:
    """Translate a policy scope string into ``{entity_type: [ids]}``.

    Supported scopes:

    * ``"global"`` — every entity type with all of its IDs (empty lists kept).
    * ``"source:<table>"`` — all IDs of the entity type stored in ``<table>``.
    * ``"account:<name>"`` — rows whose ``account`` column equals ``<name>``.
    * ``"folder:<path>"`` — rows whose path column starts with ``<path>``
      (``~`` expanded; LIKE metacharacters escaped).
    * ``"project:<id>"`` — the project row plus rows referencing it.
    * ``"client:<id>"`` — the client row, its projects, their children, and
      rows referencing the client directly (deduplicated).
    * ``"<entity_type>:<id>"`` — that single ID, without an existence check.

    Raises:
        ValueError: if the scope has no ``:`` separator, names an unknown
            source or prefix, or carries a non-integer ID where one is
            required.
    """
    if scope == "global":
        return {etype: _get_all_ids(conn, etype) for etype in ENTITY_META}

    prefix, separator, value = scope.partition(":")
    if not separator:
        raise ValueError(f"Invalid scope: {scope}")

    if prefix == "source":
        # source:files → all files; source:emails → all emails
        entity_type = _SOURCE_TO_ENTITY.get(value)
        if entity_type is None:
            raise ValueError(f"Unknown source scope: {scope}")
        return {entity_type: _get_all_ids(conn, entity_type)}

    if prefix == "account":
        # account:{name} → every account-bearing entity WHERE account = ?
        found: dict[str, list[int]] = {}
        for etype, meta in ENTITY_META.items():
            if not meta["has_account"]:
                continue
            ids = _select_scope_ids(conn, etype, "account = ?", (value,))
            if ids:
                found[etype] = ids
        return found

    if prefix == "folder":
        # folder:{path} → entities whose path column starts with the path.
        expanded = os.path.expanduser(value)
        # Escape LIKE metacharacters so literal %, _ in paths match correctly.
        pattern = expanded.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + "%"
        found = {}
        for etype, meta in ENTITY_META.items():
            column = meta["path_column"]
            if column is None:
                continue
            ids = _select_scope_ids(conn, etype, f"{column} LIKE ? ESCAPE '\\'", (pattern,))
            if ids:
                found[etype] = ids
        return found

    if prefix == "project":
        project_id = int(value)
        found = {}
        # The project itself (only when it exists).
        project_row = conn.execute("SELECT id FROM projects WHERE id = ?", (project_id,)).fetchone()
        if project_row is not None:
            found["project"] = [project_row["id"]]
        # Children carrying a project_id foreign key.
        for etype, meta in ENTITY_META.items():
            if etype == "project" or not meta["has_project_id"]:
                continue
            ids = _select_scope_ids(conn, etype, "project_id = ?", (project_id,))
            if ids:
                found[etype] = ids
        return found

    if prefix == "client":
        client_id = int(value)
        # Dicts act as insertion-ordered sets so IDs reachable both through a
        # project and through a direct client_id are listed only once.
        ordered: dict[str, dict[int, None]] = {}
        # The client itself (only when it exists).
        client_row = conn.execute("SELECT id FROM clients WHERE id = ?", (client_id,)).fetchone()
        if client_row is not None:
            ordered["client"] = {client_row["id"]: None}
        # Projects under this client.
        project_rows = conn.execute("SELECT id FROM projects WHERE client_id = ?", (client_id,)).fetchall()
        project_ids = [r["id"] for r in project_rows]
        if project_ids:
            ordered["project"] = dict.fromkeys(project_ids)
        # Cascade: children of each project.
        for pid in project_ids:
            for etype, ids in _get_ids_for_scope(conn, f"project:{pid}").items():
                if etype in ("project", "client"):
                    continue
                ordered.setdefault(etype, {}).update(dict.fromkeys(ids))
        # Direct: entities with their own client_id foreign key.
        for etype, meta in ENTITY_META.items():
            if etype in ("client", "project") or not meta["has_client_id"]:
                continue
            ids = _select_scope_ids(conn, etype, "client_id = ?", (client_id,))
            if ids:
                ordered.setdefault(etype, {}).update(dict.fromkeys(ids))
        return {etype: list(ids) for etype, ids in ordered.items()}

    # Single entity: file:42, email:10, etc.
    if prefix in ENTITY_META:
        return {prefix: [int(value)]}

    raise ValueError(f"Unknown scope prefix: {prefix}")
263
+
264
+
265
def _write_back_visibility(conn: sqlite3.Connection, entity_type: str, results: dict[int, tuple]) -> None:
    """Persist resolved visibility into the ``mcp_view`` column.

    *results* maps an entity ID to a ``(state, source)`` pair. When the
    source traces back to the global policy or the baseline, ``'inherit'``
    is stored in place of the state; otherwise the state is stored as-is.
    All rows go through one ``executemany`` call; nothing is committed here.
    """
    table = ENTITY_META[entity_type]["table"]
    updates = [
        ("inherit" if _is_inherit_source(source) else state, entity_id)
        for entity_id, (state, source) in results.items()
    ]
    conn.executemany(f"UPDATE {table} SET mcp_view = ? WHERE id = ?", updates)
277
+
278
+
279
def _write_back_permissions(conn: sqlite3.Connection, entity_type: str, results: dict[int, tuple]) -> None:
    """Persist resolved read permission into the ``mcp_read`` column.

    *results* maps an entity ID to an ``(allowed, source)`` pair. When the
    source traces back to the global policy or the baseline, ``'inherit'``
    is stored; otherwise ``'allow'`` or ``'deny'`` depending on the
    truthiness of ``allowed``. All rows go through one ``executemany`` call;
    nothing is committed here.
    """
    table = ENTITY_META[entity_type]["table"]
    updates = []
    for entity_id, (allowed, source) in results.items():
        if _is_inherit_source(source):
            stored = "inherit"
        elif allowed:
            stored = "allow"
        else:
            stored = "deny"
        updates.append((stored, entity_id))
    conn.executemany(f"UPDATE {table} SET mcp_read = ? WHERE id = ?", updates)
294
+
295
+
296
+ # ---------------------------------------------------------------------------
297
+ # Public API
298
+ # ---------------------------------------------------------------------------
299
+
300
+
301
def count_affected_entities(conn: sqlite3.Connection, scope: str) -> dict[str, int]:
    """Report how many entities *scope* touches, without modifying any.

    Args:
        conn: SQLite connection used for the scope lookup.
        scope: Policy scope string (e.g. "global", "project:3").

    Returns:
        Mapping of entity type to number of affected rows; entity types
        with no affected rows are omitted.
    """
    ids_by_type = _get_ids_for_scope(conn, scope)
    return {entity_type: len(ids) for entity_type, ids in ids_by_type.items() if ids}
309
+
310
+
311
def stamp_entities(conn: sqlite3.Connection, ids_by_type: dict[str, list[int]]) -> dict[str, int]:
    """Resolve visibility and permissions for the given IDs and store them.

    For each entity type with a non-empty ID list, the visibility and/or
    permission resolvers are run (according to that type's ``ENTITY_META``
    flags) and the results written back. A single commit is issued at the
    end — including when *ids_by_type* is empty.

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        ids_by_type: Mapping of entity type to the row IDs to stamp.

    Returns:
        Mapping of entity type to the number of IDs processed; types whose
        ID list was empty are omitted.
    """
    stamped: dict[str, int] = {}

    for entity_type, ids in ids_by_type.items():
        if ids:
            capabilities = ENTITY_META[entity_type]

            if capabilities["has_visibility"]:
                _write_back_visibility(conn, entity_type, batch_resolve_visibility(conn, entity_type, ids))

            if capabilities["has_permissions"]:
                _write_back_permissions(conn, entity_type, batch_resolve_permissions(conn, entity_type, ids))

            stamped[entity_type] = len(ids)

    conn.commit()
    return stamped
347
+
348
+
349
def recalculate_access(conn: sqlite3.Connection, scope: str) -> dict[str, int]:
    """Re-resolve and store access values for everything *scope* affects.

    The scope is first mapped to concrete entity IDs, which are then handed
    to ``stamp_entities`` (which also commits).

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        scope: Policy scope string (e.g. "global", "project:3", "folder:~/Work/")

    Returns:
        Mapping of entity type to number of rows processed.
    """
    return stamp_entities(conn, _get_ids_for_scope(conn, scope))
361
+
362
+
363
def recalculate_access_batched(
    conn: sqlite3.Connection,
    scope: str,
    *,
    batch_size: int = 5000,
    on_batch: Callable[[int], None] | None = None,
) -> dict[str, int]:
    """Recalculate visibility and permissions in batches with progress callback.

    Resolves the same entity set as ``recalculate_access()``, but processes
    each entity type's IDs in chunks of *batch_size*, committing after every
    chunk and then calling *on_batch* with the chunk's size. Designed for
    large scopes where a progress bar is needed.

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        scope: Policy scope string (e.g. "global", "folder:~/Work/")
        batch_size: Number of entity IDs per chunk (default 5000); must be >= 1.
        on_batch: Optional callback receiving the count processed per chunk

    Returns:
        Dict mapping entity type to total count of rows processed.
        Entity types with no affected IDs are omitted.

    Raises:
        ValueError: if *batch_size* is less than 1, or if *scope* is invalid.
    """
    # Reject non-positive sizes before doing any work: a negative step makes
    # range() empty, so nothing would be processed while the returned stats
    # still claimed every ID was updated; zero fails inside range() with an
    # unrelated-looking message.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    ids_by_type = _get_ids_for_scope(conn, scope)
    stats: dict[str, int] = {}

    for entity_type, ids in ids_by_type.items():
        if not ids:
            continue
        meta = ENTITY_META[entity_type]

        for start in range(0, len(ids), batch_size):
            chunk = ids[start : start + batch_size]

            if meta["has_visibility"]:
                vis_results = batch_resolve_visibility(conn, entity_type, chunk)
                _write_back_visibility(conn, entity_type, vis_results)

            if meta["has_permissions"]:
                perm_results = batch_resolve_permissions(conn, entity_type, chunk)
                _write_back_permissions(conn, entity_type, perm_results)

            # Commit per chunk so progress is durable and the write
            # transaction stays short on large scopes.
            conn.commit()

            if on_batch is not None:
                on_batch(len(chunk))

        stats[entity_type] = len(ids)

    return stats
412
+
413
+
414
def recalculate_entity(conn: sqlite3.Connection, entity_type: str, entity_id: int) -> dict[str, int]:
    """Re-resolve and store access values for one entity row.

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        entity_type: Entity type (e.g. "file", "email")
        entity_id: Row ID

    Returns:
        ``{entity_type: 1}`` when the row exists and was processed, or
        ``{entity_type: 0}`` when no row has that ID (nothing is written or
        committed in that case).

    Raises:
        ValueError: if *entity_type* is not a key of ``ENTITY_META``.
    """
    meta = ENTITY_META.get(entity_type)
    if meta is None:
        raise ValueError(f"Unknown entity type: {entity_type}")

    # Confirm the row exists before running the resolvers.
    lookup = f"SELECT id FROM {meta['table']} WHERE id = ?"
    if conn.execute(lookup, (entity_id,)).fetchone() is None:
        return {entity_type: 0}

    target = [entity_id]

    if meta["has_visibility"]:
        _write_back_visibility(conn, entity_type, batch_resolve_visibility(conn, entity_type, target))

    if meta["has_permissions"]:
        _write_back_permissions(conn, entity_type, batch_resolve_permissions(conn, entity_type, target))

    conn.commit()
    return {entity_type: 1}
@@ -0,0 +1 @@
1
+ """Footprinter HTTP API — FastAPI routers calling the service layer."""
footprinter/api/db.py ADDED
@@ -0,0 +1,61 @@
1
+ """Database connection for Footprinter HTTP API."""
2
+
3
+ import sqlite3
4
+ from contextlib import contextmanager
5
+
6
+ from footprinter.paths import get_db_path
7
+ from footprinter.services.access_service import load_globals
8
+
9
+
10
class DatabaseNotInitializedError(Exception):
    """Signals that the database lacks the expected schema.

    Raised by ``_check_db_initialized`` when the sentinel ``files`` table is
    absent from the connected database.
    """
12
+
13
+
14
def _check_db_initialized(conn: sqlite3.Connection) -> None:
    """Verify that the database contains the expected schema.

    The ``files`` table serves as the sentinel: the check counts entries
    named ``files`` of type ``table`` in ``sqlite_master``.

    Raises:
        DatabaseNotInitializedError: if no such table exists.
    """
    sentinel_query = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='files'"
    table_count = conn.execute(sentinel_query).fetchone()[0]
    if table_count == 0:
        raise DatabaseNotInitializedError()
23
+
24
+
25
@contextmanager
def get_db():
    """Context manager for database connections.

    Opens the database at ``get_db_path()``, configures the connection,
    verifies the schema is present, refreshes the global policy cache via
    ``load_globals()``, and yields the connection. The connection is closed
    on exit, including when configuration or the schema check fails.

    Divergences from MCP's ``get_db()``:

    - No ``PRAGMA query_only`` — the HTTP API uses Role.ADMIN and may need
      write access for future endpoints.
    - No ``handle_db_errors`` decorator — ``DatabaseNotInitializedError`` is
      caught by a FastAPI exception handler registered in ``server.create_app()``.

    Yields:
        sqlite3.Connection with ``row_factory = sqlite3.Row``.

    Raises:
        DatabaseNotInitializedError: if the ``files`` sentinel table is missing.
    """
    conn = sqlite3.connect(str(get_db_path()), timeout=10)
    # Everything after a successful connect lives inside the try block so the
    # connection is closed even if a PRAGMA fails (e.g. the file is not a
    # valid SQLite database).
    try:
        conn.row_factory = sqlite3.Row
        # NOTE(review): this PRAGMA appears to replace the 10 s busy timeout
        # requested via connect(timeout=10) with 5 s — confirm which value is
        # intended.
        conn.execute("PRAGMA busy_timeout=5000")
        conn.execute("PRAGMA foreign_keys=ON")
        _check_db_initialized(conn)
        load_globals(conn)
        yield conn
    finally:
        conn.close()
49
+
50
+
51
def get_conn():
    """Yield a database connection for use as a FastAPI dependency.

    Thin generator wrapper around ``get_db()``: the connection is opened
    when the generator is started and released when it is resumed or closed.

    Usage::

        @router.get("/endpoint")
        def handler(conn=Depends(get_conn)):
            ...
    """
    with get_db() as connection:
        yield connection