footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
footprinter/__init__.py
ADDED
footprinter/access.py
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
"""Recalculation engine — scope-to-entity mapping + batch write-back.
|
|
2
|
+
|
|
3
|
+
Maps a policy scope (e.g. "global", "project:3", "folder:~/Work/") to affected
|
|
4
|
+
entity rows, calls the existing batch resolve functions, and writes resolved
|
|
5
|
+
values back to mcp_view / mcp_read columns.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import sqlite3
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from footprinter.permissions import batch_resolve_permissions
|
|
16
|
+
from footprinter.visibility import batch_resolve_visibility
|
|
17
|
+
|
|
18
|
+
# Sources that indicate the resolution came from the global policy or the
|
|
19
|
+
# hardcoded baseline — not from any entity-specific or scope-specific policy.
|
|
20
|
+
# These entities should be stored as 'inherit' so changing the global policy
|
|
21
|
+
# takes effect at query time without re-running access resolution.
|
|
22
|
+
_INHERIT_SOURCES = frozenset({"global", "baseline"})
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _is_inherit_source(source: str) -> bool:
    """Report whether *source* resolves to the global policy or the baseline.

    Two shapes are recognised: a bare source name (``"global"``) and a
    cascade path whose trailing parenthetical names the origin
    (``"project:3 (via global)"``).
    """
    if source in _INHERIT_SOURCES:
        return True

    marker = "(via "
    start = source.rfind(marker)
    # Only a trailing "(via <origin>)" group counts as a cascade suffix.
    if start == -1 or not source.endswith(")"):
        return False

    origin = source[start + len(marker) : -1]
    return origin in _INHERIT_SOURCES
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Entity table metadata
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# Each entry describes an entity type's table and capabilities.
|
|
46
|
+
# table: SQL table name
|
|
47
|
+
# has_visibility: has mcp_view column
|
|
48
|
+
# has_permissions: has mcp_read column
|
|
49
|
+
# has_status: has status column (filter WHERE status != 'removed')
|
|
50
|
+
# has_project_id: has project_id FK
|
|
51
|
+
# has_client_id: has client_id FK
|
|
52
|
+
# has_account: has account column
|
|
53
|
+
# path_column: column name for path-prefix matching (None if N/A)
|
|
54
|
+
|
|
55
|
+
def _entity_meta(
    table: str,
    *,
    visibility: bool,
    permissions: bool,
    status: bool,
    project_id: bool,
    client_id: bool,
    account: bool,
    path_column: str | None,
) -> dict[str, Any]:
    """Build one ENTITY_META row; keyword-only flags keep every entry complete.

    Keys produced:
        table            SQL table name
        has_visibility   table has an mcp_view column
        has_permissions  table has an mcp_read column
        has_status       table has a status column ('removed' rows are filtered)
        has_project_id   table has a project_id FK
        has_client_id    table has a client_id FK
        has_account      table has an account column
        path_column      column used for path-prefix matching, or None
    """
    return {
        "table": table,
        "has_visibility": visibility,
        "has_permissions": permissions,
        "has_status": status,
        "has_project_id": project_id,
        "has_client_id": client_id,
        "has_account": account,
        "path_column": path_column,
    }


ENTITY_META: dict[str, dict[str, Any]] = {
    "file": _entity_meta(
        "files",
        visibility=True,
        permissions=True,
        status=True,
        project_id=True,
        client_id=True,
        account=True,
        path_column="path",
    ),
    "email": _entity_meta(
        "emails",
        visibility=True,
        permissions=True,
        status=False,
        project_id=True,
        client_id=True,
        account=True,
        path_column=None,
    ),
    "chat": _entity_meta(
        "chats",
        visibility=True,
        permissions=True,
        status=True,
        project_id=True,
        client_id=True,
        account=True,
        path_column=None,
    ),
    "folder": _entity_meta(
        "folders",
        visibility=True,
        permissions=False,
        status=False,
        project_id=True,
        client_id=True,
        account=False,
        path_column="path",
    ),
    "project": _entity_meta(
        "projects",
        visibility=True,
        permissions=True,
        status=False,
        project_id=False,
        client_id=True,
        account=False,
        path_column="root_path",
    ),
    "client": _entity_meta(
        "clients",
        visibility=True,
        permissions=True,
        status=False,
        project_id=False,
        client_id=False,
        account=False,
        path_column=None,
    ),
}

# Reverse lookup from table name to entity type (e.g. "files" -> "file"),
# used to interpret the value part of "source:<table>" scopes.
_SOURCE_TO_ENTITY = {ENTITY_META[etype]["table"]: etype for etype in ENTITY_META}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
# Internal helpers
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _get_all_ids(conn: sqlite3.Connection, entity_type: str) -> list[int]:
    """Return the id of every row of *entity_type* that is still active.

    Tables that carry a ``status`` column leave out rows marked
    ``'removed'``; other tables contribute every row.
    """
    meta = ENTITY_META[entity_type]
    query = f"SELECT id FROM {meta['table']}"
    if meta["has_status"]:
        query += " WHERE status != 'removed'"
    return [record["id"] for record in conn.execute(query).fetchall()]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _select_ids(
    conn: sqlite3.Connection,
    meta: dict[str, Any],
    condition: str,
    params: tuple[Any, ...],
) -> list[int]:
    """Return ids from *meta*'s table that satisfy *condition*.

    Rows with ``status = 'removed'`` are excluded when the table has a status
    column. *condition* is a SQL fragment assembled from ``ENTITY_META``
    values only; caller-supplied values travel exclusively through *params*.
    """
    where = condition
    if meta["has_status"]:
        where += " AND status != 'removed'"
    rows = conn.execute(f"SELECT id FROM {meta['table']} WHERE {where}", params).fetchall()
    return [r["id"] for r in rows]


def _parse_scope_id(scope: str, value: str) -> int:
    """Convert the id part of *scope* to int; the error names the whole scope."""
    try:
        return int(value)
    except ValueError:
        raise ValueError(f"Invalid scope (expected an integer id): {scope}") from None


def _get_ids_for_scope(conn: sqlite3.Connection, scope: str) -> dict[str, list[int]]:
    """Map a policy scope to ``{entity_type: [ids]}`` affected by it.

    Supported scopes:
        ``global``            every active row of every entity type
        ``source:<table>``    every active row of one table (e.g. ``source:files``)
        ``account:<name>``    rows whose ``account`` column equals *name*
        ``folder:<path>``     rows whose path column starts with *path* (``~`` expanded)
        ``project:<id>``      the project plus rows with that ``project_id``
        ``client:<id>``       the client, its projects, their children, and rows
                              with that ``client_id`` (deduplicated)
        ``<entity>:<id>``     a single row, if it exists

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        scope: Policy scope string.

    Returns:
        Dict mapping entity type to the list of affected row ids. ``global``
        and ``source:`` scopes may include empty lists; the other scopes only
        include types with at least one id.

    Raises:
        ValueError: If the scope is malformed, has an unknown prefix or source,
            or carries a non-integer id where one is required.
    """
    if scope == "global":
        return {etype: _get_all_ids(conn, etype) for etype in ENTITY_META}

    if ":" not in scope:
        raise ValueError(f"Invalid scope: {scope}")

    prefix, value = scope.split(":", 1)

    if prefix == "source":
        # source:files → all files; source:emails → all emails
        entity_type = _SOURCE_TO_ENTITY.get(value)
        if entity_type is None:
            raise ValueError(f"Unknown source scope: {scope}")
        return {entity_type: _get_all_ids(conn, entity_type)}

    if prefix == "account":
        # account:{name} → every account-bearing table WHERE account = ?
        result: dict[str, list[int]] = {}
        for etype, meta in ENTITY_META.items():
            if not meta["has_account"]:
                continue
            ids = _select_ids(conn, meta, "account = ?", (value,))
            if ids:
                result[etype] = ids
        return result

    if prefix == "folder":
        # folder:{path} → rows whose path column has a matching prefix
        path = os.path.expanduser(value)
        # Escape LIKE metacharacters so literal %, _ in paths match correctly
        escaped = path.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        result = {}
        for etype, meta in ENTITY_META.items():
            path_col = meta["path_column"]
            if path_col is None:
                continue
            ids = _select_ids(conn, meta, f"{path_col} LIKE ? ESCAPE '\\'", (escaped + "%",))
            if ids:
                result[etype] = ids
        return result

    if prefix == "project":
        project_id = _parse_scope_id(scope, value)
        result = {}
        # The project itself
        row = conn.execute("SELECT id FROM projects WHERE id = ?", (project_id,)).fetchone()
        if row:
            result["project"] = [row["id"]]
        # Children with project_id FK
        for etype, meta in ENTITY_META.items():
            if etype == "project" or not meta["has_project_id"]:
                continue
            ids = _select_ids(conn, meta, "project_id = ?", (project_id,))
            if ids:
                result[etype] = ids
        return result

    if prefix == "client":
        client_id = _parse_scope_id(scope, value)
        # Gather ids per entity type as dicts (insertion-ordered sets) so the
        # project cascade can be unioned with direct client_id matches without
        # double-stamping entities reachable via both paths.
        id_sets: dict[str, dict[int, None]] = {}
        # The client itself
        row = conn.execute("SELECT id FROM clients WHERE id = ?", (client_id,)).fetchone()
        if row:
            id_sets["client"] = {row["id"]: None}
        # Projects under this client
        proj_rows = conn.execute("SELECT id FROM projects WHERE client_id = ?", (client_id,)).fetchall()
        proj_ids = [r["id"] for r in proj_rows]
        if proj_ids:
            id_sets["project"] = dict.fromkeys(proj_ids)
        # Cascade: children of each project
        for pid in proj_ids:
            for etype, ids in _get_ids_for_scope(conn, f"project:{pid}").items():
                if etype in ("project", "client"):
                    continue
                id_sets.setdefault(etype, {}).update(dict.fromkeys(ids))
        # Direct: entities with a client_id FK of their own. Union with the
        # cascade; the dict keys take care of deduplication.
        for etype, meta in ENTITY_META.items():
            if etype in ("client", "project") or not meta["has_client_id"]:
                continue
            ids = _select_ids(conn, meta, "client_id = ?", (client_id,))
            if ids:
                id_sets.setdefault(etype, {}).update(dict.fromkeys(ids))
        return {etype: list(ids) for etype, ids in id_sets.items()}

    # Single entity: file:42, email:10, etc.
    if prefix in ENTITY_META:
        entity_id = _parse_scope_id(scope, value)
        table = ENTITY_META[prefix]["table"]
        # Confirm the row exists so counts/stats never report a phantom entity,
        # matching how the project: and client: branches treat their root row.
        row = conn.execute(f"SELECT id FROM {table} WHERE id = ?", (entity_id,)).fetchone()
        return {prefix: [row["id"]]} if row else {}

    raise ValueError(f"Unknown scope prefix: {prefix}")
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _write_back_visibility(conn: sqlite3.Connection, entity_type: str, results: dict[int, tuple]) -> None:
    """Persist resolved visibility into the ``mcp_view`` column in one batch.

    *results* maps entity id to ``(state, source)``. When the source traces
    back to the global policy or the baseline, ``'inherit'`` is stored in
    place of the resolved state so the value is not frozen at write time;
    every other source stores the resolved state itself.
    """
    table = ENTITY_META[entity_type]["table"]

    updates = []
    for entity_id, (state, source) in results.items():
        stored = "inherit" if _is_inherit_source(source) else state
        updates.append((stored, entity_id))

    conn.executemany(f"UPDATE {table} SET mcp_view = ? WHERE id = ?", updates)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _write_back_permissions(conn: sqlite3.Connection, entity_type: str, results: dict[int, tuple]) -> None:
    """Persist resolved read permissions into the ``mcp_read`` column in one batch.

    *results* maps entity id to ``(allowed, source)``. When the source traces
    back to the global policy or the baseline, ``'inherit'`` is stored;
    otherwise the boolean is stored as ``'allow'`` or ``'deny'``.
    """
    table = ENTITY_META[entity_type]["table"]

    updates = []
    for entity_id, (allowed, source) in results.items():
        if _is_inherit_source(source):
            stored = "inherit"
        elif allowed:
            stored = "allow"
        else:
            stored = "deny"
        updates.append((stored, entity_id))

    conn.executemany(f"UPDATE {table} SET mcp_read = ? WHERE id = ?", updates)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# ---------------------------------------------------------------------------
|
|
297
|
+
# Public API
|
|
298
|
+
# ---------------------------------------------------------------------------
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def count_affected_entities(conn: sqlite3.Connection, scope: str) -> dict[str, int]:
    """Tally how many rows *scope* touches, per entity type, without writing.

    Returns:
        Dict mapping entity type to the number of affected rows. Types with
        no affected rows are omitted.
    """
    counts: dict[str, int] = {}
    for etype, entity_ids in _get_ids_for_scope(conn, scope).items():
        if entity_ids:
            counts[etype] = len(entity_ids)
    return counts
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def stamp_entities(conn: sqlite3.Connection, ids_by_type: dict[str, list[int]]) -> dict[str, int]:
    """Resolve visibility and permissions for the given ids and store the results.

    For each entity type with a non-empty id list, the batch resolvers are
    called for whichever of ``mcp_view`` / ``mcp_read`` the type supports
    (per ``ENTITY_META``) and the outcome is written back. A single commit is
    issued at the end, and it is issued even when *ids_by_type* is empty.

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        ids_by_type: Mapping of entity type to list of row IDs to stamp.

    Returns:
        Dict mapping entity type to the number of ids processed. Types with
        an empty id list are omitted.
    """
    stamped: dict[str, int] = {}

    for etype, entity_ids in ids_by_type.items():
        if entity_ids:
            capabilities = ENTITY_META[etype]

            if capabilities["has_visibility"]:
                resolved_view = batch_resolve_visibility(conn, etype, entity_ids)
                _write_back_visibility(conn, etype, resolved_view)

            if capabilities["has_permissions"]:
                resolved_read = batch_resolve_permissions(conn, etype, entity_ids)
                _write_back_permissions(conn, etype, resolved_read)

            stamped[etype] = len(entity_ids)

    conn.commit()
    return stamped
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def recalculate_access(conn: sqlite3.Connection, scope: str) -> dict[str, int]:
    """Re-resolve and store visibility/permissions for everything under *scope*.

    The scope is expanded to concrete entity ids, which are then stamped (and
    committed) in one pass.

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        scope: Policy scope string (e.g. "global", "project:3", "folder:~/Work/")

    Returns:
        Dict mapping entity type to count of rows updated.
    """
    return stamp_entities(conn, _get_ids_for_scope(conn, scope))
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def recalculate_access_batched(
    conn: sqlite3.Connection,
    scope: str,
    *,
    batch_size: int = 5000,
    on_batch: Callable[[int], None] | None = None,
) -> dict[str, int]:
    """Recalculate visibility and permissions in batches with progress callback.

    Same semantics as ``recalculate_access()`` but commits after each batch
    and calls *on_batch* with the count of entities processed per chunk.
    Designed for large scopes where a progress bar is needed.

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        scope: Policy scope string (e.g. "global", "folder:~/Work/")
        batch_size: Number of entity IDs per chunk (default 5000); must be >= 1
        on_batch: Optional callback receiving the count processed per chunk

    Returns:
        Dict mapping entity type to total count of rows updated.

    Raises:
        ValueError: If *batch_size* is less than 1.
    """
    # A negative step would make range() yield nothing, so no chunk would be
    # processed while the stats below still claimed every id was updated.
    # Reject bad sizes before any work is done.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    ids_by_type = _get_ids_for_scope(conn, scope)
    stats: dict[str, int] = {}

    for entity_type, ids in ids_by_type.items():
        if not ids:
            continue
        meta = ENTITY_META[entity_type]

        for start in range(0, len(ids), batch_size):
            chunk = ids[start : start + batch_size]

            if meta["has_visibility"]:
                vis_results = batch_resolve_visibility(conn, entity_type, chunk)
                _write_back_visibility(conn, entity_type, vis_results)

            if meta["has_permissions"]:
                perm_results = batch_resolve_permissions(conn, entity_type, chunk)
                _write_back_permissions(conn, entity_type, perm_results)

            # Commit per chunk so progress is durable before it is reported.
            conn.commit()

            if on_batch is not None:
                on_batch(len(chunk))

        stats[entity_type] = len(ids)

    return stats
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def recalculate_entity(conn: sqlite3.Connection, entity_type: str, entity_id: int) -> dict[str, int]:
    """Re-resolve and store visibility/permissions for one row.

    Args:
        conn: SQLite connection with row_factory = sqlite3.Row
        entity_type: Entity type (e.g. "file", "email")
        entity_id: Row ID

    Returns:
        ``{entity_type: 1}`` when the row exists and was stamped,
        ``{entity_type: 0}`` when no such row exists.

    Raises:
        ValueError: If *entity_type* is not a key of ``ENTITY_META``.
    """
    if entity_type not in ENTITY_META:
        raise ValueError(f"Unknown entity type: {entity_type}")

    # Nothing is resolved or committed for a row that is not there.
    table = ENTITY_META[entity_type]["table"]
    found = conn.execute(f"SELECT id FROM {table} WHERE id = ?", (entity_id,)).fetchone()
    if found is None:
        return {entity_type: 0}

    # stamp_entities runs the same resolve → write-back → commit sequence.
    stamp_entities(conn, {entity_type: [entity_id]})
    return {entity_type: 1}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Footprinter HTTP API — FastAPI routers calling the service layer."""
|
footprinter/api/db.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Database connection for Footprinter HTTP API."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
|
|
6
|
+
from footprinter.paths import get_db_path
|
|
7
|
+
from footprinter.services.access_service import load_globals
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DatabaseNotInitializedError(Exception):
    """Signals that the database file is present but lacks the expected tables."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _check_db_initialized(conn: sqlite3.Connection) -> None:
    """Raise ``DatabaseNotInitializedError`` unless the schema is in place.

    The ``files`` table serves as the sentinel: when ``sqlite_master`` has no
    entry for it, the database is treated as uninitialized.
    """
    sentinel_query = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='files'"
    table_count = conn.execute(sentinel_query).fetchone()[0]
    if table_count != 0:
        return
    raise DatabaseNotInitializedError()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@contextmanager
def get_db():
    """Context manager for database connections.

    Opens a connection to the path returned by ``get_db_path()``, configures
    it (``sqlite3.Row`` row factory, busy timeout, foreign keys), verifies the
    schema is present, refreshes the global policy cache via
    ``load_globals()``, and yields the connection. The connection is always
    closed on exit, including when setup or the schema check fails.

    Divergences from MCP's ``get_db()``:

    - No ``PRAGMA query_only`` — the HTTP API uses Role.ADMIN and may need
      write access for future endpoints.
    - No ``handle_db_errors`` decorator — ``DatabaseNotInitializedError`` is
      caught by a FastAPI exception handler registered in ``server.create_app()``.

    Raises:
        DatabaseNotInitializedError: If the ``files`` sentinel table is missing.
    """
    conn = sqlite3.connect(str(get_db_path()), timeout=10)
    try:
        # Configuration happens inside the guarded region so that a failure in
        # any of these statements still closes the connection.
        conn.row_factory = sqlite3.Row
        # NOTE(review): this PRAGMA appears to supersede the 10 s ``timeout``
        # passed to connect() with 5 s — confirm which value is intended.
        conn.execute("PRAGMA busy_timeout=5000")
        conn.execute("PRAGMA foreign_keys=ON")
        _check_db_initialized(conn)
        load_globals(conn)
        yield conn
    finally:
        conn.close()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_conn():
    """Generator dependency for FastAPI handlers that need a database connection.

    Wraps ``get_db()`` so the connection is opened before the handler runs
    and released once the generator is finalized.

    Usage::

        @router.get("/endpoint")
        def handler(conn=Depends(get_conn)):
            ...
    """
    with get_db() as connection:
        yield connection
|