footprinter-cli 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +431 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
- footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
- footprinter/bundled/samples/visible-file-sample.txt +2 -0
- footprinter/cli/__init__.py +135 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +327 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/_sample_seed.py +204 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +543 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +2001 -0
- footprinter/cli/status.py +747 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +602 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +724 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +487 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +315 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +223 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +112 -0
- footprinter/ingest/pipe_runner.py +200 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +186 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +67 -0
- footprinter/mcp/errors.py +105 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +19 -0
- footprinter/paths.py +117 -0
- footprinter/permissions.py +1152 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1264 -0
- footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
- footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
- footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"""semantic_service — embedding search with FTS5 fallback and access control.
|
|
2
|
+
|
|
3
|
+
D2 access rule: semantic matches are content-derived, so visible items also
|
|
4
|
+
require mcp_read='allow' (presence in results reveals content).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import sqlite3
|
|
9
|
+
from typing import Dict, List
|
|
10
|
+
|
|
11
|
+
from footprinter.db.search import (
|
|
12
|
+
chat_fts5_fallback,
|
|
13
|
+
enrich_chat_visibility,
|
|
14
|
+
enrich_file_metadata,
|
|
15
|
+
file_fts5_fallback,
|
|
16
|
+
)
|
|
17
|
+
from footprinter.services.access_service import (
|
|
18
|
+
resolve_inherit_permission,
|
|
19
|
+
resolve_inherit_visibility,
|
|
20
|
+
)
|
|
21
|
+
from footprinter.services.roles import Role
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
# Values accepted for the ``source`` argument of semantic_search().
_VALID_SOURCES = frozenset(("chats", "files", "all"))

# Keys kept by _trim_chat_result().
_CHAT_FIELDS = {
    "chat_id", "chat_title", "snippet", "relevance_score",
    "source", "created_at", "message_id",
}

# Keys kept by _trim_file_result().
_FILE_FIELDS = {
    "id", "name", "path", "content_type",
    "size_bytes", "modified_at", "relevance_score", "snippet",
}

# Search outcome: ok (vector worked), degraded (FTS5 fallback), failed (both crashed)
_OK, _DEGRADED, _FAILED = "ok", "degraded", "failed"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
# Public API
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def semantic_search(
    conn: sqlite3.Connection,
    query: str,
    *,
    role: Role = Role.ADMIN,
    source: str = "all",
    limit: int = 10,
) -> dict:
    """Search chats and/or files by semantic similarity.

    Args:
        conn: Open SQLite connection, handed to the per-source searches.
        query: Free-text query. Must be at least 3 characters long once
            surrounding whitespace is ignored.
        role: Caller role, forwarded to the per-source searches which apply
            the access filtering.
        source: ``"chats"``, ``"files"`` or ``"all"``.
        limit: Result count requested from each per-source search.

    Returns:
        ``{"status": "invalid_query"}`` or ``{"status": "invalid_source"}``
        when validation fails. Otherwise a dict holding ``query``,
        ``summary``, a ``chats`` and/or ``files`` list (depending on
        *source*), ``suppressed`` (only when greater than zero) and ``note``
        (only when a per-source search produced notes).
    """
    # Measure the stripped text: whitespace-only or padded input such as
    # "   " or " a " must not satisfy the minimum-length rule.
    if not query or len(query.strip()) < 3:
        return {"status": "invalid_query"}

    if source not in _VALID_SOURCES:
        return {"status": "invalid_source"}

    result: dict = {"query": query}
    all_notes: list[str] = []
    total_suppressed = 0
    chat_status = _OK
    file_status = _OK

    if source in ("chats", "all"):
        chats, notes, suppressed, chat_status = _search_chats(conn, query, limit, role)
        result["chats"] = chats
        all_notes.extend(notes)
        total_suppressed += suppressed

    if source in ("files", "all"):
        files, notes, suppressed, file_status = _search_files(conn, query, limit, role)
        result["files"] = files
        all_notes.extend(notes)
        total_suppressed += suppressed

    if total_suppressed > 0:
        result["suppressed"] = total_suppressed

    # One summary sentence group per searched source, chats first.
    summary_parts = []
    if "chats" in result:
        summary_parts.append(
            _build_chat_summary(result["chats"], query, status=chat_status),
        )
    if "files" in result:
        summary_parts.append(
            _build_file_summary(result["files"], query, status=file_status),
        )
    result["summary"] = " ".join(summary_parts)

    if all_notes:
        # dict.fromkeys drops duplicate notes while keeping first-seen order
        # (both sources can emit the same "keyword-based" note).
        result["note"] = " ".join(dict.fromkeys(all_notes))

    return result
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
# Chat search
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _search_chats(
    conn: sqlite3.Connection,
    query: str,
    limit: int,
    role: Role,
) -> tuple[list[dict], list[str], int, str]:
    """Run the chat search pipeline: vector search, FTS5 fallback, enrich, filter.

    Returns a 4-tuple ``(results, notes, suppressed, status)`` where *status*
    is one of the module's outcome constants. When both the vector search and
    the FTS5 fallback raise, the tuple is
    ``([], [<failure note>], 0, _FAILED)``.
    """
    outcome = _OK
    hits: list[dict] = []

    # Vector search first; any exception (including a failed import) moves
    # us to the keyword fallback.
    try:
        from footprinter.semantic.vector_store import VectorStore

        hits = VectorStore.get_instance().search_chats(query=query, n_results=limit)
    except Exception as e:
        logger.warning("Vector search unavailable (%s), falling back to FTS5", e)
        outcome = _DEGRADED
        try:
            hits = chat_fts5_fallback(conn, query, limit)
        except Exception as fallback_err:
            logger.warning("Chat FTS5 fallback failed: %s", fallback_err)
            return [], ["Chat search failed — try footprinter_search"], 0, _FAILED

    # Attach account / visibility / permission columns from the database.
    known_ids = [hit.get("chat_id") for hit in hits if hit.get("chat_id")]
    visibility = enrich_chat_visibility(conn, known_ids) if known_ids else {}

    for hit in hits:
        chat_id = hit.get("chat_id")
        row = visibility.get(chat_id)
        hit["id"] = chat_id
        if row:
            hit["account"] = row["account"]
            hit["mcp_view"] = row["mcp_view"]
            hit["mcp_read"] = row["mcp_read"]
        else:
            # No DB row: fall back to the most restrictive values.
            hit["account"] = ""
            hit["mcp_view"] = "hidden"
            hit["mcp_read"] = None

    # Access control filtering
    if role.sees_all:
        allowed = hits
        hidden_count = 0
    else:
        # D2: presence in semantic results reveals content — keep only hits
        # that resolve to both "visible" and "allow".
        allowed = []
        for hit in hits:
            if resolve_inherit_visibility(hit.get("mcp_view")) != "visible":
                continue
            if resolve_inherit_permission(hit.get("mcp_read")) != "allow":
                continue
            allowed.append(hit)
        hidden_count = len(hits) - len(allowed)

    # Only hits whose stored mcp_view is literally "visible" are reduced to
    # presentation fields; everything else is passed through unchanged.
    presented = [
        hit if hit.get("mcp_view") != "visible" else _trim_chat_result(hit)
        for hit in allowed
    ]

    notes: list[str] = []
    if outcome == _DEGRADED:
        notes.append("Results are keyword-based (semantic search unavailable)")

    return presented, notes, hidden_count, outcome
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# ---------------------------------------------------------------------------
|
|
193
|
+
# File search
|
|
194
|
+
# ---------------------------------------------------------------------------
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _search_files(
    conn: sqlite3.Connection,
    query: str,
    limit: int,
    role: Role,
) -> tuple[list[dict], list[str], int, str]:
    """Run the file search pipeline: vector search (+ enrich) or FTS5 fallback, then filter.

    Returns a 4-tuple ``(results, notes, suppressed, status)``. The vector
    path requests ``limit * 3`` chunks, collapses them to one hit per file
    and truncates to *limit* at the very end. When both the vector search
    and the FTS5 fallback raise, the tuple is
    ``([], [<failure note>], 0, _FAILED)``.
    """
    outcome = _OK
    matches: List[Dict] = []
    missing_id = 0

    try:
        from footprinter.semantic.vector_store import VectorStore

        vector_hits = VectorStore.get_instance().search_files(query=query, n_results=limit * 3)
    except Exception as e:
        logger.warning("Vector search unavailable (%s), falling back to FTS5", e)
        outcome = _DEGRADED
        try:
            matches = file_fts5_fallback(conn, query, limit)
        except Exception as fallback_err:
            logger.warning("File FTS5 fallback failed: %s", fallback_err)
            return [], ["File search failed — try footprinter_search"], 0, _FAILED
    else:
        # Post-processing lives in the else-branch on purpose: an error here
        # propagates instead of silently triggering the keyword fallback.
        for hit in vector_hits:
            dist = hit.get("distance") or 0  # missing/None distance counts as 0
            hit["relevance_score"] = round(max(0, 1 - (dist / 2)), 3)
            hit["snippet"] = hit.get("content_snippet", "")

        unique_hits, missing_id = _deduplicate_by_file(vector_hits)
        if missing_id > 0:
            logger.warning(
                "Dropped %d vector results with missing file_id",
                missing_id,
            )

        lookup_ids = [hit["file_id"] for hit in unique_hits if hit.get("file_id")]
        if lookup_ids:
            metadata = enrich_file_metadata(conn, lookup_ids)
            for hit in unique_hits:
                row = metadata.get(hit["file_id"])
                if row:
                    hit.update(row)
            # Hits without an "id" after enrichment are discarded.
            matches = [hit for hit in unique_hits if hit.get("id")]

    # Access control filtering
    if role.sees_all:
        allowed = matches
        hidden_count = 0
    else:
        # D2: presence in semantic results reveals content — keep only hits
        # that resolve to both "visible" and "allow".
        allowed = []
        for hit in matches:
            if resolve_inherit_visibility(hit.get("mcp_view")) != "visible":
                continue
            if resolve_inherit_permission(hit.get("mcp_read")) != "allow":
                continue
            allowed.append(hit)
        hidden_count = len(matches) - len(allowed)

    presented = [
        hit if hit.get("mcp_view") != "visible" else _trim_file_result(hit)
        for hit in allowed
    ][:limit]

    notes: list[str] = []
    if outcome == _DEGRADED:
        notes.append("Results are keyword-based (semantic search unavailable)")
    if missing_id > 0:
        notes.append(f"Dropped {missing_id} results with missing file_id. Run --rebuild-vectors to fix.")

    return presented, notes, hidden_count, outcome
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# ---------------------------------------------------------------------------
|
|
270
|
+
# Helpers
|
|
271
|
+
# ---------------------------------------------------------------------------
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _deduplicate_by_file(results: List[Dict]) -> tuple[List[Dict], int]:
    """Group by file_id, keep highest-relevance chunk per file.

    Args:
        results: Result dicts; each is expected to carry ``file_id`` and
            optionally ``relevance_score``.

    Returns:
        ``(kept, dropped)``: *kept* holds one dict per distinct ``file_id`` in
        first-seen order, *dropped* counts the inputs whose ``file_id`` was
        missing or ``None``.
    """
    best: Dict[int, Dict] = {}
    dropped = 0
    for r in results:
        fid = r.get("file_id")
        if fid is None:
            dropped += 1
            continue
        existing = best.get(fid)
        # ``or 0`` treats an explicit None score like a missing one, so the
        # comparison cannot raise TypeError.
        if existing is None or (r.get("relevance_score") or 0) > (existing.get("relevance_score") or 0):
            best[fid] = r
    return list(best.values()), dropped
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _trim_chat_result(result: dict) -> dict:
    """Return a new dict holding only the entries of *result* whose key is in ``_CHAT_FIELDS``."""
    trimmed = {}
    for key, value in result.items():
        if key in _CHAT_FIELDS:
            trimmed[key] = value
    return trimmed
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _trim_file_result(result: dict) -> dict:
    """Return a new dict holding only the entries of *result* whose key is in ``_FILE_FIELDS``."""
    trimmed = {}
    for key, value in result.items():
        if key in _FILE_FIELDS:
            trimmed[key] = value
    return trimmed
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _build_chat_summary(
    chats: list[dict],
    query: str,
    *,
    status: str = _OK,
) -> str:
    """Compose the one-paragraph, human-readable summary for chat results.

    With no results the text depends on *status* (failed / degraded / plain
    "nothing found" with tips). With results it reports the count, up to
    three titles, how many entries lack a ``chat_title``, and a
    keyword-match remark when *status* is degraded.
    """
    total = len(chats)

    if total == 0:
        if status == _FAILED:
            return f"Chat search failed for '{query}' — try footprinter_search for keyword matching."
        if status == _DEGRADED:
            return f"Semantic search unavailable — keyword search returned no chats for '{query}'."
        return (
            f"No chats found for '{query}'. "
            "Tips: try different keywords, use footprinter_search "
            "for broader keyword matching across files/emails/browser."
        )

    # Entries without a truthy chat_title are reported as restricted.
    titled = [chat for chat in chats if chat.get("chat_title")]
    untitled = total - len(titled)
    noun = "chat" if total == 1 else "chats"

    pieces = [f"Found {total} {noun} matching '{query}'."]
    if titled:
        shown = ", ".join(repr(chat["chat_title"]) for chat in titled[:3])
        pieces.append(f" Top: {shown}.")
    if untitled > 0:
        pieces.append(f" ({untitled} with restricted visibility.)")
    if status == _DEGRADED:
        pieces.append(" (keyword match — semantic search was unavailable)")
    return "".join(pieces)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _build_file_summary(
    files: list[dict],
    query: str,
    *,
    status: str = _OK,
) -> str:
    """Compose the one-paragraph, human-readable summary for file results.

    With no results the text depends on *status* (failed / degraded / plain
    "nothing found" with tips). With results it reports the count, up to
    three names, how many entries lack a ``name``, and a keyword-match
    remark when *status* is degraded.
    """
    total = len(files)

    if total == 0:
        if status == _FAILED:
            return f"File search failed for '{query}' — try footprinter_search for keyword matching."
        if status == _DEGRADED:
            return f"Semantic search unavailable — keyword search returned no files for '{query}'."
        return (
            f"No files found for '{query}'. "
            "Tips: try different keywords, use footprinter_search "
            "for exact keyword matching across file names/paths."
        )

    # Entries without a truthy name are reported as restricted.
    named = [entry for entry in files if entry.get("name")]
    unnamed = total - len(named)
    noun = "file" if total == 1 else "files"

    pieces = [f"Found {total} {noun} matching '{query}'."]
    if named:
        shown = ", ".join(repr(entry["name"]) for entry in named[:3])
        pieces.append(f" Top: {shown}.")
    if unnamed > 0:
        pieces.append(f" ({unnamed} with restricted visibility.)")
    if status == _DEGRADED:
        pieces.append(" (keyword match — semantic search was unavailable)")
    return "".join(pieces)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Status service — visibility-aware system status aggregates."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
|
|
5
|
+
from footprinter.db import status as db_status
|
|
6
|
+
from footprinter.paths import get_config_path
|
|
7
|
+
from footprinter.services.roles import Role
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_status(conn: sqlite3.Connection, *, role: Role = Role.ADMIN) -> dict:
    """Return the system status appropriate for *role*.

    ``Role.VIEWER`` receives ``db_status.get_mcp_status(conn)``; every other
    role receives ``db_status.get_system_status`` evaluated against the
    current config path.
    """
    viewer_only = role == Role.VIEWER
    if viewer_only:
        return db_status.get_mcp_status(conn)

    config_path = get_config_path()
    return db_status.get_system_status(conn, config_path)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Visit (browser history) read service — get/list with role-based visibility filtering."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
|
|
5
|
+
from footprinter.db import browser as db
|
|
6
|
+
from footprinter.services.access_service import filter_result, filter_results_list
|
|
7
|
+
from footprinter.services.roles import Role
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get(conn: sqlite3.Connection, entry_id: int, *, role: Role = Role.ADMIN) -> dict | None:
    """Look up one browser visit by ID and apply role-based filtering.

    Returns ``None`` when the visit does not exist, the raw row when the role
    sees everything, and otherwise whatever ``filter_result("visit", ...)``
    yields.
    """
    visit = db.get_visit(conn, entry_id)
    # A missing row and an all-seeing role both return the lookup unchanged.
    if visit is None or role.sees_all:
        return visit
    return filter_result("visit", visit)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def assign(
    conn: sqlite3.Connection,
    entry_id: int,
    *,
    role: Role = Role.ADMIN,
    project_id: int | None = None,
    client_id: int | None = None,
) -> dict | None:
    """Link a visit to a project and/or a client.

    Returns a dict with ``id`` plus whichever of ``project_id`` /
    ``client_id`` were supplied, or ``None`` when the update reports no
    such visit.

    Raises:
        PermissionError: If *role* is not allowed to write.
    """
    if not role.can_write:
        raise PermissionError("Role does not permit write operations")

    updated = db.update_visit_relationships(
        conn,
        entry_id,
        project_id=project_id,
        client_id=client_id,
    )
    if updated is None:
        return None

    # Echo back only the relationships the caller actually passed.
    requested = {"project_id": project_id, "client_id": client_id}
    resp: dict = {"id": entry_id}
    resp.update({key: value for key, value in requested.items() if value is not None})
    return resp
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def list_(
    conn: sqlite3.Connection,
    *,
    role: Role = Role.ADMIN,
    limit: int = 50,
    page: int = 1,
) -> dict:
    """Return one page of browser visits, filtered according to *role*.

    For roles that do not see everything, the ``visits`` entry is replaced
    by the filtered list and a ``suppressed`` count is added to the same
    response dict.
    """
    listing = db.list_visits(conn, limit=limit, page=page)
    if not role.sees_all:
        visible, hidden_count = filter_results_list("visit", listing["visits"])
        listing["visits"] = visible
        listing["suppressed"] = hidden_count
    return listing
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Source registry — runtime registry for all Footprinter data sources.
|
|
3
|
+
|
|
4
|
+
Seeded from config.yaml on database init. Provides a query/mutation API
|
|
5
|
+
that all other code can use to discover available sources.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import sqlite3
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
import yaml
|
|
14
|
+
|
|
15
|
+
from footprinter.paths import get_config_path
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ConfigError(Exception):
    """Signals that the configuration file could not be found or parsed."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_config(config_path: Optional[str] = None) -> dict:
    """Load configuration from a YAML file.

    Args:
        config_path: Explicit path to the config file. When falsy, the path
            returned by ``get_config_path()`` is used.
            NOTE(review): any ``FOOTPRINTER_CONFIG`` env-var lookup is
            presumably done inside ``get_config_path()`` — it is not handled
            in this function; confirm there.

    Returns:
        The parsed top-level mapping. An empty file yields ``{}``.

    Raises:
        ConfigError: If the file is missing, is not valid UTF-8 YAML, or its
            top level is not a mapping.
    """
    path = config_path or str(get_config_path())
    try:
        # Explicit UTF-8 so parsing does not depend on the platform locale.
        with open(path, encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except FileNotFoundError:
        raise ConfigError(f"Config not found: {path}\nRun 'fp setup' to get started.") from None
    except (yaml.YAMLError, UnicodeDecodeError) as e:
        raise ConfigError(f"Invalid config file: {path}\n{e}") from None

    # safe_load returns None for an empty document; callers rely on .get().
    if loaded is None:
        return {}
    if not isinstance(loaded, dict):
        raise ConfigError(
            f"Invalid config file: {path}\n"
            f"Expected a mapping at the top level, got {type(loaded).__name__}."
        )
    return loaded
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def home_path() -> str:
    """Give the current user's home directory as a string."""
    home_dir = os.path.expanduser("~")
    return home_dir
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def remote_accounts() -> List[str]:
    """Return the account names of all remote sources listed in the config.

    Reads ``source_seeds`` from ``get_config()`` and keeps the ``account`` of
    every seed whose ``source_type`` is ``"remote"`` and whose account is
    non-empty. A missing or null ``source_seeds`` entry yields ``[]``.
    """
    # ``or`` fallbacks cover both an empty config and a YAML key that is
    # present but null (``source_seeds:``), which .get(..., []) would not.
    config = get_config() or {}
    seeds = config.get("source_seeds") or []
    return [s["account"] for s in seeds if s.get("source_type") == "remote" and s.get("account")]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class SourceRegistry:
    """Registry for data sources, backed by the ``sources`` table.

    All methods operate on the connection given at construction time;
    mutating methods commit on that connection before returning.
    """

    # Single definition of the insert used by seeding and registration.
    # INSERT OR IGNORE leaves an already-existing row untouched.
    _INSERT_SQL = """
        INSERT OR IGNORE
        INTO sources (name, source_type, adapter, account, label, icon, enabled, config)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """

    def __init__(self, conn: sqlite3.Connection):
        self.conn = conn

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _insert_source(
        self,
        cursor: sqlite3.Cursor,
        name: str,
        source_type: str,
        adapter: Optional[str],
        options: Dict[str, Any],
    ) -> bool:
        """Insert one row (no commit); return True if a row was actually added."""
        config = options.get("config")
        cursor.execute(
            self._INSERT_SQL,
            (
                name,
                source_type,
                adapter,
                options.get("account"),
                options.get("label"),
                options.get("icon"),
                1 if options.get("enabled", True) else 0,
                # Falsy config (None, {}, "") is stored as NULL.
                json.dumps(config) if config else None,
            ),
        )
        return cursor.rowcount > 0

    def _fetch_names(self, sql: str) -> List[str]:
        """Run *sql* and return the first column of every row."""
        cursor = self.conn.cursor()
        cursor.execute(sql)
        return [row[0] for row in cursor.fetchall()]

    @staticmethod
    def _rows_as_dicts(cursor: sqlite3.Cursor, rows: list) -> List[Dict[str, Any]]:
        """Pair each row with the column names from ``cursor.description``."""
        columns = [desc[0] for desc in cursor.description]
        return [dict(zip(columns, row)) for row in rows]

    # ------------------------------------------------------------------
    # Seeding
    # ------------------------------------------------------------------

    def seed_from_config(self, config_path: Optional[str] = None) -> int:
        """Seed the sources table from config.yaml ``source_seeds``.

        Uses INSERT OR IGNORE so user edits to existing rows are preserved.
        A missing or null ``source_seeds`` entry seeds nothing.

        Returns:
            The number of rows inserted.

        Raises:
            KeyError: If a seed lacks ``name`` or ``source_type``.
        """
        config = get_config(config_path) or {}
        seeds = config.get("source_seeds") or []
        inserted = 0
        cursor = self.conn.cursor()
        for seed in seeds:
            # NOTE(review): seeded rows always get adapter=NULL, whereas
            # register_source() honours an ``adapter`` kwarg — confirm this
            # asymmetry is intended.
            if self._insert_source(cursor, seed["name"], seed["source_type"], None, seed):
                inserted += 1
        self.conn.commit()
        return inserted

    # ------------------------------------------------------------------
    # Query API
    # ------------------------------------------------------------------

    def all_source_names(self) -> List[str]:
        """Return all source names, ordered by name."""
        return self._fetch_names("SELECT name FROM sources ORDER BY name")

    def all_sources(self) -> List[Dict[str, Any]]:
        """Return all sources as column-name → value dicts, ordered by name."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM sources ORDER BY name")
        return self._rows_as_dicts(cursor, cursor.fetchall())

    def get_source(self, name: str) -> Optional[Dict[str, Any]]:
        """Return a single source by name, or None."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM sources WHERE name = ?", (name,))
        row = cursor.fetchone()
        if row is None:
            return None
        return self._rows_as_dicts(cursor, [row])[0]

    def remote_source_names(self) -> List[str]:
        """Return names of sources with source_type='remote', ordered by name."""
        return self._fetch_names(
            "SELECT name FROM sources WHERE source_type = 'remote' ORDER BY name"
        )

    def file_source_names(self) -> List[str]:
        """Return names of sources with source_type='file', ordered by name."""
        return self._fetch_names(
            "SELECT name FROM sources WHERE source_type = 'file' ORDER BY name"
        )

    def source_label(self, name: str) -> Optional[str]:
        """Return the label for a source, or None if not found."""
        source = self.get_source(name)
        return source["label"] if source else None

    def source_account(self, name: str) -> Optional[str]:
        """Return the account for a source, or None if not found."""
        source = self.get_source(name)
        return source["account"] if source else None

    def is_remote_source(self, name: str) -> bool:
        """Return True if the named source exists and is a remote source."""
        source = self.get_source(name)
        return source is not None and source["source_type"] == "remote"

    # ------------------------------------------------------------------
    # Mutation API
    # ------------------------------------------------------------------

    def update_label(self, name: str, label: str) -> bool:
        """Update a source's label. Returns True if a row was updated."""
        cursor = self.conn.cursor()
        cursor.execute(
            "UPDATE sources SET label = ?, updated_at = CURRENT_TIMESTAMP WHERE name = ?",
            (label, name),
        )
        self.conn.commit()
        return cursor.rowcount > 0

    def set_enabled(self, name: str, enabled: bool) -> bool:
        """Enable or disable a source. Returns True if a row was updated."""
        cursor = self.conn.cursor()
        cursor.execute(
            "UPDATE sources SET enabled = ?, updated_at = CURRENT_TIMESTAMP WHERE name = ?",
            (1 if enabled else 0, name),
        )
        self.conn.commit()
        return cursor.rowcount > 0

    def register_source(self, name: str, source_type: str, **kwargs) -> bool:
        """Register a new source. Returns True if inserted, False if already exists.

        Recognised keyword arguments: ``adapter``, ``account``, ``label``,
        ``icon``, ``enabled`` (default True) and ``config`` (JSON-encoded
        before storage). Other keywords are ignored.
        """
        cursor = self.conn.cursor()
        inserted = self._insert_source(cursor, name, source_type, kwargs.get("adapter"), kwargs)
        self.conn.commit()
        return inserted
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Utility modules for Footprinter."""
|
|
2
|
+
|
|
3
|
+
from .hash_utils import compute_md5, compute_sha256
|
|
4
|
+
from .mime import mime_to_content_type
|
|
5
|
+
from .time import UTC_FMT, utc_now_iso
|
|
6
|
+
|
|
7
|
+
__all__ = ["compute_md5", "compute_sha256", "mime_to_content_type", "UTC_FMT", "utc_now_iso"]
|