footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""IngestService — single authority on ingest tracking and FTS optimization.
|
|
2
|
+
|
|
3
|
+
All ingest operations (pipeline and non-pipeline) flow through this service.
|
|
4
|
+
Wraps PipeRunner for pipeline ingests; called directly for non-pipeline ingests.
|
|
5
|
+
Manages FTS trigger lifecycle around batch runs.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import sqlite3
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import TYPE_CHECKING, Callable, List, Optional
|
|
15
|
+
|
|
16
|
+
from footprinter.utils.time import utc_now_iso
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from footprinter.ingest.database import Database
|
|
20
|
+
|
|
21
|
+
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class IngestService:
    """Track ingest lifecycle: begin, complete, fail, query history.

    Optionally manages FTS trigger optimization around batch runs
    when constructed with a ``get_db`` callable.

    Rows are read by column name (``row["completed_at"]``, ``dict(row)``),
    so ``conn`` is expected to use a name-addressable row factory such as
    ``sqlite3.Row``.
    """

    def __init__(
        self,
        conn: sqlite3.Connection,
        get_db: Optional[Callable[[], "Database"]] = None,
    ) -> None:
        """Store the connection and the optional database accessor.

        Args:
            conn: Connection used for every read/write of the ``ingests`` table.
            get_db: Zero-argument callable returning the object that exposes
                the FTS helpers (``check_fts_triggers``, ``create_fts_triggers``,
                ``check_fts_health``, ``drop_fts_triggers``,
                ``rebuild_fts_indexes``). When omitted, all FTS handling is
                skipped. The callable may also return ``None``.
        """
        self.conn = conn
        self._get_db = get_db

    def begin(
        self,
        pipe: str,
        mode: str | None = None,
        trigger: str | None = None,
    ) -> int:
        """Insert a running ingest record and return its id.

        The row is committed immediately.
        """
        cursor = self.conn.execute(
            "INSERT INTO ingests (pipe, started_at, status, mode, trigger) VALUES (?, ?, 'running', ?, ?)",
            (pipe, utc_now_iso(), mode, trigger),
        )
        self.conn.commit()
        return cursor.lastrowid  # type: ignore[return-value]

    def complete(
        self,
        ingest_id: int,
        result: dict | None = None,
        metadata: dict | None = None,
    ) -> None:
        """Mark an ingest as completed with optional result counts and metadata.

        Counters missing from ``result`` are stored as 0; a missing
        ``elapsed_seconds`` is stored as NULL. ``metadata`` is serialized to
        JSON, or stored as NULL when not given.
        """
        result = result or {}
        meta_json = json.dumps(metadata) if metadata is not None else None
        self.conn.execute(
            "UPDATE ingests SET"
            " status = 'completed',"
            " completed_at = ?,"
            " items_processed = ?,"
            " items_new = ?,"
            " items_updated = ?,"
            " items_skipped = ?,"
            " errors = ?,"
            " elapsed_seconds = ?,"
            " metadata = ?"
            " WHERE id = ?",
            (
                utc_now_iso(),
                result.get("items_processed", 0),
                result.get("items_new", 0),
                result.get("items_updated", 0),
                result.get("items_skipped", 0),
                result.get("errors", 0),
                result.get("elapsed_seconds"),
                meta_json,
                ingest_id,
            ),
        )
        self.conn.commit()

    def fail(self, ingest_id: int, error: str) -> None:
        """Mark an ingest as failed with an error message.

        The message is stored as ``{"error": ...}`` JSON in ``metadata``,
        replacing whatever metadata the row held before.
        """
        self.conn.execute(
            "UPDATE ingests SET status = 'failed', completed_at = ?, metadata = ? WHERE id = ?",
            (
                utc_now_iso(),
                json.dumps({"error": error}),
                ingest_id,
            ),
        )
        self.conn.commit()

    def last_run(self, pipe: str) -> datetime | None:
        """Return the completed_at timestamp of the most recent successful ingest.

        Returns ``None`` when the pipe has no row with status ``'completed'``.
        """
        row = self.conn.execute(
            "SELECT completed_at FROM ingests"
            " WHERE pipe = ? AND status = 'completed'"
            " ORDER BY completed_at DESC LIMIT 1",
            (pipe,),
        ).fetchone()
        if row is None:
            return None
        return datetime.fromisoformat(row["completed_at"])

    def run_pipe(
        self,
        pipe: str,
        *,
        mode: str | None = None,
        trigger: str | None = None,
        runner,
        on_progress=None,
    ) -> dict:
        """Wrap a PipeRunner.run_pipe call with ingest tracking.

        Creates an ingests record, passes last_run to runner.run_pipe(),
        then records completion or failure.

        A result whose ``status`` is ``"error"`` is recorded as failed but
        still returned. An exception is recorded as failed and re-raised.
        """
        ingest_id = self.begin(pipe, mode=mode, trigger=trigger)
        try:
            result = runner.run_pipe(pipe, on_progress=on_progress, last_run=self.last_run(pipe))
            if result.get("status") == "error":
                self.fail(ingest_id, error=result.get("error", "unknown"))
            else:
                self.complete(ingest_id, result=result)
            return result
        except Exception as e:
            self.fail(ingest_id, error=str(e))
            raise

    def history(self, pipe: str, limit: int = 20) -> list[dict]:
        """Return recent ingest records for a pipe, most recent first.

        Each record is a plain dict; a non-NULL ``metadata`` column is
        decoded from JSON.
        """
        rows = self.conn.execute(
            "SELECT * FROM ingests WHERE pipe = ? ORDER BY started_at DESC LIMIT ?",
            (pipe, limit),
        ).fetchall()
        records = []
        for row in rows:
            record = dict(row)
            if record.get("metadata") is not None:
                record["metadata"] = json.loads(record["metadata"])
            records.append(record)
        return records

    # ── FTS optimization ────────────────────────────────────────────

    def ensure_fts_health(self, full_mode: bool) -> None:
        """Check FTS health and restore missing triggers.

        Always probes FTS health (both modes). In incremental mode, also
        restores missing triggers from a prior crash (SIGKILL/OOM during
        a full-mode run) before the health probe. Trigger restore is
        skipped in full mode because triggers are dropped anyway.

        No-op if constructed without ``get_db`` or if ``get_db`` returns
        ``None``. ``sqlite3.OperationalError`` is logged at debug level and
        swallowed: the probe is best-effort.
        """
        if self._get_db is None:
            return
        try:
            db = self._get_db()
            if db is None:
                return
            if not full_mode:
                missing_triggers = db.check_fts_triggers()
                if missing_triggers:
                    log.info(
                        "Restoring %d missing FTS triggers from prior crash",
                        len(missing_triggers),
                    )
                    db.create_fts_triggers()
            fts_health = db.check_fts_health()
            for table, info in fts_health.items():
                if info["status"] == "error":
                    log.warning(
                        "FTS index corrupted (%s) — run 'fp ingest --repair-fts' to fix",
                        table,
                    )
        except sqlite3.OperationalError as e:
            log.debug("FTS health probe skipped: %s", e)

    def run_pipes(
        self,
        pipes: List[str],
        *,
        runner,
        full_mode: bool = False,
        on_pipe_start: Optional[Callable] = None,
        on_pipe_end: Optional[Callable] = None,
        on_progress: Optional[Callable] = None,
        pipe_hook: Optional[Callable] = None,
    ) -> List[dict]:
        """Run multiple pipes with FTS optimization around the batch.

        In full mode, drops FTS triggers before the first pipe and rebuilds
        FTS indexes after the last pipe (or on error) to avoid per-row
        trigger overhead during bulk ingest. FTS optimization requires
        ``get_db`` — silently skipped if constructed without it, or if
        ``get_db`` returns ``None``.

        Returns whatever ``runner.run_pipes`` returns; exceptions raised by
        the runner propagate after the rebuild attempt.
        """
        self.ensure_fts_health(full_mode)

        fts_dropped = False
        if full_mode and self._get_db is not None:
            try:
                db = self._get_db()
                # ensure_fts_health tolerates a None database; do the same
                # here instead of failing with AttributeError before any
                # pipe has run.
                if db is not None:
                    db.drop_fts_triggers()
                    fts_dropped = True
            except sqlite3.OperationalError as e:
                log.warning("Failed to drop FTS triggers: %s", e)

        try:
            return runner.run_pipes(
                pipes,
                on_pipe_start=on_pipe_start,
                on_pipe_end=on_pipe_end,
                on_progress=on_progress,
                pipe_hook=pipe_hook,
            )
        finally:
            # Only rebuild when the triggers were actually dropped above.
            if fts_dropped:
                try:
                    db = self._get_db()
                    if db is None:
                        # Do not mask the runner's outcome with an
                        # AttributeError; report and move on.
                        log.error("Failed to rebuild FTS indexes: %s", "database unavailable")
                    else:
                        db.rebuild_fts_indexes()
                except sqlite3.OperationalError as e:
                    log.error("Failed to rebuild FTS indexes: %s", e)
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""Project service — get/list with role-based visibility, upsert and soft delete."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from footprinter.db import projects as db
|
|
7
|
+
from footprinter.services.access_service import (
|
|
8
|
+
_read_visibility,
|
|
9
|
+
filter_result,
|
|
10
|
+
filter_results_list,
|
|
11
|
+
)
|
|
12
|
+
from footprinter.services.includes import validate_include
|
|
13
|
+
from footprinter.services.roles import Role
|
|
14
|
+
|
|
15
|
+
# Nested collections a caller may request through the ``include`` argument
# of get() and list_(); both pass this set to validate_include().
VALID_INCLUDES = frozenset({"files", "folders"})
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get(
    conn: sqlite3.Connection,
    project_id: int,
    *,
    role: Role = Role.ADMIN,
    include: list[str] | None = None,
) -> dict | None:
    """Look up one project by ID and shape it for the caller's role.

    ``include`` may name nested data to attach:
    - ``"files"`` — files belonging to the project
    - ``"folders"`` — folders belonging to the project

    Nested data is attached only when the caller has full access to the
    project (the role sees everything, or the project reads as "visible").
    Returns ``None`` when the project does not exist; for roles that do not
    see everything the result is passed through ``filter_result``.
    """
    requested = validate_include(include, VALID_INCLUDES)
    project = db.get_project_detail(conn, project_id)
    if project is None:
        return None

    unrestricted = role.sees_all
    full_access = unrestricted or _read_visibility(project) == "visible"

    if full_access and requested:
        if "files" in requested:
            # Imported lazily, as in the rest of the service layer.
            from footprinter.services import file_service

            files_page = file_service.list_(conn, role=role, project_id=project_id)
            project["files"] = files_page["files"]
        if "folders" in requested:
            from footprinter.services import folder_service

            folders_page = folder_service.list_(conn, role=role, project_id=project_id, depth=None)
            project["folders"] = folders_page["folders"]

    return project if unrestricted else filter_result("project", project)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def list_(
    conn: sqlite3.Connection,
    *,
    role: Role = Role.ADMIN,
    include: list[str] | None = None,
    status: Optional[str | list[str]] = None,
    client: Optional[str] = None,
    project_type: Optional[str] = None,
    limit: int = 50,
    page: int = 1,
) -> dict:
    """Return one page of projects, shaped for the caller's role.

    For roles that do not see everything, the project list is passed through
    ``filter_results_list`` and the suppressed count is stored under
    ``"suppressed"``. Requested includes (``"files"``, ``"folders"``) are
    attached only to projects that were fully visible before filtering.
    """
    requested = validate_include(include, VALID_INCLUDES)
    response = db.list_projects(
        conn,
        status=status,
        client=client,
        project_type=project_type,
        limit=limit,
        page=page,
    )

    restricted = not role.sees_all
    fully_visible: set[int] = set()

    if restricted:
        if requested:
            # Must be captured before filtering, which strips fields.
            fully_visible = {
                item["id"] for item in response["projects"] if _read_visibility(item) == "visible"
            }
        kept, hidden_count = filter_results_list("project", response["projects"])
        response["projects"] = kept
        response["suppressed"] = hidden_count

    if not requested:
        return response

    for item in response["projects"]:
        if restricted and item["id"] not in fully_visible:
            continue
        if "files" in requested:
            from footprinter.services import file_service

            files_page = file_service.list_(conn, role=role, project_id=item["id"])
            item["files"] = files_page["files"]
        if "folders" in requested:
            from footprinter.services import folder_service

            folders_page = folder_service.list_(
                conn,
                role=role,
                project_id=item["id"],
                depth=None,
            )
            item["folders"] = folders_page["folders"]

    return response
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def resolve_by_name(
    conn: sqlite3.Connection,
    name: str,
    *,
    role: Role = Role.ADMIN,
) -> dict | None:
    """Find a project by fuzzy name and return navigation data for it.

    Returns:
        The navigation dict when exactly one project matches, or when
        exactly one of several matches equals ``name`` case-insensitively.
        A disambiguation dict when several candidates remain.
        None when nothing matches (or, for roles that do not see
        everything, when every match is hidden).
    """
    candidates = db.find_by_name_fuzzy(conn, name)
    if not candidates:
        return None

    if not role.sees_all:
        # Hidden projects must not be discoverable by restricted roles.
        candidates = [c for c in candidates if _read_visibility(c) != "hidden"]
        if not candidates:
            return None

    if len(candidates) == 1:
        return _build_project_navigation(conn, candidates[0], role=role)

    # Several fuzzy hits: a unique case-insensitive exact match wins.
    wanted = name.lower()
    exact_hits = [c for c in candidates if c["project_name"].lower() == wanted]
    if len(exact_hits) == 1:
        return _build_project_navigation(conn, exact_hits[0], role=role)

    return _build_disambiguation(candidates, "project_name", name, role)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _build_project_navigation(conn: sqlite3.Connection, row: dict, *, role: Role) -> dict:
    """Assemble the navigation dict for a single project row.

    Opaque projects are returned through ``filter_result`` for roles that do
    not see everything. Otherwise the row is merged with the data from
    ``db.get_project_navigation`` and, for restricted roles, the child
    folders are filtered by visibility.
    """
    row_visibility = _read_visibility(row)
    restricted = not role.sees_all
    if restricted and row_visibility == "opaque":
        return filter_result("project", row)

    navigation = db.get_project_navigation(conn, row["id"])
    merged = {**row, **navigation}

    if restricted:
        # The suppressed count is not reported for nested folders.
        visible_folders, _suppressed = filter_results_list("folder", merged["folders"])
        merged["folders"] = visible_folders
    return merged
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _build_disambiguation(rows: list[dict], name_col: str, query: str, role: Role) -> dict:
|
|
163
|
+
"""Build a disambiguation dict from multiple matches."""
|
|
164
|
+
from footprinter.services.access_service import resolve_inherit_visibility
|
|
165
|
+
|
|
166
|
+
matches = []
|
|
167
|
+
for r in rows:
|
|
168
|
+
vis = resolve_inherit_visibility(r.get("mcp_view"))
|
|
169
|
+
if vis == "opaque":
|
|
170
|
+
matches.append({"id": r["id"], "visibility": "restricted"})
|
|
171
|
+
else:
|
|
172
|
+
matches.append({"id": r["id"], "name": r[name_col]})
|
|
173
|
+
return {
|
|
174
|
+
"disambiguation": True,
|
|
175
|
+
"message": f"Multiple matches for '{query}'. Please be more specific.",
|
|
176
|
+
"matches": matches,
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def upsert(
    conn: sqlite3.Connection,
    *,
    project_name: str,
    role: Role = Role.ADMIN,
    root_path: Optional[str] = None,
    client_id: Optional[int] = None,
    project_type: Optional[str] = None,
    description: Optional[str] = None,
    github_url: Optional[str] = None,
    status: Optional[str] = None,
    status_reason: Optional[str] = None,
) -> dict:
    """Create a project or update the existing one.

    The existing project is looked up with ``db.find_project_id_by_key``
    using ``root_path`` and the stripped ``project_name``.

    On create, ``status`` defaults to ``"active"`` and ``status_reason`` is
    not passed on. On update, only arguments that are not ``None`` are
    written, plus ``project_name``, which is always written.

    Returns:
        ``{"id": ..., "action": "created" | "updated"}``.

    Raises:
        PermissionError: The role cannot write.
        ValueError: ``project_name`` is empty after stripping.
    """
    if not role.can_write:
        raise PermissionError("Role does not permit write operations")

    project_name = (project_name or "").strip()
    if not project_name:
        raise ValueError("project_name is required")

    existing_id = db.find_project_id_by_key(
        conn,
        root_path=root_path,
        project_name=project_name,
    )

    if existing_id is not None:
        optional_values = {
            "project_type": project_type,
            "root_path": root_path,
            "description": description,
            "github_url": github_url,
            "client_id": client_id,
            "status": status,
            "status_reason": status_reason,
        }
        changes: dict = {
            column: value for column, value in optional_values.items() if value is not None
        }
        # Always update name — desired-state semantics
        changes["project_name"] = project_name
        db.update_project(conn, existing_id, **changes)
        return {"id": existing_id, "action": "updated"}

    created = db.create_project(
        conn,
        project_name=project_name,
        root_path=root_path,
        client_id=client_id,
        project_type=project_type,
        description=description,
        github_url=github_url,
        status=status or "active",
    )
    return {"id": created["id"], "action": "created"}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def delete(
    conn: sqlite3.Connection,
    project_id: int,
    *,
    role: Role = Role.ADMIN,
) -> dict | None:
    """Soft-delete a project: its status becomes 'removed'.

    The row is updated with ``status_reason="cli:delete"`` rather than being
    deleted.

    Returns:
        ``{"id", "status"}`` on success, ``None`` when ``db.update_project``
        returns ``None``.

    Raises:
        PermissionError: The role cannot write.
    """
    if not role.can_write:
        raise PermissionError("Role does not permit write operations")

    updated = db.update_project(conn, project_id, status="removed", status_reason="cli:delete")
    return None if updated is None else {"id": project_id, "status": "removed"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Role enum for the service layer."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Role(Enum):
    """Who is calling the service layer.

    The role decides whether writes are allowed and whether metadata is
    returned unfiltered. It is chosen by the interface layer:
    - CLI passes Role.ADMIN (full access, local user)
    - MCP passes Role.VIEWER (read-only, filtered metadata)
    """

    ADMIN = "admin"
    VIEWER = "viewer"

    @property
    def can_write(self) -> bool:
        """True when the role is allowed to perform write operations."""
        return self is Role.ADMIN

    @property
    def sees_all(self) -> bool:
        """True when the role sees all metadata, sensitive paths included."""
        return self is Role.ADMIN
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""search_service — multi-source keyword search with visibility filtering.
|
|
2
|
+
|
|
3
|
+
Orchestrates per-source searches (files, emails, chats, browser) and applies
|
|
4
|
+
role-based visibility filtering + content stripping.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sqlite3
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from footprinter.db.search import (
|
|
11
|
+
search_browser_keyword,
|
|
12
|
+
search_chats_keyword,
|
|
13
|
+
search_emails_keyword,
|
|
14
|
+
search_files_keyword,
|
|
15
|
+
)
|
|
16
|
+
from footprinter.db.sql_utils import split_query_terms
|
|
17
|
+
from footprinter.services.access_service import (
|
|
18
|
+
filter_results_list,
|
|
19
|
+
strip_content_for_denied,
|
|
20
|
+
)
|
|
21
|
+
from footprinter.services.roles import Role
|
|
22
|
+
from footprinter.visibility import get_source_visibility
|
|
23
|
+
|
|
24
|
+
# Sources searched by search() when the caller supplies no ``sources`` list.
DEFAULT_SOURCES = ["files", "emails", "chats", "browser"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def search(
    conn: sqlite3.Connection,
    *,
    role: Role = Role.ADMIN,
    query: str = "",
    sources: Optional[list[str]] = None,
    project: Optional[str] = None,
    client: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    limit: int = 50,
    account: Optional[str] = None,
    sender: Optional[str] = None,
    days_back: Optional[int] = None,
    folder: Optional[str] = None,
    mime_type: Optional[str] = None,
) -> dict:
    """Run a keyword search over the requested indexed sources.

    The result dict has one list per searched source and, when anything was
    suppressed, a ``suppressed`` total. Roles that do not see everything get
    hidden items excluded, results passed through ``filter_results_list``,
    and email/chat content stripped via ``strip_content_for_denied``.
    The browser source is gated at source level and may be omitted entirely.
    """
    if not sources:
        sources = list(DEFAULT_SOURCES)

    has_query = bool(query and query.strip())
    terms = split_query_terms(query) if has_query else []
    has_query = has_query and bool(terms)

    results: dict = {}
    suppressed_total = 0

    # (result key, entity kind, search function, source-specific filters,
    #  whether content is stripped for denied items)
    row_level_sources = (
        (
            "files",
            "file",
            search_files_keyword,
            {"account": account, "folder": folder, "mime_type": mime_type},
            False,
        ),
        (
            "emails",
            "email",
            search_emails_keyword,
            {"account": account, "sender": sender, "days_back": days_back},
            True,
        ),
        ("chats", "chat", search_chats_keyword, {}, True),
    )

    for key, entity, run_search, extra_filters, strip_content in row_level_sources:
        if key not in sources:
            continue
        unrestricted = role.sees_all
        rows = run_search(
            conn,
            terms=terms,
            has_query=has_query,
            project=project,
            client=client,
            date_from=date_from,
            date_to=date_to,
            limit=limit,
            exclude_hidden=not unrestricted,
            **extra_filters,
        )
        if unrestricted:
            results[key] = rows
            continue
        visible_rows, hidden_count = filter_results_list(entity, rows)
        if strip_content:
            strip_content_for_denied(entity, visible_rows)
        results[key] = visible_rows
        suppressed_total += hidden_count

    if "browser" in sources:
        browser_rows = _search_browser_with_visibility(
            conn,
            terms=terms,
            has_query=has_query,
            date_from=date_from,
            date_to=date_to,
            limit=limit,
            role=role,
        )
        # None means the browser source is hidden from this role.
        if browser_rows is not None:
            results["browser"] = browser_rows

    if suppressed_total > 0:
        results["suppressed"] = suppressed_total

    return results
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _search_browser_with_visibility(
    conn: sqlite3.Connection,
    *,
    terms: list[str],
    has_query: bool,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    limit: int = 50,
    role: Role = Role.ADMIN,
) -> Optional[list[dict]]:
    """Search browser visits, gated by the visibility of the browser source.

    Roles that see everything skip the gate. For other roles the visibility
    of ``"source:browser"`` decides the outcome: ``"hidden"`` returns None
    without querying, ``"opaque"`` reduces each hit to ``id`` and
    ``browser``, anything else returns the raw rows. The SQL itself lives in
    db.search; only the gating decision is made here.
    """
    source_visibility = None if role.sees_all else get_source_visibility(conn, "source:browser")
    if source_visibility == "hidden":
        return None

    visits = search_browser_keyword(
        conn,
        terms=terms,
        has_query=has_query,
        date_from=date_from,
        date_to=date_to,
        limit=limit,
    )

    if source_visibility != "opaque":
        return visits

    # Source-level opaque gating
    return [{"id": visit["id"], "browser": visit["browser"]} for visit in visits]
|