footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,729 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lightweight terminal status command for Footprinter.
|
|
3
|
+
|
|
4
|
+
Shows data counts, source health, and last run info using rich tables.
|
|
5
|
+
No web/FastAPI dependencies required.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
fp status # Rich formatted output
|
|
9
|
+
fp status --json # Machine-readable JSON
|
|
10
|
+
fp status --last-run # Last pipeline run details
|
|
11
|
+
python -m footprinter.cli.status
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import sqlite3
|
|
17
|
+
from datetime import datetime, timezone
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from rich.console import Console
|
|
22
|
+
from rich.panel import Panel
|
|
23
|
+
from rich.table import Table
|
|
24
|
+
|
|
25
|
+
from footprinter.connectors import discover_connectors, is_installed, resolve_hook
|
|
26
|
+
from footprinter.paths import get_chroma_path, get_config_path, get_db_path
|
|
27
|
+
from footprinter.source_registry import get_config
|
|
28
|
+
|
|
29
|
+
# Module-wide Rich console; every rendering helper in this file prints through it.
console = Console()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_data_counts(db_path: Path) -> dict:
    """Collect all data counts shown by ``fp status`` from the SQLite database.

    The connection is configured and queried inside one ``try``/``finally`` so
    it is closed even when one of the PRAGMA statements fails.

    Args:
        db_path: Path of the SQLite database file to inspect.

    Returns:
        The dict filled in by ``_query_all_counts``.
    """
    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        # Rows are read by column name in the query helpers below.
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA busy_timeout=5000")
        conn.execute("PRAGMA foreign_keys=ON")
        return _query_all_counts(conn.cursor(), {})
    finally:
        conn.close()


def _safe_fetchall(cursor, sql: str) -> list:
    """Run *sql* and return every row, or ``[]`` on ``sqlite3.OperationalError``."""
    try:
        cursor.execute(sql)
        return cursor.fetchall()
    except sqlite3.OperationalError:
        return []


def _safe_fetchone(cursor, sql: str):
    """Run *sql* and return the first row, or ``None`` on ``sqlite3.OperationalError``."""
    try:
        cursor.execute(sql)
        return cursor.fetchone()
    except sqlite3.OperationalError:
        return None


def _safe_count(cursor, sql: str) -> int:
    """Return the first column of the first row of *sql*, or 0 if the query fails."""
    row = _safe_fetchone(cursor, sql)
    return row[0] if row is not None else 0


def _rows_as_dicts(rows, keys) -> list:
    """Copy the named columns of each row into a plain dict."""
    return [{key: row[key] for key in keys} for row in rows]


def _query_last_run(cursor) -> Optional[dict]:
    """Return the most recently completed ingest row as a dict, or ``None``.

    Rows that are still ``running`` and rows without a ``mode`` are excluded.
    When ``elapsed_seconds`` is NULL it is derived from the start and end
    timestamps if both parse as ISO datetimes.
    """
    row = _safe_fetchone(
        cursor,
        """
        SELECT pipe, started_at, completed_at, mode,
               items_processed, errors, status, elapsed_seconds
        FROM ingests
        WHERE status != 'running' AND mode IS NOT NULL
        ORDER BY completed_at DESC LIMIT 1
        """,
    )
    if not row:
        return None

    elapsed = row["elapsed_seconds"]
    if elapsed is None and row["started_at"] and row["completed_at"]:
        try:
            start = datetime.fromisoformat(row["started_at"])
            end = datetime.fromisoformat(row["completed_at"])
            elapsed = round((end - start).total_seconds(), 1)
        except (ValueError, TypeError):
            # Unparseable or naive/aware-mixed timestamps: leave elapsed unknown.
            pass

    return {
        "mode": row["mode"] or "unknown",
        "pipe": row["pipe"],
        "started_at": row["started_at"],
        "completed_at": row["completed_at"],
        "items_processed": row["items_processed"] or 0,
        "errors": row["errors"] or 0,
        "status": row["status"],
        "elapsed_seconds": elapsed,
    }


def _query_all_counts(cursor, counts: dict) -> dict:
    """Fill *counts* with every figure the status views need and return it.

    Each query is independent: if one raises ``sqlite3.OperationalError``
    (for example because its table does not exist) only that entry falls back
    to its empty default (``0``, ``{}``, ``[]`` or ``None``).

    Args:
        cursor: Cursor of a connection whose ``row_factory`` is ``sqlite3.Row``.
        counts: Dict to populate; it is mutated in place.

    Returns:
        The same *counts* dict.
    """
    # Files by source (size converted from bytes to MB, one decimal).
    file_rows = _safe_fetchall(
        cursor,
        """
        SELECT source, COUNT(*) as count, SUM(size_bytes) as size
        FROM files WHERE status != 'removed'
        GROUP BY source
        """,
    )
    counts["files"] = {
        row["source"]: {
            "count": row["count"],
            "size_mb": round((row["size"] or 0) / 1024 / 1024, 1),
        }
        for row in file_rows
    }

    # Total files
    counts["files_total"] = _safe_count(cursor, "SELECT COUNT(*) FROM files WHERE status != 'removed'")

    # Folders by source; a NULL source is reported as "local".
    folder_rows = _safe_fetchall(
        cursor,
        """
        SELECT source, COUNT(*) as count
        FROM folders WHERE status != 'removed'
        GROUP BY source
        """,
    )
    counts["folders"] = {row["source"] or "local": row["count"] for row in folder_rows}

    # Browser visits and emails
    counts["visits"] = _safe_count(cursor, "SELECT COUNT(*) FROM visits")
    counts["emails"] = _safe_count(cursor, "SELECT COUNT(*) FROM emails")

    # Chats by account
    chat_rows = _safe_fetchall(cursor, "SELECT account, COUNT(*) as count FROM chats GROUP BY account")
    counts["chats"] = {row["account"]: row["count"] for row in chat_rows}

    # Chat messages
    counts["messages"] = _safe_count(cursor, "SELECT COUNT(*) FROM messages")

    # Top chats by message count
    counts["top_chats"] = _rows_as_dicts(
        _safe_fetchall(
            cursor,
            """
            SELECT title, message_count, created_at
            FROM chats
            ORDER BY message_count DESC
            LIMIT 5
            """,
        ),
        ("title", "message_count", "created_at"),
    )

    # Chat date range
    range_row = _safe_fetchone(
        cursor, "SELECT MIN(created_at) as earliest, MAX(created_at) as latest FROM chats"
    )
    counts["chat_date_range"] = {
        "earliest": range_row["earliest"] if range_row else None,
        "latest": range_row["latest"] if range_row else None,
    }

    # Remote source accounts (for display labels in print_status)
    source_rows = _safe_fetchall(cursor, "SELECT name, account FROM sources WHERE source_type = 'remote'")
    counts["remote_source_accounts"] = {row["name"]: row["account"] for row in source_rows}

    # Recently modified files
    counts["recent_files"] = _rows_as_dicts(
        _safe_fetchall(
            cursor,
            """
            SELECT name, source, modified_at
            FROM files WHERE status != 'removed'
            ORDER BY modified_at DESC
            LIMIT 10
            """,
        ),
        ("name", "source", "modified_at"),
    )

    # Recent uploads
    counts["recent_uploads"] = _rows_as_dicts(
        _safe_fetchall(
            cursor,
            """
            SELECT filename, type, status, items_added, uploaded_at
            FROM uploads
            ORDER BY uploaded_at DESC
            LIMIT 5
            """,
        ),
        ("filename", "type", "status", "items_added", "uploaded_at"),
    )

    # Last ingest run (exclude 'running' and last-run-only rows which lack mode)
    counts["last_run"] = _query_last_run(cursor)

    return counts
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def get_source_health(config: Optional[dict]) -> dict:
    """Check source health via connector hooks and the semantic-search index.

    Args:
        config: Loaded configuration mapping, or ``None`` when no config could
            be loaded. Connector hooks are only invoked when it is truthy.

    Returns:
        A dict with three keys:

        * ``connector_rows`` - rows returned by the installed connectors'
          ``health_check`` hooks.
        * ``remote_enabled`` - ``True`` when at least one connector row exists.
        * ``semantic`` - ``enabled`` / ``installed`` / ``available`` flags and,
          when the vector store could be queried, ``file_chunks`` and
          ``chat_docs`` counts.
    """
    health: dict = {}

    # Dynamic connector health via ConnectorSpec.health_check hooks
    connector_rows: list[dict] = []
    for spec in discover_connectors().values():
        if is_installed(spec) and spec.health_check:
            try:
                fn = resolve_hook(spec.health_check)
                if fn and config:
                    connector_rows.extend(fn(config))
            except Exception:
                # Deliberately best-effort: one broken connector hook must not
                # stop the status command from rendering.
                pass
    health["connector_rows"] = connector_rows
    health["remote_enabled"] = len(connector_rows) > 0

    # Semantic search - config-aware health check.
    # A config with "semantic:" set to null (or any non-mapping) counts as disabled
    # instead of raising AttributeError on ".get".
    semantic_cfg = config.get("semantic") if config else None
    if not isinstance(semantic_cfg, dict):
        semantic_cfg = {}
    config_enabled = semantic_cfg.get("file_vectorization", False)

    try:
        from footprinter.semantic.vector_store import (
            VectorStore,
            _semantic_available,
        )

        installed = _semantic_available()
    except ImportError:
        installed = False
        VectorStore = None  # type: ignore[assignment]

    if not config_enabled:
        health["semantic"] = {"enabled": False, "installed": installed, "available": False}
    elif not installed:
        health["semantic"] = {"enabled": True, "installed": False, "available": False}
    elif not get_chroma_path().exists():
        # Enabled and installed, but the index directory has not been created yet.
        health["semantic"] = {"enabled": True, "installed": True, "available": False}
    else:
        try:
            vs = VectorStore.get_instance()
            file_stats = vs.get_file_stats()
            conv_stats = vs.get_chat_stats()
            health["semantic"] = {
                "enabled": True,
                "installed": True,
                "available": True,
                "file_chunks": file_stats.get("total_chunks", 0),
                "chat_docs": conv_stats.get("total_documents", 0),
            }
        except Exception:
            # Any failure while opening or querying the store is reported as unavailable.
            health["semantic"] = {
                "enabled": True,
                "installed": True,
                "available": False,
            }

    return health
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def format_relative_time(dt_str: Optional[str]) -> str:
    """Convert an ISO datetime string to a relative time such as ``'2h ago'``.

    Timestamps without a UTC offset are treated as UTC. A trailing ``Z`` is
    accepted on every supported Python version (``datetime.fromisoformat``
    only understands it natively from 3.11 onwards).

    Args:
        dt_str: ISO-8601 timestamp, or ``None``/empty.

    Returns:
        ``"unknown"`` for missing or unparseable input, ``"just now"`` for
        timestamps in the future, ``"<n>s ago"`` / ``"<n>m ago"`` /
        ``"<n>h ago"`` / ``"<n>d ago"`` for ages under 30 days, and the date as
        ``YYYY-MM-DD`` for anything older.
    """
    if not dt_str:
        return "unknown"
    try:
        text = dt_str
        if isinstance(text, str) and text.endswith("Z"):
            # Normalise the UTC designator to an explicit offset.
            text = text[:-1] + "+00:00"
        dt = datetime.fromisoformat(text)
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        seconds = int((datetime.now(timezone.utc) - dt).total_seconds())

        if seconds < 0:
            return "just now"
        if seconds < 60:
            return f"{seconds}s ago"
        minutes = seconds // 60
        if minutes < 60:
            return f"{minutes}m ago"
        hours = minutes // 60
        if hours < 24:
            return f"{hours}h ago"
        days = hours // 24
        if days < 30:
            return f"{days}d ago"
        return dt.strftime("%Y-%m-%d")
    except (ValueError, TypeError):
        return "unknown"
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def visible_totals(counts: dict, health: dict) -> dict:
    """Sum files, folders and size over the sources that are actually displayed.

    If ``health["remote_enabled"]`` is falsy, every source named in
    ``counts["remote_source_accounts"]`` is left out so the totals agree with
    the per-source rows shown to the user.

    Returns ``{"files": int, "folders": int, "size_mb": float}``.
    """
    per_source_files = counts.get("files", {})
    per_source_folders = counts.get("folders", {})
    remote_names = counts.get("remote_source_accounts", {})

    if not health.get("remote_enabled", False):
        # Hide remote sources: keep only entries that are not remote names.
        per_source_files = {
            name: info for name, info in per_source_files.items() if name not in remote_names
        }
        per_source_folders = {
            name: total for name, total in per_source_folders.items() if name not in remote_names
        }

    file_infos = list(per_source_files.values())
    file_total = sum(info["count"] for info in file_infos)
    folder_total = sum(per_source_folders.values())
    size_total = sum(info["size_mb"] for info in file_infos)
    return {"files": file_total, "folders": folder_total, "size_mb": size_total}
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _print_source_health(health: dict) -> None:
    """Render the Source Health table; print nothing if it would be empty.

    Args:
        health: Mapping as produced by ``get_source_health``. ``connector_rows``
            entries are read via their ``source`` and ``status`` keys, and the
            ``semantic`` entry drives the Semantic Search row(s).
    """
    connector_rows = health.get("connector_rows", [])
    semantic = health.get("semantic", {})

    # Early return if nothing to show
    if not (connector_rows or semantic.get("enabled")):
        return

    health_table = Table(show_header=True, header_style="bold", title="Source Health")
    health_table.add_column("Source", style="cyan")
    health_table.add_column("Status")

    # Connector rows - provided dynamically by connector health_check hooks
    for row in connector_rows:
        health_table.add_row(row["source"], row["status"])

    # Semantic Search
    if semantic.get("enabled"):
        if not semantic.get("installed"):
            health_table.add_row(
                "Semantic Search",
                # The backslash keeps Rich from parsing "[semantic]" as a style
                # tag, which would silently drop the extra from the hint.
                "[yellow]missing deps[/yellow] — pip install footprinter-cli\\[semantic]",
            )
        elif not semantic.get("available"):
            health_table.add_row(
                "Semantic Search",
                "[yellow]enabled[/yellow] — run fp ingest to build index",
            )
        else:
            chunks = semantic.get("file_chunks", 0)
            docs = semantic.get("chat_docs", 0)
            health_table.add_row(
                "Semantic Search (files)",
                f"[green]active[/green] {chunks:,} chunks",
            )
            health_table.add_row(
                "Semantic Search (chats)",
                f"[green]active[/green] {docs:,} docs",
            )

    console.print(health_table)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def print_status(data: dict, health: dict) -> None:
    """Render the full status report with rich panels and tables.

    Text that comes from the filesystem or the database (paths, file names,
    upload names, chat titles) is escaped before it is handed to Rich, so a
    name containing square brackets is printed literally instead of being
    parsed as console markup.

    Args:
        data: Structured status data with ``database``, ``config`` and
            ``counts`` entries, plus an optional ``last_run`` entry.
        health: Mapping as produced by ``get_source_health``.
    """
    # Function-scope import, matching the other local imports in this module.
    from rich.markup import escape

    def plain(value) -> str:
        """Return *value* as literal (markup-escaped) text; ``None`` becomes ''."""
        return "" if value is None else escape(str(value))

    db_path = escape(str(data["database"]["path"]))
    db_size = data["database"]["size_mb"]
    config_path = escape(str(data["config"]["path"]))
    config_exists = data["config"]["exists"]

    # Section 1: Header panel
    header_lines = [f"[bold]Database:[/bold] {db_path} ({db_size:.1f} MB)"]
    config_status = config_path if config_exists else f"{config_path} [dim](not found)[/dim]"
    header_lines.append(f"[bold]Config:[/bold] {config_status}")
    console.print(Panel("\n".join(header_lines), title="Footprinter Status", expand=False))

    # Section 2: Source health (skip if no connectors configured)
    _print_source_health(health)

    # Section 3: Data counts table
    counts = data["counts"]
    table = Table(show_header=True, header_style="bold")
    table.add_column("Source", style="cyan")
    table.add_column("Count", justify="right")
    table.add_column("Size", justify="right")

    files = counts.get("files", {})
    folders = counts.get("folders", {})
    remote_accounts = counts.get("remote_source_accounts", {})
    remote_enabled = health.get("remote_enabled", False)

    totals = visible_totals(counts, health)
    total_folder_count = totals["folders"]
    total_file_count = totals["files"]
    total_file_size = totals["size_mb"]

    # Local section
    local_folders = folders.get("local", 0)
    if local_folders:
        table.add_row("Local folders", f"{local_folders:,}", "")

    local_files = files.get("local")
    if local_files:
        table.add_row(
            "Local files",
            f"{local_files['count']:,}",
            f"{local_files['size_mb']:.1f} MB",
        )

    # Remote section (per account, rows shown with 0 counts when remote enabled)
    if remote_enabled and remote_accounts:
        # Build account -> display label from connector health rows
        account_labels = {
            row["account"]: row["label"]
            for row in health.get("connector_rows", [])
            if "account" in row and "label" in row
        }
        table.add_section()
        for source_name, account in remote_accounts.items():
            display = account_labels.get(account, account)
            remote_folders = folders.get(source_name, 0)
            remote_files = files.get(source_name)
            table.add_row(
                f"Remote folders ({display})",
                f"{remote_folders:,}",
                "",
            )
            table.add_row(
                f"Remote files ({display})",
                f"{remote_files['count']:,}" if remote_files else "0",
                f"{remote_files['size_mb']:.1f} MB" if remote_files else "0.0 MB",
            )

    # Totals section
    table.add_section()
    if total_folder_count:
        table.add_row(
            "[bold]Total folders[/bold]",
            f"[bold]{total_folder_count:,}[/bold]",
            "",
        )
    table.add_row(
        "[bold]Total files[/bold]",
        f"[bold]{total_file_count:,}[/bold]",
        f"[bold]{total_file_size:.1f} MB[/bold]",
    )

    # Other data sources
    table.add_section()
    table.add_row("Browser history", f"{counts.get('visits', 0):,}", "")
    table.add_row("Emails", f"{counts.get('emails', 0):,}", "")
    table.add_row("Chat messages", f"{counts.get('messages', 0):,}", "")

    chat_total = sum(counts.get("chats", {}).values())
    if chat_total:
        table.add_row("Chats", f"{chat_total:,}", "")

    console.print(table)

    # Section 4: Recently modified files
    recent_files = counts.get("recent_files", [])
    if recent_files:
        console.print()
        files_table = Table(show_header=True, header_style="bold", title="Recently Modified Files")
        files_table.add_column("Filename", style="cyan", max_width=40)
        files_table.add_column("Source")
        files_table.add_column("Date", style="dim")
        for f in recent_files:
            files_table.add_row(
                plain(f["name"]),
                plain(f["source"]),
                format_relative_time(f["modified_at"]),
            )
        console.print(files_table)

    # Section 5: Recent uploads
    recent_uploads = counts.get("recent_uploads", [])
    if recent_uploads:
        console.print()
        upload_table = Table(show_header=True, header_style="bold", title="Recent Uploads")
        upload_table.add_column("Filename", style="cyan")
        upload_table.add_column("Type")
        upload_table.add_column("Status")
        upload_table.add_column("Items", justify="right")
        upload_table.add_column("Date", style="dim")
        for u in recent_uploads:
            # Anything other than "completed" is highlighted as a problem.
            status_style = "[green]" if u["status"] == "completed" else "[red]"
            upload_table.add_row(
                plain(u["filename"]),
                plain(u["type"]),
                f"{status_style}{escape(str(u['status']))}[/]",
                str(u["items_added"] or 0),
                format_relative_time(u["uploaded_at"]),
            )
        console.print(upload_table)

    # Section 6: Top chats (only when messages exist - metadata-only imports
    # may have chat titles but 0 actual messages)
    top_convos = counts.get("top_chats", [])
    if top_convos and counts.get("messages", 0) > 0:
        console.print()
        chat_table = Table(show_header=True, header_style="bold", title="Top Chats")
        chat_table.add_column("Title", style="cyan", max_width=50)
        chat_table.add_column("Messages", justify="right")
        chat_table.add_column("Date", style="dim")
        for conv in top_convos:
            chat_table.add_row(
                plain(conv["title"] or "(untitled)"),
                str(conv["message_count"] or 0),
                format_relative_time(conv["created_at"]),
            )
        console.print(chat_table)

    console.print()

    # Section 7: Last run footer
    last_run = data.get("last_run")
    if last_run:
        time_ago = format_relative_time(last_run.get("started_at"))
        mode = last_run.get("mode", "unknown")
        items = last_run.get("items_processed", 0)
        errors = last_run.get("errors", 0)
        elapsed = last_run.get("elapsed_seconds")
        elapsed_str = f", {elapsed}s" if elapsed is not None else ""
        console.print()
        console.print(
            f"[dim]Last ingest:[/dim] {time_ago} [dim]({mode}, {items:,} items, {errors} errors{elapsed_str})[/dim]"
        )
    else:
        console.print()
        console.print("[dim]No ingest runs recorded.[/dim]")

    console.print()
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
# ---------------------------------------------------------------------------
# Zero-result heuristic: stages where 0 results likely indicate a problem
# ---------------------------------------------------------------------------
# Maps a stage name to the key in that stage's result whose value is checked;
# print_last_run() flags a completed stage when that value is exactly 0.
_CORE_ZERO_RESULT_CHECKS: dict[str, str] = {
    "browser": "urls_indexed",
}
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def _build_zero_result_checks() -> dict[str, str]:
    """Return the core zero-result checks extended by every installed connector.

    Starts from a copy of ``_CORE_ZERO_RESULT_CHECKS`` and lets each installed
    connector's ``zero_result_checks`` pairs add to or override it.
    """
    from footprinter.connectors import discover_connectors, is_installed

    merged = {**_CORE_ZERO_RESULT_CHECKS}
    installed_specs = (spec for spec in discover_connectors().values() if is_installed(spec))
    for connector in installed_specs:
        merged.update((pipe, key) for pipe, key in connector.zero_result_checks)
    return merged
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def print_last_run(record: Optional[dict]) -> None:
    """Render the last pipeline run as a Rich table with zero-result warnings.

    Args:
        record: Run record with ``stages``, ``mode``, ``started_at``,
            ``total_elapsed_seconds`` and an optional ``interrupted`` flag, or
            ``None`` when no run has been recorded.

    A stage whose status is ``completed`` but whose configured count key is
    exactly 0 is shown as a warning instead of OK. Missing or ``None`` timings
    are displayed as ``0.0s`` rather than raising while formatting.
    """
    if record is None:
        console.print("No pipeline runs recorded.")
        return

    from rich.markup import escape

    from footprinter.ingest.status import _stage_detail_string

    interrupted = record.get("interrupted", False)
    title = "Last Pipeline Run (interrupted)" if interrupted else "Last Pipeline Run"
    table = Table(show_header=True, header_style="bold", title=title)
    table.add_column("Stage", style="cyan")
    table.add_column("Status")
    table.add_column("Time", justify="right")
    table.add_column("Details", style="dim")

    status_icons = {
        "completed": "[green]OK[/green]",
        "completed_with_errors": "[yellow]WARN[/yellow]",
        "info": "[blue]info[/blue]",
        "skipped": "[yellow]skip[/yellow]",
        "error": "[red]FAIL[/red]",
    }

    zero_checks = _build_zero_result_checks()

    for stage_result in record.get("stages", []):
        stage = stage_result.get("stage", "unknown")
        status = stage_result.get("status", "unknown")
        # "or 0" also covers a key that is present but None.
        elapsed = stage_result.get("elapsed_seconds") or 0
        icon = status_icons.get(status, f"[dim]{status}[/dim]")
        details = _stage_detail_string(stage_result)

        if status == "error":
            error_msg = stage_result.get("error", "")
            if error_msg:
                # Error text is arbitrary; escape it so brackets in the message
                # are shown literally instead of being parsed as Rich markup.
                details = escape(str(error_msg)[:200])

        # Zero-result warning
        count_key = zero_checks.get(stage)
        if count_key and status == "completed" and stage_result.get(count_key, -1) == 0:
            icon = "[yellow]⚠ WARNING[/yellow]"
            details = "0 results — check configuration"

        table.add_row(stage, icon, f"{elapsed:.1f}s", details)

    console.print(table)

    # Footer
    mode = record.get("mode", "unknown")
    mode_display = f"{mode} (interrupted)" if interrupted else mode
    total = record.get("total_elapsed_seconds") or 0
    started_at = record.get("started_at")
    time_ago = format_relative_time(started_at)
    console.print(f"[dim]Mode: {mode_display} | Total: {total:.1f}s | {time_ago}[/dim]")
    console.print()
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
def main() -> None:
    """Parse ``fp status`` arguments and print the requested report.

    ``--last-run`` prints the per-stage breakdown of the last pipeline run and
    exits. Otherwise database/config information, counts and source health
    are gathered and emitted as JSON (``--json``) or as rich tables.
    """
    cli = argparse.ArgumentParser(prog="fp status", description="Show Footprinter system status")
    cli.add_argument(
        "--json",
        action="store_true",
        help="Output structured JSON instead of rich tables",
    )
    cli.add_argument(
        "--last-run",
        action="store_true",
        help="Show details from the last pipeline run",
    )
    args = cli.parse_args()

    # --last-run: per-stage breakdown from run_record.py (session-level JSON cache).
    # Different from the footer's "Last ingest" which reads the ingests DB table
    # for the most recent per-pipe record.
    if getattr(args, "last_run", False):
        from footprinter.ingest.run_record import load_run_record

        print_last_run(load_run_record())
        return

    db_path = get_db_path()
    config_path = get_config_path()

    # Build structured data
    db_info: dict = {"path": str(db_path), "exists": db_path.exists()}
    if db_path.exists():
        db_info["size_mb"] = round(db_path.stat().st_size / 1024 / 1024, 1)
    else:
        db_info["size_mb"] = 0
    data: dict = {
        "database": db_info,
        "config": {"path": str(config_path), "exists": config_path.exists()},
    }

    if not db_path.exists():
        # Nothing indexed yet: emit an empty report or a hint and stop.
        if not args.json:
            console.print(
                Panel(
                    f"No database found at [cyan]{db_path}[/cyan]\nRun [bold]fp ingest[/bold] to start indexing.",
                    title="Footprinter Status",
                    expand=False,
                )
            )
            return
        data.update(counts={}, health={}, last_run=None)
        print(json.dumps(data, indent=2, default=str))
        return

    try:
        config = get_config()
    except Exception:
        config = None

    counts = get_data_counts(db_path)
    health = get_source_health(config)
    data.update(counts=counts, health=health, last_run=counts.get("last_run"))

    # Align files_total with visibility-filtered totals
    counts["files_total"] = visible_totals(counts, health)["files"]

    if args.json:
        print(json.dumps(data, indent=2, default=str))
        return
    print_status(data, health)
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
# Allows running the module directly, e.g. ``python -m footprinter.cli.status``.
if __name__ == "__main__":
    main()
|