footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,729 @@
1
+ """
2
+ Lightweight terminal status command for Footprinter.
3
+
4
+ Shows data counts, source health, and last run info using rich tables.
5
+ No web/FastAPI dependencies required.
6
+
7
+ Usage:
8
+ fp status # Rich formatted output
9
+ fp status --json # Machine-readable JSON
10
+ fp status --last-run # Last pipeline run details
11
+ python -m footprinter.cli.status
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import sqlite3
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+ from typing import Optional
20
+
21
+ from rich.console import Console
22
+ from rich.panel import Panel
23
+ from rich.table import Table
24
+
25
+ from footprinter.connectors import discover_connectors, is_installed, resolve_hook
26
+ from footprinter.paths import get_chroma_path, get_config_path, get_db_path
27
+ from footprinter.source_registry import get_config
28
+
29
+ console = Console()
30
+
31
+
32
def get_data_counts(db_path: Path) -> dict:
    """Open the database at *db_path* and collect all status counts.

    The connection is configured (row factory, busy timeout, foreign keys)
    and then handed to ``_query_all_counts``, which fills and returns the
    result dictionary.

    Args:
        db_path: Location of the SQLite database file.

    Returns:
        The dictionary produced by ``_query_all_counts``.
    """
    counts: dict = {}

    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        # Everything after connect() lives inside the try block so the handle
        # is released even when configuring the connection fails.
        conn.row_factory = sqlite3.Row  # rows are read by column name downstream
        conn.execute("PRAGMA busy_timeout=5000")
        conn.execute("PRAGMA foreign_keys=ON")
        cursor = conn.cursor()
        return _query_all_counts(cursor, counts)
    finally:
        conn.close()
46
+
47
+
48
+ def _query_all_counts(cursor, counts: dict) -> dict:
49
+ """Run all count queries. Separated for try/finally in caller."""
50
+ # Files by source
51
+ try:
52
+ cursor.execute(
53
+ """
54
+ SELECT source, COUNT(*) as count, SUM(size_bytes) as size
55
+ FROM files WHERE status != 'removed'
56
+ GROUP BY source
57
+ """
58
+ )
59
+ counts["files"] = {
60
+ row["source"]: {
61
+ "count": row["count"],
62
+ "size_mb": round((row["size"] or 0) / 1024 / 1024, 1),
63
+ }
64
+ for row in cursor.fetchall()
65
+ }
66
+ except sqlite3.OperationalError:
67
+ counts["files"] = {}
68
+
69
+ # Total files
70
+ try:
71
+ cursor.execute("SELECT COUNT(*) FROM files WHERE status != 'removed'")
72
+ counts["files_total"] = cursor.fetchone()[0]
73
+ except sqlite3.OperationalError:
74
+ counts["files_total"] = 0
75
+
76
+ # Folders by source
77
+ try:
78
+ cursor.execute(
79
+ """
80
+ SELECT source, COUNT(*) as count
81
+ FROM folders WHERE status != 'removed'
82
+ GROUP BY source
83
+ """
84
+ )
85
+ counts["folders"] = {row["source"] or "local": row["count"] for row in cursor.fetchall()}
86
+ except sqlite3.OperationalError:
87
+ counts["folders"] = {}
88
+
89
+ # Browser visits
90
+ try:
91
+ cursor.execute("SELECT COUNT(*) FROM visits")
92
+ counts["visits"] = cursor.fetchone()[0]
93
+ except sqlite3.OperationalError:
94
+ counts["visits"] = 0
95
+
96
+ # Emails
97
+ try:
98
+ cursor.execute("SELECT COUNT(*) FROM emails")
99
+ counts["emails"] = cursor.fetchone()[0]
100
+ except sqlite3.OperationalError:
101
+ counts["emails"] = 0
102
+
103
+ # Chats by account
104
+ try:
105
+ cursor.execute("SELECT account, COUNT(*) as count FROM chats GROUP BY account")
106
+ counts["chats"] = {row["account"]: row["count"] for row in cursor.fetchall()}
107
+ except sqlite3.OperationalError:
108
+ counts["chats"] = {}
109
+
110
+ # Chat messages
111
+ try:
112
+ cursor.execute("SELECT COUNT(*) FROM messages")
113
+ counts["messages"] = cursor.fetchone()[0]
114
+ except sqlite3.OperationalError:
115
+ counts["messages"] = 0
116
+
117
+ # Top chats by message count
118
+ try:
119
+ cursor.execute(
120
+ """
121
+ SELECT title, message_count, created_at
122
+ FROM chats
123
+ ORDER BY message_count DESC
124
+ LIMIT 5
125
+ """
126
+ )
127
+ counts["top_chats"] = [
128
+ {
129
+ "title": row["title"],
130
+ "message_count": row["message_count"],
131
+ "created_at": row["created_at"],
132
+ }
133
+ for row in cursor.fetchall()
134
+ ]
135
+ except sqlite3.OperationalError:
136
+ counts["top_chats"] = []
137
+
138
+ # Chat date range
139
+ try:
140
+ cursor.execute("SELECT MIN(created_at) as earliest, MAX(created_at) as latest FROM chats")
141
+ row = cursor.fetchone()
142
+ counts["chat_date_range"] = {
143
+ "earliest": row["earliest"] if row else None,
144
+ "latest": row["latest"] if row else None,
145
+ }
146
+ except sqlite3.OperationalError:
147
+ counts["chat_date_range"] = {"earliest": None, "latest": None}
148
+
149
+ # Remote source accounts (for display labels in print_status)
150
+ try:
151
+ cursor.execute("SELECT name, account FROM sources WHERE source_type = 'remote'")
152
+ counts["remote_source_accounts"] = {row["name"]: row["account"] for row in cursor.fetchall()}
153
+ except sqlite3.OperationalError:
154
+ counts["remote_source_accounts"] = {}
155
+
156
+ # Recently modified files
157
+ try:
158
+ cursor.execute(
159
+ """
160
+ SELECT name, source, modified_at
161
+ FROM files WHERE status != 'removed'
162
+ ORDER BY modified_at DESC
163
+ LIMIT 10
164
+ """
165
+ )
166
+ counts["recent_files"] = [
167
+ {
168
+ "name": row["name"],
169
+ "source": row["source"],
170
+ "modified_at": row["modified_at"],
171
+ }
172
+ for row in cursor.fetchall()
173
+ ]
174
+ except sqlite3.OperationalError:
175
+ counts["recent_files"] = []
176
+
177
+ # Recent uploads
178
+ try:
179
+ cursor.execute(
180
+ """
181
+ SELECT filename, type, status, items_added, uploaded_at
182
+ FROM uploads
183
+ ORDER BY uploaded_at DESC
184
+ LIMIT 5
185
+ """
186
+ )
187
+ counts["recent_uploads"] = [
188
+ {
189
+ "filename": row["filename"],
190
+ "type": row["type"],
191
+ "status": row["status"],
192
+ "items_added": row["items_added"],
193
+ "uploaded_at": row["uploaded_at"],
194
+ }
195
+ for row in cursor.fetchall()
196
+ ]
197
+ except sqlite3.OperationalError:
198
+ counts["recent_uploads"] = []
199
+
200
+ # Last ingest run (exclude 'running' and last-run-only rows which lack mode)
201
+ try:
202
+ cursor.execute(
203
+ """
204
+ SELECT pipe, started_at, completed_at, mode,
205
+ items_processed, errors, status, elapsed_seconds
206
+ FROM ingests
207
+ WHERE status != 'running' AND mode IS NOT NULL
208
+ ORDER BY completed_at DESC LIMIT 1
209
+ """
210
+ )
211
+ row = cursor.fetchone()
212
+ if row:
213
+ elapsed = row["elapsed_seconds"]
214
+ if elapsed is None and row["started_at"] and row["completed_at"]:
215
+ try:
216
+ start = datetime.fromisoformat(row["started_at"])
217
+ end = datetime.fromisoformat(row["completed_at"])
218
+ elapsed = round((end - start).total_seconds(), 1)
219
+ except (ValueError, TypeError):
220
+ pass
221
+ counts["last_run"] = {
222
+ "mode": row["mode"] or "unknown",
223
+ "pipe": row["pipe"],
224
+ "started_at": row["started_at"],
225
+ "completed_at": row["completed_at"],
226
+ "items_processed": row["items_processed"] or 0,
227
+ "errors": row["errors"] or 0,
228
+ "status": row["status"],
229
+ "elapsed_seconds": elapsed,
230
+ }
231
+ else:
232
+ counts["last_run"] = None
233
+ except sqlite3.OperationalError:
234
+ counts["last_run"] = None
235
+
236
+
237
+ return counts
238
+
239
+
240
def get_source_health(config: Optional[dict]) -> dict:
    """Check source health via connector hooks and built-in checks.

    Args:
        config: Loaded configuration, or None when it could not be loaded.

    Returns:
        A dict with three keys: ``connector_rows`` (rows returned by connector
        health hooks), ``remote_enabled`` (True when at least one row was
        returned) and ``semantic`` (``enabled`` / ``installed`` /
        ``available`` flags, plus ``file_chunks`` and ``chat_docs`` when the
        vector store could be queried).
    """
    health: dict = {}

    # Dynamic connector health via ConnectorSpec.health_check hooks
    connector_rows: list[dict] = []
    for spec in discover_connectors().values():
        if not (is_installed(spec) and spec.health_check):
            continue
        try:
            fn = resolve_hook(spec.health_check)
            if fn and config:
                connector_rows.extend(fn(config))
        except Exception:
            # Deliberately best-effort: one broken connector hook must not
            # take down the whole status report.
            continue
    health["connector_rows"] = connector_rows
    health["remote_enabled"] = len(connector_rows) > 0

    # Semantic search — config-aware health check.
    # A bare ``semantic:`` key yields None, and a scalar yields a non-dict;
    # both are treated as "no settings" instead of raising AttributeError.
    semantic_cfg = config.get("semantic") if config else None
    if not isinstance(semantic_cfg, dict):
        semantic_cfg = {}
    config_enabled = semantic_cfg.get("file_vectorization", False)

    try:
        from footprinter.semantic.vector_store import (
            VectorStore,
            _semantic_available,
        )

        installed = _semantic_available()
    except ImportError:
        installed = False
        VectorStore = None  # type: ignore[assignment]

    if not config_enabled:
        health["semantic"] = {"enabled": False, "installed": installed, "available": False}
    elif not installed:
        health["semantic"] = {"enabled": True, "installed": False, "available": False}
    elif not get_chroma_path().exists():
        # Enabled and installed, but the vector store path does not exist yet.
        health["semantic"] = {"enabled": True, "installed": True, "available": False}
    else:
        try:
            vs = VectorStore.get_instance()
            file_stats = vs.get_file_stats()
            conv_stats = vs.get_chat_stats()
            health["semantic"] = {
                "enabled": True,
                "installed": True,
                "available": True,
                "file_chunks": file_stats.get("total_chunks", 0),
                "chat_docs": conv_stats.get("total_documents", 0),
            }
        except Exception:
            # Any failure while querying the store is reported as "not available".
            health["semantic"] = {
                "enabled": True,
                "installed": True,
                "available": False,
            }

    return health
296
+
297
+
298
def format_relative_time(dt_str: Optional[str]) -> str:
    """Convert an ISO datetime string to a relative time like '2h ago'.

    Timestamps without timezone information are interpreted as UTC. A trailing
    ``Z`` (UTC designator) is accepted on every Python version.

    Args:
        dt_str: ISO-8601 timestamp, or None/empty.

    Returns:
        ``"unknown"`` for missing or unparseable input, ``"just now"`` for
        timestamps in the future, ``"<n>s ago"`` / ``"<n>m ago"`` /
        ``"<n>h ago"`` / ``"<n>d ago"`` for anything younger than 30 days,
        and the ``YYYY-MM-DD`` date otherwise.
    """
    if not dt_str:
        return "unknown"
    try:
        # datetime.fromisoformat() only understands a trailing "Z" from
        # Python 3.11 on; normalise it so older interpreters parse it too.
        if isinstance(dt_str, str) and dt_str.endswith("Z"):
            dt_str = dt_str[:-1] + "+00:00"
        dt = datetime.fromisoformat(dt_str)
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        now = datetime.now(timezone.utc)
        delta = now - dt
        seconds = int(delta.total_seconds())

        if seconds < 0:
            return "just now"
        if seconds < 60:
            return f"{seconds}s ago"
        minutes = seconds // 60
        if minutes < 60:
            return f"{minutes}m ago"
        hours = minutes // 60
        if hours < 24:
            return f"{hours}h ago"
        days = hours // 24
        if days < 30:
            return f"{days}d ago"
        return dt.strftime("%Y-%m-%d")
    except (ValueError, TypeError):
        # Not a string, or not a parseable ISO timestamp.
        return "unknown"
326
+
327
+
328
def visible_totals(counts: dict, health: dict) -> dict:
    """Sum file and folder counts over the sources that are displayed.

    If ``health["remote_enabled"]`` is falsy, every source listed in
    ``counts["remote_source_accounts"]`` is left out, so the totals agree
    with the per-source rows that are shown.

    Returns ``{"files": int, "folders": int, "size_mb": float}``.
    """
    file_map = counts.get("files", {})
    folder_map = counts.get("folders", {})

    if not health.get("remote_enabled", False):
        # Remote connectors are off: drop their sources from both maps.
        hidden = counts.get("remote_source_accounts", {})
        file_map = {src: info for src, info in file_map.items() if src not in hidden}
        folder_map = {src: n for src, n in folder_map.items() if src not in hidden}

    file_infos = list(file_map.values())
    file_total = sum(entry["count"] for entry in file_infos)
    folder_total = sum(folder_map.values())
    size_total = sum(entry["size_mb"] for entry in file_infos)

    return {"files": file_total, "folders": folder_total, "size_mb": size_total}
352
+
353
+
354
+ def _print_source_health(health: dict) -> None:
355
+ """Render the Source Health table. Skip entirely if no rows would appear."""
356
+ connector_rows = health.get("connector_rows", [])
357
+ semantic = health.get("semantic", {})
358
+
359
+ # Early return if nothing to show
360
+ if not (connector_rows or semantic.get("enabled")):
361
+ return
362
+
363
+ health_table = Table(show_header=True, header_style="bold", title="Source Health")
364
+ health_table.add_column("Source", style="cyan")
365
+ health_table.add_column("Status")
366
+
367
+ # Connector rows — provided dynamically by connector health_check hooks
368
+ for row in connector_rows:
369
+ health_table.add_row(row["source"], row["status"])
370
+
371
+ # Semantic Search
372
+ if semantic.get("enabled"):
373
+ if not semantic.get("installed"):
374
+ health_table.add_row(
375
+ "Semantic Search",
376
+ "[yellow]missing deps[/yellow] — pip install footprinter-cli[semantic]",
377
+ )
378
+ elif not semantic.get("available"):
379
+ health_table.add_row(
380
+ "Semantic Search",
381
+ "[yellow]enabled[/yellow] — run fp ingest to build index",
382
+ )
383
+ else:
384
+ chunks = semantic.get("file_chunks", 0)
385
+ docs = semantic.get("chat_docs", 0)
386
+ health_table.add_row(
387
+ "Semantic Search (files)",
388
+ f"[green]active[/green] {chunks:,} chunks",
389
+ )
390
+ health_table.add_row(
391
+ "Semantic Search (chats)",
392
+ f"[green]active[/green] {docs:,} docs",
393
+ )
394
+
395
+ console.print(health_table)
396
+
397
+
398
def print_status(data: dict, health: dict) -> None:
    """Render status with rich panels and tables.

    Text that originates from the database or the filesystem (paths, file
    names, chat titles, upload fields, account names, ingest mode) is escaped
    before being embedded in rich markup, so square brackets in that text are
    shown literally instead of being parsed as style tags.

    Args:
        data: Structured status with ``database``, ``config`` and ``counts``
            entries and an optional ``last_run`` entry.
        health: Source health mapping (``connector_rows``, ``remote_enabled``,
            ``semantic``).
    """
    # Imported locally so this function carries its own dependency.
    from rich.markup import escape

    def _plain(value) -> str:
        """Return *value* as literal text that is safe inside rich markup."""
        return "" if value is None else escape(str(value))

    db_path = data["database"]["path"]
    db_size = data["database"]["size_mb"]
    config_path = data["config"]["path"]
    config_exists = data["config"]["exists"]

    # Section 1: Header panel
    header_lines = [f"[bold]Database:[/bold] {_plain(db_path)} ({db_size:.1f} MB)"]
    shown_config = _plain(config_path)
    config_status = shown_config if config_exists else f"{shown_config} [dim](not found)[/dim]"
    header_lines.append(f"[bold]Config:[/bold] {config_status}")
    console.print(Panel("\n".join(header_lines), title="Footprinter Status", expand=False))

    # Section 2: Source health (skip if no connectors configured)
    _print_source_health(health)

    # Section 3: Data counts table
    counts = data["counts"]
    table = Table(show_header=True, header_style="bold")
    table.add_column("Source", style="cyan")
    table.add_column("Count", justify="right")
    table.add_column("Size", justify="right")

    files = counts.get("files", {})
    folders = counts.get("folders", {})
    remote_accounts = counts.get("remote_source_accounts", {})
    remote_enabled = health.get("remote_enabled", False)

    totals = visible_totals(counts, health)
    total_folder_count = totals["folders"]
    total_file_count = totals["files"]
    total_file_size = totals["size_mb"]

    # Local section
    local_folders = folders.get("local", 0)
    if local_folders:
        table.add_row("Local folders", f"{local_folders:,}", "")

    local_files = files.get("local")
    if local_files:
        table.add_row(
            "Local files",
            f"{local_files['count']:,}",
            f"{local_files['size_mb']:.1f} MB",
        )

    # Remote section (per account, rows shown with 0 counts when remote enabled)
    if remote_enabled and remote_accounts:
        # Build account → display label from connector health rows
        account_labels = {
            row["account"]: row["label"]
            for row in health.get("connector_rows", [])
            if "account" in row and "label" in row
        }
        table.add_section()
        for source_name, account in remote_accounts.items():
            # Connector-provided labels are used as given; the raw account
            # name used as a fallback is escaped.
            if account in account_labels:
                display = account_labels[account]
            else:
                display = escape(str(account))
            remote_folders = folders.get(source_name, 0)
            remote_files = files.get(source_name)
            table.add_row(
                f"Remote folders ({display})",
                f"{remote_folders:,}",
                "",
            )
            table.add_row(
                f"Remote files ({display})",
                f"{remote_files['count']:,}" if remote_files else "0",
                f"{remote_files['size_mb']:.1f} MB" if remote_files else "0.0 MB",
            )

    # Totals section
    table.add_section()
    if total_folder_count:
        table.add_row(
            "[bold]Total folders[/bold]",
            f"[bold]{total_folder_count:,}[/bold]",
            "",
        )
    table.add_row(
        "[bold]Total files[/bold]",
        f"[bold]{total_file_count:,}[/bold]",
        f"[bold]{total_file_size:.1f} MB[/bold]",
    )

    # Other data sources
    table.add_section()
    table.add_row("Browser history", f"{counts.get('visits', 0):,}", "")
    table.add_row("Emails", f"{counts.get('emails', 0):,}", "")
    table.add_row("Chat messages", f"{counts.get('messages', 0):,}", "")

    chat_total = sum(counts.get("chats", {}).values())
    if chat_total:
        table.add_row("Chats", f"{chat_total:,}", "")

    console.print(table)

    # Section 4: Recently modified files
    recent_files = counts.get("recent_files", [])
    if recent_files:
        console.print()
        files_table = Table(show_header=True, header_style="bold", title="Recently Modified Files")
        files_table.add_column("Filename", style="cyan", max_width=40)
        files_table.add_column("Source")
        files_table.add_column("Date", style="dim")
        for f in recent_files:
            files_table.add_row(
                _plain(f["name"]),
                _plain(f["source"]),
                format_relative_time(f["modified_at"]),
            )
        console.print(files_table)

    # Section 5: Recent uploads
    recent_uploads = counts.get("recent_uploads", [])
    if recent_uploads:
        console.print()
        upload_table = Table(show_header=True, header_style="bold", title="Recent Uploads")
        upload_table.add_column("Filename", style="cyan")
        upload_table.add_column("Type")
        upload_table.add_column("Status")
        upload_table.add_column("Items", justify="right")
        upload_table.add_column("Date", style="dim")
        for u in recent_uploads:
            status_style = "[green]" if u["status"] == "completed" else "[red]"
            upload_table.add_row(
                _plain(u["filename"]),
                _plain(u["type"]),
                f"{status_style}{escape(str(u['status']))}[/]",
                str(u["items_added"] or 0),
                format_relative_time(u["uploaded_at"]),
            )
        console.print(upload_table)

    # Section 6: Top chats (only when messages exist — metadata-only imports
    # may have chat titles but 0 actual messages)
    top_convos = counts.get("top_chats", [])
    if top_convos and counts.get("messages", 0) > 0:
        console.print()
        chat_table = Table(show_header=True, header_style="bold", title="Top Chats")
        chat_table.add_column("Title", style="cyan", max_width=50)
        chat_table.add_column("Messages", justify="right")
        chat_table.add_column("Date", style="dim")
        for conv in top_convos:
            chat_table.add_row(
                _plain(conv["title"]) or "(untitled)",
                str(conv["message_count"] or 0),
                format_relative_time(conv["created_at"]),
            )
        console.print(chat_table)

    console.print()

    # Section 7: Last run footer
    last_run = data.get("last_run")
    if last_run:
        time_ago = format_relative_time(last_run.get("started_at"))
        mode = escape(str(last_run.get("mode", "unknown")))
        items = last_run.get("items_processed", 0)
        errors = last_run.get("errors", 0)
        elapsed = last_run.get("elapsed_seconds")
        elapsed_str = f", {elapsed}s" if elapsed is not None else ""
        console.print()
        console.print(
            f"[dim]Last ingest:[/dim] {time_ago} [dim]({mode}, {items:,} items, {errors} errors{elapsed_str})[/dim]"
        )
    else:
        console.print()
        console.print("[dim]No ingest runs recorded.[/dim]")

    console.print()
568
+
569
+
570
# ---------------------------------------------------------------------------
# Zero-result heuristic: maps a stage name to the result key that holds its
# item count. A completed stage whose count is 0 likely indicates a problem.
# ---------------------------------------------------------------------------
_CORE_ZERO_RESULT_CHECKS: dict[str, str] = {"browser": "urls_indexed"}


def _build_zero_result_checks() -> dict[str, str]:
    """Return the core zero-result checks plus those of every installed connector.

    A connector entry replaces a core entry that has the same stage name.
    """
    from footprinter.connectors import discover_connectors, is_installed

    merged = {**_CORE_ZERO_RESULT_CHECKS}
    installed_specs = (spec for spec in discover_connectors().values() if is_installed(spec))
    for spec in installed_specs:
        # Each entry is unpacked as a (stage name, count key) pair.
        merged.update((pipe_name, count_key) for pipe_name, count_key in spec.zero_result_checks)
    return merged
588
+
589
+
590
def print_last_run(record: Optional[dict]) -> None:
    """Render the last pipeline run as a Rich table with zero-result warnings.

    Missing or null ``stages``, ``elapsed_seconds`` and
    ``total_elapsed_seconds`` values are treated as empty / zero, and stage
    error text is escaped so brackets in it are printed literally.

    Args:
        record: The stored run record, or None when no run was recorded.
    """
    if record is None:
        console.print("No pipeline runs recorded.")
        return

    # Imported locally so this function carries its own dependency.
    from rich.markup import escape

    from footprinter.ingest.status import _stage_detail_string

    interrupted = record.get("interrupted", False)
    title = "Last Pipeline Run (interrupted)" if interrupted else "Last Pipeline Run"
    table = Table(show_header=True, header_style="bold", title=title)
    table.add_column("Stage", style="cyan")
    table.add_column("Status")
    table.add_column("Time", justify="right")
    table.add_column("Details", style="dim")

    status_icons = {
        "completed": "[green]OK[/green]",
        "completed_with_errors": "[yellow]WARN[/yellow]",
        "info": "[blue]info[/blue]",
        "skipped": "[yellow]skip[/yellow]",
        "error": "[red]FAIL[/red]",
    }

    zero_checks = _build_zero_result_checks()

    # "or" fallbacks cover keys that are present but null.
    for stage_result in record.get("stages") or []:
        stage = stage_result.get("stage", "unknown")
        status = stage_result.get("status", "unknown")
        elapsed = stage_result.get("elapsed_seconds") or 0
        icon = status_icons.get(status, f"[dim]{status}[/dim]")
        details = _stage_detail_string(stage_result)

        if status == "error":
            error_msg = stage_result.get("error", "")
            if error_msg:
                # Truncate first, then escape, so an escape sequence is never cut in half.
                details = escape(str(error_msg)[:200])

        # Zero-result warning
        count_key = zero_checks.get(stage)
        if count_key and status == "completed" and stage_result.get(count_key, -1) == 0:
            icon = "[yellow]⚠ WARNING[/yellow]"
            details = "0 results — check configuration"

        table.add_row(stage, icon, f"{elapsed:.1f}s", details)

    console.print(table)

    # Footer
    mode = record.get("mode", "unknown")
    mode_display = f"{mode} (interrupted)" if interrupted else mode
    total = record.get("total_elapsed_seconds") or 0
    started_at = record.get("started_at")
    time_ago = format_relative_time(started_at)
    console.print(f"[dim]Mode: {mode_display} | Total: {total:.1f}s | {time_ago}[/dim]")
    console.print()
646
+
647
+
648
def main() -> None:
    """Parse ``fp status`` arguments and print the requested report.

    ``--last-run`` prints the stored run record and exits. Otherwise the
    database and config locations are inspected and the result is printed as
    JSON (``--json``) or as rich tables.
    """
    parser = argparse.ArgumentParser(
        prog="fp status",
        description="Show Footprinter system status",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output structured JSON instead of rich tables",
    )
    parser.add_argument(
        "--last-run",
        action="store_true",
        help="Show details from the last pipeline run",
    )
    args = parser.parse_args()

    # --last-run: per-stage breakdown from run_record.py (session-level JSON cache).
    # Different from the footer's "Last ingest" which reads the ingests DB table
    # for the most recent per-pipe record.
    if args.last_run:
        from footprinter.ingest.run_record import load_run_record

        print_last_run(load_run_record())
        return

    db_path = get_db_path()
    config_path = get_config_path()

    # Build structured data
    database_info: dict = {"path": str(db_path), "exists": db_path.exists()}
    if db_path.exists():
        database_info["size_mb"] = round(db_path.stat().st_size / 1024 / 1024, 1)
    else:
        database_info["size_mb"] = 0
    config_info: dict = {"path": str(config_path), "exists": config_path.exists()}
    data: dict = {"database": database_info, "config": config_info}

    if not db_path.exists():
        # Nothing indexed yet: emit an empty report (JSON) or a hint (rich).
        if not args.json:
            console.print(
                Panel(
                    f"No database found at [cyan]{db_path}[/cyan]\nRun [bold]fp ingest[/bold] to start indexing.",
                    title="Footprinter Status",
                    expand=False,
                )
            )
            return
        data.update(counts={}, health={}, last_run=None)
        print(json.dumps(data, indent=2, default=str))
        return

    # A config that fails to load is treated as "no config" rather than fatal.
    try:
        config = get_config()
    except Exception:
        config = None

    counts = get_data_counts(db_path)
    health = get_source_health(config)
    data.update(counts=counts, health=health, last_run=counts.get("last_run"))

    # Align files_total with visibility-filtered totals
    counts["files_total"] = visible_totals(counts, health)["files"]

    if args.json:
        print(json.dumps(data, indent=2, default=str))
        return
    print_status(data, health)


if __name__ == "__main__":
    main()