footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2001 @@
1
+ """
2
+ Interactive setup wizard for Footprinter.
3
+
4
+ Guides new users through configuration in ~3 minutes.
5
+ Usage:
6
+ fp setup # Run interactive wizard
7
+ fp setup --check # Validate existing configuration
8
+ fp setup --hooks # Install git hooks (sets core.hooksPath)
9
+ fp setup --reset # Clear data and re-run wizard
10
+ fp setup --seed-samples # Load sample data for exploration
11
+ fp setup --clear-samples # Remove sample data
12
+ """
13
+
14
+ import argparse
15
+ import logging
16
+ import os
17
+ import shutil
18
+ import sqlite3
19
+ import subprocess
20
+ import sys
21
+ from pathlib import Path
22
+
23
+ import yaml
24
+ from rich.console import Console
25
+ from rich.panel import Panel
26
+ from rich.rule import Rule
27
+ from rich.table import Table
28
+
29
+ from footprinter.cli import mcp_setup
30
+ from footprinter.cli._policy_helpers import (
31
+ get_policy_db as _get_db_connection,
32
+ )
33
+ from footprinter.cli._policy_helpers import (
34
+ normalize_path as _normalize_path,
35
+ )
36
+ from footprinter.cli._policy_helpers import (
37
+ seed_access_policies as _seed_access_policies,
38
+ )
39
+ from footprinter.cli._prompt import (
40
+ PromptCancelled,
41
+ )
42
+ from footprinter.cli._prompt import (
43
+ SafeConfirm as Confirm,
44
+ )
45
+ from footprinter.cli._prompt import (
46
+ SafePrompt as Prompt,
47
+ )
48
+
49
+ # In-process pipeline — imported here so tests can patch them
50
+ from footprinter.cli.ingest import _run_with_logging
51
+ from footprinter.ingest.orchestrator import DataPipelineOrchestrator
52
+ from footprinter.paths import (
53
+ _TEST_ENV_NAME,
54
+ _TEST_MARKER_NAME,
55
+ get_bundled_path,
56
+ get_chroma_path,
57
+ get_config_path,
58
+ get_db_path,
59
+ get_log_path,
60
+ get_real_home,
61
+ is_test_mode,
62
+ )
63
+ from footprinter.source_registry import ConfigError, get_config
64
+
65
+ logger = logging.getLogger(__name__)
66
+
67
+
68
def _load_existing_config() -> dict | None:
    """Return the current configuration, or ``None`` when it cannot be loaded.

    A ``ConfigError`` raised by ``get_config`` (missing or invalid config) is
    swallowed and reported as ``None``.
    """
    loaded: dict | None
    try:
        loaded = get_config()
    except ConfigError:
        loaded = None
    return loaded
74
+
75
+
76
+ console = Console()
77
+
78
+
79
def _repo_root() -> Path:
    """Return the directory three levels above this module's resolved file path.

    Dev-only helper: the result is used to locate git hooks and as the
    working directory for subprocess calls.
    """
    location = Path(__file__).resolve()
    # Climb: module file -> cli/ -> footprinter/ -> checkout root.
    for _ in range(3):
        location = location.parent
    return location
82
+
83
+
84
def _hooks_available() -> bool:
    """Report whether ``scripts/hooks/post-merge`` exists under the repo root (private repo only)."""
    hook_script = _repo_root().joinpath("scripts", "hooks", "post-merge")
    return hook_script.exists()
87
+
88
+
89
# Common directories checked during quick start — only those that exist are included
QUICK_START_CANDIDATES = ["~/Documents", "~/Desktop", "~/Work", "~/Projects"]

# Directories offered as optional extras (not defaults)
OPTIONAL_DIRECTORIES = ["~/.claude"]

# Browser identifiers known to the wizard. get_available_browsers() returns the
# per-platform subset (Safari only on macOS).
KNOWN_BROWSERS = ["safari", "chrome"]

# Vectorization defaults — file types that benefit from semantic embedding.
# Entries are lowercase with a leading dot; the directory scanner compares them
# against the lowercased os.path.splitext() suffix of each file name.
DEFAULT_FILE_TYPES = [".md", ".txt", ".pdf", ".docx"]

# Known junk patterns — (fnmatch_pattern, description) tuples
# Files matching these exist as text but contain no meaningful prose content.
# Patterns use ** glob syntax; fnmatch matches / on Unix.
# The scanner attributes a file to the FIRST pattern that matches, so order
# decides which entry a file is counted under when several patterns overlap.
KNOWN_JUNK_PATTERNS = [
    ("**/Photos Library.photoslibrary/**", "macOS Spotlight index cache"),
    ("**/.claude/debug/**", "Claude Code debug logs"),
    ("**/.claude/paste-cache/**", "Claude Code paste cache"),
    ("**/.claude/cache/**", "Claude Code cache"),
    ("**/.claude/projects/**", "Claude Code session data"),
    ("**/.claude/plans/**", "Claude Code auto-generated plans"),
    ("**/.claude/plugins/**", "Claude Code plugin cache"),
    ("**/.cci/**", "CumulusCI cache"),
    ("**/.context/**", "IDE context directories"),
    ("**/.github/**", "GitHub config and workflows"),
    ("**/.ai-dev/**", "AI dev tool directories"),
]

# Upper bound on matching files counted by _scan_directories_for_vectorization;
# once reached, the scan stops and reports truncated=True.
_SCAN_FILE_LIMIT = 50_000
118
+
119
+
120
def _scan_directories_for_vectorization(directories: list[str], file_types: list[str]) -> dict:
    """Count files under *directories* whose extension is in *file_types*.

    Each directory is ``~``-expanded; entries that are not directories, or are
    symlinks, are skipped, and the walk never follows symlinks. Every counted
    file is also checked against ``KNOWN_JUNK_PATTERNS`` and attributed to the
    first pattern that matches its full path. Counting stops as soon as
    ``_SCAN_FILE_LIMIT`` matching files have been seen.

    Returns:
        Dict with keys ``total``, ``by_extension``, ``junk_hits``,
        ``total_after_exclusions`` (total minus all junk hits) and
        ``truncated`` (True when the file limit was reached).
    """
    from fnmatch import fnmatch

    def _matching_files():
        # Lazily yield (lowercased extension, full path) for each candidate file.
        for raw_dir in directories:
            root = os.path.expanduser(raw_dir)
            if not os.path.isdir(root) or os.path.islink(root):
                continue
            for parent, _subdirs, names in os.walk(root, followlinks=False):
                for name in names:
                    suffix = os.path.splitext(name)[1].lower()
                    if suffix in file_types:
                        yield suffix, os.path.join(parent, name)

    ext_counts: dict[str, int] = {}
    junk_counts: dict[str, int] = {}
    seen = 0
    hit_limit = False

    for suffix, path in _matching_files():
        seen += 1
        ext_counts[suffix] = ext_counts.get(suffix, 0) + 1

        # Only the first matching junk pattern is credited for a file.
        first_match = next(
            (pattern for pattern, _desc in KNOWN_JUNK_PATTERNS if fnmatch(path, pattern)),
            None,
        )
        if first_match is not None:
            junk_counts[first_match] = junk_counts.get(first_match, 0) + 1

        if seen >= _SCAN_FILE_LIMIT:
            hit_limit = True
            break

    return {
        "total": seen,
        "by_extension": ext_counts,
        "junk_hits": junk_counts,
        "total_after_exclusions": seen - sum(junk_counts.values()),
        "truncated": hit_limit,
    }
168
+
169
+
170
def get_available_browsers() -> list[str]:
    """List the browsers supported on this platform.

    Chrome is always present; Safari is listed first, and only on macOS
    (``sys.platform == "darwin"``). A new list is returned on every call.
    """
    if sys.platform == "darwin":
        return ["safari", "chrome"]
    return ["chrome"]
176
+
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # argparse registration (for fp CLI router)
180
+ # ---------------------------------------------------------------------------
181
+
182
+
183
def register(subparsers) -> None:
    """Attach the ``fp setup`` parser to *subparsers*.

    Registers the top-level flags (``--hooks`` only when the dev hook script is
    present), plus the ``mcp`` and ``folders`` subcommands, and routes the
    parser to ``_handle_setup``.
    """
    from footprinter.cli._common import FORMATTER

    setup_description = (
        "Interactive setup wizard and system configuration.\n\n"
        "Run with no arguments for the guided wizard (~3 minutes).\n"
        "Use flags to run specific setup tasks."
    )
    setup_epilog = (
        "examples:\n"
        " fp setup Run the interactive wizard\n"
        " fp setup --check Validate existing configuration\n"
        " fp setup --seed-samples Load sample data for exploration\n"
        " fp setup --clear-samples Remove sample data\n"
        " fp setup --test Start throwaway test environment\n"
        " fp setup --endtest End test mode\n"
        " fp setup mcp --claude Configure MCP for Claude Desktop\n"
        " fp setup folders add ~/Work/newdir\n"
        "\n"
        "tip: use 'fp setup <command> --help' for details on subcommands."
    )
    setup_parser = subparsers.add_parser(
        "setup",
        help="Configuration wizard and system setup",
        description=setup_description,
        epilog=setup_epilog,
        formatter_class=FORMATTER,
    )
    setup_parser.set_defaults(func=_handle_setup)

    # Boolean switches, in the order they appear in --help.
    switches = [("--check", "Validate existing configuration and exit")]
    if _hooks_available():
        switches.append(("--hooks", "Install git hooks (sets core.hooksPath to scripts/hooks)"))
    switches.extend(
        [
            ("--reset", "Clear database and vector store, then re-run setup wizard"),
            ("--test", "Start test mode — throwaway environment, zero risk to production data"),
            ("--endtest", "End test mode — remove test environment, return to production data"),
            ("--seed-samples", "Load sample data for exploration (tagged source='sample')"),
            ("--clear-samples", "Remove all sample data and policies"),
        ]
    )
    for switch, switch_help in switches:
        setup_parser.add_argument(switch, action="store_true", help=switch_help)

    commands = setup_parser.add_subparsers(
        dest="setup_action",
        metavar="COMMAND",
        title="commands (one required)",
    )

    # mcp
    mcp_epilog = (
        "examples:\n"
        " fp setup mcp --check Check if already configured\n"
        " fp setup mcp --dry-run Preview config write without changing anything\n"
        " fp setup mcp --claude Write to Claude Desktop config (creates backup)"
    )
    mcp_parser = commands.add_parser(
        "mcp",
        help="Configure MCP integration",
        description=(
            "Configure the MCP server snippet for AI clients.\n\nChecks, previews, or writes the JSON config."
        ),
        epilog=mcp_epilog,
        formatter_class=FORMATTER,
    )
    mcp_parser.add_argument(
        "--check",
        action="store_true",
        dest="mcp_check",
        help="Check if footprinter is configured in any MCP client",
    )
    mcp_parser.add_argument(
        "--claude",
        action="store_true",
        help="Write/merge snippet into Claude Desktop config (creates backup)",
    )
    mcp_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview config write without changing anything",
    )

    # folders (add/remove only — list is now fp folder list)
    folders_parser = commands.add_parser(
        "folders",
        help="Manage indexed folders",
        description=(
            "Add or remove directories from the indexing configuration.\n\n"
            "Use 'fp folder list' to view indexed folders."
        ),
        epilog=("examples:\n fp setup folders add ~/Work/newproject\n fp setup folders remove ~/Work/old"),
        formatter_class=FORMATTER,
    )
    folder_commands = folders_parser.add_subparsers(
        dest="folders_command",
        metavar="COMMAND",
        title="commands (one required)",
    )

    add_parser = folder_commands.add_parser(
        "add",
        help="Add a directory to index",
        description="Add a directory path to the indexing configuration.",
        formatter_class=FORMATTER,
    )
    add_parser.add_argument("path", help="Directory path to add")
    add_parser.add_argument(
        "--no-index",
        action="store_true",
        help="Skip running the indexer after adding",
    )

    remove_parser = folder_commands.add_parser(
        "remove",
        help="Remove a directory from config",
        description="Remove a directory from the indexing configuration.",
        formatter_class=FORMATTER,
    )
    remove_parser.add_argument("path", help="Directory path to remove")
314
+
315
+
316
def _handle_setup(args) -> None:
    """Run the ``fp setup`` dispatch, turning a cancellation into exit code 130.

    Both ``PromptCancelled`` and ``KeyboardInterrupt`` are treated as the user
    backing out: a short notice is printed and the process exits with 130.
    """
    user_cancelled = False
    try:
        _handle_setup_inner(args)
    except (PromptCancelled, KeyboardInterrupt):
        user_cancelled = True

    if user_cancelled:
        console.print("\n[dim]Setup cancelled.[/dim]")
        sys.exit(130)
323
+
324
+
325
def _dispatch_mcp(args) -> None:
    """Handle ``fp setup mcp`` for both the router and the standalone ``main()``.

    Order of operations: ``--check`` exits with ``check_config()``'s code; then
    the MCP package must be available (otherwise exit 1); then ``--claude`` or
    ``--dry-run`` writes the config and exits 0/1; with no flag the snippet is
    printed.
    """
    # --check is answered first so it works even without the mcp extras.
    wants_check = getattr(args, "mcp_check", False)
    if wants_check:
        sys.exit(mcp_setup.check_config())

    # Everything past this point needs the mcp dependency.
    mcp_ready = mcp_setup.is_mcp_available()
    if not mcp_ready:
        console.print("[red]MCP package not installed.[/red] Install with: pip install mcp")
        sys.exit(1)

    snippet = mcp_setup.generate_snippet()

    wants_write = getattr(args, "claude", False) or getattr(args, "dry_run", False)
    if wants_write:
        succeeded = mcp_setup.write_config(snippet, dry_run=args.dry_run)
        exit_code = 0 if succeeded else 1
        sys.exit(exit_code)

    # No flag given: just show the snippet.
    mcp_setup.print_snippet(snippet)
344
+
345
+
346
def _handle_setup_inner(args) -> None:
    """Route parsed ``fp setup`` arguments to the matching handler.

    Kept apart from ``_handle_setup`` so that a cancellation raised anywhere
    below is caught by that wrapper. Precedence: ``mcp`` / ``folders``
    subcommands, then ``--seed-samples``, ``--clear-samples``, ``--test``,
    ``--endtest``, ``--reset``, ``--hooks``, ``--check``, and finally the
    interactive wizard.
    """
    subcommand = getattr(args, "setup_action", None)

    if subcommand == "mcp":
        _dispatch_mcp(args)
        return

    if subcommand == "folders":
        folders_cmd = getattr(args, "folders_command", None)
        if folders_cmd == "add":
            sys.exit(folders_add(args.path, index=not args.no_index))
        elif folders_cmd == "remove":
            sys.exit(folders_remove(args.path))
        else:
            console.print("[yellow]Usage: fp setup folders add|remove[/yellow]")
        return

    # One-shot flags: the first one set wins and nothing else runs.
    one_shot_flags = (
        ("seed_samples", _do_seed_samples),
        ("clear_samples", _do_clear_samples),
        ("test", _start_test_mode),
        ("endtest", _end_test_mode),
    )
    for flag_name, handler in one_shot_flags:
        if getattr(args, flag_name, False):
            handler()
            return

    if getattr(args, "reset", False):
        database_file = get_db_path()
        vector_dir = get_chroma_path()

        if is_test_mode():
            console.print(
                "[yellow]Note: you are in test mode — this will reset the test environment, not production.[/yellow]"
            )

        console.print(
            "[bold yellow]This will delete all indexed data.[/bold yellow]\nConfig and credentials are preserved."
        )

        proceed = Confirm.ask("Continue?")
        if not proceed:
            console.print("[dim]Reset cancelled.[/dim]")
            return

        removed: list[str] = []
        if database_file.exists():
            database_file.unlink()
            removed.append(str(database_file))
        if vector_dir.exists():
            shutil.rmtree(vector_dir)
            removed.append(str(vector_dir))

        if not removed:
            console.print("[dim]Nothing to clear (no existing data found).[/dim]")
        else:
            console.print(f"[green]Cleared:[/green] {', '.join(removed)}")

        # A reset always continues into a fresh wizard run.
        run_interactive_wizard()
        return

    if getattr(args, "hooks", False):
        sys.exit(install_git_hooks())
    elif getattr(args, "check", False):
        sys.exit(check_existing_config())
    else:
        run_interactive_wizard()
418
+
419
+
420
+ # ---------------------------------------------------------------------------
421
+ # Sample data helpers
422
+ # ---------------------------------------------------------------------------
423
+
424
+
425
def _do_seed_samples() -> None:
    """Load the sample data set into the policy database and report the counts.

    Prints a hint and returns when no database connection is available. Any
    failure while seeding is reported as a warning rather than raised; the
    connection is closed in every case.
    """
    from footprinter.cli._sample_seed import seed_samples

    connection = _get_db_connection()
    if connection is None:
        console.print("[red]No database found.[/red] Run 'fp setup' first.")
        return

    try:
        summary = seed_samples(connection)
        files_count = summary["files_created"]
        policies_count = summary["policies_seeded"]
        console.print(f"[green]Sample data seeded:[/green] {files_count} files, {policies_count} policies")
        console.print(" [dim]See: fp setup --clear-samples to remove[/dim]")
    except Exception as exc:  # Intentional broad catch: setup wizard step must not crash
        console.print(f"[yellow]Sample seeding failed: {exc}[/yellow]")
    finally:
        connection.close()
446
+
447
+
448
def _do_clear_samples() -> None:
    """Delete the sample data set and its policies, then report what was removed.

    Prints a notice and returns when no database connection is available. Any
    failure while clearing is reported as a warning rather than raised; the
    connection is closed in every case.
    """
    from footprinter.cli._sample_seed import clear_samples

    connection = _get_db_connection()
    if connection is None:
        console.print("[red]No database found.[/red]")
        return

    try:
        summary = clear_samples(connection)
        files_count = summary["files_removed"]
        folders_count = summary["folders_removed"]
        policies_count = summary["policies_removed"]
        console.print(
            f"[green]Sample data cleared:[/green] {files_count} files, "
            f"{folders_count} folders, {policies_count} policies removed"
        )
    except Exception as exc:  # Intentional broad catch: setup wizard step must not crash
        console.print(f"[yellow]Sample clearing failed: {exc}[/yellow]")
    finally:
        connection.close()
469
+
470
+
471
+ # ---------------------------------------------------------------------------
472
+ # Test mode helpers
473
+ # ---------------------------------------------------------------------------
474
+
475
+
476
def _start_test_mode() -> None:
    """Set up a throwaway test environment and launch the wizard inside it.

    Steps: wipe the previous environment if the marker says test mode is
    already active, (re)create the environment directory, copy the production
    ``config.yaml`` into it when one exists, write the marker file containing
    the environment path, then run the interactive wizard.
    """
    home = get_real_home()
    marker_file = home / _TEST_MARKER_NAME
    env_dir = home / _TEST_ENV_NAME

    if marker_file.is_file():
        console.print("[yellow]Test mode already active — wiping previous environment.[/yellow]")
        if env_dir.exists():
            shutil.rmtree(env_dir)

    env_dir.mkdir(parents=True, exist_ok=True)

    # Seed the test environment with the production settings, if any.
    production_config = home / "config.yaml"
    if production_config.is_file():
        shutil.copy2(production_config, env_dir / "config.yaml")

    marker_file.write_text(str(env_dir))
    console.print(f"[green]Test mode started.[/green] Data at {env_dir}")
    run_interactive_wizard()
497
+
498
+
499
def _end_test_mode() -> None:
    """Remove the test environment and marker, returning to production data.

    Does nothing (beyond a notice) when the marker file is absent. Otherwise
    the test environment directory is deleted and the marker is removed.

    Safety: only the directory that ``_start_test_mode`` creates
    (``<real home>/<_TEST_ENV_NAME>``) is ever deleted. The marker's contents
    are not trusted as a deletion target, because an empty or truncated marker
    would yield ``Path("")`` — i.e. the current working directory — and a
    corrupted one could name any directory on disk.
    """
    real_home = get_real_home()
    marker = real_home / _TEST_MARKER_NAME

    if not marker.is_file():
        console.print("[dim]Not in test mode — nothing to do.[/dim]")
        return

    # Same location _start_test_mode writes into the marker.
    expected_env = real_home / _TEST_ENV_NAME

    recorded = marker.read_text().strip()
    if recorded and Path(recorded).resolve() != expected_env.resolve():
        # Never rmtree a path we did not create; tell the user so they can
        # inspect or clean it up manually.
        console.print(
            f"[yellow]Test marker points to an unexpected location ({recorded}); leaving it untouched.[/yellow]"
        )

    if expected_env.exists():
        shutil.rmtree(expected_env)
    marker.unlink()
    console.print("[green]Test mode ended.[/green] Back to production data.")
513
+
514
+
515
+ # ---------------------------------------------------------------------------
516
+ # Standalone entry point (fp setup)
517
+ # ---------------------------------------------------------------------------
518
+
519
+
520
def main():
    """CLI entry point for fp setup.

    Builds a standalone argument parser (``--check``, optional ``--hooks``,
    ``--seed-samples``, ``--clear-samples``, and the ``mcp`` / ``folders``
    subcommands), parses ``sys.argv`` and dispatches to the matching handler.
    With no flags the interactive wizard runs.

    A cancelled prompt or Ctrl-C is handled the same way as in the router path
    (``_handle_setup``): a short notice is printed and the process exits with
    status 130 instead of surfacing a traceback.
    """
    parser = argparse.ArgumentParser(
        prog="fp setup",
        description="Interactive setup wizard for Footprinter",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Validate existing configuration and exit",
    )
    # --hooks is a dev-only flag; it is offered only when the hook script exists.
    if _hooks_available():
        parser.add_argument(
            "--hooks",
            action="store_true",
            help="Install git hooks (sets core.hooksPath to scripts/hooks)",
        )

    subparsers = parser.add_subparsers(dest="subcommand")
    mcp_parser = subparsers.add_parser(
        "mcp",
        help="Configure MCP integration",
    )
    mcp_parser.add_argument(
        "--check",
        action="store_true",
        dest="mcp_check",
        help="Check if footprinter is configured in any MCP client",
    )
    mcp_parser.add_argument(
        "--claude",
        action="store_true",
        help="Write/merge snippet into Claude Desktop config (creates backup)",
    )
    mcp_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview config write without changing anything",
    )

    folders_parser = subparsers.add_parser(
        "folders",
        help="Manage indexed folders",
    )
    folders_sub = folders_parser.add_subparsers(dest="folders_command")
    add_parser = folders_sub.add_parser("add", help="Add a directory to index")
    add_parser.add_argument("path", help="Directory path to add")
    add_parser.add_argument(
        "--no-index",
        action="store_true",
        help="Skip running the indexer after adding",
    )
    remove_parser = folders_sub.add_parser("remove", help="Remove a directory from config")
    remove_parser.add_argument("path", help="Directory path to remove")

    parser.add_argument(
        "--seed-samples",
        action="store_true",
        help="Load sample data for exploration (tagged source='sample')",
    )
    parser.add_argument(
        "--clear-samples",
        action="store_true",
        help="Remove all sample data and policies",
    )

    args = parser.parse_args()

    try:
        if args.subcommand == "mcp":
            _dispatch_mcp(args)
            return

        if args.subcommand == "folders":
            cmd = getattr(args, "folders_command", None)
            if cmd == "add":
                sys.exit(folders_add(args.path, index=not args.no_index))
            elif cmd == "remove":
                sys.exit(folders_remove(args.path))
            else:
                folders_parser.print_help()
            return

        if args.seed_samples:
            _do_seed_samples()
        elif args.clear_samples:
            _do_clear_samples()
        elif getattr(args, "hooks", False):
            # getattr: the flag is only registered when hooks are available.
            sys.exit(install_git_hooks())
        elif args.check:
            sys.exit(check_existing_config())
        else:
            run_interactive_wizard()
    except (PromptCancelled, KeyboardInterrupt):
        # Mirror _handle_setup so both entry points cancel the same way.
        # SystemExit raised by sys.exit() above is not caught here.
        console.print("\n[dim]Setup cancelled.[/dim]")
        sys.exit(130)
612
+
613
+
614
def check_existing_config() -> int:
    """Validate the existing configuration and print a report.

    The report covers validation errors/warnings, an architecture warning, any
    missing core dependencies, and a table of optional features.

    Returns:
        1 when the config cannot be loaded, has validation errors, or a core
        dependency is missing; 0 otherwise.
    """
    try:
        config = get_config()
    except ConfigError as exc:
        console.print(f"[red]Config error:[/red] {exc}")
        return 1

    problems, notices = validate_config(config)
    if problems:
        console.print("[red]Configuration errors:[/red]")
        for problem in problems:
            console.print(f" - {problem}")
        return 1

    console.print("[green]Configuration is valid.[/green]")
    if notices:
        console.print("[yellow]Warnings:[/yellow]")
        for notice in notices:
            console.print(f" - {notice}")

    # Architecture check
    rosetta_note = check_architecture()
    if rosetta_note:
        console.print()
        console.print(f"[yellow]Architecture warning:[/yellow] {rosetta_note}")

    # Core dependency check — only surface errors
    unavailable_core = [dep_name for dep_name, is_available in check_core_deps() if not is_available]
    if unavailable_core:
        console.print()
        console.print(f"[red]Missing core dependencies:[/red] {', '.join(unavailable_core)}")
        console.print("Reinstall with: pip install footprinter-cli")

    # Optional features table
    feature_rows = check_optional_features(config)
    console.print()
    report = Table(title="Optional Features", show_header=True, header_style="bold")
    report.add_column("Feature", style="cyan")
    report.add_column("Status")

    for feature_name, is_installed, is_enabled, install_hint in feature_rows:
        if is_installed and is_enabled:
            status_cell = "[green]enabled[/green]"
        elif is_installed:
            status_cell = "[dim]installed, not enabled[/dim]"
        else:
            status_cell = f"[yellow]not installed[/yellow] — {install_hint}"
        report.add_row(feature_name, status_cell)

    console.print(report)

    if unavailable_core:
        return 1
    return 0
671
+
672
+
673
def _is_importable(module_name: str) -> bool:
    """Return True if *module_name* can be imported.

    The module is actually imported, so a package that is installed but broken
    counts as not importable. That includes packages that fail with something
    other than ``ImportError`` while initialising (for example a dependency
    that raises ``RuntimeError`` over an unsupported system library): the
    probe reports ``False`` instead of crashing the caller.

    Args:
        module_name: Dotted module name to probe.

    Returns:
        True when the import succeeds, False when it raises any ``Exception``.
    """
    try:
        __import__(module_name)
    except ImportError:
        return False
    except Exception as exc:  # Intentional broad catch: a broken optional dependency must not crash setup
        logging.getLogger(__name__).debug(
            "Import of %s failed with %s: %s", module_name, type(exc).__name__, exc
        )
        return False
    return True
680
+
681
+
682
def check_core_deps() -> list[tuple[str, bool]]:
    """Probe the hard-required packages and return ``(name, available)`` pairs.

    Core deps are mandatory — a ``False`` entry means the install is broken.
    """
    required = (("PyYAML", "yaml"), ("Rich", "rich"))
    return [(label, _is_importable(module)) for label, module in required]
691
+
692
+
693
def check_optional_features(
    config: dict,
) -> list[tuple[str, bool, bool | None, str]]:
    """Check optional features against install state *and* config.

    Semantic Search is reported first (installed when both ``chromadb`` and
    ``onnxruntime`` import; enabled when either ``file_vectorization`` or
    ``chat_vectorization`` is set under ``semantic``). It is followed by every
    feature declared by a discovered connector (enabled when the connector's
    config section has ``enabled`` set).

    A config section that is missing, ``null`` (a YAML key with no children)
    or not a mapping is treated as empty, so the feature is reported as not
    enabled rather than raising ``AttributeError``.

    Args:
        config: Parsed config dict.

    Returns:
        ``(name, installed, enabled, hint)`` for each feature. ``enabled`` is
        ``None`` when not applicable (shouldn't happen currently).
    """

    def _section(name: str) -> dict:
        # Tolerate absent, null and scalar sections alike.
        value = config.get(name) if isinstance(config, dict) else None
        return value if isinstance(value, dict) else {}

    features: list[tuple[str, bool, bool | None, str]] = []

    # Semantic Search (chromadb + onnxruntime)
    sem_installed = _is_importable("chromadb") and _is_importable("onnxruntime")
    sem_cfg = _section("semantic")
    sem_enabled = sem_cfg.get("file_vectorization", False) or sem_cfg.get("chat_vectorization", False)
    features.append(("Semantic Search", sem_installed, sem_enabled, "pip install footprinter-cli[semantic]"))

    # Connector-declared features (dynamic)
    from footprinter.connectors import discover_connectors

    for spec in discover_connectors().values():
        for feat_name, probe, cfg_section, hint in spec.features:
            installed = _is_importable(probe)
            enabled = _section(cfg_section).get("enabled", False)
            features.append((feat_name, installed, enabled, hint))

    return features
719
+
720
+
721
+ def check_architecture() -> str | None:
722
+ """Check for architecture mismatches. Returns warning string or None."""
723
+ import platform
724
+
725
+ machine = platform.machine()
726
+ # Detect Rosetta: arm64 hardware but x86_64 Python.
727
+ # hw.optional.arm64 returns 1 on Apple Silicon even under Rosetta,
728
+ # unlike hw.machine which reports x86_64 under Rosetta.
729
+ if machine == "x86_64":
730
+ try:
731
+ hw = subprocess.run(["sysctl", "-n", "hw.optional.arm64"], capture_output=True, text=True)
732
+ if hw.stdout.strip() == "1":
733
+ return (
734
+ "Python is running as x86_64 on arm64 hardware (Rosetta). "
735
+ "Native dependencies may have compatibility issues. "
736
+ "Consider recreating venv with native arm64 Python."
737
+ )
738
+ except Exception:
739
+ pass # Best-effort Rosetta detection; sysctl may not exist on non-macOS
740
+ return None
741
+
742
+
743
def install_git_hooks() -> int:
    """Point the checkout's ``core.hooksPath`` at ``scripts/hooks``.

    Fails when the ``post-merge`` hook script is missing, when the repo root
    is not a git repository, when ``git`` is not found, or when the
    ``git config`` call returns a non-zero status.

    Returns:
        0 on success, 1 on failure.
    """
    root = _repo_root()
    post_merge = root / "scripts" / "hooks" / "post-merge"

    def _git(*git_args: str):
        # All git calls run from the checkout root with captured text output.
        return subprocess.run(["git", *git_args], cwd=str(root), capture_output=True, text=True)

    if not post_merge.exists():
        console.print(f"[red]Hook script not found:[/red] {post_merge}")
        return 1

    # Check we're in a git repo
    try:
        repo_probe = _git("rev-parse", "--git-dir")
    except FileNotFoundError:
        console.print("[red]git not found.[/red]")
        return 1
    if repo_probe.returncode != 0:
        console.print("[red]Not a git repository.[/red]")
        return 1

    # Set core.hooksPath
    configured = _git("config", "--local", "core.hooksPath", "scripts/hooks")
    if configured.returncode != 0:
        console.print(f"[red]Failed to set core.hooksPath:[/red] {configured.stderr.strip()}")
        return 1

    console.print("[green]Git hooks installed.[/green]")
    console.print(" core.hooksPath = [cyan]scripts/hooks[/cyan]")
    console.print(f" post-merge hook: [cyan]{post_merge.relative_to(root)}[/cyan]")
    return 0
787
+
788
+
789
def validate_config(config: dict) -> tuple[list[str], list[str]]:
    """Validate a config dict and return errors and warnings.

    Malformed input (a non-mapping document, non-string directory entries,
    non-string browser entries) is reported as an error rather than raised.

    Args:
        config: Parsed YAML config dict (None when the file was empty).

    Returns:
        Tuple of (errors, warnings). Empty errors means valid.
    """
    errors: list[str] = []

    if config is None:
        errors.append("Config is empty or invalid YAML")
        return errors, []
    if not isinstance(config, dict):
        # A YAML document whose top level is a list or scalar parses fine
        # but has no keys to validate.
        errors.append("Config must be a mapping of settings")
        return errors, []

    # directories is required and must be a non-empty list
    dirs = config.get("directories")
    missing_dirs: list[str] = []
    if not dirs:
        errors.append("'directories' is missing or empty")
    elif not isinstance(dirs, list):
        errors.append("'directories' must be a list")
    else:
        for d in dirs:
            if not isinstance(d, str):
                # expanduser() would raise TypeError on e.g. a number or null.
                errors.append(f"'directories' entries must be strings, got: {d!r}")
                continue
            if not os.path.isdir(os.path.expanduser(d)):
                missing_dirs.append(d)

    # browsers must be a list (can be empty)
    browsers = config.get("browsers")
    if browsers is None:
        errors.append("'browsers' key is missing")
    elif not isinstance(browsers, list):
        errors.append("'browsers' must be a list")
    else:
        for b in browsers:
            # The isinstance guard keeps unhashable entries (nested lists or
            # mappings) away from the membership test.
            if not isinstance(b, str) or b not in KNOWN_BROWSERS:
                errors.append(f"Unknown browser: {b}")

    # Absent directories are a warning, not an error — the bundled example
    # lists macOS-flavored defaults (~/Work, ~/Personal, ~/.claude) that a
    # fresh Linux install won't have. Let `fp setup --check` pass and point
    # the user at what's missing instead of rejecting the whole config.
    warnings: list[str] = []
    if missing_dirs:
        warnings.append(
            "Directories not found (will be skipped during indexing): "
            + ", ".join(missing_dirs)
        )
    if "exclusions" not in config:
        warnings.append("'exclusions' section missing — default exclusions will be used")
    if "indexing" not in config:
        warnings.append("'indexing' section missing — default settings will be used")

    return errors, warnings
844
+
845
+
846
def _print_phase(step: int, total: int, name: str):
    """Print a blank line, then a dim Rule titled with the current step."""
    heading = f"[bold]Step {step} of {total} — {name}[/bold]"
    console.print()
    console.print(Rule(heading, style="dim"))
850
+
851
+
852
def _choose_preset() -> dict | None:
    """Ask which setup profile to use.

    Returns:
        A preset dict (``directories`` + empty ``browsers``) for the quick
        profile, or None when the user picks full setup or no quick-start
        candidate directory exists.
    """
    console.print(" [bold]Quick start[/bold] — common directories, no email, browser or chat history (add more later)")
    console.print(" [bold]Full setup[/bold] — choose everything yourself")
    if Prompt.ask(" Profile", choices=["quick", "full"], default="full") != "quick":
        return None

    # Keep only the candidate directories that exist on this machine.
    found = []
    for candidate in QUICK_START_CANDIDATES:
        if os.path.isdir(os.path.expanduser(candidate)):
            found.append(candidate)

    if found:
        return {"directories": found, "browsers": []}
    console.print(" [yellow]No common directories found — switching to full setup[/yellow]")
    return None
864
+
865
+
866
def run_interactive_wizard():
    """Run the full interactive setup flow.

    The flow has 6 phases: Welcome, Data Sources, Confirm & Write,
    Populate, Connect, Summary. When an existing configuration is found the
    wizard runs in reconfigure mode: presets are skipped and the existing
    config is passed to the collection and generation helpers.

    PromptCancelled and KeyboardInterrupt are not handled here; they
    propagate to the caller (``_handle_setup``), which prints the
    cancellation message and exits with code 130.
    """
    existing = _load_existing_config()
    reconfiguring = existing is not None

    # Phase 1: Welcome
    _print_phase(1, 6, "Welcome")
    intro = (
        "[bold]Footprinter Setup Wizard[/bold]\n\n"
        "Footprinter indexes your files, browser history, emails, and chat\n"
        "exports for AI-powered search and analysis.\n\n"
        "[bold]Phases:[/bold]\n"
        " 1. Welcome — what Footprinter does\n"
        " 2. Data Sources — directories, browsers, chat exports\n"
        " 3. Confirm & Write — preview and save configuration\n"
        " 4. Populate — index your data\n"
        " 5. Connect — access policies and Claude Desktop\n"
        " 6. Summary — results and next steps"
    )
    if sys.platform == "darwin":
        intro += (
            "\n\n[dim]Prerequisites (optional, can add later):[/dim]\n"
            " - Full Disk Access for Safari history (System Settings > Privacy & Security)"
        )
    if reconfiguring:
        intro += (
            "\n\n[bold yellow]Existing configuration detected.[/bold yellow]\n"
            " Current settings will be shown as defaults. Only sections\n"
            " you explicitly change will be updated."
        )
    console.print(Panel(intro, title="fp setup"))

    # Phase 2: Data Sources
    _print_phase(2, 6, "Data Sources")
    # Reconfigure mode never offers the preset choice.
    preset = None if reconfiguring else _choose_preset()
    connector_results = {}
    if preset:
        answers = {"directories": preset["directories"], "browsers": preset["browsers"]}
        chat_export_path = None
        semantic_answers = collect_vectorization_answers(directories=preset["directories"], quick=True)
    else:
        answers = collect_answers(existing=existing)
        chat_export_path = collect_chat_export_path()
        semantic_answers = collect_vectorization_answers(directories=answers["directories"], existing=existing)

    # Phase 3: Confirm & Write
    _print_phase(3, 6, "Confirm & Write")
    preview_config(
        answers,
        connectors=connector_results,
        chat_export_path=chat_export_path,
        semantic=semantic_answers,
    )

    if not Confirm.ask("Write this configuration?", default=True):
        console.print("[dim]Setup cancelled.[/dim]")
        return

    write_config(
        generate_config(
            answers,
            connector_results=connector_results,
            semantic=semantic_answers,
            existing=existing,
        )
    )

    # Phase 4: Populate
    _print_phase(4, 6, "Populate")

    # Truncate setup log before first orchestrator call
    log_file = get_log_path()
    log_file.parent.mkdir(parents=True, exist_ok=True)
    log_file.write_text("")

    # Describe what the populate step will run, based on the answers so far.
    optional_stages = (
        ("browser history", answers.get("browsers")),
        ("chat import", chat_export_path),
    )
    planned = ["local file indexing"]
    planned.extend(label for label, wanted in optional_stages if wanted)
    console.print(f" This will run: {', '.join(planned)}.")

    chat_result = {}
    if not Confirm.ask("Index and analyze your data now?", default=True):
        console.print(" [dim]Skipped. Run later: fp ingest[/dim]")
    else:
        try:
            run_orchestrator(answers, connector_results=connector_results)
        except Exception as e:  # Intentional broad catch: setup wizard step must not crash the wizard
            console.print(f" [yellow]Indexing error: {e}[/yellow]")
        if chat_export_path:
            try:
                chat_result = import_chat_export(chat_export_path)
            except Exception as e:  # Intentional broad catch: setup wizard step must not crash the wizard
                console.print(f" [yellow]Chat import error: {e}[/yellow]")

    # CSV import step — between data indexing and access policies
    _offer_csv_import_wizard()

    # Phase 5: Connect
    _print_phase(5, 6, "Connect")
    seed_access_policies()
    mcp_configured = offer_setup_claude()

    # Optional: seed sample data
    if Confirm.ask("\nSeed sample data for exploration?", default=False):
        _do_seed_samples()

    # Phase 6: Summary
    _print_phase(6, 6, "Summary")
    print_summary(
        chat_result=chat_result,
        mcp_configured=mcp_configured,
        connector_results=connector_results,
    )
992
+
993
+
994
def _offer_csv_import_wizard() -> None:
    """Open the database and run the CSV import prompt against it.

    A ``SystemExit`` raised while the database is open (``open_db`` exits
    when the DB is not found) is reported as a skipped step rather than
    ending the wizard.
    """
    from footprinter.cli._common import open_db

    try:
        with open_db() as db_conn:
            _offer_csv_import(db_conn)
    except SystemExit:
        # open_db exits if DB not found — not an error during setup
        console.print(" [dim]Database not ready — skipping CSV import.[/dim]")
1004
+
1005
+
1006
def _offer_csv_import(conn) -> None:
    """Prompt user to import clients/projects from CSV files.

    Loops until the user enters an empty path to finish. For each file the
    entity type is detected from the CSV headers (``client_type`` → clients,
    ``project_name`` → projects), required columns are checked, and the rows
    are handed to ``_process_csv_rows``. A summary table and up to five row
    errors are printed afterwards.

    NOTE(review): rows are processed as soon as the file validates — there
    is no confirmation prompt in this function, although the earlier
    docstring mentioned one. Confirm whether a confirm step is intended.

    Args:
        conn: Open database connection passed through to ``_process_csv_rows``.
    """
    import csv as csv_mod

    console.print("\n[bold]Import clients/projects from CSV[/bold]")
    console.print(
        " If you have a spreadsheet of clients or projects, paste the file path.\n"
        " [dim]Leave blank to skip. You can import later with: fp upsert clients data.csv --commit[/dim]"
    )

    while True:
        path_str = Prompt.ask(" CSV file path (blank to skip)", default="")
        if not path_str:
            return

        csv_path = Path(path_str).expanduser()
        if not csv_path.exists():
            console.print(f" [red]File not found: {csv_path}[/red]")
            continue

        # Read headers to detect entity type
        try:
            # utf-8-sig drops the byte-order mark that spreadsheet exports
            # often prepend; with plain utf-8 the BOM becomes part of the
            # first header name and header detection below fails. Files
            # without a BOM decode exactly as before.
            with open(csv_path, encoding="utf-8-sig", newline="") as f:
                reader = csv_mod.DictReader(f)
                headers = reader.fieldnames or []
                rows = list(reader)
        except Exception as e:  # Intentional broad catch: setup wizard step must not crash the wizard
            console.print(f" [red]Could not read CSV: {e}[/red]")
            continue

        if not rows:
            console.print(" [dim]Empty CSV — nothing to import.[/dim]")
            continue

        # Detect entity type from headers
        if "client_type" in headers:
            entity_type = "client"
            svc_name = "client_service"
        elif "project_name" in headers:
            entity_type = "project"
            svc_name = "project_service"
        else:
            console.print(
                " [red]Could not detect CSV type.[/red] Expected 'client_type' "
                "(for clients) or 'project_name' (for projects) in headers."
            )
            continue

        from footprinter.cli.upsert import CSV_COLUMNS, _process_csv_rows

        required_cols, optional_cols, int_cols = CSV_COLUMNS[entity_type]

        # Check required columns
        missing = set(required_cols) - set(headers)
        if missing:
            console.print(f" [red]Missing required columns: {', '.join(sorted(missing))}[/red]")
            continue

        import footprinter.services as svc

        service = getattr(svc, svc_name)

        created, updated, errors, error_details = _process_csv_rows(
            conn,
            rows,
            service,
            entity_type,
            required_cols,
            optional_cols,
            int_cols,
        )

        # Show summary
        table = Table(title=f"CSV Import — {entity_type}s")
        table.add_column("Metric", style="cyan")
        table.add_column("Count", justify="right")
        table.add_row("Created", str(created))
        table.add_row("Updated", str(updated))
        table.add_row("Errors", str(errors))
        console.print(table)

        if error_details:
            # Only the first five row errors are listed; the rest are counted.
            for err in error_details[:5]:
                console.print(f" [yellow]Row {err['row']}: {err['error']}[/yellow]")
            if len(error_details) > 5:
                console.print(f" [dim]... and {len(error_details) - 5} more errors[/dim]")

        console.print(f" [green]Imported {created} new, updated {updated} existing {entity_type}(s).[/green]")
1099
+
1100
+
1101
def collect_answers(existing: dict | None = None) -> dict:
    """Gather directory and browser choices via rich prompts.

    Args:
        existing: Optional existing config dict. When provided, its current
            directories and browsers are shown and can be kept as-is.

    Returns:
        Dict with keys: directories, browsers.
    """
    current = existing or {}

    # --- Directories ---
    console.print("\n[bold]1. Directories to scan[/bold]")
    console.print(
        " Footprinter will scan these directories for files to index —\n"
        " metadata, content types, and project structure.\n"
        " [dim]Common choices: ~/Work, ~/Personal, ~/Documents[/dim]\n"
        " [dim]Use ~ for your home directory.[/dim]"
    )

    current_dirs = current.get("directories", [])
    keep_dirs = False
    if current_dirs:
        console.print(f" Current directories: {', '.join(current_dirs)}")
        keep_dirs = Confirm.ask(" Keep current directories?", default=True)

    if keep_dirs:
        directories = list(current_dirs)
        # Kept directories can still be extended with new ones.
        console.print(" [dim]You can add more directories below (leave blank to continue).[/dim]")
        while True:
            extra = Prompt.ask(" Add another directory (leave blank to finish)", default="")
            if not extra:
                break
            if not Path(extra).expanduser().is_dir():
                console.print(f" [red]Directory not found: {extra}[/red]")
                continue
            directories.append(extra)
            console.print(f" [green]✓[/green] Added {extra}")
    else:
        # Nothing to keep, or the user chose to re-enter everything.
        directories = _collect_directories_from_scratch()

    # --- Browsers ---
    console.print("\n[bold]2. Browser history[/bold]")
    console.print(
        " Optionally index your browsing history for search and context.\n"
        " [dim]You can enable this later in config.yaml.[/dim]"
    )

    current_browsers = current.get("browsers", [])
    keep_browsers = False
    if current_browsers:
        console.print(f" Currently enabled: {', '.join(current_browsers)}")
        keep_browsers = Confirm.ask(" Keep current browser settings?", default=True)

    browsers = list(current_browsers) if keep_browsers else _collect_browsers_from_scratch()

    return {"directories": directories, "browsers": browsers}
1164
+
1165
+
1166
def _collect_directories_from_scratch() -> list[str]:
    """Prompt for directories until at least one has been chosen.

    Each round first asks for paths one at a time (only existing directories
    are accepted), then offers every entry of ``OPTIONAL_DIRECTORIES`` that
    exists on disk. A round that ends with nothing selected starts over.
    """
    while True:
        chosen = []

        # Manual entry: the first prompt has no default, later ones accept
        # a blank answer to finish.
        while True:
            if chosen:
                entered = Prompt.ask(" Add another directory (leave blank to finish)", default="")
            else:
                entered = Prompt.ask(" Enter directory path", default=...)
            if not entered:
                break
            if not os.path.isdir(os.path.expanduser(entered)):
                console.print(f" [red]Directory not found: {entered}[/red]")
                continue
            chosen.append(entered)
            console.print(f" [green]✓[/green] Added {entered}")

        # Offer optional directories if they exist
        for optional in OPTIONAL_DIRECTORIES:
            if not os.path.isdir(os.path.expanduser(optional)):
                continue
            if optional == "~/.claude":
                console.print(" [dim]~/.claude contains Claude Code settings and chat history[/dim]")
            if Confirm.ask(f" Include {optional}?", default=False):
                chosen.append(optional)

        if chosen:
            return chosen
        console.print(" [red]At least one directory is required.[/red]")
1198
+
1199
+
1200
def _collect_browsers_from_scratch() -> list[str]:
    """Ask, for each available browser, whether to include it.

    Returns:
        The browsers the user confirmed, in the order
        ``get_available_browsers()`` yields them.
    """
    permission_notes = {
        "safari": "[dim](requires Full Disk Access)[/dim]",
        "chrome": "[dim](no additional permissions needed)[/dim]",
    }
    selected = []
    for name in get_available_browsers():
        note = permission_notes.get(name, "")
        wanted = Confirm.ask(f" Include {name}? {note}", default=True)
        if wanted:
            selected.append(name)
    return selected
1212
+
1213
+
1214
def _check_semantic_deps() -> bool:
    """Ensure chromadb and onnxruntime are importable, offering a pip install.

    Returns:
        True when both modules are already importable or the offered
        install command exits with status 0; False otherwise.
    """
    if all(_is_importable(module) for module in ("chromadb", "onnxruntime")):
        return True

    console.print("\n [yellow]Semantic search requires chromadb and onnxruntime.[/yellow]")
    if Confirm.ask(" Install now? (pip install footprinter-cli[semantic])", default=True):
        # Install with the interpreter that is running the wizard.
        pip_cmd = [sys.executable, "-m", "pip", "install", "footprinter-cli[semantic]"]
        outcome = subprocess.run(pip_cmd, capture_output=True, text=True)
        if outcome.returncode == 0:
            console.print(" [green]✓[/green] Semantic dependencies installed.")
            return True
        console.print(f" [red]Install failed:[/red] {outcome.stderr.strip()}")

    console.print(" [dim]You can enable semantic search later with fp setup.[/dim]")
    return False
1234
+
1235
+
1236
def collect_vectorization_answers(
    directories: list[str],
    existing: dict | None = None,
    quick: bool = False,
) -> dict:
    """Ask about content indexing: snippets and vectorization.

    Covers both content-extraction decisions in one section:
    - Content snippets: stored previews used for keyword search
    - Semantic search: embeddings for meaning-based search

    Args:
        directories: Directories handed to the quick/full collector.
        existing: Optional existing config dict supplying prompt defaults.
        quick: If True, delegate to the quick collector; otherwise the full one.

    Returns:
        The collector's dict (file_vectorization, chat_vectorization,
        file_types, exclude_patterns) with content_snippets (bool) added.
    """
    prior = existing or {}
    vec_cfg = prior.get("vectorization", {})
    semantic_cfg = prior.get("semantic", {})
    snippets_default = prior.get("indexing", {}).get("content_snippets", False)
    file_types = vec_cfg.get("file_types", list(DEFAULT_FILE_TYPES))
    excludes = vec_cfg.get("exclude_patterns", [])

    console.print("\n[bold]Content Indexing[/bold]")
    console.print(
        " By default, Footprinter indexes metadata only — filenames,\n"
        " timestamps, and structure. The options below let it read\n"
        " file content for richer search.\n"
    )

    console.print(" [bold]Content snippets[/bold]")
    console.print(
        " Stores a short preview of file content for keyword search.\n"
        " Without this, search matches filenames and metadata only.\n"
        " [dim]Trade-off: Footprinter reads file content during indexing.[/dim]"
    )
    content_snippets = Confirm.ask(" Enable file content snippets?", default=snippets_default)

    console.print("\n [bold]Semantic search[/bold]")
    console.print(
        " Stores content as embeddings in a local ChromaDB database.\n"
        " This lets you find files and chats by meaning, not just keywords.\n"
        " [dim]Trade-off: additional disk space (~500 MB) and longer indexing time.[/dim]"
    )

    # Both collectors share one signature, so pick one and call it once.
    collector = _collect_vectorization_quick if quick else _collect_vectorization_full
    result = collector(directories, file_types, excludes, semantic_cfg)
    result["content_snippets"] = content_snippets
    return result
1291
+
1292
+
1293
def _collect_vectorization_quick(
    directories: list[str],
    file_types: list[str],
    existing_excludes: list[str],
    existing_semantic: dict,
) -> dict:
    """Quick-mode vectorization: short scan summary, junk patterns auto-excluded.

    Returns:
        Dict with file_vectorization, chat_vectorization, file_types and
        exclude_patterns. Both flags are False when the user declines both
        or the semantic dependencies are unavailable.
    """

    def _answers(files: bool, chats: bool, patterns: list[str]) -> dict:
        return {
            "file_vectorization": files,
            "chat_vectorization": chats,
            "file_types": file_types,
            "exclude_patterns": patterns,
        }

    scan = _scan_directories_for_vectorization(directories, file_types)
    junk_hits = scan["junk_hits"]

    if scan["total"] > 0:
        junk_total = sum(junk_hits.values())
        console.print(f"\n Found [bold]{scan['total']}[/bold] files matching {', '.join(file_types)}")
        if junk_total > 0:
            console.print(
                f" [yellow]{junk_total} likely junk files detected[/yellow] "
                f"→ {scan['total_after_exclusions']} after exclusions"
            )

    file_vec = Confirm.ask(
        " Enable semantic search for files?",
        default=existing_semantic.get("file_vectorization", False),
    )
    chat_vec = Confirm.ask(
        " Enable semantic search for chats?",
        default=existing_semantic.get("chat_vectorization", False),
    )

    if not (file_vec or chat_vec):
        # Nothing enabled: the existing exclusions are returned untouched.
        return _answers(False, False, existing_excludes)

    # Auto-include detected junk exclusions
    exclude_patterns = list(existing_excludes)
    for detected in junk_hits:
        if detected in exclude_patterns:
            continue
        exclude_patterns.append(detected)

    if not _check_semantic_deps():
        file_vec = chat_vec = False
    return _answers(file_vec, chat_vec, exclude_patterns)
1345
+
1346
+
1347
def _collect_vectorization_full(
    directories: list[str],
    file_types: list[str],
    existing_excludes: list[str],
    existing_semantic: dict,
) -> dict:
    """Full-mode vectorization: detailed file type editing and exclusion toggles.

    Walks through three steps: confirm or re-enter the file type allowlist,
    scan the directories and review recommended junk exclusions (all at once
    or pattern by pattern), then decide whether to enable semantic search
    for files and chats.

    Returns:
        Dict with file_vectorization, chat_vectorization, file_types and
        exclude_patterns. Both flags are False when the user declines both
        or the semantic dependencies are unavailable.
    """
    # Step 1: File type allowlist
    console.print(f"\n File types to embed: [bold]{', '.join(file_types)}[/bold]")
    keep_types = Confirm.ask(" Keep these file types?", default=True)
    if not keep_types:
        raw = Prompt.ask(" Enter file types (comma-separated, e.g. .md, .txt, .py)")
        file_types = [t.strip() for t in raw.split(",") if t.strip()]

    # Step 2: Scan and show results
    scan = _scan_directories_for_vectorization(directories, file_types)

    if scan["total"] > 0:
        console.print(f"\n Scanned: [bold]{scan['total']}[/bold] files found")
        for ext, count in sorted(scan["by_extension"].items()):
            console.print(f" {ext}: {count}")

    # Junk exclusions
    exclude_patterns = list(existing_excludes)
    if scan["junk_hits"]:
        console.print("\n [yellow]Recommended exclusions:[/yellow]")
        detected_patterns = []
        for pattern, count in scan["junk_hits"].items():
            # Fall back to the pattern itself when no description is known.
            desc = next((d for p, d in KNOWN_JUNK_PATTERNS if p == pattern), pattern)
            console.print(f" {pattern} ({count} files) — {desc}")
            detected_patterns.append(pattern)

        accept_all = Confirm.ask(" Accept recommended exclusions?", default=True)
        for pattern in detected_patterns:
            # Accepting everything skips the per-pattern question; otherwise
            # each detected pattern is confirmed individually.
            wanted = accept_all or Confirm.ask(f" Exclude {pattern}?", default=True)
            if wanted and pattern not in exclude_patterns:
                exclude_patterns.append(pattern)

    # Step 3: Show before/after and enable decision
    if scan["total"] > 0:
        after = scan["total"] - sum(scan["junk_hits"].get(p, 0) for p in exclude_patterns)
        console.print(f"\n Files to embed: [bold]{after}[/bold] (of {scan['total']} total)")

    file_default = existing_semantic.get("file_vectorization", False)
    chat_default = existing_semantic.get("chat_vectorization", False)
    file_vec = Confirm.ask(" Enable semantic search for files?", default=file_default)
    chat_vec = Confirm.ask(" Enable semantic search for chats?", default=chat_default)

    # The dependency check (and its install offer) only runs when the user
    # enabled at least one kind of semantic search.
    if not (file_vec or chat_vec) or not _check_semantic_deps():
        file_vec = chat_vec = False

    return {
        "file_vectorization": file_vec,
        "chat_vectorization": chat_vec,
        "file_types": file_types,
        "exclude_patterns": exclude_patterns,
    }
1423
+
1424
+
1425
def preview_config(
    answers: dict,
    console=None,
    connectors: dict = None,
    chat_export_path: str = None,
    semantic: dict = None,
):
    """Print a panel summarising the configuration before it is written.

    Args:
        answers: Dict from collect_answers().
        console: Optional Rich Console (for testing); a new one is created
            when omitted.
        connectors: Optional connector results dict (accepted but not shown
            in the panel).
        chat_export_path: Optional path to a chat export file/directory.
        semantic: Optional dict from collect_vectorization_answers().
    """
    out = Console() if console is None else console
    sem = semantic or {}
    browsers = answers.get("browsers", [])

    lines = [f"Directories: {', '.join(answers.get('directories', []))}"]

    if browsers:
        lines.append(f"Browsers: {', '.join(browsers)}")
    else:
        lines.append("Browsers: [dim]none (can add later)[/dim]")

    if chat_export_path:
        lines.append(f"Chat export: {chat_export_path}")
    else:
        lines.append("Chat export: [dim]none (can add later)[/dim]")

    # Which kinds of content have semantic search switched on.
    flag_labels = (("file_vectorization", "files"), ("chat_vectorization", "chats"))
    enabled_for = [label for flag, label in flag_labels if sem.get(flag)]
    if enabled_for:
        lines.append(f"Semantic search: {', '.join(enabled_for)}")
        if sem.get("file_types"):
            lines.append(f" File types: {', '.join(sem['file_types'])}")
        if sem.get("exclude_patterns"):
            lines.append(f" Exclusion patterns: {len(sem['exclude_patterns'])}")
    else:
        lines.append("Semantic search: [dim]disabled (can enable later)[/dim]")

    if sem.get("content_snippets"):
        lines.append("Content snippets: files")
    else:
        lines.append("Content snippets: [dim]disabled (can enable later)[/dim]")

    summary = Panel(
        "\n".join(lines),
        title="Configuration Preview",
        border_style="dim",
        expand=False,
    )
    out.print()
    out.print(summary)
    out.print()
1484
+
1485
+
1486
+ def _deep_merge(base: dict, overlay: dict) -> dict:
1487
+ """Recursively merge overlay into base. Returns a new dict."""
1488
+ result = dict(base)
1489
+ for key, value in overlay.items():
1490
+ if key in result and isinstance(result[key], dict) and isinstance(value, dict):
1491
+ result[key] = _deep_merge(result[key], value)
1492
+ else:
1493
+ result[key] = value
1494
+ return result
1495
+
1496
+
1497
def generate_config(
    answers: dict,
    connector_results: dict = None,
    semantic: dict = None,
    existing: dict | None = None,
) -> dict:
    """Load config.example.yaml and apply user answers.

    Sections that are present but empty in the existing YAML (parsed as
    None) are treated like missing sections instead of raising.

    Args:
        answers: Dict from collect_answers().
        connector_results: Optional dict from connector setup hooks mapping
            account names to verified service lists
            (e.g. {"personal": ["drive"]}).
        semantic: Optional dict from collect_vectorization_answers() with
            file_vectorization and chat_vectorization bools, plus optional
            file_types, exclude_patterns and content_snippets.
        existing: Optional existing config dict. When provided, its values
            are deep-merged on top of the template before wizard answers
            are applied, preserving sections the user didn't change.
            Note: source_seeds are reconciled by name (template seeds
            kept, existing seeds overlaid) rather than replaced wholesale.

    Returns:
        Config dict ready to write as YAML.
    """
    import copy

    if connector_results is None:
        connector_results = {}

    # Read the template as UTF-8 explicitly; the default encoding depends on
    # the platform locale.
    with open(get_bundled_path("config.example.yaml"), "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    if existing is not None:
        # Save template seeds before merge (_deep_merge replaces lists wholesale)
        template_seeds = list(config.get("source_seeds") or [])
        config = _deep_merge(config, copy.deepcopy(existing))
        # Reconcile source_seeds: keep all template seeds, overlay existing by name
        existing_seeds = config.get("source_seeds") or []
        by_name = {s["name"]: s for s in template_seeds}
        for s in existing_seeds:
            by_name[s["name"]] = s
        config["source_seeds"] = list(by_name.values())

    def _section(name: str) -> dict:
        """Return config[name] as a dict, replacing a missing or null section."""
        if not isinstance(config.get(name), dict):
            config[name] = {}
        return config[name]

    # Apply answers — these always come from explicit user input
    config["directories"] = answers.get("directories") or []
    config["browsers"] = answers.get("browsers", [])

    # Strip the placeholder API key — real key goes in .env
    claude_cfg = config.get("claude")
    if isinstance(claude_cfg, dict) and "api_key" in claude_cfg:
        claude_cfg["api_key"] = "YOUR_API_KEY_HERE"

    # Apply connector config via hooks (enable flags, source_seeds, accounts)
    if connector_results:
        from footprinter.connectors import discover_connectors, resolve_hook

        for _name, spec in discover_connectors().items():
            if spec.config_apply:
                fn = resolve_hook(spec.config_apply)
                if fn:
                    fn(config, connector_results)

    # Apply semantic search settings — always ensure section exists with safe defaults
    semantic_cfg = _section("semantic")
    if semantic:
        semantic_cfg["file_vectorization"] = semantic.get("file_vectorization", False)
        semantic_cfg["chat_vectorization"] = semantic.get("chat_vectorization", False)
    else:
        semantic_cfg.setdefault("file_vectorization", False)
        semantic_cfg.setdefault("chat_vectorization", False)

    # Apply vectorization settings from the wizard (file_types, exclude_patterns)
    if semantic and "file_types" in semantic:
        _section("vectorization")["file_types"] = semantic["file_types"]
    if semantic and "exclude_patterns" in semantic:
        _section("vectorization")["exclude_patterns"] = semantic["exclude_patterns"]

    # Apply content snippets setting
    indexing_cfg = _section("indexing")
    if semantic and "content_snippets" in semantic:
        indexing_cfg["content_snippets"] = semantic["content_snippets"]

    return config
1581
+
1582
+
1583
def write_config(config: dict, path: Path | None = None) -> None:
    """Write config dict to YAML file.

    Parent directories of the target are created when missing, and a
    confirmation line naming the written file is printed to the console.

    Args:
        config: Config dict to write.
        path: Override output path. When omitted (or falsy) the path returned
            by ``get_config_path()`` is used. A plain string is accepted too.
    """
    # Path() is a no-op for Path inputs and lets callers hand in a str.
    target = Path(path or get_config_path())
    target.parent.mkdir(parents=True, exist_ok=True)

    # Pin the encoding so the written file does not depend on the locale.
    with open(target, "w", encoding="utf-8") as f:
        # sort_keys=False keeps the dict's own key order in the output.
        yaml.dump(config, f, default_flow_style=False, sort_keys=False)

    console.print(f" Wrote [bold]{target}[/bold]")
1597
+
1598
+
1599
def _run_orchestrator_stages(stages: list[str]):
    """Execute the given pipeline stages in-process with console logging.

    A fresh DataPipelineOrchestrator is handed to ``_run_with_logging()`` in
    incremental mode. ``ValueError`` and ``KeyboardInterrupt`` raised by the
    run are reported on the console instead of propagating.

    Args:
        stages: List of stage names (e.g. ["local_folders", "local_files"]).
    """
    # Built outside the try block so construction errors still propagate.
    pipeline = DataPipelineOrchestrator()
    run_options = {
        "pipes": stages,
        "mode": "incremental",
        "quiet": False,
        "header": "Setup Indexing",
        "show_next_steps": False,
    }
    try:
        _run_with_logging(pipeline, **run_options)
    except ValueError as exc:
        console.print(f"[yellow]Pipeline error:[/yellow] {exc}")
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
1621
+
1622
+
1623
def run_orchestrator(answers: dict = None, connector_results: dict = None):
    """Run the initial indexing stages through the in-process pipeline.

    The stage list always starts with ``local_folders`` and ``local_files``.
    ``browser`` is appended when ``answers["browsers"]`` is non-empty. When
    ``connector_results`` is non-empty, the pipes of every discovered
    connector that reports as installed are appended as well.

    Args:
        answers: Dict from collect_answers(). None is treated as {}.
        connector_results: Optional dict of connector results. None is
            treated as {}.
    """
    answers = {} if answers is None else answers
    connector_results = {} if connector_results is None else connector_results

    console.print("\n[bold]Running initial indexing...[/bold]")

    stages = ["local_folders", "local_files"]
    if answers.get("browsers"):
        stages.append("browser")

    if connector_results:
        # Imported lazily, only when connector results were supplied.
        from footprinter.connectors import discover_connectors, is_installed

        for _name, spec in discover_connectors().items():
            if not is_installed(spec):
                continue
            stages.extend(spec.pipes)

    _run_orchestrator_stages(stages)
1650
+
1651
+
1652
def collect_chat_export_path() -> str | None:
    """Ask the user whether and where a chat export should be imported.

    Returns:
        The user-expanded path as a string when the user opts in and the path
        exists; None when the user declines, enters nothing, or the path does
        not exist (an error line is printed in the last case).
    """
    console.print("\n[bold]3. Chat history[/bold]")
    console.print(
        " Optionally import Claude or ChatGPT chat exports.\n"
        " [dim]You can also import later with: fp ingest import <file>[/dim]"
    )

    wants_import = Confirm.ask(" Do you have Claude or ChatGPT exports to import?", default=False)
    if not wants_import:
        return None

    console.print(" [dim]Supported: Claude .zip export or unzipped directory[/dim]")
    entered = Prompt.ask(" Path to export file (.zip or directory)")
    if not entered:
        return None

    # Expand a leading "~" before checking the filesystem.
    expanded = os.path.expanduser(entered)
    candidate = Path(expanded)
    if candidate.exists():
        return str(candidate)

    console.print(f" [red]File not found: {expanded}[/red]")
    return None
1678
+
1679
+
1680
def import_chat_export(path: str) -> dict:
    """Import a chat export located at a previously collected path.

    Any exception raised during the import is reported on the console together
    with the manual command to retry; it is never re-raised.

    Args:
        path: Expanded path to the export file or directory.

    Returns:
        The result of ChatIndexer.upload() when it is a dict, otherwise {}
        (also {} on failure).
    """
    export_path = Path(path)
    try:
        # Imported lazily inside the try so import errors are reported too.
        from footprinter.ingest.chat_indexer import ChatIndexer
        from footprinter.ingest.database import Database

        indexer = ChatIndexer(Database(str(get_db_path())))
        outcome = indexer.upload(export_path)
        console.print(" [green]Chat import complete.[/green]")

        if not isinstance(outcome, dict):
            return {}

        added = outcome.get("chats_added", 0)
        updated = outcome.get("chats_updated", 0)
        msgs = outcome.get("messages_imported", 0)
        total = added + updated
        console.print(
            f" Imported: [cyan]{total}[/cyan] chats "
            f"({added} new, {updated} updated), "
            f"[cyan]{msgs}[/cyan] messages"
        )
        return outcome
    except Exception as exc:  # Intentional broad catch: user-facing CLI; errors shown to console, not re-raised
        console.print(f" [yellow]Chat import failed: {exc}[/yellow]")
        console.print(f" [dim]Run manually: fp ingest import {path}[/dim]")
        return {}
1712
+
1713
+
1714
def offer_setup_claude() -> bool:
    """Walk the user through optional MCP client configuration.

    Skips with a hint when the MCP package is unavailable. Otherwise generates
    a config snippet, offers to display it, and then offers to write it as the
    Claude Desktop configuration.

    Returns:
        True if the Claude Desktop config was written successfully, False in
        every other case (unavailable, declined, or an error occurred).
    """

    def _report_failure(exc: Exception) -> bool:
        # Shared failure path for snippet generation and config writing.
        console.print(f" [yellow]MCP setup failed: {exc}[/yellow]")
        console.print(" [dim]Run manually: fp setup mcp --claude[/dim]")
        return False

    if not mcp_setup.is_mcp_available():
        console.print("\n[dim]MCP package not installed — skipping Claude Desktop configuration.[/dim]")
        console.print(" [dim]Install with: pip install mcp[/dim]")
        return False

    try:
        snippet = mcp_setup.generate_snippet()
    except Exception as exc:  # Intentional broad catch: user-facing CLI; errors shown to console, not re-raised
        return _report_failure(exc)

    # The snippet is offered for manual copy/paste into other MCP clients.
    show_snippet = Confirm.ask(
        "\nView MCP config snippet (for Claude Code, Cursor, VS Code, and other clients)?",
        default=True,
    )
    if show_snippet:
        mcp_setup.print_snippet(snippet)

    # Automatic Claude Desktop configuration is opt-in.
    if not Confirm.ask("\nConfigure Claude Desktop automatically?", default=False):
        return False

    try:
        mcp_setup.write_config(snippet)
        console.print(" [green]Claude Desktop MCP configured.[/green]")
    except Exception as exc:  # Intentional broad catch: user-facing CLI; errors shown to console, not re-raised
        return _report_failure(exc)
    return True
1751
+
1752
+
1753
+ # _get_db_connection and _normalize_path imported from _policy_helpers
1754
+
1755
+
1756
def _require_config() -> tuple[dict, Path]:
    """Return the loaded config and its path, exiting if it cannot be loaded.

    Returns:
        Tuple of (config_dict, config_path).

    Exits:
        Calls sys.exit(1) after printing the error when get_config() raises
        ConfigError.
    """
    try:
        loaded = get_config()
    except ConfigError as err:
        console.print(f"[red]Config error:[/red] {err}")
        sys.exit(1)

    location = get_config_path()
    return loaded, location
1772
+
1773
+
1774
def folders_add(path: str, index: bool = True) -> int:
    """Add a directory to the config and optionally trigger indexing.

    Args:
        path: Directory path to add.
        index: If True, prompt to run indexing after adding.

    Returns:
        0 on success, 1 on error (already configured, or not a directory).
    """
    normalized = _normalize_path(path)
    expanded = os.path.expanduser(normalized)

    config, config_path = _require_config()
    # A `directories:` key with no value loads from YAML as None, so fall
    # back to an empty list instead of iterating over / appending to None.
    directories = list(config.get("directories") or [])

    # Duplicate-check before existence-check: a configured path is a duplicate
    # regardless of whether the directory is currently reachable, and "already
    # configured" is more actionable than "not a directory" when both are true.
    existing_expanded = {os.path.expanduser(d) for d in directories}
    if expanded in existing_expanded:
        console.print(f"[yellow]Already configured:[/yellow] {normalized}")
        return 1

    if not os.path.isdir(expanded):
        console.print(f"[red]Not a directory or not found:[/red] {path}")
        return 1

    # Store the normalized (not user-expanded) form in the config file.
    directories.append(normalized)
    config["directories"] = directories
    write_config(config, config_path)
    console.print(f"[green]Added:[/green] {normalized}")

    if index and Confirm.ask("Run indexing for the new folder now?", default=True):
        _run_orchestrator_stages(["local_folders", "local_files"])

    return 0
1812
+
1813
+
1814
def folders_remove(path: str) -> int:
    """Remove a directory from the config.

    Only the config file is changed; this function does not touch the
    database, so previously indexed files stay there.

    Args:
        path: Directory path to remove.

    Returns:
        0 on success, 1 if path wasn't configured.
    """
    normalized = _normalize_path(path)
    expanded = os.path.expanduser(normalized)

    config, config_path = _require_config()
    # A `directories:` key with no value loads from YAML as None, so fall
    # back to an empty list instead of iterating over None.
    directories = config.get("directories") or []

    # Keep every entry whose user-expanded form differs from the target.
    remaining = [d for d in directories if os.path.expanduser(d) != expanded]

    # Nothing was filtered out, so the path was never configured.
    if len(remaining) == len(directories):
        console.print(f"[yellow]Not configured:[/yellow] {normalized}")
        return 1

    config["directories"] = remaining
    write_config(config, config_path)
    console.print(f"[green]Removed:[/green] {normalized}")
    console.print("[dim] Note: indexed files remain in the database.[/dim]")
    return 0
1843
+
1844
+
1845
def _get_indexing_counts() -> dict:
    """Return row counts per entity type from the database.

    Returns:
        Dict mapping "folders", "files", "visits", "projects", "chats" and
        "messages" to their counts. A query that raises
        sqlite3.OperationalError contributes 0. Returns {} when no DB
        connection is available or any other error occurs.
    """
    conn = _get_db_connection()
    if conn is None:
        return {}

    count_queries = {
        "folders": "SELECT COUNT(*) FROM folders",
        "files": "SELECT COUNT(*) FROM files WHERE status != 'removed'",
        "visits": "SELECT COUNT(*) FROM visits",
        "projects": "SELECT COUNT(*) FROM projects",
        "chats": "SELECT COUNT(*) FROM chats WHERE status != 'removed'",
        "messages": "SELECT COUNT(*) FROM messages WHERE status != 'removed'",
    }

    try:
        cursor = conn.cursor()
        totals = {}
        for label, sql in count_queries.items():
            try:
                cursor.execute(sql)
                totals[label] = cursor.fetchone()[0]
            except sqlite3.OperationalError:
                # Presumably the table does not exist yet; report zero.
                totals[label] = 0
        return totals
    except Exception:  # Intentional broad catch: setup wizard display must not crash
        return {}
    finally:
        conn.close()
1872
+
1873
+
1874
def seed_access_policies() -> dict:
    """Seed the default MCP access policies and offer to tighten them.

    Seeding is delegated to ``_seed_access_policies``. Per the messages shown
    to the user, the defaults are metadata visible and content allowed. The
    user is then asked whether to restrict access to metadata only, which sets
    the "global" permission policy to "deny".

    Returns:
        The dict returned by ``_seed_access_policies`` (read here for the
        ``visibility_seeded`` and ``permission_seeded`` keys), or {} when no
        DB connection is available or seeding fails.
    """
    conn = _get_db_connection()
    if conn is None:
        return {}

    try:
        result = _seed_access_policies(conn)

        newly_seeded = result.get("visibility_seeded") or result.get("permission_seeded")
        if newly_seeded:
            console.print(
                "\n[bold]MCP access policies[/bold]: seeded default access (metadata visible, content allowed)"
            )
        else:
            console.print("\n[bold]MCP access policies[/bold]: already configured")
            console.print(" [dim]Manage with: fp mcp view show | fp mcp read show[/dim]")

        # Spell out what the two default settings mean.
        console.print("\n [dim]Visible[/dim] = Claude can see file names, sizes, and paths")
        console.print(" [dim]Content allowed[/dim] = Claude can read file contents when asked")
        console.print(
            " [dim]Security posture: fail-open (all reads allowed). "
            "See reference/mcp-access-control.md § Security Posture.[/dim]"
        )

        # Content reading stays enabled unless the user opts out here.
        restrict = Confirm.ask(
            "\n Restrict to metadata only? (no content reading)",
            default=False,
        )
        if not restrict:
            console.print(" [dim]Keeping full access (content allowed)[/dim]")
            return result

        from footprinter.db.policies import set_permission_policy

        set_permission_policy(conn, "global", "deny")
        console.print(" [green]Switched to metadata-only access (content denied)[/green]")
        return result
    except Exception as exc:  # Intentional broad catch: policy seeding is best-effort during setup
        logger.error(f"Failed to seed access policies: {exc}")
        console.print(f" [yellow]Warning: failed to seed access policies: {exc}[/yellow]")
        console.print(" [dim]Run 'fp setup' later to retry[/dim]")
        return {}
    finally:
        conn.close()
1923
+
1924
+
1925
def print_summary(
    chat_result: dict = None,
    mcp_configured: bool = False,
    connector_results: dict = None,
):
    """Print the end-of-setup report: file status, counts, and next steps.

    Args:
        chat_result: Result dict from import_chat_export(), or None.
        mcp_configured: Whether MCP was configured during the wizard.
            Accepted for callers; not read by this function's body.
        connector_results: Result dict from connector setup hooks, or None.
    """
    console.print()

    summary = Table(title="Setup Complete")
    summary.add_column("File", style="bold")
    summary.add_column("Status")

    # Config file row
    config_path = get_config_path()
    config_status = "[green]Created[/green]" if config_path.exists() else "[red]Missing[/red]"
    summary.add_row(str(config_path), config_status)

    # Database file row
    db_path = get_db_path()
    db_status = "[green]Ready[/green]" if db_path.exists() else "[yellow]Not yet created[/yellow]"
    summary.add_row(str(db_path), db_status)

    console.print(summary)

    # Per-entity counts; an empty dict means there is nothing to report.
    counts = _get_indexing_counts()
    if counts:
        folder_total = counts.get("folders", 0)
        file_total = counts.get("files", 0)
        console.print()
        console.print(f" Indexed: [cyan]{folder_total}[/cyan] folders, [cyan]{file_total}[/cyan] files")

        visit_total = counts.get("visits", 0)
        if visit_total > 0:
            console.print(f" Browser history: [cyan]{visit_total}[/cyan] URLs")

        chat_total = counts.get("chats", 0)
        message_total = counts.get("messages", 0)
        if chat_total > 0:
            console.print(f" Chat: [cyan]{chat_total}[/cyan] chats, [cyan]{message_total}[/cyan] messages")

        project_total = counts.get("projects", 0)
        if project_total > 0:
            console.print(f" Projects detected: [cyan]{project_total}[/cyan]")
            console.print(" Use [bold]fp project[/bold] and [bold]fp client[/bold] to organize your data.")

    # Getting-started commands
    console.print()
    console.print("[bold]Ready to explore your data:[/bold]")
    console.print(' [cyan]fp search[/cyan] [dim]"query"[/dim] Search your files')
    console.print(" [cyan]fp ingest status[/cyan] Show data counts")
    console.print(" [cyan]fp ingest[/cyan] Re-index (incremental)")
    console.print()
    console.print("[dim]Run fp -h or fp <command> --help for more.[/dim]")

    # Hints for optional pieces that have not been set up yet.
    pending = []
    if not connector_results:
        pending.append("fp connect")
    chats_in_db = counts.get("chats", 0) if counts else 0
    if not chat_result and chats_in_db == 0:
        pending.append("fp ingest import <file>")
    if pending:
        console.print()
        console.print(f"[dim]Not yet set up: {', '.join(pending)}[/dim]")
1998
+
1999
+
2000
# Invoke main() only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()