footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1836 @@
1
+ """
2
+ Interactive setup wizard for Footprinter.
3
+
4
+ Guides new users through configuration in ~3 minutes.
5
+ Usage:
6
+ fp setup # Run interactive wizard
7
+ fp setup --check # Validate existing configuration
8
+ fp setup --hooks # Install git hooks (sets core.hooksPath)
9
+ fp setup --reset # Clear data and re-run wizard
10
+ """
11
+
12
+ import argparse
13
+ import logging
14
+ import os
15
+ import shutil
16
+ import sqlite3
17
+ import subprocess
18
+ import sys
19
+ from pathlib import Path
20
+
21
+ import yaml
22
+ from rich.console import Console
23
+ from rich.panel import Panel
24
+ from rich.rule import Rule
25
+ from rich.table import Table
26
+
27
+ from footprinter.cli import mcp_setup
28
+ from footprinter.cli._policy_helpers import (
29
+ get_policy_db as _get_db_connection,
30
+ )
31
+ from footprinter.cli._policy_helpers import (
32
+ normalize_path as _normalize_path,
33
+ )
34
+ from footprinter.cli._policy_helpers import (
35
+ seed_access_policies as _seed_access_policies,
36
+ )
37
+ from footprinter.cli._prompt import (
38
+ PromptCancelled,
39
+ )
40
+ from footprinter.cli._prompt import (
41
+ SafeConfirm as Confirm,
42
+ )
43
+ from footprinter.cli._prompt import (
44
+ SafePrompt as Prompt,
45
+ )
46
+
47
+ # In-process pipeline — imported here so tests can patch them
48
+ from footprinter.cli.ingest import _run_with_logging
49
+ from footprinter.ingest.orchestrator import DataPipelineOrchestrator
50
+ from footprinter.paths import (
51
+ get_bundled_path,
52
+ get_chroma_path,
53
+ get_config_path,
54
+ get_db_path,
55
+ get_log_path,
56
+ )
57
+ from footprinter.source_registry import ConfigError, get_config
58
+
59
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
60
+
61
+
62
def _load_existing_config() -> dict | None:
    """Return the current configuration, or ``None`` when it cannot be loaded.

    A ``ConfigError`` raised by ``get_config`` is translated into ``None`` so
    callers can branch on "no usable config yet" without handling the
    exception themselves.
    """
    try:
        loaded = get_config()
    except ConfigError:
        return None
    return loaded
68
+
69
+
70
# Shared Rich console used for all output printed by this module.
console = Console()
71
+
72
+
73
def _repo_root() -> Path:
    """Return the directory three levels above this module file.

    In a source checkout that is the repository root. It is only meaningful
    for dev-only features (git hooks, subprocess working directory).
    """
    location = Path(__file__).resolve()
    # Climb three levels: module file -> cli/ -> footprinter/ -> checkout root.
    for _ in range(3):
        location = location.parent
    return location
76
+
77
+
78
def _hooks_available() -> bool:
    """Report whether the dev-only ``post-merge`` git hook script is present.

    The script is looked up at ``scripts/hooks/post-merge`` beneath the
    directory returned by ``_repo_root()``.
    """
    hook_script = _repo_root().joinpath("scripts", "hooks", "post-merge")
    return hook_script.exists()
81
+
82
+
83
# Common directories checked during quick start — only those that exist are
# included in the preset (see _choose_preset).
QUICK_START_CANDIDATES = ["~/Documents", "~/Desktop", "~/Work", "~/Projects"]

# Directories offered as optional extras (not defaults)
OPTIONAL_DIRECTORIES = ["~/.claude"]

# Browser names accepted in the config's 'browsers' list (see validate_config).
KNOWN_BROWSERS = ["safari", "chrome"]

# Vectorization defaults — file types that benefit from semantic embedding
DEFAULT_FILE_TYPES = [".md", ".txt", ".pdf", ".docx"]

# Known junk patterns — (fnmatch_pattern, description) tuples
# Files matching these exist as text but contain no meaningful prose content.
# Patterns are written with ** glob syntax and matched with fnmatch, which does
# not treat "/" specially, so a wildcard spans directory separators.
# NOTE(review): the description on the Photos Library entry reads "macOS
# Spotlight index cache" — confirm that label is the intended one.
KNOWN_JUNK_PATTERNS = [
    ("**/Photos Library.photoslibrary/**", "macOS Spotlight index cache"),
    ("**/.claude/debug/**", "Claude Code debug logs"),
    ("**/.claude/paste-cache/**", "Claude Code paste cache"),
    ("**/.claude/cache/**", "Claude Code cache"),
    ("**/.claude/projects/**", "Claude Code session data"),
    ("**/.claude/plans/**", "Claude Code auto-generated plans"),
    ("**/.claude/plugins/**", "Claude Code plugin cache"),
    ("**/.cci/**", "CumulusCI cache"),
    ("**/.context/**", "IDE context directories"),
    ("**/.github/**", "GitHub config and workflows"),
    ("**/.ai-dev/**", "AI dev tool directories"),
]

# Maximum number of matching files _scan_directories_for_vectorization counts
# before it stops walking and reports the result as truncated.
_SCAN_FILE_LIMIT = 50_000
112
+
113
+
114
def _scan_directories_for_vectorization(directories: list[str], file_types: list[str]) -> dict:
    """Count files under *directories* whose extension is in *file_types*.

    Each directory is ``~``-expanded; entries that are not directories, or
    that are themselves symlinks, are skipped, and symlinked subdirectories
    are not followed. Every counted file is also checked against
    ``KNOWN_JUNK_PATTERNS`` (first matching pattern wins). Scanning stops as
    soon as ``_SCAN_FILE_LIMIT`` matching files have been counted.

    Returns:
        A dict with keys ``total``, ``by_extension`` (lower-cased extension
        to count), ``junk_hits`` (pattern to count),
        ``total_after_exclusions`` (``total`` minus all junk hits) and
        ``truncated`` (True when the file limit stopped the scan).
    """
    from fnmatch import fnmatch

    def _candidate_files():
        """Lazily yield ``(extension, full_path)`` for each wanted file."""
        for entry in directories:
            top = os.path.expanduser(entry)
            if os.path.islink(top) or not os.path.isdir(top):
                continue
            for parent, _subdirs, names in os.walk(top, followlinks=False):
                for name in names:
                    suffix = os.path.splitext(name)[1].lower()
                    if suffix in file_types:
                        yield suffix, os.path.join(parent, name)

    ext_counts: dict[str, int] = {}
    junk_counts: dict[str, int] = {}
    matched = 0
    hit_limit = False

    for suffix, path in _candidate_files():
        matched += 1
        ext_counts[suffix] = ext_counts.get(suffix, 0) + 1

        # Attribute the file to the first junk pattern it matches, if any.
        junk_pattern = next(
            (pattern for pattern, _label in KNOWN_JUNK_PATTERNS if fnmatch(path, pattern)),
            None,
        )
        if junk_pattern is not None:
            junk_counts[junk_pattern] = junk_counts.get(junk_pattern, 0) + 1

        if matched >= _SCAN_FILE_LIMIT:
            hit_limit = True
            break

    return {
        "total": matched,
        "by_extension": ext_counts,
        "junk_hits": junk_counts,
        "total_after_exclusions": matched - sum(junk_counts.values()),
        "truncated": hit_limit,
    }
162
+
163
+
164
def get_available_browsers() -> list[str]:
    """Return the browser names usable on this platform.

    Chrome is always listed; Safari is listed first, and only on macOS
    (``sys.platform == "darwin"``). A new list is returned on every call.
    """
    if sys.platform == "darwin":
        return ["safari", "chrome"]
    return ["chrome"]
170
+
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # argparse registration (for fp CLI router)
174
+ # ---------------------------------------------------------------------------
175
+
176
+
177
def register(subparsers) -> None:
    """Attach the ``setup`` command to the ``fp`` CLI router.

    Adds the ``setup`` parser (dispatching to ``_handle_setup``) with its
    ``--check``/``--reset`` flags, the ``--hooks`` flag when the dev hook
    script is available, and the ``mcp`` and ``folders`` subcommands.

    Args:
        subparsers: The argparse sub-parsers action of the parent parser.
    """
    from footprinter.cli._common import FORMATTER

    setup_description = (
        "Interactive setup wizard and system configuration.\n\n"
        "Run with no arguments for the guided wizard (~3 minutes).\n"
        "Use flags to run specific setup tasks."
    )
    setup_epilog = (
        "examples:\n"
        " fp setup Run the interactive wizard\n"
        " fp setup --check Validate existing configuration\n"
        " fp setup mcp --claude Configure MCP for Claude Desktop\n"
        " fp setup folders add ~/Work/newdir\n"
        "\n"
        "tip: use 'fp setup <command> --help' for details on subcommands."
    )
    setup_parser = subparsers.add_parser(
        "setup",
        help="Configuration wizard and system setup",
        description=setup_description,
        epilog=setup_epilog,
        formatter_class=FORMATTER,
    )
    setup_parser.set_defaults(func=_handle_setup)

    # Top-level flags.
    setup_parser.add_argument(
        "--check",
        action="store_true",
        help="Validate existing configuration and exit",
    )
    if _hooks_available():
        # Only offered when the dev hook script exists.
        setup_parser.add_argument(
            "--hooks",
            action="store_true",
            help="Install git hooks (sets core.hooksPath to scripts/hooks)",
        )
    setup_parser.add_argument(
        "--reset",
        action="store_true",
        help="Clear database and vector store, then re-run setup wizard",
    )

    commands = setup_parser.add_subparsers(
        dest="setup_action",
        metavar="COMMAND",
        title="commands (one required)",
    )

    # mcp
    mcp_description = "Configure the MCP server snippet for AI clients.\n\nChecks, previews, or writes the JSON config."
    mcp_epilog = (
        "examples:\n"
        " fp setup mcp --check Check if already configured\n"
        " fp setup mcp --dry-run Preview config write without changing anything\n"
        " fp setup mcp --claude Write to Claude Desktop config (creates backup)"
    )
    mcp_parser = commands.add_parser(
        "mcp",
        help="Configure MCP integration",
        description=mcp_description,
        epilog=mcp_epilog,
        formatter_class=FORMATTER,
    )
    mcp_parser.add_argument(
        "--check",
        action="store_true",
        dest="mcp_check",  # distinct dest so it cannot collide with the top-level --check
        help="Check if footprinter is configured in any MCP client",
    )
    mcp_parser.add_argument(
        "--claude",
        action="store_true",
        help="Write/merge snippet into Claude Desktop config (creates backup)",
    )
    mcp_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview config write without changing anything",
    )

    # folders (add/remove only — list is now fp folder list)
    folders_description = (
        "Add or remove directories from the indexing configuration.\n\n"
        "Use 'fp folder list' to view indexed folders."
    )
    folders_epilog = "examples:\n fp setup folders add ~/Work/newproject\n fp setup folders remove ~/Work/old"
    folders_parser = commands.add_parser(
        "folders",
        help="Manage indexed folders",
        description=folders_description,
        epilog=folders_epilog,
        formatter_class=FORMATTER,
    )
    folder_commands = folders_parser.add_subparsers(
        dest="folders_command",
        metavar="COMMAND",
        title="commands (one required)",
    )

    add_parser = folder_commands.add_parser(
        "add",
        help="Add a directory to index",
        description="Add a directory path to the indexing configuration.",
        formatter_class=FORMATTER,
    )
    add_parser.add_argument("path", help="Directory path to add")
    add_parser.add_argument(
        "--no-index",
        action="store_true",
        help="Skip running the indexer after adding",
    )

    remove_parser = folder_commands.add_parser(
        "remove",
        help="Remove a directory from config",
        description="Remove a directory from the indexing configuration.",
        formatter_class=FORMATTER,
    )
    remove_parser.add_argument("path", help="Directory path to remove")
284
+
285
+
286
def _handle_setup(args) -> None:
    """Run the ``fp setup`` dispatch, converting cancellation into exit 130.

    Delegates to ``_handle_setup_inner``. If the user cancels a prompt
    (``PromptCancelled``) or presses Ctrl-C (``KeyboardInterrupt``), a short
    notice is printed and the process exits with status 130.
    """
    try:
        _handle_setup_inner(args)
    except (KeyboardInterrupt, PromptCancelled):
        console.print("\n[dim]Setup cancelled.[/dim]")
        sys.exit(130)
293
+
294
+
295
def _dispatch_mcp(args) -> None:
    """Handle ``fp setup mcp`` for both the router and ``main()``.

    Order of operations:
      1. ``--check`` exits with the status returned by
         ``mcp_setup.check_config()``; it runs before the availability gate
         so it works without the MCP extras installed.
      2. Without the MCP package, an install hint is printed and the process
         exits with status 1.
      3. ``--claude`` or ``--dry-run`` writes (or previews) the config and
         exits 0 on success, 1 otherwise.
      4. Otherwise the generated snippet is printed.
    """
    wants_check = getattr(args, "mcp_check", False)
    if wants_check:
        sys.exit(mcp_setup.check_config())

    # Everything past this point needs the mcp dependency.
    mcp_ready = mcp_setup.is_mcp_available()
    if not mcp_ready:
        console.print("[red]MCP package not installed.[/red] Install with: pip install mcp")
        sys.exit(1)

    snippet = mcp_setup.generate_snippet()

    wants_write = getattr(args, "claude", False) or getattr(args, "dry_run", False)
    if wants_write:
        succeeded = mcp_setup.write_config(snippet, dry_run=args.dry_run)
        sys.exit(0 if succeeded else 1)

    # No write requested: just show the snippet.
    mcp_setup.print_snippet(snippet)
314
+
315
+
316
def _handle_setup_inner(args) -> None:
    """Inner dispatch for ``fp setup`` — separated so cancellation is caught.

    Routes, in order: the ``mcp`` subcommand, the ``folders`` subcommand
    (``add``/``remove``), ``--reset``, ``--hooks``, ``--check``, and finally
    the interactive wizard when nothing else was requested.

    ``--reset`` asks for confirmation, deletes the SQLite database (together
    with any ``-wal``/``-shm``/``-journal`` sidecar files next to it) and the
    Chroma directory, then re-runs the wizard. Config and credentials are
    not touched.

    Args:
        args: Parsed argparse namespace for ``fp setup``.
    """
    action = getattr(args, "setup_action", None)

    if action == "mcp":
        _dispatch_mcp(args)
        return

    if action == "folders":
        cmd = getattr(args, "folders_command", None)
        if cmd == "add":
            sys.exit(folders_add(args.path, index=not args.no_index))
        elif cmd == "remove":
            sys.exit(folders_remove(args.path))
        else:
            console.print("[yellow]Usage: fp setup folders add|remove[/yellow]")
        return

    if getattr(args, "reset", False):
        db_path = get_db_path()
        chroma_path = get_chroma_path()

        console.print(
            "[bold yellow]This will delete all indexed data.[/bold yellow]\nConfig and credentials are preserved."
        )

        if not Confirm.ask("Continue?"):
            console.print("[dim]Reset cancelled.[/dim]")
            return

        cleared = []
        if db_path.exists():
            db_path.unlink()
            cleared.append(str(db_path))
        # SQLite keeps write-ahead-log / rollback-journal data in sidecar
        # files named after the database. Remove them too so a stale log is
        # never paired with the fresh database the wizard creates next.
        for sidecar_suffix in ("-wal", "-shm", "-journal"):
            sidecar = Path(f"{db_path}{sidecar_suffix}")
            if sidecar.is_file():
                sidecar.unlink()
        if chroma_path.exists():
            shutil.rmtree(chroma_path)
            cleared.append(str(chroma_path))
        if cleared:
            console.print(f"[green]Cleared:[/green] {', '.join(cleared)}")
        else:
            console.print("[dim]Nothing to clear (no existing data found).[/dim]")

        run_interactive_wizard()
        return

    if getattr(args, "hooks", False):
        sys.exit(install_git_hooks())
    elif getattr(args, "check", False):
        sys.exit(check_existing_config())
    else:
        run_interactive_wizard()
367
+
368
+
369
+ # ---------------------------------------------------------------------------
370
+ # Standalone entry point (fp setup)
371
+ # ---------------------------------------------------------------------------
372
+
373
+
374
def main():
    """CLI entry point for fp setup.

    Builds a standalone parser (``--check``, optional ``--hooks``, and the
    ``mcp`` / ``folders`` subcommands), parses ``sys.argv`` and dispatches.
    With no flag or subcommand the interactive wizard runs.

    Cancelling a prompt (``PromptCancelled``) or pressing Ctrl-C prints a
    short notice and exits with status 130, matching ``_handle_setup``.
    """
    parser = argparse.ArgumentParser(
        prog="fp setup",
        description="Interactive setup wizard for Footprinter",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Validate existing configuration and exit",
    )
    if _hooks_available():
        parser.add_argument(
            "--hooks",
            action="store_true",
            help="Install git hooks (sets core.hooksPath to scripts/hooks)",
        )

    subparsers = parser.add_subparsers(dest="subcommand")
    mcp_parser = subparsers.add_parser(
        "mcp",
        help="Configure MCP integration",
    )
    mcp_parser.add_argument(
        "--check",
        action="store_true",
        dest="mcp_check",
        help="Check if footprinter is configured in any MCP client",
    )
    mcp_parser.add_argument(
        "--claude",
        action="store_true",
        help="Write/merge snippet into Claude Desktop config (creates backup)",
    )
    mcp_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview config write without changing anything",
    )

    folders_parser = subparsers.add_parser(
        "folders",
        help="Manage indexed folders",
    )
    folders_sub = folders_parser.add_subparsers(dest="folders_command")
    add_parser = folders_sub.add_parser("add", help="Add a directory to index")
    add_parser.add_argument("path", help="Directory path to add")
    add_parser.add_argument(
        "--no-index",
        action="store_true",
        help="Skip running the indexer after adding",
    )
    remove_parser = folders_sub.add_parser("remove", help="Remove a directory from config")
    remove_parser.add_argument("path", help="Directory path to remove")

    args = parser.parse_args()

    # Dispatch inside one guard so an interrupted prompt ends cleanly rather
    # than with a traceback. SystemExit raised by sys.exit() passes through.
    try:
        if args.subcommand == "mcp":
            _dispatch_mcp(args)
            return

        if args.subcommand == "folders":
            cmd = getattr(args, "folders_command", None)
            if cmd == "add":
                sys.exit(folders_add(args.path, index=not args.no_index))
            elif cmd == "remove":
                sys.exit(folders_remove(args.path))
            else:
                folders_parser.print_help()
            return

        if getattr(args, "hooks", False):
            sys.exit(install_git_hooks())
        elif args.check:
            sys.exit(check_existing_config())
        else:
            run_interactive_wizard()
    except (PromptCancelled, KeyboardInterrupt):
        console.print("\n[dim]Setup cancelled.[/dim]")
        sys.exit(130)
451
+
452
+
453
def check_existing_config() -> int:
    """Validate the existing configuration and print a report.

    Prints config load errors or validation errors and stops there.
    For a valid config it prints warnings, an architecture warning if any,
    missing core dependencies, and a table of optional features.

    Returns:
        0 when the config is valid and all core dependencies are present,
        1 otherwise.
    """
    try:
        config = get_config()
    except ConfigError as exc:
        console.print(f"[red]Config error:[/red] {exc}")
        return 1

    problems, cautions = validate_config(config)
    if problems:
        console.print("[red]Configuration errors:[/red]")
        for problem in problems:
            console.print(f" - {problem}")
        return 1

    console.print("[green]Configuration is valid.[/green]")
    if cautions:
        console.print("[yellow]Warnings:[/yellow]")
        for caution in cautions:
            console.print(f" - {caution}")

    # Architecture check
    arch_note = check_architecture()
    if arch_note:
        console.print()
        console.print(f"[yellow]Architecture warning:[/yellow] {arch_note}")

    # Core dependency check — only surface the ones that are missing
    absent_core = [dep_name for dep_name, present in check_core_deps() if not present]
    if absent_core:
        console.print()
        console.print(f"[red]Missing core dependencies:[/red] {', '.join(absent_core)}")
        console.print("Reinstall with: pip install footprinter-cli")

    # Optional features table
    feature_rows = check_optional_features(config)
    console.print()
    table = Table(title="Optional Features", show_header=True, header_style="bold")
    table.add_column("Feature", style="cyan")
    table.add_column("Status")

    for label, is_installed, is_enabled, install_hint in feature_rows:
        if not is_installed:
            status = f"[yellow]not installed[/yellow] — {install_hint}"
        elif is_enabled:
            status = "[green]enabled[/green]"
        else:
            status = "[dim]installed, not enabled[/dim]"
        table.add_row(label, status)

    console.print(table)

    if absent_core:
        return 1
    return 0
510
+
511
+
512
def _is_importable(module_name: str) -> bool:
    """Return True if *module_name* can be imported, False otherwise.

    The module is actually imported, so "importable" means "imports without
    raising". Any exception counts as "not importable": a package's
    top-level code may raise something other than ``ImportError`` (for
    example ``RuntimeError`` or ``OSError`` from an environment check or a
    failed native-library load), and an empty name raises ``ValueError``.
    A capability probe should report those as unavailable rather than crash.

    Args:
        module_name: Dotted module name to probe.
    """
    try:
        __import__(module_name)
    except Exception:
        # Deliberately broad: see docstring. SystemExit and
        # KeyboardInterrupt are not subclasses of Exception and still
        # propagate.
        return False
    return True
519
+
520
+
521
def check_core_deps() -> list[tuple[str, bool]]:
    """Probe the hard-required packages and report their availability.

    Returns:
        ``(display_name, available)`` pairs, in order, for PyYAML and Rich.
        A ``False`` entry means the installation is broken.
    """
    required = (("PyYAML", "yaml"), ("Rich", "rich"))
    return [(display_name, _is_importable(module)) for display_name, module in required]
530
+
531
+
532
def check_optional_features(
    config: dict,
) -> list[tuple[str, bool, bool | None, str]]:
    """Check optional features against install state *and* config.

    Semantic Search counts as installed when both ``chromadb`` and
    ``onnxruntime`` import, and as enabled when ``semantic.file_vectorization``
    or ``semantic.chat_vectorization`` is truthy. Each connector found by
    ``discover_connectors()`` adds its own features; those are enabled when
    ``<cfg_section>.enabled`` is truthy.

    A config section that is absent, empty (``section:`` parses to ``None``
    in YAML) or not a mapping is treated as "nothing enabled" instead of
    raising.

    Args:
        config: Parsed configuration dict.

    Returns:
        ``(name, installed, enabled, hint)`` for each feature, where *hint*
        is the install instruction shown when the feature is not installed.
    """

    def _section(key: str) -> dict:
        """Return config[key] if it is a mapping, else an empty dict."""
        value = config.get(key)
        return value if isinstance(value, dict) else {}

    features: list[tuple[str, bool, bool | None, str]] = []

    # Semantic Search (chromadb + onnxruntime)
    sem_installed = _is_importable("chromadb") and _is_importable("onnxruntime")
    sem_cfg = _section("semantic")
    sem_enabled = bool(sem_cfg.get("file_vectorization", False) or sem_cfg.get("chat_vectorization", False))
    features.append(("Semantic Search", sem_installed, sem_enabled, "pip install footprinter-cli[semantic]"))

    # Connector-declared features (dynamic)
    from footprinter.connectors import discover_connectors

    for spec in discover_connectors().values():
        for feat_name, probe, cfg_section, hint in spec.features:
            installed = _is_importable(probe)
            enabled = bool(_section(cfg_section).get("enabled", False))
            features.append((feat_name, installed, enabled, hint))

    return features
558
+
559
+
560
+ def check_architecture() -> str | None:
561
+ """Check for architecture mismatches. Returns warning string or None."""
562
+ import platform
563
+
564
+ machine = platform.machine()
565
+ # Detect Rosetta: arm64 hardware but x86_64 Python.
566
+ # hw.optional.arm64 returns 1 on Apple Silicon even under Rosetta,
567
+ # unlike hw.machine which reports x86_64 under Rosetta.
568
+ if machine == "x86_64":
569
+ try:
570
+ hw = subprocess.run(["sysctl", "-n", "hw.optional.arm64"], capture_output=True, text=True)
571
+ if hw.stdout.strip() == "1":
572
+ return (
573
+ "Python is running as x86_64 on arm64 hardware (Rosetta). "
574
+ "Native dependencies may have compatibility issues. "
575
+ "Consider recreating venv with native arm64 Python."
576
+ )
577
+ except Exception:
578
+ pass # Best-effort Rosetta detection; sysctl may not exist on non-macOS
579
+ return None
580
+
581
+
582
def install_git_hooks() -> int:
    """Point the checkout's ``core.hooksPath`` at ``scripts/hooks``.

    Fails (with a printed reason) when the ``post-merge`` hook script is
    missing, when the repo root is not inside a git repository, when the
    ``git`` executable cannot be found, or when ``git config`` itself fails.

    Returns:
        0 on success, 1 on failure.
    """
    repo = _repo_root()
    hook_script = repo / "scripts" / "hooks" / "post-merge"

    if not hook_script.exists():
        console.print(f"[red]Hook script not found:[/red] {hook_script}")
        return 1

    # Confirm the checkout really is a git repository (and that git exists).
    try:
        probe = subprocess.run(
            ["git", "rev-parse", "--git-dir"],
            cwd=str(repo),
            capture_output=True,
            text=True,
        )
        if probe.returncode != 0:
            console.print("[red]Not a git repository.[/red]")
            return 1
    except FileNotFoundError:
        console.print("[red]git not found.[/red]")
        return 1

    # Repository-local setting; the path is relative to the checkout root.
    configure = subprocess.run(
        ["git", "config", "--local", "core.hooksPath", "scripts/hooks"],
        cwd=str(repo),
        capture_output=True,
        text=True,
    )
    if configure.returncode != 0:
        console.print(f"[red]Failed to set core.hooksPath:[/red] {configure.stderr.strip()}")
        return 1

    console.print("[green]Git hooks installed.[/green]")
    console.print(" core.hooksPath = [cyan]scripts/hooks[/cyan]")
    console.print(f" post-merge hook: [cyan]{hook_script.relative_to(repo)}[/cyan]")
    return 0
626
+
627
+
628
def validate_config(config: dict) -> tuple[list[str], list[str]]:
    """Validate a config dict and return errors and warnings.

    Checks performed:
      * the config is a non-empty mapping;
      * ``directories`` is a non-empty list of strings (entries that do not
        exist on disk only produce a warning);
      * ``browsers`` is present and is a list of known browser names
        (it may be empty);
      * ``exclusions`` and ``indexing`` sections are present (warning only).

    Malformed input is reported through the returned errors; this function
    does not raise for a non-mapping config or non-string directory entries.

    Args:
        config: Parsed YAML config dict.

    Returns:
        Tuple of (errors, warnings). Empty errors means valid.
    """
    errors: list[str] = []

    if config is None:
        errors.append("Config is empty or invalid YAML")
        return errors, []
    if not isinstance(config, dict):
        # e.g. a YAML file whose top level is a list or a scalar
        errors.append(f"Config must be a mapping of settings, got {type(config).__name__}")
        return errors, []

    # directories is required and must be a non-empty list
    dirs = config.get("directories")
    missing_dirs: list[str] = []
    if not dirs:
        errors.append("'directories' is missing or empty")
    elif not isinstance(dirs, list):
        errors.append("'directories' must be a list")
    else:
        for d in dirs:
            if not isinstance(d, str):
                # YAML can yield None/int/bool entries; expanduser would raise
                errors.append(f"'directories' entries must be strings, got: {d!r}")
                continue
            if not os.path.isdir(os.path.expanduser(d)):
                missing_dirs.append(d)

    # browsers must be a list (can be empty)
    browsers = config.get("browsers")
    if browsers is None:
        errors.append("'browsers' key is missing")
    elif not isinstance(browsers, list):
        errors.append("'browsers' must be a list")
    else:
        for b in browsers:
            if b not in KNOWN_BROWSERS:
                errors.append(f"Unknown browser: {b}")

    # Absent directories are a warning, not an error — the bundled example
    # lists macOS-flavored defaults (~/Work, ~/Personal, ~/.claude) that a
    # fresh Linux install won't have. Let `fp setup --check` pass and point
    # the user at what's missing instead of rejecting the whole config.
    warnings: list[str] = []
    if missing_dirs:
        warnings.append(
            "Directories not found (will be skipped during indexing): "
            + ", ".join(missing_dirs)
        )
    if "exclusions" not in config:
        warnings.append("'exclusions' section missing — default exclusions will be used")
    if "indexing" not in config:
        warnings.append("'indexing' section missing — default settings will be used")

    return errors, warnings
683
+
684
+
685
def _print_phase(step: int, total: int, name: str):
    """Print a blank line followed by a dim rule titled "Step N of M — name".

    Args:
        step: 1-based number of the phase being entered.
        total: Total number of phases.
        name: Human-readable phase name.
    """
    console.print()
    heading = f"[bold]Step {step} of {total} — {name}[/bold]"
    console.print(Rule(heading, style="dim"))
689
+
690
+
691
def _choose_preset() -> dict | None:
    """Ask whether to use the quick-start preset or the full setup.

    Returns:
        ``{"directories": [...], "browsers": []}`` for quick start, built
        from the ``QUICK_START_CANDIDATES`` that exist on disk. Returns
        ``None`` when the user picks full setup, or when quick start was
        picked but none of the candidate directories exist.
    """
    console.print(" [bold]Quick start[/bold] — common directories, no email, browser or chat history (add more later)")
    console.print(" [bold]Full setup[/bold] — choose everything yourself")

    profile = Prompt.ask(" Profile", choices=["quick", "full"], default="full")
    if profile != "quick":
        return None

    # Keep only the candidates that actually exist for this user.
    found = []
    for candidate in QUICK_START_CANDIDATES:
        if os.path.isdir(os.path.expanduser(candidate)):
            found.append(candidate)

    if found:
        return {"directories": found, "browsers": []}

    console.print(" [yellow]No common directories found — switching to full setup[/yellow]")
    return None
703
+
704
+
705
def run_interactive_wizard():
    """Drive the interactive ``fp setup`` flow from start to finish.

    The wizard walks through six phases in order: Welcome, Data Sources,
    Confirm & Write, Populate, Connect and Summary. It returns early,
    after printing a notice, when the user declines to write the
    generated configuration.

    PromptCancelled and KeyboardInterrupt are deliberately not caught
    here: the caller (``_handle_setup``) is expected to print the
    cancellation message and exit with code 130.
    """
    existing = _load_existing_config()
    reconfiguring = existing is not None

    # Phase 1: Welcome
    _print_phase(1, 6, "Welcome")
    reconfigure_note = (
        "\n\n[bold yellow]Existing configuration detected.[/bold yellow]\n"
        " Current settings will be shown as defaults. Only sections\n"
        " you explicitly change will be updated."
        if reconfiguring
        else ""
    )
    # The Full Disk Access hint is only shown on macOS.
    if sys.platform == "darwin":
        prerequisites = (
            "\n\n[dim]Prerequisites (optional, can add later):[/dim]\n"
            " - Full Disk Access for Safari history (System Settings > Privacy & Security)"
        )
    else:
        prerequisites = ""
    overview = (
        "[bold]Footprinter Setup Wizard[/bold]\n\n"
        "Footprinter indexes your files, browser history, emails, and chat\n"
        "exports for AI-powered search and analysis.\n\n"
        "[bold]Phases:[/bold]\n"
        " 1. Welcome — what Footprinter does\n"
        " 2. Data Sources — directories, browsers, chat exports\n"
        " 3. Confirm & Write — preview and save configuration\n"
        " 4. Populate — index your data\n"
        " 5. Connect — access policies and Claude Desktop\n"
        " 6. Summary — results and next steps"
    )
    console.print(Panel(overview + prerequisites + reconfigure_note, title="fp setup"))

    # Phase 2: Data Sources
    _print_phase(2, 6, "Data Sources")
    # Presets are only offered on a first run; reconfiguration uses the full flow.
    preset = None if reconfiguring else _choose_preset()
    connector_results = {}
    if preset:
        answers = {"directories": preset["directories"], "browsers": preset["browsers"]}
        chat_export_path = None
        semantic_answers = collect_vectorization_answers(directories=preset["directories"], quick=True)
    else:
        answers = collect_answers(existing=existing)
        chat_export_path = collect_chat_export_path()
        semantic_answers = collect_vectorization_answers(directories=answers["directories"], existing=existing)

    # Phase 3: Confirm & Write
    _print_phase(3, 6, "Confirm & Write")
    preview_config(
        answers,
        connectors=connector_results,
        chat_export_path=chat_export_path,
        semantic=semantic_answers,
    )

    wants_write = Confirm.ask("Write this configuration?", default=True)
    if not wants_write:
        console.print("[dim]Setup cancelled.[/dim]")
        return

    write_config(
        generate_config(
            answers,
            connector_results=connector_results,
            semantic=semantic_answers,
            existing=existing,
        )
    )

    # Phase 4: Populate
    _print_phase(4, 6, "Populate")

    # Start from an empty setup log before the first orchestrator call.
    log_file = get_log_path()
    log_file.parent.mkdir(parents=True, exist_ok=True)
    log_file.write_text("")

    # Describe what is about to run, based on the answers collected above.
    optional_stages = (
        ("browser history", answers.get("browsers")),
        ("chat import", chat_export_path),
    )
    planned = ["local file indexing"]
    planned.extend(label for label, wanted in optional_stages if wanted)
    console.print(f" This will run: {', '.join(planned)}.")

    chat_result = {}
    if not Confirm.ask("Index and analyze your data now?", default=True):
        console.print(" [dim]Skipped. Run later: fp ingest[/dim]")
    else:
        try:
            run_orchestrator(answers, connector_results=connector_results)
        except Exception as e:  # Intentional broad catch: setup wizard step must not crash the wizard
            console.print(f" [yellow]Indexing error: {e}[/yellow]")
        if chat_export_path:
            try:
                chat_result = import_chat_export(chat_export_path)
            except Exception as e:  # Intentional broad catch: setup wizard step must not crash the wizard
                console.print(f" [yellow]Chat import error: {e}[/yellow]")

    # CSV import sits between data indexing and access policies.
    _offer_csv_import_wizard()

    # Phase 5: Connect
    _print_phase(5, 6, "Connect")
    seed_access_policies()
    mcp_configured = offer_setup_claude()

    # Phase 6: Summary
    _print_phase(6, 6, "Summary")
    print_summary(
        chat_result=chat_result,
        mcp_configured=mcp_configured,
        connector_results=connector_results,
    )
827
+
828
+
829
def _offer_csv_import_wizard() -> None:
    """Open the database and run the CSV import prompt against it.

    A ``SystemExit`` raised while the database is open (``open_db`` exits
    when the database is not found) is treated as "not ready yet" during
    setup: the step is skipped with a notice instead of ending the wizard.
    """
    from footprinter.cli._common import open_db

    try:
        with open_db() as db_conn:
            _offer_csv_import(db_conn)
    except SystemExit:
        # Not an error during setup; the database may simply not exist yet.
        console.print(" [dim]Database not ready — skipping CSV import.[/dim]")
839
+
840
+
841
def _offer_csv_import(conn) -> None:
    """Prompt the user to import clients/projects from CSV files.

    Asks for a CSV path repeatedly until a blank answer ends the loop.
    For each file the entity type is detected from the header row
    (``client_type`` → clients, ``project_name`` → projects), the required
    columns from ``CSV_COLUMNS`` are checked, and the rows are handed to
    ``_process_csv_rows``. A summary table and up to five row-level errors
    are printed afterwards. Missing, unreadable, empty or unrecognised
    files are reported and the prompt is shown again.

    Args:
        conn: Open database connection, passed through to
            ``_process_csv_rows``.
    """
    import csv as csv_mod

    console.print("\n[bold]Import clients/projects from CSV[/bold]")
    console.print(
        " If you have a spreadsheet of clients or projects, paste the file path.\n"
        " [dim]Leave blank to skip. You can import later with: fp upsert clients data.csv --commit[/dim]"
    )

    while True:
        path_str = Prompt.ask(" CSV file path (blank to skip)", default="")
        if not path_str:
            return

        csv_path = Path(path_str).expanduser()
        if not csv_path.exists():
            console.print(f" [red]File not found: {csv_path}[/red]")
            continue

        # Read the headers and rows up front so the entity type can be detected.
        # "utf-8-sig" strips a leading byte-order mark (common in spreadsheet
        # exports) that would otherwise be glued onto the first header name;
        # files without a BOM decode exactly as they do with plain UTF-8.
        try:
            with open(csv_path, encoding="utf-8-sig", newline="") as f:
                reader = csv_mod.DictReader(f)
                headers = reader.fieldnames or []
                rows = list(reader)
        except Exception as e:  # Intentional broad catch: setup wizard step must not crash the wizard
            console.print(f" [red]Could not read CSV: {e}[/red]")
            continue

        if not rows:
            console.print(" [dim]Empty CSV — nothing to import.[/dim]")
            continue

        # Detect entity type from headers
        if "client_type" in headers:
            entity_type = "client"
            svc_name = "client_service"
        elif "project_name" in headers:
            entity_type = "project"
            svc_name = "project_service"
        else:
            console.print(
                " [red]Could not detect CSV type.[/red] Expected 'client_type' "
                "(for clients) or 'project_name' (for projects) in headers."
            )
            continue

        from footprinter.cli.upsert import CSV_COLUMNS, _process_csv_rows

        required_cols, optional_cols, int_cols = CSV_COLUMNS[entity_type]

        # Check required columns
        missing = set(required_cols) - set(headers)
        if missing:
            console.print(f" [red]Missing required columns: {', '.join(sorted(missing))}[/red]")
            continue

        import footprinter.services as svc

        service = getattr(svc, svc_name)

        created, updated, errors, error_details = _process_csv_rows(
            conn,
            rows,
            service,
            entity_type,
            required_cols,
            optional_cols,
            int_cols,
        )

        # Show summary
        table = Table(title=f"CSV Import — {entity_type}s")
        table.add_column("Metric", style="cyan")
        table.add_column("Count", justify="right")
        table.add_row("Created", str(created))
        table.add_row("Updated", str(updated))
        table.add_row("Errors", str(errors))
        console.print(table)

        if error_details:
            # Only the first five row errors are listed; the rest are counted.
            for err in error_details[:5]:
                console.print(f" [yellow]Row {err['row']}: {err['error']}[/yellow]")
            if len(error_details) > 5:
                console.print(f" [dim]... and {len(error_details) - 5} more errors[/dim]")

        console.print(f" [green]Imported {created} new, updated {updated} existing {entity_type}(s).[/green]")
934
+
935
+
936
def collect_answers(existing: dict | None = None) -> dict:
    """Gather directory and browser choices via rich prompts.

    Args:
        existing: Optional existing config dict. When provided, its current
            directories and browsers are shown and can be kept as-is.

    Returns:
        Dict with keys: directories, browsers.
    """
    answers = {}

    # --- Directories ---
    console.print("\n[bold]1. Directories to scan[/bold]")
    console.print(
        " Footprinter will scan these directories for files to index —\n"
        " metadata, content types, and project structure.\n"
        " [dim]Common choices: ~/Work, ~/Personal, ~/Documents[/dim]\n"
        " [dim]Use ~ for your home directory.[/dim]"
    )

    existing_dirs = (existing or {}).get("directories", [])
    if existing_dirs:
        console.print(f" Current directories: {', '.join(existing_dirs)}")
        if Confirm.ask(" Keep current directories?", default=True):
            directories = list(existing_dirs)
            # Track expanded paths so "~/Work" and its absolute form count as
            # the same directory and are not configured twice.
            seen = {os.path.expanduser(str(d)) for d in directories}
            # Still offer to add more
            console.print(" [dim]You can add more directories below (leave blank to continue).[/dim]")
            while True:
                path = Prompt.ask(" Add another directory (leave blank to finish)", default="")
                if not path:
                    break
                if not Path(path).expanduser().is_dir():
                    console.print(f" [red]Directory not found: {path}[/red]")
                    continue
                expanded = os.path.expanduser(path)
                if expanded in seen:
                    console.print(f" [dim]Already included: {path}[/dim]")
                    continue
                directories.append(path)
                seen.add(expanded)
                console.print(f" [green]✓[/green] Added {path}")
            answers["directories"] = directories
        else:
            # User wants to re-enter directories, so use the standard collection.
            answers["directories"] = _collect_directories_from_scratch()
    else:
        answers["directories"] = _collect_directories_from_scratch()

    # --- Browsers ---
    console.print("\n[bold]2. Browser history[/bold]")
    console.print(
        " Optionally index your browsing history for search and context.\n"
        " [dim]You can enable this later in config.yaml.[/dim]"
    )

    existing_browsers = (existing or {}).get("browsers", [])
    if existing_browsers:
        console.print(f" Currently enabled: {', '.join(existing_browsers)}")
        if Confirm.ask(" Keep current browser settings?", default=True):
            browsers = list(existing_browsers)
        else:
            browsers = _collect_browsers_from_scratch()
    else:
        browsers = _collect_browsers_from_scratch()
    answers["browsers"] = browsers

    return answers
999
+
1000
+
1001
def _collect_directories_from_scratch() -> list[str]:
    """Collect directories interactively, repeating until at least one is chosen.

    Paths are stored as the user typed them (for example with ``~``) but
    are compared in expanded form, so the same directory is never added
    twice and an optional directory that was already entered manually is
    not offered again.

    Returns:
        Non-empty list of directory paths.
    """
    while True:
        directories = []
        seen = set()  # expanded forms of everything accepted so far

        # Prompt for directories one at a time. The first prompt has no
        # default (Ellipsis); later prompts accept a blank answer to finish.
        while True:
            prompt_text = (
                " Enter directory path" if not directories else " Add another directory (leave blank to finish)"
            )
            path = Prompt.ask(prompt_text, default="" if directories else ...)
            if not path:
                break
            expanded = os.path.expanduser(path)
            if not os.path.isdir(expanded):
                console.print(f" [red]Directory not found: {path}[/red]")
                continue
            if expanded in seen:
                console.print(f" [dim]Already added: {path}[/dim]")
                continue
            directories.append(path)
            seen.add(expanded)
            console.print(f" [green]✓[/green] Added {path}")

        # Offer optional directories that exist and were not already chosen.
        for d in OPTIONAL_DIRECTORIES:
            expanded = os.path.expanduser(d)
            if expanded in seen or not os.path.isdir(expanded):
                continue
            if d == "~/.claude":
                console.print(" [dim]~/.claude contains Claude Code settings and chat history[/dim]")
            if Confirm.ask(f" Include {d}?", default=False):
                directories.append(d)
                seen.add(expanded)

        if directories:
            return directories
        console.print(" [red]At least one directory is required.[/red]")
1033
+
1034
+
1035
def _collect_browsers_from_scratch() -> list[str]:
    """Ask, for each available browser, whether its history should be indexed.

    Returns:
        The browsers the user accepted, in the order they were offered.
    """
    permission_notes = {
        "safari": "[dim](requires Full Disk Access)[/dim]",
        "chrome": "[dim](no additional permissions needed)[/dim]",
    }
    # One confirmation per browser; browsers without a note get an empty hint.
    return [
        name
        for name in get_available_browsers()
        if Confirm.ask(f" Include {name}? {permission_notes.get(name, '')}", default=True)
    ]
1047
+
1048
+
1049
def _check_semantic_deps() -> bool:
    """Check semantic-search dependencies and offer to install them if missing.

    Returns:
        True when chromadb and onnxruntime are importable, or when the user
        accepted the install and pip reported success; False otherwise.
    """
    if _is_importable("chromadb") and _is_importable("onnxruntime"):
        return True

    console.print("\n [yellow]Semantic search requires chromadb and onnxruntime.[/yellow]")
    if Confirm.ask(" Install now? (pip install footprinter-cli[semantic])", default=True):
        # Use the running interpreter's pip so the packages land in the same
        # environment as this process.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "footprinter-cli[semantic]"],
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            # Packages installed while the interpreter is running may be
            # hidden by the import system's finder caches; clear them so
            # later imports in this process can see the new modules.
            import importlib

            importlib.invalidate_caches()
            console.print(" [green]✓[/green] Semantic dependencies installed.")
            return True
        else:
            console.print(f" [red]Install failed:[/red] {result.stderr.strip()}")

    console.print(" [dim]You can enable semantic search later with fp setup.[/dim]")
    return False
1069
+
1070
+
1071
def collect_vectorization_answers(
    directories: list[str],
    existing: dict | None = None,
    quick: bool = False,
) -> dict:
    """Ask about content indexing: snippets and vectorization.

    Groups all content extraction decisions into one section:
    - Content snippets: short previews stored for keyword search
    - Semantic search: embeddings for meaning-based search (files, chats)

    Defaults are read from the ``vectorization``, ``semantic`` and
    ``indexing`` sections of ``existing``. A section that is present but
    null (for example an empty key in the YAML file) is treated like a
    missing one.

    Args:
        directories: Directories to scan for file type preview.
        existing: Optional existing config dict for defaults.
        quick: If True, use the compact quick-mode flow; otherwise the
            full flow with file type and exclusion editing.

    Returns:
        Dict with content_snippets (bool),
        file_vectorization, chat_vectorization (bool),
        file_types (list), exclude_patterns (list).
    """
    current = existing or {}
    # "or {}" guards against sections that exist in the config but are null.
    existing_vec = current.get("vectorization") or {}
    existing_semantic = current.get("semantic") or {}
    existing_indexing = current.get("indexing") or {}
    existing_snippets = existing_indexing.get("content_snippets") or False
    file_types = existing_vec.get("file_types")
    if file_types is None:
        file_types = list(DEFAULT_FILE_TYPES)
    existing_excludes = existing_vec.get("exclude_patterns") or []

    console.print("\n[bold]Content Indexing[/bold]")
    console.print(
        " By default, Footprinter indexes metadata only — filenames,\n"
        " timestamps, and structure. The options below let it read\n"
        " file content for richer search.\n"
    )

    console.print(" [bold]Content snippets[/bold]")
    console.print(
        " Stores a short preview of file content for keyword search.\n"
        " Without this, search matches filenames and metadata only.\n"
        " [dim]Trade-off: Footprinter reads file content during indexing.[/dim]"
    )
    content_snippets = Confirm.ask(" Enable file content snippets?", default=existing_snippets)

    console.print("\n [bold]Semantic search[/bold]")
    console.print(
        " Stores content as embeddings in a local ChromaDB database.\n"
        " This lets you find files and chats by meaning, not just keywords.\n"
        " [dim]Trade-off: additional disk space (~500 MB) and longer indexing time.[/dim]"
    )

    if quick:
        result = _collect_vectorization_quick(directories, file_types, existing_excludes, existing_semantic)
    else:
        result = _collect_vectorization_full(directories, file_types, existing_excludes, existing_semantic)
    result["content_snippets"] = content_snippets
    return result
1126
+
1127
+
1128
def _collect_vectorization_quick(
    directories: list[str],
    file_types: list[str],
    existing_excludes: list[str],
    existing_semantic: dict,
) -> dict:
    """Quick-mode semantic search setup with automatically applied exclusions.

    Prints a compact scan summary, asks whether to enable semantic search
    for files and for chats, and adds every junk pattern detected by the
    scan to the exclusion list without asking. Both flags come back False
    when the user declines both or the semantic dependencies are unavailable.

    Returns:
        Dict with file_vectorization, chat_vectorization, file_types and
        exclude_patterns.
    """

    def _answers(files_on, chats_on, patterns):
        # Single place that defines the shape of the returned dict.
        return {
            "file_vectorization": files_on,
            "chat_vectorization": chats_on,
            "file_types": file_types,
            "exclude_patterns": patterns,
        }

    scan = _scan_directories_for_vectorization(directories, file_types)
    total = scan["total"]

    if total > 0:
        junk_count = sum(scan["junk_hits"].values())
        console.print(f"\n Found [bold]{total}[/bold] files matching {', '.join(file_types)}")
        if junk_count > 0:
            console.print(
                f" [yellow]{junk_count} likely junk files detected[/yellow] "
                f"→ {scan['total_after_exclusions']} after exclusions"
            )

    file_vec = Confirm.ask(
        " Enable semantic search for files?",
        default=existing_semantic.get("file_vectorization", False),
    )
    chat_vec = Confirm.ask(
        " Enable semantic search for chats?",
        default=existing_semantic.get("chat_vectorization", False),
    )

    if not (file_vec or chat_vec):
        # Nothing enabled: leave the existing exclusions untouched.
        return _answers(False, False, existing_excludes)

    # Extend a copy of the existing exclusions with newly detected junk patterns.
    merged_excludes = list(existing_excludes)
    merged_excludes += [hit for hit in scan["junk_hits"] if hit not in merged_excludes]

    if not _check_semantic_deps():
        return _answers(False, False, merged_excludes)

    return _answers(file_vec, chat_vec, merged_excludes)
1180
+
1181
+
1182
def _collect_vectorization_full(
    directories: list[str],
    file_types: list[str],
    existing_excludes: list[str],
    existing_semantic: dict,
) -> dict:
    """Full-mode vectorization: detailed file type editing and exclusion toggles.

    Steps: (1) confirm or re-enter the file type allowlist, (2) scan the
    directories and review the recommended junk exclusions, (3) show the
    resulting file count and ask whether to enable semantic search for
    files and chats. Both flags come back False when the user declines
    both or the semantic dependencies are unavailable.

    Args:
        directories: Directories to scan.
        file_types: Current file type allowlist, shown as the default.
        existing_excludes: Exclusion patterns already configured.
        existing_semantic: Existing ``semantic`` settings used as prompt
            defaults.

    Returns:
        Dict with file_vectorization, chat_vectorization, file_types and
        exclude_patterns.
    """
    # Step 1: File type allowlist
    console.print(f"\n File types to embed: [bold]{', '.join(file_types)}[/bold]")
    keep_types = Confirm.ask(" Keep these file types?", default=True)
    if not keep_types:
        raw = Prompt.ask(" Enter file types (comma-separated, e.g. .md, .txt, .py)")
        entered = [t.strip() for t in (raw or "").split(",") if t.strip()]
        if entered:
            file_types = entered
        else:
            # A blank answer would otherwise store an empty allowlist.
            console.print(" [dim]No file types entered — keeping the current list.[/dim]")

    # Step 2: Scan and show results
    scan = _scan_directories_for_vectorization(directories, file_types)

    if scan["total"] > 0:
        console.print(f"\n Scanned: [bold]{scan['total']}[/bold] files found")
        for ext, count in sorted(scan["by_extension"].items()):
            console.print(f" {ext}: {count}")

    # Junk exclusions
    exclude_patterns = list(existing_excludes)
    if scan["junk_hits"]:
        console.print("\n [yellow]Recommended exclusions:[/yellow]")
        detected_patterns = []
        for pattern, count in scan["junk_hits"].items():
            # Fall back to the pattern itself when no description is known.
            desc = next((d for p, d in KNOWN_JUNK_PATTERNS if p == pattern), pattern)
            console.print(f" {pattern} ({count} files) — {desc}")
            detected_patterns.append(pattern)

        accept_all = Confirm.ask(" Accept recommended exclusions?", default=True)
        if accept_all:
            for p in detected_patterns:
                if p not in exclude_patterns:
                    exclude_patterns.append(p)
        else:
            # Let the user decide pattern by pattern.
            for pattern in detected_patterns:
                exclude = Confirm.ask(f" Exclude {pattern}?", default=True)
                if exclude and pattern not in exclude_patterns:
                    exclude_patterns.append(pattern)

    # Step 3: Show before/after and enable decision
    if scan["total"] > 0:
        excluded = sum(scan["junk_hits"].get(p, 0) for p in exclude_patterns)
        # Clamp at zero so the displayed count can never be negative.
        after = max(0, scan["total"] - excluded)
        console.print(f"\n Files to embed: [bold]{after}[/bold] (of {scan['total']} total)")

    file_default = existing_semantic.get("file_vectorization", False)
    chat_default = existing_semantic.get("chat_vectorization", False)
    file_vec = Confirm.ask(" Enable semantic search for files?", default=file_default)
    chat_vec = Confirm.ask(" Enable semantic search for chats?", default=chat_default)

    # The dependency check (and its install prompt) only runs when at
    # least one kind of semantic search was requested.
    enabled = bool(file_vec or chat_vec) and _check_semantic_deps()

    return {
        "file_vectorization": file_vec if enabled else False,
        "chat_vectorization": chat_vec if enabled else False,
        "file_types": file_types,
        "exclude_patterns": exclude_patterns,
    }
1258
+
1259
+
1260
def preview_config(
    answers: dict,
    console=None,
    connectors: dict = None,
    chat_export_path: str = None,
    semantic: dict = None,
):
    """Print a panel summarising the configuration before it is written.

    Args:
        answers: Dict from collect_answers().
        console: Optional Rich Console (for testing); a new Console is
            created when omitted.
        connectors: Optional connector results dict (not used in the preview).
        chat_export_path: Optional path to a chat export file/directory.
        semantic: Optional dict from collect_vectorization_answers().
    """
    out = Console() if console is None else console

    none_yet = "[dim]none (can add later)[/dim]"
    disabled = "[dim]disabled (can enable later)[/dim]"

    browsers = answers.get("browsers", [])
    summary = [
        f"Directories: {', '.join(answers.get('directories', []))}",
        f"Browsers: {', '.join(browsers)}" if browsers else f"Browsers: {none_yet}",
        f"Chat export: {chat_export_path}" if chat_export_path else f"Chat export: {none_yet}",
    ]

    # Which kinds of semantic search are switched on, in display order.
    semantic_targets = (("files", "file_vectorization"), ("chats", "chat_vectorization"))
    enabled_for = [label for label, key in semantic_targets if semantic and semantic.get(key)]
    if enabled_for:
        summary.append(f"Semantic search: {', '.join(enabled_for)}")
        if semantic.get("file_types"):
            summary.append(f" File types: {', '.join(semantic['file_types'])}")
        if semantic.get("exclude_patterns"):
            summary.append(f" Exclusion patterns: {len(semantic['exclude_patterns'])}")
    else:
        summary.append(f"Semantic search: {disabled}")

    snippets_on = bool(semantic and semantic.get("content_snippets"))
    summary.append("Content snippets: files" if snippets_on else f"Content snippets: {disabled}")

    out.print()
    out.print(
        Panel(
            "\n".join(summary),
            title="Configuration Preview",
            border_style="dim",
            expand=False,
        )
    )
    out.print()
1319
+
1320
+
1321
+ def _deep_merge(base: dict, overlay: dict) -> dict:
1322
+ """Recursively merge overlay into base. Returns a new dict."""
1323
+ result = dict(base)
1324
+ for key, value in overlay.items():
1325
+ if key in result and isinstance(result[key], dict) and isinstance(value, dict):
1326
+ result[key] = _deep_merge(result[key], value)
1327
+ else:
1328
+ result[key] = value
1329
+ return result
1330
+
1331
+
1332
def generate_config(
    answers: dict,
    connector_results: dict = None,
    semantic: dict = None,
    existing: dict | None = None,
) -> dict:
    """Load config.example.yaml and apply user answers.

    Args:
        answers: Dict from collect_answers().
        connector_results: Optional dict from connector setup hooks mapping
            account names to verified service lists
            (e.g. {"personal": ["drive"]}).
        semantic: Optional dict from collect_vectorization_answers() with
            file_vectorization and chat_vectorization bools, and optionally
            file_types, exclude_patterns and content_snippets.
        existing: Optional existing config dict. When provided, its values
            are deep-merged on top of the template before wizard answers
            are applied, preserving sections the user didn't change.
            Note: source_seeds are reconciled by name (template seeds
            kept, existing seeds overlaid) rather than replaced wholesale.

    Returns:
        Config dict ready to write as YAML.
    """
    import copy

    if connector_results is None:
        connector_results = {}

    # Decode the bundled template explicitly as UTF-8 instead of relying on
    # the platform's locale encoding.
    with open(get_bundled_path("config.example.yaml"), "r", encoding="utf-8") as f:
        config = yaml.safe_load(f) or {}

    if existing is not None:
        # Save template seeds before merge (_deep_merge replaces lists wholesale)
        template_seeds = list(config.get("source_seeds") or [])
        config = _deep_merge(config, copy.deepcopy(existing))
        # Reconcile source_seeds: keep all template seeds, overlay existing by name.
        # A null source_seeds value in the existing config counts as "no seeds".
        existing_seeds = config.get("source_seeds") or []
        by_name = {}
        unnamed = []
        for seed in [*template_seeds, *existing_seeds]:
            if isinstance(seed, dict) and "name" in seed:
                by_name[seed["name"]] = seed
            elif seed not in unnamed:
                # Entries without a name cannot be reconciled; keep them as-is.
                unnamed.append(seed)
        config["source_seeds"] = [*by_name.values(), *unnamed]

    def _section(name: str) -> dict:
        """Return config[name] as a dict, replacing a missing or null value."""
        if not isinstance(config.get(name), dict):
            config[name] = {}
        return config[name]

    # Apply answers; these always come from explicit user input.
    config["directories"] = answers.get("directories") or []
    config["browsers"] = answers.get("browsers", [])

    # Never write a real API key to the config file: any api_key value is
    # reset to the placeholder (the real key goes in .env).
    claude_cfg = config.get("claude")
    if isinstance(claude_cfg, dict) and "api_key" in claude_cfg:
        claude_cfg["api_key"] = "YOUR_API_KEY_HERE"

    # Apply connector config via hooks (enable flags, source_seeds, accounts)
    if connector_results:
        from footprinter.connectors import discover_connectors, resolve_hook

        for _name, spec in discover_connectors().items():
            if spec.config_apply:
                fn = resolve_hook(spec.config_apply)
                if fn:
                    fn(config, connector_results)

    # Apply semantic search settings; the section always exists with safe defaults.
    semantic_cfg = _section("semantic")
    if semantic:
        semantic_cfg["file_vectorization"] = semantic.get("file_vectorization", False)
        semantic_cfg["chat_vectorization"] = semantic.get("chat_vectorization", False)
    else:
        semantic_cfg.setdefault("file_vectorization", False)
        semantic_cfg.setdefault("chat_vectorization", False)

    # Apply vectorization settings from the wizard (file_types, exclude_patterns)
    if semantic and "file_types" in semantic:
        _section("vectorization")["file_types"] = semantic["file_types"]
    if semantic and "exclude_patterns" in semantic:
        _section("vectorization")["exclude_patterns"] = semantic["exclude_patterns"]

    # Apply content snippets setting
    indexing_cfg = _section("indexing")
    if semantic and "content_snippets" in semantic:
        indexing_cfg["content_snippets"] = semantic["content_snippets"]

    return config
1416
+
1417
+
1418
def write_config(config: dict, path: Path = None):
    """Serialise ``config`` to a YAML file and report where it was written.

    Args:
        config: Config dict to write.
        path: Output path; when omitted the result of ``get_config_path()``
            is used. Missing parent directories are created.
    """
    destination = path if path else get_config_path()
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Block style with insertion order preserved (no key sorting).
    with destination.open("w") as handle:
        yaml.dump(config, handle, default_flow_style=False, sort_keys=False)

    console.print(f" Wrote [bold]{destination}[/bold]")
1432
+
1433
+
1434
def _run_orchestrator_stages(stages: list[str]):
    """Run the given pipeline stages in-process.

    Creates a ``DataPipelineOrchestrator`` and passes it to
    ``_run_with_logging()`` in incremental mode, the same code path that
    ``fp ingest`` uses. A ``ValueError`` or a keyboard interrupt is
    reported on the console instead of propagating.

    Args:
        stages: List of stage names (e.g. ["local_folders", "local_files"]).
    """
    pipeline = DataPipelineOrchestrator()
    run_options = {
        "pipes": stages,
        "mode": "incremental",
        "quiet": False,
        "header": "Setup Indexing",
        "show_next_steps": False,
    }
    try:
        _run_with_logging(pipeline, **run_options)
    except ValueError as err:
        console.print(f"[yellow]Pipeline error:[/yellow] {err}")
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
1456
+
1457
+
1458
def run_orchestrator(answers: dict = None, connector_results: dict = None):
    """Build the initial stage list and run it through the in-process pipeline.

    The stage list always starts with ``local_folders`` and ``local_files``.
    ``browser`` is added when ``answers`` has a non-empty browsers list,
    and when ``connector_results`` is non-empty the pipes of every
    installed connector are appended.

    Args:
        answers: Dict from collect_answers(). None defaults to {}.
        connector_results: Optional dict of connector results.
    """
    answers = {} if answers is None else answers
    connector_results = {} if connector_results is None else connector_results

    console.print("\n[bold]Running initial indexing...[/bold]")

    stages = ["local_folders", "local_files"]
    if answers.get("browsers"):
        stages.append("browser")

    if connector_results:
        # Imported lazily so connector discovery only happens when needed.
        from footprinter.connectors import discover_connectors, is_installed

        for spec in discover_connectors().values():
            if is_installed(spec):
                stages.extend(spec.pipes)

    _run_orchestrator_stages(stages)
1485
+
1486
+
1487
def collect_chat_export_path() -> str | None:
    """Ask whether the user has a chat export and, if so, where it is.

    Part of Phase 2 (Data Sources). Nothing is imported here; the path is
    only collected and checked for existence.

    Returns:
        The user-expanded path as a string when it exists; None when the
        user declines, gives no path, or the path does not exist.
    """
    console.print("\n[bold]3. Chat history[/bold]")
    console.print(
        " Optionally import Claude or ChatGPT chat exports.\n"
        " [dim]You can also import later with: fp ingest import <file>[/dim]"
    )
    has_export = Confirm.ask(" Do you have Claude or ChatGPT exports to import?", default=False)
    if not has_export:
        return None

    console.print(" [dim]Supported: Claude .zip export or unzipped directory[/dim]")
    entered = Prompt.ask(" Path to export file (.zip or directory)")
    if not entered:
        return None

    expanded = os.path.expanduser(entered)
    candidate = Path(expanded)
    if candidate.exists():
        return str(candidate)

    console.print(f" [red]File not found: {expanded}[/red]")
    return None
1513
+
1514
+
1515
def import_chat_export(path: str) -> dict:
    """Import a chat export from a previously collected path.

    Called by the setup wizard during its Populate phase (Phase 4). Any
    failure is reported on the console together with the manual command
    to retry; nothing is re-raised.

    Args:
        path: Expanded path to the export file or directory.

    Returns:
        The dict returned by ChatIndexer.upload(), or {} when the import
        fails or the result is not a dict.
    """
    export_location = Path(path)
    try:
        # Imported lazily, inside the guarded block, so import errors are
        # reported like any other import failure.
        from footprinter.ingest.chat_indexer import ChatIndexer
        from footprinter.ingest.database import Database

        indexer = ChatIndexer(Database(str(get_db_path())))
        outcome = indexer.upload(export_location)
        console.print(" [green]Chat import complete.[/green]")
        if not isinstance(outcome, dict):
            return {}

        added = outcome.get("chats_added", 0)
        updated = outcome.get("chats_updated", 0)
        msgs = outcome.get("messages_imported", 0)
        console.print(
            f" Imported: [cyan]{added + updated}[/cyan] chats "
            f"({added} new, {updated} updated), "
            f"[cyan]{msgs}[/cyan] messages"
        )
        return outcome
    except Exception as e:  # Intentional broad catch: user-facing CLI; errors shown to console, not re-raised
        console.print(f" [yellow]Chat import failed: {e}[/yellow]")
        console.print(f" [dim]Run manually: fp ingest import {path}[/dim]")
        return {}
1547
+
1548
+
1549
def offer_setup_claude() -> bool:
    """Offer the MCP config snippet and optional Claude Desktop auto-configuration.

    The step is skipped when the MCP package is unavailable. Otherwise the
    user can view a snippet for manual use in other clients and then
    choose whether the Claude Desktop config should be written
    automatically.

    Returns:
        True if MCP was successfully configured, False otherwise.
    """

    def _report_failure(error) -> bool:
        # Shared failure output for snippet generation and config writing.
        console.print(f" [yellow]MCP setup failed: {error}[/yellow]")
        console.print(" [dim]Run manually: fp setup mcp --claude[/dim]")
        return False

    if not mcp_setup.is_mcp_available():
        console.print("\n[dim]MCP package not installed — skipping Claude Desktop configuration.[/dim]")
        console.print(" [dim]Install with: pip install mcp[/dim]")
        return False

    try:
        snippet = mcp_setup.generate_snippet()
    except Exception as e:  # Intentional broad catch: user-facing CLI; errors shown to console, not re-raised
        return _report_failure(e)

    # Offer the snippet for manual copy/paste into other MCP clients.
    show_snippet = Confirm.ask(
        "\nView MCP config snippet (for Claude Code, Cursor, VS Code, and other clients)?",
        default=True,
    )
    if show_snippet:
        mcp_setup.print_snippet(snippet)

    # Offer Claude Desktop auto-config
    auto_configure = Confirm.ask("\nConfigure Claude Desktop automatically?", default=False)
    if not auto_configure:
        return False

    try:
        mcp_setup.write_config(snippet)
        console.print(" [green]Claude Desktop MCP configured.[/green]")
        return True
    except Exception as e:  # Intentional broad catch: user-facing CLI; errors shown to console, not re-raised
        return _report_failure(e)
1586
+
1587
+
1588
+ # _get_db_connection and _normalize_path imported from _policy_helpers
1589
+
1590
+
1591
def _require_config() -> tuple[dict, Path]:
    """Return the loaded config together with its path, or terminate the process.

    Returns:
        Tuple of (config_dict, config_path).

    Exits:
        Prints the error and calls sys.exit(1) when get_config() raises
        ConfigError.
    """
    try:
        loaded = get_config()
    except ConfigError as err:
        console.print(f"[red]Config error:[/red] {err}")
        sys.exit(1)

    location = get_config_path()
    return loaded, location
1607
+
1608
+
1609
def folders_add(path: str, index: bool = True) -> int:
    """Add a directory to the config and optionally trigger indexing.

    Args:
        path: Directory path to add.
        index: If True, ask whether to run indexing once the folder is added.

    Returns:
        0 on success; 1 if the path is already configured or is not an
        existing directory.
    """
    normalized = _normalize_path(path)
    expanded = os.path.expanduser(normalized)

    config, config_path = _require_config()
    # dict.get() only uses its default when the key is absent. A key that is
    # present but empty/null yields None, which would crash the iteration
    # below, so treat it the same as "no directories configured yet".
    directories = config.get("directories") or []

    # Duplicate-check before existence-check: a configured path is a duplicate
    # regardless of whether the directory is currently reachable, and "already
    # configured" is more actionable than "not a directory" when both are true.
    existing_expanded = {os.path.expanduser(d) for d in directories}
    if expanded in existing_expanded:
        console.print(f"[yellow]Already configured:[/yellow] {normalized}")
        return 1

    if not os.path.isdir(expanded):
        console.print(f"[red]Not a directory or not found:[/red] {path}")
        return 1

    directories.append(normalized)
    config["directories"] = directories
    write_config(config, config_path)
    console.print(f"[green]Added:[/green] {normalized}")

    # Short-circuit keeps the prompt from appearing when index is False.
    if index and Confirm.ask("Run indexing for the new folder now?", default=True):
        _run_orchestrator_stages(["local_folders", "local_files"])

    return 0
1647
+
1648
+
1649
def folders_remove(path: str) -> int:
    """Remove a directory from the config.

    Does NOT delete files from the database — they remain as audit trail.

    Args:
        path: Directory path to remove.

    Returns:
        0 on success, 1 if path wasn't configured.
    """
    normalized = _normalize_path(path)
    expanded = os.path.expanduser(normalized)

    config, config_path = _require_config()
    # dict.get() only uses its default when the key is absent. A key that is
    # present but empty/null yields None, which is not iterable, so fall back
    # to an empty list and report the path as "not configured" instead.
    directories = config.get("directories") or []

    # Compare on the ~-expanded form so "~/x" and its absolute spelling match.
    remaining = [d for d in directories if os.path.expanduser(d) != expanded]

    # Nothing was filtered out, so the path was never in the config.
    if len(remaining) == len(directories):
        console.print(f"[yellow]Not configured:[/yellow] {normalized}")
        return 1

    config["directories"] = remaining
    write_config(config, config_path)
    console.print(f"[green]Removed:[/green] {normalized}")
    console.print("[dim] Note: indexed files remain in the database.[/dim]")
    return 0
1678
+
1679
+
1680
def _get_indexing_counts() -> dict:
    """Collect row counts for the summary display.

    Returns:
        Dict keyed by "folders", "files", "visits", "projects", "chats" and
        "messages". A query that raises sqlite3.OperationalError is reported
        as 0. Returns {} when no DB connection is available or on any other
        error.
    """
    connection = _get_db_connection()
    if connection is None:
        return {}

    count_queries = (
        ("folders", "SELECT COUNT(*) FROM folders"),
        ("files", "SELECT COUNT(*) FROM files WHERE status != 'removed'"),
        ("visits", "SELECT COUNT(*) FROM visits"),
        ("projects", "SELECT COUNT(*) FROM projects"),
        ("chats", "SELECT COUNT(*) FROM chats WHERE status != 'removed'"),
        ("messages", "SELECT COUNT(*) FROM messages WHERE status != 'removed'"),
    )

    def _count(cursor, sql):
        # One failing query must not discard the counts that did succeed.
        try:
            cursor.execute(sql)
            return cursor.fetchone()[0]
        except sqlite3.OperationalError:
            return 0

    try:
        cursor = connection.cursor()
        return {label: _count(cursor, sql) for label, sql in count_queries}
    except Exception:  # Intentional broad catch: setup wizard display must not crash
        return {}
    finally:
        connection.close()
1707
+
1708
+
1709
+ def seed_access_policies() -> dict:
1710
+ """Seed default MCP access policies, then offer to restrict them to metadata-only.
+
+ Seeding is delegated to _seed_access_policies(); the original note describes it as
+ idempotent via INSERT OR IGNORE (not visible in this function). Per the messages
+ printed below, the seeded default is "metadata visible, content allowed"; answering
+ yes to the prompt sets the "global" permission policy to "deny".
1711
+
1712
+ Returns:
1713
+ The dict returned by _seed_access_policies() (read here for the visibility_seeded
+ and permission_seeded keys), or {} if there is no DB connection or an error occurs.
1714
+ """
1715
+ conn = _get_db_connection()
1716
+ if conn is None:
1717
+ return {}
1718
+
1719
+ try:
1720
+ result = _seed_access_policies(conn)
1721
+
1722
+ if result.get("visibility_seeded") or result.get("permission_seeded"):
1723
+ console.print(
1724
+ "\n[bold]MCP access policies[/bold]: seeded default access (metadata visible, content allowed)"
1725
+ )
1726
+ else:
1727
+ console.print("\n[bold]MCP access policies[/bold]: already configured")
1728
+ console.print(" [dim]Manage with: fp mcp view show | fp mcp read show[/dim]")
1729
+
1730
+ # Explain to the user what the two default settings mean
1731
+ console.print("\n [dim]Visible[/dim] = Claude can see file names, sizes, and paths")
1732
+ console.print(" [dim]Content allowed[/dim] = Claude can read file contents when asked")
1733
+ console.print(
1734
+ " [dim]Security posture: fail-open (all reads allowed). "
1735
+ "See reference/mcp-access-control.md § Security Posture.[/dim]"
1736
+ )
1737
+
1738
+ # Offer to restrict to metadata-only access (opt-in: the default answer is "no")
1739
+ if Confirm.ask(
1740
+ "\n Restrict to metadata only? (no content reading)",
1741
+ default=False,
1742
+ ):
1743
+ from footprinter.db.policies import set_permission_policy
1744
+
1745
+ set_permission_policy(conn, "global", "deny")
1746
+ console.print(" [green]Switched to metadata-only access (content denied)[/green]")
1747
+ else:
1748
+ console.print(" [dim]Keeping full access (content allowed)[/dim]")
1749
+
1750
+ return result  # the seeding result is returned as-is; the restrict prompt does not modify it
1751
+ except Exception as e: # Intentional broad catch: policy seeding is best-effort during setup
1752
+ logger.error(f"Failed to seed access policies: {e}")
1753
+ console.print(f" [yellow]Warning: failed to seed access policies: {e}[/yellow]")
1754
+ console.print(" [dim]Run 'fp setup' later to retry[/dim]")
1755
+ return {}
1756
+ finally:
1757
+ conn.close()
1758
+
1759
+
1760
def print_summary(
    chat_result: dict = None,
    mcp_configured: bool = False,
    connector_results: dict = None,
):
    """Print the end-of-setup report: file status table, counts, and next steps.

    Args:
        chat_result: Result dict from import_chat_export(), or None. A falsy
            value combined with zero chats in the DB triggers the chat-import hint.
        mcp_configured: Whether MCP was configured during the wizard. Accepted
            for the caller's benefit; not read by this function.
        connector_results: Result dict from connector setup hooks, or None. A
            falsy value triggers the "fp connect" hint.
    """
    console.print()

    summary = Table(title="Setup Complete")
    summary.add_column("File", style="bold")
    summary.add_column("Status")

    # Config file row
    cfg_file = get_config_path()
    cfg_status = "[green]Created[/green]" if cfg_file.exists() else "[red]Missing[/red]"
    summary.add_row(str(cfg_file), cfg_status)

    # Database file row
    db_file = get_db_path()
    db_status = "[green]Ready[/green]" if db_file.exists() else "[yellow]Not yet created[/yellow]"
    summary.add_row(str(db_file), db_status)

    console.print(summary)

    # Counts come back empty when the DB is unavailable; the chat total is
    # needed both for the counts section and for the hints at the end.
    tallies = _get_indexing_counts()
    indexed_chats = tallies.get("chats", 0) if tallies else 0

    if tallies:
        console.print()
        console.print(
            f" Indexed: [cyan]{tallies.get('folders', 0)}[/cyan] folders, [cyan]{tallies.get('files', 0)}[/cyan] files"
        )
        visit_total = tallies.get("visits", 0)
        if visit_total > 0:
            console.print(f" Browser history: [cyan]{visit_total}[/cyan] URLs")
        message_total = tallies.get("messages", 0)
        if indexed_chats > 0:
            console.print(f" Chat: [cyan]{indexed_chats}[/cyan] chats, [cyan]{message_total}[/cyan] messages")
        project_total = tallies.get("projects", 0)
        if project_total > 0:
            console.print(f" Projects detected: [cyan]{project_total}[/cyan]")
            console.print(" Use [bold]fp project[/bold] and [bold]fp client[/bold] to organize your data.")

    # Getting started section
    console.print()
    for line in (
        "[bold]Ready to explore your data:[/bold]",
        ' [cyan]fp search[/cyan] [dim]"query"[/dim] Search your files',
        " [cyan]fp ingest status[/cyan] Show data counts",
        " [cyan]fp ingest[/cyan] Re-index (incremental)",
    ):
        console.print(line)
    console.print()
    console.print("[dim]Run fp -h or fp <command> --help for more.[/dim]")

    # Optional hints for things not yet configured
    pending = []
    if not connector_results:
        pending.append("fp connect")
    if not chat_result and indexed_chats == 0:
        pending.append("fp ingest import <file>")
    if pending:
        console.print()
        console.print(f"[dim]Not yet set up: {', '.join(pending)}[/dim]")
1833
+
1834
+
1835
+ if __name__ == "__main__":  # true only when this file is executed directly, not when imported
1836
+ main()  # hand control to the module's main() entry point