footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,306 @@
1
+ """
2
+ MCP Configuration Helper for AI clients.
3
+
4
+ Detects config paths for MCP clients, generates the correct
5
+ MCP server snippet for this Footprinter installation, and optionally writes it.
6
+
7
+ Usage:
8
+ fp setup mcp # Print MCP snippet to paste
9
+ fp setup mcp --check # Check all MCP client configs for footprinter
10
+ fp setup mcp --claude # Write/merge snippet into Claude Desktop config (with backup)
11
+ fp setup mcp --dry-run # Preview config write without changing anything
12
+ """
13
+
14
+ import json
15
+ import os
16
+ import platform
17
+ import shutil
18
+ import sys
19
+ from datetime import datetime
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+ from rich.console import Console
24
+ from rich.panel import Panel
25
+ from rich.table import Table
26
+
27
# Shared rich console used for all output produced by this module.
console = Console()

# Known MCP-compatible clients and their config locations.
#
# Every entry has a display "name" plus one of:
#   "path"    - where the client keeps its MCP config file, or
#   "command" - a shell command that registers the server instead.
# A "path" containing "(" is descriptive text (a per-project location) and is
# skipped by _get_checkable_clients(). The Claude Desktop path listed here is
# the macOS one; detect_config_path() resolves the per-platform location.
MCP_CLIENT_CONFIGS = [
    {"name": "Claude Desktop", "path": "~/Library/Application Support/Claude/claude_desktop_config.json"},
    {"name": "Claude Code", "command": "claude mcp add footprinter -- fp mcp"},
    {"name": "Cursor", "path": "~/.cursor/mcp.json"},
    {"name": "VS Code", "path": ".vscode/mcp.json (per-project)"},
    {"name": "Gemini CLI", "path": "~/.gemini/settings.json"},
]
37
+
38
+
39
def is_mcp_available() -> bool:
    """Report whether the ``mcp`` package can be imported.

    Returns:
        True when importing ``mcp`` works, False when it raises ImportError.
    """
    try:
        __import__("mcp")
    except ImportError:
        return False
    else:
        return True
50
+
51
+
52
+ def _repo_root() -> Path:
53
+ """Repo checkout root (dev-only: MCP cwd, run_mcp.sh discovery)."""
54
+ return Path(__file__).resolve().parent.parent.parent
55
+
56
+
57
+ def _is_dev_checkout(root: Optional[Path] = None) -> bool:
58
+ """True when running from a source checkout (not a pip install)."""
59
+ return ((root or _repo_root()) / "pyproject.toml").exists()
60
+
61
+
62
def detect_config_path() -> Optional[Path]:
    """Locate the Claude Desktop config file for the running platform.

    Returns:
        Path to claude_desktop_config.json, or None if unsupported platform.
    """
    os_name = platform.system()

    if os_name == "Windows":
        # Prefer %APPDATA%; fall back to the conventional roaming profile dir.
        roaming = os.environ.get("APPDATA", "") or str(Path.home() / "AppData" / "Roaming")
        return Path(roaming) / "Claude" / "claude_desktop_config.json"

    if os_name == "Darwin":
        base_dir = Path.home() / "Library" / "Application Support"
    elif os_name == "Linux":
        base_dir = Path.home() / ".config"
    else:
        return None

    return base_dir / "Claude" / "claude_desktop_config.json"
79
+
80
+
81
def get_mcp_command(project_root: Path = None) -> tuple[str, list[str]]:
    """Work out how the MCP server should be launched.

    Candidates are tried in order: the ``fp`` entry point found on PATH, a
    ``run_mcp.sh`` script inside the project root, and finally the current
    interpreter running ``-m footprinter.mcp``.

    Args:
        project_root: Override project root (default: auto-detected).

    Returns:
        Tuple of (command, args_list).
    """
    base_dir = project_root or _repo_root()

    entry_point = shutil.which("fp")
    dev_launcher = base_dir / "run_mcp.sh"

    # Installed entry point wins (most portable for pip installs).
    if entry_point:
        return entry_point, ["mcp"]

    # Dev environments ship a wrapper script next to the sources.
    if dev_launcher.exists():
        return str(dev_launcher), ["mcp"]

    # Last resort: run the module with whatever Python is executing us.
    return sys.executable, ["-m", "footprinter.mcp"]
106
+
107
+
108
def generate_snippet(project_root: Path = None) -> dict:
    """Build the ``mcpServers`` config fragment for this installation.

    Args:
        project_root: Override project root (default: auto-detected).

    Returns:
        Dict suitable for merging into claude_desktop_config.json.
    """
    base_dir = project_root or _repo_root()
    launcher, launcher_args = get_mcp_command(base_dir)

    # The launcher should be either a real file or something resolvable on PATH.
    resolvable = Path(launcher).is_file() or shutil.which(launcher)
    if not resolvable:
        console.print(f"[yellow]Warning: command not found: {launcher}[/yellow]")

    entry = {"command": launcher}
    if launcher_args:
        entry["args"] = launcher_args

    # A working directory is only recorded for an explicit root or a dev checkout.
    wants_cwd = project_root is not None or _is_dev_checkout(base_dir)
    if wants_cwd:
        entry["cwd"] = str(base_dir)

    return {"mcpServers": {"footprinter": entry}}
132
+
133
+
134
def _get_checkable_clients() -> list[tuple[str, Path]]:
    """Collect the MCP clients whose config file can be inspected on disk.

    Walks MCP_CLIENT_CONFIGS and keeps file-based entries only: command-based
    entries (Claude Code) have no "path", and per-project paths (VS Code) are
    recognised by the "(" in their descriptive text.

    Returns:
        List of (client name, config path) pairs.
    """
    resolved = []
    for spec in MCP_CLIENT_CONFIGS:
        if "path" not in spec or "(" in spec["path"]:
            continue

        label = spec["name"]
        if label != "Claude Desktop":
            resolved.append((label, Path(spec["path"]).expanduser()))
            continue

        # Claude Desktop lives in a platform-specific place; it is left out
        # entirely when the platform is not supported.
        desktop_path = detect_config_path()
        if desktop_path:
            resolved.append((label, desktop_path))
    return resolved
155
+
156
+
157
def check_config(config_path: Path = None) -> int:
    """Check MCP client configs for footprinter registration.

    When config_path is provided, checks only that single path (backward
    compat). Otherwise iterates all checkable clients. A config that cannot
    be opened, decoded, or parsed, or whose top-level JSON value is not an
    object, is reported as unreadable instead of raising.

    Args:
        config_path: Override config path (default: check all clients).

    Returns:
        0 if footprinter configured in at least one client,
        1 if all configs missing/unreadable,
        2 if configs exist but footprinter not in any.
    """
    if config_path is not None:
        clients = [("Custom", config_path)]
    else:
        clients = _get_checkable_clients()

    if not clients:
        console.print("[red]No checkable MCP clients found for this platform.[/red]")
        return 1

    any_configured = False
    any_exists = False

    for name, path in clients:
        if not path.exists():
            console.print(f" {name}: [yellow]config not found[/yellow] ({path})")
            continue

        try:
            # JSON config files are UTF-8; do not depend on the locale default.
            with open(path, "r", encoding="utf-8") as f:
                config = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            console.print(f" {name}: [red]cannot read config[/red] ({e})")
            continue

        if not isinstance(config, dict):
            console.print(f" {name}: [red]cannot read config[/red] (top-level JSON value is not an object)")
            continue

        any_exists = True
        servers = config.get("mcpServers", {})

        # A null or otherwise non-object "mcpServers" cannot hold a registration.
        if isinstance(servers, dict) and "footprinter" in servers:
            any_configured = True
            server = servers["footprinter"]
            # Tolerate a malformed (non-object) entry when printing its details.
            details = server if isinstance(server, dict) else {}
            console.print(f" {name}: [green]configured[/green]")
            console.print(f" command: {details.get('command', '?')}")
            if details.get("args"):
                console.print(f" args: {details['args']}")
        else:
            console.print(f" {name}: [yellow]not configured[/yellow]")

    # Report dependency status once at the end
    if is_mcp_available():
        console.print(" mcp package: [green]installed[/green]")
    else:
        console.print(" mcp package: [red]not installed[/red]")
        console.print(" Reinstall with: pip install --force-reinstall footprinter-cli")

    if any_configured:
        return 0
    if any_exists:
        return 2
    return 1
220
+
221
+
222
def write_config(snippet: dict, config_path: Path = None, dry_run: bool = False) -> bool:
    """Write or merge the MCP snippet into Claude Desktop config.

    Creates a timestamped backup next to the file before modifying an
    existing config. Existing keys other than ``mcpServers.footprinter`` are
    preserved. The file is read and written as UTF-8.

    Args:
        snippet: The snippet dict from generate_snippet().
        config_path: Override config path (default: auto-detected).
        dry_run: If True, show what would happen without writing.

    Returns:
        True if write succeeded (or would succeed in dry-run mode). False when
        the platform is unsupported, the existing config cannot be read or has
        an unexpected shape, or the backup/write fails.
    """
    path = config_path or detect_config_path()

    if path is None:
        console.print("[red]Unsupported platform — cannot detect config path.[/red]")
        return False

    # Load existing config or start empty
    existing = {}
    if path.exists():
        try:
            with open(path, "r", encoding="utf-8") as f:
                existing = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            console.print(f"[red]Cannot read existing config:[/red] {e}")
            return False
        if not isinstance(existing, dict):
            console.print("[red]Cannot read existing config:[/red] top-level JSON value is not an object")
            return False

    # Merge: add/update mcpServers.footprinter
    servers = existing.get("mcpServers")
    if servers is None:
        # Missing or null section: start a fresh one.
        servers = existing["mcpServers"] = {}
    elif not isinstance(servers, dict):
        # Refuse to clobber a section we do not understand.
        console.print("[red]Cannot read existing config:[/red] mcpServers is not an object")
        return False
    servers["footprinter"] = snippet["mcpServers"]["footprinter"]

    if dry_run:
        console.print(f"[dim]Would write to:[/dim] {path}")
        console.print(json.dumps(existing, indent=2))
        return True

    try:
        # Backup existing file
        if path.exists():
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            backup = path.with_suffix(f".backup_{timestamp}.json")
            shutil.copy2(path, backup)
            console.print(f" Backed up to [dim]{backup}[/dim]")

        # Write
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(existing, f, indent=2)
            f.write("\n")
    except OSError as e:
        # Report the failure through the documented bool result.
        console.print(f"[red]Cannot write config:[/red] {e}")
        return False

    console.print(f" Wrote [bold]{path}[/bold]")
    return True
276
+
277
+
278
def print_client_paths():
    """Print a two-column table listing each known MCP client and where it is configured."""
    listing = Table(title="MCP Client Config Paths", show_header=True)
    listing.add_column("Client", style="bold")
    listing.add_column("Config Location / Command")

    for spec in MCP_CLIENT_CONFIGS:
        # Command-based clients are highlighted; file-based ones are dimmed.
        if "command" in spec:
            detail = f"[cyan]{spec['command']}[/cyan]"
        else:
            detail = f"[dim]{spec['path']}[/dim]"
        listing.add_row(spec["name"], detail)

    console.print()
    console.print(listing)
292
+
293
+
294
def print_snippet(snippet: dict):
    """Show the MCP snippet in a panel, followed by the client path table.

    Args:
        snippet: The snippet dict from generate_snippet().
    """
    rendered = Panel(json.dumps(snippet, indent=2), title="MCP Config")

    console.print()
    console.print("Add this to your MCP client config:")
    console.print(rendered)

    print_client_paths()

    console.print()
    console.print("[dim]Or run [bold]fp setup mcp --claude[/bold] to write it to Claude Desktop automatically.[/dim]")
@@ -0,0 +1,393 @@
1
+ """
2
+ Command-line search interface — keyword, semantic, and hybrid modes.
3
+ """
4
+
5
+ import argparse
6
+ import os
7
+ import sys
8
+
9
+ from rich.console import Console
10
+
11
+ from footprinter.cli._common import open_db, output_json
12
+
13
# Semantic search is optional. _HAS_ML ends up False when the vector store
# module cannot be imported, and otherwise takes whatever
# _semantic_available() reports. _resolve_mode() reads this flag to decide
# between hybrid, semantic and keyword search.
try:
    from footprinter.semantic.vector_store import VectorStore, _semantic_available

    _HAS_ML = _semantic_available()
except ImportError:
    _HAS_ML = False

# Default console, used by execute_search() when no explicit output is given.
console = Console()
21
+
22
+
23
+ def _normalize_file_relevance(distance: float) -> float:
24
+ """Convert ChromaDB distance to 0-1 relevance score."""
25
+ return max(0.0, 1.0 - (distance / 2.0))
26
+
27
+
28
def _resolve_mode(mode: str | None, out: Console, *, quiet: bool = False) -> str:
    """Pick the search mode to actually run.

    With ML support present, the requested mode is honoured and an unset mode
    becomes "hybrid". Without it, "semantic" exits the process with status 1,
    "hybrid" and an unset mode degrade to "keyword", and any other explicit
    mode is returned unchanged.

    Args:
        mode: Requested mode, or None to auto-detect.
        out: Console that receives the user-facing notices.
        quiet: Suppress the notices (the exit/fallback still happens).

    Returns:
        The effective mode name.
    """
    if _HAS_ML:
        return "hybrid" if mode is None else mode

    if mode == "semantic":
        if not quiet:
            out.print("Semantic search requires additional dependencies.")
            out.print(" Install with: pip install footprinter-cli\\[semantic]")
        sys.exit(1)

    if mode in (None, "hybrid"):
        if not quiet:
            out.print(
                "[dim]Semantic search not available — using keyword search. "
                "Run: pip install footprinter-cli\\[semantic] for AI-powered results.[/dim]"
            )
        return "keyword"

    return mode
57
+
58
+
59
+ def _normalize_path(path: str) -> str:
60
+ """Normalize a file path for dedup comparison."""
61
+ if not path:
62
+ return ""
63
+ return os.path.normpath(os.path.expanduser(path))
64
+
65
+
66
+ def _fts_file_to_result(row: dict) -> dict:
67
+ """Convert a search_files() result row into the merged result format."""
68
+ return {
69
+ "source_type": "file",
70
+ "relevance": row.get("fts_score", 0.5),
71
+ "data": {
72
+ "file_path": row["path"] or row["name"],
73
+ "chunk_index": 0,
74
+ "total_chunks": 1,
75
+ "content_snippet": f"{row['name']} ({row['content_type'] or 'file'})",
76
+ "name": row["name"],
77
+ "source": row["source"],
78
+ "modified_at": row.get("modified_at", ""),
79
+ },
80
+ }
81
+
82
+
83
def _keyword_search(
    query: str,
    limit: int = 10,
    type_filter: str | None = None,
    db_path: str | None = None,
) -> list[dict]:
    """Keyword (FTS5) search over files, plus chats when no type filter is set.

    Args:
        query: Search text.
        limit: Maximum number of results requested per source and returned overall.
        type_filter: File extension filter; when set, chats are not searched.
        db_path: Database path (default: resolved via footprinter.paths).

    Returns:
        Merged results, highest relevance first, truncated to ``limit``.
    """
    from footprinter.db.search import search_files
    from footprinter.semantic.hybrid_search import fts5_fallback_search

    if db_path is None:
        from footprinter.paths import get_db_path

        db_path = str(get_db_path())

    # Files first.
    with open_db(db_path) as conn:
        file_hits = search_files(conn, query, limit=limit, file_ext=type_filter)
        combined = [_fts_file_to_result(hit) for hit in file_hits["results"]]

    # A type filter restricts the search to files, so chats are skipped.
    if not type_filter:
        chat_hits, _ = fts5_fallback_search(
            query,
            n_results=limit,
            db_path=db_path,
        )
        combined.extend(
            {
                "source_type": "chat",
                "relevance": hit.get("relevance_score", 0.5),
                "data": {
                    "chat_title": hit.get("chat_title", "(untitled)"),
                    "source": hit.get("source", ""),
                    "snippet": hit.get("snippet", ""),
                    "chat_id": hit.get("chat_id"),
                },
            }
            for hit in chat_hits
        )

    ranked = sorted(combined, key=lambda entry: entry["relevance"], reverse=True)
    return ranked[:limit]
129
+
130
+
131
def _semantic_search(
    query: str,
    limit: int = 10,
    type_filter: str | None = None,
) -> list[dict]:
    """Vector-only search over files, plus chats when no type filter is set.

    Args:
        query: Search text.
        limit: Maximum number of results requested per source and returned overall.
        type_filter: Value matched against the ``file_type`` metadata; when
            set, chats are not searched.

    Returns:
        Merged results, highest relevance first, truncated to ``limit``.
    """
    store = VectorStore.get_instance()

    metadata_filter = {"file_type": type_filter} if type_filter else None
    file_hits = store.search_files(query, n_results=limit, filter_metadata=metadata_filter)
    chat_hits = [] if type_filter else store.search_chats(query, n_results=limit)

    # File hits carry a distance that is converted to a relevance score.
    ranked = [
        {
            "source_type": "file",
            "relevance": _normalize_file_relevance(hit.get("distance", 0.0)),
            "data": hit,
        }
        for hit in file_hits
    ]
    # Chat hits already carry a relevance score.
    ranked += [
        {
            "source_type": "chat",
            "relevance": hit.get("relevance_score", 0.0),
            "data": hit,
        }
        for hit in chat_hits
    ]

    ranked.sort(key=lambda entry: entry["relevance"], reverse=True)
    return ranked[:limit]
171
+
172
+
173
def _hybrid_search(
    query: str,
    limit: int = 10,
    type_filter: str | None = None,
    db_path: str | None = None,
) -> list[dict]:
    """Run hybrid search: FTS5 + vectors merged via RRF for chats, dedup for files.

    Files are deduplicated by normalized path. When a file has several
    semantic chunks, the best-scoring chunk is the one kept, and a file also
    found by keyword search gets a +0.15 relevance boost (capped at 1.0).
    Chats are fused with reciprocal rank fusion when both sources returned
    results, otherwise whichever source has results is used as-is.

    Args:
        query: Search text.
        limit: Maximum number of results requested per source and returned overall.
        type_filter: File type filter; when set, chats are not searched.
        db_path: Database path (default: resolved via footprinter.paths).

    Returns:
        Merged results, highest relevance first, truncated to ``limit``.
    """
    from footprinter.db.search import search_files
    from footprinter.semantic.hybrid_search import (
        chat_snippet,
        reciprocal_rank_fusion,
    )
    from footprinter.semantic.hybrid_search import (
        keyword_search as chat_keyword_search,
    )

    if db_path is None:
        from footprinter.paths import get_db_path

        db_path = str(get_db_path())

    # --- File merging: dedup by normalized path, boost overlaps ---
    keyword_file_results = []
    with open_db(db_path) as conn:
        file_data = search_files(conn, query, limit=limit, file_ext=type_filter)
        for r in file_data["results"]:
            keyword_file_results.append(_fts_file_to_result(r))

    semantic_results = _semantic_search(query, limit=limit, type_filter=type_filter)
    semantic_file_results = [r for r in semantic_results if r["source_type"] == "file"]
    semantic_chat_results = [r for r in semantic_results if r["source_type"] == "chat"]

    # Merge files by normalized path
    seen_files = {}
    for item in semantic_file_results:
        key = _normalize_path(item["data"].get("file_path", ""))
        # _semantic_search() returns results best-first, so the first chunk
        # seen for a path is its best one; later chunks must not replace it.
        seen_files.setdefault(key, item)

    for item in keyword_file_results:
        key = _normalize_path(item["data"].get("file_path", ""))
        if key in seen_files:
            seen_files[key]["relevance"] = min(1.0, seen_files[key]["relevance"] + 0.15)
        else:
            seen_files[key] = item

    merged = list(seen_files.values())

    # --- Chat merging: use RRF when both sources have results ---
    if not type_filter:
        raw_keyword_chats = chat_keyword_search(query, db_path=db_path, limit=limit)

        if semantic_chat_results and raw_keyword_chats:
            # Convert semantic chat results to the shape RRF expects
            semantic_for_rrf = []
            for item in semantic_chat_results:
                d = item["data"]
                semantic_for_rrf.append(
                    {
                        "chat_id": d.get("chat_id", d.get("chat_title", "")),
                        "chat_title": d.get("chat_title", ""),
                        "message_id": d.get("message_id"),
                        "role": d.get("role", ""),
                        "source": d.get("source", ""),
                        "created_at": d.get("created_at", ""),
                        "snippet": d.get("snippet", ""),
                        "relevance_score": item["relevance"],
                        "chunk_type": d.get("chunk_type", "message"),
                        "chunk_index": d.get("chunk_index", 0),
                        "total_chunks": d.get("total_chunks", 1),
                    }
                )

            rrf_results = reciprocal_rank_fusion(semantic_for_rrf, raw_keyword_chats)
            for r in rrf_results:
                merged.append(
                    {
                        "source_type": "chat",
                        "relevance": r.get("relevance_score", 0.0),
                        "data": {
                            "chat_title": r.get("chat_title", "(untitled)"),
                            "source": r.get("source", ""),
                            "snippet": r.get("snippet", ""),
                            "chat_id": r.get("chat_id"),
                        },
                    }
                )
        elif semantic_chat_results:
            merged.extend(semantic_chat_results)
        elif raw_keyword_chats:
            for r in raw_keyword_chats:
                merged.append(
                    {
                        "source_type": "chat",
                        "relevance": r.get("fts_score", 0.5),
                        "data": {
                            "chat_title": r.get("chat_title", "(untitled)"),
                            "source": r.get("source", ""),
                            "snippet": chat_snippet(r),
                            "chat_id": r.get("chat_id"),
                        },
                    }
                )

    merged.sort(key=lambda x: x["relevance"], reverse=True)
    return merged[:limit]
279
+
280
+
281
def _print_literal(out: Console, text: str = "") -> None:
    """Print ``text`` verbatim, with Rich markup and emoji-code replacement off."""
    out.print(text, markup=False, emoji=False)


def execute_search(
    query: str,
    limit: int = 10,
    type_filter: str | None = None,
    mode: str | None = None,
    output: Console | None = None,
    db_path: str | None = None,
    json_output: bool = False,
) -> None:
    """Run search and display results.

    Shared implementation used by the ``fp search`` subcommand. Text that
    comes from the user or from indexed content (the query, paths, titles,
    snippets, error messages) is printed literally so that square brackets in
    it are never interpreted as Rich markup.

    Args:
        query: Search text.
        limit: Maximum number of results.
        type_filter: File type filter; excludes chat results when set.
        mode: "keyword", "semantic", "hybrid", or None to auto-detect.
        output: Console to print to (default: the module console).
        db_path: Database path passed to the keyword and hybrid searches.
        json_output: Emit a JSON document instead of formatted text.

    Raises:
        SystemExit: With status 1 when the search itself fails (after the
            error has been reported), or when the mode cannot be resolved.
    """
    from rich.markup import escape

    out = output or console
    effective_mode = _resolve_mode(mode, out, quiet=json_output)

    # Dispatch by mode
    try:
        if effective_mode == "keyword":
            merged = _keyword_search(query, limit=limit, type_filter=type_filter, db_path=db_path)
        elif effective_mode == "semantic":
            merged = _semantic_search(query, limit=limit, type_filter=type_filter)
        else:
            merged = _hybrid_search(query, limit=limit, type_filter=type_filter, db_path=db_path)
    except Exception as exc:
        # Top-level CLI boundary: report any failure, then exit non-zero.
        if not json_output:
            out.print(f"[red]Search failed:[/red] {escape(str(exc))}")
        else:
            output_json({"query": query, "mode": effective_mode, "error": str(exc), "results": []})
        sys.exit(1)

    if json_output:
        output_json(
            {
                "query": query,
                "mode": effective_mode,
                "results": merged,
            }
        )
        return

    _print_literal(out, f"\nSearching for: '{query}' ({effective_mode} mode)")
    out.print("=" * 80)

    if not merged:
        out.print("No results found.")
        return

    # Display results
    for i, item in enumerate(merged, 1):
        r = item["data"]
        if item["source_type"] == "file":
            file_path = r.get("file_path", r.get("name", ""))
            # Missing or null chunk metadata is treated as a single chunk.
            total_chunks = r.get("total_chunks") or 1
            chunk_info = ""
            if total_chunks > 1:
                chunk_info = f" (chunk {(r.get('chunk_index') or 0) + 1}/{total_chunks})"

            _print_literal(out, f"\n{i}. [File] {file_path}{chunk_info}")
            out.print("-" * 80)
            _print_literal(out, r.get("content_snippet", ""))
            out.print()
        else:
            title = r.get("chat_title", "(untitled)")
            source = r.get("source", "")
            source_label = f" ({source})" if source else ""

            _print_literal(out, f"\n{i}. [Chat] {title}{source_label}")
            out.print("-" * 80)
            _print_literal(out, r.get("snippet", ""))
            out.print()

    out.print("=" * 80)
    out.print(f"Showing {len(merged)} results")
355
+
356
+
357
def main():
    """Parse ``fp search`` command-line arguments and run the search."""
    from footprinter.cli._common import add_json_flag

    cli = argparse.ArgumentParser(prog="fp search", description="Search across your files and chats")
    cli.add_argument("query", nargs="+", help="Search query")
    cli.add_argument(
        "--mode",
        default=None,
        choices=["keyword", "semantic", "hybrid"],
        help="Search mode: keyword (FTS5), semantic (vectors), hybrid (both).",
    )
    cli.add_argument("-n", "--limit", type=int, default=10, help="Max results to return (default: 10)")
    cli.add_argument("--type", help="Filter by file type (e.g., .pdf, .md). Excludes chat results.")
    add_json_flag(cli)

    parsed = cli.parse_args()

    # The query may be given as several words; join them back into one string.
    search_kwargs = {
        "query": " ".join(parsed.query),
        "limit": parsed.limit,
        "type_filter": parsed.type,
        "mode": parsed.mode,
        "json_output": getattr(parsed, "json", False),
    }
    execute_search(**search_kwargs)


if __name__ == "__main__":
    main()