footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MCP Configuration Helper for AI clients.
|
|
3
|
+
|
|
4
|
+
Detects config paths for MCP clients, generates the correct
|
|
5
|
+
MCP server snippet for this Footprinter installation, and optionally writes it.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
fp setup mcp # Print MCP snippet to paste
|
|
9
|
+
fp setup mcp --check # Check all MCP client configs for footprinter
|
|
10
|
+
fp setup mcp --claude # Write/merge snippet into Claude Desktop config (with backup)
|
|
11
|
+
fp setup mcp --dry-run # Preview config write without changing anything
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
import platform
|
|
17
|
+
import shutil
|
|
18
|
+
import sys
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
from rich.console import Console
|
|
24
|
+
from rich.panel import Panel
|
|
25
|
+
from rich.table import Table
|
|
26
|
+
|
|
27
|
+
console = Console()
|
|
28
|
+
|
|
29
|
+
# Known MCP-compatible clients and their config locations.
#
# Every entry carries a display "name" and exactly one of:
#   "path"    - where the client keeps its MCP config file, or
#   "command" - a shell command that registers the server (no file to inspect).
#
# Notes for maintainers:
#   * The "Claude Desktop" path below is the macOS location; code that needs the
#     real per-platform path resolves it through detect_config_path().
#   * A "path" value containing "(" (e.g. the VS Code per-project hint) is
#     descriptive text rather than a real path; _get_checkable_clients() skips it.
MCP_CLIENT_CONFIGS = [
    {"name": "Claude Desktop", "path": "~/Library/Application Support/Claude/claude_desktop_config.json"},
    {"name": "Claude Code", "command": "claude mcp add footprinter -- fp mcp"},
    {"name": "Cursor", "path": "~/.cursor/mcp.json"},
    {"name": "VS Code", "path": ".vscode/mcp.json (per-project)"},
    {"name": "Gemini CLI", "path": "~/.gemini/settings.json"},
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def is_mcp_available() -> bool:
    """Report whether the ``mcp`` package can be imported.

    Returns:
        True when importing ``mcp`` succeeds; False when it raises ImportError.
    """
    try:
        __import__("mcp")
    except ImportError:
        return False
    return True
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _repo_root() -> Path:
|
|
53
|
+
"""Repo checkout root (dev-only: MCP cwd, run_mcp.sh discovery)."""
|
|
54
|
+
return Path(__file__).resolve().parent.parent.parent
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _is_dev_checkout(root: Optional[Path] = None) -> bool:
|
|
58
|
+
"""True when running from a source checkout (not a pip install)."""
|
|
59
|
+
return ((root or _repo_root()) / "pyproject.toml").exists()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def detect_config_path() -> Optional[Path]:
    """Locate the Claude Desktop config file for the running platform.

    Returns:
        The expected path of ``claude_desktop_config.json`` on macOS, Linux or
        Windows, or None on any other platform. The file is not required to exist.
    """
    os_name = platform.system()

    if os_name == "Darwin":
        base_dir = Path.home() / "Library" / "Application Support"
    elif os_name == "Linux":
        base_dir = Path.home() / ".config"
    elif os_name == "Windows":
        # Prefer %APPDATA%; fall back to the conventional roaming profile dir.
        roaming = os.environ.get("APPDATA", "")
        base_dir = Path(roaming if roaming else str(Path.home() / "AppData" / "Roaming"))
    else:
        return None

    return base_dir / "Claude" / "claude_desktop_config.json"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_mcp_command(project_root: Path = None) -> tuple[str, list[str]]:
    """Work out how the MCP server should be launched.

    Candidates are tried in order: the ``fp`` executable found on PATH, then a
    ``run_mcp.sh`` script in the project root, then the current interpreter
    running ``-m footprinter.mcp``.

    Args:
        project_root: Directory searched for ``run_mcp.sh``
            (default: ``_repo_root()``).

    Returns:
        A ``(command, args)`` pair.
    """
    base_dir = project_root if project_root else _repo_root()

    # 1. Installed entry point: the most portable choice for pip installs.
    entry_point = shutil.which("fp")
    if entry_point:
        return entry_point, ["mcp"]

    # 2. Dev launcher script, when present; 3. otherwise this interpreter.
    dev_script = base_dir / "run_mcp.sh"
    if not dev_script.exists():
        return sys.executable, ["-m", "footprinter.mcp"]
    return str(dev_script), ["mcp"]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def generate_snippet(project_root: Path = None) -> dict:
    """Build the ``mcpServers`` config fragment for this installation.

    Args:
        project_root: Project directory override (default: ``_repo_root()``).

    Returns:
        ``{"mcpServers": {"footprinter": {...}}}`` where the inner dict holds
        ``command``, plus ``args`` when non-empty, plus ``cwd`` when a root was
        given explicitly or the root is a dev checkout.
    """
    base_dir = project_root if project_root else _repo_root()
    launcher, launcher_args = get_mcp_command(base_dir)

    # Warn (but still emit the snippet) if the launcher is neither a file nor on PATH.
    resolvable = Path(launcher).is_file() or shutil.which(launcher)
    if not resolvable:
        console.print(f"[yellow]Warning: command not found: {launcher}[/yellow]")

    entry = {"command": launcher}
    if launcher_args:
        entry["args"] = launcher_args

    # cwd is only meaningful for an explicit root or a source checkout.
    wants_cwd = project_root is not None or _is_dev_checkout(base_dir)
    if wants_cwd:
        entry["cwd"] = str(base_dir)

    return {"mcpServers": {"footprinter": entry}}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _get_checkable_clients() -> list[tuple[str, Path]]:
    """List the MCP clients whose config file can be inspected on disk.

    Entries of ``MCP_CLIENT_CONFIGS`` without a ``path`` (command-based
    clients) and entries whose path contains ``(`` (descriptive per-project
    hints) are left out. Claude Desktop is resolved through
    ``detect_config_path()`` and dropped when that returns nothing; all other
    paths are user-expanded as written.

    Returns:
        ``(client_name, config_path)`` pairs in declaration order.
    """
    resolved = []
    for spec in MCP_CLIENT_CONFIGS:
        if "path" not in spec or "(" in spec["path"]:
            continue

        label = spec["name"]
        if label != "Claude Desktop":
            resolved.append((label, Path(spec["path"]).expanduser()))
            continue

        desktop_path = detect_config_path()
        if desktop_path:
            resolved.append((label, desktop_path))
    return resolved
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def check_config(config_path: Optional[Path] = None) -> int:
    """Check MCP client configs for a footprinter registration and print a report.

    When ``config_path`` is given only that file is checked (reported under the
    name "Custom"); otherwise every client from ``_get_checkable_clients()`` is
    checked. The status of the ``mcp`` package is printed once at the end.

    Config files are read as UTF-8 (a leading BOM is tolerated). A file whose
    JSON root, ``mcpServers`` value or ``footprinter`` entry is not an object
    is reported instead of raising.

    Args:
        config_path: Single config file to check (default: all known clients).

    Returns:
        0 if footprinter is configured in at least one client,
        1 if every config is missing or unreadable (or no client is checkable),
        2 if at least one config was readable but none registers footprinter.
    """
    if config_path is not None:
        clients = [("Custom", config_path)]
    else:
        clients = _get_checkable_clients()

    if not clients:
        console.print("[red]No checkable MCP clients found for this platform.[/red]")
        return 1

    any_configured = False
    any_exists = False

    for name, path in clients:
        if not path.exists():
            console.print(f" {name}: [yellow]config not found[/yellow] ({path})")
            continue

        try:
            # JSON is UTF-8 by specification; don't depend on the locale encoding.
            with open(path, "r", encoding="utf-8-sig") as f:
                config = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            console.print(f" {name}: [red]cannot read config[/red] ({e})")
            continue

        any_exists = True

        # Tolerate unexpected shapes (root not an object, "mcpServers": null, ...).
        servers = config.get("mcpServers") if isinstance(config, dict) else None
        if not isinstance(servers, dict):
            servers = {}

        if "footprinter" in servers:
            any_configured = True
            server = servers["footprinter"]
            if not isinstance(server, dict):
                server = {}
            console.print(f" {name}: [green]configured[/green]")
            console.print(f" command: {server.get('command', '?')}")
            if server.get("args"):
                console.print(f" args: {server['args']}")
        else:
            console.print(f" {name}: [yellow]not configured[/yellow]")

    # Report dependency status once at the end
    if is_mcp_available():
        console.print(" mcp package: [green]installed[/green]")
    else:
        console.print(" mcp package: [red]not installed[/red]")
        console.print(" Reinstall with: pip install --force-reinstall footprinter-cli")

    if any_configured:
        return 0
    if any_exists:
        return 2
    return 1
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def write_config(snippet: dict, config_path: Optional[Path] = None, dry_run: bool = False) -> bool:
    """Write or merge the MCP snippet into a client config file.

    The existing file (if any) is loaded, ``mcpServers.footprinter`` is added
    or replaced, and the result is written back. A timestamped copy of the
    existing file is made before it is overwritten. Files are read and written
    as UTF-8 (a leading BOM is tolerated on read).

    Args:
        snippet: The snippet dict from generate_snippet().
        config_path: Target file (default: ``detect_config_path()``).
        dry_run: If True, print the merged result without touching the disk.

    Returns:
        True if the write succeeded (or would be attempted in dry-run mode);
        False if the platform is unsupported, the existing config cannot be
        read or has an unexpected structure, or the backup/write fails.
    """
    path = config_path or detect_config_path()

    if path is None:
        console.print("[red]Unsupported platform — cannot detect config path.[/red]")
        return False

    # Load existing config or start empty
    existing = {}
    if path.exists():
        try:
            # JSON is UTF-8 by specification; don't depend on the locale encoding.
            with open(path, "r", encoding="utf-8-sig") as f:
                existing = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            console.print(f"[red]Cannot read existing config:[/red] {e}")
            return False
        if not isinstance(existing, dict):
            # Refuse to clobber a file whose root is not a JSON object.
            console.print("[red]Cannot merge:[/red] existing config is not a JSON object")
            return False

    # Merge: add/update mcpServers.footprinter
    servers = existing.get("mcpServers")
    if servers is None:
        servers = existing["mcpServers"] = {}
    elif not isinstance(servers, dict):
        console.print("[red]Cannot merge:[/red] existing 'mcpServers' is not a JSON object")
        return False
    servers["footprinter"] = snippet["mcpServers"]["footprinter"]

    if dry_run:
        console.print(f"[dim]Would write to:[/dim] {path}")
        console.print(json.dumps(existing, indent=2))
        return True

    try:
        # Backup existing file
        if path.exists():
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            backup = path.with_suffix(f".backup_{timestamp}.json")
            shutil.copy2(path, backup)
            console.print(f" Backed up to [dim]{backup}[/dim]")

        # Write
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(existing, f, indent=2)
            f.write("\n")
    except OSError as e:
        # Honour the documented bool contract instead of leaking a traceback.
        console.print(f"[red]Cannot write config:[/red] {e}")
        return False

    console.print(f" Wrote [bold]{path}[/bold]")
    return True
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def print_client_paths():
    """Render a table of known MCP clients and their config locations.

    Command-based clients show their registration command. File-based clients
    show the path from ``MCP_CLIENT_CONFIGS``, except Claude Desktop, whose
    location is platform-specific: when ``detect_config_path()`` can resolve
    it, that resolved path is shown instead of the static macOS one.
    """
    table = Table(title="MCP Client Config Paths", show_header=True)
    table.add_column("Client", style="bold")
    table.add_column("Config Location / Command")

    for client in MCP_CLIENT_CONFIGS:
        if "command" in client:
            table.add_row(client["name"], f"[cyan]{client['command']}[/cyan]")
            continue

        location = client["path"]
        if client["name"] == "Claude Desktop":
            # Keep the table consistent with what --check / --claude actually use.
            detected = detect_config_path()
            if detected is not None:
                location = str(detected)
        table.add_row(client["name"], f"[dim]{location}[/dim]")

    console.print()
    console.print(table)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def print_snippet(snippet: dict):
    """Show the MCP snippet for manual copy/paste, followed by client locations.

    Args:
        snippet: The snippet dict from generate_snippet().
    """
    rendered = Panel(json.dumps(snippet, indent=2), title="MCP Config")

    console.print()
    console.print("Add this to your MCP client config:")
    console.print(rendered)

    # Tell the user where each client expects the snippet to live.
    print_client_paths()

    console.print()
    console.print("[dim]Or run [bold]fp setup mcp --claude[/bold] to write it to Claude Desktop automatically.[/dim]")
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line search interface — keyword, semantic, and hybrid modes.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from footprinter.cli._common import open_db, output_json
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from footprinter.semantic.vector_store import VectorStore, _semantic_available
|
|
15
|
+
|
|
16
|
+
_HAS_ML = _semantic_available()
|
|
17
|
+
except ImportError:
|
|
18
|
+
_HAS_ML = False
|
|
19
|
+
|
|
20
|
+
console = Console()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _normalize_file_relevance(distance: float) -> float:
|
|
24
|
+
"""Convert ChromaDB distance to 0-1 relevance score."""
|
|
25
|
+
return max(0.0, 1.0 - (distance / 2.0))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _resolve_mode(mode: str | None, out: Console, *, quiet: bool = False) -> str:
|
|
29
|
+
"""Resolve effective search mode based on request and ML availability."""
|
|
30
|
+
if mode == "semantic" and not _HAS_ML:
|
|
31
|
+
if not quiet:
|
|
32
|
+
out.print("Semantic search requires additional dependencies.")
|
|
33
|
+
out.print(" Install with: pip install footprinter-cli\\[semantic]")
|
|
34
|
+
sys.exit(1)
|
|
35
|
+
|
|
36
|
+
if mode == "hybrid" and not _HAS_ML:
|
|
37
|
+
if not quiet:
|
|
38
|
+
out.print(
|
|
39
|
+
"[dim]Semantic search not available — using keyword search. "
|
|
40
|
+
"Run: pip install footprinter-cli\\[semantic] for AI-powered results.[/dim]"
|
|
41
|
+
)
|
|
42
|
+
return "keyword"
|
|
43
|
+
|
|
44
|
+
if mode is not None:
|
|
45
|
+
return mode
|
|
46
|
+
|
|
47
|
+
# Auto-detect
|
|
48
|
+
if _HAS_ML:
|
|
49
|
+
return "hybrid"
|
|
50
|
+
|
|
51
|
+
if not quiet:
|
|
52
|
+
out.print(
|
|
53
|
+
"[dim]Semantic search not available — using keyword search. "
|
|
54
|
+
"Run: pip install footprinter-cli\\[semantic] for AI-powered results.[/dim]"
|
|
55
|
+
)
|
|
56
|
+
return "keyword"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _normalize_path(path: str) -> str:
|
|
60
|
+
"""Normalize a file path for dedup comparison."""
|
|
61
|
+
if not path:
|
|
62
|
+
return ""
|
|
63
|
+
return os.path.normpath(os.path.expanduser(path))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _fts_file_to_result(row: dict) -> dict:
|
|
67
|
+
"""Convert a search_files() result row into the merged result format."""
|
|
68
|
+
return {
|
|
69
|
+
"source_type": "file",
|
|
70
|
+
"relevance": row.get("fts_score", 0.5),
|
|
71
|
+
"data": {
|
|
72
|
+
"file_path": row["path"] or row["name"],
|
|
73
|
+
"chunk_index": 0,
|
|
74
|
+
"total_chunks": 1,
|
|
75
|
+
"content_snippet": f"{row['name']} ({row['content_type'] or 'file'})",
|
|
76
|
+
"name": row["name"],
|
|
77
|
+
"source": row["source"],
|
|
78
|
+
"modified_at": row.get("modified_at", ""),
|
|
79
|
+
},
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _keyword_search(
    query: str,
    limit: int = 10,
    type_filter: str | None = None,
    db_path: str | None = None,
) -> list[dict]:
    """Keyword search over files and, when no type filter is set, chats.

    Args:
        query: Search text.
        limit: Maximum number of results requested per source and returned overall.
        type_filter: File-extension filter; when set, chats are not searched.
        db_path: Database path (default: ``get_db_path()``).

    Returns:
        Merged result dicts, highest relevance first, truncated to ``limit``.
    """
    from footprinter.db.search import search_files
    from footprinter.semantic.hybrid_search import fts5_fallback_search

    if db_path is None:
        from footprinter.paths import get_db_path

        db_path = str(get_db_path())

    hits = []

    # Files
    with open_db(db_path) as conn:
        file_rows = search_files(conn, query, limit=limit, file_ext=type_filter)["results"]
        hits.extend(_fts_file_to_result(row) for row in file_rows)

    # Chats (a type filter restricts the search to files)
    if not type_filter:
        chat_rows, _unused = fts5_fallback_search(query, n_results=limit, db_path=db_path)
        hits.extend(
            {
                "source_type": "chat",
                "relevance": row.get("relevance_score", 0.5),
                "data": {
                    "chat_title": row.get("chat_title", "(untitled)"),
                    "source": row.get("source", ""),
                    "snippet": row.get("snippet", ""),
                    "chat_id": row.get("chat_id"),
                },
            }
            for row in chat_rows
        )

    ranked = sorted(hits, key=lambda hit: hit["relevance"], reverse=True)
    return ranked[:limit]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _semantic_search(
    query: str,
    limit: int = 10,
    type_filter: str | None = None,
) -> list[dict]:
    """Vector-only search over files and, when no type filter is set, chats.

    Args:
        query: Search text.
        limit: Maximum number of results requested per source and returned overall.
        type_filter: Value matched against the ``file_type`` metadata; when set,
            chats are not searched.

    Returns:
        Result dicts (``source_type``, ``relevance``, ``data``), highest
        relevance first, truncated to ``limit``.
    """
    store = VectorStore.get_instance()

    metadata_filter = {"file_type": type_filter} if type_filter else None
    file_hits = store.search_files(query, n_results=limit, filter_metadata=metadata_filter)
    chat_hits = [] if type_filter else store.search_chats(query, n_results=limit)

    # File hits carry a distance; chat hits already carry a relevance score.
    ranked = [
        {
            "source_type": "file",
            "relevance": _normalize_file_relevance(hit.get("distance", 0.0)),
            "data": hit,
        }
        for hit in file_hits
    ]
    ranked.extend(
        {
            "source_type": "chat",
            "relevance": hit.get("relevance_score", 0.0),
            "data": hit,
        }
        for hit in chat_hits
    )

    return sorted(ranked, key=lambda entry: entry["relevance"], reverse=True)[:limit]
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _chat_hit_to_result(hit: dict, relevance, snippet) -> dict:
    """Wrap a chat hit in the merged ``{source_type, relevance, data}`` shape."""
    return {
        "source_type": "chat",
        "relevance": relevance,
        "data": {
            "chat_title": hit.get("chat_title", "(untitled)"),
            "source": hit.get("source", ""),
            "snippet": snippet,
            "chat_id": hit.get("chat_id"),
        },
    }


def _hybrid_search(
    query: str,
    limit: int = 10,
    type_filter: str | None = None,
    db_path: str | None = None,
) -> list[dict]:
    """Run hybrid search: keyword + vector results merged into one ranking.

    Files are de-duplicated by normalized path. For each file the best-scoring
    semantic chunk is kept, and a file that also matches the keyword search
    gets a +0.15 relevance boost (capped at 1.0). Entries without a usable
    path are never merged with each other.

    Chats are fused with reciprocal rank fusion when both the semantic and the
    keyword search return hits; otherwise whichever side has hits is used.
    Chats are skipped entirely when ``type_filter`` is set.

    Args:
        query: Search text.
        limit: Maximum number of results requested per source and returned overall.
        type_filter: File-type filter; when set, only files are searched.
        db_path: Database path (default: ``get_db_path()``).

    Returns:
        Merged result dicts, highest relevance first, truncated to ``limit``.
    """
    from footprinter.db.search import search_files
    from footprinter.semantic.hybrid_search import (
        chat_snippet,
        reciprocal_rank_fusion,
    )
    from footprinter.semantic.hybrid_search import keyword_search as chat_keyword_search

    if db_path is None:
        from footprinter.paths import get_db_path

        db_path = str(get_db_path())

    # --- File merging: dedup by normalized path, boost overlaps ---
    with open_db(db_path) as conn:
        file_data = search_files(conn, query, limit=limit, file_ext=type_filter)
        keyword_file_results = [_fts_file_to_result(r) for r in file_data["results"]]

    semantic_results = _semantic_search(query, limit=limit, type_filter=type_filter)
    semantic_file_results = [r for r in semantic_results if r["source_type"] == "file"]
    semantic_chat_results = [r for r in semantic_results if r["source_type"] == "chat"]

    seen_files = {}
    for idx, item in enumerate(semantic_file_results):
        # A path-less entry gets a unique key so unrelated hits never collapse.
        key = _normalize_path(item["data"].get("file_path", "")) or ("semantic", idx)
        best = seen_files.get(key)
        # Several chunks of one file may match: keep the most relevant one
        # instead of letting a later, weaker chunk overwrite it.
        if best is None or item["relevance"] > best["relevance"]:
            seen_files[key] = item

    for idx, item in enumerate(keyword_file_results):
        key = _normalize_path(item["data"].get("file_path", "")) or ("keyword", idx)
        hit = seen_files.get(key)
        if hit is not None:
            hit["relevance"] = min(1.0, hit["relevance"] + 0.15)
        else:
            seen_files[key] = item

    merged = list(seen_files.values())

    # --- Chat merging: use RRF when both sources have results ---
    if not type_filter:
        raw_keyword_chats = chat_keyword_search(query, db_path=db_path, limit=limit)

        if semantic_chat_results and raw_keyword_chats:
            # Convert semantic chat results to the shape RRF expects
            semantic_for_rrf = []
            for item in semantic_chat_results:
                d = item["data"]
                semantic_for_rrf.append(
                    {
                        "chat_id": d.get("chat_id", d.get("chat_title", "")),
                        "chat_title": d.get("chat_title", ""),
                        "message_id": d.get("message_id"),
                        "role": d.get("role", ""),
                        "source": d.get("source", ""),
                        "created_at": d.get("created_at", ""),
                        "snippet": d.get("snippet", ""),
                        "relevance_score": item["relevance"],
                        "chunk_type": d.get("chunk_type", "message"),
                        "chunk_index": d.get("chunk_index", 0),
                        "total_chunks": d.get("total_chunks", 1),
                    }
                )

            rrf_results = reciprocal_rank_fusion(semantic_for_rrf, raw_keyword_chats)
            merged.extend(
                _chat_hit_to_result(r, r.get("relevance_score", 0.0), r.get("snippet", ""))
                for r in rrf_results
            )
        elif semantic_chat_results:
            merged.extend(semantic_chat_results)
        elif raw_keyword_chats:
            merged.extend(
                _chat_hit_to_result(r, r.get("fts_score", 0.5), chat_snippet(r))
                for r in raw_keyword_chats
            )

    merged.sort(key=lambda x: x["relevance"], reverse=True)
    return merged[:limit]
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def execute_search(
    query: str,
    limit: int = 10,
    type_filter: str | None = None,
    mode: str | None = None,
    output: Console | None = None,
    db_path: str | None = None,
    json_output: bool = False,
) -> None:
    """Run a search and display the results.

    Shared implementation used by the ``fp search`` subcommand.

    Args:
        query: Search text.
        limit: Maximum number of results.
        type_filter: File-type filter; excludes chat results when set.
        mode: "keyword", "semantic", "hybrid", or None to auto-detect.
        output: Console to print to (default: the module-level console).
        db_path: Database path passed to the keyword and hybrid searches.
        json_output: Emit one JSON document via ``output_json`` instead of text.

    Exits the process with status 1 when the search itself raises (after
    reporting the error as text or JSON).
    """
    # rich is already a dependency of this module; escape() keeps data-derived
    # text (paths, titles, snippets, error messages) from being parsed as
    # console markup, which would drop "[...]" fragments or raise MarkupError
    # on an unmatched closing tag such as "[/x]".
    from rich.markup import escape

    def _plain(value) -> str:
        return escape(str(value))

    out = output or console
    effective_mode = _resolve_mode(mode, out, quiet=json_output)

    # Dispatch by mode
    try:
        if effective_mode == "keyword":
            merged = _keyword_search(query, limit=limit, type_filter=type_filter, db_path=db_path)
        elif effective_mode == "semantic":
            merged = _semantic_search(query, limit=limit, type_filter=type_filter)
        else:
            merged = _hybrid_search(query, limit=limit, type_filter=type_filter, db_path=db_path)
    except Exception as exc:
        # Top-level CLI boundary: report any failure and exit non-zero.
        if not json_output:
            out.print(f"[red]Search failed:[/red] {_plain(exc)}")
        else:
            output_json({"query": query, "mode": effective_mode, "error": str(exc), "results": []})
        sys.exit(1)

    if json_output:
        output_json(
            {
                "query": query,
                "mode": effective_mode,
                "results": merged,
            }
        )
        return

    out.print(f"\nSearching for: '{_plain(query)}' ({effective_mode} mode)")
    out.print("=" * 80)

    if not merged:
        out.print("No results found.")
        return

    # Display results
    for i, item in enumerate(merged, 1):
        r = item["data"]
        if item["source_type"] == "file":
            file_path = r.get("file_path", r.get("name", ""))
            # Tolerate missing or None chunk metadata instead of raising.
            total_chunks = r.get("total_chunks") or 1
            chunk_info = ""
            if total_chunks > 1:
                chunk_info = f" (chunk {(r.get('chunk_index') or 0) + 1}/{total_chunks})"

            out.print(f"\n{i}. [File] {_plain(file_path)}{chunk_info}")
            out.print("-" * 80)
            out.print(_plain(r.get("content_snippet", "")))
            out.print()
        else:
            title = r.get("chat_title", "(untitled)")
            source = r.get("source", "")
            source_label = f" ({_plain(source)})" if source else ""

            out.print(f"\n{i}. [Chat] {_plain(title)}{source_label}")
            out.print("-" * 80)
            out.print(_plain(r.get("snippet", "")))
            out.print()

    out.print("=" * 80)
    out.print(f"Showing {len(merged)} results")
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def main():
    """Parse ``fp search`` command-line arguments and run the search."""
    from footprinter.cli._common import add_json_flag

    cli = argparse.ArgumentParser(prog="fp search", description="Search across your files and chats")

    # Positional: one or more words, re-joined into a single query string below.
    cli.add_argument("query", nargs="+", help="Search query")
    cli.add_argument(
        "--mode",
        default=None,
        choices=["keyword", "semantic", "hybrid"],
        help="Search mode: keyword (FTS5), semantic (vectors), hybrid (both).",
    )
    cli.add_argument("-n", "--limit", default=10, type=int, help="Max results to return (default: 10)")
    cli.add_argument("--type", help="Filter by file type (e.g., .pdf, .md). Excludes chat results.")
    add_json_flag(cli)

    parsed = cli.parse_args()
    search_text = " ".join(parsed.query)
    wants_json = getattr(parsed, "json", False)

    execute_search(
        query=search_text,
        limit=parsed.limit,
        type_filter=parsed.type,
        mode=parsed.mode,
        json_output=wants_json,
    )


if __name__ == "__main__":
    main()
|