probe-search 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
probe/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """probe — AI Agent Context Engine."""
2
+
3
+ __version__ = "0.2.0"
probe/cli.py ADDED
@@ -0,0 +1,500 @@
1
+ """CLI interface for probe."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import shutil
8
+ import subprocess
9
+ import sys
10
+ import time
11
+ from pathlib import Path
12
+
13
+ import click
14
+ from rich.console import Console
15
+ from rich.table import Table
16
+
17
+ import probe
18
+ from probe.config import DEFAULT_MODELS, ProbeConfig, detect_provider, load_config, save_config
19
+ from probe.indexer.refresh_gate import RefreshGate
20
+
21
# Shared Rich console through which every CLI command prints its output.
console = Console()

# Name of the per-project data directory, resolved against the current working directory.
PROBE_DIR_NAME = ".probe"
23
+
24
+
25
def _find_probe_dir(create: bool = False) -> Path:
    """Return the probe data directory path under the current working directory.

    When *create* is true the directory is created if it does not exist yet;
    otherwise the path is returned without touching the filesystem.
    """
    target = Path.cwd().joinpath(PROBE_DIR_NAME)
    if not create:
        return target
    target.mkdir(exist_ok=True)
    return target
30
+
31
+
32
def _require_probe_dir() -> Path:
    """Return the .probe/ directory, or print a hint and exit if it is missing.

    The exit status is 0: the hint is printed and the process ends via
    ``SystemExit(0)`` rather than with an error code.
    """
    candidate = Path.cwd() / PROBE_DIR_NAME
    if candidate.exists():
        return candidate
    console.print("[yellow]Not indexed yet. Run 'probe index' first.[/yellow]")
    raise SystemExit(0)
41
+
42
+
43
def _get_config() -> ProbeConfig:
    """Load the configuration from ``config.yaml`` inside the probe directory."""
    config_path = _find_probe_dir() / "config.yaml"
    return load_config(config_path)
46
+
47
+
48
def _build_providers(config: ProbeConfig):
    """Build the embedding provider and the optional reranker named in *config*.

    Returns:
        A ``(embedding, reranker)`` tuple. ``reranker`` is ``None`` when the
        configured rerank provider is not recognised here or its API key is
        not present in the environment.

    Exits the process with status 1 (after printing an error) when the
    embedding provider is unknown or its API key environment variable is
    unset or empty.
    """
    from probe.config import PROVIDER_ENV_VARS
    from probe.providers.base import EmbeddingProvider, RerankProvider

    embedding: EmbeddingProvider
    reranker: RerankProvider | None = None

    env_var = PROVIDER_ENV_VARS.get(config.embedding_provider, "")
    if not env_var:
        # An unknown provider has no env var. Report that directly instead of
        # falling through to a "<blank> not set" message.
        console.print(f"[red]Unknown embedding provider: {config.embedding_provider}[/red]")
        sys.exit(1)

    api_key = os.environ.get(env_var, "")
    if not api_key:
        console.print(
            f"[red]Error: {env_var} not set."
            f" Required for {config.embedding_provider} embeddings.[/red]"
        )
        sys.exit(1)

    # Provider modules are imported lazily so only the selected one is loaded.
    if config.embedding_provider == "zeroentropy":
        from probe.providers.zeroentropy import ZeroEntropyEmbedding
        embedding = ZeroEntropyEmbedding(
            api_key, config.embedding_model, config.embedding_dimensions,
        )
    elif config.embedding_provider == "openai":
        from probe.providers.openai import OpenAIEmbedding
        embedding = OpenAIEmbedding(api_key, config.embedding_model, config.embedding_dimensions)
    elif config.embedding_provider == "cohere":
        from probe.providers.cohere import CohereEmbedding
        embedding = CohereEmbedding(api_key, config.embedding_model, config.embedding_dimensions)
    else:
        # Listed in PROVIDER_ENV_VARS but not handled by any branch above.
        console.print(f"[red]Unknown embedding provider: {config.embedding_provider}[/red]")
        sys.exit(1)

    # The reranker is optional: a missing key simply leaves it as None.
    if config.rerank_provider == "zeroentropy":
        rerank_key = os.environ.get("ZEROENTROPY_API_KEY", "")
        if rerank_key:
            from probe.providers.zeroentropy import ZeroEntropyRerank
            reranker = ZeroEntropyRerank(rerank_key, config.rerank_model)
    elif config.rerank_provider == "cohere":
        rerank_key = os.environ.get("COHERE_API_KEY", "")
        if rerank_key:
            from probe.providers.cohere import CohereRerank
            reranker = CohereRerank(rerank_key, config.rerank_model)

    return embedding, reranker
91
+
92
+
93
# Root click group; each subcommand below attaches itself via @main.command().
@click.group()
@click.version_option(version=probe.__version__, prog_name="probe")
def main():
    """probe -- AI Agent Context Engine. Give your coding agent a brain beyond code."""
98
+
99
+
100
@main.command()
@click.argument("paths", nargs=-1, type=click.Path(exists=True))
@click.option("--full", is_flag=True, help="Force full re-index")
def index(paths, full):
    """Index project files for semantic search."""
    # The docstring above is the `probe index --help` text, so details live here.
    # paths: files or directories to index; defaults to the current directory.
    # full:  passed through to the pipeline to force a full re-index.
    from probe.indexer.pipeline import IndexPipeline
    from probe.search.vector import VectorStore
    from probe.store.database import ProbeDB

    if not paths:
        paths = (".",)

    config = _get_config()
    probe_dir = _find_probe_dir(create=True)
    db = ProbeDB(probe_dir / "probe.db")
    try:
        db.initialize()

        # May exit the process if the embedding provider or its key is missing;
        # the finally clause still closes the database in that case.
        embedding, _ = _build_providers(config)
        vector_store = VectorStore(
            probe_dir / "vectors.npy", dimensions=config.embedding_dimensions,
        )

        pipeline = IndexPipeline(db=db, vector_store=vector_store, embedding_provider=embedding)

        console.print(f"[bold]Indexing {len(paths)} path(s)...[/bold]")
        stats = pipeline.index([Path(p) for p in paths], full=full)

        console.print(
            f"\n[green]Done![/green] "
            f"Indexed {stats['files_indexed']} files, "
            f"created {stats['chunks_created']} chunks, "
            f"skipped {stats['files_skipped']} unchanged files."
        )
    finally:
        # Release the database handle on success, failure, and sys.exit alike.
        db.close()
132
+
133
+
134
@main.command()
@click.argument("query")
@click.option("--top-k", default=10, help="Max results to return")
@click.option("--max-tokens", default=4096, help="Token budget for results")
@click.option("--type", "file_types", multiple=True, help="Filter by file type")
@click.option("--no-rerank", is_flag=True, help="Skip reranking")
def search(query, top_k, max_tokens, file_types, no_rerank):
    """Search project knowledge with natural language."""
    # The docstring above is the `probe search --help` text, so details live here.
    # Flow: load the index, optionally refresh it, run the query, then print
    # each hit with its score, location and the first three content lines.
    from rich.markup import escape

    from probe.search.engine import ContextEngine
    from probe.search.vector import VectorStore
    from probe.store.database import ProbeDB

    config = _get_config()
    probe_dir = _require_probe_dir()
    db = ProbeDB(probe_dir / "probe.db")
    try:
        db.initialize()
        vector_store = VectorStore(
            probe_dir / "vectors.npy", dimensions=config.embedding_dimensions,
        )
        vector_store.load()

        # Refresh-before-search: update index if files changed since last index.
        gate = RefreshGate.from_env()
        if gate.should_refresh():
            from probe.indexer.pipeline import IndexPipeline
            embedding_for_refresh, _ = _build_providers(config)
            pipeline = IndexPipeline(
                db=db, vector_store=vector_store,
                embedding_provider=embedding_for_refresh,
            )
            try:
                refresh_stats = pipeline.refresh_changed([Path.cwd()])
                gate.mark()
                total_changed = (
                    refresh_stats["added"] + refresh_stats["changed"] + refresh_stats["removed"]
                )
                if total_changed > 0:
                    console.print(
                        f"[dim]Refreshed: +{refresh_stats['added']} "
                        f"±{refresh_stats['changed']} -{refresh_stats['removed']} "
                        f"({refresh_stats['elapsed_ms']}ms)[/dim]"
                    )
            except Exception as e:
                # Deliberate best-effort: a failed refresh must not block the
                # search, so warn and continue with whatever is indexed.
                console.print(
                    f"[yellow]Warning: refresh failed ({escape(str(e))}); using stale index.[/yellow]"
                )

        # Note: providers are built twice on search — once for the refresh pass above
        # and once here for the search. Provider constructors are cheap; keeping the
        # two paths independent avoids having the refresh block reach into search state.
        embedding, reranker = _build_providers(config)

        engine = ContextEngine(db=db, vector_store=vector_store,
                               embedding_provider=embedding,
                               rerank_provider=reranker if not no_rerank else None)

        t0 = time.time()
        response = engine.search(query=query, top_k=top_k, max_tokens=max_tokens,
                                 file_types=list(file_types) if file_types else None,
                                 rerank=not no_rerank)
        elapsed = time.time() - t0

        if not response.results:
            console.print("[yellow]No results found.[/yellow]")
            return

        console.print(f"\n [bold]Found {len(response.results)} results[/bold] "
                      f"({response.sources_searched} chunks searched)\n")

        for result in response.results:
            score_color = "green" if result.score > 0.7 else "yellow" if result.score > 0.4 else "dim"
            score_str = f"[{score_color}][{result.score:.2f}][/{score_color}]"

            # Indexed text is arbitrary user content. Escape it so square
            # brackets in paths or code are shown literally, not read as
            # Rich markup tags.
            loc = f"[cyan]{escape(str(result.file))}[/cyan]"
            if result.header_path:
                loc += f" > [dim]{escape(str(result.header_path))}[/dim]"
            elif result.symbol_name:
                loc += f" > [dim]{escape(str(result.symbol_name))}[/dim]"
            elif result.page_number:
                loc += f" > [dim]page {result.page_number}[/dim]"

            console.print(f" {score_str} {loc}")
            lines = result.content.strip().split("\n")[:3]
            for line in lines:
                # Truncate first, then escape, so no escape sequence is cut in half.
                console.print(f" [dim]{escape(line[:100])}[/dim]")
            console.print()

        model_info = f"{config.embedding_model}"
        if not no_rerank and reranker is not None:
            model_info += f" + {config.rerank_model}"
        console.print(f" [dim]{'---' * 14}[/dim]")
        console.print(f" [dim]{model_info} | {response.total_tokens:,} tokens | {elapsed:.1f}s[/dim]\n")
    finally:
        # Single close point covering the early return, errors, and sys.exit.
        db.close()
227
+
228
+
229
@main.command()
def status():
    """Show index status and configuration."""
    from probe.store.database import ProbeDB

    probe_dir = _require_probe_dir()
    config = _get_config()
    db = ProbeDB(probe_dir / "probe.db")
    db.initialize()
    stats = db.get_stats()

    # Collect the rows first, in display order, then feed them to the table.
    rows = [
        ("Indexed files", str(stats["total_files"])),
        ("Total chunks", str(stats["total_chunks"])),
        ("Last indexed", stats["last_indexed"] or "never"),
    ]
    # One row per file type, listed under the "Last indexed" row.
    rows.extend(
        (f" {ft}", str(count)) for ft, count in stats.get("file_types", {}).items()
    )
    rows.append(("Embedding", f"{config.embedding_provider}/{config.embedding_model}"))
    rows.append(("Reranker", f"{config.rerank_provider}/{config.rerank_model}"))

    table = Table(title="probe status")
    table.add_column("Property", style="cyan")
    table.add_column("Value")
    for label, value in rows:
        table.add_row(label, value)

    console.print(table)
    db.close()
252
+
253
+
254
@main.command(name="list")
def list_files():
    """List all indexed files."""
    # Registered as `probe list`; the function name avoids shadowing the builtin.
    from rich.markup import escape

    from probe.store.database import ProbeDB

    probe_dir = _require_probe_dir()
    db = ProbeDB(probe_dir / "probe.db")
    try:
        db.initialize()
        files = db.list_files()

        if not files:
            console.print("[yellow]No files indexed. Run 'probe index' first.[/yellow]")
            return

        for f in files:
            # Escape the path so bracketed segments (e.g. "app/[id]/page.tsx")
            # are printed literally instead of being read as Rich markup.
            console.print(f" \\[{f['file_type']}] {escape(str(f['path']))}")
        console.print(f"\n[dim]{len(files)} files[/dim]")
    finally:
        # Close the database on the early return and on errors as well.
        db.close()
273
+
274
+
275
@main.command()
def config():
    """Show current provider configuration."""
    cfg = _get_config()
    # Build both summary lines up front, then print them in order.
    embedding_line = (
        f"Embedding: [cyan]{cfg.embedding_provider}[/cyan]"
        f" / {cfg.embedding_model} ({cfg.embedding_dimensions}d)"
    )
    reranker_line = f"Reranker: [cyan]{cfg.rerank_provider}[/cyan] / {cfg.rerank_model}"
    for summary in (embedding_line, reranker_line):
        console.print(summary)
285
+
286
+
287
@main.command()
def init():
    """Interactive setup: choose providers and configure API keys."""
    probe_dir = _find_probe_dir(create=True)
    config_path = probe_dir / "config.yaml"

    provider = detect_provider()
    if not provider:
        # No key in the environment: fall back to the built-in defaults.
        console.print("[yellow]No API keys found in environment.[/yellow]")
        console.print("Set one of: ZEROENTROPY_API_KEY, OPENAI_API_KEY, or COHERE_API_KEY")
        console.print("\nUsing default config (ZeroEntropy).")
        cfg = ProbeConfig()
    else:
        console.print(f"[green]Auto-detected provider: {provider}[/green]")
        models = DEFAULT_MODELS[provider]
        rerank_model = models["rerank"]
        # A provider without a rerank model falls back to the ZeroEntropy reranker.
        cfg = ProbeConfig(
            embedding_provider=provider,
            embedding_model=models["embedding"],
            rerank_provider=provider if rerank_model else "zeroentropy",
            rerank_model=rerank_model or "zerank-2",
        )

    save_config(cfg, config_path)
    console.print(f"\n[green]Config saved to {config_path}[/green]")
310
+
311
+
312
@main.command()
def mcp():
    """Start the MCP server (stdio transport)."""
    # The server module is imported only when this command actually runs.
    from probe.mcp.server import run_mcp_server

    run_mcp_server()
317
+
318
+
319
def _enable_probe_in_all_projects() -> int:
    """Remove "probe" from every project's disabledMcpServers list in ~/.claude.json.

    Claude Code stores per-project MCP enable/disable state there; a newly-added
    user-scope MCP server can appear as disabled in some projects. This helper
    is a narrowly-scoped post-install cleanup so users don't have to toggle
    probe on per-project via /mcp.

    Returns:
        The number of projects modified. Returns 0 silently when the file is
        missing, when its top-level value or its "projects" entry is not a JSON
        object, or when no project lists probe as disabled. Prints a yellow
        warning and returns 0 when the file cannot be read or decoded, or when
        the rewrite fails. Never raises.
    """
    claude_json_path = Path.home() / ".claude.json"
    if not claude_json_path.exists():
        return 0

    try:
        data = json.loads(claude_json_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        console.print(
            f"[yellow]Warning: could not parse {claude_json_path} ({e}); "
            "probe may need to be enabled manually via /mcp in Claude Code.[/yellow]"
        )
        return 0

    # Valid JSON is not necessarily an object; anything else has no projects.
    if not isinstance(data, dict):
        return 0

    projects = data.get("projects")
    if not isinstance(projects, dict):
        return 0

    modified = 0
    for proj_data in projects.values():
        if not isinstance(proj_data, dict):
            continue
        disabled = proj_data.get("disabledMcpServers")
        if isinstance(disabled, list) and "probe" in disabled:
            proj_data["disabledMcpServers"] = [s for s in disabled if s != "probe"]
            modified += 1

    if modified == 0:
        return 0

    # Atomic write: temp file in same dir + os.replace
    tmp_path = claude_json_path.with_suffix(".json.probe-tmp")
    try:
        tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
        # Carry over the original permission bits, so a file that was private
        # (e.g. 0600) does not become readable by others after the replace.
        shutil.copymode(claude_json_path, tmp_path)
        os.replace(tmp_path, claude_json_path)
    except OSError as e:
        console.print(
            f"[yellow]Warning: could not rewrite {claude_json_path} ({e}); "
            "probe may need to be enabled manually via /mcp.[/yellow]"
        )
        # Best-effort cleanup of tmp file
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        return 0

    return modified
377
+
378
+
379
@main.command()
@click.option("--api-key", default=None, help="ZeroEntropy API key (skip prompt).")
@click.option("--no-embed-key", is_flag=True,
              help="Register without embedding API key (rely on shell env).")
@click.option("--force", is_flag=True, help="Skip already-installed confirmation.")
def install(api_key, no_embed_key, force):
    """Register probe as a user-scope MCP server in Claude Code."""
    claude_bin = shutil.which("claude")
    if not claude_bin:
        console.print(
            "[red]Claude Code CLI not found.[/red] "
            "Install it from the official Claude Code documentation, then rerun `probe install`."
        )
        sys.exit(1)

    # Check if already installed. `claude mcp get` doesn't accept --scope; it
    # searches across scopes, which is fine for our "already installed?" check.
    lookup = subprocess.run([claude_bin, "mcp", "get", "probe"], capture_output=True)
    if lookup.returncode == 0:
        # --force skips the confirmation; otherwise a "no" aborts with no changes.
        if not force and not click.confirm(
            "probe is already registered. Reinstall?", default=False,
        ):
            console.print("No changes made.")
            return
        subprocess.run(
            [claude_bin, "mcp", "remove", "probe", "--scope", "user"],
            capture_output=True,
        )

    # Resolve API key: the --api-key flag first, then the environment (with
    # confirmation), then up to three interactive prompts.
    resolved_key: str | None = None
    if no_embed_key:
        pass
    elif api_key:
        resolved_key = api_key
    else:
        env_key = os.environ.get("ZEROENTROPY_API_KEY")
        use_env_key = bool(env_key) and click.confirm(
            "Use $ZEROENTROPY_API_KEY from environment?", default=True,
        )
        if use_env_key:
            resolved_key = env_key
        else:
            attempts_left = 3
            while resolved_key is None and attempts_left > 0:
                attempts_left -= 1
                candidate = click.prompt(
                    "Enter your ZeroEntropy API key",
                    hide_input=True, default="", show_default=False,
                ).strip()
                if candidate:
                    resolved_key = candidate
            if resolved_key is None:
                console.print("[red]API key required.[/red]")
                sys.exit(1)

    # Resolve probe command + args
    probe_bin = shutil.which("probe")
    if probe_bin:
        probe_command, probe_args = probe_bin, ["mcp"]
    else:
        probe_command, probe_args = sys.executable, ["-m", "probe.cli", "mcp"]
        console.print(
            f"[yellow]Note: probe binary not on PATH; using {sys.executable} -m probe.cli. "
            "If you move this Python env, rerun `probe install`.[/yellow]"
        )

    # Build the JSON config. Using `claude mcp add-json` instead of
    # `claude mcp add` because the latter's -e flag is variadic and eats
    # the server-name positional in some arg orderings.
    mcp_config: dict = {"type": "stdio", "command": probe_command, "args": probe_args}
    if resolved_key:
        mcp_config["env"] = {"ZEROENTROPY_API_KEY": resolved_key}

    registration = subprocess.run(
        [
            claude_bin, "mcp", "add-json", "--scope", "user", "probe",
            json.dumps(mcp_config),
        ],
        capture_output=True,
    )
    if registration.returncode != 0:
        console.print(
            f"[red]claude mcp add-json failed:[/red]\n{registration.stderr.decode(errors='replace')}"
        )
        sys.exit(1)

    console.print(
        "[green]✓ probe installed at user scope.[/green]\n"
        " Open any project in Claude Code and ask a question — "
        "probe will auto-index on first search.\n"
        " To uninstall: probe uninstall"
    )

    # Auto-enable probe in any project that had it on its disabledMcpServers list.
    n_enabled = _enable_probe_in_all_projects()
    if n_enabled > 0:
        console.print(f"[dim] Enabled probe in {n_enabled} project(s) that had it disabled.[/dim]")
480
+
481
+
482
@main.command()
@click.option("--purge", is_flag=True, help="Also delete .probe/ from cwd.")
def uninstall(purge):
    """Unregister probe from Claude Code."""
    # The docstring above is the `probe uninstall --help` text, so details live here.
    claude_bin = shutil.which("claude")
    if claude_bin:
        subprocess.run(
            [claude_bin, "mcp", "remove", "probe", "--scope", "user"],
            capture_output=True,
        )
        # Ignore errors: "not found" is fine.

    if purge:
        probe_dir = Path.cwd() / PROBE_DIR_NAME
        if probe_dir.exists():
            # Removal stays best-effort, but only report success if the
            # directory is actually gone afterwards.
            shutil.rmtree(probe_dir, ignore_errors=True)
            if probe_dir.exists():
                console.print(
                    f"[yellow]Warning: could not fully delete {probe_dir}[/yellow]"
                )
            else:
                console.print(f"[dim]Deleted {probe_dir}[/dim]")

    console.print("[green]✓ probe uninstalled.[/green]")
probe/config.py ADDED
@@ -0,0 +1,87 @@
1
+ """Configuration for probe."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+ import yaml
10
+
11
# Environment variable that holds the API key for each supported provider.
PROVIDER_ENV_VARS = dict(
    zeroentropy="ZEROENTROPY_API_KEY",
    openai="OPENAI_API_KEY",
    cohere="COHERE_API_KEY",
)

# Order in which providers are tried during auto-detection (first match wins).
PROVIDER_PRIORITY = ["zeroentropy", "openai", "cohere"]

# Default embedding and rerank model per provider; a rerank value of None
# means no default rerank model is listed for that provider.
DEFAULT_MODELS = {
    "zeroentropy": {
        "embedding": "zembed-1",
        "rerank": "zerank-2",
    },
    "openai": {
        "embedding": "text-embedding-3-large",
        "rerank": None,
    },
    "cohere": {
        "embedding": "embed-v4.0",
        "rerank": "rerank-v3.5",
    },
}
24
+
25
+
26
@dataclass
class ProbeConfig:
    """Provider settings for probe: which embedding and rerank models to use."""

    # Embedding side: provider name, model identifier, and vector size.
    embedding_provider: str = "zeroentropy"
    embedding_model: str = "zembed-1"
    embedding_dimensions: int = 1280

    # Reranking side: provider name and model identifier.
    rerank_provider: str = "zeroentropy"
    rerank_model: str = "zerank-2"
35
+
36
+
37
def save_config(config: ProbeConfig, path: Path) -> None:
    """Write *config* to *path* as YAML, creating parent directories as needed."""
    embedding_section = {
        "name": config.embedding_provider,
        "model": config.embedding_model,
        "dimensions": config.embedding_dimensions,
    }
    reranker_section = {
        "name": config.rerank_provider,
        "model": config.rerank_model,
    }
    document = {
        "providers": {
            "embedding": embedding_section,
            "reranker": reranker_section,
        }
    }

    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w") as handle:
        yaml.dump(document, handle, default_flow_style=False)
55
+
56
+
57
def load_config(path: Path) -> ProbeConfig:
    """Load config from YAML file. Returns defaults if file doesn't exist.

    Defaults are also returned when the document is empty, is not a mapping,
    or has no mapping under "providers". A missing, null, or non-mapping
    "embedding" / "reranker" section falls back to the defaults for that
    section only; keys missing inside a section fall back individually.
    """
    if not path.exists():
        return ProbeConfig()

    with open(path) as f:
        data = yaml.safe_load(f)

    # safe_load can return None, a scalar, or a list for valid YAML.
    if not isinstance(data, dict):
        return ProbeConfig()

    providers = data.get("providers")
    if not isinstance(providers, dict):
        return ProbeConfig()

    # A key written with no value (e.g. "embedding:") loads as None, so guard
    # the type before calling .get() on the section.
    embed = providers.get("embedding")
    if not isinstance(embed, dict):
        embed = {}
    rerank = providers.get("reranker")
    if not isinstance(rerank, dict):
        rerank = {}

    return ProbeConfig(
        embedding_provider=embed.get("name", "zeroentropy"),
        embedding_model=embed.get("model", "zembed-1"),
        embedding_dimensions=embed.get("dimensions", 1280),
        rerank_provider=rerank.get("name", "zeroentropy"),
        rerank_model=rerank.get("model", "zerank-2"),
    )
79
+
80
+
81
def detect_provider() -> str | None:
    """Return the first provider, in priority order, whose API key is set.

    A provider counts as available when its environment variable is present
    and non-empty. Returns ``None`` when none qualifies.
    """
    available = (
        name for name in PROVIDER_PRIORITY if os.environ.get(PROVIDER_ENV_VARS[name])
    )
    return next(available, None)
File without changes