envbot 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli.py ADDED
@@ -0,0 +1,481 @@
1
+ """
2
+ cli.py
3
+ Entry point for the envbot CLI.
4
+
5
+ Usage
6
+ -----
7
+ envbot "mongodb connection string"
8
+ envbot --k 5 "openai key"
9
+ envbot --show-source "azure storage"
10
+ envbot --reindex
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import sys
16
+ import time
17
+ from typing import Optional
18
+
19
+ import typer
20
+ from rich.console import Console
21
+ from rich.panel import Panel
22
+ from rich.table import Table
23
+ from rich.text import Text
24
+ from rich.progress import (
25
+ Progress,
26
+ SpinnerColumn,
27
+ TextColumn,
28
+ BarColumn,
29
+ TaskProgressColumn,
30
+ TimeElapsedColumn,
31
+ )
32
+ from rich.rule import Rule
33
+ from rich import box
34
+
35
# The Typer application object. `invoke_without_command=True` lets the
# callback registered on it run even though no sub-commands are defined.
app = typer.Typer(
    name="envbot",
    help="Local AI-powered environment variable search assistant.",
    add_completion=False,
    invoke_without_command=True,
)

# Console for regular output; Rich's automatic highlighting is turned off.
console = Console(highlight=False)
# Console bound to stderr, used for error panels.
err_console = Console(stderr=True, highlight=False)
44
+
45
+ # ── Branding ──────────────────────────────────────────────────────────────────
46
+
47
+ BANNER = r"""
48
+ _ _
49
+ ___ _ ____ _____| |__ ___ | |_
50
+ / _ | '_ \ \ / / __| '_ \ / _ \| __|
51
+ | __| | | \ V /| (__| |_) | (_) | |_
52
+ \___|_| |_|\_/ \___|_.__/ \___/ \__|
53
+ """
54
+
55
# Rich style strings shared by the rendering helpers in this module.
ACCENT = "bright_cyan"  # borders, rules, table headers and titles
SUCCESS = "bright_green"  # completed steps and success panels
WARN = "bright_yellow"  # in-progress steps and warnings
ERR = "bright_red"  # error panels and error messages
DIM = "dim white"  # secondary / de-emphasised text
HIGHLIGHT = "bold bright_magenta"
61
+
62
+
63
def _print_banner() -> None:
    """Show the envbot logo and tagline framed by a double-edged panel."""
    logo = Text(BANNER, style=f"bold {ACCENT}")
    subtitle = Text(
        " AI-powered .env variable search assistant\n",
        style=f"italic {DIM}",
    )
    # Logo and tagline are joined into one Text so they share a single panel.
    framed = Panel(
        Text.assemble(logo, subtitle),
        border_style=ACCENT,
        box=box.DOUBLE_EDGE,
        padding=(0, 2),
    )
    console.print(framed)
79
+
80
+
81
+ # ── Reindex pipeline ─────────────────────────────────────────────────────────
82
+
83
def _do_reindex() -> None:
    """Rebuild the search index: scan for files, parse variables, embed, save.

    Each stage is displayed as its own progress task. Terminates the CLI with
    exit code 1 (``typer.Exit``) when the parse stage yields no records.
    """
    from scanner import scan_and_copy
    from parser import parse_all
    from indexer import build_index

    heading = Rule("[bold bright_cyan] REINDEX PIPELINE [/bold bright_cyan]", style=ACCENT)
    console.print()
    console.print(heading)
    console.print()

    columns = (
        SpinnerColumn("dots", style=f"bold {WARN}"),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(bar_width=30, style=DIM, complete_style=ACCENT, finished_style=SUCCESS),
        TaskProgressColumn(),
        TimeElapsedColumn(),
    )

    with Progress(*columns, console=console, transient=False) as progress:

        def _start_stage(label, head_start):
            # Register the stage and nudge the bar so it is visibly "running".
            stage = progress.add_task(f"[{WARN}]{label}", total=100)
            progress.update(stage, advance=head_start)
            return stage

        def _finish_stage(stage, summary):
            # Fill the bar and swap the description for the result summary.
            progress.update(stage, completed=100, description=f"[{SUCCESS}]{summary}")

        # Stage 1: scan
        scan_stage = _start_stage("Scanning drives for .env files...", 10)
        meta = scan_and_copy()
        _finish_stage(scan_stage, f"Scanned -- {len(meta)} env file(s) found")

        # Stage 2: parse
        parse_stage = _start_stage("Parsing environment variables...", 10)
        records = parse_all()
        _finish_stage(parse_stage, f"Parsed -- {len(records)} variables extracted")

        if not records:
            # Stop the live display before writing the error panel to stderr.
            progress.stop()
            failure = Panel(
                "[bold]No variables found.[/bold]\n"
                "Make sure your configured drives contain .env files.",
                title="Error",
                border_style=ERR,
                box=box.ROUNDED,
            )
            err_console.print(failure)
            raise typer.Exit(code=1)

        # Stage 3: embed and index
        embed_stage = _start_stage("Generating embeddings & building FAISS index...", 5)
        build_index(records)
        _finish_stage(embed_stage, f"Indexed -- {len(records)} vectors saved")

    done = Panel(
        f"[bold {SUCCESS}]Reindexing complete![/bold {SUCCESS}]\n"
        f"[{DIM}]Your environment variables are ready to search.",
        border_style=SUCCESS,
        box=box.ROUNDED,
        padding=(0, 2),
    )
    console.print()
    console.print(done)
153
+
154
+
155
+ # ── Configuration wizard ─────────────────────────────────────────────────────
156
+
157
def run_config_wizard() -> None:
    """Interactively collect scan locations and a storage path, then save them.

    Any configuration already stored in ``~/.envbot_config.json`` is shown
    first; if the user chooses to keep it the function returns without
    changing anything. Otherwise the user is prompted for drive letters
    (Windows) or directories (other platforms) and for a writable storage
    directory, and both are written to the config file as JSON with the keys
    ``scan_drives`` and ``data_dir``.
    """
    import json
    import string
    import os
    from pathlib import Path

    from rich.markup import escape

    CONFIG_FILE = Path.home() / ".envbot_config.json"

    _print_banner()

    console.print(
        Rule("[bold bright_cyan] CONFIGURATION [/bold bright_cyan]", style=ACCENT)
    )
    console.print()

    # Load existing config if available. A broken file is reported and then
    # ignored so the wizard can replace it, instead of being swallowed silently.
    existing_config = None
    if CONFIG_FILE.exists():
        try:
            loaded = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers invalid JSON as well as undecodable bytes.
            console.print(
                f"[{WARN}] Ignoring unreadable config file: {escape(str(exc))}[/{WARN}]"
            )
        else:
            # Only a JSON object is a usable configuration.
            if isinstance(loaded, dict):
                existing_config = loaded

    if existing_config:
        cfg_table = Table(
            box=box.SIMPLE_HEAVY,
            show_header=True,
            header_style=f"bold {ACCENT}",
            border_style=DIM,
            padding=(0, 2),
        )
        cfg_table.add_column("Setting", style=f"bold {WARN}")
        cfg_table.add_column("Value", style="bold white")

        # The file may have been edited by hand, so coerce values to strings
        # rather than assuming a list of str / a str.
        saved_drives = existing_config.get("scan_drives", [])
        if not isinstance(saved_drives, (list, tuple)):
            saved_drives = [saved_drives]
        drives_str = ", ".join(str(d) for d in saved_drives)
        stored_str = str(existing_config.get("data_dir", ""))
        # Escape so that brackets in paths are not parsed as Rich markup.
        cfg_table.add_row("Scan Drives", escape(drives_str))
        cfg_table.add_row("Storage Path", escape(stored_str))

        console.print(
            Panel(cfg_table, title="[bold]Current Configuration[/bold]",
                  border_style=ACCENT, box=box.ROUNDED, padding=(1, 2))
        )
        console.print()

        use_existing = typer.confirm("Use this configuration for reindexing?", default=True)
        if use_existing:
            return

        console.print()

    # Step 1: Detect available drives / directories
    is_windows = sys.platform.startswith("win")
    selected_drives = []

    if is_windows:
        drives = [
            f"{letter}:\\"
            for letter in string.ascii_uppercase
            if os.path.exists(f"{letter}:\\")
        ]

        drive_table = Table(box=box.SIMPLE, show_header=False, border_style=DIM)
        drive_table.add_column("Drive", style=f"bold {ACCENT}")
        for d in drives:
            drive_table.add_row(d)
        console.print(
            Panel(drive_table, title=f"[bold {ACCENT}]Available Drives[/bold {ACCENT}]",
                  border_style=ACCENT, box=box.ROUNDED, padding=(0, 2))
        )
        console.print()

        # Keep prompting until at least one existing drive has been chosen.
        while not selected_drives:
            drives_input = typer.prompt(
                "Enter drive letter(s) to scan (comma-separated, e.g. C, D)",
                default="D"
            )
            for part in drives_input.split(","):
                # Accept "c", "C:" and "C:\" alike.
                letter = part.strip().upper().replace(":", "").replace("\\", "")
                if not letter:
                    continue
                drive_path = f"{letter}:\\"
                if os.path.exists(drive_path):
                    if drive_path not in selected_drives:
                        selected_drives.append(drive_path)
                else:
                    console.print(
                        f"[{WARN}] Drive {escape(letter)}: does not exist or is not ready.[/{WARN}]"
                    )

            if not selected_drives:
                console.print(f"[{ERR}] Please select at least one valid drive.[/{ERR}]")
    else:
        default_dir = str(Path.home())
        console.print(
            Panel(
                f"[{DIM}]Platform: [bold]{sys.platform}[/bold]\n"
                f"Enter one or more directory paths to scan recursively.",
                title=f"[bold {ACCENT}]Directory Selection[/bold {ACCENT}]",
                border_style=ACCENT,
                box=box.ROUNDED,
                padding=(0, 2),
            )
        )
        console.print()

        # Keep prompting until at least one existing directory has been chosen.
        while not selected_drives:
            paths_input = typer.prompt(
                "Enter directory path(s) to scan (comma-separated)",
                default=default_dir
            )
            for part in paths_input.split(","):
                path_str = part.strip()
                if not path_str:
                    continue
                path_obj = Path(path_str).expanduser()
                if path_obj.exists() and path_obj.is_dir():
                    res_path = str(path_obj.resolve())
                    if res_path not in selected_drives:
                        selected_drives.append(res_path)
                else:
                    console.print(
                        f"[{WARN}] Directory '{escape(path_str)}' does not exist or is not a folder.[/{WARN}]"
                    )

            if not selected_drives:
                console.print(f"[{ERR}] Please select at least one valid directory to scan.[/{ERR}]")

    console.print()

    # Step 2: Storage path
    default_storage = str(Path.home() / ".envbot_data")
    valid_storage = False
    selected_storage = ""

    while not valid_storage:
        storage_input = typer.prompt(
            "Enter path to store envbot data",
            default=default_storage
        )
        shown_path = storage_input.strip()
        try:
            # Expand "~" exactly like the directory prompt above; without it a
            # path such as "~/data" would create a literal "~" folder in the
            # current working directory.
            storage_path = Path(shown_path).expanduser()
            shown_path = str(storage_path)
            storage_path.mkdir(parents=True, exist_ok=True)
            # Prove the directory is writable by creating and removing a file.
            test_file = storage_path / ".write_test"
            test_file.touch()
            test_file.unlink()
            selected_storage = str(storage_path.resolve())
            valid_storage = True
        except (OSError, RuntimeError) as e:
            # OSError includes PermissionError; RuntimeError is raised by
            # expanduser() when a home directory cannot be determined.
            console.print(
                f"[{ERR}] Cannot write to '{escape(shown_path)}': {escape(str(e))}[/{ERR}]"
            )
            console.print(" Please enter a different path.")

    # Save config
    new_config = {
        "scan_drives": selected_drives,
        "data_dir": selected_storage
    }
    CONFIG_FILE.write_text(json.dumps(new_config, indent=2), encoding="utf-8")

    console.print()
    # Summary table
    summary = Table(box=box.SIMPLE_HEAVY, show_header=True,
                    header_style=f"bold {ACCENT}", border_style=DIM, padding=(0, 2))
    summary.add_column("Setting", style=f"bold {WARN}")
    summary.add_column("Value", style="bold white")
    summary.add_row("Scan Drives", escape(", ".join(selected_drives)))
    summary.add_row("Storage Path", escape(selected_storage))

    console.print(
        Panel(summary, title=f"[bold {SUCCESS}]Configuration Saved[/bold {SUCCESS}]",
              border_style=SUCCESS, box=box.DOUBLE_EDGE, padding=(1, 2))
    )
    console.print()
326
+
327
+
328
+ # ── Search result rendering ───────────────────────────────────────────────────
329
+
330
def _render_results(
    results: list,
    show_source: bool,
    show_value: bool,
) -> None:
    """Print search results as a Rich table.

    Args:
        results: Search hits; each item must expose ``variable_name``,
            ``source_file`` and ``source_path`` attributes.
        show_source: Add a "Source" column containing ``source_path``.
        show_value: Add a "Value" column; the value is read from the copied
            env file under ``scanner.COPIED_DIR`` at render time.
    """
    import re

    from rich.markup import escape
    from scanner import COPIED_DIR

    def _get_value(source_file: str, var_name: str) -> str:
        """Return the value assigned to *var_name* in the copied env file.

        Returns a ``<...>`` placeholder string when the file is missing,
        cannot be read, or does not define the variable.
        """
        copied_path = COPIED_DIR / source_file
        if not copied_path.exists():
            return "<file not found>"
        try:
            content = copied_path.read_text(encoding="utf-8", errors="replace")
        except Exception:
            return "<error reading file>"

        # Use [ \t] rather than \s around "=": \s also matches newlines, so an
        # empty assignment ("FOO=") would otherwise capture the *next* line,
        # i.e. potentially another variable's secret.
        pattern = re.compile(
            r"^[ \t]*(?:export[ \t]+)?" + re.escape(var_name) + r"[ \t]*=[ \t]*(.*)$",
            re.MULTILINE,
        )
        match = pattern.search(content)
        if not match:
            return "<not found>"

        # strip() also drops the trailing "\r" of CRLF files.
        val = match.group(1).strip()
        # Remove one pair of matching surrounding quotes; the length guard
        # keeps a lone quote character from being treated as a pair.
        if len(val) >= 2 and val[0] == val[-1] and val[0] in ("'", '"'):
            val = val[1:-1]
        return val

    # Build the results table
    table = Table(
        box=box.ROUNDED,
        show_header=True,
        header_style=f"bold {ACCENT}",
        border_style=ACCENT,
        padding=(0, 1),
        title=f"[bold {ACCENT}]Search Results[/bold {ACCENT}]",
        title_style=f"bold {ACCENT}",
        caption=f"[{DIM}]{len(results)} result(s) found",
        caption_style=DIM,
    )

    table.add_column("#", style=f"bold {DIM}", justify="right", width=3)
    table.add_column("Variable", style=f"bold {SUCCESS}", min_width=15)
    if show_value:
        table.add_column("Value", style=f"bold {WARN}", min_width=15)
    if show_source:
        table.add_column("Source", style=f"{DIM}", min_width=20)

    for i, r in enumerate(results, 1):
        # Cell strings are parsed as Rich markup, so escape file-derived text:
        # a value such as "[/x]" would otherwise raise MarkupError, and
        # bracketed text could be silently restyled or dropped.
        row = [str(i), escape(str(r.variable_name))]
        if show_value:
            row.append(escape(_get_value(r.source_file, r.variable_name)))
        if show_source:
            row.append(escape(str(r.source_path)))
        table.add_row(*row)

    console.print()
    console.print(table)
    console.print()
392
+
393
+
394
+ # ── Main callback ─────────────────────────────────────────────────────────────
395
+
396
# CLI callback: handles --reindex, prints help when no query is given, and
# otherwise runs a semantic search and renders the hits.
# Exit codes: 0 on success / help / no matches, 1 when the search fails.
@app.callback()
def main(
    ctx: typer.Context,
    query: Optional[str] = typer.Argument(
        None, help="Natural language search query."
    ),
    k: int = typer.Option(
        3, "--k", "-k", help="Number of results to return.", min=1, max=100
    ),
    show_source: bool = typer.Option(
        False, "--show-source", help="Print the source .env file path."
    ),
    show_value: bool = typer.Option(
        False, "--show-value", "-v", help="Print the variable value (read on-the-fly)."
    ),
    reindex: bool = typer.Option(
        False, "--reindex", help="Rescan configured drives and rebuild the search index."
    ),
) -> None:
    from rich.markup import escape

    # ── Reindex ────────────────────────────────────────────────────────────
    if reindex:
        run_config_wizard()
        _do_reindex()
        raise typer.Exit()

    # ── No query supplied -> print help ────────────────────────────────────
    # A whitespace-only query is treated like a missing one instead of being
    # sent to the search backend as an empty string.
    search_text = query.strip() if query else ""
    if not search_text:
        _print_banner()
        console.print(ctx.get_help())
        raise typer.Exit()

    # The query is user input: escape it before embedding it in Rich markup so
    # that text such as "[/x]" cannot raise MarkupError or restyle the output.
    safe_query = escape(search_text)

    # ── Search ─────────────────────────────────────────────────────────────
    try:
        from search import search

        with Progress(
            SpinnerColumn("dots", style=f"bold {ACCENT}"),
            # The column's text is a str.format template, so the query must
            # not be part of it ("{" / "}" in a query would break rendering).
            # It is supplied through the task description instead.
            TextColumn("{task.description}"),
            console=console,
            transient=True,
        ) as progress:
            progress.add_task(
                f"[{ACCENT}]Searching for '[bold]{safe_query}[/bold]'...",
                total=None,
            )
            results = search(search_text, k=k)

    except FileNotFoundError as exc:
        err_console.print(
            Panel(
                f"[bold]{escape(str(exc))}[/bold]",
                title="Index Not Found",
                border_style=ERR,
                box=box.ROUNDED,
            )
        )
        raise typer.Exit(code=1)
    except Exception as exc:
        # Top-level boundary: report any other failure and exit non-zero.
        err_console.print(
            Panel(
                f"[bold]{escape(str(exc))}[/bold]",
                title="Search Error",
                border_style=ERR,
                box=box.ROUNDED,
            )
        )
        raise typer.Exit(code=1)

    if not results:
        console.print(
            Panel(
                f"No matches found for '[bold]{safe_query}[/bold]'.\n"
                f"[{DIM}]Try a different search term or run --reindex.",
                border_style=WARN,
                box=box.ROUNDED,
            )
        )
        raise typer.Exit()

    _render_results(results, show_source, show_value)
473
+
474
+
475
def entry() -> None:
    """Setuptools entry point wrapper.

    Invokes the Typer application; this is the callable named by the
    ``envbot = cli:entry`` console script.
    """
    app()


# Allow running the module directly as a script.
if __name__ == "__main__":
    entry()
@@ -0,0 +1,304 @@
1
+ Metadata-Version: 2.4
2
+ Name: envbot
3
+ Version: 1.0.0
4
+ Summary: Local AI-powered environment variable search assistant
5
+ Home-page: https://github.com/ravindraogg/envbot
6
+ Author: Ravi
7
+ Keywords: env environment variables search ai faiss semantic dotenv cli
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Environment :: Console
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Topic :: Utilities
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: sentence-transformers>=2.7.0
21
+ Requires-Dist: faiss-cpu>=1.8.0
22
+ Requires-Dist: typer>=0.12.0
23
+ Requires-Dist: rich>=13.7.0
24
+ Requires-Dist: python-dotenv>=1.0.0
25
+ Requires-Dist: numpy>=2.1.0
26
+ Dynamic: author
27
+ Dynamic: classifier
28
+ Dynamic: description
29
+ Dynamic: description-content-type
30
+ Dynamic: home-page
31
+ Dynamic: keywords
32
+ Dynamic: license-file
33
+ Dynamic: requires-dist
34
+ Dynamic: requires-python
35
+ Dynamic: summary
36
+
37
+ # envbot
38
+
39
+ **Local AI-powered environment variable search assistant.**
40
+
41
+ Stop digging through dozens of `.env` files scattered across your projects. `envbot` indexes all your environment variables and lets you find them instantly using natural language search -- powered by sentence embeddings and FAISS.
42
+
43
+ ```
44
+ envbot "mongodb connection string"
45
+ ```
46
+
47
+ ```
48
+ Search Results
49
+ +--------------------------------------------------------------------------+
50
+ | # | Variable | Value | Source |
51
+ |-----+-----------------+------------------------------+-------------------|
52
+ | 1 | MONGO_URI | mongodb+srv://user:pass@c... | D:\App\.env |
53
+ | 2 | DATABASE_URL | mongodb://localhost:27017/... | D:\Api\.env |
54
+ | 3 | DB_CONNECTION | mongodb://admin@cluster0... | D:\Svc\.env |
55
+ +--------------------------------------------------------------------------+
56
+ 3 result(s) found
57
+ ```
58
+
59
+ ---
60
+
61
+ ## Features
62
+
63
+ - **AI-Powered Semantic Search** -- Understands what you mean, not just exact matches. Search for `"stripe payment key"` and it finds `STRIPE_SECRET_KEY`.
64
+ - **Multi-Drive / Multi-Directory Scanning** -- Scan one or more drives (Windows) or directories (macOS/Linux) in a single index.
65
+ - **Cross-Platform** -- Works on Windows, macOS, and Linux.
66
+ - **Interactive Configuration Wizard** -- No config files to edit manually. The CLI guides you through setup.
67
+ - **Zero-Leak Security** -- Variable values are **never stored** in the index or its metadata. They are only read on-the-fly, from the local copies of your `.env` files kept in the data directory, when you explicitly request them with `-v`.
68
+ - **Beautiful CLI** -- Rich terminal UI with progress bars, spinners, tables, and color-coded output.
69
+ - **Fast** -- FAISS vector search returns results in milliseconds after the initial model load.
70
+
71
+ ---
72
+
73
+ ## Installation
74
+
75
+ ### Prerequisites
76
+
77
+ - **Python 3.11+** is required.
78
+ - **pip** (comes with Python).
79
+
80
+ ### Option 1: Install from PyPI
81
+
82
+ ```bash
83
+ pip install envbot
84
+ ```
85
+
86
+ ### Option 2: Install from GitHub
87
+
88
+ ```bash
89
+ pip install git+https://github.com/ravindraogg/envbot.git
90
+ ```
91
+
92
+ ### Option 3: Install from Source
93
+
94
+ ```bash
95
+ git clone https://github.com/ravindraogg/envbot.git
96
+ cd envbot
97
+ pip install .
98
+ ```
99
+
100
+ > **Note:** On first run, `envbot` will automatically download the `all-MiniLM-L6-v2` sentence transformer model (~80 MB). This happens only once.
101
+
102
+ ---
103
+
104
+ ## Quick Start
105
+
106
+ ### Step 1: Configure & Index
107
+
108
+ Run the reindex command. The interactive wizard will guide you:
109
+
110
+ ```bash
111
+ envbot --reindex
112
+ ```
113
+
114
+ You will be prompted to:
115
+
116
+ 1. **Select drives/directories to scan** -- Choose which drives (Windows: `C, D, E`) or directories (Linux/macOS: `/home/user/projects`) to scan for `.env` files.
117
+ 2. **Set storage path** -- Choose where to store the copied `.env` files and the FAISS index. Defaults to `~/.envbot_data`.
118
+
119
+ The wizard saves your preferences to `~/.envbot_config.json`. On subsequent runs, it will ask if you want to reuse the saved configuration.
120
+
121
+ ### Step 2: Search
122
+
123
+ ```bash
124
+ envbot "database"
125
+ ```
126
+
127
+ That's it! You'll see a table of matching environment variable names.
128
+
129
+ ---
130
+
131
+ ## Usage
132
+
133
+ All options must be placed **before** the search query.
134
+
135
+ ### Basic Search
136
+
137
+ ```bash
138
+ envbot "openai api key"
139
+ ```
140
+
141
+ ### Show Variable Values (`-v`)
142
+
143
+ Read and display the actual value from the `.env` file on-the-fly:
144
+
145
+ ```bash
146
+ envbot -v "stripe"
147
+ ```
148
+
149
+ ### Show Source File Path (`--show-source`)
150
+
151
+ See which `.env` file each variable came from:
152
+
153
+ ```bash
154
+ envbot --show-source "database"
155
+ ```
156
+
157
+ ### Change Number of Results (`-k`)
158
+
159
+ By default, 3 results are returned. Get more:
160
+
161
+ ```bash
162
+ envbot -k 10 "api key"
163
+ ```
164
+
165
+ ### Combine All Flags
166
+
167
+ ```bash
168
+ envbot -v --show-source -k 5 "mongodb"
169
+ ```
170
+
171
+ ### Rebuild the Index
172
+
173
+ Re-scan all configured drives and rebuild the search database:
174
+
175
+ ```bash
176
+ envbot --reindex
177
+ ```
178
+
179
+ ### View Help
180
+
181
+ ```bash
182
+ envbot --help
183
+ ```
184
+
185
+ ---
186
+
187
+ ## How It Works
188
+
189
+ ```
190
+ +------------------+ +------------------+ +------------------+
191
+ | 1. SCAN | --> | 2. PARSE | --> | 3. EMBED |
192
+ | Recursively | | Extract variable | | Generate vector |
193
+ | find .env files | | names (no values)| | embeddings via |
194
+ | across drives | | from all files | | SentenceTransf. |
195
+ +------------------+ +------------------+ +------------------+
196
+ |
197
+ v
198
+ +------------------+ +------------------+ +------------------+
199
+ | 6. DISPLAY | <-- | 5. RANK | <-- | 4. INDEX |
200
+ | Rich table with | | Cosine similar. | | Store vectors |
201
+ | colors & values | | via FAISS | | in FAISS index |
202
+ +------------------+ +------------------+ +------------------+
203
+ ```
204
+
205
+ ### Architecture
206
+
207
+ | File | Purpose |
208
+ |---------------|---------------------------------------------------------|
209
+ | `cli.py` | Entry point, argument parsing, Rich UI rendering |
210
+ | `scanner.py` | Recursively walks drives/directories for `.env` files |
211
+ | `parser.py` | Extracts variable names from copied `.env` files |
212
+ | `indexer.py` | Generates embeddings and builds the FAISS vector index |
213
+ | `search.py` | Encodes queries and performs FAISS similarity search |
214
+
215
+ ### Security Model
216
+
217
+ - **Values are never stored** in the FAISS index or metadata files.
218
+ - Only variable **names** and **source file paths** are persisted.
219
+ - When you use `-v`, values are read on-the-fly from the local copy at search time.
220
+ - Copied `.env` files are stored in your configured data directory (default: `~/.envbot_data/copied_envs/`).
221
+
222
+ ---
223
+
224
+ ## Configuration
225
+
226
+ Your configuration is stored at `~/.envbot_config.json`:
227
+
228
+ ```json
229
+ {
230
+ "scan_drives": ["D:\\", "E:\\"],
231
+ "data_dir": "C:\\Users\\you\\.envbot_data"
232
+ }
233
+ ```
234
+
235
+ | Key | Description |
236
+ |----------------|--------------------------------------------------|
237
+ | `scan_drives` | List of drives (Windows) or directories to scan |
238
+ | `data_dir` | Where the index, metadata, and copies are stored |
239
+
240
+ You can edit this file manually or re-run `envbot --reindex` to use the wizard.
241
+
242
+ ---
243
+
244
+ ## Command Reference
245
+
246
+ | Command | Description |
247
+ |------------------------------------------------|--------------------------------------|
248
+ | `envbot "query"` | Search for matching variables |
249
+ | `envbot -v "query"` | Search and show values |
250
+ | `envbot --show-source "query"` | Search and show source file paths |
251
+ | `envbot -k 10 "query"` | Return up to 10 results |
252
+ | `envbot -v --show-source -k 5 "query"` | All flags combined |
253
+ | `envbot --reindex` | Configure drives and rebuild index |
254
+ | `envbot --help` | Show help message |
255
+
256
+ ---
257
+
258
+ ## Requirements
259
+
260
+ | Package | Version | Purpose |
261
+ |-----------------------|------------|--------------------------------|
262
+ | `sentence-transformers` | >= 2.7.0 | Semantic text embeddings |
263
+ | `faiss-cpu` | >= 1.8.0 | Vector similarity search |
264
+ | `typer` | >= 0.12.0 | CLI framework |
265
+ | `rich` | >= 13.7.0 | Terminal UI (tables, spinners) |
266
+ | `python-dotenv` | >= 1.0.0 | .env file parsing |
267
+ | `numpy` | >= 2.1.0 | Numerical operations |
268
+ | `torch` | >= 2.0.0 | ML backend for transformers |
269
+
270
+ ---
271
+
272
+ ## Platform Support
273
+
274
+ | Platform | Status | Drive/Path Format |
275
+ |----------------|-------------|---------------------------|
276
+ | Windows 10/11 | Supported | `C:\`, `D:\`, `E:\` |
277
+ | macOS | Supported | `/Users/you/projects` |
278
+ | Linux | Supported | `/home/you/projects` |
279
+
280
+ ---
281
+
282
+ ## Troubleshooting
283
+
284
+ ### "FAISS index not found" error
285
+
286
+ You need to build the index first:
287
+
288
+ ```bash
289
+ envbot --reindex
290
+ ```
291
+
292
+ ### Slow first search
293
+
294
+ The first search takes a few seconds because the AI model needs to load into memory. Subsequent searches in the same session are instant.
295
+
296
+ ### Permission errors on Windows
297
+
298
+ If you see `PermissionError: [WinError 5]`, make sure the storage path is set to a user-writable directory (the default `~/.envbot_data` should work). Re-run `envbot --reindex` to reconfigure.
299
+
300
+ ---
301
+
302
+ ## License
303
+
304
+ MIT License. See [LICENSE](LICENSE) for details.
@@ -0,0 +1,11 @@
1
+ cli.py,sha256=ptA5eaymVwO3saANsOC8-mpZUvxxU9QiWedBio2fXgE,16239
2
+ indexer.py,sha256=OqcsJ0_TezEHpgiLpHFvrJFPYYl5qHfIV47BdTyeILk,7412
3
+ parser.py,sha256=vHYqJwJ9ke1qkhcAoinOq1oLfU-5vo58vhak8slTbMc,2229
4
+ scanner.py,sha256=PBv9ABXl-64krnGxQpNSTGE1Vg7N1w7PDAtpTMIyfR8,4365
5
+ search.py,sha256=_Q5lKAKYuvUBomfCCOIrQTZlccFhxioixpp7mDnGD7c,3567
6
+ envbot-1.0.0.dist-info/licenses/LICENSE,sha256=Ucf2A_iir0c2aTbnUNBXd1M1-_plnHrxy-mv2_Wflfs,1061
7
+ envbot-1.0.0.dist-info/METADATA,sha256=8GZa7EIDTqcOE0ZukXCMnUkK5bsv9tIv4bSygrJH_rA,10128
8
+ envbot-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
9
+ envbot-1.0.0.dist-info/entry_points.txt,sha256=Xe5cFNsZfSA28KLgxT_8uGHsoHQwMGSHg853BvRiblM,37
10
+ envbot-1.0.0.dist-info/top_level.txt,sha256=IP_R7JTzFR6DHIcNZRzUDtn7cvQxt291Cua0VXVxng4,34
11
+ envbot-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ envbot = cli:entry
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ravi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,5 @@
1
+ cli
2
+ indexer
3
+ parser
4
+ scanner
5
+ search
indexer.py ADDED
@@ -0,0 +1,187 @@
1
+ """
2
+ indexer.py
3
+ Generates semantic search text for each variable, embeds with
4
+ all-MiniLM-L6-v2, builds a FAISS index, and persists everything to disk.
5
+ """
6
+
7
+ import json
8
+ import re
9
+ from pathlib import Path
10
+
11
+ import faiss
12
+ import numpy as np
13
+ import os
14
+
15
+ os.environ["TRANSFORMERS_NO_TF"] = "1"
16
+ os.environ["USE_TF"] = "0"
17
+
18
+ from sentence_transformers import SentenceTransformer
19
+
20
+ from scanner import DATA_DIR
21
+ from parser import parse_all
22
+
23
+ # ── Paths ─────────────────────────────────────────────────────────────────────
24
+
25
+ FAISS_INDEX_PATH = DATA_DIR / "index.faiss"
26
+ ENV_METADATA_PATH = DATA_DIR / "env_metadata.json"
27
+
28
+ MODEL_NAME = "all-MiniLM-L6-v2"
29
+
30
+ # ── Semantic expansion map ────────────────────────────────────────────────────
31
+ # Maps token fragments (lowercase) that appear in variable names to extra
32
+ # semantic context words injected into the search text.
33
+ _EXPANSION: dict[str, str] = {
34
+ "openai": "openai gpt llm chatgpt ai language model token",
35
+ "gpt": "gpt openai language model chatgpt llm",
36
+ "anthropic": "anthropic claude ai llm language model",
37
+ "gemini": "gemini google ai llm language model",
38
+ "mongo": "mongodb mongo database nosql connection string db",
39
+ "database": "database db sql connection url string",
40
+ "db": "database db sql connection url string",
41
+ "postgres": "postgresql postgres relational database sql connection",
42
+ "mysql": "mysql relational database sql connection string",
43
+ "redis": "redis cache in-memory key-value store connection",
44
+ "azure": "azure microsoft cloud storage blob connection string",
45
+ "aws": "aws amazon cloud s3 bucket credentials access key",
46
+ "gcp": "gcp google cloud platform credentials service account",
47
+ "s3": "s3 amazon aws bucket object storage",
48
+ "firebase": "firebase google realtime database auth",
49
+ "supabase": "supabase postgres database backend auth",
50
+ "stripe": "stripe payment gateway api key billing",
51
+ "paypal": "paypal payment gateway api key billing",
52
+ "twilio": "twilio sms messaging phone api key",
53
+ "sendgrid": "sendgrid email smtp delivery api key",
54
+ "mailgun": "mailgun email smtp delivery api key",
55
+ "smtp": "smtp email mail server host port",
56
+ "email": "email smtp mail server sender credentials",
57
+ "jwt": "jwt json web token secret auth authentication",
58
+ "secret": "secret key token authentication signing",
59
+ "auth": "auth authentication authorization token login",
60
+ "api": "api key token access credentials",
61
+ "key": "key secret token credentials api",
62
+ "token": "token auth key bearer access secret",
63
+ "password": "password credential login secret",
64
+ "private": "private key secret credential",
65
+ "public": "public key access endpoint",
66
+ "url": "url uri endpoint host connection string",
67
+ "uri": "uri url endpoint connection string database",
68
+ "host": "host hostname server address url",
69
+ "port": "port number server address network",
70
+ "storage": "storage bucket blob files cloud",
71
+ "cdn": "cdn content delivery network static assets",
72
+ "webhook": "webhook callback url endpoint event",
73
+ "slack": "slack workspace channel api token webhook",
74
+ "github": "github git repository token oauth",
75
+ "google": "google oauth credentials client id secret",
76
+ "facebook": "facebook oauth credentials app id secret",
77
+ "twitter": "twitter x oauth api key bearer token",
78
+ "debug": "debug logging development mode flag",
79
+ "env": "environment mode staging production development",
80
+ "next": "nextjs next.js react frontend",
81
+ "react": "react frontend javascript spa",
82
+ "node": "nodejs node javascript runtime",
83
+ "flask": "flask python web framework",
84
+ "django": "django python web framework",
85
+ "log": "logging log level debug info",
86
+ "region": "region zone cloud datacenter location",
87
+ "bucket": "bucket s3 storage cloud object files",
88
+ "endpoint": "endpoint url api host address",
89
+ "connection":"connection string url database",
90
+ "cert": "certificate ssl tls security",
91
+ "ssl": "ssl tls certificate security https",
92
+ "tls": "tls ssl certificate security https",
93
+ }
94
+
95
+
96
+ def _human_readable(var_name: str) -> str:
97
+ """
98
+ OPENAI_API_KEY → 'openai api key'
99
+ Convert UPPER_SNAKE_CASE to lowercase spaced words.
100
+ """
101
+ words = var_name.lower().replace("-", "_").split("_")
102
+ return " ".join(w for w in words if w)
103
+
104
+
105
+ def _expand(human: str) -> str:
106
+ """
107
+ Inject extra semantic tokens based on word fragments in the variable name.
108
+ Returns a deduplicated enriched string.
109
+ """
110
+ tokens = set(human.split())
111
+ extras: list[str] = [human]
112
+
113
+ for fragment, expansion in _EXPANSION.items():
114
+ if fragment in human:
115
+ extras.append(expansion)
116
+
117
+ combined = " ".join(extras)
118
+ # Deduplicate words while preserving order
119
+ seen: set[str] = set()
120
+ result: list[str] = []
121
+ for word in combined.split():
122
+ if word not in seen:
123
+ seen.add(word)
124
+ result.append(word)
125
+ return " ".join(result)
126
+
127
+
128
+ def build_search_text(var_name: str) -> str:
129
+ human = _human_readable(var_name)
130
+ return _expand(human)
131
+
132
+
133
+ # ── Core indexing ─────────────────────────────────────────────────────────────
134
+
135
+ def build_index(records: list[dict] | None = None) -> None:
136
+ """
137
+ Build (or rebuild) the FAISS index from parsed records.
138
+ Persists index.faiss and env_metadata.json to DATA_DIR.
139
+ """
140
+ if records is None:
141
+ records = parse_all()
142
+
143
+ if not records:
144
+ raise RuntimeError("No env variable records found. Run scan first.")
145
+
146
+ print(" Loading SentenceTransformer...")
147
+ model = SentenceTransformer(MODEL_NAME)
148
+ print(" Model loaded")
149
+
150
+ print(f" Generating embeddings for {len(records)} variables...")
151
+ texts = [build_search_text(r["variable_name"]) for r in records]
152
+ embeddings: np.ndarray = model.encode(
153
+ texts,
154
+ batch_size=128,
155
+ show_progress_bar=False,
156
+ convert_to_numpy=True,
157
+ normalize_embeddings=True, # cosine sim via inner product
158
+ )
159
+
160
+ dim = embeddings.shape[1]
161
+ index = faiss.IndexFlatIP(dim) # Inner Product = cosine on normalised vecs
162
+ index.add(embeddings.astype(np.float32))
163
+
164
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
165
+ faiss.write_index(index, str(FAISS_INDEX_PATH))
166
+
167
+ # Persist only the whitelisted fields below so a 'value' field can never reach disk (security)
168
+ safe_records = [
169
+ {
170
+ "variable_name": r["variable_name"],
171
+ "source_file": r["source_file"],
172
+ "source_path": r["source_path"],
173
+ }
174
+ for r in records
175
+ ]
176
+ ENV_METADATA_PATH.write_text(
177
+ json.dumps(safe_records, indent=2), encoding="utf-8"
178
+ )
179
+
180
+ print(
181
+ f" Index built - {index.ntotal} vectors, dim={dim}.\n"
182
+ f" Saved to {FAISS_INDEX_PATH}"
183
+ )
184
+
185
+
186
+ if __name__ == "__main__":
187
+ build_index()
parser.py ADDED
@@ -0,0 +1,77 @@
1
+ """
2
+ parser.py
3
+ Reads all copied .env files from COPIED_DIR (DATA_DIR/copied_envs),
4
+ extracts variable names (never values), and returns structured records.
5
+ """
6
+
7
+ import re
8
+ from pathlib import Path
9
+
10
+ from scanner import COPIED_DIR, METADATA_FILE
11
+ import json
12
+
13
+ # Matches SOME_VAR=... or export SOME_VAR=...
14
+ # Captures only the variable name.
15
+ _VAR_RE = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=", re.MULTILINE)
16
+
17
+
18
+ def _load_metadata() -> dict[str, str]:
19
+ """Return mapping copied_filename → original_path."""
20
+ if not METADATA_FILE.exists():
21
+ return {}
22
+ raw = json.loads(METADATA_FILE.read_text(encoding="utf-8"))
23
+ return {Path(r["copied_path"]).name: r["original_path"] for r in raw}
24
+
25
+
26
+ def parse_all() -> list[dict]:
27
+ """
28
+ Parse every .txt file in COPIED_DIR.
29
+ Returns list of:
30
+ {
31
+ "variable_name": "OPENAI_API_KEY",
32
+ "source_file": "Projects_App1_env.txt",
33
+ "source_path": "D:\\Projects\\App1\\.env"
34
+ }
35
+ Values are NEVER stored.
36
+ """
37
+ path_map = _load_metadata()
38
+ records: list[dict] = []
39
+
40
+ if not COPIED_DIR.exists():
41
+ return records
42
+
43
+ for txt_file in sorted(COPIED_DIR.glob("*.txt")):
44
+ try:
45
+ content = txt_file.read_text(encoding="utf-8", errors="replace")
46
+ except OSError:
47
+ continue
48
+
49
+ original_path = path_map.get(txt_file.name, "unknown")
50
+
51
+ for match in _VAR_RE.finditer(content):
52
+ var_name = match.group(1)
53
+ records.append(
54
+ {
55
+ "variable_name": var_name,
56
+ "source_file": txt_file.name,
57
+ "source_path": original_path,
58
+ }
59
+ )
60
+
61
+ # Deduplicate: same variable from same source file → keep one entry
62
+ seen: set[tuple[str, str]] = set()
63
+ deduped: list[dict] = []
64
+ for r in records:
65
+ key = (r["variable_name"], r["source_file"])
66
+ if key not in seen:
67
+ seen.add(key)
68
+ deduped.append(r)
69
+
70
+ return deduped
71
+
72
+
73
+ if __name__ == "__main__":
74
+ entries = parse_all()
75
+ print(f"Parsed {len(entries)} unique variable entries.")
76
+ for e in entries[:10]:
77
+ print(f" {e['variable_name']} ← {e['source_path']}")
scanner.py ADDED
@@ -0,0 +1,140 @@
1
+ """
2
+ scanner.py
3
+ Recursively scans the configured SCAN_DRIVES roots (default D:\\) for .env files, copies them to DATA_DIR/copied_envs,
4
+ and writes metadata.json.
5
+ """
6
+
7
+ import json
8
+ import shutil
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+
12
+ # ── Constants ────────────────────────────────────────────────────────────────
13
+
14
+ CONFIG_FILE = Path.home() / ".envbot_config.json"
15
+
16
+ def _load_config() -> dict:
17
+ if CONFIG_FILE.exists():
18
+ try:
19
+ return json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
20
+ except Exception:
21
+ pass
22
+ return {
23
+ "scan_drives": ["D:\\"],
24
+ "data_dir": str(Path.home() / ".envbot_data")
25
+ }
26
+
27
+ _config = _load_config()
28
+ SCAN_DRIVES = _config["scan_drives"]
29
+ DATA_DIR = Path(_config["data_dir"])
30
+ COPIED_DIR = DATA_DIR / "copied_envs"
31
+ METADATA_FILE = DATA_DIR / "metadata.json"
32
+
33
+ TARGET_NAMES = {
34
+ ".env",
35
+ ".env.local",
36
+ ".env.development",
37
+ ".env.production",
38
+ ".env.test",
39
+ }
40
+
41
+ IGNORED_DIRS = {
42
+ "node_modules",
43
+ ".git",
44
+ ".next",
45
+ "dist",
46
+ "build",
47
+ "venv",
48
+ ".venv",
49
+ "__pycache__",
50
+ }
51
+
52
+
53
+ # ── Helpers ───────────────────────────────────────────────────────────────────
54
+
55
+ def _safe_name(original: Path) -> str:
56
+ """
57
+ Convert D:\\Projects\\App1\\.env → Projects_App1_env.txt
58
+ Strips the drive letter, replaces path separators and dots with underscores,
59
+ strips leading and trailing underscores, appends .txt.
60
+ """
61
+ parts = list(original.parts)
62
+ if original.drive:
63
+ parts = parts[1:]
64
+
65
+ # Join with underscore, strip leading dots from each part
66
+ sanitised = "_".join(p.lstrip(".").replace(".", "_") for p in parts if p)
67
+ sanitised = sanitised.replace("\\", "_").replace("/", "_").replace(":", "_")
68
+ sanitised = sanitised.strip("_") or "unknown"
69
+ return sanitised + ".txt"
70
+
71
+
72
+ def _walk_env_files(root: Path):
73
+ """Yield Path objects for every matching .env file under root."""
74
+ try:
75
+ entries = list(root.iterdir())
76
+ except (PermissionError, OSError):
77
+ return
78
+
79
+ for entry in entries:
80
+ try:
81
+ if entry.is_symlink():
82
+ continue
83
+ if entry.is_dir():
84
+ if entry.name in IGNORED_DIRS:
85
+ continue
86
+ yield from _walk_env_files(entry)
87
+ elif entry.is_file() and entry.name in TARGET_NAMES:
88
+ yield entry
89
+ except (PermissionError, OSError):
90
+ continue
91
+
92
+
93
+ # ── Public API ────────────────────────────────────────────────────────────────
94
+
95
+ def scan_and_copy() -> list[dict]:
96
+ """
97
+ Scan every root in SCAN_DRIVES for .env files, copy them into COPIED_DIR, write metadata.json.
98
+ Returns the metadata list.
99
+ """
100
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
101
+ COPIED_DIR.mkdir(parents=True, exist_ok=True)
102
+
103
+ metadata: list[dict] = []
104
+ seen_names: dict[str, int] = {}
105
+
106
+ for drive in SCAN_DRIVES:
107
+ scan_root = Path(drive)
108
+ for env_file in _walk_env_files(scan_root):
109
+ base_name = _safe_name(env_file)
110
+
111
+ # Deduplicate: if name already used, append counter
112
+ if base_name in seen_names:
113
+ seen_names[base_name] += 1
114
+ stem = base_name[: -len(".txt")]
115
+ base_name = f"{stem}_{seen_names[base_name]}.txt"
116
+ else:
117
+ seen_names[base_name] = 0
118
+
119
+ dest = COPIED_DIR / base_name
120
+
121
+ try:
122
+ shutil.copy2(env_file, dest)
123
+ except (PermissionError, OSError):
124
+ continue
125
+
126
+ metadata.append(
127
+ {
128
+ "original_path": str(env_file),
129
+ "copied_path": str(dest),
130
+ "discovered_time": datetime.now(timezone.utc).isoformat(),
131
+ }
132
+ )
133
+
134
+ METADATA_FILE.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
135
+ return metadata
136
+
137
+
138
+ if __name__ == "__main__":
139
+ records = scan_and_copy()
140
+ print(f"Discovered and copied {len(records)} .env file(s).")
search.py ADDED
@@ -0,0 +1,119 @@
1
+ """
2
+ search.py
3
+ Loads the persisted FAISS index and metadata, encodes a query,
4
+ and returns the top-K matching variable names with optional source info.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from functools import lru_cache
12
+
13
+ import faiss
14
+ import numpy as np
15
+ import os
16
+
17
+ os.environ["TRANSFORMERS_NO_TF"] = "1"
18
+ os.environ["USE_TF"] = "0"
19
+
20
+ from sentence_transformers import SentenceTransformer
21
+
22
+ from scanner import DATA_DIR
23
+ from indexer import FAISS_INDEX_PATH, ENV_METADATA_PATH, MODEL_NAME
24
+
25
+ DEFAULT_K = 3
26
+
27
+
28
+ # ── Lazy singletons ───────────────────────────────────────────────────────────
29
+
30
+ @lru_cache(maxsize=1)
31
+ def _get_model() -> SentenceTransformer:
32
+ return SentenceTransformer(MODEL_NAME)
33
+
34
+
35
+ @lru_cache(maxsize=1)
36
+ def _get_index() -> faiss.Index:
37
+ if not FAISS_INDEX_PATH.exists():
38
+ raise FileNotFoundError(
39
+ f"FAISS index not found at {FAISS_INDEX_PATH}. "
40
+ "Run `envbot --reindex` first."
41
+ )
42
+ return faiss.read_index(str(FAISS_INDEX_PATH))
43
+
44
+
45
+ @lru_cache(maxsize=1)
46
+ def _get_metadata() -> list[dict]:
47
+ if not ENV_METADATA_PATH.exists():
48
+ raise FileNotFoundError(
49
+ f"Metadata not found at {ENV_METADATA_PATH}. "
50
+ "Run `envbot --reindex` first."
51
+ )
52
+ return json.loads(ENV_METADATA_PATH.read_text(encoding="utf-8"))
53
+
54
+
55
+ # ── Public search API ─────────────────────────────────────────────────────────
56
+
57
+ class SearchResult:
58
+ __slots__ = ("variable_name", "source_file", "source_path")
59
+
60
+ def __init__(self, variable_name: str, source_file: str, source_path: str) -> None:
61
+ self.variable_name = variable_name
62
+ self.source_file = source_file
63
+ self.source_path = source_path
64
+
65
+
66
+ def search(query: str, k: int = DEFAULT_K) -> list[SearchResult]:
67
+ """
68
+ Embed *query*, run FAISS inner-product search, return up to *k* results.
69
+ Deduplicates on (variable_name, source_path): if the same name appears in multiple
70
+ source files, each unique (name, path) pair is kept, but we avoid
71
+ returning the exact same variable+path combo more than once.
72
+ """
73
+ model = _get_model()
74
+ index = _get_index()
75
+ metadata = _get_metadata()
76
+
77
+ if index.ntotal == 0:
78
+ return []
79
+
80
+ # Encode and normalise query
81
+ vec: np.ndarray = model.encode(
82
+ [query],
83
+ convert_to_numpy=True,
84
+ normalize_embeddings=True,
85
+ ).astype(np.float32)
86
+
87
+ # Retrieve more candidates so dedup doesn't leave us short
88
+ fetch_k = min(k * 4, index.ntotal)
89
+ _, indices = index.search(vec, fetch_k)
90
+
91
+ results: list[SearchResult] = []
92
+ seen: set[tuple[str, str]] = set()
93
+
94
+ for idx in indices[0]:
95
+ if idx < 0 or idx >= len(metadata):
96
+ continue
97
+ rec = metadata[idx]
98
+ key = (rec["variable_name"], rec["source_path"])
99
+ if key in seen:
100
+ continue
101
+ seen.add(key)
102
+ results.append(
103
+ SearchResult(
104
+ variable_name=rec["variable_name"],
105
+ source_file=rec["source_file"],
106
+ source_path=rec["source_path"],
107
+ )
108
+ )
109
+ if len(results) >= k:
110
+ break
111
+
112
+ return results
113
+
114
+
115
+ if __name__ == "__main__":
116
+ import sys
117
+ q = " ".join(sys.argv[1:]) or "mongodb connection string"
118
+ for r in search(q):
119
+ print(r.variable_name, "←", r.source_path)