envbot 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli.py +481 -0
- envbot-1.0.0.dist-info/METADATA +304 -0
- envbot-1.0.0.dist-info/RECORD +11 -0
- envbot-1.0.0.dist-info/WHEEL +5 -0
- envbot-1.0.0.dist-info/entry_points.txt +2 -0
- envbot-1.0.0.dist-info/licenses/LICENSE +21 -0
- envbot-1.0.0.dist-info/top_level.txt +5 -0
- indexer.py +187 -0
- parser.py +77 -0
- scanner.py +140 -0
- search.py +119 -0
cli.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli.py
|
|
3
|
+
Entry point for the envbot CLI.
|
|
4
|
+
|
|
5
|
+
Usage
|
|
6
|
+
-----
|
|
7
|
+
envbot "mongodb connection string"
|
|
8
|
+
envbot --k 5 "openai key"
|
|
9
|
+
envbot --show-source "azure storage"
|
|
10
|
+
envbot --reindex
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import sys
|
|
16
|
+
import time
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
import typer
|
|
20
|
+
from rich.console import Console
|
|
21
|
+
from rich.panel import Panel
|
|
22
|
+
from rich.table import Table
|
|
23
|
+
from rich.text import Text
|
|
24
|
+
from rich.progress import (
|
|
25
|
+
Progress,
|
|
26
|
+
SpinnerColumn,
|
|
27
|
+
TextColumn,
|
|
28
|
+
BarColumn,
|
|
29
|
+
TaskProgressColumn,
|
|
30
|
+
TimeElapsedColumn,
|
|
31
|
+
)
|
|
32
|
+
from rich.rule import Rule
|
|
33
|
+
from rich import box
|
|
34
|
+
|
|
35
|
+
app = typer.Typer(
|
|
36
|
+
name="envbot",
|
|
37
|
+
help="Local AI-powered environment variable search assistant.",
|
|
38
|
+
add_completion=False,
|
|
39
|
+
invoke_without_command=True,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
console = Console(highlight=False)
|
|
43
|
+
err_console = Console(stderr=True, highlight=False)
|
|
44
|
+
|
|
45
|
+
# ── Branding ──────────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
BANNER = r"""
|
|
48
|
+
_ _
|
|
49
|
+
___ _ ____ _____| |__ ___ | |_
|
|
50
|
+
/ _ | '_ \ \ / / __| '_ \ / _ \| __|
|
|
51
|
+
| __| | | \ V /| (__| |_) | (_) | |_
|
|
52
|
+
\___|_| |_|\_/ \___|_.__/ \___/ \__|
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
ACCENT = "bright_cyan"
|
|
56
|
+
SUCCESS = "bright_green"
|
|
57
|
+
WARN = "bright_yellow"
|
|
58
|
+
ERR = "bright_red"
|
|
59
|
+
DIM = "dim white"
|
|
60
|
+
HIGHLIGHT = "bold bright_magenta"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _print_banner() -> None:
    """Show the envbot logo and tagline framed in a double-edged accent panel."""
    logo = Text(BANNER, style=f"bold {ACCENT}")
    slogan = Text(" AI-powered .env variable search assistant\n", style=f"italic {DIM}")
    # Logo first, tagline directly beneath it, both inside one panel.
    framed = Panel(
        Text.assemble(logo, slogan),
        border_style=ACCENT,
        box=box.DOUBLE_EDGE,
        padding=(0, 2),
    )
    console.print(framed)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ── Reindex pipeline ─────────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
def _do_reindex() -> None:
    """Run scan -> parse -> embed/index, reporting each stage on a progress display.

    Raises:
        typer.Exit: with code 1 when parsing produces no records.
    """
    # Pipeline modules are imported here, inside the function, not at module import.
    from scanner import scan_and_copy
    from parser import parse_all
    from indexer import build_index

    console.print()
    console.print(Rule("[bold bright_cyan] REINDEX PIPELINE [/bold bright_cyan]", style=ACCENT))
    console.print()

    columns = (
        SpinnerColumn("dots", style=f"bold {WARN}"),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(bar_width=30, style=DIM, complete_style=ACCENT, finished_style=SUCCESS),
        TaskProgressColumn(),
        TimeElapsedColumn(),
    )

    with Progress(*columns, console=console, transient=False) as tracker:
        # Stage 1: scan. The bar is nudged forward before the blocking call.
        scan_id = tracker.add_task(f"[{WARN}]Scanning drives for .env files...", total=100)
        tracker.update(scan_id, advance=10)
        found = scan_and_copy()
        tracker.update(
            scan_id,
            completed=100,
            description=f"[{SUCCESS}]Scanned -- {len(found)} env file(s) found",
        )

        # Stage 2: parse.
        parse_id = tracker.add_task(f"[{WARN}]Parsing environment variables...", total=100)
        tracker.update(parse_id, advance=10)
        records = parse_all()
        tracker.update(
            parse_id,
            completed=100,
            description=f"[{SUCCESS}]Parsed -- {len(records)} variables extracted",
        )

        if not records:
            # Stop the live display first so the error panel is not drawn over it.
            tracker.stop()
            nothing_found = Panel(
                "[bold]No variables found.[/bold]\n"
                "Make sure your configured drives contain .env files.",
                title="Error",
                border_style=ERR,
                box=box.ROUNDED,
            )
            err_console.print(nothing_found)
            raise typer.Exit(code=1)

        # Stage 3: embed and index.
        embed_id = tracker.add_task(
            f"[{WARN}]Generating embeddings & building FAISS index...", total=100
        )
        tracker.update(embed_id, advance=5)
        build_index(records)
        tracker.update(
            embed_id,
            completed=100,
            description=f"[{SUCCESS}]Indexed -- {len(records)} vectors saved",
        )

    console.print()
    done = Panel(
        f"[bold {SUCCESS}]Reindexing complete![/bold {SUCCESS}]\n"
        f"[{DIM}]Your environment variables are ready to search.",
        border_style=SUCCESS,
        box=box.ROUNDED,
        padding=(0, 2),
    )
    console.print(done)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# ── Configuration wizard ─────────────────────────────────────────────────────
|
|
156
|
+
|
|
157
|
+
def run_config_wizard() -> None:
    """Interactively pick scan locations and a storage directory, then save them.

    If ``~/.envbot_config.json`` holds a non-empty JSON object, it is displayed
    and the user may keep it; in that case the function returns without
    rewriting anything. Otherwise the wizard prompts for drive letters (on
    Windows) or directory paths (elsewhere), then for a writable storage
    directory, and writes both to the config file under the keys
    ``scan_drives`` and ``data_dir``.
    """
    import json
    import string
    import os
    from pathlib import Path

    # Anything typed by the user or read from disk is escaped before it is
    # placed inside a Rich markup string, so "[...]" in a path is shown as-is.
    from rich.markup import escape

    CONFIG_FILE = Path.home() / ".envbot_config.json"

    _print_banner()

    console.print(
        Rule("[bold bright_cyan] CONFIGURATION [/bold bright_cyan]", style=ACCENT)
    )
    console.print()

    # Load existing config if available. A broken file is reported and ignored
    # (the wizard then runs from scratch) instead of being silently swallowed.
    existing_config = None
    if CONFIG_FILE.exists():
        try:
            loaded = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            console.print(
                f"[{WARN}] Ignoring unreadable config '{escape(str(CONFIG_FILE))}': "
                f"{escape(str(exc))}[/{WARN}]"
            )
        else:
            if isinstance(loaded, dict):
                existing_config = loaded
            else:
                console.print(
                    f"[{WARN}] Ignoring config '{escape(str(CONFIG_FILE))}': "
                    f"expected a JSON object.[/{WARN}]"
                )

    if existing_config:
        cfg_table = Table(
            box=box.SIMPLE_HEAVY,
            show_header=True,
            header_style=f"bold {ACCENT}",
            border_style=DIM,
            padding=(0, 2),
        )
        cfg_table.add_column("Setting", style=f"bold {WARN}")
        cfg_table.add_column("Value", style="bold white")

        # Tolerate hand-edited files: coerce whatever is stored into strings.
        saved_drives = existing_config.get("scan_drives", [])
        if not isinstance(saved_drives, list):
            saved_drives = [saved_drives]
        drives_str = ", ".join(str(d) for d in saved_drives)
        stored_str = str(existing_config.get("data_dir", ""))
        cfg_table.add_row("Scan Drives", escape(drives_str))
        cfg_table.add_row("Storage Path", escape(stored_str))

        console.print(
            Panel(cfg_table, title="[bold]Current Configuration[/bold]",
                  border_style=ACCENT, box=box.ROUNDED, padding=(1, 2))
        )
        console.print()

        use_existing = typer.confirm("Use this configuration for reindexing?", default=True)
        if use_existing:
            return

        console.print()

    # Step 1: Detect available drives / directories
    is_windows = sys.platform.startswith("win")
    selected_drives = []

    if is_windows:
        drives = [
            f"{letter}:\\"
            for letter in string.ascii_uppercase
            if os.path.exists(f"{letter}:\\")
        ]

        drive_table = Table(box=box.SIMPLE, show_header=False, border_style=DIM)
        drive_table.add_column("Drive", style=f"bold {ACCENT}")
        for d in drives:
            drive_table.add_row(d)
        console.print(
            Panel(drive_table, title=f"[bold {ACCENT}]Available Drives[/bold {ACCENT}]",
                  border_style=ACCENT, box=box.ROUNDED, padding=(0, 2))
        )
        console.print()

        # Keep the historical default "D", but fall back to the first detected
        # drive when D: is absent so that simply pressing Enter is accepted.
        if "D:\\" in drives or not drives:
            default_letter = "D"
        else:
            default_letter = drives[0][0]

        while not selected_drives:
            drives_input = typer.prompt(
                "Enter drive letter(s) to scan (comma-separated, e.g. C, D)",
                default=default_letter,
            )
            for part in drives_input.split(","):
                # Accept "c", "C:", and "C:\" alike.
                letter = part.strip().upper().replace(":", "").replace("\\", "")
                if not letter:
                    continue
                drive_path = f"{letter}:\\"
                if os.path.exists(drive_path):
                    if drive_path not in selected_drives:
                        selected_drives.append(drive_path)
                else:
                    console.print(
                        f"[{WARN}] Drive {escape(letter)}: does not exist or is not ready.[/{WARN}]"
                    )

            if not selected_drives:
                console.print(f"[{ERR}] Please select at least one valid drive.[/{ERR}]")
    else:
        default_dir = str(Path.home())
        console.print(
            Panel(
                f"[{DIM}]Platform: [bold]{sys.platform}[/bold]\n"
                f"Enter one or more directory paths to scan recursively.",
                title=f"[bold {ACCENT}]Directory Selection[/bold {ACCENT}]",
                border_style=ACCENT,
                box=box.ROUNDED,
                padding=(0, 2),
            )
        )
        console.print()

        while not selected_drives:
            paths_input = typer.prompt(
                "Enter directory path(s) to scan (comma-separated)",
                default=default_dir,
            )
            for part in paths_input.split(","):
                path_str = part.strip()
                if not path_str:
                    continue
                path_obj = Path(path_str).expanduser()
                if path_obj.exists() and path_obj.is_dir():
                    res_path = str(path_obj.resolve())
                    if res_path not in selected_drives:
                        selected_drives.append(res_path)
                else:
                    console.print(
                        f"[{WARN}] Directory '{escape(path_str)}' does not exist or is not a folder.[/{WARN}]"
                    )

            if not selected_drives:
                console.print(f"[{ERR}] Please select at least one valid directory to scan.[/{ERR}]")

    console.print()

    # Step 2: Storage path
    default_storage = str(Path.home() / ".envbot_data")
    valid_storage = False
    selected_storage = ""

    while not valid_storage:
        storage_input = typer.prompt(
            "Enter path to store envbot data",
            default=default_storage,
        )
        # expanduser() so "~/..." means the home directory here too, exactly as
        # it does for the scan directories above.
        storage_path = Path(storage_input.strip()).expanduser()
        try:
            storage_path.mkdir(parents=True, exist_ok=True)
            # Probe writability by creating and removing a throwaway file.
            test_file = storage_path / ".write_test"
            test_file.touch()
            test_file.unlink()
            selected_storage = str(storage_path.resolve())
            valid_storage = True
        except OSError as e:  # PermissionError is a subclass of OSError
            console.print(
                f"[{ERR}] Cannot write to '{escape(str(storage_path))}': {escape(str(e))}[/{ERR}]"
            )
            console.print(" Please enter a different path.")

    # Save config
    new_config = {
        "scan_drives": selected_drives,
        "data_dir": selected_storage,
    }
    CONFIG_FILE.write_text(json.dumps(new_config, indent=2), encoding="utf-8")

    console.print()
    # Summary table
    summary = Table(box=box.SIMPLE_HEAVY, show_header=True,
                    header_style=f"bold {ACCENT}", border_style=DIM, padding=(0, 2))
    summary.add_column("Setting", style=f"bold {WARN}")
    summary.add_column("Value", style="bold white")
    summary.add_row("Scan Drives", escape(", ".join(selected_drives)))
    summary.add_row("Storage Path", escape(selected_storage))

    console.print(
        Panel(summary, title=f"[bold {SUCCESS}]Configuration Saved[/bold {SUCCESS}]",
              border_style=SUCCESS, box=box.DOUBLE_EDGE, padding=(1, 2))
    )
    console.print()
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# ── Search result rendering ───────────────────────────────────────────────────
|
|
329
|
+
|
|
330
|
+
def _render_results(
    results: list,
    show_source: bool,
    show_value: bool,
) -> None:
    """Print search results as a Rich table.

    Args:
        results: Search hits; each item is read for ``variable_name`` and,
            depending on the flags, ``source_file`` and ``source_path``.
        show_source: Add a "Source" column containing ``source_path``.
        show_value: Add a "Value" column; the value is looked up at call time
            in the copied env file ``COPIED_DIR / source_file``.
    """
    import re
    from scanner import COPIED_DIR

    def _get_value(source_file: str, var_name: str) -> str:
        """Return the value assigned to *var_name* in the copied env file.

        Returns a ``<...>`` placeholder when the file is missing, cannot be
        read, or has no assignment for the variable.
        """
        copied_path = COPIED_DIR / source_file
        if not copied_path.exists():
            return "<file not found>"
        try:
            content = copied_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return "<error reading file>"

        # Only spaces/tabs are allowed around the name and "=": with "\s" the
        # pattern could run past the end of an empty "NAME=" line and capture
        # the following line as the value.
        pattern = re.compile(
            r"^[ \t]*(?:export[ \t]+)?" + re.escape(var_name) + r"[ \t]*=[ \t]*(.*)$",
            re.MULTILINE,
        )
        match = pattern.search(content)
        if not match:
            return "<not found>"

        val = match.group(1).strip()
        # Drop one pair of matching surrounding quotes, if present.
        if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
            val = val[1:-1]
        return val

    # Build the results table
    table = Table(
        box=box.ROUNDED,
        show_header=True,
        header_style=f"bold {ACCENT}",
        border_style=ACCENT,
        padding=(0, 1),
        title=f"[bold {ACCENT}]Search Results[/bold {ACCENT}]",
        title_style=f"bold {ACCENT}",
        caption=f"[{DIM}]{len(results)} result(s) found",
        caption_style=DIM,
    )

    table.add_column("#", style=f"bold {DIM}", justify="right", width=3)
    table.add_column("Variable", style=f"bold {SUCCESS}", min_width=15)
    if show_value:
        table.add_column("Value", style=f"bold {WARN}", min_width=15)
    if show_source:
        table.add_column("Source", style=f"{DIM}", min_width=20)

    for i, r in enumerate(results, 1):
        row = [str(i), r.variable_name]
        # Values and paths are arbitrary text. Wrapping them in Text stops Rich
        # from parsing "[...]" inside them as console markup.
        if show_value:
            row.append(Text(_get_value(r.source_file, r.variable_name)))
        if show_source:
            row.append(Text(str(r.source_path)))
        table.add_row(*row)

    console.print()
    console.print(table)
    console.print()
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
# ── Main callback ─────────────────────────────────────────────────────────────
|
|
395
|
+
|
|
396
|
+
@app.callback()
def main(
    ctx: typer.Context,
    query: Optional[str] = typer.Argument(
        None, help="Natural language search query."
    ),
    k: int = typer.Option(
        3, "--k", "-k", help="Number of results to return.", min=1, max=100
    ),
    show_source: bool = typer.Option(
        False, "--show-source", help="Print the source .env file path."
    ),
    show_value: bool = typer.Option(
        False, "--show-value", "-v", help="Print the variable value (read on-the-fly)."
    ),
    reindex: bool = typer.Option(
        False, "--reindex", help="Rescan configured drives and rebuild the search index."
    ),
) -> None:
    # CLI dispatcher. In order:
    #   --reindex            -> run the config wizard, rebuild the index, exit
    #   no / blank query     -> print banner and help, exit
    #   otherwise            -> search and render the results table
    # Search failures are reported on stderr and exit with status 1.

    # ── Reindex ────────────────────────────────────────────────────────────
    if reindex:
        run_config_wizard()
        _do_reindex()
        raise typer.Exit()

    # ── No query supplied -> print help ────────────────────────────────────
    # A whitespace-only query is treated the same as a missing one.
    query = query.strip() if query else ""
    if not query:
        _print_banner()
        console.print(ctx.get_help())
        raise typer.Exit()

    # The query is free text: escape it before embedding it in Rich markup.
    from rich.markup import escape

    shown_query = escape(query)

    # ── Search ─────────────────────────────────────────────────────────────
    try:
        from search import search

        with Progress(
            SpinnerColumn("dots", style=f"bold {ACCENT}"),
            # The column's format string is run through str.format(), so the
            # query must not be part of it (a "{" or "}" would break it). It
            # is supplied as the task description instead.
            TextColumn("{task.description}"),
            console=console,
            transient=True,
        ) as progress:
            progress.add_task(
                f"[{ACCENT}]Searching for '[bold]{shown_query}[/bold]'...",
                total=None,
            )
            results = search(query, k=k)

    except FileNotFoundError as exc:
        err_console.print(
            Panel(
                f"[bold]{escape(str(exc))}[/bold]",
                title="Index Not Found",
                border_style=ERR,
                box=box.ROUNDED,
            )
        )
        raise typer.Exit(code=1)
    except Exception as exc:
        # Top-level boundary: report any other search failure instead of
        # showing a traceback.
        err_console.print(
            Panel(
                f"[bold]{escape(str(exc))}[/bold]",
                title="Search Error",
                border_style=ERR,
                box=box.ROUNDED,
            )
        )
        raise typer.Exit(code=1)

    if not results:
        console.print(
            Panel(
                f"No matches found for '[bold]{shown_query}[/bold]'.\n"
                f"[{DIM}]Try a different search term or run --reindex.",
                border_style=WARN,
                box=box.ROUNDED,
            )
        )
        raise typer.Exit()

    _render_results(results, show_source, show_value)
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def entry() -> None:
    """Console-script hook: hand control to the Typer application object."""
    app()


# Allow running the module directly as a script.
if __name__ == "__main__":
    entry()
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: envbot
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Local AI-powered environment variable search assistant
|
|
5
|
+
Home-page: https://github.com/ravindraogg/envbot
|
|
6
|
+
Author: Ravi
|
|
7
|
+
Keywords: env environment variables search ai faiss semantic dotenv cli
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Utilities
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: sentence-transformers>=2.7.0
|
|
21
|
+
Requires-Dist: faiss-cpu>=1.8.0
|
|
22
|
+
Requires-Dist: typer>=0.12.0
|
|
23
|
+
Requires-Dist: rich>=13.7.0
|
|
24
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
25
|
+
Requires-Dist: numpy>=2.1.0
|
|
26
|
+
Dynamic: author
|
|
27
|
+
Dynamic: classifier
|
|
28
|
+
Dynamic: description
|
|
29
|
+
Dynamic: description-content-type
|
|
30
|
+
Dynamic: home-page
|
|
31
|
+
Dynamic: keywords
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
Dynamic: requires-dist
|
|
34
|
+
Dynamic: requires-python
|
|
35
|
+
Dynamic: summary
|
|
36
|
+
|
|
37
|
+
# envbot
|
|
38
|
+
|
|
39
|
+
**Local AI-powered environment variable search assistant.**
|
|
40
|
+
|
|
41
|
+
Stop digging through dozens of `.env` files scattered across your projects. `envbot` indexes all your environment variables and lets you find them instantly using natural language search -- powered by sentence embeddings and FAISS.
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
envbot -v --show-source "mongodb connection string"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
Search Results
|
|
49
|
+
+--------------------------------------------------------------------------+
|
|
50
|
+
| # | Variable | Value | Source |
|
|
51
|
+
|-----+-----------------+------------------------------+-------------------|
|
|
52
|
+
| 1 | MONGO_URI | mongodb+srv://user:pass@c... | D:\App\.env |
|
|
53
|
+
| 2 | DATABASE_URL | mongodb://localhost:27017/... | D:\Api\.env |
|
|
54
|
+
| 3 | DB_CONNECTION | mongodb://admin@cluster0... | D:\Svc\.env |
|
|
55
|
+
+--------------------------------------------------------------------------+
|
|
56
|
+
3 result(s) found
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Features
|
|
62
|
+
|
|
63
|
+
- **AI-Powered Semantic Search** -- Understands what you mean, not just exact matches. Search for `"stripe payment key"` and it finds `STRIPE_SECRET_KEY`.
|
|
64
|
+
- **Multi-Drive / Multi-Directory Scanning** -- Scan one or more drives (Windows) or directories (macOS/Linux) in a single index.
|
|
65
|
+
- **Cross-Platform** -- Works on Windows, macOS, and Linux.
|
|
66
|
+
- **Interactive Configuration Wizard** -- No config files to edit manually. The CLI guides you through setup.
|
|
67
|
+
- **Zero-Leak Security** -- Variable values are **never stored** in the index. They are only read on-the-fly when you explicitly request them with `-v`.
|
|
68
|
+
- **Beautiful CLI** -- Rich terminal UI with progress bars, spinners, tables, and color-coded output.
|
|
69
|
+
- **Fast** -- FAISS vector search returns results in milliseconds after the initial model load.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Installation
|
|
74
|
+
|
|
75
|
+
### Prerequisites
|
|
76
|
+
|
|
77
|
+
- **Python 3.11+** is required.
|
|
78
|
+
- **pip** (comes with Python).
|
|
79
|
+
|
|
80
|
+
### Option 1: Install from PyPI
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pip install envbot
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Option 2: Install from GitHub
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install git+https://github.com/ravindraogg/envbot.git
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Option 3: Install from Source
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
git clone https://github.com/ravindraogg/envbot.git
|
|
96
|
+
cd envbot
|
|
97
|
+
pip install .
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
> **Note:** On first run, `envbot` will automatically download the `all-MiniLM-L6-v2` sentence transformer model (~80 MB). This happens only once.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Quick Start
|
|
105
|
+
|
|
106
|
+
### Step 1: Configure & Index
|
|
107
|
+
|
|
108
|
+
Run the reindex command. The interactive wizard will guide you:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
envbot --reindex
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
You will be prompted to:
|
|
115
|
+
|
|
116
|
+
1. **Select drives/directories to scan** -- Choose which drives (Windows: `C, D, E`) or directories (Linux/macOS: `/home/user/projects`) to scan for `.env` files.
|
|
117
|
+
2. **Set storage path** -- Choose where to store the copied `.env` files and the FAISS index. Defaults to `~/.envbot_data`.
|
|
118
|
+
|
|
119
|
+
The wizard saves your preferences to `~/.envbot_config.json`. On subsequent runs, it will ask if you want to reuse the saved configuration.
|
|
120
|
+
|
|
121
|
+
### Step 2: Search
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
envbot "database"
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
That's it! You'll see a table of matching environment variable names.
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Usage
|
|
132
|
+
|
|
133
|
+
All options must be placed **before** the search query.
|
|
134
|
+
|
|
135
|
+
### Basic Search
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
envbot "openai api key"
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Show Variable Values (`-v`)
|
|
142
|
+
|
|
143
|
+
Read and display the actual value from the `.env` file on-the-fly:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
envbot -v "stripe"
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Show Source File Path (`--show-source`)
|
|
150
|
+
|
|
151
|
+
See which `.env` file each variable came from:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
envbot --show-source "database"
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Change Number of Results (`-k`)
|
|
158
|
+
|
|
159
|
+
By default, 3 results are returned. Get more:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
envbot -k 10 "api key"
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Combine All Flags
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
envbot -v --show-source -k 5 "mongodb"
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Rebuild the Index
|
|
172
|
+
|
|
173
|
+
Re-scan all configured drives and rebuild the search database:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
envbot --reindex
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### View Help
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
envbot --help
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## How It Works
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
+------------------+ +------------------+ +------------------+
|
|
191
|
+
| 1. SCAN | --> | 2. PARSE | --> | 3. EMBED |
|
|
192
|
+
| Recursively | | Extract variable | | Generate vector |
|
|
193
|
+
| find .env files | | names (no values)| | embeddings via |
|
|
194
|
+
| across drives | | from all files | | SentenceTransf. |
|
|
195
|
+
+------------------+ +------------------+ +------------------+
|
|
196
|
+
|
|
|
197
|
+
v
|
|
198
|
+
+------------------+ +------------------+ +------------------+
|
|
199
|
+
| 6. DISPLAY | <-- | 5. RANK | <-- | 4. INDEX |
|
|
200
|
+
| Rich table with | | Cosine similar. | | Store vectors |
|
|
201
|
+
| colors & values | | via FAISS | | in FAISS index |
|
|
202
|
+
+------------------+ +------------------+ +------------------+
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Architecture
|
|
206
|
+
|
|
207
|
+
| File | Purpose |
|
|
208
|
+
|---------------|---------------------------------------------------------|
|
|
209
|
+
| `cli.py` | Entry point, argument parsing, Rich UI rendering |
|
|
210
|
+
| `scanner.py` | Recursively walks drives/directories for `.env` files |
|
|
211
|
+
| `parser.py` | Extracts variable names from copied `.env` files |
|
|
212
|
+
| `indexer.py` | Generates embeddings and builds the FAISS vector index |
|
|
213
|
+
| `search.py` | Encodes queries and performs FAISS similarity search |
|
|
214
|
+
|
|
215
|
+
### Security Model
|
|
216
|
+
|
|
217
|
+
- **Values are never stored** in the FAISS index or metadata files.
|
|
218
|
+
- Only variable **names** and **source file paths** are persisted.
|
|
219
|
+
- When you use `-v`, values are read on-the-fly from the local copy at search time.
|
|
220
|
+
- Copied `.env` files are stored in your configured data directory (default: `~/.envbot_data/copied_envs/`).
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Configuration
|
|
225
|
+
|
|
226
|
+
Your configuration is stored at `~/.envbot_config.json`:
|
|
227
|
+
|
|
228
|
+
```json
|
|
229
|
+
{
|
|
230
|
+
"scan_drives": ["D:\\", "E:\\"],
|
|
231
|
+
"data_dir": "C:\\Users\\you\\.envbot_data"
|
|
232
|
+
}
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
| Key | Description |
|
|
236
|
+
|----------------|--------------------------------------------------|
|
|
237
|
+
| `scan_drives` | List of drives (Windows) or directories to scan |
|
|
238
|
+
| `data_dir` | Where the index, metadata, and copies are stored |
|
|
239
|
+
|
|
240
|
+
You can edit this file manually or re-run `envbot --reindex` to use the wizard.
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## Command Reference
|
|
245
|
+
|
|
246
|
+
| Command | Description |
|
|
247
|
+
|------------------------------------------------|--------------------------------------|
|
|
248
|
+
| `envbot "query"` | Search for matching variables |
|
|
249
|
+
| `envbot -v "query"` | Search and show values |
|
|
250
|
+
| `envbot --show-source "query"` | Search and show source file paths |
|
|
251
|
+
| `envbot -k 10 "query"` | Return up to 10 results |
|
|
252
|
+
| `envbot -v --show-source -k 5 "query"` | All flags combined |
|
|
253
|
+
| `envbot --reindex` | Configure drives and rebuild index |
|
|
254
|
+
| `envbot --help` | Show help message |
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Requirements
|
|
259
|
+
|
|
260
|
+
| Package | Version | Purpose |
|
|
261
|
+
|-----------------------|------------|--------------------------------|
|
|
262
|
+
| `sentence-transformers` | >= 2.7.0 | Semantic text embeddings |
|
|
263
|
+
| `faiss-cpu` | >= 1.8.0 | Vector similarity search |
|
|
264
|
+
| `typer` | >= 0.12.0 | CLI framework |
|
|
265
|
+
| `rich` | >= 13.7.0 | Terminal UI (tables, spinners) |
|
|
266
|
+
| `python-dotenv` | >= 1.0.0 | .env file parsing |
|
|
267
|
+
| `numpy` | >= 2.1.0 | Numerical operations |
|
|
268
|
+
| `torch` | >= 2.0.0 | ML backend for transformers (not declared by envbot itself; installed as a dependency of `sentence-transformers`) |
|
|
269
|
+
|
|
270
|
+
---
|
|
271
|
+
|
|
272
|
+
## Platform Support
|
|
273
|
+
|
|
274
|
+
| Platform | Status | Drive/Path Format |
|
|
275
|
+
|----------------|-------------|---------------------------|
|
|
276
|
+
| Windows 10/11 | Supported | `C:\`, `D:\`, `E:\` |
|
|
277
|
+
| macOS | Supported | `/Users/you/projects` |
|
|
278
|
+
| Linux | Supported | `/home/you/projects` |
|
|
279
|
+
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
## Troubleshooting
|
|
283
|
+
|
|
284
|
+
### "FAISS index not found" error
|
|
285
|
+
|
|
286
|
+
You need to build the index first:
|
|
287
|
+
|
|
288
|
+
```bash
|
|
289
|
+
envbot --reindex
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### Slow first search
|
|
293
|
+
|
|
294
|
+
The first search takes a few seconds because the AI model needs to load into memory. Subsequent searches in the same session are instant.
|
|
295
|
+
|
|
296
|
+
### Permission errors on Windows
|
|
297
|
+
|
|
298
|
+
If you see `PermissionError: [WinError 5]`, make sure the storage path is set to a user-writable directory (the default `~/.envbot_data` should work). Re-run `envbot --reindex` to reconfigure.
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
## License
|
|
303
|
+
|
|
304
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
cli.py,sha256=ptA5eaymVwO3saANsOC8-mpZUvxxU9QiWedBio2fXgE,16239
|
|
2
|
+
indexer.py,sha256=OqcsJ0_TezEHpgiLpHFvrJFPYYl5qHfIV47BdTyeILk,7412
|
|
3
|
+
parser.py,sha256=vHYqJwJ9ke1qkhcAoinOq1oLfU-5vo58vhak8slTbMc,2229
|
|
4
|
+
scanner.py,sha256=PBv9ABXl-64krnGxQpNSTGE1Vg7N1w7PDAtpTMIyfR8,4365
|
|
5
|
+
search.py,sha256=_Q5lKAKYuvUBomfCCOIrQTZlccFhxioixpp7mDnGD7c,3567
|
|
6
|
+
envbot-1.0.0.dist-info/licenses/LICENSE,sha256=Ucf2A_iir0c2aTbnUNBXd1M1-_plnHrxy-mv2_Wflfs,1061
|
|
7
|
+
envbot-1.0.0.dist-info/METADATA,sha256=8GZa7EIDTqcOE0ZukXCMnUkK5bsv9tIv4bSygrJH_rA,10128
|
|
8
|
+
envbot-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
envbot-1.0.0.dist-info/entry_points.txt,sha256=Xe5cFNsZfSA28KLgxT_8uGHsoHQwMGSHg853BvRiblM,37
|
|
10
|
+
envbot-1.0.0.dist-info/top_level.txt,sha256=IP_R7JTzFR6DHIcNZRzUDtn7cvQxt291Cua0VXVxng4,34
|
|
11
|
+
envbot-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ravi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
indexer.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""
|
|
2
|
+
indexer.py
|
|
3
|
+
Generates semantic search text for each variable, embeds with
|
|
4
|
+
all-MiniLM-L6-v2, builds a FAISS index, and persists everything to disk.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
import faiss
|
|
12
|
+
import numpy as np
|
|
13
|
+
import os
|
|
14
|
+
|
|
15
|
+
os.environ["TRANSFORMERS_NO_TF"] = "1"
|
|
16
|
+
os.environ["USE_TF"] = "0"
|
|
17
|
+
|
|
18
|
+
from sentence_transformers import SentenceTransformer
|
|
19
|
+
|
|
20
|
+
from scanner import DATA_DIR
|
|
21
|
+
from parser import parse_all
|
|
22
|
+
|
|
23
|
+
# ── Paths ─────────────────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
FAISS_INDEX_PATH = DATA_DIR / "index.faiss"
|
|
26
|
+
ENV_METADATA_PATH = DATA_DIR / "env_metadata.json"
|
|
27
|
+
|
|
28
|
+
MODEL_NAME = "all-MiniLM-L6-v2"
|
|
29
|
+
|
|
30
|
+
# ── Semantic expansion map ────────────────────────────────────────────────────
|
|
31
|
+
# Maps token fragments (lowercase) that appear in variable names to extra
|
|
32
|
+
# semantic context words injected into the search text.
|
|
33
|
+
_EXPANSION: dict[str, str] = {
|
|
34
|
+
"openai": "openai gpt llm chatgpt ai language model token",
|
|
35
|
+
"gpt": "gpt openai language model chatgpt llm",
|
|
36
|
+
"anthropic": "anthropic claude ai llm language model",
|
|
37
|
+
"gemini": "gemini google ai llm language model",
|
|
38
|
+
"mongo": "mongodb mongo database nosql connection string db",
|
|
39
|
+
"database": "database db sql connection url string",
|
|
40
|
+
"db": "database db sql connection url string",
|
|
41
|
+
"postgres": "postgresql postgres relational database sql connection",
|
|
42
|
+
"mysql": "mysql relational database sql connection string",
|
|
43
|
+
"redis": "redis cache in-memory key-value store connection",
|
|
44
|
+
"azure": "azure microsoft cloud storage blob connection string",
|
|
45
|
+
"aws": "aws amazon cloud s3 bucket credentials access key",
|
|
46
|
+
"gcp": "gcp google cloud platform credentials service account",
|
|
47
|
+
"s3": "s3 amazon aws bucket object storage",
|
|
48
|
+
"firebase": "firebase google realtime database auth",
|
|
49
|
+
"supabase": "supabase postgres database backend auth",
|
|
50
|
+
"stripe": "stripe payment gateway api key billing",
|
|
51
|
+
"paypal": "paypal payment gateway api key billing",
|
|
52
|
+
"twilio": "twilio sms messaging phone api key",
|
|
53
|
+
"sendgrid": "sendgrid email smtp delivery api key",
|
|
54
|
+
"mailgun": "mailgun email smtp delivery api key",
|
|
55
|
+
"smtp": "smtp email mail server host port",
|
|
56
|
+
"email": "email smtp mail server sender credentials",
|
|
57
|
+
"jwt": "jwt json web token secret auth authentication",
|
|
58
|
+
"secret": "secret key token authentication signing",
|
|
59
|
+
"auth": "auth authentication authorization token login",
|
|
60
|
+
"api": "api key token access credentials",
|
|
61
|
+
"key": "key secret token credentials api",
|
|
62
|
+
"token": "token auth key bearer access secret",
|
|
63
|
+
"password": "password credential login secret",
|
|
64
|
+
"private": "private key secret credential",
|
|
65
|
+
"public": "public key access endpoint",
|
|
66
|
+
"url": "url uri endpoint host connection string",
|
|
67
|
+
"uri": "uri url endpoint connection string database",
|
|
68
|
+
"host": "host hostname server address url",
|
|
69
|
+
"port": "port number server address network",
|
|
70
|
+
"storage": "storage bucket blob files cloud",
|
|
71
|
+
"cdn": "cdn content delivery network static assets",
|
|
72
|
+
"webhook": "webhook callback url endpoint event",
|
|
73
|
+
"slack": "slack workspace channel api token webhook",
|
|
74
|
+
"github": "github git repository token oauth",
|
|
75
|
+
"google": "google oauth credentials client id secret",
|
|
76
|
+
"facebook": "facebook oauth credentials app id secret",
|
|
77
|
+
"twitter": "twitter x oauth api key bearer token",
|
|
78
|
+
"debug": "debug logging development mode flag",
|
|
79
|
+
"env": "environment mode staging production development",
|
|
80
|
+
"next": "nextjs next.js react frontend",
|
|
81
|
+
"react": "react frontend javascript spa",
|
|
82
|
+
"node": "nodejs node javascript runtime",
|
|
83
|
+
"flask": "flask python web framework",
|
|
84
|
+
"django": "django python web framework",
|
|
85
|
+
"log": "logging log level debug info",
|
|
86
|
+
"region": "region zone cloud datacenter location",
|
|
87
|
+
"bucket": "bucket s3 storage cloud object files",
|
|
88
|
+
"endpoint": "endpoint url api host address",
|
|
89
|
+
"connection":"connection string url database",
|
|
90
|
+
"cert": "certificate ssl tls security",
|
|
91
|
+
"ssl": "ssl tls certificate security https",
|
|
92
|
+
"tls": "tls ssl certificate security https",
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _human_readable(var_name: str) -> str:
|
|
97
|
+
"""
|
|
98
|
+
OPENAI_API_KEY → 'openai api key'
|
|
99
|
+
Convert UPPER_SNAKE_CASE to lowercase spaced words.
|
|
100
|
+
"""
|
|
101
|
+
words = var_name.lower().replace("-", "_").split("_")
|
|
102
|
+
return " ".join(w for w in words if w)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _expand(human: str) -> str:
    """
    Enrich a human-readable variable name with extra semantic context words.

    Every key of _EXPANSION that occurs in *human* contributes its expansion
    string. The original text always comes first, followed by the expansions
    in _EXPANSION order.

    Returns the combined text with duplicate words removed; the first
    occurrence of each word keeps its position.
    """
    # NOTE(review): matching is by substring, so "db" also fires for a name
    # such as "feedback" — confirm this is intended before tightening it.
    chunks: list[str] = [human]
    chunks.extend(
        expansion
        for fragment, expansion in _EXPANSION.items()
        if fragment in human
    )

    # dict.fromkeys keeps insertion order, giving an order-preserving dedup.
    return " ".join(dict.fromkeys(" ".join(chunks).split()))
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def build_search_text(var_name: str) -> str:
    """Return the text that gets embedded for *var_name*.

    The name is first converted to lowercase words and then enriched with
    the matching semantic expansions.
    """
    return _expand(_human_readable(var_name))
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ── Core indexing ─────────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
def build_index(records: list[dict] | None = None) -> None:
    """
    Build (or rebuild) the FAISS index and its companion metadata file.

    records: parsed variable records; when None they are obtained from
        parse_all().

    Each record's variable name is turned into search text, embedded with the
    SentenceTransformer named by MODEL_NAME and added to an inner-product
    index. The index is written to FAISS_INDEX_PATH and a JSON list holding
    only variable_name / source_file / source_path is written to
    ENV_METADATA_PATH.

    Raises RuntimeError when there are no records to index.
    """
    rows = parse_all() if records is None else records
    if not rows:
        raise RuntimeError("No env variable records found. Run scan first.")

    print(" Loading SentenceTransformer...")
    encoder = SentenceTransformer(MODEL_NAME)
    print(" Model loaded")

    print(f" Generating embeddings for {len(rows)} variables...")
    corpus: list[str] = []
    for row in rows:
        corpus.append(build_search_text(row["variable_name"]))

    encode_options = {
        "batch_size": 128,
        "show_progress_bar": False,
        "convert_to_numpy": True,
        # Unit-length vectors make the inner product equal cosine similarity.
        "normalize_embeddings": True,
    }
    vectors: np.ndarray = encoder.encode(corpus, **encode_options)

    width = vectors.shape[1]
    flat_index = faiss.IndexFlatIP(width)
    flat_index.add(vectors.astype(np.float32))

    DATA_DIR.mkdir(parents=True, exist_ok=True)
    faiss.write_index(flat_index, str(FAISS_INDEX_PATH))

    # Persist an explicit allow-list of fields so that nothing else carried
    # by a record (for instance a 'value') ever reaches the metadata file.
    kept_fields = ("variable_name", "source_file", "source_path")
    slim_rows = [{field: row[field] for field in kept_fields} for row in rows]
    payload = json.dumps(slim_rows, indent=2)
    ENV_METADATA_PATH.write_text(payload, encoding="utf-8")

    print(
        f" Index built - {flat_index.ntotal} vectors, dim={width}.\n"
        f" Saved to {FAISS_INDEX_PATH}"
    )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
if __name__ == "__main__":
|
|
187
|
+
build_index()
|
parser.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""
|
|
2
|
+
parser.py
|
|
3
|
+
Reads all copied .env files from COPIED_DIR (<data_dir>/copied_envs),
|
|
4
|
+
extracts variable names (never values), and returns structured records.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from scanner import COPIED_DIR, METADATA_FILE
|
|
11
|
+
import json
|
|
12
|
+
|
|
13
|
+
# Matches SOME_VAR=... or export SOME_VAR=...
|
|
14
|
+
# Captures only the variable name.
|
|
15
|
+
_VAR_RE = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=", re.MULTILINE)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _load_metadata() -> dict[str, str]:
    """Map each copied file name to the original path recorded for it.

    Reads METADATA_FILE; when that file does not exist the mapping is empty.
    """
    mapping: dict[str, str] = {}
    if METADATA_FILE.exists():
        entries = json.loads(METADATA_FILE.read_text(encoding="utf-8"))
        for entry in entries:
            # Only the file name of the copy is used as the lookup key.
            copied_name = Path(entry["copied_path"]).name
            mapping[copied_name] = entry["original_path"]
    return mapping
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def parse_all() -> list[dict]:
    """
    Extract variable names from every .txt file in COPIED_DIR.

    Each returned record looks like:
        {
            "variable_name": "OPENAI_API_KEY",
            "source_file": "Projects_App1_env.txt",
            "source_path": "D:\\Projects\\App1\\.env"
        }

    Only the names matched by _VAR_RE are kept; values are never stored.
    A (variable_name, source_file) pair is reported once, at its first
    occurrence. Files that cannot be read are skipped, and a copy without a
    metadata entry gets the source path "unknown".
    """
    origin_by_copy = _load_metadata()
    if not COPIED_DIR.exists():
        return []

    unique: list[dict] = []
    emitted: set[tuple[str, str]] = set()

    for copy_path in sorted(COPIED_DIR.glob("*.txt")):
        try:
            text = copy_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        copy_name = copy_path.name
        origin = origin_by_copy.get(copy_name, "unknown")

        for hit in _VAR_RE.finditer(text):
            name = hit.group(1)
            identity = (name, copy_name)
            if identity in emitted:
                # Same variable repeated inside the same file.
                continue
            emitted.add(identity)
            unique.append(
                {
                    "variable_name": name,
                    "source_file": copy_name,
                    "source_path": origin,
                }
            )

    return unique
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
if __name__ == "__main__":
|
|
74
|
+
entries = parse_all()
|
|
75
|
+
print(f"Parsed {len(entries)} unique variable entries.")
|
|
76
|
+
for e in entries[:10]:
|
|
77
|
+
print(f" {e['variable_name']} ← {e['source_path']}")
|
scanner.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""
|
|
2
|
+
scanner.py
|
|
3
|
+
Recursively scans the configured drives for .env files, copies them to <data_dir>/copied_envs,
|
|
4
|
+
and writes metadata.json.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import shutil
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
# ── Constants ────────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
CONFIG_FILE = Path.home() / ".envbot_config.json"
|
|
15
|
+
|
|
16
|
+
def _load_config() -> dict:
    """
    Load the envbot configuration, falling back to built-in defaults.

    Values read from CONFIG_FILE are layered over the defaults, so the result
    always contains "scan_drives" and "data_dir" even when the file omits
    them (the module reads both keys right after calling this function).

    The defaults alone are returned when the file is missing, unreadable,
    not valid JSON, or does not contain a JSON object.
    """
    defaults = {
        "scan_drives": ["D:\\"],
        "data_dir": str(Path.home() / ".envbot_data"),
    }

    try:
        loaded = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError both
        # derive from it).
        return defaults

    if not isinstance(loaded, dict):
        return defaults

    # A key explicitly set to null is treated like a missing key.
    overrides = {key: value for key, value in loaded.items() if value is not None}
    return {**defaults, **overrides}
|
|
26
|
+
|
|
27
|
+
_config = _load_config()
|
|
28
|
+
SCAN_DRIVES = _config["scan_drives"]
|
|
29
|
+
DATA_DIR = Path(_config["data_dir"])
|
|
30
|
+
COPIED_DIR = DATA_DIR / "copied_envs"
|
|
31
|
+
METADATA_FILE = DATA_DIR / "metadata.json"
|
|
32
|
+
|
|
33
|
+
TARGET_NAMES = {
|
|
34
|
+
".env",
|
|
35
|
+
".env.local",
|
|
36
|
+
".env.development",
|
|
37
|
+
".env.production",
|
|
38
|
+
".env.test",
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
IGNORED_DIRS = {
|
|
42
|
+
"node_modules",
|
|
43
|
+
".git",
|
|
44
|
+
".next",
|
|
45
|
+
"dist",
|
|
46
|
+
"build",
|
|
47
|
+
"venv",
|
|
48
|
+
".venv",
|
|
49
|
+
"__pycache__",
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
def _safe_name(original: Path) -> str:
|
|
56
|
+
"""
|
|
57
|
+
Convert D:\\Projects\\App1\\.env → Projects_App1_env.txt
|
|
58
|
+
Strips the drive letter, replaces path separators and dots with underscores,
|
|
59
|
+
collapses leading underscores, appends .txt.
|
|
60
|
+
"""
|
|
61
|
+
parts = list(original.parts)
|
|
62
|
+
if original.drive:
|
|
63
|
+
parts = parts[1:]
|
|
64
|
+
|
|
65
|
+
# Join with underscore, strip leading dots from each part
|
|
66
|
+
sanitised = "_".join(p.lstrip(".").replace(".", "_") for p in parts if p)
|
|
67
|
+
sanitised = sanitised.replace("\\", "_").replace("/", "_").replace(":", "_")
|
|
68
|
+
sanitised = sanitised.strip("_") or "unknown"
|
|
69
|
+
return sanitised + ".txt"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _walk_env_files(root: Path):
    """
    Yield every file below *root* whose name is in TARGET_NAMES.

    Symlinks are never followed or yielded, directories named in IGNORED_DIRS
    are not entered, and any directory or entry that raises OSError
    (PermissionError included) is silently skipped.
    """
    try:
        children = list(root.iterdir())
    except OSError:
        # Unreadable or non-existent directory: nothing to report.
        return

    for child in children:
        try:
            if child.is_symlink():
                continue
            if child.is_dir():
                if child.name not in IGNORED_DIRS:
                    yield from _walk_env_files(child)
                continue
            if child.is_file() and child.name in TARGET_NAMES:
                yield child
        except OSError:
            continue
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ── Public API ────────────────────────────────────────────────────────────────
|
|
94
|
+
|
|
95
|
+
def scan_and_copy() -> list[dict]:
    """
    Scan every root in SCAN_DRIVES for .env files and copy them to COPIED_DIR.

    Each successfully copied file is recorded with its original path, the
    path of the copy and a UTC discovery timestamp; the records are written
    to METADATA_FILE and returned. Files that cannot be copied are skipped.

    Copies left in COPIED_DIR by an earlier scan but not written by this one
    are deleted afterwards. metadata.json is rewritten from scratch on every
    scan, so such leftovers would otherwise keep being parsed and indexed
    without a known source path.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    COPIED_DIR.mkdir(parents=True, exist_ok=True)

    metadata: list[dict] = []
    seen_names: dict[str, int] = {}
    written: set[str] = set()

    for drive in SCAN_DRIVES:
        scan_root = Path(drive)
        for env_file in _walk_env_files(scan_root):
            base_name = _safe_name(env_file)

            # Different paths can flatten to the same name; later ones get a
            # numeric suffix (_1, _2, ...).
            if base_name in seen_names:
                seen_names[base_name] += 1
                stem = base_name[: -len(".txt")]
                base_name = f"{stem}_{seen_names[base_name]}.txt"
            else:
                seen_names[base_name] = 0

            dest = COPIED_DIR / base_name

            # NOTE(review): copy2 duplicates the whole file, values included,
            # into COPIED_DIR — confirm that storing them there is acceptable.
            try:
                shutil.copy2(env_file, dest)
            except OSError:
                continue

            written.add(dest.name)
            metadata.append(
                {
                    "original_path": str(env_file),
                    "copied_path": str(dest),
                    "discovered_time": datetime.now(timezone.utc).isoformat(),
                }
            )

    # Drop copies from previous scans that this scan did not refresh.
    for leftover in COPIED_DIR.glob("*.txt"):
        if leftover.name in written:
            continue
        try:
            leftover.unlink()
        except OSError:
            # Best effort: a file that cannot be removed is left in place.
            continue

    METADATA_FILE.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
    return metadata
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
if __name__ == "__main__":
|
|
139
|
+
records = scan_and_copy()
|
|
140
|
+
print(f"Discovered and copied {len(records)} .env file(s).")
|
search.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""
|
|
2
|
+
search.py
|
|
3
|
+
Loads the persisted FAISS index and metadata, encodes a query,
|
|
4
|
+
and returns the top-K matching variable names with optional source info.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from functools import lru_cache
|
|
12
|
+
|
|
13
|
+
import faiss
|
|
14
|
+
import numpy as np
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
os.environ["TRANSFORMERS_NO_TF"] = "1"
|
|
18
|
+
os.environ["USE_TF"] = "0"
|
|
19
|
+
|
|
20
|
+
from sentence_transformers import SentenceTransformer
|
|
21
|
+
|
|
22
|
+
from scanner import DATA_DIR
|
|
23
|
+
from indexer import FAISS_INDEX_PATH, ENV_METADATA_PATH, MODEL_NAME
|
|
24
|
+
|
|
25
|
+
DEFAULT_K = 3
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ── Lazy singletons ───────────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
@lru_cache(maxsize=1)
def _get_model() -> SentenceTransformer:
    """Return the SentenceTransformer for MODEL_NAME.

    The instance is created on the first call and reused afterwards.
    """
    encoder = SentenceTransformer(MODEL_NAME)
    return encoder
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@lru_cache(maxsize=1)
def _get_index() -> faiss.Index:
    """Read the FAISS index from FAISS_INDEX_PATH and cache it.

    Raises FileNotFoundError when the index file does not exist yet.
    """
    if FAISS_INDEX_PATH.exists():
        return faiss.read_index(str(FAISS_INDEX_PATH))

    raise FileNotFoundError(
        f"FAISS index not found at {FAISS_INDEX_PATH}. "
        "Run `envbot --reindex` first."
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@lru_cache(maxsize=1)
def _get_metadata() -> list[dict]:
    """Read the variable records from ENV_METADATA_PATH and cache them.

    Raises FileNotFoundError when the metadata file does not exist yet.
    """
    if ENV_METADATA_PATH.exists():
        raw_text = ENV_METADATA_PATH.read_text(encoding="utf-8")
        return json.loads(raw_text)

    raise FileNotFoundError(
        f"Metadata not found at {ENV_METADATA_PATH}. "
        "Run `envbot --reindex` first."
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ── Public search API ─────────────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
class SearchResult:
    """One search hit: a variable name and the file it was found in."""

    # Slots keep instances small; no per-instance __dict__ is created.
    __slots__ = ("variable_name", "source_file", "source_path")

    def __init__(self, variable_name: str, source_file: str, source_path: str) -> None:
        # Name of the environment variable that matched the query.
        self.variable_name = variable_name
        # File name of the copy the variable was parsed from.
        self.source_file = source_file
        # Path recorded for the original file.
        self.source_path = source_path

    def __repr__(self) -> str:
        """Return a constructor-style representation for debugging."""
        return (
            f"{type(self).__name__}("
            f"variable_name={self.variable_name!r}, "
            f"source_file={self.source_file!r}, "
            f"source_path={self.source_path!r})"
        )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def search(query: str, k: int = DEFAULT_K) -> list[SearchResult]:
    """
    Embed *query*, run a FAISS inner-product search, return up to *k* results.

    query: free-text description of the variable being looked for.
    k: maximum number of results; a value of zero or less yields [].

    Results are deduplicated on the (variable_name, source_path) pair: the
    same name found under different paths is reported once per path, while
    an identical name+path combination is reported only once.

    Raises FileNotFoundError when the index or the metadata file is missing.
    """
    if k <= 0:
        # Nothing was asked for; also avoids a non-positive search count.
        return []

    # Check the cheap on-disk artefacts before loading the embedding model,
    # so a missing or empty index is reported without paying the load cost.
    index = _get_index()
    metadata = _get_metadata()
    if index.ntotal == 0:
        return []

    model = _get_model()

    # Encode and normalise the query.
    vec: np.ndarray = model.encode(
        [query],
        convert_to_numpy=True,
        normalize_embeddings=True,
    ).astype(np.float32)

    # Retrieve more candidates than requested so dedup doesn't leave us short.
    fetch_k = min(k * 4, index.ntotal)
    _, indices = index.search(vec, fetch_k)

    results: list[SearchResult] = []
    seen: set[tuple[str, str]] = set()

    for idx in indices[0]:
        # Skip ids that do not address a metadata row (negative ids included).
        if idx < 0 or idx >= len(metadata):
            continue
        rec = metadata[idx]
        key = (rec["variable_name"], rec["source_path"])
        if key in seen:
            continue
        seen.add(key)
        results.append(
            SearchResult(
                variable_name=rec["variable_name"],
                source_file=rec["source_file"],
                source_path=rec["source_path"],
            )
        )
        if len(results) >= k:
            break

    return results
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
if __name__ == "__main__":
|
|
116
|
+
import sys
|
|
117
|
+
q = " ".join(sys.argv[1:]) or "mongodb connection string"
|
|
118
|
+
for r in search(q):
|
|
119
|
+
print(r.variable_name, "←", r.source_path)
|