eth-mcp 0.2.0 (eth_mcp-0.2.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eth_mcp-0.2.0.dist-info/METADATA +332 -0
- eth_mcp-0.2.0.dist-info/RECORD +21 -0
- eth_mcp-0.2.0.dist-info/WHEEL +4 -0
- eth_mcp-0.2.0.dist-info/entry_points.txt +3 -0
- ethereum_mcp/__init__.py +3 -0
- ethereum_mcp/cli.py +589 -0
- ethereum_mcp/clients.py +363 -0
- ethereum_mcp/config.py +324 -0
- ethereum_mcp/expert/__init__.py +1 -0
- ethereum_mcp/expert/guidance.py +300 -0
- ethereum_mcp/indexer/__init__.py +8 -0
- ethereum_mcp/indexer/chunker.py +563 -0
- ethereum_mcp/indexer/client_compiler.py +725 -0
- ethereum_mcp/indexer/compiler.py +245 -0
- ethereum_mcp/indexer/downloader.py +521 -0
- ethereum_mcp/indexer/embedder.py +627 -0
- ethereum_mcp/indexer/manifest.py +411 -0
- ethereum_mcp/logging.py +85 -0
- ethereum_mcp/models.py +126 -0
- ethereum_mcp/server.py +555 -0
- ethereum_mcp/tools/__init__.py +1 -0
ethereum_mcp/cli.py
ADDED
@@ -0,0 +1,589 @@
"""CLI for Ethereum MCP management."""

import subprocess
from pathlib import Path

import click

from .config import get_model_info, load_config
from .indexer.chunker import chunk_client_code
from .indexer.client_compiler import compile_client, load_client_constants, load_client_items
from .indexer.compiler import compile_specs
from .indexer.downloader import (
    CLIENT_REPOS,
    download_clients,
    download_specs,
    get_builder_spec_files,
    get_eip_files,
    get_spec_files,
    list_downloaded_clients,
)
from .indexer.embedder import IncrementalEmbedder, embed_and_store
from .indexer.manifest import ManifestCorruptedError, load_manifest

DEFAULT_DATA_DIR = Path.home() / ".ethereum-mcp"


@click.group()
def main():
    """Ethereum MCP - RAG-powered Ethereum specs search."""
    pass


@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--force", is_flag=True, help="Force re-download")
@click.option("--include-clients", is_flag=True, help="Also download client source code")
def download(data_dir: str, force: bool, include_clients: bool):
    """Download Ethereum specs, EIPs, builder-specs, and optionally client source code."""
    data_path = Path(data_dir)
    click.echo(f"Downloading to {data_path}...")

    consensus_dir, eips_dir, builder_specs_dir = download_specs(data_path, force=force)

    click.echo(f"Consensus specs: {consensus_dir}")
    click.echo(f"EIPs: {eips_dir}")
    click.echo(f"Builder specs: {builder_specs_dir}")

    if include_clients:
        click.echo("\nDownloading client source code...")
        results = download_clients(data_path, progress_callback=click.echo)
        click.echo("\nClient download results:")
        for name, success in results.items():
            status = click.style("✓", fg="green") if success else click.style("✗", fg="red")
            click.echo(f" {status} {name}")


@main.command("download-clients")
|
|
58
|
+
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
|
|
59
|
+
@click.option("--client", multiple=True, help="Specific clients to download (default: all)")
|
|
60
|
+
def download_clients_cmd(data_dir: str, client: tuple):
|
|
61
|
+
"""Download Ethereum client source code (reth, geth, lighthouse, etc.)."""
|
|
62
|
+
data_path = Path(data_dir)
|
|
63
|
+
|
|
64
|
+
clients_to_download = list(client) if client else None
|
|
65
|
+
|
|
66
|
+
click.echo("Available clients:")
|
|
67
|
+
for name, config in CLIENT_REPOS.items():
|
|
68
|
+
click.echo(f" {name}: {config['language']} ({config['layer']})")
|
|
69
|
+
|
|
70
|
+
click.echo(f"\nDownloading to {data_path / 'clients'}...")
|
|
71
|
+
results = download_clients(data_path, clients=clients_to_download, progress_callback=click.echo)
|
|
72
|
+
|
|
73
|
+
click.echo("\nResults:")
|
|
74
|
+
for name, success in results.items():
|
|
75
|
+
status = click.style("✓", fg="green") if success else click.style("✗", fg="red")
|
|
76
|
+
click.echo(f" {status} {name}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--include-clients", is_flag=True, help="Also compile client source code")
def compile(data_dir: str, include_clients: bool):
    """Compile specs and optionally client source code into indexed JSON."""
    data_path = Path(data_dir)
    consensus_dir = data_path / "consensus-specs"
    output_dir = data_path / "compiled"

    if not consensus_dir.exists():
        click.echo("Error: Consensus specs not found. Run 'download' first.")
        raise click.Abort()

    click.echo("Compiling specs...")
    compiled = compile_specs(consensus_dir, output_dir)
    click.echo(f"Compiled {len(compiled)} forks to {output_dir}")

    if include_clients:
        click.echo("\nCompiling client source code...")
        clients_dir = data_path / "clients"
        client_output_dir = output_dir / "clients"

        total_stats = {
            "files_processed": 0,
            "items_extracted": 0,
            "constants_extracted": 0,
            "functions": 0,
            "structs": 0,
        }

        for client_name, config in CLIENT_REPOS.items():
            client_path = clients_dir / client_name
            if not client_path.exists():
                click.echo(f" Skipping {client_name} (not downloaded)")
                continue

            click.echo(f" Compiling {client_name} ({config['language']})...")
            stats = compile_client(
                client_path,
                client_output_dir / client_name,
                client_name,
                config["language"],
                progress_callback=lambda msg: click.echo(f" {msg}"),
            )

            if "error" not in stats:
                for key in total_stats:
                    total_stats[key] += stats.get(key, 0)

        click.echo("\nClient compilation complete:")
        click.echo(f" Files: {total_stats['files_processed']}")
        click.echo(f" Functions: {total_stats['functions']}")
        click.echo(f" Structs: {total_stats['structs']}")
        click.echo(f" Constants: {total_stats['constants_extracted']}")


@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--chunk-size", default=1000, help="Chunk size in characters")
@click.option("--chunk-overlap", default=200, help="Chunk overlap in characters")
@click.option("--include-clients", is_flag=True, help="Also index compiled client code")
@click.option("--full", is_flag=True, help="Force full rebuild (ignore incremental)")
@click.option("--dry-run", is_flag=True, help="Show what would change without indexing")
@click.option("--model", default=None, help="Embedding model to use")
def index(
    data_dir: str,
    chunk_size: int,
    chunk_overlap: int,
    include_clients: bool,
    full: bool,
    dry_run: bool,
    model: str | None,
):
    """Build vector index from specs, EIPs, builder-specs, and optionally client code.

    By default, performs incremental indexing (only re-embeds changed files).
    Use --full to force a complete rebuild.
    """
    data_path = Path(data_dir)
    consensus_dir = data_path / "consensus-specs"
    eips_dir = data_path / "EIPs"
    builder_specs_dir = data_path / "builder-specs"

    if not consensus_dir.exists():
        click.echo("Error: Consensus specs not found. Run 'download' first.")
        raise click.Abort()

    click.echo("Collecting files...")

    # Collect files with their types
    current_files: dict[str, Path] = {}
    file_types: dict[str, str] = {}

    spec_files = get_spec_files(consensus_dir)
    for f in spec_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "spec"

    eip_files = get_eip_files(eips_dir) if eips_dir.exists() else []
    for f in eip_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "eip"

    if builder_specs_dir.exists():
        builder_spec_files = get_builder_spec_files(builder_specs_dir)
    else:
        builder_spec_files = []
    for f in builder_spec_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "builder"

    click.echo(
        f"Found {len(spec_files)} spec files, {len(eip_files)} EIP files, "
        f"{len(builder_spec_files)} builder-spec files"
    )

    # Create incremental embedder
    embedder = IncrementalEmbedder(
        data_dir=data_path,
        model_name=model,
    )

    if dry_run:
        # Show what would change
        result = embedder.dry_run(current_files, file_types)
        click.echo("\n" + result.summary())
        if result.files_to_add:
            click.echo("\nFiles to add:")
            for f in result.files_to_add[:10]:
                click.echo(f" + {f}")
            if len(result.files_to_add) > 10:
                click.echo(f" ... and {len(result.files_to_add) - 10} more")
        if result.files_to_modify:
            click.echo("\nFiles to modify:")
            for f in result.files_to_modify[:10]:
                click.echo(f" ~ {f}")
            if len(result.files_to_modify) > 10:
                click.echo(f" ... and {len(result.files_to_modify) - 10} more")
        if result.files_to_delete:
            click.echo("\nFiles to delete:")
            for f in result.files_to_delete[:10]:
                click.echo(f" - {f}")
            if len(result.files_to_delete) > 10:
                click.echo(f" ... and {len(result.files_to_delete) - 10} more")
        return

    # Perform indexing
    click.echo("\nIndexing...")
    stats = embedder.index(
        current_files,
        file_types,
        force_full=full,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    click.echo(f"\n{stats.summary()}")

    # Handle client code separately (not part of incremental for now)
    if include_clients:
        click.echo("\nIndexing client code (full rebuild)...")
        _index_client_code(data_path)


def _index_client_code(data_path: Path) -> None:
    """Index compiled client code (separate from specs)."""
    client_compiled_dir = data_path / "compiled" / "clients"
    db_path = data_path / "lancedb"

    if not client_compiled_dir.exists():
        click.echo(" No compiled clients found. Run 'compile --include-clients' first.")
        return

    all_chunks = []
    for client_dir in client_compiled_dir.iterdir():
        if client_dir.is_dir():
            items = load_client_items(client_dir)
            constants = load_client_constants(client_dir)

            if items or constants:
                client_chunks = chunk_client_code(items, constants)
                all_chunks.extend(client_chunks)
                click.echo(
                    f" {client_dir.name}: {len(items)} items, "
                    f"{len(constants)} constants -> {len(client_chunks)} chunks"
                )

    if all_chunks:
        # Note: Client code uses separate table or appends to main table
        # For now, this is a simple append
        count = embed_and_store(all_chunks, db_path, table_name="ethereum_clients")
        click.echo(f" Indexed {count} client code chunks")


@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data dir")
@click.option("--force", is_flag=True, help="Force re-download")
@click.option("--include-clients", is_flag=True, help="Download and compile client code")
@click.option("--full", is_flag=True, help="Force full index rebuild")
def build(data_dir: str, force: bool, include_clients: bool, full: bool):
    """Full build: download, compile, and index."""
    data_path = Path(data_dir)

    # Download specs
    click.echo("=== Downloading specs ===")
    consensus_dir, eips_dir, builder_specs_dir = download_specs(data_path, force=force)

    # Download clients if requested
    if include_clients:
        click.echo("\n=== Downloading client source code ===")
        results = download_clients(data_path, progress_callback=click.echo)
        successful = sum(1 for s in results.values() if s)
        click.echo(f"Downloaded {successful}/{len(results)} clients")

    # Compile specs
    click.echo("\n=== Compiling specs ===")
    output_dir = data_path / "compiled"
    compiled = compile_specs(consensus_dir, output_dir)
    click.echo(f"Compiled {len(compiled)} forks")

    # Compile clients if requested
    if include_clients:
        click.echo("\n=== Compiling client source code ===")
        clients_dir = data_path / "clients"
        client_output_dir = output_dir / "clients"

        for client_name, config in CLIENT_REPOS.items():
            client_path = clients_dir / client_name
            if not client_path.exists():
                continue

            click.echo(f" {client_name} ({config['language']})...")
            compile_client(
                client_path,
                client_output_dir / client_name,
                client_name,
                config["language"],
            )

    # Index (using incremental by default unless --full)
    click.echo("\n=== Building vector index ===")

    # Collect files
    current_files: dict[str, Path] = {}
    file_types: dict[str, str] = {}

    spec_files = get_spec_files(consensus_dir)
    for f in spec_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "spec"

    eip_files = get_eip_files(eips_dir) if eips_dir.exists() else []
    for f in eip_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "eip"

    if builder_specs_dir.exists():
        builder_files = get_builder_spec_files(builder_specs_dir)
        for f in builder_files:
            rel = str(f.relative_to(data_path))
            current_files[rel] = f
            file_types[rel] = "builder"

    click.echo(
        f"Found {len(spec_files)} spec files, {len(eip_files)} EIP files, "
        f"{len(builder_files) if builder_specs_dir.exists() else 0} builder-spec files"
    )

    # Use incremental embedder
    embedder = IncrementalEmbedder(data_dir=data_path)
    stats = embedder.index(
        current_files,
        file_types,
        force_full=full,
    )

    click.echo(f"\n{stats.summary()}")

    # Index compiled client code if clients were included
    if include_clients:
        click.echo("\nIndexing client code...")
        _index_client_code(data_path)

    click.echo("\n=== Build complete ===")


@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--full", is_flag=True, help="Force full index rebuild after update")
def update(data_dir: str, full: bool):
    """Update repos (git pull) and incrementally re-index."""
    data_path = Path(data_dir)

    # Git pull for each repo
    repos = [
        ("consensus-specs", data_path / "consensus-specs"),
        ("EIPs", data_path / "EIPs"),
        ("builder-specs", data_path / "builder-specs"),
    ]

    click.echo("=== Updating repositories ===")
    for name, repo_path in repos:
        if not repo_path.exists():
            click.echo(f" {name}: not downloaded")
            continue

        click.echo(f" {name}: ", nl=False)
        try:
            result = subprocess.run(
                ["git", "pull", "--ff-only"],
                cwd=repo_path,
                capture_output=True,
                text=True,
                timeout=60,
            )
            if result.returncode == 0:
                # Check if there were changes
                if "Already up to date" in result.stdout:
                    click.echo("already up to date")
                else:
                    click.echo("updated")
            else:
                click.echo(f"error: {result.stderr.strip()}")
        except subprocess.TimeoutExpired:
            click.echo("timeout")
        except Exception as e:
            click.echo(f"error: {e}")

    # Re-index
    click.echo("\n=== Rebuilding index ===")

    # Collect files
    current_files: dict[str, Path] = {}
    file_types: dict[str, str] = {}

    consensus_dir = data_path / "consensus-specs"
    eips_dir = data_path / "EIPs"
    builder_specs_dir = data_path / "builder-specs"

    if consensus_dir.exists():
        for f in get_spec_files(consensus_dir):
            rel = str(f.relative_to(data_path))
            current_files[rel] = f
            file_types[rel] = "spec"

    if eips_dir.exists():
        for f in get_eip_files(eips_dir):
            rel = str(f.relative_to(data_path))
            current_files[rel] = f
            file_types[rel] = "eip"

    if builder_specs_dir.exists():
        for f in get_builder_spec_files(builder_specs_dir):
            rel = str(f.relative_to(data_path))
            current_files[rel] = f
            file_types[rel] = "builder"

    if not current_files:
        click.echo("No files to index. Run 'download' first.")
        return

    # Use incremental embedder
    embedder = IncrementalEmbedder(data_dir=data_path)
    stats = embedder.index(
        current_files,
        file_types,
        force_full=full,
    )

    click.echo(f"\n{stats.summary()}")


@main.command()
@click.argument("query")
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--fork", default=None, help="Filter by fork")
@click.option("--limit", default=5, help="Max results")
def search(query: str, data_dir: str, fork: str, limit: int):
    """Search the indexed specs."""
    from .indexer.embedder import EmbeddingSearcher

    data_path = Path(data_dir)
    db_path = data_path / "lancedb"

    if not db_path.exists():
        click.echo("Error: Index not found. Run 'build' first.")
        raise click.Abort()

    searcher = EmbeddingSearcher(db_path)
    results = searcher.search(query, limit=limit, fork=fork)

    for i, r in enumerate(results, 1):
        click.echo(f"\n--- Result {i} (score: {r['score']:.3f}) ---")
        click.echo(f"Fork: {r['fork']} | Section: {r['section']} | Type: {r['chunk_type']}")
        click.echo(f"Source: {r['source']}")
        click.echo(f"\n{r['content'][:500]}...")


@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
def status(data_dir: str):
    """Show index status including manifest and embedding model info."""
    data_path = Path(data_dir)

    click.echo(f"Data directory: {data_path}")
    click.echo(f" Exists: {data_path.exists()}")

    # Load config
    config = load_config(data_dir=data_path)
    click.echo("\nConfiguration:")
    click.echo(f" Embedding model: {config.embedding.model}")
    click.echo(f" Batch size: {config.embedding.batch_size}")
    click.echo(f" Chunk size: {config.chunking.chunk_size}")
    click.echo(f" Chunk overlap: {config.chunking.chunk_overlap}")

    # Manifest info
    manifest_path = data_path / "manifest.json"
    click.echo(f"\nManifest: {manifest_path}")
    if manifest_path.exists():
        try:
            manifest = load_manifest(manifest_path)
            if manifest:
                click.echo(f" Version: {manifest.version}")
                click.echo(f" Updated: {manifest.updated_at}")
                click.echo(f" Embedding model: {manifest.embedding_model}")
                click.echo(f" Files tracked: {len(manifest.files)}")
                total_chunks = sum(len(e.chunk_ids) for e in manifest.files.values())
                click.echo(f" Total chunks: {total_chunks}")
                if manifest.repo_versions:
                    click.echo(" Repo versions:")
                    for repo, version in manifest.repo_versions.items():
                        click.echo(f" {repo}: {version[:8]}")
        except ManifestCorruptedError as e:
            click.echo(f" Status: CORRUPTED - {e}")
    else:
        click.echo(" Status: Not found (will be created on first index)")

    consensus_dir = data_path / "consensus-specs"
    click.echo(f"\nConsensus specs: {consensus_dir}")
    click.echo(f" Exists: {consensus_dir.exists()}")

    eips_dir = data_path / "EIPs"
    click.echo(f"\nEIPs: {eips_dir}")
    click.echo(f" Exists: {eips_dir.exists()}")

    builder_specs_dir = data_path / "builder-specs"
    click.echo(f"\nBuilder specs: {builder_specs_dir}")
    click.echo(f" Exists: {builder_specs_dir.exists()}")

    compiled_dir = data_path / "compiled"
    click.echo(f"\nCompiled specs: {compiled_dir}")
    click.echo(f" Exists: {compiled_dir.exists()}")
    if compiled_dir.exists():
        json_files = list(compiled_dir.glob("*.json"))
        click.echo(f" Forks: {[f.stem.replace('_spec', '') for f in json_files]}")

    # Client status
    click.echo("\nClient source code:")
    client_status = list_downloaded_clients(data_path)
    for name, info in client_status.items():
        if info["exists"]:
            status_icon = click.style("✓", fg="green")
            version = info["version"] or "unknown"
            click.echo(f" {status_icon} {name} ({info['language']}, {info['layer']}) - {version}")
        else:
            status_icon = click.style("✗", fg="red")
            click.echo(f" {status_icon} {name} (not downloaded)")

    # Compiled clients
    client_compiled_dir = compiled_dir / "clients"
    if client_compiled_dir.exists():
        click.echo("\nCompiled clients:")
        for client_dir in client_compiled_dir.iterdir():
            if client_dir.is_dir():
                index_file = client_dir / "index.json"
                if index_file.exists():
                    import json

                    with open(index_file) as f:
                        index = json.load(f)
                    funcs = len(index.get("functions", {}))
                    structs = len(index.get("structs", {}))
                    click.echo(f" {client_dir.name}: {funcs} functions, {structs} structs")

    db_path = data_path / "lancedb"
    click.echo(f"\nVector index: {db_path}")
    click.echo(f" Exists: {db_path.exists()}")
    if db_path.exists():
        try:
            from .indexer.embedder import EmbeddingSearcher

            searcher = EmbeddingSearcher(db_path)
            stats = searcher.get_stats()
            click.echo(f" Total chunks: {stats['total_chunks']}")
        except Exception as e:
            click.echo(f" Error reading index: {e}")


@main.command("models")
|
|
583
|
+
def list_models():
|
|
584
|
+
"""List available embedding models."""
|
|
585
|
+
click.echo(get_model_info())
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
if __name__ == "__main__":
|
|
589
|
+
main()
|
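
As a quick sanity check of the command surface added above, the click group can be exercised with click's test runner. This is a minimal sketch, not part of the package: it assumes the wheel is installed so `ethereum_mcp.cli` is importable, and the data directory shown is purely illustrative (the console-script name in entry_points.txt is not reproduced in this diff, so the group is invoked directly).

# Minimal sketch: driving the CLI group via click's test runner.
# Assumes the eth-mcp wheel is installed; "/tmp/ethereum-mcp-demo" is an illustrative path.
from click.testing import CliRunner

from ethereum_mcp.cli import main

runner = CliRunner()

# "status" reports what exists under --data-dir (see the status command above).
result = runner.invoke(main, ["status", "--data-dir", "/tmp/ethereum-mcp-demo"])
print(result.exit_code)
print(result.output)

# "index --dry-run" would report pending changes without embedding anything,
# but it requires a prior "download" into the same --data-dir.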