eth-mcp 0.2.0__py3-none-any.whl

This diff shows the contents of publicly released package versions from the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
ethereum_mcp/cli.py ADDED
@@ -0,0 +1,589 @@
1
+ """CLI for Ethereum MCP management."""
2
+
3
+ import subprocess
4
+ from pathlib import Path
5
+
6
+ import click
7
+
8
+ from .config import get_model_info, load_config
9
+ from .indexer.chunker import chunk_client_code
10
+ from .indexer.client_compiler import compile_client, load_client_constants, load_client_items
11
+ from .indexer.compiler import compile_specs
12
+ from .indexer.downloader import (
13
+ CLIENT_REPOS,
14
+ download_clients,
15
+ download_specs,
16
+ get_builder_spec_files,
17
+ get_eip_files,
18
+ get_spec_files,
19
+ list_downloaded_clients,
20
+ )
21
+ from .indexer.embedder import IncrementalEmbedder, embed_and_store
22
+ from .indexer.manifest import ManifestCorruptedError, load_manifest
23
+
24
+ DEFAULT_DATA_DIR = Path.home() / ".ethereum-mcp"
25
+
26
+
27
@click.group()
def main():
    """Ethereum MCP - RAG-powered Ethereum specs search."""
31
+
32
+
33
@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--force", is_flag=True, help="Force re-download")
@click.option("--include-clients", is_flag=True, help="Also download client source code")
def download(data_dir: str, force: bool, include_clients: bool):
    """Download Ethereum specs, EIPs, builder-specs, and optionally client source code."""
    target = Path(data_dir)
    click.echo(f"Downloading to {target}...")

    consensus_dir, eips_dir, builder_specs_dir = download_specs(target, force=force)

    click.echo(f"Consensus specs: {consensus_dir}")
    click.echo(f"EIPs: {eips_dir}")
    click.echo(f"Builder specs: {builder_specs_dir}")

    if not include_clients:
        return

    # Client checkouts are optional and large, so they are opt-in.
    click.echo("\nDownloading client source code...")
    outcome = download_clients(target, progress_callback=click.echo)
    click.echo("\nClient download results:")
    for name, ok in outcome.items():
        mark = click.style("✓", fg="green") if ok else click.style("✗", fg="red")
        click.echo(f" {mark} {name}")
55
+
56
+
57
@main.command("download-clients")
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--client", multiple=True, help="Specific clients to download (default: all)")
def download_clients_cmd(data_dir: str, client: tuple):
    """Download Ethereum client source code (reth, geth, lighthouse, etc.)."""
    root = Path(data_dir)
    # An empty --client selection means "download everything".
    selection = list(client) or None

    click.echo("Available clients:")
    for repo_name, repo_cfg in CLIENT_REPOS.items():
        click.echo(f" {repo_name}: {repo_cfg['language']} ({repo_cfg['layer']})")

    click.echo(f"\nDownloading to {root / 'clients'}...")
    outcome = download_clients(root, clients=selection, progress_callback=click.echo)

    click.echo("\nResults:")
    for repo_name, ok in outcome.items():
        mark = click.style("✓", fg="green") if ok else click.style("✗", fg="red")
        click.echo(f" {mark} {repo_name}")
77
+
78
+
79
@main.command("compile")
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--include-clients", is_flag=True, help="Also compile client source code")
def compile_cmd(data_dir: str, include_clients: bool):
    """Compile specs and optionally client source code into indexed JSON.

    The function is named ``compile_cmd`` (not ``compile``) to avoid
    shadowing the builtin; the CLI command name stays "compile" via the
    explicit registration, matching the ``download_clients_cmd`` pattern
    used elsewhere in this module.
    """
    data_path = Path(data_dir)
    consensus_dir = data_path / "consensus-specs"
    output_dir = data_path / "compiled"

    if not consensus_dir.exists():
        click.echo("Error: Consensus specs not found. Run 'download' first.")
        raise click.Abort()

    click.echo("Compiling specs...")
    compiled = compile_specs(consensus_dir, output_dir)
    click.echo(f"Compiled {len(compiled)} forks to {output_dir}")

    if include_clients:
        click.echo("\nCompiling client source code...")
        clients_dir = data_path / "clients"
        client_output_dir = output_dir / "clients"

        # Aggregate per-client stats into a single summary.
        total_stats = {
            "files_processed": 0,
            "items_extracted": 0,
            "constants_extracted": 0,
            "functions": 0,
            "structs": 0,
        }

        for client_name, config in CLIENT_REPOS.items():
            client_path = clients_dir / client_name
            if not client_path.exists():
                click.echo(f" Skipping {client_name} (not downloaded)")
                continue

            click.echo(f" Compiling {client_name} ({config['language']})...")
            stats = compile_client(
                client_path,
                client_output_dir / client_name,
                client_name,
                config["language"],
                progress_callback=lambda msg: click.echo(f" {msg}"),
            )

            # Failed compiles report an "error" key; skip them so the
            # totals only reflect successfully compiled clients.
            if "error" not in stats:
                for key in total_stats:
                    total_stats[key] += stats.get(key, 0)

        click.echo("\nClient compilation complete:")
        click.echo(f" Files: {total_stats['files_processed']}")
        click.echo(f" Functions: {total_stats['functions']}")
        click.echo(f" Structs: {total_stats['structs']}")
        click.echo(f" Constants: {total_stats['constants_extracted']}")
133
+
134
+
135
@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--chunk-size", default=1000, help="Chunk size in characters")
@click.option("--chunk-overlap", default=200, help="Chunk overlap in characters")
@click.option("--include-clients", is_flag=True, help="Also index compiled client code")
@click.option("--full", is_flag=True, help="Force full rebuild (ignore incremental)")
@click.option("--dry-run", is_flag=True, help="Show what would change without indexing")
@click.option("--model", default=None, help="Embedding model to use")
def index(
    data_dir: str,
    chunk_size: int,
    chunk_overlap: int,
    include_clients: bool,
    full: bool,
    dry_run: bool,
    model: str | None,
):
    """Build vector index from specs, EIPs, builder-specs, and optionally client code.

    By default, performs incremental indexing (only re-embeds changed files).
    Use --full to force a complete rebuild.
    """
    data_path = Path(data_dir)
    consensus_dir = data_path / "consensus-specs"
    eips_dir = data_path / "EIPs"
    builder_specs_dir = data_path / "builder-specs"

    if not consensus_dir.exists():
        click.echo("Error: Consensus specs not found. Run 'download' first.")
        raise click.Abort()

    click.echo("Collecting files...")

    # Collect files with their types; keys are data-dir-relative paths.
    current_files: dict[str, Path] = {}
    file_types: dict[str, str] = {}

    spec_files = get_spec_files(consensus_dir)
    for f in spec_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "spec"

    eip_files = get_eip_files(eips_dir) if eips_dir.exists() else []
    for f in eip_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "eip"

    builder_spec_files = (
        get_builder_spec_files(builder_specs_dir) if builder_specs_dir.exists() else []
    )
    for f in builder_spec_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "builder"

    click.echo(
        f"Found {len(spec_files)} spec files, {len(eip_files)} EIP files, "
        f"{len(builder_spec_files)} builder-spec files"
    )

    # Create incremental embedder
    embedder = IncrementalEmbedder(
        data_dir=data_path,
        model_name=model,
    )

    if dry_run:
        # Show what would change without touching the index.
        result = embedder.dry_run(current_files, file_types)
        click.echo("\n" + result.summary())
        _echo_file_changes("Files to add:", "+", result.files_to_add)
        _echo_file_changes("Files to modify:", "~", result.files_to_modify)
        _echo_file_changes("Files to delete:", "-", result.files_to_delete)
        return

    # Perform indexing
    click.echo("\nIndexing...")
    stats = embedder.index(
        current_files,
        file_types,
        force_full=full,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    click.echo(f"\n{stats.summary()}")

    # Handle client code separately (not part of incremental for now)
    if include_clients:
        click.echo("\nIndexing client code (full rebuild)...")
        _index_client_code(data_path)


def _echo_file_changes(header: str, marker: str, files: list, limit: int = 10) -> None:
    """Echo a truncated listing of one dry-run change category.

    Prints nothing when *files* is empty; otherwise prints *header*, up to
    *limit* entries prefixed with *marker*, and a "... and N more" line for
    the remainder.
    """
    if not files:
        return
    click.echo(f"\n{header}")
    for f in files[:limit]:
        click.echo(f" {marker} {f}")
    if len(files) > limit:
        click.echo(f" ... and {len(files) - limit} more")
244
+
245
+
246
def _index_client_code(data_path: Path) -> None:
    """Index compiled client code (separate from specs)."""
    compiled_root = data_path / "compiled" / "clients"
    db_path = data_path / "lancedb"

    if not compiled_root.exists():
        click.echo(" No compiled clients found. Run 'compile --include-clients' first.")
        return

    all_chunks = []
    for entry in compiled_root.iterdir():
        if not entry.is_dir():
            continue
        items = load_client_items(entry)
        constants = load_client_constants(entry)
        if not (items or constants):
            continue
        per_client = chunk_client_code(items, constants)
        all_chunks.extend(per_client)
        click.echo(
            f" {entry.name}: {len(items)} items, "
            f"{len(constants)} constants -> {len(per_client)} chunks"
        )

    if all_chunks:
        # Note: Client code uses separate table or appends to main table
        # For now, this is a simple append
        stored = embed_and_store(all_chunks, db_path, table_name="ethereum_clients")
        click.echo(f" Indexed {stored} client code chunks")
274
+
275
+
276
@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data dir")
@click.option("--force", is_flag=True, help="Force re-download")
@click.option("--include-clients", is_flag=True, help="Download and compile client code")
@click.option("--full", is_flag=True, help="Force full index rebuild")
def build(data_dir: str, force: bool, include_clients: bool, full: bool):
    """Full build: download, compile, and index."""
    data_path = Path(data_dir)

    # Download specs
    click.echo("=== Downloading specs ===")
    consensus_dir, eips_dir, builder_specs_dir = download_specs(data_path, force=force)

    # Download clients if requested
    if include_clients:
        click.echo("\n=== Downloading client source code ===")
        results = download_clients(data_path, progress_callback=click.echo)
        successful = sum(1 for s in results.values() if s)
        click.echo(f"Downloaded {successful}/{len(results)} clients")

    # Compile specs
    click.echo("\n=== Compiling specs ===")
    output_dir = data_path / "compiled"
    compiled = compile_specs(consensus_dir, output_dir)
    click.echo(f"Compiled {len(compiled)} forks")

    # Compile clients if requested
    if include_clients:
        click.echo("\n=== Compiling client source code ===")
        clients_dir = data_path / "clients"
        client_output_dir = output_dir / "clients"

        for client_name, config in CLIENT_REPOS.items():
            client_path = clients_dir / client_name
            if not client_path.exists():
                continue

            click.echo(f" {client_name} ({config['language']})...")
            compile_client(
                client_path,
                client_output_dir / client_name,
                client_name,
                config["language"],
            )

    # Index (using incremental by default unless --full)
    click.echo("\n=== Building vector index ===")

    # Collect files; keys are data-dir-relative paths.
    current_files: dict[str, Path] = {}
    file_types: dict[str, str] = {}

    spec_files = get_spec_files(consensus_dir)
    for f in spec_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "spec"

    eip_files = get_eip_files(eips_dir) if eips_dir.exists() else []
    for f in eip_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "eip"

    # Initialized unconditionally so the summary below can reference it
    # directly instead of re-checking builder_specs_dir.exists().
    builder_files: list[Path] = []
    if builder_specs_dir.exists():
        builder_files = get_builder_spec_files(builder_specs_dir)
    for f in builder_files:
        rel = str(f.relative_to(data_path))
        current_files[rel] = f
        file_types[rel] = "builder"

    click.echo(
        f"Found {len(spec_files)} spec files, {len(eip_files)} EIP files, "
        f"{len(builder_files)} builder-spec files"
    )

    # Use incremental embedder
    embedder = IncrementalEmbedder(data_dir=data_path)
    stats = embedder.index(
        current_files,
        file_types,
        force_full=full,
    )

    click.echo(f"\n{stats.summary()}")

    # Index compiled client code if clients were included
    if include_clients:
        click.echo("\nIndexing client code...")
        _index_client_code(data_path)

    click.echo("\n=== Build complete ===")
368
+
369
+
370
@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--full", is_flag=True, help="Force full index rebuild after update")
def update(data_dir: str, full: bool):
    """Update repos (git pull) and incrementally re-index.

    Each repository is fast-forwarded with ``git pull --ff-only`` so a
    diverged local checkout fails loudly rather than creating a merge
    commit; failures are reported per-repo and do not stop the others.
    """
    data_path = Path(data_dir)

    # Git pull for each repo
    repos = [
        ("consensus-specs", data_path / "consensus-specs"),
        ("EIPs", data_path / "EIPs"),
        ("builder-specs", data_path / "builder-specs"),
    ]

    click.echo("=== Updating repositories ===")
    for name, repo_path in repos:
        if not repo_path.exists():
            click.echo(f" {name}: not downloaded")
            continue

        click.echo(f" {name}: ", nl=False)
        try:
            result = subprocess.run(
                ["git", "pull", "--ff-only"],
                cwd=repo_path,
                capture_output=True,
                text=True,
                timeout=60,
            )
            if result.returncode == 0:
                # Newer git prints "Already up to date."; older versions
                # print "Already up-to-date." -- match the common prefix.
                if "Already up" in result.stdout:
                    click.echo("already up to date")
                else:
                    click.echo("updated")
            else:
                click.echo(f"error: {result.stderr.strip()}")
        except subprocess.TimeoutExpired:
            click.echo("timeout")
        except Exception as e:
            # Broad catch is deliberate at this CLI boundary: one broken
            # repo must not abort updates for the remaining repos.
            click.echo(f"error: {e}")

    # Re-index
    click.echo("\n=== Rebuilding index ===")

    # Collect files; keys are data-dir-relative paths.
    current_files: dict[str, Path] = {}
    file_types: dict[str, str] = {}

    consensus_dir = data_path / "consensus-specs"
    eips_dir = data_path / "EIPs"
    builder_specs_dir = data_path / "builder-specs"

    if consensus_dir.exists():
        for f in get_spec_files(consensus_dir):
            rel = str(f.relative_to(data_path))
            current_files[rel] = f
            file_types[rel] = "spec"

    if eips_dir.exists():
        for f in get_eip_files(eips_dir):
            rel = str(f.relative_to(data_path))
            current_files[rel] = f
            file_types[rel] = "eip"

    if builder_specs_dir.exists():
        for f in get_builder_spec_files(builder_specs_dir):
            rel = str(f.relative_to(data_path))
            current_files[rel] = f
            file_types[rel] = "builder"

    if not current_files:
        click.echo("No files to index. Run 'download' first.")
        return

    # Use incremental embedder
    embedder = IncrementalEmbedder(data_dir=data_path)
    stats = embedder.index(
        current_files,
        file_types,
        force_full=full,
    )

    click.echo(f"\n{stats.summary()}")
454
+
455
+
456
@main.command()
@click.argument("query")
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
@click.option("--fork", default=None, help="Filter by fork")
@click.option("--limit", default=5, help="Max results")
def search(query: str, data_dir: str, fork: str | None, limit: int):
    """Search the indexed specs."""
    # Local import keeps heavy embedding dependencies out of CLI startup.
    from .indexer.embedder import EmbeddingSearcher

    data_path = Path(data_dir)
    db_path = data_path / "lancedb"

    if not db_path.exists():
        click.echo("Error: Index not found. Run 'build' first.")
        raise click.Abort()

    searcher = EmbeddingSearcher(db_path)
    results = searcher.search(query, limit=limit, fork=fork)

    for i, r in enumerate(results, 1):
        click.echo(f"\n--- Result {i} (score: {r['score']:.3f}) ---")
        click.echo(f"Fork: {r['fork']} | Section: {r['section']} | Type: {r['chunk_type']}")
        click.echo(f"Source: {r['source']}")
        content = r["content"]
        # Only append an ellipsis when the snippet was actually truncated.
        suffix = "..." if len(content) > 500 else ""
        click.echo(f"\n{content[:500]}{suffix}")
480
+
481
+
482
@main.command()
@click.option("--data-dir", type=click.Path(), default=str(DEFAULT_DATA_DIR), help="Data directory")
def status(data_dir: str):
    """Show index status including manifest and embedding model info."""
    data_path = Path(data_dir)

    click.echo(f"Data directory: {data_path}")
    click.echo(f" Exists: {data_path.exists()}")

    # Load config
    config = load_config(data_dir=data_path)
    click.echo("\nConfiguration:")
    click.echo(f" Embedding model: {config.embedding.model}")
    click.echo(f" Batch size: {config.embedding.batch_size}")
    click.echo(f" Chunk size: {config.chunking.chunk_size}")
    click.echo(f" Chunk overlap: {config.chunking.chunk_overlap}")

    # Manifest info
    manifest_path = data_path / "manifest.json"
    click.echo(f"\nManifest: {manifest_path}")
    if manifest_path.exists():
        try:
            manifest = load_manifest(manifest_path)
            if manifest:
                click.echo(f" Version: {manifest.version}")
                click.echo(f" Updated: {manifest.updated_at}")
                click.echo(f" Embedding model: {manifest.embedding_model}")
                click.echo(f" Files tracked: {len(manifest.files)}")
                total_chunks = sum(len(e.chunk_ids) for e in manifest.files.values())
                click.echo(f" Total chunks: {total_chunks}")
                if manifest.repo_versions:
                    click.echo(" Repo versions:")
                    for repo, version in manifest.repo_versions.items():
                        click.echo(f" {repo}: {version[:8]}")
        except ManifestCorruptedError as e:
            click.echo(f" Status: CORRUPTED - {e}")
    else:
        click.echo(" Status: Not found (will be created on first index)")

    consensus_dir = data_path / "consensus-specs"
    click.echo(f"\nConsensus specs: {consensus_dir}")
    click.echo(f" Exists: {consensus_dir.exists()}")

    eips_dir = data_path / "EIPs"
    click.echo(f"\nEIPs: {eips_dir}")
    click.echo(f" Exists: {eips_dir.exists()}")

    builder_specs_dir = data_path / "builder-specs"
    click.echo(f"\nBuilder specs: {builder_specs_dir}")
    click.echo(f" Exists: {builder_specs_dir.exists()}")

    compiled_dir = data_path / "compiled"
    click.echo(f"\nCompiled specs: {compiled_dir}")
    click.echo(f" Exists: {compiled_dir.exists()}")
    if compiled_dir.exists():
        json_files = list(compiled_dir.glob("*.json"))
        click.echo(f" Forks: {[f.stem.replace('_spec', '') for f in json_files]}")

    # Client status
    click.echo("\nClient source code:")
    client_status = list_downloaded_clients(data_path)
    for name, info in client_status.items():
        if info["exists"]:
            status_icon = click.style("✓", fg="green")
            version = info["version"] or "unknown"
            click.echo(f" {status_icon} {name} ({info['language']}, {info['layer']}) - {version}")
        else:
            status_icon = click.style("✗", fg="red")
            click.echo(f" {status_icon} {name} (not downloaded)")

    # Compiled clients
    client_compiled_dir = compiled_dir / "clients"
    if client_compiled_dir.exists():
        # Hoisted out of the loop: previously re-imported per directory.
        import json

        click.echo("\nCompiled clients:")
        for client_dir in client_compiled_dir.iterdir():
            if client_dir.is_dir():
                index_file = client_dir / "index.json"
                if index_file.exists():
                    with open(index_file) as f:
                        # Renamed from `index` to avoid shadowing the
                        # module-level `index` command function.
                        index_data = json.load(f)
                    funcs = len(index_data.get("functions", {}))
                    structs = len(index_data.get("structs", {}))
                    click.echo(f" {client_dir.name}: {funcs} functions, {structs} structs")

    db_path = data_path / "lancedb"
    click.echo(f"\nVector index: {db_path}")
    click.echo(f" Exists: {db_path.exists()}")
    if db_path.exists():
        try:
            from .indexer.embedder import EmbeddingSearcher

            searcher = EmbeddingSearcher(db_path)
            stats = searcher.get_stats()
            click.echo(f" Total chunks: {stats['total_chunks']}")
        except Exception as e:
            # Best-effort report: a broken index should not crash `status`.
            click.echo(f" Error reading index: {e}")
580
+
581
+
582
@main.command("models")
def list_models():
    """List available embedding models."""
    info = get_model_info()
    click.echo(info)
586
+
587
+
588
# Entry point when executed directly (e.g. `python -m ethereum_mcp.cli`).
if __name__ == "__main__":
    main()