eth-mcp 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,725 @@
1
+ """Compile Ethereum client source code into structured JSON for indexing.
2
+
3
+ Supports multiple languages:
4
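+ - Rust: reth, lighthouse (regex-based; a tree-sitter parser is created when available but not yet used)
5
+ - Go: geth, prysm, erigon (regex-based; a tree-sitter parser is created when available but not yet used)
6
+ - Java: teku (regex-based)
7
+ - C#: nethermind (no parser in this module yet; compile_client reports it as unsupported)
8
+ - Nim: nimbus-eth2 (no parser in this module yet; compile_client reports it as unsupported)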
9
+
10
+ Each client implementation provides different perspectives on the same protocol,
11
+ making cross-client search valuable for understanding consensus.
12
+ """
13
+
14
+ import json
15
+ import re
16
+ from collections.abc import Callable
17
+ from dataclasses import asdict, dataclass
18
+ from pathlib import Path
19
+
20
+ from ..logging import get_logger
21
+
22
+ logger = get_logger("client_compiler")
23
+
24
+ # Try to import tree-sitter parsers
25
+ TREE_SITTER_RUST = False
26
+ TREE_SITTER_GO = False
27
+
28
+ try:
29
+ import tree_sitter_rust as ts_rust
30
+ from tree_sitter import Language, Parser
31
+ TREE_SITTER_RUST = True
32
+ except ImportError:
33
+ pass
34
+
35
+ try:
36
+ import tree_sitter_go as ts_go
37
+ from tree_sitter import Language, Parser
38
+ TREE_SITTER_GO = True
39
+ except ImportError:
40
+ pass
41
+
42
+
43
+ @dataclass
44
+ class ExtractedItem:
45
+ """An extracted code item."""
46
+
47
+ kind: str # function, struct, enum, interface, type
48
+ name: str
49
+ signature: str
50
+ body: str
51
+ doc_comment: str | None
52
+ file_path: str
53
+ line_number: int
54
+ visibility: str
55
+ language: str # rust, go, java, csharp, nim
56
+ client: str # reth, geth, lighthouse, etc.
57
+
58
+
59
+ @dataclass
60
+ class ExtractedConstant:
61
+ """An extracted constant."""
62
+
63
+ name: str
64
+ value: str
65
+ type_annotation: str | None
66
+ doc_comment: str | None
67
+ file_path: str
68
+ line_number: int
69
+ language: str
70
+ client: str
71
+
72
+
73
class RustClientParser:
    """Parse Rust client code (reth, lighthouse).

    Extraction is regex-based. A tree-sitter parser is created when the
    bindings were importable, but it is only stored on ``self.parser`` and is
    not consulted while parsing.

    Only ``pub`` functions and structs are extracted; constants are extracted
    regardless of visibility. Bodies are delimited by counting braces in the
    raw text, so braces inside string literals or comments can skew the
    captured body.
    """

    # Optional ``pub`` / ``pub(crate)`` / ``pub(in path)`` prefix (one capture group).
    _VISIBILITY = r"(pub(?:\([^)]+\))?\s+)?"

    # Indentation is matched with ``[ \t]*`` rather than ``\s*``: ``\s`` also
    # matches newlines, which would let a match begin on a blank line above
    # the definition and shift both the reported line number and the body.
    # Groups: 1 indent, 2 visibility, 3 name, 4 generics, 5 params, 6 return.
    _FN_PATTERN = re.compile(
        r"^([ \t]*)"
        + _VISIBILITY
        # Qualifiers that may sit between the visibility and ``fn``.
        + r"(?:(?:const|async|unsafe)\s+)*"
        + r'(?:extern\s+(?:"[^"]*"\s+)?)?'
        + r"fn\s+(\w+)\s*(<[^>]+>)?\s*\(([^)]*)\)(\s*->\s*[^{]+)?\s*\{",
        re.MULTILINE,
    )
    # Groups: 1 indent, 2 visibility, 3 name.
    _STRUCT_PATTERN = re.compile(
        r"^([ \t]*)" + _VISIBILITY + r"struct\s+(\w+)",
        re.MULTILINE,
    )
    # Groups: 1 indent, 2 visibility, 3 name, 4 type, 5 value.
    _CONST_PATTERN = re.compile(
        r"^([ \t]*)" + _VISIBILITY + r"const\s+(\w+)\s*:\s*([^=]+)\s*=\s*([^;]+);",
        re.MULTILINE,
    )

    def __init__(self, client: str):
        """Remember the client name and create a tree-sitter parser if available."""
        self.client = client
        # Not used by _parse; stored for a future tree-sitter implementation.
        self.parser = Parser(Language(ts_rust.language())) if TREE_SITTER_RUST else None

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a Rust file and return its extracted items and constants.

        Files that cannot be read or are not valid UTF-8 yield two empty
        lists. Every returned entry is tagged with language ``"rust"`` and
        this parser's client name.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable file contributes nothing.
            return [], []

        items, constants = self._parse(content, str(file_path))

        for entry in (*items, *constants):
            entry.language = "rust"
            entry.client = self.client

        return items, constants

    def _parse(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Extract public functions, public structs and all constants via regex.

        ``language`` and ``client`` are left empty here; ``parse_file`` fills
        them in afterwards.
        """
        items = []
        constants = []
        lines = content.split("\n")

        for match in self._FN_PATTERN.finditer(content):
            visibility = (match.group(2) or "").strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            params = match.group(5) or ""
            ret = match.group(6) or ""
            line_num = self._line_number(content, match.start())
            # The pattern ends on the opening brace, so scanning starts inside the body.
            end = self._block_end(content, match.end())

            items.append(ExtractedItem(
                kind="function",
                name=name,
                signature=f"fn {name}({params}){ret}".strip(),
                body=content[match.start():end],
                doc_comment=self._get_doc_comment(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                visibility=visibility,
                language="",
                client="",
            ))

        for match in self._STRUCT_PATTERN.finditer(content):
            visibility = (match.group(2) or "").strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            line_num = self._line_number(content, match.start())
            end = self._struct_end(content, match.end())

            items.append(ExtractedItem(
                kind="struct",
                name=name,
                signature=f"struct {name}",
                body=content[match.start():end],
                doc_comment=self._get_doc_comment(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                visibility=visibility,
                language="",
                client="",
            ))

        for match in self._CONST_PATTERN.finditer(content):
            line_num = self._line_number(content, match.start())

            constants.append(ExtractedConstant(
                name=match.group(3),
                value=match.group(5).strip(),
                type_annotation=match.group(4).strip(),
                doc_comment=self._get_doc_comment(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                language="",
                client="",
            ))

        return items, constants

    @staticmethod
    def _line_number(content: str, offset: int) -> int:
        """Return the 1-based line number of the character at ``offset``."""
        return content.count("\n", 0, offset) + 1

    @staticmethod
    def _block_end(content: str, idx: int) -> int:
        """Return the index just past the brace that closes an already-open block.

        ``idx`` must point just after the opening ``{``. If the block is never
        closed, ``len(content)`` is returned.
        """
        depth = 1
        while idx < len(content) and depth > 0:
            char = content[idx]
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
            idx += 1
        return idx

    @staticmethod
    def _struct_end(content: str, idx: int) -> int:
        """Return the index just past a struct definition starting at ``idx``.

        ``idx`` points just after the struct name. The definition ends at the
        brace closing the outermost ``{...}`` block, or at the first ``;``
        outside any braces for unit and tuple structs.
        """
        depth = 0  # {...} nesting
        nesting = 0  # (...) / [...] nesting
        while idx < len(content):
            char = content[idx]
            if char == "{":
                depth += 1
            elif char == "}":
                if depth <= 1:
                    return idx + 1
                depth -= 1
            elif char in "([":
                nesting += 1
            elif char in ")]":
                # Clamp so a stray closer cannot hide the terminating ';'.
                nesting = max(0, nesting - 1)
            elif char == ";" and depth == 0 and nesting == 0:
                # A ';' inside e.g. ``([u8; 32])`` is part of the type, not the end.
                return idx + 1
            idx += 1
        return idx

    def _get_doc_comment(self, lines: list[str], line_idx: int) -> str | None:
        """Collect the ``///`` / ``//!`` comment lines above ``lines[line_idx]``.

        Scanning walks upwards from the line above ``line_idx``. Blank lines
        and ``#[...]`` attribute lines are skipped, and the scan stops at the
        first line of any other kind. Returns the comment text joined with
        newlines, or None when no comment line was found.
        """
        collected = []
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith(("///", "//!")):
                collected.append(line[3:].strip())
            elif line and not line.startswith("#["):
                break
            idx -= 1

        # Lines were gathered bottom-up; restore source order.
        collected.reverse()
        return "\n".join(collected) if collected else None
243
+
244
+
245
class GoClientParser:
    """Parse Go client code (geth, prysm, erigon).

    Extraction is regex-based. A tree-sitter parser is created when the
    bindings were importable, but it is only stored on ``self.parser`` and is
    not consulted while parsing.

    Only exported (capitalised) functions, methods, structs, interfaces and
    constants are extracted. Function and type declarations must start in
    column 0. Bodies are delimited by counting braces in the raw text, so
    braces inside string literals or comments can skew the captured body.
    """

    # func Name(params) result { ... }
    # func (r *Receiver) Name(params) result { ... }
    # ``interface{}`` / ``struct{}`` in the result are consumed as part of the
    # signature so their braces are not mistaken for the body's opening brace.
    # Groups: 1 name, 2 params, 3 result.
    _FUNC_PATTERN = re.compile(
        r"^func\s+(?:\([^)]+\)\s+)?([A-Z]\w*)\s*\(([^)]*)\)\s*"
        r"((?:interface\{\}|struct\{\}|[^{])*)\{",
        re.MULTILINE,
    )
    _STRUCT_PATTERN = re.compile(r"^type\s+([A-Z]\w*)\s+struct\s*\{", re.MULTILINE)
    _IFACE_PATTERN = re.compile(r"^type\s+([A-Z]\w*)\s+interface\s*\{", re.MULTILINE)
    # ``Name = value`` or ``Name Type = value``. Groups: 1 name, 2 type, 3 value.
    _CONST_PATTERN = re.compile(r"^\s*([A-Z]\w*)(?:\s+([\w.\[\]*]+))?\s*=\s*(.+)$")

    def __init__(self, client: str):
        """Remember the client name and create a tree-sitter parser if available."""
        self.client = client
        # Not used by _parse; stored for a future tree-sitter implementation.
        self.parser = Parser(Language(ts_go.language())) if TREE_SITTER_GO else None

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a Go file and return its extracted items and constants.

        Files that cannot be read or are not valid UTF-8 yield two empty
        lists. Every returned entry is tagged with language ``"go"`` and this
        parser's client name.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable file contributes nothing.
            return [], []

        items, constants = self._parse(content, str(file_path))

        for entry in (*items, *constants):
            entry.language = "go"
            entry.client = self.client

        return items, constants

    def _parse(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Extract exported functions, structs, interfaces and constants via regex.

        Items are returned grouped by kind: functions, then structs, then
        interfaces. ``language`` and ``client`` are left empty here;
        ``parse_file`` fills them in afterwards.
        """
        items = []
        constants = []
        lines = content.split("\n")

        def add_item(kind: str, name: str, signature: str, match: re.Match) -> None:
            # Every pattern ends on the opening brace, so the body scan starts inside it.
            line_num = self._line_number(content, match.start())
            end = self._block_end(content, match.end())
            items.append(ExtractedItem(
                kind=kind,
                name=name,
                signature=signature,
                body=content[match.start():end],
                doc_comment=self._get_doc_comment(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                visibility="pub",  # Capitalized = exported in Go
                language="",
                client="",
            ))

        for match in self._FUNC_PATTERN.finditer(content):
            name = match.group(1)
            params = match.group(2) or ""
            ret = match.group(3).strip()
            add_item("function", name, f"func {name}({params}) {ret}".strip(), match)

        for match in self._STRUCT_PATTERN.finditer(content):
            name = match.group(1)
            add_item("struct", name, f"type {name} struct", match)

        for match in self._IFACE_PATTERN.finditer(content):
            name = match.group(1)
            add_item("interface", name, f"type {name} interface", match)

        in_const_block = False
        for index, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith("const ("):
                in_const_block = True
                continue
            if in_const_block and stripped == ")":
                in_const_block = False
                continue
            if not (in_const_block or stripped.startswith("const ")):
                continue

            parsed = self._split_const(stripped)
            if parsed is None:
                continue
            name, type_ann, value = parsed

            constants.append(ExtractedConstant(
                name=name,
                value=value,
                type_annotation=type_ann,
                doc_comment=self._get_doc_comment(lines, index),
                file_path=file_path,
                line_number=index + 1,
                language="",
                client="",
            ))

        return items, constants

    @classmethod
    def _split_const(cls, stripped: str) -> tuple[str, str | None, str] | None:
        """Split one stripped constant line into ``(name, type, value)``.

        Accepts both ``const Name = value`` and the bare ``Name = value`` form
        used inside ``const ( ... )`` blocks, each with an optional type after
        the name. Returns None for lines that are not an exported constant
        assignment.
        """
        # Only the leading keyword is dropped; "const " inside the value must survive.
        match = cls._CONST_PATTERN.match(stripped.removeprefix("const "))
        if match is None:
            return None
        name, type_ann, value = match.groups()
        return name, type_ann, value.strip()

    @staticmethod
    def _line_number(content: str, offset: int) -> int:
        """Return the 1-based line number of the character at ``offset``."""
        return content.count("\n", 0, offset) + 1

    @staticmethod
    def _block_end(content: str, idx: int) -> int:
        """Return the index just past the brace that closes an already-open block.

        ``idx`` must point just after the opening ``{``. If the block is never
        closed, ``len(content)`` is returned.
        """
        depth = 1
        while idx < len(content) and depth > 0:
            char = content[idx]
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
            idx += 1
        return idx

    def _get_doc_comment(self, lines: list[str], line_idx: int) -> str | None:
        """Collect the ``//`` comment lines above ``lines[line_idx]``.

        Scanning walks upwards from the line above ``line_idx``. Blank lines
        are skipped, and the scan stops at the first line that is neither
        blank nor a ``//`` comment. Returns the comment text joined with
        newlines, or None when no comment line was found.
        """
        collected = []
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("//"):
                collected.append(line[2:].strip())
            elif line:
                break
            idx -= 1

        # Lines were gathered bottom-up; restore source order.
        collected.reverse()
        return "\n".join(collected) if collected else None
436
+
437
+
438
class JavaClientParser:
    """Parse Java client code (teku).

    Extraction is regex-based and limited to ``public`` members: methods,
    classes (reported with kind ``"struct"`` and an empty body) and
    ``public static final`` constants. Bodies are delimited by counting
    braces in the raw text, so braces inside string literals or comments can
    skew the captured body.
    """

    # Indentation is matched with ``[ \t]*`` rather than ``\s*``: ``\s`` also
    # matches newlines, which would let a match begin on a blank line above
    # the declaration and shift the reported line number and the body.
    # Groups: 1 return type, 2 name, 3 params.
    _METHOD_PATTERN = re.compile(
        r"^[ \t]*public\s+"
        r"(?:(?:static|final|abstract|synchronized|default|native)\s+)*"
        r"(?:<[^>]+>\s+)?(\w+(?:<[^>]+>)?)\s+(\w+)\s*\(([^)]*)\)",
        re.MULTILINE,
    )
    # Group 1: class name.
    _CLASS_PATTERN = re.compile(
        r"^[ \t]*public\s+(?:(?:final|abstract|static)\s+)*class\s+(\w+)",
        re.MULTILINE,
    )
    # Groups: 1 type, 2 name, 3 value.
    _CONST_PATTERN = re.compile(
        r"^[ \t]*public\s+static\s+final\s+(\w+)\s+(\w+)\s*=\s*(.+);",
        re.MULTILINE,
    )

    def __init__(self, client: str):
        """Remember the client name used to tag extracted entries."""
        self.client = client

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a Java file and return its extracted items and constants.

        Files that cannot be read or are not valid UTF-8 yield two empty
        lists. Every returned entry is tagged with language ``"java"`` and
        this parser's client name.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable file contributes nothing.
            return [], []

        items, constants = self._parse(content, str(file_path))

        for entry in (*items, *constants):
            entry.language = "java"
            entry.client = self.client

        return items, constants

    def _parse(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Extract public methods, public classes and public constants via regex.

        Items are returned grouped by kind: methods first, then classes.
        ``language`` and ``client`` are left empty here; ``parse_file`` fills
        them in afterwards.
        """
        items = []
        constants = []
        lines = content.split("\n")

        for match in self._METHOD_PATTERN.finditer(content):
            return_type = match.group(1)
            name = match.group(2)
            params = match.group(3)
            line_num = self._line_number(content, match.start())
            end = self._declaration_end(content, match.end())

            items.append(ExtractedItem(
                kind="function",
                name=name,
                signature=f"public {return_type} {name}({params})",
                body=content[match.start():end],
                doc_comment=self._get_javadoc(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                visibility="pub",
                language="",
                client="",
            ))

        for match in self._CLASS_PATTERN.finditer(content):
            name = match.group(1)
            line_num = self._line_number(content, match.start())

            items.append(ExtractedItem(
                kind="struct",  # Treat classes as structs for uniformity
                name=name,
                signature=f"class {name}",
                body="",  # Classes are too large to include fully
                doc_comment=self._get_javadoc(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                visibility="pub",
                language="",
                client="",
            ))

        for match in self._CONST_PATTERN.finditer(content):
            line_num = self._line_number(content, match.start())

            constants.append(ExtractedConstant(
                name=match.group(2),
                value=match.group(3).strip(),
                type_annotation=match.group(1),
                doc_comment=self._get_javadoc(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                language="",
                client="",
            ))

        return items, constants

    @staticmethod
    def _line_number(content: str, offset: int) -> int:
        """Return the 1-based line number of the character at ``offset``."""
        return content.count("\n", 0, offset) + 1

    @staticmethod
    def _declaration_end(content: str, idx: int) -> int:
        """Return the index just past a method declaration.

        ``idx`` points just after the parameter list. A ``;`` seen before any
        ``{`` marks a bodiless declaration (interface or abstract method) and
        ends it there, so the following member's body is not captured by
        mistake. Otherwise the matching closing brace ends the method. When
        neither character follows, ``len(content)`` is returned.
        """
        size = len(content)
        while idx < size and content[idx] not in "{;":
            idx += 1
        if idx >= size:
            return idx
        if content[idx] == ";":
            return idx + 1

        depth = 1
        idx += 1
        while idx < size and depth > 0:
            char = content[idx]
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
            idx += 1
        return idx

    def _get_javadoc(self, lines: list[str], line_idx: int) -> str | None:
        """Collect the block comment that ends above ``lines[line_idx]``.

        Scanning walks upwards from the line above ``line_idx``. Blank lines
        are skipped, as are ``@Annotation`` lines found before the comment is
        reached. Comment lines have their leading ``*`` decoration removed.
        The scan stops at the ``/**`` opener, whose trailing text is kept so
        single-line ``/** ... */`` comments are captured, or at the first
        line that does not belong to a comment.

        Returns the comment text joined with newlines, or None when no
        comment text was found.
        """
        collected = []
        inside = False  # True once a line of the comment itself has been seen
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("/**"):
                opener = line[3:].strip().removesuffix("*/").strip()
                if opener:
                    collected.append(opener)
                break
            if line.startswith("*") and not line.startswith("*/"):
                collected.append(line.lstrip("* ").removesuffix("*/").strip())
                inside = True
            elif line.endswith("*/"):
                closing = line.removesuffix("*/").strip()
                # A bare closing "*/" carries no text and must not add an empty line.
                if closing:
                    collected.append(closing)
                inside = True
            elif line == "" or (not inside and line.startswith("@")):
                pass
            else:
                break
            idx -= 1

        # Lines were gathered bottom-up; restore source order.
        collected.reverse()
        return "\n".join(collected) if collected else None
583
+
584
+
585
+ def compile_client(
586
+ source_dir: Path,
587
+ output_dir: Path,
588
+ client_name: str,
589
+ language: str,
590
+ progress_callback: Callable[[str], None] | None = None,
591
+ ) -> dict:
592
+ """
593
+ Compile client source code into JSON extracts.
594
+
595
+ Args:
596
+ source_dir: Directory containing client source files
597
+ output_dir: Directory to write JSON output
598
+ client_name: Client name (reth, geth, lighthouse, etc.)
599
+ language: Source language (rust, go, java, csharp, nim)
600
+ progress_callback: Optional callback for progress updates
601
+
602
+ Returns:
603
+ Statistics about extraction
604
+ """
605
+ def log(msg: str):
606
+ if progress_callback:
607
+ progress_callback(msg)
608
+ else:
609
+ logger.info(msg)
610
+
611
+ # Select parser based on language
612
+ if language == "rust":
613
+ parser = RustClientParser(client_name)
614
+ patterns = ["**/*.rs"]
615
+ elif language == "go":
616
+ parser = GoClientParser(client_name)
617
+ patterns = ["**/*.go"]
618
+ elif language == "java":
619
+ parser = JavaClientParser(client_name)
620
+ patterns = ["**/*.java"]
621
+ else:
622
+ log(f"Warning: Unsupported language {language} for {client_name}")
623
+ return {"error": f"Unsupported language: {language}"}
624
+
625
+ all_items = []
626
+ all_constants = []
627
+
628
+ # Find source files
629
+ source_files = []
630
+ for pattern in patterns:
631
+ source_files.extend(source_dir.glob(pattern))
632
+
633
+ log(f" Found {len(source_files)} {language} files")
634
+
635
+ # Parse each file
636
+ for file_path in source_files:
637
+ items, constants = parser.parse_file(file_path)
638
+
639
+ import contextlib
640
+
641
+ # Make paths relative
642
+ for item in items:
643
+ with contextlib.suppress(ValueError):
644
+ item.file_path = str(Path(item.file_path).relative_to(source_dir))
645
+ for const in constants:
646
+ with contextlib.suppress(ValueError):
647
+ const.file_path = str(Path(const.file_path).relative_to(source_dir))
648
+
649
+ all_items.extend(items)
650
+ all_constants.extend(constants)
651
+
652
+ # Write output
653
+ output_dir.mkdir(parents=True, exist_ok=True)
654
+
655
+ items_file = output_dir / "items.json"
656
+ with open(items_file, "w") as f:
657
+ json.dump([asdict(item) for item in all_items], f, indent=2)
658
+
659
+ constants_file = output_dir / "constants.json"
660
+ with open(constants_file, "w") as f:
661
+ json.dump([asdict(const) for const in all_constants], f, indent=2)
662
+
663
+ # Build index
664
+ index = {
665
+ "functions": {},
666
+ "structs": {},
667
+ "interfaces": {},
668
+ "constants": {},
669
+ "client": client_name,
670
+ "language": language,
671
+ }
672
+
673
+ for item in all_items:
674
+ category = f"{item.kind}s"
675
+ if category in index:
676
+ index[category][item.name] = {
677
+ "file": item.file_path,
678
+ "line": item.line_number,
679
+ }
680
+
681
+ for const in all_constants:
682
+ index["constants"][const.name] = {
683
+ "file": const.file_path,
684
+ "line": const.line_number,
685
+ "value": const.value,
686
+ }
687
+
688
+ index_file = output_dir / "index.json"
689
+ with open(index_file, "w") as f:
690
+ json.dump(index, f, indent=2)
691
+
692
+ return {
693
+ "client": client_name,
694
+ "language": language,
695
+ "files_processed": len(source_files),
696
+ "items_extracted": len(all_items),
697
+ "constants_extracted": len(all_constants),
698
+ "functions": len([i for i in all_items if i.kind == "function"]),
699
+ "structs": len([i for i in all_items if i.kind == "struct"]),
700
+ "interfaces": len([i for i in all_items if i.kind == "interface"]),
701
+ }
702
+
703
+
704
def load_client_items(compiled_dir: Path) -> list[ExtractedItem]:
    """Read ``items.json`` from ``compiled_dir`` and rebuild the items.

    Returns an empty list when the file does not exist. Each JSON object is
    passed to ``ExtractedItem`` as keyword arguments.
    """
    source = compiled_dir / "items.json"
    if not source.exists():
        return []

    with source.open() as handle:
        records = json.load(handle)

    return [ExtractedItem(**record) for record in records]
714
+
715
+
716
def load_client_constants(compiled_dir: Path) -> list[ExtractedConstant]:
    """Read ``constants.json`` from ``compiled_dir`` and rebuild the constants.

    Returns an empty list when the file does not exist. Each JSON object is
    passed to ``ExtractedConstant`` as keyword arguments.
    """
    source = compiled_dir / "constants.json"
    if not source.exists():
        return []

    with source.open() as handle:
        records = json.load(handle)

    return [ExtractedConstant(**record) for record in records]