eth-mcp 0.2.0 (eth_mcp-0.2.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eth_mcp-0.2.0.dist-info/METADATA +332 -0
- eth_mcp-0.2.0.dist-info/RECORD +21 -0
- eth_mcp-0.2.0.dist-info/WHEEL +4 -0
- eth_mcp-0.2.0.dist-info/entry_points.txt +3 -0
- ethereum_mcp/__init__.py +3 -0
- ethereum_mcp/cli.py +589 -0
- ethereum_mcp/clients.py +363 -0
- ethereum_mcp/config.py +324 -0
- ethereum_mcp/expert/__init__.py +1 -0
- ethereum_mcp/expert/guidance.py +300 -0
- ethereum_mcp/indexer/__init__.py +8 -0
- ethereum_mcp/indexer/chunker.py +563 -0
- ethereum_mcp/indexer/client_compiler.py +725 -0
- ethereum_mcp/indexer/compiler.py +245 -0
- ethereum_mcp/indexer/downloader.py +521 -0
- ethereum_mcp/indexer/embedder.py +627 -0
- ethereum_mcp/indexer/manifest.py +411 -0
- ethereum_mcp/logging.py +85 -0
- ethereum_mcp/models.py +126 -0
- ethereum_mcp/server.py +555 -0
- ethereum_mcp/tools/__init__.py +1 -0
ethereum_mcp/indexer/client_compiler.py

@@ -0,0 +1,725 @@
"""Compile Ethereum client source code into structured JSON for indexing.

Supports multiple languages:
- Rust: reth, lighthouse (tree-sitter)
- Go: geth, prysm, erigon (tree-sitter)
- Java: teku (regex-based)
- C#: nethermind (regex-based)
- Nim: nimbus-eth2 (regex-based)

Each client implementation provides different perspectives on the same protocol,
making cross-client search valuable for understanding consensus.
"""

import json
import re
from collections.abc import Callable
from dataclasses import asdict, dataclass
from pathlib import Path

from ..logging import get_logger

logger = get_logger("client_compiler")

# Try to import tree-sitter parsers
TREE_SITTER_RUST = False
TREE_SITTER_GO = False

try:
    import tree_sitter_rust as ts_rust
    from tree_sitter import Language, Parser
    TREE_SITTER_RUST = True
except ImportError:
    pass

try:
    import tree_sitter_go as ts_go
    from tree_sitter import Language, Parser
    TREE_SITTER_GO = True
except ImportError:
    pass
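
The two flags above only record whether the optional tree-sitter grammar packages could be imported; as the inline comment in RustClientParser._parse notes, extraction in this version is regex-based either way. A quick way to see what resolved at runtime (an editorial sketch, not part of the package; the import path follows the wheel layout listed above):

    from ethereum_mcp.indexer import client_compiler

    # True only if tree_sitter plus the matching grammar package imported successfully.
    print("rust grammar:", client_compiler.TREE_SITTER_RUST)
    print("go grammar:", client_compiler.TREE_SITTER_GO)
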
@dataclass
class ExtractedItem:
    """An extracted code item."""

    kind: str  # function, struct, enum, interface, type
    name: str
    signature: str
    body: str
    doc_comment: str | None
    file_path: str
    line_number: int
    visibility: str
    language: str  # rust, go, java, csharp, nim
    client: str  # reth, geth, lighthouse, etc.


@dataclass
class ExtractedConstant:
    """An extracted constant."""

    name: str
    value: str
    type_annotation: str | None
    doc_comment: str | None
    file_path: str
    line_number: int
    language: str
    client: str
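
For orientation, this is what one populated record looks like; compile_client() below serializes these dataclasses with asdict() into items.json and constants.json. Every field value in this sketch is invented for illustration:

    ExtractedItem(
        kind="function",
        name="validate_header",
        signature="fn validate_header(header: &Header) -> bool",
        body="fn validate_header(header: &Header) -> bool { ... }",
        doc_comment="Validate a block header.",
        file_path="crates/consensus/src/validation.rs",
        line_number=42,
        visibility="pub",
        language="rust",
        client="reth",
    )
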
class RustClientParser:
    """Parse Rust client code (reth, lighthouse)."""

    def __init__(self, client: str):
        self.client = client
        if TREE_SITTER_RUST:
            self.parser = Parser(Language(ts_rust.language()))
        else:
            self.parser = None

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a Rust file and extract items."""
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return [], []

        items, constants = self._parse(content, str(file_path))

        # Tag with language and client
        for item in items:
            item.language = "rust"
            item.client = self.client
        for const in constants:
            const.language = "rust"
            const.client = self.client

        return items, constants

    def _parse(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse Rust code using tree-sitter or regex fallback."""
        items = []
        constants = []
        lines = content.split("\n")

        # Use regex parsing for simplicity (tree-sitter can be added later)
        # Function pattern
        fn_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?(?:async\s+)?fn\s+(\w+)\s*(<[^>]+>)?\s*\(([^)]*)\)(\s*->\s*[^{]+)?\s*\{",
            re.MULTILINE,
        )

        for match in fn_pattern.finditer(content):
            visibility = match.group(2) or ""
            visibility = visibility.strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            params = match.group(5) or ""
            ret = match.group(6) or ""

            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find function body
            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            body = content[match.start():idx]
            doc_comment = self._get_doc_comment(lines, line_num - 1)

            items.append(ExtractedItem(
                kind="function",
                name=name,
                signature=f"fn {name}({params}){ret}".strip(),
                body=body,
                doc_comment=doc_comment,
                file_path=file_path,
                line_number=line_num,
                visibility=visibility or "pub",
                language="",
                client="",
            ))

        # Struct pattern
        struct_pattern = re.compile(r"^(\s*)(pub(?:\([^)]+\))?\s+)?struct\s+(\w+)", re.MULTILINE)
        for match in struct_pattern.finditer(content):
            visibility = (match.group(2) or "").strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find struct body
            idx = match.end()
            brace_count = 0
            while idx < len(content):
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    if brace_count <= 1:
                        idx += 1
                        break
                    brace_count -= 1
                elif content[idx] == ";" and brace_count == 0:
                    idx += 1
                    break
                idx += 1

            body = content[match.start():idx]
            doc_comment = self._get_doc_comment(lines, line_num - 1)

            items.append(ExtractedItem(
                kind="struct",
                name=name,
                signature=f"struct {name}",
                body=body,
                doc_comment=doc_comment,
                file_path=file_path,
                line_number=line_num,
                visibility=visibility or "pub",
                language="",
                client="",
            ))

        # Constants
        const_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?const\s+(\w+)\s*:\s*([^=]+)\s*=\s*([^;]+);",
            re.MULTILINE,
        )
        for match in const_pattern.finditer(content):
            visibility = (match.group(2) or "").strip()
            name = match.group(3)
            type_ann = match.group(4).strip()
            value = match.group(5).strip()

            start = match.start()
            line_num = content[:start].count("\n") + 1
            doc_comment = self._get_doc_comment(lines, line_num - 1)

            constants.append(ExtractedConstant(
                name=name,
                value=value,
                type_annotation=type_ann,
                doc_comment=doc_comment,
                file_path=file_path,
                line_number=line_num,
                language="",
                client="",
            ))

        return items, constants

    def _get_doc_comment(self, lines: list[str], line_idx: int) -> str | None:
        """Get doc comment ending at the given line index."""
        doc_lines = []
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("///") or line.startswith("//!"):
                doc_lines.insert(0, line[3:].strip())
                idx -= 1
            elif line == "" or line.startswith("#["):
                idx -= 1
            else:
                break

        return "\n".join(doc_lines) if doc_lines else None
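
To make the regex pass above concrete, here is a small round trip through RustClientParser (an editorial sketch; the Rust snippet, constant value, and temporary path are all invented, and the import path follows the wheel layout listed above):

    import tempfile
    from pathlib import Path

    from ethereum_mcp.indexer.client_compiler import RustClientParser

    rust_snippet = '''\
    /// Maximum number of blobs allowed per block.
    pub const MAX_BLOBS: usize = 6;

    /// Validate a block header.
    pub fn validate_header(header: &Header) -> bool {
        header.number > 0
    }
    '''

    with tempfile.TemporaryDirectory() as tmp:
        src = Path(tmp) / "example.rs"
        src.write_text(rust_snippet)
        items, constants = RustClientParser("reth").parse_file(src)
        # items[0]: kind="function", name="validate_header", doc_comment="Validate a block header."
        # constants[0]: name="MAX_BLOBS", type_annotation="usize", value="6"
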
class GoClientParser:
    """Parse Go client code (geth, prysm, erigon)."""

    def __init__(self, client: str):
        self.client = client
        if TREE_SITTER_GO:
            self.parser = Parser(Language(ts_go.language()))
        else:
            self.parser = None

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a Go file and extract items."""
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return [], []

        items, constants = self._parse(content, str(file_path))

        for item in items:
            item.language = "go"
            item.client = self.client
        for const in constants:
            const.language = "go"
            const.client = self.client

        return items, constants

    def _parse(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse Go code."""
        items = []
        constants = []
        lines = content.split("\n")

        # Function pattern (Go uses capitalization for export)
        # func Name(params) return { ... }
        # func (r *Receiver) Name(params) return { ... }
        fn_pattern = re.compile(
            r"^func\s+(?:\([^)]+\)\s+)?([A-Z]\w*)\s*\(([^)]*)\)\s*([^{]*)\{",
            re.MULTILINE,
        )

        for match in fn_pattern.finditer(content):
            name = match.group(1)
            params = match.group(2) or ""
            ret = match.group(3).strip()

            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find function body
            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            body = content[match.start():idx]
            doc_comment = self._get_doc_comment(lines, line_num - 1)

            items.append(ExtractedItem(
                kind="function",
                name=name,
                signature=f"func {name}({params}) {ret}".strip(),
                body=body,
                doc_comment=doc_comment,
                file_path=file_path,
                line_number=line_num,
                visibility="pub",  # Capitalized = exported in Go
                language="",
                client="",
            ))

        # Struct pattern
        struct_pattern = re.compile(r"^type\s+([A-Z]\w*)\s+struct\s*\{", re.MULTILINE)
        for match in struct_pattern.finditer(content):
            name = match.group(1)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find struct body
            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            body = content[match.start():idx]
            doc_comment = self._get_doc_comment(lines, line_num - 1)

            items.append(ExtractedItem(
                kind="struct",
                name=name,
                signature=f"type {name} struct",
                body=body,
                doc_comment=doc_comment,
                file_path=file_path,
                line_number=line_num,
                visibility="pub",
                language="",
                client="",
            ))

        # Interface pattern
        iface_pattern = re.compile(r"^type\s+([A-Z]\w*)\s+interface\s*\{", re.MULTILINE)
        for match in iface_pattern.finditer(content):
            name = match.group(1)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            body = content[match.start():idx]
            doc_comment = self._get_doc_comment(lines, line_num - 1)

            items.append(ExtractedItem(
                kind="interface",
                name=name,
                signature=f"type {name} interface",
                body=body,
                doc_comment=doc_comment,
                file_path=file_path,
                line_number=line_num,
                visibility="pub",
                language="",
                client="",
            ))

        # Constants
        const_pattern = re.compile(r"^\s*([A-Z]\w*)\s*=\s*(.+)$", re.MULTILINE)
        in_const_block = False

        for i, line in enumerate(lines):
            stripped = line.strip()
            if stripped == "const (" or stripped.startswith("const ("):
                in_const_block = True
                continue
            if in_const_block and stripped == ")":
                in_const_block = False
                continue

            if in_const_block or stripped.startswith("const "):
                match = const_pattern.match(stripped.replace("const ", ""))
                if match:
                    name = match.group(1)
                    value = match.group(2).strip()

                    constants.append(ExtractedConstant(
                        name=name,
                        value=value,
                        type_annotation=None,
                        doc_comment=self._get_doc_comment(lines, i),
                        file_path=file_path,
                        line_number=i + 1,
                        language="",
                        client="",
                    ))

        return items, constants

    def _get_doc_comment(self, lines: list[str], line_idx: int) -> str | None:
        """Get doc comment ending at the given line index."""
        doc_lines = []
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("//"):
                doc_lines.insert(0, line[2:].strip())
                idx -= 1
            elif line == "":
                idx -= 1
            else:
                break

        return "\n".join(doc_lines) if doc_lines else None
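
Similarly, the const-block state machine above collects grouped Go constants. A short editorial sketch (the snippet, values, and file name are invented; the internal _parse helper is called directly for brevity):

    from ethereum_mcp.indexer.client_compiler import GoClientParser

    go_snippet = '''\
    const (
        MaxPeers = 50
        MinPeers = 5
    )
    '''

    items, constants = GoClientParser("geth")._parse(go_snippet, "p2p/limits.go")
    # items == []; constants hold MaxPeers = 50 and MinPeers = 5
    # (type_annotation is None, since Go const blocks often omit the type)
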
class JavaClientParser:
    """Parse Java client code (teku)."""

    def __init__(self, client: str):
        self.client = client

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a Java file and extract items."""
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return [], []

        items, constants = self._parse(content, str(file_path))

        for item in items:
            item.language = "java"
            item.client = self.client
        for const in constants:
            const.language = "java"
            const.client = self.client

        return items, constants

    def _parse(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse Java code."""
        items = []
        constants = []
        lines = content.split("\n")

        # Public method pattern
        method_pattern = re.compile(
            r"^\s*public\s+(?:static\s+)?(?:<[^>]+>\s+)?(\w+(?:<[^>]+>)?)\s+(\w+)\s*\(([^)]*)\)",
            re.MULTILINE,
        )

        for match in method_pattern.finditer(content):
            return_type = match.group(1)
            name = match.group(2)
            params = match.group(3)

            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find method body
            idx = match.end()
            while idx < len(content) and content[idx] != "{":
                idx += 1

            if idx < len(content):
                brace_count = 1
                idx += 1
                while idx < len(content) and brace_count > 0:
                    if content[idx] == "{":
                        brace_count += 1
                    elif content[idx] == "}":
                        brace_count -= 1
                    idx += 1

            body = content[match.start():idx]
            doc_comment = self._get_javadoc(lines, line_num - 1)

            items.append(ExtractedItem(
                kind="function",
                name=name,
                signature=f"public {return_type} {name}({params})",
                body=body,
                doc_comment=doc_comment,
                file_path=file_path,
                line_number=line_num,
                visibility="pub",
                language="",
                client="",
            ))

        # Class pattern
        class_pattern = re.compile(r"^\s*public\s+(?:final\s+)?class\s+(\w+)", re.MULTILINE)
        for match in class_pattern.finditer(content):
            name = match.group(1)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            items.append(ExtractedItem(
                kind="struct",  # Treat classes as structs for uniformity
                name=name,
                signature=f"class {name}",
                body="",  # Classes are too large to include fully
                doc_comment=self._get_javadoc(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                visibility="pub",
                language="",
                client="",
            ))

        # Constants (public static final)
        const_pattern = re.compile(
            r"^\s*public\s+static\s+final\s+(\w+)\s+(\w+)\s*=\s*(.+);",
            re.MULTILINE,
        )
        for match in const_pattern.finditer(content):
            type_ann = match.group(1)
            name = match.group(2)
            value = match.group(3).strip()

            start = match.start()
            line_num = content[:start].count("\n") + 1

            constants.append(ExtractedConstant(
                name=name,
                value=value,
                type_annotation=type_ann,
                doc_comment=self._get_javadoc(lines, line_num - 1),
                file_path=file_path,
                line_number=line_num,
                language="",
                client="",
            ))

        return items, constants

    def _get_javadoc(self, lines: list[str], line_idx: int) -> str | None:
        """Get Javadoc comment ending at the given line index."""
        doc_lines = []
        idx = line_idx - 1

        # Look for /** ... */ block
        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("*") and not line.startswith("*/"):
                doc_lines.insert(0, line.lstrip("* ").strip())
                idx -= 1
            elif line.startswith("/**"):
                break
            elif line.endswith("*/"):
                doc_lines.insert(0, line.rstrip("*/").strip())
                idx -= 1
            elif line == "":
                idx -= 1
            else:
                break

        return "\n".join(doc_lines) if doc_lines else None
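
And for Java, the "public static final" pattern above is what lifts protocol constants out of teku-style classes. An editorial sketch (class name and values are invented; the internal _parse helper is called directly):

    from ethereum_mcp.indexer.client_compiler import JavaClientParser

    java_snippet = """\
    public class SpecConfig {
      public static final int SECONDS_PER_SLOT = 12;
    }
    """

    items, constants = JavaClientParser("teku")._parse(java_snippet, "SpecConfig.java")
    # constants[0]: name="SECONDS_PER_SLOT", type_annotation="int", value="12"
    # items[0]: kind="struct", name="SpecConfig" (classes are recorded without their body)
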
def compile_client(
    source_dir: Path,
    output_dir: Path,
    client_name: str,
    language: str,
    progress_callback: Callable[[str], None] | None = None,
) -> dict:
    """
    Compile client source code into JSON extracts.

    Args:
        source_dir: Directory containing client source files
        output_dir: Directory to write JSON output
        client_name: Client name (reth, geth, lighthouse, etc.)
        language: Source language (rust, go, java, csharp, nim)
        progress_callback: Optional callback for progress updates

    Returns:
        Statistics about extraction
    """
    def log(msg: str):
        if progress_callback:
            progress_callback(msg)
        else:
            logger.info(msg)

    # Select parser based on language
    if language == "rust":
        parser = RustClientParser(client_name)
        patterns = ["**/*.rs"]
    elif language == "go":
        parser = GoClientParser(client_name)
        patterns = ["**/*.go"]
    elif language == "java":
        parser = JavaClientParser(client_name)
        patterns = ["**/*.java"]
    else:
        log(f"Warning: Unsupported language {language} for {client_name}")
        return {"error": f"Unsupported language: {language}"}

    all_items = []
    all_constants = []

    # Find source files
    source_files = []
    for pattern in patterns:
        source_files.extend(source_dir.glob(pattern))

    log(f" Found {len(source_files)} {language} files")

    # Parse each file
    for file_path in source_files:
        items, constants = parser.parse_file(file_path)

        import contextlib

        # Make paths relative
        for item in items:
            with contextlib.suppress(ValueError):
                item.file_path = str(Path(item.file_path).relative_to(source_dir))
        for const in constants:
            with contextlib.suppress(ValueError):
                const.file_path = str(Path(const.file_path).relative_to(source_dir))

        all_items.extend(items)
        all_constants.extend(constants)

    # Write output
    output_dir.mkdir(parents=True, exist_ok=True)

    items_file = output_dir / "items.json"
    with open(items_file, "w") as f:
        json.dump([asdict(item) for item in all_items], f, indent=2)

    constants_file = output_dir / "constants.json"
    with open(constants_file, "w") as f:
        json.dump([asdict(const) for const in all_constants], f, indent=2)

    # Build index
    index = {
        "functions": {},
        "structs": {},
        "interfaces": {},
        "constants": {},
        "client": client_name,
        "language": language,
    }

    for item in all_items:
        category = f"{item.kind}s"
        if category in index:
            index[category][item.name] = {
                "file": item.file_path,
                "line": item.line_number,
            }

    for const in all_constants:
        index["constants"][const.name] = {
            "file": const.file_path,
            "line": const.line_number,
            "value": const.value,
        }

    index_file = output_dir / "index.json"
    with open(index_file, "w") as f:
        json.dump(index, f, indent=2)

    return {
        "client": client_name,
        "language": language,
        "files_processed": len(source_files),
        "items_extracted": len(all_items),
        "constants_extracted": len(all_constants),
        "functions": len([i for i in all_items if i.kind == "function"]),
        "structs": len([i for i in all_items if i.kind == "struct"]),
        "interfaces": len([i for i in all_items if i.kind == "interface"]),
    }
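
Putting it together, a hedged usage sketch for compile_client (the paths and client choice are hypothetical; any local checkout of a supported client would do):

    from pathlib import Path

    from ethereum_mcp.indexer.client_compiler import compile_client

    stats = compile_client(
        source_dir=Path.home() / "src" / "reth",
        output_dir=Path("compiled") / "reth",
        client_name="reth",
        language="rust",
        progress_callback=print,
    )
    # Returns a statistics dict, e.g. stats["files_processed"], stats["items_extracted"], ...
    print(stats["functions"], "functions,", stats["structs"], "structs")
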
def load_client_items(compiled_dir: Path) -> list[ExtractedItem]:
    """Load compiled items from JSON."""
    items_file = compiled_dir / "items.json"
    if not items_file.exists():
        return []

    with open(items_file) as f:
        data = json.load(f)

    return [ExtractedItem(**item) for item in data]


def load_client_constants(compiled_dir: Path) -> list[ExtractedConstant]:
    """Load compiled constants from JSON."""
    constants_file = compiled_dir / "constants.json"
    if not constants_file.exists():
        return []

    with open(constants_file) as f:
        data = json.load(f)

    return [ExtractedConstant(**item) for item in data]