ethspecify 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ethspecify might be problematic. Click here for more details.

ethspecify/cli.py CHANGED
@@ -3,7 +3,7 @@ import json
3
3
  import os
4
4
  import sys
5
5
 
6
- from .core import grep, replace_spec_tags, get_pyspec, get_latest_fork
6
+ from .core import grep, replace_spec_tags, get_pyspec, get_latest_fork, get_spec_item_history, load_config, run_checks
7
7
 
8
8
 
9
9
  def process(args):
@@ -13,86 +13,122 @@ def process(args):
13
13
  print(f"Error: The directory {repr(project_dir)} does not exist.")
14
14
  return 1
15
15
 
16
+ # Load config once from the project directory
17
+ config = load_config(project_dir)
18
+
16
19
  for f in grep(project_dir, r"<spec\b.*?>", args.exclude):
17
20
  print(f"Processing file: {f}")
18
- replace_spec_tags(f)
21
+ replace_spec_tags(f, config)
19
22
 
20
23
  return 0
21
24
 
22
25
 
23
26
  def list_tags(args):
24
- """List all available tags for a specific fork and preset."""
25
- # Get the specification data
26
- pyspec = get_pyspec()
27
- fork = args.fork
28
- preset = args.preset
27
+ """List all available tags with their fork history."""
28
+ preset = getattr(args, 'preset', 'mainnet')
29
+ return _list_tags_with_history(args, preset)
30
+
29
31
 
30
- # Validate that the fork exists
31
- if fork not in pyspec[preset]:
32
- print(f"Error: Fork '{fork}' not found in {preset} preset")
33
- available_forks = list(pyspec[preset].keys())
34
- print(f"Available forks: {', '.join(available_forks)}")
32
+ def _list_tags_with_history(args, preset):
33
+ """List all tags with their fork history."""
34
+ try:
35
+ history = get_spec_item_history(preset)
36
+ except ValueError as e:
37
+ print(f"Error: {e}")
35
38
  return 1
36
39
 
37
- # Format output based on requested format
38
40
  if args.format == "json":
39
41
  result = {
40
- "fork": fork,
41
42
  "preset": preset,
42
- "tags": {
43
- "functions": list(pyspec[preset][fork]['functions'].keys()),
44
- "constant_vars": list(pyspec[preset][fork]['constant_vars'].keys()),
45
- "custom_types": list(pyspec[preset][fork]['custom_types'].keys()),
46
- "ssz_objects": list(pyspec[preset][fork]['ssz_objects'].keys()),
47
- "dataclasses": list(pyspec[preset][fork]['dataclasses'].keys()),
48
- "preset_vars": list(pyspec[preset][fork]['preset_vars'].keys()),
49
- "config_vars": list(pyspec[preset][fork]['config_vars'].keys()),
50
- }
43
+ "mode": "history",
44
+ "history": history
51
45
  }
52
46
  print(json.dumps(result, indent=2))
53
47
  else:
54
- # Plain text output
55
- print(f"Available tags for {fork} fork ({preset} preset):")
56
- maybe_fork = f' fork="{fork}"' if fork != get_latest_fork() else ""
57
-
58
- print("\nFunctions:")
59
- for fn_name in sorted(pyspec[preset][fork]['functions'].keys()):
60
- if args.search is None or args.search.lower() in fn_name.lower():
61
- print(f" <spec fn=\"{fn_name}\"{maybe_fork} />")
62
-
63
- print("\nConstants:")
64
- for const_name in sorted(pyspec[preset][fork]['constant_vars'].keys()):
65
- if args.search is None or args.search.lower() in const_name.lower():
66
- print(f" <spec constant_var=\"{const_name}\"{maybe_fork} />")
67
-
68
- print("\nCustom Types:")
69
- for type_name in sorted(pyspec[preset][fork]['custom_types'].keys()):
70
- if args.search is None or args.search.lower() in type_name.lower():
71
- print(f" <spec custom_type=\"{type_name}\"{maybe_fork} />")
72
-
73
- print("\nSSZ Objects:")
74
- for obj_name in sorted(pyspec[preset][fork]['ssz_objects'].keys()):
75
- if args.search is None or args.search.lower() in obj_name.lower():
76
- print(f" <spec ssz_object=\"{obj_name}\"{maybe_fork} />")
77
-
78
- print("\nDataclasses:")
79
- for class_name in sorted(pyspec[preset][fork]['dataclasses'].keys()):
80
- if args.search is None or args.search.lower() in class_name.lower():
81
- print(f" <spec dataclass=\"{class_name}\"{maybe_fork} />")
82
-
83
- print("\nPreset Variables:")
84
- for var_name in sorted(pyspec[preset][fork]['preset_vars'].keys()):
85
- if args.search is None or args.search.lower() in var_name.lower():
86
- print(f" <spec preset_var=\"{var_name}\"{maybe_fork} />")
87
-
88
- print("\nConfig Variables:")
89
- for var_name in sorted(pyspec[preset][fork]['config_vars'].keys()):
90
- if args.search is None or args.search.lower() in var_name.lower():
91
- print(f" <spec config_var=\"{var_name}\"{maybe_fork} />")
48
+ print(f"Available tags across all forks ({preset} preset):")
49
+
50
+ def _print_items_with_history(category_name, items_dict, spec_attr):
51
+ """Helper to print items with their fork history."""
52
+ if not items_dict:
53
+ return
54
+ print(f"\n{category_name}:")
55
+ for item_name in sorted(items_dict.keys()):
56
+ if args.search is None or args.search.lower() in item_name.lower():
57
+ forks = items_dict[item_name]
58
+ fork_list = ", ".join(forks)
59
+ print(f" <spec {spec_attr}=\"{item_name}\" /> ({fork_list})")
60
+
61
+ _print_items_with_history("Functions", history['functions'], "fn")
62
+ _print_items_with_history("Constants", history['constant_vars'], "constant_var")
63
+ _print_items_with_history("Custom Types", history['custom_types'], "custom_type")
64
+ _print_items_with_history("SSZ Objects", history['ssz_objects'], "ssz_object")
65
+ _print_items_with_history("Dataclasses", history['dataclasses'], "dataclass")
66
+ _print_items_with_history("Preset Variables", history['preset_vars'], "preset_var")
67
+ _print_items_with_history("Config Variables", history['config_vars'], "config_var")
92
68
 
93
69
  return 0
94
70
 
95
71
 
72
def check(args):
    """Run checks to validate spec references.

    Resolves ``args.path`` to an absolute directory, loads its configuration
    with ``load_config`` and hands both to ``run_checks``. Every source-file
    error is printed first, followed by the sorted ``MISSING: <type>.<item>``
    lines.

    Returns:
        0 when nothing was reported and ``run_checks`` signalled success,
        1 otherwise (including when the directory does not exist).
    """
    project_dir = os.path.abspath(os.path.expanduser(args.path))
    if not os.path.isdir(project_dir):
        print(f"Error: The directory {repr(project_dir)} does not exist.")
        return 1

    # Load config
    config = load_config(project_dir)

    # Run checks
    success, results = run_checks(project_dir, config)

    # Collect all missing items and errors
    all_missing = []
    all_errors = []
    total_expected = 0

    for section_name, section_results in results.items():
        # Determine the type prefix from section name
        if "Config Variables" in section_name:
            type_prefix = "config_var"
        elif "Preset Variables" in section_name:
            type_prefix = "preset_var"
        elif "Ssz Objects" in section_name:
            type_prefix = "ssz_object"
        elif "Dataclasses" in section_name:
            type_prefix = "dataclass"
        else:
            type_prefix = section_name.lower().replace(" ", "_")

        # Collect source file errors
        all_errors.extend(section_results['source_files']['errors'])

        # Collect missing items with type prefix
        coverage = section_results['coverage']
        total_expected += coverage["expected"]
        for missing in coverage['missing']:
            all_missing.append(f"MISSING: {type_prefix}.{missing}")

    # Display only errors and missing items
    for error in all_errors:
        print(error)

    for missing in sorted(all_missing):
        print(missing)

    # run_checks can fail without producing any per-section result (it prints
    # its own message and returns early or skips the file), so its success
    # flag must be honoured in addition to the collected lists.
    if all_errors or all_missing or not success:
        return 1

    print(f"All specification references ({total_expected}) are valid.")
    return 0
130
+
131
+
96
132
  def list_forks(args):
97
133
  """List all available forks."""
98
134
  pyspec = get_pyspec()
@@ -103,10 +139,10 @@ def list_forks(args):
103
139
  print(f"Available presets: {', '.join(pyspec.keys())}")
104
140
  return 1
105
141
 
142
+ # Filter out EIP forks
106
143
  forks = sorted(
107
- pyspec[preset].keys(),
108
- # Put phase0 at the top & EIP feature forks at the bottom
109
- key=lambda x: (x != "phase0", x.startswith("eip"), x)
144
+ [fork for fork in pyspec[preset].keys() if not fork.startswith("eip")],
145
+ key=lambda x: (x != "phase0", x)
110
146
  )
111
147
 
112
148
  if args.format == "json":
@@ -148,20 +184,8 @@ def main():
148
184
  )
149
185
 
150
186
  # Parser for 'list-tags' command
151
- list_tags_parser = subparsers.add_parser("list-tags", help="List available specification tags")
187
+ list_tags_parser = subparsers.add_parser("list-tags", help="List available specification tags with fork history")
152
188
  list_tags_parser.set_defaults(func=list_tags)
153
- list_tags_parser.add_argument(
154
- "--fork",
155
- type=str,
156
- help="Fork to list tags for",
157
- default=get_latest_fork(),
158
- )
159
- list_tags_parser.add_argument(
160
- "--preset",
161
- type=str,
162
- help="Preset to use (mainnet or minimal)",
163
- default="mainnet",
164
- )
165
189
  list_tags_parser.add_argument(
166
190
  "--format",
167
191
  type=str,
@@ -176,6 +200,16 @@ def main():
176
200
  default=None,
177
201
  )
178
202
 
203
+ # Parser for 'check' command
204
+ check_parser = subparsers.add_parser("check", help="Check spec reference coverage and validity")
205
+ check_parser.set_defaults(func=check)
206
+ check_parser.add_argument(
207
+ "--path",
208
+ type=str,
209
+ help="Directory containing YAML files to check",
210
+ default=".",
211
+ )
212
+
179
213
  # Parser for 'list-forks' command
180
214
  list_forks_parser = subparsers.add_parser("list-forks", help="List available forks")
181
215
  list_forks_parser.set_defaults(func=list_forks)
ethspecify/core.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import difflib
2
2
  import functools
3
+ import glob
3
4
  import hashlib
4
5
  import io
5
6
  import os
@@ -7,6 +8,50 @@ import re
7
8
  import requests
8
9
  import textwrap
9
10
  import tokenize
11
+ import yaml
12
+
13
+
14
def load_config(directory=None):
    """
    Load configuration from .ethspecify.yml file in the specified directory.

    Args:
        directory: Directory to look in; defaults to the current working
            directory when None.

    Returns:
        The parsed configuration as a dict. An empty dict is returned when the
        file does not exist, is empty, cannot be read or parsed, or does not
        contain a mapping at the top level (a warning is printed for the
        latter cases).
    """
    if directory is None:
        directory = os.getcwd()

    config_path = os.path.join(directory, '.ethspecify.yml')
    if not os.path.exists(config_path):
        return {}

    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default text encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except (yaml.YAMLError, IOError, UnicodeDecodeError) as e:
        print(f"Warning: Error reading .ethspecify.yml file: {e}")
        return {}

    if not config:
        return {}

    # Callers use dict methods on the result; a list or scalar at the top
    # level would only fail later with an unrelated-looking AttributeError.
    if not isinstance(config, dict):
        print("Warning: Error reading .ethspecify.yml file: top-level value must be a mapping")
        return {}

    return config
34
+
35
+
36
def is_excepted(item_name, fork, exceptions):
    """
    Tell whether an item/fork combination is covered by an exception list.

    An entry matches either as ``item#fork`` (that fork only) or as the bare
    item name (every fork). An empty or missing list never matches.
    """
    if not exceptions:
        return False

    fork_specific = f"{item_name}#{fork}"
    return fork_specific in exceptions or item_name in exceptions
10
55
 
11
56
 
12
57
  def strip_comments(code):
@@ -107,7 +152,9 @@ def get_previous_forks(fork, version="nightly"):
107
152
  if key != f"{fork.upper()}_FORK_VERSION":
108
153
  if key != "GENESIS_FORK_VERSION":
109
154
  f = key.split("_")[0].lower()
110
- previous_forks.append(f)
155
+ # Skip EIP forks
156
+ if not f.startswith("eip"):
157
+ previous_forks.append(f)
111
158
  return list(reversed(previous_forks))
112
159
 
113
160
 
@@ -201,14 +248,155 @@ def get_latest_fork(version="nightly"):
201
248
  """A helper function to get the latest non-eip fork."""
202
249
  pyspec = get_pyspec(version)
203
250
  forks = sorted(
204
- pyspec["mainnet"].keys(),
205
- key=lambda x: (x != "phase0", x.startswith("eip"), x)
251
+ [fork for fork in pyspec["mainnet"].keys() if not fork.startswith("eip")],
252
+ key=lambda x: (x != "phase0", x)
206
253
  )
207
- for fork in reversed(forks):
208
- if not fork.startswith("eip"):
209
- return fork
254
+ return forks[-1] if forks else "phase0"
255
+
256
+
257
def get_spec_item_changes(fork, preset="mainnet", version="nightly"):
    """
    Report which spec items of a fork differ from the forks before it.

    Returns a dict keyed by category; each value maps an item name to the
    status returned by ``_get_item_status`` ('new' or 'modified'). Items with
    no status are left out.

    Raises ValueError when the fork is not part of the preset.
    """
    pyspec = get_pyspec(version)
    if fork not in pyspec[preset]:
        raise ValueError(f"Fork '{fork}' not found in {preset} preset")

    fork_data = pyspec[preset][fork]
    earlier_forks = get_previous_forks(fork, version)

    category_names = (
        'functions',
        'constant_vars',
        'custom_types',
        'ssz_objects',
        'dataclasses',
        'preset_vars',
        'config_vars',
    )
    changes = {name: {} for name in category_names}

    # Walk every category the fork actually defines
    for category, changed_items in changes.items():
        if category not in fork_data:
            continue

        for item_name, item_content in fork_data[category].items():
            status = _get_item_status(item_name, item_content, category, earlier_forks, pyspec, preset)
            if status:
                changed_items[item_name] = status

    return changes
290
+
291
+
292
+ def _get_item_status(item_name, current_content, category, previous_forks, pyspec, preset):
293
+ """
294
+ Determine if an item is new or modified compared to previous forks.
295
+ Returns 'new', 'modified', or None if unchanged.
296
+ """
297
+ # Check if item exists in any previous fork
298
+ found_in_previous = False
299
+ previous_content = None
300
+
301
+ for prev_fork in previous_forks:
302
+ if (prev_fork in pyspec[preset] and
303
+ category in pyspec[preset][prev_fork] and
304
+ item_name in pyspec[preset][prev_fork][category]):
305
+
306
+ found_in_previous = True
307
+ prev_content = pyspec[preset][prev_fork][category][item_name]
308
+
309
+ # Compare content with immediate previous version
310
+ if prev_content != current_content:
311
+ return "modified"
312
+ else:
313
+ # Found unchanged version, so this is not new or modified
314
+ return None
315
+
316
+ # If not found in any previous fork, it's new
317
+ if not found_in_previous:
318
+ return "new"
319
+
320
+ return None
321
+
322
+
323
def get_spec_item_history(preset="mainnet", version="nightly"):
    """
    Get the complete history of all spec items across all forks.

    Forks whose name starts with "eip" are ignored. The remaining forks are
    ordered with "phase0" first and the rest sorted by name.

    Returns a dict keyed by category; each value maps an item name to the
    list of forks returned by ``_trace_item_history``. Item names are
    inserted in sorted order so the result (and anything serialized from it)
    is the same on every run.

    Raises ValueError when the preset is not present in the spec data.
    """
    pyspec = get_pyspec(version)
    if preset not in pyspec:
        raise ValueError(f"Preset '{preset}' not found")

    # Get all forks in order, excluding EIP forks
    all_forks = sorted(
        [fork for fork in pyspec[preset].keys() if not fork.startswith("eip")],
        key=lambda x: (x != "phase0", x)
    )

    categories = (
        'functions',
        'constant_vars',
        'custom_types',
        'ssz_objects',
        'dataclasses',
        'preset_vars',
        'config_vars',
    )

    # Track all unique item names across all forks
    all_items = {category: set() for category in categories}
    for fork in all_forks:
        fork_data = pyspec[preset][fork]
        for category in categories:
            if category in fork_data:
                all_items[category].update(fork_data[category].keys())

    # Build history for each item
    history = {}
    for category in categories:
        history[category] = {}
        # Iterating the set directly would make the key order depend on
        # string hash randomization; sort for reproducible output.
        for item_name in sorted(all_items[category]):
            item_history = _trace_item_history(item_name, category, all_forks, pyspec, preset)
            if item_history:
                history[category][item_name] = item_history

    return history
368
+
369
+
370
+ def _trace_item_history(item_name, category, all_forks, pyspec, preset):
371
+ """
372
+ Trace the history of a specific item across all forks.
373
+ Returns a list of forks where the item was introduced or modified.
374
+ """
375
+ history_forks = []
376
+ previous_content = None
377
+
378
+ for fork in all_forks:
379
+ if (fork in pyspec[preset] and
380
+ category in pyspec[preset][fork] and
381
+ item_name in pyspec[preset][fork][category]):
382
+
383
+ current_content = pyspec[preset][fork][category][item_name]
384
+
385
+ if previous_content is None:
386
+ # First appearance
387
+ history_forks.append(fork)
388
+ elif current_content != previous_content:
389
+ # Content changed
390
+ history_forks.append(fork)
391
+
392
+ previous_content = current_content
393
+
394
+ return history_forks
395
+
396
+ def parse_common_attributes(attributes, config=None):
397
+ if config is None:
398
+ config = {}
210
399
 
211
- def parse_common_attributes(attributes):
212
400
  try:
213
401
  preset = attributes["preset"]
214
402
  except KeyError:
@@ -217,7 +405,7 @@ def parse_common_attributes(attributes):
217
405
  try:
218
406
  version = attributes["version"]
219
407
  except KeyError:
220
- version = "nightly"
408
+ version = config.get("version", "nightly")
221
409
 
222
410
  try:
223
411
  fork = attributes["fork"]
@@ -227,12 +415,12 @@ def parse_common_attributes(attributes):
227
415
  try:
228
416
  style = attributes["style"]
229
417
  except KeyError:
230
- style = "hash"
418
+ style = config.get("style", "hash")
231
419
 
232
420
  return preset, fork, style, version
233
421
 
234
- def get_spec_item(attributes):
235
- preset, fork, style, version = parse_common_attributes(attributes)
422
+ def get_spec_item(attributes, config=None):
423
+ preset, fork, style, version = parse_common_attributes(attributes, config)
236
424
  spec = get_spec(attributes, preset, fork, version)
237
425
 
238
426
  if style == "full" or style == "hash":
@@ -283,10 +471,14 @@ def extract_attributes(tag):
283
471
  return dict(attr_pattern.findall(tag))
284
472
 
285
473
 
286
- def replace_spec_tags(file_path):
474
+ def replace_spec_tags(file_path, config=None):
287
475
  with open(file_path, 'r') as file:
288
476
  content = file.read()
289
477
 
478
+ # Use provided config or load from file's directory as fallback
479
+ if config is None:
480
+ config = load_config(os.path.dirname(file_path))
481
+
290
482
  # Define regex to match self-closing tags and long (paired) tags separately
291
483
  pattern = re.compile(
292
484
  r'(?P<self><spec\b[^>]*\/>)|(?P<long><spec\b[^>]*>[\s\S]*?</spec>)',
@@ -327,7 +519,7 @@ def replace_spec_tags(file_path):
327
519
 
328
520
  attributes = extract_attributes(original_tag_text)
329
521
  print(f"spec tag: {attributes}")
330
- preset, fork, style, version = parse_common_attributes(attributes)
522
+ preset, fork, style, version = parse_common_attributes(attributes, config)
331
523
  spec = get_spec(attributes, preset, fork, version)
332
524
  hash_value = hashlib.sha256(spec.encode('utf-8')).hexdigest()[:8]
333
525
 
@@ -338,7 +530,7 @@ def replace_spec_tags(file_path):
338
530
  else:
339
531
  # For full/diff styles, rebuild as a long (paired) tag.
340
532
  new_opening = rebuild_opening_tag(attributes, hash_value)
341
- spec_content = get_spec_item(attributes)
533
+ spec_content = get_spec_item(attributes, config)
342
534
  prefix = content[:match.start()].splitlines()[-1]
343
535
  prefixed_spec = "\n".join(
344
536
  f"{prefix}{line}" if line.rstrip() else prefix.rstrip()
@@ -354,3 +546,406 @@ def replace_spec_tags(file_path):
354
546
  # Write the updated content back to the file
355
547
  with open(file_path, 'w') as file:
356
548
  file.write(updated_content)
549
+
550
+
551
def check_source_files(yaml_file, project_root, exceptions=None):
    """
    Check that source files referenced in a YAML file exist and contain expected search strings.

    Each YAML item with a 'sources' key is inspected. Every source entry that
    is a dict with a 'file' key counts towards the total; an item with an
    empty sources list counts once. A source may carry a '#L123' or
    '#L123-L456' suffix on its file path, a 'search' string that must occur
    exactly once in the file, and a 'regex' flag that makes 'search' a
    regular expression.

    Args:
        yaml_file: Path of the YAML file to inspect.
        project_root: Directory the 'file' paths are joined onto.
        exceptions: Optional exception entries (see ``is_excepted``); an item
            with empty sources that matches one is not reported.

    Returns:
        (valid_count, total_count, errors) where errors is a list of message
        strings and valid_count is total_count minus the number of errors.
    """
    if exceptions is None:
        exceptions = []
    if not os.path.exists(yaml_file):
        return 0, 0, [f"YAML file not found: {yaml_file}"]

    errors = []
    total_count = 0

    try:
        with open(yaml_file, 'r') as f:
            content_str = f.read()

        # Try to fix common YAML issues with unquoted search strings
        # Replace unquoted search values ending with colons
        content_str = re.sub(r'(\s+search:\s+)([^"\n]+:)(\s*$)', r'\1"\2"\3', content_str, flags=re.MULTILINE)

        try:
            content = yaml.safe_load(content_str)
        except yaml.YAMLError:
            # Fall back to FullLoader if safe_load fails
            # NOTE(review): FullLoader accepts more tags than safe_load; only
            # appropriate while these YAML files are trusted project files.
            content = yaml.load(content_str, Loader=yaml.FullLoader)
    except (yaml.YAMLError, IOError) as e:
        return 0, 0, [f"YAML parsing error in {yaml_file}: {e}"]

    if not content:
        return 0, 0, []

    # Handle both array of objects and single object formats
    items = content if isinstance(content, list) else [content]

    # Map spec tag attribute names to the type prefixes used in messages
    type_map = {
        'fn': 'functions',
        'function': 'functions',
        'constant_var': 'constants',
        'config_var': 'configs',
        'preset_var': 'presets',
        'ssz_object': 'ssz_objects',
        'dataclass': 'dataclasses',
        'custom_type': 'custom_types'
    }

    for item in items:
        if not isinstance(item, dict) or 'sources' not in item:
            continue

        # Extract spec reference information from the item
        spec_ref = None
        if 'spec' in item and isinstance(item['spec'], str):
            # Try to extract spec reference from spec content
            spec_content = item['spec']
            # Look for any spec tag attribute and fork
            spec_tag_match = re.search(r'<spec\s+([^>]+)>', spec_content)
            if spec_tag_match:
                tag_attrs = spec_tag_match.group(1)
                # Extract fork
                fork_match = re.search(r'fork="([^"]+)"', tag_attrs)
                # Extract the main attribute (not hash or fork)
                attr_matches = re.findall(r'(\w+)="([^"]+)"', tag_attrs)

                if fork_match:
                    fork = fork_match.group(1)
                    # Find the first non-meta attribute
                    for attr_name, attr_value in attr_matches:
                        if attr_name not in ['fork', 'hash', 'preset', 'version', 'style']:
                            type_prefix = type_map.get(attr_name, attr_name)
                            spec_ref = f"{type_prefix}.{attr_value}#{fork}"
                            break

        # Fallback to just the name if spec extraction failed
        if not spec_ref and 'name' in item:
            spec_ref = item['name']

        # Check if sources list is empty
        if not item['sources']:
            if spec_ref:
                # Extract item name and fork from spec_ref for exception checking
                if '#' in spec_ref and '.' in spec_ref:
                    # Format: "functions.item_name#fork"
                    _, item_with_fork = spec_ref.split('.', 1)
                    if '#' in item_with_fork:
                        item_name, fork = item_with_fork.split('#', 1)
                        # Check if this item is in exceptions
                        if is_excepted(item_name, fork, exceptions):
                            total_count += 1
                            continue

                errors.append(f"EMPTY SOURCES: {spec_ref}")
            else:
                # Fallback if we can't extract spec reference
                item_name = item.get('name', 'unknown')
                errors.append(f"EMPTY SOURCES: No sources defined ({item_name})")
            total_count += 1
            continue

        for source in item['sources']:
            # All sources now use the standardized dict format with file and optional search
            if not isinstance(source, dict) or 'file' not in source:
                continue

            file_path = source['file']
            search_string = source.get('search')
            is_regex = source.get('regex', False)

            total_count += 1

            # Parse line range from file path if present (#L123 or #L123-L456)
            line_range = None
            if '#L' in file_path:
                base_path, line_part = file_path.split('#L', 1)
                file_path = base_path
                # Format is always #L123 or #L123-L456, so just remove all 'L' characters
                line_range = line_part.replace('L', '')

            full_path = os.path.join(project_root, file_path)

            # Create error prefix with spec reference if available
            ref_prefix = f"{spec_ref} | " if spec_ref else ""

            # Check if file exists
            if not os.path.exists(full_path):
                errors.append(f"MISSING FILE: {ref_prefix}{file_path}")
                continue

            # Check line range if specified
            if line_range:
                try:
                    with open(full_path, 'r', encoding='utf-8') as f:
                        lines = f.readlines()
                        total_lines = len(lines)

                    # Parse line range
                    if '-' in line_range:
                        # Range like "123-456"
                        start_str, end_str = line_range.split('-', 1)
                        start_line = int(start_str)
                        end_line = int(end_str)

                        if start_line < 1 or end_line < 1 or start_line > end_line:
                            errors.append(f"INVALID LINE RANGE: {ref_prefix}#{line_range} - invalid range in {file_path}")
                            continue
                        elif end_line > total_lines:
                            errors.append(f"INVALID LINE RANGE: {ref_prefix}#{line_range} - line {end_line} exceeds file length ({total_lines}) in {file_path}")
                            continue
                    else:
                        # Single line like "123"
                        line_num = int(line_range)
                        if line_num < 1:
                            errors.append(f"INVALID LINE RANGE: {ref_prefix}#{line_range} - invalid line number in {file_path}")
                            continue
                        elif line_num > total_lines:
                            errors.append(f"INVALID LINE RANGE: {ref_prefix}#{line_range} - line {line_num} exceeds file length ({total_lines}) in {file_path}")
                            continue

                # UnicodeDecodeError is a ValueError subclass, so the read
                # errors must be caught before the generic ValueError that
                # int() raises for a malformed line number.
                except (IOError, UnicodeDecodeError):
                    errors.append(f"ERROR READING: {ref_prefix}{file_path}")
                    continue
                except ValueError:
                    errors.append(f"INVALID LINE RANGE: {ref_prefix}#{line_range} - invalid line format in {file_path}")
                    continue

            # Check search string if provided
            if search_string:
                try:
                    with open(full_path, 'r', encoding='utf-8') as f:
                        file_text = f.read()

                    if is_regex:
                        # Use regex search
                        try:
                            pattern = re.compile(search_string, re.MULTILINE)
                            matches = list(pattern.finditer(file_text))
                            count = len(matches)
                            search_type = "REGEX"
                        except re.error as e:
                            errors.append(f"INVALID REGEX: {ref_prefix}'{search_string}' in {file_path} - {e}")
                            continue
                    else:
                        # Use literal string search
                        count = file_text.count(search_string)
                        search_type = "SEARCH"

                    # The search must identify exactly one location
                    if count == 0:
                        errors.append(f"{search_type} NOT FOUND: {ref_prefix}'{search_string}' in {file_path}")
                    elif count > 1:
                        errors.append(f"AMBIGUOUS {search_type}: {ref_prefix}'{search_string}' found {count} times in {file_path}")
                except (IOError, UnicodeDecodeError):
                    errors.append(f"ERROR READING: {ref_prefix}{file_path}")

    valid_count = total_count - len(errors)
    return valid_count, total_count, errors
746
+
747
+
748
def extract_spec_tags_from_yaml(yaml_file, tag_type):
    """
    Collect the ``item#fork`` pairs referenced by spec tags in a YAML file.

    Only string 'spec' fields of dict items are scanned, and only tags of the
    form ``<spec <tag_type>="..." ... fork="...">``. A missing file, an empty
    document, or a read/parse failure yields whatever was collected so far
    (an empty set if nothing was).
    """
    found = set()
    if not os.path.exists(yaml_file):
        return found

    try:
        with open(yaml_file, 'r') as handle:
            raw_text = handle.read()

        # Quote unquoted search values that end in a colon so they parse as YAML
        raw_text = re.sub(r'(\s+search:\s+)([^"\n]+:)(\s*$)', r'\1"\2"\3', raw_text, flags=re.MULTILINE)

        try:
            parsed = yaml.safe_load(raw_text)
        except yaml.YAMLError:
            # Retry with FullLoader when safe_load rejects the document
            parsed = yaml.load(raw_text, Loader=yaml.FullLoader)

        if not parsed:
            return set()

        # Accept a single object as well as a list of objects
        entries = parsed if isinstance(parsed, list) else [parsed]
        tag_regex = rf'<spec\s+{tag_type}="([^"]+)"[^>]*fork="([^"]+)"'

        for entry in entries:
            if not isinstance(entry, dict) or 'spec' not in entry:
                continue

            spec_text = entry['spec']
            if isinstance(spec_text, str):
                for tagged_item, fork in re.findall(tag_regex, spec_text):
                    found.add(f"{tagged_item}#{fork}")

    except (IOError, UnicodeDecodeError, yaml.YAMLError):
        pass

    return found
795
+
796
+
797
def check_coverage(yaml_file, tag_type, exceptions, preset="mainnet"):
    """
    Compare the spec items known to ethspecify with the tags in a YAML file.

    Expected ``item#fork`` pairs come from ``get_spec_item_history`` for the
    category matching ``tag_type``; actual pairs come from
    ``extract_spec_tags_from_yaml``. Pairs covered by ``exceptions`` are
    never reported as missing.

    Returns (found_count, total_count, missing_items), where total_count is
    the number of expected pairs and found_count is total_count minus the
    number of missing ones.
    """
    # Tag attribute name -> category key in the history dict
    history_key_map = {
        'ssz_object': 'ssz_objects',
        'config_var': 'config_vars',
        'preset_var': 'preset_vars',
        'dataclass': 'dataclasses',
        'fn': 'functions',
        'constant_var': 'constant_vars',
        'custom_type': 'custom_types'
    }

    # What ethspecify says should be referenced
    history = get_spec_item_history(preset)
    history_key = history_key_map.get(tag_type, tag_type)

    expected_pairs = set()
    if history_key in history:
        for item_name, forks in history[history_key].items():
            for fork in forks:
                expected_pairs.add(f"{item_name}#{fork}")

    # What the YAML file actually references
    actual_pairs = extract_spec_tags_from_yaml(yaml_file, tag_type)

    def _is_missing(pair):
        name, pair_fork = pair.split('#', 1)
        if is_excepted(name, pair_fork, exceptions):
            return False
        return pair not in actual_pairs

    missing_items = [pair for pair in expected_pairs if _is_missing(pair)]

    total_count = len(expected_pairs)
    found_count = total_count - len(missing_items)
    return found_count, total_count, missing_items
841
+
842
+
843
def run_checks(project_dir, config):
    """
    Run all checks based on the configuration.

    The files to check come from ``config['specrefs']``, either as a plain
    list of file names (with exceptions under the top-level 'exceptions' key)
    or as a dict with 'files' and 'exceptions' keys. Keys that are present
    but empty (null) are treated as if they were absent.

    For every listed file that exists under ``project_dir`` the source
    references are checked with ``check_source_files``. When the file name
    identifies a known item type, coverage is also checked with
    ``check_coverage``.

    Args:
        project_dir: Directory the configured file names are relative to.
        config: Configuration dict, e.g. as returned by ``load_config``.

    Returns:
        (success, results) where results maps a section name derived from the
        file name to its 'source_files' and 'coverage' summaries. success is
        False when no files are configured, a configured file is missing, or
        any check reported an error or a missing item.
    """
    results = {}
    overall_success = True

    # Get specrefs config; a bare "specrefs:" in YAML loads as None
    specrefs_config = config.get('specrefs') or {}

    # Handle both old format (specrefs as array) and new format (specrefs as dict)
    if isinstance(specrefs_config, list):
        # Old format: specrefs: [file1, file2, ...]
        specrefs_files = specrefs_config
        exceptions = config.get('exceptions') or {}
    elif isinstance(specrefs_config, dict):
        # New format: specrefs: { files: [...], exceptions: {...} }
        specrefs_files = specrefs_config.get('files') or []
        exceptions = specrefs_config.get('exceptions') or {}
    else:
        # Any other shape cannot name files; report it as "nothing configured"
        specrefs_files = []
        exceptions = {}

    if not specrefs_files:
        print("Error: No specrefs files specified in .ethspecify.yml")
        print("Please add a 'specrefs:' section with 'files:' listing the files to check")
        return False, {}

    # File type mapping for coverage checking
    file_type_mapping = {
        'ssz-objects': 'ssz_object',
        'config-variables': 'config_var',
        'preset-variables': 'preset_var',
        'dataclasses': 'dataclass',
        'functions': 'fn',
        'constants': 'constant_var',
    }

    # Map tag types to exception keys (support both singular and plural)
    exception_key_map = {
        'ssz_object': ['ssz_objects', 'ssz_object'],
        'config_var': ['configs', 'config_variables', 'config_var'],
        'preset_var': ['presets', 'preset_variables', 'preset_var'],
        'dataclass': ['dataclasses', 'dataclass'],
        'fn': ['functions', 'fn'],
        'constant_var': ['constants', 'constant_variables', 'constant_var'],
        'custom_type': ['custom_types', 'custom_type']
    }

    # Use explicit file list only
    for filename in specrefs_files:
        yaml_path = os.path.join(project_dir, filename)

        if not os.path.exists(yaml_path):
            print(f"Error: File {filename} defined in config but not found")
            overall_success = False
            continue

        # Determine the tag type from filename for coverage checking
        tag_type = None
        preset = "mainnet"  # default preset

        for pattern, file_tag_type in file_type_mapping.items():
            if pattern in filename:
                tag_type = file_tag_type
                # Check for preset indicators
                if 'minimal' in filename.lower():
                    preset = "minimal"
                break

        # Get the appropriate exceptions for this file type
        section_exceptions = []
        if tag_type and tag_type in exception_key_map:
            # Try plural first, then singular for backward compatibility
            for key in exception_key_map[tag_type]:
                if key in exceptions:
                    # A key with a null value means "no exceptions"
                    section_exceptions = exceptions[key] or []
                    break

        # Check source files; paths are resolved against the parent of project_dir
        valid_count, total_count, source_errors = check_source_files(yaml_path, os.path.dirname(project_dir), section_exceptions)

        # Check coverage if we can determine the type
        found_count, expected_count, missing_items = 0, 0, []
        if tag_type:
            found_count, expected_count, missing_items = check_coverage(yaml_path, tag_type, section_exceptions, preset)

        # Store results using filename as section name
        section_name = filename.replace('.yml', '').replace('-', ' ').title()
        if preset != "mainnet":
            section_name += f" ({preset.title()})"

        results[section_name] = {
            'source_files': {
                'valid': valid_count,
                'total': total_count,
                'errors': source_errors
            },
            'coverage': {
                'found': found_count,
                'expected': expected_count,
                'missing': missing_items
            }
        }

        # Update overall success
        if source_errors or missing_items:
            overall_success = False

    return overall_success, results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ethspecify
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: A utility for processing Ethereum specification tags.
5
5
  Home-page: https://github.com/jtraglia/ethspecify
6
6
  Author: Justin Traglia
@@ -12,6 +12,7 @@ Requires-Python: >=3.6
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  Requires-Dist: requests==2.32.3
15
+ Requires-Dist: PyYAML>=6.0
15
16
  Dynamic: author
16
17
  Dynamic: author-email
17
18
  Dynamic: classifier
@@ -0,0 +1,9 @@
1
+ ethspecify/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ ethspecify/cli.py,sha256=SZ47-lgfeDHmzXCE-rx8ydM66N9NfNAA2GDxoC4DE7E,7641
3
+ ethspecify/core.py,sha256=bQ1D7zdR_xDx_OuyPrFBNKBNxXpH3bjn2L4-pqHhgJo,35530
4
+ ethspecify-0.2.3.dist-info/licenses/LICENSE,sha256=Awxsr73mm9YMBVhBYnzeI7bNdRd-bH6RDtO5ItG0DaM,1071
5
+ ethspecify-0.2.3.dist-info/METADATA,sha256=77_CCIaxJ5Cf6Cg_FlIDXhS_djSf-D5nAujQ9DJthys,9212
6
+ ethspecify-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ ethspecify-0.2.3.dist-info/entry_points.txt,sha256=09viGkCg9J3h0c9BFRN-BKaJUEaIc4JyULNgBP5EL_g,51
8
+ ethspecify-0.2.3.dist-info/top_level.txt,sha256=0klaMvlVyOkXW09fwZTijJpdybITEp2c9zQKV5v30VM,11
9
+ ethspecify-0.2.3.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- ethspecify/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ethspecify/cli.py,sha256=73NnY6-xvFcxMnpeZ1LqvU02EiNiPx5jOor1KPERenk,6903
3
- ethspecify/core.py,sha256=Nu5-onBdtmibdO9FkwZL1gGWjRAvZkTPeWZ0H9v2NoI,13292
4
- ethspecify-0.2.1.dist-info/licenses/LICENSE,sha256=Awxsr73mm9YMBVhBYnzeI7bNdRd-bH6RDtO5ItG0DaM,1071
5
- ethspecify-0.2.1.dist-info/METADATA,sha256=qYoT3-9O7b2SbY7mDJrDJo7T--VpjkIJTPrD2Ob03rc,9185
6
- ethspecify-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- ethspecify-0.2.1.dist-info/entry_points.txt,sha256=09viGkCg9J3h0c9BFRN-BKaJUEaIc4JyULNgBP5EL_g,51
8
- ethspecify-0.2.1.dist-info/top_level.txt,sha256=0klaMvlVyOkXW09fwZTijJpdybITEp2c9zQKV5v30VM,11
9
- ethspecify-0.2.1.dist-info/RECORD,,