ethspecify 0.2.4__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ethspecify/cli.py +103 -20
- ethspecify/core.py +1073 -25
- ethspecify-0.3.2.dist-info/METADATA +237 -0
- ethspecify-0.3.2.dist-info/RECORD +9 -0
- ethspecify-0.2.4.dist-info/METADATA +0 -351
- ethspecify-0.2.4.dist-info/RECORD +0 -9
- {ethspecify-0.2.4.dist-info → ethspecify-0.3.2.dist-info}/WHEEL +0 -0
- {ethspecify-0.2.4.dist-info → ethspecify-0.3.2.dist-info}/entry_points.txt +0 -0
- {ethspecify-0.2.4.dist-info → ethspecify-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {ethspecify-0.2.4.dist-info → ethspecify-0.3.2.dist-info}/top_level.txt +0 -0
ethspecify/core.py  CHANGED
@@ -11,6 +11,91 @@ import tokenize
 import yaml
 
 
+def validate_exception_items(exceptions, version):
+    """
+    Validate that exception items actually exist in the spec.
+    Raises an exception if any item doesn't exist.
+    """
+    if not exceptions:
+        return
+
+    # Get the pyspec data
+    try:
+        pyspec = get_pyspec(version)
+    except Exception as e:
+        print(f"Warning: Could not validate exceptions - failed to load pyspec: {e}")
+        return
+
+    # Map exception keys to pyspec keys
+    exception_to_pyspec_map = {
+        'functions': 'functions',
+        'fn': 'functions',
+        'constants': 'constant_vars',
+        'constant_variables': 'constant_vars',
+        'constant_var': 'constant_vars',
+        'configs': 'config_vars',
+        'config_variables': 'config_vars',
+        'config_var': 'config_vars',
+        'presets': 'preset_vars',
+        'preset_variables': 'preset_vars',
+        'preset_var': 'preset_vars',
+        'ssz_objects': 'ssz_objects',
+        'containers': 'ssz_objects',
+        'container': 'ssz_objects',
+        'dataclasses': 'dataclasses',
+        'dataclass': 'dataclasses',
+        'custom_types': 'custom_types',
+        'custom_type': 'custom_types'
+    }
+
+    errors = []
+
+    for exception_key, exception_items in exceptions.items():
+        # Get the corresponding pyspec key
+        pyspec_key = exception_to_pyspec_map.get(exception_key)
+        if not pyspec_key:
+            errors.append(f"Unknown exception type: '{exception_key}'")
+            continue
+
+        # Ensure exception_items is a list
+        if not isinstance(exception_items, list):
+            exception_items = [exception_items]
+
+        for item in exception_items:
+            # Parse item#fork format
+            if '#' in item:
+                item_name, fork = item.split('#', 1)
+            else:
+                # If no fork specified, we'll check if it exists in any fork
+                item_name = item
+                fork = None
+
+            # Check if the item exists
+            item_found = False
+            if fork:
+                # Check specific fork
+                if ('mainnet' in pyspec and
+                    fork in pyspec['mainnet'] and
+                    pyspec_key in pyspec['mainnet'][fork] and
+                    item_name in pyspec['mainnet'][fork][pyspec_key]):
+                    item_found = True
+            else:
+                # Check if item exists in any fork
+                if 'mainnet' in pyspec:
+                    for check_fork in pyspec['mainnet']:
+                        if (pyspec_key in pyspec['mainnet'][check_fork] and
+                            item_name in pyspec['mainnet'][check_fork][pyspec_key]):
+                            item_found = True
+                            break
+
+            if not item_found:
+                fork_suffix = f"#{fork}" if fork else ""
+                errors.append(f"invalid key: {exception_key}.{item_name}{fork_suffix}")
+
+    if errors:
+        error_msg = "Invalid exception items in configuration:\n" + "\n".join(f"  - {e}" for e in errors)
+        raise Exception(error_msg)
+
 def load_config(directory=None):
     """
     Load configuration from .ethspecify.yml file in the specified directory.
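
For orientation, a minimal sketch of how this validator is driven, assuming ethspecify 0.3.2 is installed and the spec data is reachable (the exception items below are illustrative):

    from ethspecify.core import validate_exception_items

    # Keys accept singular/plural aliases ('fn'/'functions', 'container'/'containers', ...)
    # and each item may pin a fork with a '#' suffix.
    exceptions = {
        "functions": ["slash_validator#phase0"],
        "constants": ["NOT_A_REAL_CONSTANT"],  # hypothetical name, triggers an error
    }

    try:
        validate_exception_items(exceptions, version="nightly")
    except Exception as e:
        print(e)  # Invalid exception items in configuration: ...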
@@ -25,7 +110,22 @@ def load_config(directory=None):
     try:
         with open(config_path, 'r') as f:
             config = yaml.safe_load(f)
-        return config
+            if not config:
+                return {}
+
+            # Get version from config, default to 'nightly'
+            version = config.get('version', 'nightly')
+
+            # Validate exceptions in root config
+            if 'exceptions' in config:
+                validate_exception_items(config['exceptions'], version)
+
+            # Also validate exceptions in specrefs section if present
+            if 'specrefs' in config and isinstance(config['specrefs'], dict):
+                if 'exceptions' in config['specrefs']:
+                    validate_exception_items(config['specrefs']['exceptions'], version)
+
+            return config
     except (yaml.YAMLError, IOError) as e:
         print(f"Warning: Error reading .ethspecify.yml file: {e}")
         return {}
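
For reference, the config shape that load_config now validates, as a small sketch (field values are illustrative; only pyyaml is needed to run it):

    import textwrap
    import yaml

    config = yaml.safe_load(textwrap.dedent("""
        version: nightly
        specrefs:
          files:
            - functions.yml
          exceptions:
            functions:
              - slash_validator#phase0
    """))
    # load_config would pass config['specrefs']['exceptions'] to validate_exception_items
    assert config["version"] == "nightly"
    assert "exceptions" in config["specrefs"]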
@@ -232,10 +332,16 @@ def get_spec(attributes, preset, fork, version="nightly"):
             + " = "
             + pyspec[preset][fork]["custom_types"][attributes["custom_type"]]
         )
-    elif "ssz_object" in attributes:
+    elif "ssz_object" in attributes or "container" in attributes:
         if spec is not None:
             raise Exception(f"Tag can only specify one spec item")
-        spec = pyspec[preset][fork]["ssz_objects"][attributes["ssz_object"]]
+        if "ssz_object" in attributes and "container" in attributes:
+            raise Exception(f"cannot contain 'ssz_object' and 'container'")
+        if "ssz_object" in attributes:
+            object_name = attributes["ssz_object"]
+        else:
+            object_name = attributes["container"]
+        spec = pyspec[preset][fork]["ssz_objects"][object_name]
     elif "dataclass" in attributes:
         if spec is not None:
             raise Exception(f"Tag can only specify one spec item")
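
In practice this makes container a drop-in alias for ssz_object in spec tags. A tiny sketch with illustrative attribute dicts (the shape extract_attributes produces):

    # Either attribute selects pyspec[preset][fork]["ssz_objects"]["BeaconBlock"]:
    attrs_a = {"ssz_object": "BeaconBlock"}
    attrs_b = {"container": "BeaconBlock"}
    # Supplying both in one tag now raises:
    #   "cannot contain 'ssz_object' and 'container'"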
@@ -471,6 +577,140 @@ def extract_attributes(tag):
     return dict(attr_pattern.findall(tag))
 
 
+def sort_specref_yaml(yaml_file):
+    """
+    Sort specref entries in a YAML file by their name field.
+    Preserves formatting and adds single blank lines between entries.
+    """
+    if not os.path.exists(yaml_file):
+        return False
+
+    try:
+        with open(yaml_file, 'r') as f:
+            content_str = f.read()
+
+        # Try to fix common YAML issues with unquoted search strings containing colons
+        # This is the same fix used in check_source_files
+        content_str = re.sub(r'(\s+search:\s+)([^"\n]+:)(\s*$)', r'\1"\2"\3', content_str, flags=re.MULTILINE)
+
+        try:
+            content = yaml.safe_load(content_str)
+        except yaml.YAMLError:
+            # Fall back to FullLoader if safe_load fails
+            content = yaml.load(content_str, Loader=yaml.FullLoader)
+
+        if not content:
+            return False
+
+        # Handle both array of objects and single object formats
+        if isinstance(content, list):
+            # Sort the list by 'name' field if it exists
+            # Special handling for fork ordering within the same item
+            def sort_key(x):
+                name = x.get('name', '') if isinstance(x, dict) else str(x)
+
+                # Known fork names for ordering
+                forks = ['phase0', 'altair', 'bellatrix', 'capella', 'deneb', 'electra']
+
+                # Check if name contains a # separator (like "slash_validator#phase0")
+                if '#' in name:
+                    base_name, fork = name.rsplit('#', 1)
+                    # Define fork order based on known forks list
+                    fork_lower = fork.lower()
+                    if fork_lower in forks:
+                        fork_order = forks.index(fork_lower)
+                    else:
+                        # Unknown forks go after known ones, sorted alphabetically
+                        fork_order = len(forks)
+                    return (base_name, fork_order, fork)
+                else:
+                    # Check if name ends with a fork name (like "BeaconStatePhase0")
+                    name_lower = name.lower()
+                    for i, fork in enumerate(forks):
+                        if name_lower.endswith(fork):
+                            # Extract base name
+                            base_name = name[:-len(fork)]
+                            return (base_name, i, name)
+
+                    # No fork pattern found, just sort by name
+                    return (name, 0, '')
+
+            sorted_content = sorted(content, key=sort_key)
+
+            # Custom YAML writing to preserve formatting
+            output_lines = []
+            for i, item in enumerate(sorted_content):
+                if i > 0:
+                    # Add a single blank line between entries
+                    output_lines.append("")
+
+                # Start each entry with a dash
+                first_line = True
+                for key, value in item.items():
+                    if first_line:
+                        prefix = "- "
+                        first_line = False
+                    else:
+                        prefix = "  "
+
+                    if key == 'spec':
+                        # Preserve spec content as-is, using literal style
+                        output_lines.append(f"{prefix}{key}: |")
+                        # Indent the spec content - preserve it exactly as-is
+                        spec_lines = value.rstrip().split('\n') if isinstance(value, str) else str(value).rstrip().split('\n')
+                        for spec_line in spec_lines:
+                            output_lines.append(f"    {spec_line}")
+                    elif key == 'sources':
+                        if isinstance(value, list) and len(value) == 0:
+                            # Keep empty lists on the same line for clarity
+                            output_lines.append(f"{prefix}{key}: []")
+                        else:
+                            output_lines.append(f"{prefix}{key}:")
+                            if isinstance(value, list):
+                                for source in value:
+                                    if isinstance(source, dict):
+                                        output_lines.append(f"    - file: {source.get('file', '')}")
+                                        if 'search' in source:
+                                            search_val = source['search']
+                                            # Only quote if:
+                                            # 1. Contains a colon followed by space or end of string (YAML mapping issue)
+                                            # 2. Not already quoted
+                                            # 3. Doesn't contain internal quotes (like regex patterns)
+                                            if ((':' in search_val or search_val.endswith(':')) and
+                                                not (search_val.startswith('"') and search_val.endswith('"')) and
+                                                '"' not in search_val):
+                                                # Only quote simple strings with colons, not complex patterns
+                                                if not any(char in search_val for char in ['\\', '*', '|', '[', ']', '(', ')']):
+                                                    search_val = f'"{search_val}"'
+                                            output_lines.append(f"      search: {search_val}")
+                                        if 'regex' in source:
+                                            # Keep boolean values lowercase for consistency
+                                            regex_val = str(source['regex']).lower()
+                                            output_lines.append(f"      regex: {regex_val}")
+                                    else:
+                                        output_lines.append(f"    - {source}")
+                    else:
+                        # Handle other fields - don't escape or modify the value
+                        output_lines.append(f"{prefix}{key}: {value}")
+
+            # Strip trailing whitespace from all lines
+            output_lines = [line.rstrip() for line in output_lines]
+
+            # Write back the sorted content
+            with open(yaml_file, 'w') as f:
+                f.write('\n'.join(output_lines))
+                f.write('\n')  # End file with newline
+
+            return True
+        elif isinstance(content, dict):
+            # If it's a single dict, we can't sort it
+            return False
+    except (yaml.YAMLError, IOError) as e:
+        print(f"Error sorting {yaml_file}: {e}")
+        return False
+
+    return False
+
 def replace_spec_tags(file_path, config=None):
     with open(file_path, 'r') as file:
         content = file.read()
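
The fork-aware ordering is easiest to see in isolation. This standalone sketch mirrors the name#fork branch of sort_key above:

    forks = ['phase0', 'altair', 'bellatrix', 'capella', 'deneb', 'electra']

    def sort_key(name):
        if '#' in name:
            base, fork = name.rsplit('#', 1)
            order = forks.index(fork.lower()) if fork.lower() in forks else len(forks)
            return (base, order, fork)
        return (name, 0, '')

    names = ['slash_validator#electra', 'slash_validator#phase0', 'process_slots#deneb']
    print(sorted(names, key=sort_key))
    # ['process_slots#deneb', 'slash_validator#phase0', 'slash_validator#electra']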
@@ -485,6 +725,9 @@ def replace_spec_tags(file_path, config=None):
         re.DOTALL
     )
 
+    # Collect processed spec items for potential YAML updates
+    processed_items = []
+
     def rebuild_opening_tag(attributes, hash_value):
         # Rebuild a fresh opening tag from attributes, overriding any existing hash.
         new_opening = "<spec"
@@ -523,6 +766,17 @@ def replace_spec_tags(file_path, config=None):
         spec = get_spec(attributes, preset, fork, version)
         hash_value = hashlib.sha256(spec.encode('utf-8')).hexdigest()[:8]
 
+        # Collect this item for potential YAML updates
+        processed_items.append({
+            'attributes': attributes,
+            'preset': preset,
+            'fork': fork,
+            'style': style,
+            'version': version,
+            'spec': spec,
+            'hash': hash_value
+        })
+
         if style == "hash":
             # Rebuild a fresh self-closing tag.
             updated_tag = rebuild_self_closing_tag(attributes, hash_value)
@@ -534,7 +788,7 @@ def replace_spec_tags(file_path, config=None):
             prefix = content[:match.start()].splitlines()[-1]
             prefixed_spec = "\n".join(
                 f"{prefix}{line}" if line.rstrip() else prefix.rstrip()
-                for line in spec_content.
+                for line in spec_content.split("\n")
             )
             updated_tag = f"{new_opening}\n{prefixed_spec}\n{prefix}</spec>"
             return updated_tag
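
The corrected comprehension is worth a worked example: it re-indents every spec line with the tag's own prefix and avoids leaving trailing whitespace on blank lines.

    prefix = "    # "
    spec_content = "def f():\n\n    pass"
    print("\n".join(
        f"{prefix}{line}" if line.rstrip() else prefix.rstrip()
        for line in spec_content.split("\n")
    ))
    # Output:
    #     # def f():
    #     #
    #     #     pass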
@@ -547,6 +801,151 @@ def replace_spec_tags(file_path, config=None):
     with open(file_path, 'w') as file:
         file.write(updated_content)
 
+    # Return processed items for potential YAML updates
+    return processed_items
+
+
+def get_yaml_filename_for_spec_attr(spec_attr):
+    """Map spec attribute to YAML filename."""
+    attr_to_file = {
+        'fn': 'functions.yml',
+        'function': 'functions.yml',
+        'constant_var': 'constants.yml',
+        'config_var': 'configs.yml',
+        'preset_var': 'presets.yml',
+        'container': 'containers.yml',
+        'ssz_object': 'containers.yml',
+        'dataclass': 'dataclasses.yml',
+        'custom_type': 'types.yml',
+    }
+    return attr_to_file.get(spec_attr)
+
+
+def get_spec_attr_and_name(attributes):
+    """Extract the spec attribute and item name from tag attributes."""
+    spec_attrs = ['fn', 'function', 'constant_var', 'config_var', 'preset_var',
+                  'container', 'ssz_object', 'dataclass', 'custom_type']
+    for attr in spec_attrs:
+        if attr in attributes:
+            return attr, attributes[attr]
+    return None, None
+
+
+def load_yaml_entries(yaml_file):
+    """Load existing entries from a YAML file."""
+    if not os.path.exists(yaml_file):
+        return []
+
+    try:
+        with open(yaml_file, 'r') as f:
+            content_str = f.read()
+
+        # Try to fix common YAML issues with unquoted search strings containing colons
+        content_str = re.sub(r'(\s+search:\s+)([^"\n]+:)(\s*$)', r'\1"\2"\3', content_str, flags=re.MULTILINE)
+
+        try:
+            content = yaml.safe_load(content_str)
+        except yaml.YAMLError:
+            content = yaml.load(content_str, Loader=yaml.FullLoader)
+
+        if isinstance(content, list):
+            return content
+        return []
+    except (yaml.YAMLError, IOError):
+        return []
+
+
+def extract_spec_tag_key(spec_content):
+    """Extract a unique key from spec tag to identify duplicates."""
+    if not spec_content:
+        return None
+
+    # Extract the opening spec tag
+    match = re.search(r'<spec\b([^>]*)>', spec_content)
+    if not match:
+        return None
+
+    # Extract attributes from the tag
+    attributes = extract_attributes(match.group(0))
+
+    # Build a key from the spec attribute and fork
+    # e.g., "constant_var:DOMAIN_PTC_ATTESTER:fork:gloas"
+    key_parts = []
+    for attr in ['fn', 'function', 'constant_var', 'config_var', 'preset_var',
+                 'container', 'ssz_object', 'dataclass', 'custom_type']:
+        if attr in attributes:
+            key_parts.append(f"{attr}:{attributes[attr]}")
+            break
+
+    if 'fork' in attributes:
+        key_parts.append(f"fork:{attributes['fork']}")
+
+    return ':'.join(key_parts) if key_parts else None
+
+
+def add_missing_entries_to_yaml(yaml_file, new_entries):
+    """Add new entries to a YAML file and sort it."""
+    if not new_entries:
+        return
+
+    # Load existing entries
+    existing_entries = load_yaml_entries(yaml_file)
+
+    # Build a set of existing spec tag keys
+    existing_spec_keys = set()
+    for entry in existing_entries:
+        if isinstance(entry, dict) and 'spec' in entry:
+            spec_key = extract_spec_tag_key(entry['spec'])
+            if spec_key:
+                existing_spec_keys.add(spec_key)
+
+    # Filter out entries that already exist (based on spec tag, not name)
+    entries_to_add = []
+    for entry in new_entries:
+        spec_key = extract_spec_tag_key(entry.get('spec', ''))
+        if spec_key and spec_key not in existing_spec_keys:
+            entries_to_add.append(entry)
+            existing_spec_keys.add(spec_key)  # Avoid duplicates within new entries
+
+    if not entries_to_add:
+        return
+
+    # Combine and write
+    all_entries = existing_entries + entries_to_add
+
+    # Ensure directory exists
+    os.makedirs(os.path.dirname(yaml_file) if os.path.dirname(yaml_file) else '.', exist_ok=True)
+
+    # Write combined entries using the same format as generate_specref_files
+    with open(yaml_file, 'w') as f:
+        for i, entry in enumerate(all_entries):
+            if i > 0:
+                f.write('\n')
+            f.write(f'- name: {entry["name"]}\n')
+            if 'sources' in entry:
+                if isinstance(entry['sources'], list) and len(entry['sources']) == 0:
+                    f.write('  sources: []\n')
+                else:
+                    f.write('  sources:\n')
+                    for source in entry['sources']:
+                        if isinstance(source, dict):
+                            f.write(f'    - file: {source.get("file", "")}\n')
+                            if 'search' in source:
+                                f.write(f'      search: {source["search"]}\n')
+                            if 'regex' in source:
+                                f.write(f'      regex: {source["regex"]}\n')
+                        else:
+                            f.write(f'    - {source}\n')
+            if 'spec' in entry:
+                f.write('  spec: |\n')
+                for line in entry['spec'].split('\n'):
+                    f.write(f'    {line}\n')
+
+    # Sort the file
+    sort_specref_yaml(yaml_file)
+
+    print(f"Added {len(entries_to_add)} new entries to {yaml_file}")
+
 
 def check_source_files(yaml_file, project_root, exceptions=None):
     """
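
A sketch of the dedup key and filename mapping these helpers produce (assuming ethspecify.core in 0.3.2 exposes them; the hash attribute is a dummy):

    from ethspecify.core import extract_spec_tag_key, get_yaml_filename_for_spec_attr

    entry_spec = '<spec constant_var="DOMAIN_BEACON_PROPOSER" fork="phase0" hash="00000000">\n...\n</spec>'
    print(extract_spec_tag_key(entry_spec))
    # constant_var:DOMAIN_BEACON_PROPOSER:fork:phase0
    print(get_yaml_filename_for_spec_attr("constant_var"))
    # constants.yml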
@@ -614,6 +1013,7 @@ def check_source_files(yaml_file, project_root, exceptions=None):
         'config_var': 'configs',
         'preset_var': 'presets',
         'ssz_object': 'ssz_objects',
+        'container': 'ssz_objects',
         'dataclass': 'dataclasses',
         'custom_type': 'custom_types'
     }
@@ -625,19 +1025,23 @@ def check_source_files(yaml_file, project_root, exceptions=None):
         if not spec_ref and 'name' in item:
             spec_ref = item['name']
 
+        # Extract item name and fork for exception checking
+        item_name_for_exception = None
+        fork_for_exception = None
+        if spec_ref and '#' in spec_ref and '.' in spec_ref:
+            # Format: "functions.item_name#fork"
+            _, item_with_fork = spec_ref.split('.', 1)
+            if '#' in item_with_fork:
+                item_name_for_exception, fork_for_exception = item_with_fork.split('#', 1)
+
         # Check if sources list is empty
         if not item['sources']:
             if spec_ref:
-                #
-                if
-
-
-
-                    item_name, fork = item_with_fork.split('#', 1)
-                    # Check if this item is in exceptions
-                    if is_excepted(item_name, fork, exceptions):
-                        total_count += 1
-                        continue
+                # Check if this item is in exceptions
+                if item_name_for_exception and fork_for_exception:
+                    if is_excepted(item_name_for_exception, fork_for_exception, exceptions):
+                        total_count += 1
+                        continue
 
                 errors.append(f"EMPTY SOURCES: {spec_ref}")
             else:
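
The pre-parsing step boils down to two splits; a standalone sketch with an illustrative spec_ref:

    spec_ref = "functions.slash_validator#phase0"
    item_name_for_exception = fork_for_exception = None
    if spec_ref and '#' in spec_ref and '.' in spec_ref:
        _, item_with_fork = spec_ref.split('.', 1)
        if '#' in item_with_fork:
            item_name_for_exception, fork_for_exception = item_with_fork.split('#', 1)
    assert (item_name_for_exception, fork_for_exception) == ("slash_validator", "phase0")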
@@ -647,6 +1051,13 @@ def check_source_files(yaml_file, project_root, exceptions=None):
                 total_count += 1
                 continue
 
+        # Check if item has non-empty sources but is in exceptions
+        if item_name_for_exception and fork_for_exception:
+            if is_excepted(item_name_for_exception, fork_for_exception, exceptions):
+                errors.append(f"EXCEPTION CONFLICT: {spec_ref} has a specref")
+                total_count += 1
+                continue
+
         for source in item['sources']:
             # All sources now use the standardized dict format with file and optional search
             if not isinstance(source, dict) or 'file' not in source:
@@ -758,7 +1169,7 @@ def extract_spec_tags_from_yaml(yaml_file, tag_type=None):
 
     # Known tag type attributes
     tag_attributes = ['fn', 'function', 'constant_var', 'config_var', 'preset_var',
-                      'ssz_object', 'dataclass', 'custom_type']
+                      'ssz_object', 'container', 'dataclass', 'custom_type']
 
     try:
         with open(yaml_file, 'r') as f:
@@ -807,6 +1218,9 @@ def extract_spec_tags_from_yaml(yaml_file, tag_type=None):
                 # Normalize function to fn
                 if found_tag_type == 'function':
                     found_tag_type = 'fn'
+                # Normalize container to ssz_object
+                if found_tag_type == 'container':
+                    found_tag_type = 'ssz_object'
                 break
 
         if found_tag_type and 'fork' in attrs:
@@ -822,7 +1236,210 @@ def extract_spec_tags_from_yaml(yaml_file, tag_type=None):
     return tag_types_found, pairs
 
 
-def check_coverage(yaml_file, tag_type, exceptions, preset="mainnet"):
+def generate_specrefs_from_files(files_with_spec_tags, project_dir):
+    """
+    Generate specrefs data from files containing spec tags.
+    Returns a dict with spec tag info and their source locations.
+    """
+    specrefs = {}
+
+    for file_path in files_with_spec_tags:
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            # Find all spec tags in the file
+            spec_tag_pattern = r'<spec\s+([^>]+?)(?:\s*/>|>)'
+            matches = re.finditer(spec_tag_pattern, content)
+
+            for match in matches:
+                tag_attrs_str = match.group(1)
+                attrs = extract_attributes(f"<spec {tag_attrs_str}>")
+
+                # Determine the spec type and name
+                spec_type = None
+                spec_name = None
+                fork = attrs.get('fork', None)
+
+                # Check each possible spec attribute
+                for attr_name in ['fn', 'function', 'constant_var', 'config_var',
+                                  'preset_var', 'ssz_object', 'container', 'dataclass', 'custom_type']:
+                    if attr_name in attrs:
+                        spec_type = attr_name
+                        spec_name = attrs[attr_name]
+                        break
+
+                if spec_type and spec_name:
+                    # Normalize container to ssz_object for consistency
+                    if spec_type == 'container':
+                        spec_type = 'ssz_object'
+                    # Create a unique key for this spec reference
+                    key = f"{spec_type}.{spec_name}"
+                    if fork:
+                        key += f"#{fork}"
+
+                    if key not in specrefs:
+                        specrefs[key] = {
+                            'name': spec_name,
+                            'type': spec_type,
+                            'fork': fork,
+                            'sources': []
+                        }
+
+                    # Add this source location
+                    rel_path = os.path.relpath(file_path, project_dir)
+
+                    # Get line number of the match
+                    lines_before = content[:match.start()].count('\n')
+                    line_num = lines_before + 1
+
+                    specrefs[key]['sources'].append({
+                        'file': rel_path,
+                        'line': line_num
+                    })
+
+        except (IOError, UnicodeDecodeError):
+            continue
+
+    return specrefs
+
+
+def process_generated_specrefs(specrefs, exceptions, version):
+    """
+    Process the generated specrefs and check coverage.
+    Returns (success, results)
+    """
+    results = {}
+    overall_success = True
+
+    # Group specrefs by type for coverage checking
+    specrefs_by_type = {}
+    for _, data in specrefs.items():
+        spec_type = data['type']
+        if spec_type not in specrefs_by_type:
+            specrefs_by_type[spec_type] = []
+        specrefs_by_type[spec_type].append(data)
+
+    # Map spec types to history keys
+    type_to_history_key = {
+        'fn': 'functions',
+        'function': 'functions',
+        'constant_var': 'constant_vars',
+        'config_var': 'config_vars',
+        'preset_var': 'preset_vars',
+        'ssz_object': 'ssz_objects',
+        'container': 'ssz_objects',
+        'dataclass': 'dataclasses',
+        'custom_type': 'custom_types'
+    }
+
+    # Map to exception keys
+    type_to_exception_key = {
+        'fn': 'functions',
+        'function': 'functions',
+        'constant_var': 'constants',
+        'config_var': 'configs',
+        'preset_var': 'presets',
+        'ssz_object': 'ssz_objects',
+        'container': 'ssz_objects',
+        'dataclass': 'dataclasses',
+        'custom_type': 'custom_types'
+    }
+
+    # Get spec history for coverage checking
+    history = get_spec_item_history("mainnet", version)
+
+    # Check coverage for each type
+    total_found = 0
+    total_expected = 0
+    all_missing = []
+
+    for spec_type, items in specrefs_by_type.items():
+        history_key = type_to_history_key.get(spec_type, spec_type)
+        exception_key = type_to_exception_key.get(spec_type, spec_type)
+
+        # Get exceptions for this type - handle both singular and plural keys
+        type_exceptions = []
+        if exception_key in exceptions:
+            type_exceptions = exceptions[exception_key]
+        # Also check plural forms
+        elif exception_key + 's' in exceptions:
+            type_exceptions = exceptions[exception_key + 's']
+        # Check if singular form exists when we have plural
+        elif exception_key.endswith('s') and exception_key[:-1] in exceptions:
+            type_exceptions = exceptions[exception_key[:-1]]
+
+        # Special handling for ssz_objects/containers
+        if spec_type in ['ssz_object', 'container'] and not type_exceptions:
+            # Check for 'containers' as an alternative key
+            if 'containers' in exceptions:
+                type_exceptions = exceptions['containers']
+            elif 'container' in exceptions:
+                type_exceptions = exceptions['container']
+
+        # Build set of what we found
+        found_items = set()
+        for item in items:
+            if item['fork']:
+                found_items.add(f"{item['name']}#{item['fork']}")
+            else:
+                # If no fork specified, we need to check all forks
+                if history_key in history and item['name'] in history[history_key]:
+                    for fork in history[history_key][item['name']]:
+                        found_items.add(f"{item['name']}#{fork}")
+
+        # Check what's expected
+        if history_key in history:
+            for item_name, forks in history[history_key].items():
+                for fork in forks:
+                    expected_key = f"{item_name}#{fork}"
+                    total_expected += 1
+
+                    # Check if excepted
+                    if is_excepted(item_name, fork, type_exceptions):
+                        total_found += 1
+                        continue
+
+                    if expected_key in found_items:
+                        total_found += 1
+                    else:
+                        # Use the proper type prefix for the missing item
+                        type_prefix_map = {
+                            'functions': 'functions',
+                            'constant_vars': 'constants',
+                            'config_vars': 'configs',
+                            'preset_vars': 'presets',
+                            'ssz_objects': 'ssz_objects',
+                            'dataclasses': 'dataclasses',
+                            'custom_types': 'custom_types'
+                        }
+                        prefix = type_prefix_map.get(history_key, history_key)
+                        all_missing.append(f"{prefix}.{expected_key}")
+
+    # Count total spec references found
+    total_refs = len(specrefs)
+
+    # Store results
+    results['Project Coverage'] = {
+        'source_files': {
+            'valid': total_refs,
+            'total': total_refs,
+            'errors': []
+        },
+        'coverage': {
+            'found': total_found,
+            'expected': total_expected,
+            'missing': all_missing
+        }
+    }
+
+    if all_missing:
+        overall_success = False
+
+    return overall_success, results
+
+
+def check_coverage(yaml_file, tag_type, exceptions, preset="mainnet", version="nightly"):
     """
     Check that all spec items from ethspecify have corresponding tags in the YAML file.
     Returns (found_count, total_count, missing_items)
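
Usage sketch for the whole-project path (assuming 0.3.2; the path is hypothetical, and unreadable files are skipped silently):

    from ethspecify.core import generate_specrefs_from_files

    refs = generate_specrefs_from_files(["client/state.py"], project_dir="client")
    # e.g. {"fn.process_slots#phase0": {"name": "process_slots", "type": "fn",
    #       "fork": "phase0", "sources": [{"file": "state.py", "line": 12}]}}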
@@ -830,6 +1447,7 @@ def check_coverage(yaml_file, tag_type, exceptions, preset="mainnet"):
     # Map tag types to history keys
     history_key_map = {
         'ssz_object': 'ssz_objects',
+        'container': 'ssz_objects',
         'config_var': 'config_vars',
         'preset_var': 'preset_vars',
         'dataclass': 'dataclasses',
@@ -839,7 +1457,7 @@ def check_coverage(yaml_file, tag_type, exceptions, preset="mainnet"):
     }
 
     # Get expected items from ethspecify
-    history = get_spec_item_history(preset)
+    history = get_spec_item_history(preset, version)
     expected_pairs = set()
 
     history_key = history_key_map.get(tag_type, tag_type)
@@ -876,6 +1494,9 @@ def run_checks(project_dir, config):
     results = {}
     overall_success = True
 
+    # Get version from config
+    version = config.get('version', 'nightly')
+
     # Get specrefs config
     specrefs_config = config.get('specrefs', {})
 
@@ -887,16 +1508,53 @@ def run_checks(project_dir, config):
     else:
         # New format: specrefs: { files: [...], exceptions: {...} }
         specrefs_files = specrefs_config.get('files', [])
-        exceptions = specrefs_config.get('exceptions', {})
 
+        # Support exceptions in either specrefs section or root, but not both
+        specrefs_exceptions = specrefs_config.get('exceptions', {})
+        root_exceptions = config.get('exceptions', {})
+
+        if specrefs_exceptions and root_exceptions:
+            print("Warning: Exceptions found in both root and specrefs sections. Using specrefs exceptions.")
+            exceptions = specrefs_exceptions
+        elif specrefs_exceptions:
+            exceptions = specrefs_exceptions
+        else:
+            exceptions = root_exceptions
+
+    # If no files specified, search the whole project for spec tags
     if not specrefs_files:
-        print("
-
-
+        print("No specific files configured, searching entire project for spec tags...")
+
+        # Determine search root - configurable in specrefs section
+        if 'search_root' in specrefs_config:
+            # Use configured search_root (relative to project_dir)
+            search_root_rel = specrefs_config['search_root']
+            search_root = os.path.join(project_dir, search_root_rel) if not os.path.isabs(search_root_rel) else search_root_rel
+            search_root = os.path.abspath(search_root)
+        else:
+            # Default behavior: if we're in a specrefs directory, search in the parent directory
+            search_root = os.path.dirname(project_dir) if os.path.basename(project_dir) == 'specrefs' else project_dir
+
+        print(f"Searching for spec tags in: {search_root}")
+
+        # Use grep to find all files containing spec tags
+        files_with_spec_tags = grep(search_root, r'<spec\b[^>]*>', [])
+
+        if not files_with_spec_tags:
+            print(f"No files with spec tags found in the project")
+            return True, {}
+
+        # Generate in-memory specrefs data from the found spec tags
+        all_specrefs = generate_specrefs_from_files(files_with_spec_tags, search_root)
+
+        # Process the generated specrefs
+        return process_generated_specrefs(all_specrefs, exceptions, version)
+
 
     # Map tag types to exception keys (support both singular and plural)
     exception_key_map = {
-        'ssz_object': ['ssz_objects', 'ssz_object'],
+        'ssz_object': ['ssz_objects', 'ssz_object', 'containers', 'container'],
+        'container': ['ssz_objects', 'ssz_object', 'containers', 'container'],
         'config_var': ['configs', 'config_variables', 'config_var'],
         'preset_var': ['presets', 'preset_variables', 'preset_var'],
         'dataclass': ['dataclasses', 'dataclass'],
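
A config sketch exercising both new knobs in this hunk (values are illustrative). Exceptions may live at the root or under specrefs, but the specrefs copy wins when both are present:

    config = {
        "version": "nightly",
        "specrefs": {
            "search_root": "../src",  # optional; default is the parent of a specrefs/ directory
            "exceptions": {"functions": ["slash_validator#phase0"]},
        },
    }
    # run_checks(project_dir, config) greps search_root for <spec ...> tags.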
@@ -925,7 +1583,16 @@ def run_checks(project_dir, config):
         # Process each tag type found in the file
         if not tag_types_found:
             # No spec tags found, still check source files
-            valid_count, total_count, source_errors = check_source_files(yaml_path, os.path.dirname(project_dir), [])
+            # Determine source root - use search_root if configured, otherwise use default behavior
+            if 'search_root' in specrefs_config:
+                search_root_rel = specrefs_config['search_root']
+                source_root = os.path.join(project_dir, search_root_rel) if not os.path.isabs(search_root_rel) else search_root_rel
+                source_root = os.path.abspath(source_root)
+            else:
+                # Default behavior: parent directory
+                source_root = os.path.dirname(project_dir)
+
+            valid_count, total_count, source_errors = check_source_files(yaml_path, source_root, [])
 
             # Store results using filename as section name
             section_name = filename.replace('.yml', '').replace('-', ' ').title()
@@ -963,7 +1630,7 @@ def run_checks(project_dir, config):
                 break
 
             # Check coverage for this specific tag type
-            found_count, expected_count, missing_items = check_coverage(yaml_path, tag_type, section_exceptions, preset)
+            found_count, expected_count, missing_items = check_coverage(yaml_path, tag_type, section_exceptions, preset, version)
             total_found += found_count
             total_expected += expected_count
             all_missing_items.extend(missing_items)
@@ -977,7 +1644,16 @@ def run_checks(project_dir, config):
             if key in exceptions:
                 all_exceptions.extend(exceptions[key])
 
-        valid_count, total_count, source_errors = check_source_files(yaml_path, os.path.dirname(project_dir), all_exceptions)
+        # Determine source root - use search_root if configured, otherwise use default behavior
+        if 'search_root' in specrefs_config:
+            search_root_rel = specrefs_config['search_root']
+            source_root = os.path.join(project_dir, search_root_rel) if not os.path.isabs(search_root_rel) else search_root_rel
+            source_root = os.path.abspath(source_root)
+        else:
+            # Default behavior: parent directory
+            source_root = os.path.dirname(project_dir)
+
+        valid_count, total_count, source_errors = check_source_files(yaml_path, source_root, all_exceptions)
 
         # Store results using filename as section name
         section_name = filename.replace('.yml', '').replace('-', ' ').title()
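
The same resolution rule shown concretely (hypothetical POSIX paths):

    import os

    project_dir = "/repo/specrefs"
    search_root_rel = "../src"
    source_root = os.path.abspath(os.path.join(project_dir, search_root_rel))
    print(source_root)  # /repo/src; without search_root it falls back to os.path.dirname(project_dir)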
@@ -1002,3 +1678,375 @@ def run_checks(project_dir, config):
             overall_success = False
 
     return overall_success, results
+
+
+def update_entry_names_in_yaml_files(project_dir, specrefs_files):
+    """
+    Update all entry names to use the format <spec_item>#<fork>.
+    """
+    for yaml_file in specrefs_files:
+        yaml_path = os.path.join(project_dir, yaml_file)
+
+        if not os.path.exists(yaml_path):
+            continue
+
+        # Load existing entries
+        existing_entries = load_yaml_entries(yaml_path)
+        if not existing_entries:
+            continue
+
+        updated = False
+        for entry in existing_entries:
+            if not isinstance(entry, dict) or 'spec' not in entry:
+                continue
+
+            # Extract spec tag attributes
+            spec_content = entry['spec']
+            match = re.search(r'<spec\b([^>]*)>', spec_content)
+            if not match:
+                continue
+
+            attributes = extract_attributes(match.group(0))
+
+            # Get the spec item name and fork
+            spec_attr, item_name = get_spec_attr_and_name(attributes)
+            fork = attributes.get('fork')
+
+            if item_name and fork:
+                # Build the expected name
+                expected_name = f'{item_name}#{fork}'
+
+                # Update if different
+                if entry.get('name') != expected_name:
+                    entry['name'] = expected_name
+                    updated = True
+
+        # Write back if updated
+        if updated:
+            with open(yaml_path, 'w') as f:
+                for i, entry in enumerate(existing_entries):
+                    if i > 0:
+                        f.write('\n')
+                    f.write(f'- name: {entry["name"]}\n')
+                    if 'sources' in entry:
+                        if isinstance(entry['sources'], list) and len(entry['sources']) == 0:
+                            f.write('  sources: []\n')
+                        else:
+                            f.write('  sources:\n')
+                            for source in entry['sources']:
+                                if isinstance(source, dict):
+                                    f.write(f'    - file: {source.get("file", "")}\n')
+                                    if 'search' in source:
+                                        f.write(f'      search: {source["search"]}\n')
+                                    if 'regex' in source:
+                                        f.write(f'      regex: {source["regex"]}\n')
+                                else:
+                                    f.write(f'    - {source}\n')
+                    if 'spec' in entry:
+                        f.write('  spec: |\n')
+                        for line in entry['spec'].split('\n'):
+                            f.write(f'    {line}\n')
+
+            # Sort the file
+            sort_specref_yaml(yaml_path)
+            print(f"Updated entry names in {yaml_file}")
+
+
+def add_missing_spec_items_to_yaml_files(project_dir, config, specrefs_files):
+    """
+    Add missing spec items to existing YAML files.
+    Ensures all spec items from the specification exist in YAML files with sources: []
+    """
+    version = config.get('version', 'nightly')
+    preset = 'mainnet'  # Could make this configurable
+
+    # Get all spec items
+    pyspec = get_pyspec(version)
+    if preset not in pyspec:
+        print(f"Error: Preset '{preset}' not found")
+        return
+
+    # Get all forks in chronological order, excluding EIP forks
+    all_forks = sorted(
+        [fork for fork in pyspec[preset].keys() if not fork.startswith("eip")],
+        key=lambda x: (x != "phase0", x)
+    )
+
+    # Map YAML filenames to category keys and spec attribute names
+    filename_to_category = {
+        'constants.yml': ('constant_vars', 'constant_var'),
+        'configs.yml': ('config_vars', 'config_var'),
+        'presets.yml': ('preset_vars', 'preset_var'),
+        'functions.yml': ('functions', 'fn'),
+        'containers.yml': ('ssz_objects', 'container'),
+        'dataclasses.yml': ('dataclasses', 'dataclass'),
+        'types.yml': ('custom_types', 'custom_type'),
+    }
+
+    for yaml_file in specrefs_files:
+        yaml_path = os.path.join(project_dir, yaml_file)
+        yaml_basename = os.path.basename(yaml_file)
+
+        if yaml_basename not in filename_to_category:
+            continue
+
+        category, spec_attr = filename_to_category[yaml_basename]
+
+        # Collect all items in this category organized by name and fork
+        items_by_name = {}
+        for fork in all_forks:
+            if fork not in pyspec[preset]:
+                continue
+            fork_data = pyspec[preset][fork]
+
+            if category not in fork_data:
+                continue
+
+            for item_name, item_data in fork_data[category].items():
+                if item_name not in items_by_name:
+                    items_by_name[item_name] = []
+                items_by_name[item_name].append((fork, item_data))
+
+        # Build entries for missing items
+        new_entries = []
+        for item_name in sorted(items_by_name.keys()):
+            forks_data = items_by_name[item_name]
+
+            # Find all unique versions of this item (where content differs)
+            versions = []  # List of (fork, item_data, spec_content)
+            prev_content = None
+
+            for fork, item_data in forks_data:
+                # Build the spec content based on category
+                if category == 'functions':
+                    spec_content = item_data
+                elif category in ['constant_vars', 'config_vars', 'preset_vars']:
+                    # item_data is a list: [type, value, ...]
+                    if isinstance(item_data, (list, tuple)) and len(item_data) >= 2:
+                        type_info = item_data[0]
+                        value = item_data[1]
+                        if type_info:
+                            spec_content = f"{item_name}: {type_info} = {value}"
+                        else:
+                            spec_content = f"{item_name} = {value}"
+                    else:
+                        spec_content = str(item_data)
+                elif category == 'ssz_objects':
+                    spec_content = item_data
+                elif category == 'dataclasses':
+                    spec_content = item_data.replace("@dataclass\n", "")
+                elif category == 'custom_types':
+                    # custom_types are simple type aliases: TypeName = SomeType
+                    spec_content = f"{item_name} = {item_data}"
+                else:
+                    spec_content = str(item_data)
+
+                # Only add this version if it's different from the previous one
+                if prev_content is None or spec_content != prev_content:
+                    versions.append((fork, item_data, spec_content))
+                    prev_content = spec_content
+
+            # Create entries based on number of unique versions
+            use_fork_suffix = len(versions) > 1
+
+            for idx, (fork, item_data, spec_content) in enumerate(versions):
+                # Calculate hash of current version
+                hash_value = hashlib.sha256(spec_content.encode('utf-8')).hexdigest()[:8]
+
+                # For multiple versions after the first, use diff style
+                if use_fork_suffix and idx > 0:
+                    # Get previous version for diff
+                    prev_fork, _, prev_spec_content = versions[idx - 1]
+
+                    # Generate diff
+                    diff_content = diff(prev_fork, strip_comments(prev_spec_content), fork, strip_comments(spec_content))
+
+                    # Build spec tag with style="diff"
+                    spec_tag = f'<spec {spec_attr}="{item_name}" fork="{fork}" style="diff" hash="{hash_value}">'
+                    content = diff_content
+                else:
+                    # Build spec tag without style="diff"
+                    spec_tag = f'<spec {spec_attr}="{item_name}" fork="{fork}" hash="{hash_value}">'
+                    content = spec_content
+
+                # Create entry
+                entry_name = f'{item_name}#{fork}' if use_fork_suffix else item_name
+                entry = {
+                    'name': entry_name,
+                    'sources': [],
+                    'spec': f'{spec_tag}\n{content}\n</spec>'
+                }
+                new_entries.append(entry)
+
+        # Add missing entries to the YAML file
+        if new_entries:
+            add_missing_entries_to_yaml(yaml_path, new_entries)
+
+
+def generate_specref_files(output_dir, version="nightly", preset="mainnet"):
+    """
+    Generate specref YAML files without sources for manual mapping.
+    Creates a basic directory structure with empty sources.
+    """
+    # Create output directory if it doesn't exist
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Get all spec items
+    pyspec = get_pyspec(version)
+    if preset not in pyspec:
+        raise ValueError(f"Preset '{preset}' not found")
+
+    # Get all forks in chronological order, excluding EIP forks
+    all_forks = sorted(
+        [fork for fork in pyspec[preset].keys() if not fork.startswith("eip")],
+        key=lambda x: (x != "phase0", x)
+    )
+
+    # Map history keys to file names and spec attribute names
+    category_map = {
+        'constant_vars': ('constants.yml', 'constant_var'),
+        'config_vars': ('configs.yml', 'config_var'),
+        'preset_vars': ('presets.yml', 'preset_var'),
+        'functions': ('functions.yml', 'fn'),
+        'ssz_objects': ('containers.yml', 'container'),
+        'dataclasses': ('dataclasses.yml', 'dataclass'),
+        'custom_types': ('types.yml', 'custom_type'),
+    }
+
+    # Collect all items organized by category
+    items_by_category = {cat: {} for cat in category_map.keys()}
+
+    for fork in all_forks:
+        if fork not in pyspec[preset]:
+            continue
+        fork_data = pyspec[preset][fork]
+
+        for category in items_by_category.keys():
+            if category not in fork_data:
+                continue
+
+            for item_name, item_data in fork_data[category].items():
+                # Track which forks have this item
+                if item_name not in items_by_category[category]:
+                    items_by_category[category][item_name] = []
+                items_by_category[category][item_name].append((fork, item_data))
+
+    # Generate YAML files for each category
+    for category, (filename, spec_attr) in category_map.items():
+        if not items_by_category[category]:
+            continue
+
+        output_path = os.path.join(output_dir, filename)
+        entries = []
+
+        # Sort items alphabetically
+        for item_name in sorted(items_by_category[category].keys()):
+            forks_data = items_by_category[category][item_name]
+
+            # Find all unique versions of this item (where content differs)
+            versions = []  # List of (fork, item_data, spec_content)
+            prev_content = None
+
+            for fork, item_data in forks_data:
+                # Build the spec content based on category
+                if category == 'functions':
+                    spec_content = item_data
+                elif category in ['constant_vars', 'config_vars', 'preset_vars']:
+                    # item_data is a list: [type, value, ...]
+                    if isinstance(item_data, (list, tuple)) and len(item_data) >= 2:
+                        type_info = item_data[0]
+                        value = item_data[1]
+                        if type_info:
+                            spec_content = f"{item_name}: {type_info} = {value}"
+                        else:
+                            spec_content = f"{item_name} = {value}"
+                    else:
+                        spec_content = str(item_data)
+                elif category == 'ssz_objects':
+                    spec_content = item_data
+                elif category == 'dataclasses':
+                    spec_content = item_data.replace("@dataclass\n", "")
+                elif category == 'custom_types':
+                    # custom_types are simple type aliases: TypeName = SomeType
+                    spec_content = f"{item_name} = {item_data}"
+                else:
+                    spec_content = str(item_data)
+
+                # Only add this version if it's different from the previous one
+                if prev_content is None or spec_content != prev_content:
+                    versions.append((fork, item_data, spec_content))
+                    prev_content = spec_content
+
+            # Create entries based on number of unique versions
+            use_fork_suffix = len(versions) > 1
+
+            for idx, (fork, item_data, spec_content) in enumerate(versions):
+                # Calculate hash of current version
+                hash_value = hashlib.sha256(spec_content.encode('utf-8')).hexdigest()[:8]
+
+                # For multiple versions after the first, use diff style
+                if use_fork_suffix and idx > 0:
+                    # Get previous version for diff
+                    prev_fork, _, prev_spec_content = versions[idx - 1]
+
+                    # Generate diff
+                    diff_content = diff(prev_fork, strip_comments(prev_spec_content), fork, strip_comments(spec_content))
+
+                    # Build spec tag with style="diff"
+                    spec_tag = f'<spec {spec_attr}="{item_name}" fork="{fork}" style="diff" hash="{hash_value}">'
+                    content = diff_content
+                else:
+                    # Build spec tag without style="diff"
+                    spec_tag = f'<spec {spec_attr}="{item_name}" fork="{fork}" hash="{hash_value}">'
+                    content = spec_content
+
+                # Create entry
+                entry_name = f'{item_name}#{fork}' if use_fork_suffix else item_name
+                entry = {
+                    'name': entry_name,
+                    'sources': [],
+                    'spec': f'{spec_tag}\n{content}\n</spec>'
+                }
+                entries.append(entry)
+
+        # Write YAML file
+        if entries:
+            with open(output_path, 'w') as f:
+                for i, entry in enumerate(entries):
+                    if i > 0:
+                        f.write('\n')
+                    f.write(f'- name: {entry["name"]}\n')
+                    f.write('  sources: []\n')
+                    f.write('  spec: |\n')
+                    for line in entry['spec'].split('\n'):
+                        f.write(f'    {line}\n')
+
+    # Create .ethspecify.yml config file
+    config_path = os.path.join(output_dir, '.ethspecify.yml')
+    with open(config_path, 'w') as f:
+        f.write(f'version: {version}\n')
+        f.write('style: full\n')
+        f.write('\n')
+        f.write('specrefs:\n')
+        f.write('  files:\n')
+        for category, (filename, _) in category_map.items():
+            if items_by_category[category]:
+                f.write(f'    - {filename}\n')
+        f.write('\n')
+        f.write('  exceptions:\n')
+        f.write('    # Add any exceptions here\n')
+
+    # Strip trailing whitespace from all generated files
+    all_files = [config_path]
+    for category, (filename, _) in category_map.items():
+        if items_by_category[category]:
+            all_files.append(os.path.join(output_dir, filename))
+
+    for file_path in all_files:
+        with open(file_path, 'r') as f:
+            lines = f.readlines()
+        with open(file_path, 'w') as f:
+            for line in lines:
+                f.write(line.rstrip() + '\n')
+
+    return list(category_map.values())