PyPI - dissect.cstruct - Versions diffs - 4.5.dev4__tar.gz → 4.6__tar.gz - Mend

dissect.cstruct 4.5.dev4__tar.gz → 4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

dissect_cstruct-4.6/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,54 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
+## [Unreleased]
+### Changed
+- Optimize storage of field sizes.
+- Rename `_sizes` property of `Structure` to `__sizes__`.
+- Rename `_values` property of `Structure` to `__values__`.
+- Added `load` argument to `cstruct` class, allowing direct initialization with a definition (i.e. `cstruct(cdef)` instead of `cstruct().load(cdef)`). Other arguments to `cstruct` are now keyword-only.
+## [4.5] - 05-20-2025
+### Added
+- Introduce experimental tool `cstruct-stubgen` to generate type stubs for cstruct definitions.
+### Fixed
+- Generated classes are now hashable.
+- Suppress spurious `TypeError: Dynamic size` errors when using cstruct interactively.
+## [4.4] - 03-10-2025
+### Fixed
+- Resolve documentation warnings.
+### Changed
+- Use the Ruff linter.
+## [4.3] - 11-18-2024
+### Fixed
+- All cstruct types are now correctly default-initialized using the `__default__` member.
+## [4.2] - 10-10-2024
+### Fixed
+- The string representation of enums now outputs the name of the constants.
+## [4.1] - 10-09-2024
+### Fixed
+- Declaring an array of a nested struct type now works as intended.

{dissect_cstruct-4.5.dev4 → dissect_cstruct-4.6}/PKG-INFO RENAMED Viewed

@@ -1,12 +1,13 @@
 Metadata-Version: 2.4
 Name: dissect.cstruct
-Version: 4.5.dev4
+Version: 4.6
 Summary: A Dissect module implementing a parser for C-like structures: structure parsing in Python made easy
 Author-email: Dissect Team <dissect@fox-it.com>
 License: Apache License 2.0
 Project-URL: homepage, https://dissect.tools
 Project-URL: documentation, https://docs.dissect.tools/en/latest/projects/dissect.cstruct
 Project-URL: repository, https://github.com/fox-it/dissect.cstruct
+Project-URL: changelog, https://github.com/fox-it/dissect.cstruct/blob/main/CHANGELOG.md
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
 Classifier: Intended Audience :: Developers

{dissect_cstruct-4.5.dev4 → dissect_cstruct-4.6}/dissect/cstruct/compiler.py RENAMED Viewed

@@ -21,6 +21,7 @@ from dissect.cstruct.types import (
     Structure,
     Union,
     Void,
+    VoidArray,
     Wchar,
     WcharArray,
 )
@@ -48,6 +49,7 @@ SUPPORTED_TYPES = (
     Void,
     Wchar,
     WcharArray,
+    VoidArray,
 )
 log = logging.getLogger(__name__)
@@ -121,8 +123,7 @@ class _ReadSourceGenerator:
         outro = """
         obj = type.__call__(cls, **r)
-        obj._sizes = s
-        obj._values = r
+        obj.__dynamic_sizes__ = s
         return obj
         """
@@ -225,18 +226,18 @@ class _ReadSourceGenerator:
     def _generate_structure(self, field: Field) -> Iterator[str]:
         template = f"""
-        _s = stream.tell()
+        {"_s = stream.tell()" if field.type.dynamic else ""}
         r["{field._name}"] = {self._map_field(field)}._read(stream, context=r)
-        s["{field._name}"] = stream.tell() - _s
+        {f's["{field._name}"] = stream.tell() - _s' if field.type.dynamic else ""}
         """
         yield dedent(template)
     def _generate_array(self, field: Field) -> Iterator[str]:
         template = f"""
-        _s = stream.tell()
+        {"_s = stream.tell()" if field.type.dynamic else ""}
         r["{field._name}"] = {self._map_field(field)}._read(stream, context=r)
-        s["{field._name}"] = stream.tell() - _s
+        {f's["{field._name}"] = stream.tell() - _s' if field.type.dynamic else ""}
         """
         yield dedent(template)
@@ -324,7 +325,6 @@ class _ReadSourceGenerator:
                 parser = parser_template.format(type=self._map_field(field), getter=getter)
             reads.append(f'r["{field._name}"] = {parser}')
-            reads.append(f's["{field._name}"] = {field_type.size}')
             reads.append("")  # Generates a newline in the resulting code
             size += field_type.size
@@ -365,7 +365,7 @@ def _generate_struct_info(cs: cstruct, fields: list[Field], align: bool = False)
         read_type = _get_read_type(cs, field.type)
         # Drop voids
-        if issubclass(read_type, Void):
+        if issubclass(read_type, (Void, VoidArray)):
             continue
         # Array of more complex types are handled elsewhere

{dissect_cstruct-4.5.dev4 → dissect_cstruct-4.6}/dissect/cstruct/cstruct.py RENAMED Viewed

@@ -48,11 +48,12 @@ class cstruct:
     DEF_CSTYLE = 1
     DEF_LEGACY = 2
-    def __init__(self, endian: str = "<", pointer: str | None = None):
+    def __init__(self, load: str = "", *, endian: str = "<", pointer: str | None = None):
         self.endian = endian
         self.consts = {}
         self.lookups = {}
+        self.includes = []
         # fmt: off
         self.typedefs = {
             # Internal types
@@ -187,6 +188,9 @@ class cstruct:
         self.pointer: type[BaseType] = self.resolve(pointer)
         self._anonymous_count = 0
+        if load:
+            self.load(load)
     def __getattr__(self, attr: str) -> Any:
         try:
             return self.consts[attr]
@@ -368,14 +372,14 @@ class cstruct:
             "null_terminated": null_terminated,
         }
-        return cast(type[Array], self._make_type(name, bases, size, alignment=type_.alignment, attrs=attrs))
+        return cast("type[Array]", self._make_type(name, bases, size, alignment=type_.alignment, attrs=attrs))
     def _make_int_type(self, name: str, size: int, signed: bool, *, alignment: int | None = None) -> type[Int]:
-        return cast(type[Int], self._make_type(name, (Int,), size, alignment=alignment, attrs={"signed": signed}))
+        return cast("type[Int]", self._make_type(name, (Int,), size, alignment=alignment, attrs={"signed": signed}))
     def _make_packed_type(self, name: str, packchar: str, base: type, *, alignment: int | None = None) -> type[Packed]:
         return cast(
-            type[Packed],
+            "type[Packed]",
             self._make_type(
                 name,
                 (base, Packed),

{dissect_cstruct-4.5.dev4 → dissect_cstruct-4.6}/dissect/cstruct/expression.py RENAMED Viewed

@@ -141,7 +141,7 @@ class ExpressionTokenizer:
                 self.tokens.append(">>")
             elif self.match(expected="<", append=False) and self.match(expected="<", append=False):
                 self.tokens.append("<<")
-            elif self.match(expected={" ", "\t"}, append=False):
+            elif self.match(expected={" ", "\n", "\t"}, append=False):
                 continue
             else:
                 raise ExpressionTokenizerError(
@@ -187,8 +187,7 @@ class Expression:
         "sizeof": 6,
     }
-    def __init__(self, cstruct: cstruct, expression: str):
-        self.cstruct = cstruct
+    def __init__(self, expression: str):
         self.expression = expression
         self.tokens = ExpressionTokenizer(expression).tokenize()
         self.stack = []
@@ -222,7 +221,7 @@ class Expression:
     def is_number(self, token: str) -> bool:
         return token.isnumeric() or (len(token) > 2 and token[0] == "0" and token[1] in ("x", "X", "b", "B", "o", "O"))
-    def evaluate(self, context: dict[str, int] | None = None) -> int:
+    def evaluate(self, cs: cstruct, context: dict[str, int] | None = None) -> int:
         """Evaluates an expression using a Shunting-Yard implementation."""
         self.stack = []
@@ -249,14 +248,14 @@ class Expression:
                 self.queue.append(int(current_token, 0))
             elif current_token in context:
                 self.queue.append(int(context[current_token]))
-            elif current_token in self.cstruct.consts:
-                self.queue.append(int(self.cstruct.consts[current_token]))
+            elif current_token in cs.consts:
+                self.queue.append(int(cs.consts[current_token]))
             elif current_token in self.unary_operators:
                 self.stack.append(current_token)
             elif current_token == "sizeof":
                 if len(tmp_expression) < i + 3 or (tmp_expression[i + 1] != "(" or tmp_expression[i + 3] != ")"):
                     raise ExpressionParserError("Invalid sizeof operation")
-                self.queue.append(len(self.cstruct.resolve(tmp_expression[i + 2])))
+                self.queue.append(len(cs.resolve(tmp_expression[i + 2])))
                 i += 3
             elif current_token in operators:
                 while (

{dissect_cstruct-4.5.dev4 → dissect_cstruct-4.6}/dissect/cstruct/parser.py RENAMED Viewed

@@ -49,12 +49,19 @@ class TokenParser(Parser):
         self.compiled = compiled
         self.align = align
         self.TOK = self._tokencollection()
+        self._conditionals = []
+        self._conditionals_depth = 0
     @staticmethod
     def _tokencollection() -> TokenCollection:
         TOK = TokenCollection()
         TOK.add(r"#\[(?P<values>[^\]]+)\](?=\s*)", "CONFIG_FLAG")
-        TOK.add(r"#define\s+(?P<name>[^\s]+)\s+(?P<value>[^\r\n]+)\s*", "DEFINE")
+        TOK.add(r"#define\s+(?P<name>[^\s]+)(?P<value>[^\r\n]*)", "DEFINE")
+        TOK.add(r"#undef\s+(?P<name>[^\s]+)\s*", "UNDEF")
+        TOK.add(r"#ifdef\s+(?P<name>[^\s]+)\s*", "IFDEF")
+        TOK.add(r"#ifndef\s+(?P<name>[^\s]+)\s*", "IFNDEF")
+        TOK.add(r"#else\s*", "ELSE")
+        TOK.add(r"#endif\s*", "ENDIF")
         TOK.add(r"typedef(?=\s)", "TYPEDEF")
         TOK.add(r"(?:struct|union)(?=\s|{)", "STRUCT")
         TOK.add(
@@ -63,7 +70,8 @@ class TokenParser(Parser):
             "ENUM",
         )
         TOK.add(r"(?<=})\s*(?P<defs>(?:[a-zA-Z0-9_]+\s*,\s*)+[a-zA-Z0-9_]+)\s*(?=;)", "DEFS")
-        TOK.add(r"(?P<name>\**?\s*[a-zA-Z0-9_]+)(?:\s*:\s*(?P<bits>\d+))?(?:\[(?P<count>[^;\n]*)\])?\s*(?=;)", "NAME")
+        TOK.add(r"(?P<name>\**?\s*[a-zA-Z0-9_]+)(?:\s*:\s*(?P<bits>\d+))?(?:\[(?P<count>[^;]*)\])?\s*(?=;)", "NAME")
+        TOK.add(r"#include\s+(?P<name>[^\s]+)\s*", "INCLUDE")
         TOK.add(r"[a-zA-Z_][a-zA-Z0-9_]*", "IDENTIFIER")
         TOK.add(r"[{}]", "BLOCK")
         TOK.add(r"\$(?P<name>[^\s]+) = (?P<value>{[^}]+})\w*[\r\n]+", "LOOKUP")
@@ -79,12 +87,61 @@ class TokenParser(Parser):
             idents.append(tokens.consume())
         return " ".join([i.value for i in idents])
+    def _conditional(self, tokens: TokenConsumer) -> None:
+        token = tokens.consume()
+        pattern = self.TOK.patterns[token.token]
+        match = pattern.match(token.value).groupdict()
+        value = match["name"]
+        if token.token == self.TOK.IFDEF:
+            self._conditionals.append(value in self.cstruct.consts)
+        elif token.token == self.TOK.IFNDEF:
+            self._conditionals.append(value not in self.cstruct.consts)
+    def _check_conditional(self, tokens: TokenConsumer) -> bool:
+        """Check and handle conditionals. Return a boolean indicating if we need to continue to the next token."""
+        if self._conditionals and self._conditionals_depth == len(self._conditionals):
+            # If we have a conditional and the depth matches, handle it accordingly
+            if tokens.next == self.TOK.ELSE:
+                # Flip the last conditional
+                tokens.consume()
+                self._conditionals[-1] = not self._conditionals[-1]
+                return True
+            if tokens.next == self.TOK.ENDIF:
+                # Pop the last conditional
+                tokens.consume()
+                self._conditionals.pop()
+                self._conditionals_depth -= 1
+                return True
+        if tokens.next in (self.TOK.IFDEF, self.TOK.IFNDEF):
+            # If we encounter a new conditional, increase the depth
+            self._conditionals_depth += 1
+        if tokens.next == self.TOK.ENDIF:
+            # Similarly, decrease the depth if needed
+            self._conditionals_depth -= 1
+        if self._conditionals and not self._conditionals[-1]:
+            # If the last conditional evaluated to False, skip the next token
+            tokens.consume()
+            return True
+        if tokens.next in (self.TOK.IFDEF, self.TOK.IFNDEF):
+            # If the next token is a conditional, process it
+            self._conditional(tokens)
+            return True
+        return False
     def _constant(self, tokens: TokenConsumer) -> None:
         const = tokens.consume()
         pattern = self.TOK.patterns[self.TOK.DEFINE]
         match = pattern.match(const.value).groupdict()
-        value = match["value"]
+        value = match["value"].strip()
         try:
             value = ast.literal_eval(value)
         except (ValueError, SyntaxError):
@@ -92,12 +149,22 @@ class TokenParser(Parser):
         if isinstance(value, str):
             try:
-                value = Expression(self.cstruct, value).evaluate()
+                value = Expression(value).evaluate(self.cstruct)
             except (ExpressionParserError, ExpressionTokenizerError):
                 pass
         self.cstruct.consts[match["name"]] = value
+    def _undef(self, tokens: TokenConsumer) -> None:
+        const = tokens.consume()
+        pattern = self.TOK.patterns[self.TOK.UNDEF]
+        match = pattern.match(const.value).groupdict()
+        if match["name"] in self.cstruct.consts:
+            del self.cstruct.consts[match["name"]]
+        else:
+            raise ParserError(f"line {self._lineno(const)}: constant {match['name']!r} not defined")
     def _enum(self, tokens: TokenConsumer) -> None:
         # We cheat with enums because the entire enum is in the token
         etok = tokens.consume()
@@ -120,7 +187,7 @@ class TokenParser(Parser):
                 if not key:
                     continue
-                val = nextval if not val else Expression(self.cstruct, val).evaluate(values)
+                val = nextval if not val else Expression(val).evaluate(self.cstruct, values)
                 if enumtype == "flag":
                     high_bit = val.bit_length() - 1
@@ -193,7 +260,7 @@ class TokenParser(Parser):
         if tokens.next == self.TOK.NAME:
             # As part of a struct field
             # struct type_name field_name;
-            if not len(names):
+            if not names:
                 raise ParserError(f"line {self._lineno(tokens.next)}: unexpected anonymous struct")
             return self.cstruct.resolve(names[0])
@@ -207,6 +274,9 @@ class TokenParser(Parser):
                 tokens.consume()
                 break
+            if self._check_conditional(tokens):
+                continue
             field = self._parse_field(tokens)
             fields.append(field)
@@ -265,7 +335,7 @@ class TokenParser(Parser):
                 return Field(None, type_, None)
         if tokens.next != self.TOK.NAME:
-            raise ParserError(f"line {self._lineno(tokens.next)}: expected name")
+            raise ParserError(f"line {self._lineno(tokens.next)}: expected name, got {tokens.next!r}")
         nametok = tokens.consume()
         type_, name, bits = self._parse_field_type(type_, nametok.value)
@@ -293,9 +363,9 @@ class TokenParser(Parser):
                 if count == "":
                     count = None
                 else:
-                    count = Expression(self.cstruct, count)
+                    count = Expression(count)
                     try:
-                        count = count.evaluate()
+                        count = count.evaluate(self.cstruct)
                     except Exception:
                         pass
@@ -313,17 +383,24 @@ class TokenParser(Parser):
                 tokens.eol()
                 break
-            if tokens.next not in (self.TOK.NAME, self.TOK.DEFS):
+            if tokens.next not in (self.TOK.NAME, self.TOK.DEFS, self.TOK.IDENTIFIER):
                 break
             ntoken = tokens.consume()
-            if ntoken == self.TOK.NAME:
+            if ntoken in (self.TOK.NAME, self.TOK.IDENTIFIER):
                 names.append(ntoken.value.strip())
             elif ntoken == self.TOK.DEFS:
                 names.extend([name.strip() for name in ntoken.value.strip().split(",")])
         return names
+    def _include(self, tokens: TokenConsumer) -> None:
+        include = tokens.consume()
+        pattern = self.TOK.patterns[self.TOK.INCLUDE]
+        match = pattern.match(include.value).groupdict()
+        self.cstruct.includes.append(match["name"].strip().strip("'\""))
     @staticmethod
     def _remove_comments(string: str) -> str:
         # https://stackoverflow.com/a/18381470
@@ -370,10 +447,15 @@ class TokenParser(Parser):
             if token is None:
                 break
+            if self._check_conditional(tokens):
+                continue
             if token == self.TOK.CONFIG_FLAG:
                 self._config_flag(tokens)
             elif token == self.TOK.DEFINE:
                 self._constant(tokens)
+            elif token == self.TOK.UNDEF:
+                self._undef(tokens)
             elif token == self.TOK.TYPEDEF:
                 self._typedef(tokens)
             elif token == self.TOK.STRUCT:
@@ -382,9 +464,14 @@ class TokenParser(Parser):
                 self._enum(tokens)
             elif token == self.TOK.LOOKUP:
                 self._lookup(tokens)
+            elif token == self.TOK.INCLUDE:
+                self._include(tokens)
             else:
                 raise ParserError(f"line {self._lineno(token)}: unexpected token {token!r}")
+        if self._conditionals:
+            raise ParserError(f"line {self._lineno(tokens.previous)}: unclosed conditional statement")
 class CStyleParser(Parser):
     """Definition parser for C-like structure syntax.
@@ -434,7 +521,7 @@ class CStyleParser(Parser):
                     if not key:
                         continue
-                    val = nextval if not val else Expression(self.cstruct, val).evaluate()
+                    val = nextval if not val else Expression(val).evaluate(self.cstruct)
                     if enumtype == "flag":
                         high_bit = val.bit_length() - 1
@@ -515,9 +602,9 @@ class CStyleParser(Parser):
                 if d["count"] == "":
                     count = None
                 else:
-                    count = Expression(self.cstruct, d["count"])
+                    count = Expression(d["count"])
                     try:
-                        count = count.evaluate()
+                        count = count.evaluate(self.cstruct)
                     except Exception:
                         pass

{dissect_cstruct-4.5.dev4 → dissect_cstruct-4.6}/dissect/cstruct/tools/stubgen.py RENAMED Viewed

@@ -92,6 +92,9 @@ def generate_cstruct_stub(cs: cstruct, module_prefix: str = "", cls_name: str =
             stub = f"{name}: TypeAlias = {typedef.__name__}"
         elif issubclass(typedef, (types.Enum, types.Flag)):
             stub = generate_enum_stub(typedef, cs_prefix=cs_prefix, module_prefix=module_prefix)
+        elif issubclass(typedef, types.Pointer):
+            typehint = generate_typehint(typedef, prefix=cs_prefix, module_prefix=module_prefix)
+            stub = f"{name}: TypeAlias = {typehint}"
         elif issubclass(typedef, types.Structure):
             stub = generate_structure_stub(typedef, cs_prefix=cs_prefix, module_prefix=module_prefix)
         elif issubclass(typedef, types.BaseType):

{dissect_cstruct-4.5.dev4 → dissect_cstruct-4.6}/dissect/cstruct/types/__init__.py RENAMED Viewed

@@ -7,7 +7,7 @@ from dissect.cstruct.types.leb128 import LEB128
 from dissect.cstruct.types.packed import Packed
 from dissect.cstruct.types.pointer import Pointer
 from dissect.cstruct.types.structure import Field, Structure, Union
-from dissect.cstruct.types.void import Void
+from dissect.cstruct.types.void import Void, VoidArray
 from dissect.cstruct.types.wchar import Wchar, WcharArray
 __all__ = [
@@ -27,6 +27,7 @@ __all__ = [
     "Structure",
     "Union",
     "Void",
+    "VoidArray",
     "Wchar",
     "WcharArray",
 ]

{dissect_cstruct-4.5.dev4 → dissect_cstruct-4.6}/dissect/cstruct/types/base.py RENAMED Viewed

@@ -54,6 +54,10 @@ class MetaType(type):
         """Create a new array with the given number of entries."""
         return cls.cs._make_array(cls, num_entries)
+    def __bool__(cls) -> bool:
+        """Type class is always truthy."""
+        return True
     def __len__(cls) -> int:
         """Return the byte size of the type."""
         # Python 3.9 compat thing for bound type vars
@@ -258,7 +262,7 @@ class BaseArray(BaseType):
             num = EOF
         elif isinstance(cls.num_entries, Expression):
             try:
-                num = max(0, cls.num_entries.evaluate(context))
+                num = max(0, cls.num_entries.evaluate(cls.cs, context))
             except Exception:
                 if cls.num_entries.expression != "EOF":
                     raise

dissect.cstruct 4.5.dev4__tar.gz → 4.6__tar.gz

dissect.cstruct 4.5.dev4__tar.gz → 4.6__tar.gz