richprint_pe-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- richprint/__init__.py +62 -0
- richprint/__main__.py +7 -0
- richprint/cli.py +112 -0
- richprint/constants.py +47 -0
- richprint/data/__init__.py +1 -0
- richprint/data/comp_id.txt +4566 -0
- richprint/database.py +94 -0
- richprint/exceptions.py +41 -0
- richprint/models.py +72 -0
- richprint/parser.py +338 -0
- richprint_pe-1.0.0.dist-info/METADATA +119 -0
- richprint_pe-1.0.0.dist-info/RECORD +15 -0
- richprint_pe-1.0.0.dist-info/WHEEL +4 -0
- richprint_pe-1.0.0.dist-info/entry_points.txt +2 -0
- richprint_pe-1.0.0.dist-info/licenses/LICENSE +22 -0
richprint/database.py
ADDED
"""Compiler ID database loading and lookup."""

import importlib.resources
from typing import Dict, Optional

# Type alias for the database
CompilerDatabase = Dict[int, str]


def load_database(path: Optional[str] = None) -> CompilerDatabase:
    """
    Load compiler ID database from file.

    Args:
        path: Path to comp_id.txt file. If None, uses bundled database.

    Returns:
        Dictionary mapping comp.id values to descriptions.
    """
    descriptions: CompilerDatabase = {}

    if path is None:
        # Use bundled database
        try:
            # Python 3.9+
            files = importlib.resources.files("richprint.data")
            content = (files / "comp_id.txt").read_text(encoding="utf-8")
        except AttributeError:
            # Python 3.8 fallback
            import pkg_resources
            content = pkg_resources.resource_string(
                "richprint.data", "comp_id.txt"
            ).decode("utf-8")
        lines = content.splitlines()
    else:
        try:
            with open(path, "r", encoding="utf-8") as f:
                lines = f.readlines()
        except (IOError, OSError):
            return descriptions

    for line in lines:
        line = line.rstrip("\n\r")

        # Remove trailing comments
        comment_pos = line.rfind("#")
        if comment_pos != -1:
            # Trim trailing spaces before comment
            while comment_pos > 0 and line[comment_pos - 1] == " ":
                comment_pos -= 1
            line = line[:comment_pos]

        # Skip empty lines and comment-only lines
        if len(line) <= 8 or line.startswith("#"):
            continue

        # Parse: <hex_id> <description>
        try:
            hex_part = line[:8]
            comp_id = int(hex_part, 16)
            desc = line[9:] if len(line) > 9 else ""

            # Skip duplicates (keep first)
            if comp_id not in descriptions:
                descriptions[comp_id] = desc
        except ValueError:
            continue

    return descriptions


def lookup_description(
    db: CompilerDatabase, comp_id: int, product_id: int
) -> str:
    """
    Look up description for a compiler entry.

    First tries exact comp_id match, then falls back to product_id only.

    Args:
        db: Compiler database dictionary.
        comp_id: Full compiler ID (product_id << 16 | build_version).
        product_id: Product ID (high 16 bits of comp_id).

    Returns:
        Description string, or empty string if not found.
    """
    # Try exact match first
    if comp_id in db:
        return db[comp_id]
    # Fall back to product_id only
    if product_id in db:
        return db[product_id]
    return ""
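A minimal usage sketch for this module (editor's illustration, not part of the package; the comp.id value is taken from the sample output in the README below):

```python
from richprint.database import load_database, lookup_description

# Load the database bundled with the package (pass a path to use a custom file).
db = load_database()

# Exact match on the full comp.id is tried first, then the product id alone.
comp_id = 0x00E1520D        # example value from the README's sample output
product_id = comp_id >> 16  # high 16 bits
print(lookup_description(db, comp_id, product_id) or "<unknown>")
```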
richprint/exceptions.py
ADDED
"""Custom exceptions for richprint."""


class RichPrintError(Exception):
    """Base exception for richprint errors."""
    pass


class FileOpenError(RichPrintError):
    """Failed to open or read file."""
    pass


class NoMZHeaderError(RichPrintError):
    """File does not have MZ (DOS) header signature."""
    pass


class NoPEHeaderError(RichPrintError):
    """File does not have valid PE header."""
    pass


class InvalidDOSHeaderError(RichPrintError):
    """DOS header has invalid values."""
    pass


class NoRichHeaderError(RichPrintError):
    """Rich header not found in file."""
    pass


class NoDanSTokenError(RichPrintError):
    """Rich header's DanS token not found."""
    pass


class InvalidRichHeaderError(RichPrintError):
    """Rich header structure is invalid."""
    pass
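A brief sketch of how these exceptions are meant to be caught when calling the lower-level parser functions directly (editor's illustration; `sample.exe` is a placeholder path):

```python
from richprint.exceptions import NoRichHeaderError, RichPrintError
from richprint.parser import parse_pe_info, find_rich_header

with open("sample.exe", "rb") as f:  # placeholder path
    data = f.read()

try:
    pe_info = parse_pe_info(data)
    rich_offset, dans_offset, xor_key = find_rich_header(data, pe_info)
except NoRichHeaderError:
    print("Valid PE file, but no Rich header present")
except RichPrintError as e:  # base class catches all other parsing errors
    print(f"Not parseable: {e}")
```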
richprint/models.py
ADDED
"""Data models for richprint."""

from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class CompilerEntry:
    """A single entry from the Rich header."""
    comp_id: int  # Full @comp.id value (product_id << 16 | build_version)
    product_id: int  # Product/tool identifier (high 16 bits)
    build_version: int  # Build version number (low 16 bits)
    count: int  # Number of objects with this comp.id
    description: str = ""  # Human-readable description from database


@dataclass
class RichHeader:
    """Parsed Rich header data."""
    xor_key: int  # XOR key used to encode the header
    entries: List[CompilerEntry] = field(default_factory=list)
    dans_offset: int = 0  # File offset of DanS marker
    rich_offset: int = 0  # File offset of Rich marker


@dataclass
class PEInfo:
    """Basic PE file information."""
    machine_type: int  # Machine type value
    machine_name: str  # Human-readable machine name
    pe_offset: int  # File offset of PE header
    dos_stub_end: int  # End of DOS stub (start of search area)


@dataclass
class ParseResult:
    """Complete result of parsing a PE file."""
    filename: str
    success: bool = False
    error: Optional[str] = None
    pe_info: Optional[PEInfo] = None
    rich_header: Optional[RichHeader] = None

    def to_dict(self) -> dict:
        """Convert result to dictionary for JSON serialization."""
        result = {
            "filename": self.filename,
            "success": self.success,
        }
        if self.error:
            result["error"] = self.error
        if self.pe_info:
            result["pe_info"] = {
                "machine_type": self.pe_info.machine_type,
                "machine_name": self.pe_info.machine_name,
                "pe_offset": self.pe_info.pe_offset,
            }
        if self.rich_header:
            result["rich_header"] = {
                "xor_key": f"0x{self.rich_header.xor_key:08x}",
                "entries": [
                    {
                        "comp_id": f"0x{e.comp_id:08x}",
                        "product_id": e.product_id,
                        "build_version": e.build_version,
                        "count": e.count,
                        "description": e.description,
                    }
                    for e in self.rich_header.entries
                ],
            }
        return result
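A small sketch of how `ParseResult.to_dict` feeds JSON output (editor's illustration; all field values below are invented for the example):

```python
import json

from richprint.models import CompilerEntry, ParseResult, PEInfo, RichHeader

# Hypothetical values, just to show the shape of the serialized result.
entry = CompilerEntry(comp_id=0x00E1520D, product_id=0xE1,
                      build_version=21005, count=10,
                      description="[C++] VS2013 build 21005")
result = ParseResult(
    filename="sample.exe",
    success=True,
    pe_info=PEInfo(machine_type=0x8664, machine_name="x64",
                   pe_offset=0xF8, dos_stub_end=0x80),
    rich_header=RichHeader(xor_key=0x9C2C5F83, entries=[entry]),
)
print(json.dumps(result.to_dict(), indent=2))
```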
richprint/parser.py
ADDED
"""Core PE/Rich header parsing logic."""

import struct
from typing import BinaryIO, List, Optional, Tuple

from .constants import (
    MZ_SIGNATURE,
    PE_SIGNATURE,
    RICH_SIGNATURE,
    DANS_SIGNATURE,
    DOS_NUM_RELOCS_OFFSET,
    DOS_HEADER_PARA_OFFSET,
    DOS_RELOC_OFFSET,
    DOS_PE_OFFSET,
    PE_MACHINE_OFFSET,
    get_machine_type,
)
from .database import CompilerDatabase, lookup_description
from .exceptions import (
    FileOpenError,
    NoMZHeaderError,
    NoPEHeaderError,
    InvalidDOSHeaderError,
    NoRichHeaderError,
    NoDanSTokenError,
    InvalidRichHeaderError,
)
from .models import CompilerEntry, RichHeader, PEInfo, ParseResult


def read_word(data: bytes, offset: int) -> int:
    """Read unsigned 16-bit little-endian value."""
    return struct.unpack_from("<H", data, offset)[0]


def read_dword(data: bytes, offset: int) -> int:
    """Read unsigned 32-bit little-endian value."""
    return struct.unpack_from("<I", data, offset)[0]


def parse_pe_info(data: bytes) -> PEInfo:
    """
    Parse basic PE information from file data.

    Args:
        data: File contents as bytes.

    Returns:
        PEInfo with machine type, PE offset, and DOS stub end offset.

    Raises:
        NoMZHeaderError: If MZ signature not found.
        InvalidDOSHeaderError: If DOS header values are invalid.
        NoPEHeaderError: If PE signature not found.
    """
    # Check MZ header
    if len(data) < 2:
        raise NoMZHeaderError("File too small for MZ header")

    mz = read_word(data, 0)
    if mz != MZ_SIGNATURE:
        raise NoMZHeaderError(f"No MZ header - magic is: 0x{mz:x}")

    # Read DOS header metrics
    if len(data) < 0x40:
        raise InvalidDOSHeaderError("File too small for DOS header")

    num_relocs = read_word(data, DOS_NUM_RELOCS_OFFSET)
    header_para = read_word(data, DOS_HEADER_PARA_OFFSET)

    if header_para < 4:
        raise InvalidDOSHeaderError(
            f"Too few paragraphs in DOS header: {header_para}, not a PE executable"
        )

    reloc_offset = read_word(data, DOS_RELOC_OFFSET)
    pe_offset = read_word(data, DOS_PE_OFFSET)

    if pe_offset < header_para * 16:
        raise InvalidDOSHeaderError(
            f"PE offset is too small: {pe_offset}, not a PE executable"
        )

    # Check PE signature
    if len(data) < pe_offset + 6:
        raise NoPEHeaderError("File too small for PE header")

    pe_sig = read_dword(data, pe_offset)
    if pe_sig != PE_SIGNATURE:
        raise NoPEHeaderError(
            f"No PE header signature: 0x{pe_sig:x}, not a PE executable"
        )

    # Get machine type
    machine_type = read_word(data, pe_offset + PE_MACHINE_OFFSET)
    machine_name = get_machine_type(machine_type)

    # Calculate DOS stub end offset
    dos_stub_end = reloc_offset
    if num_relocs > 0:
        dos_stub_end += 4 * num_relocs

    # Align to 16-byte paragraph boundary
    if dos_stub_end % 16:
        dos_stub_end += 16 - (dos_stub_end % 16)

    return PEInfo(
        machine_type=machine_type,
        machine_name=machine_name,
        pe_offset=pe_offset,
        dos_stub_end=dos_stub_end,
    )


def find_rich_header(data: bytes, pe_info: PEInfo) -> Tuple[int, int, int]:
    """
    Find Rich header markers and XOR key.

    Args:
        data: File contents as bytes.
        pe_info: Parsed PE info with search boundaries.

    Returns:
        Tuple of (rich_offset, dans_offset, xor_key).

    Raises:
        NoRichHeaderError: If Rich signature not found.
        NoDanSTokenError: If DanS token not found.
        InvalidRichHeaderError: If header structure is invalid.
    """
    start = pe_info.dos_stub_end
    end = pe_info.pe_offset

    # Search for "Rich" signature
    rich_offset = -1
    for i in range(start, end, 4):
        if i + 4 > len(data):
            break
        val = read_dword(data, i)
        if val == RICH_SIGNATURE:
            rich_offset = i
            break

    if rich_offset == -1:
        raise NoRichHeaderError("Rich header not found")

    # XOR key is immediately after "Rich"
    if rich_offset + 8 > len(data):
        raise InvalidRichHeaderError("File truncated after Rich signature")

    xor_key = read_dword(data, rich_offset + 4)

    # Search for "DanS" signature (XOR'd with key)
    dans_offset = -1
    target = DANS_SIGNATURE ^ xor_key
    for i in range(start, end, 4):
        if i + 4 > len(data):
            break
        val = read_dword(data, i)
        if val == target:
            dans_offset = i
            break

    if dans_offset == -1:
        raise NoDanSTokenError("Rich header's DanS token not found")

    # Validate end offset doesn't run into PE header
    end_offset = rich_offset + 8  # Rich + key
    if end_offset > pe_info.pe_offset:
        raise InvalidRichHeaderError(
            f"Calculated end offset runs into PE header: 0x{end_offset:x}"
        )

    return rich_offset, dans_offset, xor_key


def decode_rich_header(
    data: bytes,
    rich_offset: int,
    dans_offset: int,
    xor_key: int,
    db: Optional[CompilerDatabase] = None,
) -> RichHeader:
    """
    Decode Rich header entries.

    Args:
        data: File contents as bytes.
        rich_offset: File offset of Rich marker.
        dans_offset: File offset of DanS marker.
        xor_key: XOR key for decoding.
        db: Optional compiler database for descriptions.

    Returns:
        RichHeader with decoded entries.
    """
    entries: List[CompilerEntry] = []

    # Entries start at DanS + 16 (skip the DanS marker and 3 padding DWORDs)
    # and end at the Rich marker.
    start = dans_offset + 16
    end = rich_offset

    for pos in range(start, end, 8):
        if pos + 8 > len(data):
            break

        # Read and decode version and count
        ver_raw = read_dword(data, pos)
        count_raw = read_dword(data, pos + 4)

        ver = ver_raw ^ xor_key
        count = count_raw ^ xor_key

        # Extract product_id and build_version
        product_id = ver >> 16
        build_version = ver & 0xFFFF

        # Look up description
        description = ""
        if db is not None:
            description = lookup_description(db, ver, product_id)

        entries.append(CompilerEntry(
            comp_id=ver,
            product_id=product_id,
            build_version=build_version,
            count=count,
            description=description,
        ))

    return RichHeader(
        xor_key=xor_key,
        entries=entries,
        dans_offset=dans_offset,
        rich_offset=rich_offset,
    )


def parse_file(
    filename: str,
    db: Optional[CompilerDatabase] = None,
) -> ParseResult:
    """
    Parse Rich header from a PE file.

    Args:
        filename: Path to PE file.
        db: Optional compiler database for descriptions.

    Returns:
        ParseResult with parsed data or error information.
    """
    result = ParseResult(filename=filename)

    try:
        with open(filename, "rb") as f:
            data = f.read()
    except (IOError, OSError) as e:
        result.error = f"Failed to open file: {e}"
        return result

    try:
        # Parse PE info
        pe_info = parse_pe_info(data)
        result.pe_info = pe_info

        # Find Rich header
        rich_offset, dans_offset, xor_key = find_rich_header(data, pe_info)

        # Decode entries
        rich_header = decode_rich_header(
            data, rich_offset, dans_offset, xor_key, db
        )
        result.rich_header = rich_header
        result.success = True

    except (
        NoMZHeaderError,
        NoPEHeaderError,
        InvalidDOSHeaderError,
        NoRichHeaderError,
        NoDanSTokenError,
        InvalidRichHeaderError,
    ) as e:
        result.error = str(e)
    except Exception as e:
        result.error = f"Unexpected error: {e}"

    return result


def parse_bytes(
    data: bytes,
    db: Optional[CompilerDatabase] = None,
    filename: str = "<bytes>",
) -> ParseResult:
    """
    Parse Rich header from raw bytes.

    Args:
        data: PE file contents as bytes.
        db: Optional compiler database for descriptions.
        filename: Optional filename for result.

    Returns:
        ParseResult with parsed data or error information.
    """
    result = ParseResult(filename=filename)

    try:
        # Parse PE info
        pe_info = parse_pe_info(data)
        result.pe_info = pe_info

        # Find Rich header
        rich_offset, dans_offset, xor_key = find_rich_header(data, pe_info)

        # Decode entries
        rich_header = decode_rich_header(
            data, rich_offset, dans_offset, xor_key, db
        )
        result.rich_header = rich_header
        result.success = True

    except (
        NoMZHeaderError,
        NoPEHeaderError,
        InvalidDOSHeaderError,
        NoRichHeaderError,
        NoDanSTokenError,
        InvalidRichHeaderError,
    ) as e:
        result.error = str(e)
    except Exception as e:
        result.error = f"Unexpected error: {e}"

    return result
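For completeness, a sketch of the in-memory entry point above (editor's illustration; `sample.exe` is a placeholder path):

```python
from richprint.database import load_database
from richprint.parser import parse_bytes

with open("sample.exe", "rb") as f:  # placeholder path
    data = f.read()

result = parse_bytes(data, db=load_database(), filename="sample.exe")
if result.success:
    for entry in result.rich_header.entries:
        print(f"{entry.comp_id:08x} x{entry.count:<5} {entry.description}")
else:
    print(result.error)
```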
richprint_pe-1.0.0.dist-info/METADATA
ADDED
Metadata-Version: 2.4
Name: richprint-pe
Version: 1.0.0
Summary: Decode and print Rich headers from Windows PE executables
Project-URL: Homepage, https://github.com/dishather/richprint
Project-URL: Repository, https://github.com/dishather/richprint
Author-email: dishather <noreply@github.com>
License-Expression: BSD-2-Clause
License-File: LICENSE
Keywords: compiler,executable,forensics,pe,rich-header,windows
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Software Development :: Build Tools
Classifier: Topic :: System :: Systems Administration
Requires-Python: >=3.8
Description-Content-Type: text/markdown

# richprint

A Python tool to decode and print the compiler information stored in the Rich Header of Windows PE executables.

## Installation

```bash
pip install richprint-pe
```

## What is the Rich Header?

The Rich Header is a block of binary data created by Microsoft's linker, located between the DOS stub and the PE header of Windows executables. It holds a list of the compiler and tool IDs (@comp.id) used to build the executable, which allows identification of the exact compiler versions down to their build numbers.

The data is XOR-encoded, with "Rich" being the only readable marker. Files produced by non-Microsoft linkers do not carry this header.

For technical details, see [Daniel Pistelli's article](http://www.ntcore.com/files/richsign.htm).

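For example, each @comp.id packs the product/tool id in its high 16 bits and the build number in its low 16 bits, so decoding an entry is plain bit arithmetic (the value below is the first entry from the sample in the Output Format section):

```python
comp_id = 0x00E1520D
product_id = comp_id >> 16        # 0xE1   -> tool id 0xe1
build_version = comp_id & 0xFFFF  # 0x520D -> build 21005
```
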
## Usage

### Command Line

```bash
# Analyze one or more files
richprint notepad.exe
richprint file1.exe file2.dll file3.sys

# JSON output
richprint --json notepad.exe

# Use custom compiler ID database
richprint --database /path/to/comp_id.txt notepad.exe
```

### Python API

```python
from richprint import parse_file, load_database

# Load the bundled compiler ID database
db = load_database()

# Parse a PE file
result = parse_file("notepad.exe", db)

if result.success:
    print(f"Machine: {result.pe_info.machine_name}")
    print(f"XOR Key: 0x{result.rich_header.xor_key:08x}")
    for entry in result.rich_header.entries:
        print(f"  {entry.comp_id:08x} {entry.description}")
else:
    print(f"Error: {result.error}")
```

## Output Format

```
Processing notepad.exe
Target machine: x64
@comp.id    id  version  count  description
00e1520d    e1  21005    10     [C++] VS2013 build 21005
00df520d    df  21005    1      [ASM] VS2013 build 21005
00de520d    de  21005    1      [LNK] VS2013 build 21005
```

## Compiler ID Database

The bundled `comp_id.txt` database maps compiler IDs to human-readable descriptions (an example line is shown after this list). The descriptions are tagged by tool category:

- `[ C ]` - C compiler
- `[C++]` - C++ compiler
- `[ASM]` - Assembler
- `[LNK]` - Linker
- `[RES]` - Resource converter
- `[IMP]` / `[EXP]` - DLL import/export records
- And many more...

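As parsed by `richprint/database.py`, each data line starts with the 8-hex-digit comp.id, followed by a space and a free-form description; `#` begins a trailing comment, and duplicate ids keep the first entry. An illustrative line (editor's example, reusing the description from the sample output above):

```
00e1520d [C++] VS2013 build 21005   # trailing comments are stripped
```
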
## Suppressing Rich Headers

To prevent Microsoft tools from emitting this header, use the undocumented linker option:
```
/emittoolversioninfo:no
```

Available since VS2019 Update 11.

## License

BSD 2-Clause License. See [LICENSE](LICENSE) for details.

## Credits

Original C++ implementation and compiler ID database by [dishather](https://github.com/dishather/richprint).
richprint_pe-1.0.0.dist-info/RECORD
ADDED
richprint/__init__.py,sha256=9Z_Z13IE3ib5zzuoeiGDJQ5PMxOfhHlwIejZWeTHWGc,1438
richprint/__main__.py,sha256=9u51HKotzF8v0FOOjE8xKDe-RgqpLbfVHa19yxZOado,135
richprint/cli.py,sha256=6nbp8AxonlXZRwPHp_8DEVccASuZ6OSlleGyzAfAuLQ,3017
richprint/constants.py,sha256=ueg9NjzlRaeymExJTMwa5DJRPyzDulDRUDvsyqt0NrI,1502
richprint/database.py,sha256=QrW1v84wZaOdJqbK-BSEjH46-1dLkLXHi5fKzbWhqL8,2793
richprint/exceptions.py,sha256=2_wSOdPszMhmE9syhkw42HlaBfcpGJBgabaiVH0ocGs,848
richprint/models.py,sha256=agKEUybeg62NMvv-haOel86hCumowPBfCZSK3p122aI,2475
richprint/parser.py,sha256=RCv-L4u247out-9d1Chv8vfIQZDDAgaXq3zM3dhA9gk,9393
richprint/data/__init__.py,sha256=rkFDdGUU9barWpkoJjOqqk7VnOQVHgowqMPy-W7FNVE,42
richprint/data/comp_id.txt,sha256=GGyxqv0yR5ek0UhZ1Wb2jvlh-2f6USD-yi1RNxxf82A,205937
richprint_pe-1.0.0.dist-info/METADATA,sha256=EIxEVkJPQVCVkQPsC56186c67VZrUXO60Bso5kyCRXc,3574
richprint_pe-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
richprint_pe-1.0.0.dist-info/entry_points.txt,sha256=GaBZUbl51q-6xLefwvWwy5NzRjV2In9p7-WQPPEScW4,49
richprint_pe-1.0.0.dist-info/licenses/LICENSE,sha256=ej8fVsICMQH4n8rjr8BBLevIsip0bVoyLGID5Jx80SA,1333
richprint_pe-1.0.0.dist-info/RECORD,,