richprint-pe 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015-2024 dishather (https://github.com/dishather)
2
+
3
+ Redistribution and use in source and binary forms, with or without modification,
4
+ are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice,
7
+ this list of conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
17
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
19
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
20
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
22
+ OF SUCH DAMAGE.
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: richprint-pe
3
+ Version: 1.0.0
4
+ Summary: Decode and print Rich headers from Windows PE executables
5
+ Project-URL: Homepage, https://github.com/dishather/richprint
6
+ Project-URL: Repository, https://github.com/dishather/richprint
7
+ Author-email: dishather <noreply@github.com>
8
+ License-Expression: BSD-2-Clause
9
+ License-File: LICENSE
10
+ Keywords: compiler,executable,forensics,pe,rich-header,windows
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: BSD License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Software Development :: Build Tools
23
+ Classifier: Topic :: System :: Systems Administration
24
+ Requires-Python: >=3.8
25
+ Description-Content-Type: text/markdown
26
+
27
+ # richprint
28
+
29
+ A Python tool to decode and print compiler information stored in the Rich Header of Windows PE executables.
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install richprint-pe
35
+ ```
36
+
37
+ ## What is the Rich Header?
38
+
39
+ The Rich Header is a section of binary data created by Microsoft's linker, located between the DOS stub and PE header in Windows executables. It contains a list of compiler/tool IDs (@comp.id) used to build the executable, allowing identification of exact compiler versions down to build numbers.
40
+
41
+ The data is XOR-encoded, with "Rich" being the only readable marker. Files created by non-Microsoft linkers will not have this header.
42
+
43
+ For technical details, see [Daniel Pistelli's article](http://www.ntcore.com/files/richsign.htm).
44
+
45
+ ## Usage
46
+
47
+ ### Command Line
48
+
49
+ ```bash
50
+ # Analyze one or more files
51
+ richprint notepad.exe
52
+ richprint file1.exe file2.dll file3.sys
53
+
54
+ # JSON output
55
+ richprint --json notepad.exe
56
+
57
+ # Use custom compiler ID database
58
+ richprint --database /path/to/comp_id.txt notepad.exe
59
+ ```
60
+
61
+ ### Python API
62
+
63
+ ```python
64
+ from richprint import parse_file, load_database
65
+
66
+ # Load the bundled compiler ID database
67
+ db = load_database()
68
+
69
+ # Parse a PE file
70
+ result = parse_file("notepad.exe", db)
71
+
72
+ if result.success:
73
+     print(f"Machine: {result.pe_info.machine_name}")
74
+     print(f"XOR Key: 0x{result.rich_header.xor_key:08x}")
75
+     for entry in result.rich_header.entries:
76
+         print(f" {entry.comp_id:08x} {entry.description}")
77
+ else:
78
+     print(f"Error: {result.error}")
79
+ ```
80
+
81
+ ## Output Format
82
+
83
+ ```
84
+ Processing notepad.exe
85
+ Target machine: x64
86
+ @comp.id id version count description
87
+ 00e1520d e1 21005 10 [C++] VS2013 build 21005
88
+ 00df520d df 21005 1 [ASM] VS2013 build 21005
89
+ 00de520d de 21005 1 [LNK] VS2013 build 21005
90
+ ```
91
+
92
+ ## Compiler ID Database
93
+
94
+ The bundled `comp_id.txt` database maps compiler IDs to human-readable descriptions. The format supports:
95
+
96
+ - `[ C ]` - C compiler
97
+ - `[C++]` - C++ compiler
98
+ - `[ASM]` - Assembler
99
+ - `[LNK]` - Linker
100
+ - `[RES]` - Resource converter
101
+ - `[IMP]` / `[EXP]` - DLL import/export records
102
+ - And many more...
103
+
104
+ ## Suppressing Rich Headers
105
+
106
+ To prevent Microsoft tools from emitting this header, use the undocumented linker option:
107
+ ```
108
+ /emittoolversioninfo:no
109
+ ```
110
+
111
+ Available since VS2019 Update 11.
112
+
113
+ ## License
114
+
115
+ BSD 2-Clause License. See [LICENSE](LICENSE) for details.
116
+
117
+ ## Credits
118
+
119
+ Original C++ implementation and compiler ID database by [dishather](https://github.com/dishather/richprint).
@@ -0,0 +1,93 @@
1
+ # richprint
2
+
3
+ A Python tool to decode and print compiler information stored in the Rich Header of Windows PE executables.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install richprint-pe
9
+ ```
10
+
11
+ ## What is the Rich Header?
12
+
13
+ The Rich Header is a section of binary data created by Microsoft's linker, located between the DOS stub and PE header in Windows executables. It contains a list of compiler/tool IDs (@comp.id) used to build the executable, allowing identification of exact compiler versions down to build numbers.
14
+
15
+ The data is XOR-encoded, with "Rich" being the only readable marker. Files created by non-Microsoft linkers will not have this header.
16
+
17
+ For technical details, see [Daniel Pistelli's article](http://www.ntcore.com/files/richsign.htm).
18
+
19
+ ## Usage
20
+
21
+ ### Command Line
22
+
23
+ ```bash
24
+ # Analyze one or more files
25
+ richprint notepad.exe
26
+ richprint file1.exe file2.dll file3.sys
27
+
28
+ # JSON output
29
+ richprint --json notepad.exe
30
+
31
+ # Use custom compiler ID database
32
+ richprint --database /path/to/comp_id.txt notepad.exe
33
+ ```
34
+
35
+ ### Python API
36
+
37
+ ```python
38
+ from richprint import parse_file, load_database
39
+
40
+ # Load the bundled compiler ID database
41
+ db = load_database()
42
+
43
+ # Parse a PE file
44
+ result = parse_file("notepad.exe", db)
45
+
46
+ if result.success:
47
+     print(f"Machine: {result.pe_info.machine_name}")
48
+     print(f"XOR Key: 0x{result.rich_header.xor_key:08x}")
49
+     for entry in result.rich_header.entries:
50
+         print(f" {entry.comp_id:08x} {entry.description}")
51
+ else:
52
+     print(f"Error: {result.error}")
53
+ ```
54
+
55
+ ## Output Format
56
+
57
+ ```
58
+ Processing notepad.exe
59
+ Target machine: x64
60
+ @comp.id id version count description
61
+ 00e1520d e1 21005 10 [C++] VS2013 build 21005
62
+ 00df520d df 21005 1 [ASM] VS2013 build 21005
63
+ 00de520d de 21005 1 [LNK] VS2013 build 21005
64
+ ```
65
+
66
+ ## Compiler ID Database
67
+
68
+ The bundled `comp_id.txt` database maps compiler IDs to human-readable descriptions. The format supports:
69
+
70
+ - `[ C ]` - C compiler
71
+ - `[C++]` - C++ compiler
72
+ - `[ASM]` - Assembler
73
+ - `[LNK]` - Linker
74
+ - `[RES]` - Resource converter
75
+ - `[IMP]` / `[EXP]` - DLL import/export records
76
+ - And many more...
77
+
78
+ ## Suppressing Rich Headers
79
+
80
+ To prevent Microsoft tools from emitting this header, use the undocumented linker option:
81
+ ```
82
+ /emittoolversioninfo:no
83
+ ```
84
+
85
+ Available since VS2019 Update 11.
86
+
87
+ ## License
88
+
89
+ BSD 2-Clause License. See [LICENSE](LICENSE) for details.
90
+
91
+ ## Credits
92
+
93
+ Original C++ implementation and compiler ID database by [dishather](https://github.com/dishather/richprint).
@@ -0,0 +1,49 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "richprint-pe"
7
+ version = "1.0.0"
8
+ description = "Decode and print Rich headers from Windows PE executables"
9
+ readme = "README.md"
10
+ license = "BSD-2-Clause"
11
+ requires-python = ">=3.8"
12
+ authors = [
13
+ { name = "dishather", email = "noreply@github.com" }
14
+ ]
15
+ keywords = ["pe", "executable", "rich-header", "windows", "compiler", "forensics"]
16
+ classifiers = [
17
+ "Development Status :: 5 - Production/Stable",
18
+ "Environment :: Console",
19
+ "Intended Audience :: Developers",
20
+ "License :: OSI Approved :: BSD License",
21
+ "Operating System :: OS Independent",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.8",
24
+ "Programming Language :: Python :: 3.9",
25
+ "Programming Language :: Python :: 3.10",
26
+ "Programming Language :: Python :: 3.11",
27
+ "Programming Language :: Python :: 3.12",
28
+ "Topic :: Software Development :: Build Tools",
29
+ "Topic :: System :: Systems Administration",
30
+ ]
31
+ dependencies = []
32
+
33
+ [project.scripts]
34
+ richprint = "richprint.cli:main"
35
+
36
+ [project.urls]
37
+ Homepage = "https://github.com/dishather/richprint"
38
+ Repository = "https://github.com/dishather/richprint"
39
+
40
+ [tool.hatch.build.targets.wheel]
41
+ packages = ["src/richprint"]
42
+
43
+ [tool.hatch.build.targets.sdist]
44
+ include = [
45
+ "/src",
46
+ "/tests",
47
+ "/README.md",
48
+ "/LICENSE",
49
+ ]
@@ -0,0 +1,62 @@
1
+ """
2
+ richprint - Decode and print Rich headers from Windows PE executables.
3
+
4
+ The Rich header is metadata embedded by Microsoft's linker containing
5
+ compiler version information (@comp.id records).
6
+ """
7
+
8
+ from .parser import parse_file, parse_bytes
9
+ from .database import load_database, lookup_description, CompilerDatabase
10
+ from .models import CompilerEntry, RichHeader, PEInfo, ParseResult
11
+ from .constants import (
12
+ MZ_SIGNATURE,
13
+ PE_SIGNATURE,
14
+ RICH_SIGNATURE,
15
+ DANS_SIGNATURE,
16
+ MACHINE_TYPES,
17
+ get_machine_type,
18
+ )
19
+ from .exceptions import (
20
+ RichPrintError,
21
+ FileOpenError,
22
+ NoMZHeaderError,
23
+ NoPEHeaderError,
24
+ InvalidDOSHeaderError,
25
+ NoRichHeaderError,
26
+ NoDanSTokenError,
27
+ InvalidRichHeaderError,
28
+ )
29
+
30
+ __version__ = "1.0.0"
31
+
32
+ __all__ = [
33
+ # Main API
34
+ "parse_file",
35
+ "parse_bytes",
36
+ "load_database",
37
+ "lookup_description",
38
+ # Models
39
+ "CompilerEntry",
40
+ "RichHeader",
41
+ "PEInfo",
42
+ "ParseResult",
43
+ "CompilerDatabase",
44
+ # Constants
45
+ "MZ_SIGNATURE",
46
+ "PE_SIGNATURE",
47
+ "RICH_SIGNATURE",
48
+ "DANS_SIGNATURE",
49
+ "MACHINE_TYPES",
50
+ "get_machine_type",
51
+ # Exceptions
52
+ "RichPrintError",
53
+ "FileOpenError",
54
+ "NoMZHeaderError",
55
+ "NoPEHeaderError",
56
+ "InvalidDOSHeaderError",
57
+ "NoRichHeaderError",
58
+ "NoDanSTokenError",
59
+ "InvalidRichHeaderError",
60
+ # Version
61
+ "__version__",
62
+ ]
@@ -0,0 +1,7 @@
1
+ """Enable running as: python -m richprint"""
2
+
3
+ import sys
4
+ from .cli import main
5
+
6
+ if __name__ == "__main__":
7
+ sys.exit(main())
@@ -0,0 +1,112 @@
1
+ """Command-line interface for richprint."""
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ from typing import List, Optional
7
+
8
+ from .database import load_database
9
+ from .parser import parse_file
10
+ from .models import ParseResult
11
+
12
+
13
def format_entry_line(entry) -> str:
    """Render one Rich header entry as a single text row.

    The row holds four space-separated, fixed-width columns: ``comp_id``
    as 8 zero-padded hex digits, ``product_id`` as hex (width 4),
    ``build_version`` as decimal (width 6) and ``count`` as decimal
    (width 5).  When ``entry.description`` is truthy it is appended after
    one more space; otherwise the row ends after the count column.
    """
    columns = (
        f"{entry.comp_id:08x} {entry.product_id:4x} "
        f"{entry.build_version:6d} {entry.count:5d}"
    )
    if not entry.description:
        return columns
    return f"{columns} {entry.description}"
20
+
21
+
22
def print_result(result: ParseResult) -> None:
    """Write a human-readable report for one parse result.

    A ``Processing <filename>`` line always goes to stdout first.  If the
    result is not successful, ``result.error`` is written to stderr and
    nothing else is printed.  Otherwise the target machine name is printed
    when ``result.pe_info`` is present, followed by a column header and one
    formatted row per entry when the Rich header has entries.
    """
    print(f"Processing {result.filename}")

    if not result.success:
        print(result.error, file=sys.stderr)
        return

    pe_info = result.pe_info
    if pe_info:
        print(f"Target machine: {pe_info.machine_name}")

    rich_header = result.rich_header
    if not rich_header:
        return
    entries = rich_header.entries
    if not entries:
        return

    print("@comp.id id version count description")
    # map() is lazy, so each row is formatted right before it is printed.
    for row in map(format_entry_line, entries):
        print(row)
37
+
38
+
39
def create_parser() -> argparse.ArgumentParser:
    """Build the ``richprint`` command-line argument parser.

    Recognised arguments:

    * ``FILE ...``         -- zero or more paths, stored as ``files``
    * ``--json``           -- boolean flag, stored as ``json``
    * ``--database/-d``    -- optional path, stored as ``database``
    * ``--version/-V``     -- print the program version and exit
    """
    cli = argparse.ArgumentParser(
        prog="richprint",
        description="Decode and print Rich headers from Windows PE executables.",
        epilog=(
            "Rich headers contain compiler version information embedded by "
            "Microsoft's linker."
        ),
    )

    # Positional arguments: nargs="*" lets the list be empty.
    cli.add_argument(
        "files",
        metavar="FILE",
        nargs="*",
        help="PE executable file(s) to analyze",
    )

    # Optional switches.
    cli.add_argument("--json", action="store_true", help="Output results as JSON")
    cli.add_argument(
        "--database",
        "-d",
        metavar="PATH",
        help="Path to custom comp_id.txt database file",
    )
    cli.add_argument("--version", "-V", action="version", version="%(prog)s 1.0.0")

    return cli
76
+
77
+
78
def main(argv: Optional[List[str]] = None) -> int:
    """Run the command-line tool and return a process exit status.

    Args:
        argv: Argument list handed to ``ArgumentParser.parse_args``; with
            ``None`` argparse falls back to the process arguments.

    Returns:
        ``0`` when no files were given (a short usage text is printed
        instead) or when every file parsed successfully; ``1`` if at
        least one result reports failure.
    """
    args = create_parser().parse_args(argv)

    if not args.files:
        print(
            "Rich header decoder. Usage:\n\n"
            " richprint file ...\n\n"
            "Rich headers can be found in executable files, DLLs, "
            "and other binary files\ncreated by Microsoft linker."
        )
        return 0

    # The database is loaded once and shared by every file.
    db = load_database(args.database)
    results = [parse_file(filename, db) for filename in args.files]

    if not args.json:
        for parsed in results:
            print_result(parsed)
    else:
        print(json.dumps([parsed.to_dict() for parsed in results], indent=2))

    # Any failed file makes the exit status non-zero.
    if any(not parsed.success for parsed in results):
        return 1
    return 0
109
+
110
+
111
+ if __name__ == "__main__":
112
+ sys.exit(main())
@@ -0,0 +1,47 @@
1
+ """Magic numbers and constants for PE/Rich header parsing."""
2
+
3
# Signature magic values, derived from the byte sequences they spell
# when read as little-endian integers.
MZ_SIGNATURE = int.from_bytes(b"MZ", "little")  # 0x5A4D - DOS executable signature
PE_SIGNATURE = int.from_bytes(b"PE\x00\x00", "little")  # 0x4550 - PE header signature
RICH_SIGNATURE = int.from_bytes(b"Rich", "little")  # 0x68636952
DANS_SIGNATURE = int.from_bytes(b"DanS", "little")  # 0x536E6144

# DOS header offsets
DOS_NUM_RELOCS_OFFSET = 0x06   # Number of relocations
DOS_HEADER_PARA_OFFSET = 0x08  # Size of header in paragraphs
DOS_RELOC_OFFSET = 0x18        # File address of relocation table
DOS_PE_OFFSET = 0x3C           # File address of PE header

# PE header offsets (relative to PE signature)
PE_MACHINE_OFFSET = 4          # Machine type field

# Machine type mapping: machine field value -> display name.
# From https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
MACHINE_TYPES = dict(
    (
        (0x8664, "x64"),
        (0x14C, "x32"),
        (0x1D3, "Matsushita AM33"),
        (0x1C0, "ARM LE"),
        (0x1C4, "ARMv7+ Thumb"),
        (0xAA64, "ARMv8 64bit"),
        (0xEBC, "EFI bytecode"),
        (0x200, "Intel Itanium"),
        (0x9041, "Mitsubishi M32R LE"),
        (0x266, "MIPS16"),
        (0x366, "MIPS w/FPU"),
        (0x466, "MIPS16 w/FPU"),
        (0x1F0, "PowerPC LE"),
        (0x1F1, "PowerPC w/FPU"),
        (0x166, "MIPS LE"),
        (0x1A2, "Hitachi SH3"),
        (0x1A3, "Hitachi SH3 DSP"),
        (0x1A6, "Hitachi SH4"),
        (0x1A8, "Hitachi SH5"),
        (0x1C2, "ARM or Thumb"),
        (0x169, "MIPS LE WCE v2"),
    )
)
43
+
44
+
45
def get_machine_type(machine_id: int) -> str:
    """Return the display name registered for ``machine_id``.

    The name is looked up in ``MACHINE_TYPES``; identifiers that are not
    in the table yield the string ``"Unknown"``.
    """
    try:
        return MACHINE_TYPES[machine_id]
    except KeyError:
        return "Unknown"
@@ -0,0 +1 @@
1
+ # Package marker for bundled data files.