cs-binding-generator 1.0.1.dev38__tar.gz → 1.0.1.dev40__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/PKG-INFO +9 -3
  2. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/README.md +8 -2
  3. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator/_version.py +3 -3
  4. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator/code_generators.py +46 -1
  5. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator/config.py +19 -6
  6. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator/generator.py +342 -65
  7. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator/main.py +1 -0
  8. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator.egg-info/PKG-INFO +9 -3
  9. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator.egg-info/SOURCES.txt +2 -0
  10. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/docs/ARCHITECTURE.md +18 -8
  11. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/docs/XML_CONFIG.md +189 -15
  12. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_generator.py +5 -3
  13. cs_binding_generator-1.0.1.dev40/tests/test_macro_constants.py +501 -0
  14. cs_binding_generator-1.0.1.dev40/tests/test_utf8_byte_overloads.py +161 -0
  15. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/.coverage +0 -0
  16. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/.flake8 +0 -0
  17. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/.github/workflows/publish.yml +0 -0
  18. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/.gitignore +0 -0
  19. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/COPILOT_CONTEXT.md +0 -0
  20. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/LICENSE +0 -0
  21. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator/__init__.py +0 -0
  22. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator/constants.py +0 -0
  23. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator/type_mapper.py +0 -0
  24. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator.egg-info/dependency_links.txt +0 -0
  25. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator.egg-info/entry_points.txt +0 -0
  26. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator.egg-info/requires.txt +0 -0
  27. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/cs_binding_generator.egg-info/top_level.txt +0 -0
  28. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/docs/INCLUDE_DIRECTORIES.md +0 -0
  29. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/docs/MULTI_FILE_OUTPUT.md +0 -0
  30. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/docs/RENAMING_EXAMPLE.xml +0 -0
  31. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/docs/TROUBLESHOOTING.md +0 -0
  32. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/enter_devenv.sh +0 -0
  33. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/pyproject.toml +0 -0
  34. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/run_tests.sh +0 -0
  35. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/setup.cfg +0 -0
  36. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/FreeTypeTest/FreeTypeTest.csproj +0 -0
  37. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/FreeTypeTest/bindings.cs +0 -0
  38. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/FreeTypeTest/cs-bindings.xml +0 -0
  39. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/FreeTypeTest/freetype.cs +0 -0
  40. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/LibtcodTest/LibtcodTest.csproj +0 -0
  41. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/LibtcodTest/SDL3.cs +0 -0
  42. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/LibtcodTest/bindings.cs +0 -0
  43. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/LibtcodTest/cs-bindings.xml +0 -0
  44. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/LibtcodTest/libtcod.cs +0 -0
  45. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/SDL3Test/SDL3.cs +0 -0
  46. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/SDL3Test/SDL3Test.csproj +0 -0
  47. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/SDL3Test/SDL3Test.sln +0 -0
  48. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/SDL3Test/bindings.cs +0 -0
  49. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/test_dotnet/SDL3Test/cs-bindings.xml +0 -0
  50. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/__init__.py +0 -0
  51. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/conftest.py +0 -0
  52. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/fixtures.py +0 -0
  53. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_cli.py +0 -0
  54. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_cli_extended.py +0 -0
  55. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_code_generators.py +0 -0
  56. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_defines.py +0 -0
  57. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_edge_cases.py +0 -0
  58. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_error_handling.py +0 -0
  59. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_flag_enums.py +0 -0
  60. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_multi_file_deduplication.py +0 -0
  61. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_opaque_typedef_underlying.py +0 -0
  62. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_removal.py +0 -0
  63. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_renaming.py +0 -0
  64. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_type_mapper.py +0 -0
  65. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_type_mapping_extended.py +0 -0
  66. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_variadic_functions.py +0 -0
  67. {cs_binding_generator-1.0.1.dev38 → cs_binding_generator-1.0.1.dev40}/tests/test_xml_config.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cs_binding_generator
3
- Version: 1.0.1.dev38
3
+ Version: 1.0.1.dev40
4
4
  Summary: Generate C# bindings from C headers using libclang with modern LibraryImport
5
5
  Author-email: Robin 'Ruadeil' Degen <mail@ruadeil.lgbt>
6
6
  License-Expression: MIT
@@ -48,7 +48,10 @@ The tool is configured primarily through XML configuration files, providing powe
48
48
  - **Automatic Type Mapping**: Intelligently maps C types to C# equivalents
49
49
  - **Renaming Support**: Simple and regex-based renaming rules to transform C names to C# conventions
50
50
  - **Removal Support**: Filter out unwanted functions, types, or patterns
51
- - **Macro Constants**: Extract C `#define` constants as C# enums with optional `[Flags]` attribute
51
+ - **Compiler Defines**: Pass `-D` flags to libclang via `<define>` to enable optional API blocks and gate platform-specific code paths during parsing
52
+ - **Flag Enum Marking**: Tag auto-discovered C enums with `[Flags]` via `<flags>`, with exact-name or regex matching
53
+ - **UTF-8 Byte Overloads**: Opt in via `<utf8-byte-overloads/>` to emit a second `byte*`-param partial alongside every `string?`-param `[LibraryImport]`, so callers can hand pre-encoded UTF-8 buffers (u8 literals, pinned spans) to native code without re-encoding through a managed string
54
+ - **Macro Constants**: Extract C `#define` constants as C# enums (numeric mode) or as UTF-8 `ReadOnlySpan<byte>` members (string mode). Object-like and function-like macros are recursively expanded, and C-style casts in macro values are stripped before the numeric check, so chains like `SDL_BUTTON_MASK(SDL_BUTTON_LEFT)` and values like `((SDL_AudioDeviceID) 0xFFFFFFFFu)` resolve cleanly.
52
55
  - **String Handling**: Provides both raw pointer and helper string methods for `char*` returns
53
56
  - **Struct Generation**: Creates explicit layout structs with proper field offsets
54
57
  - **Union Support**: Converts C unions to C# structs with `LayoutKind.Explicit` and field offsets
@@ -449,7 +452,10 @@ CsBindingGenerator/
449
452
  ## Limitations
450
453
 
451
454
  - Variadic functions are not supported (skipped)
452
- - Complex macros with expressions are not extracted
455
+ - Macro expansion is textual, not a full C preprocessor: token-pasting (`##`),
456
+ stringizing (`#`), and multi-line backslash continuations are not handled.
457
+ Multi-token type names inside casts are recognized (`unsigned int`, `long long`),
458
+ but pointer casts (`(int*)`) are not stripped.
453
459
  - Bitfields in structs are not supported
454
460
  - Function pointers are mapped to `nint`
455
461
  - Requires manual handling of callbacks
@@ -19,7 +19,10 @@ The tool is configured primarily through XML configuration files, providing powe
19
19
  - **Automatic Type Mapping**: Intelligently maps C types to C# equivalents
20
20
  - **Renaming Support**: Simple and regex-based renaming rules to transform C names to C# conventions
21
21
  - **Removal Support**: Filter out unwanted functions, types, or patterns
22
- - **Macro Constants**: Extract C `#define` constants as C# enums with optional `[Flags]` attribute
22
+ - **Compiler Defines**: Pass `-D` flags to libclang via `<define>` to enable optional API blocks and gate platform-specific code paths during parsing
23
+ - **Flag Enum Marking**: Tag auto-discovered C enums with `[Flags]` via `<flags>`, with exact-name or regex matching
24
+ - **UTF-8 Byte Overloads**: Opt in via `<utf8-byte-overloads/>` to emit a second `byte*`-param partial alongside every `string?`-param `[LibraryImport]`, so callers can hand pre-encoded UTF-8 buffers (u8 literals, pinned spans) to native code without re-encoding through a managed string
25
+ - **Macro Constants**: Extract C `#define` constants as C# enums (numeric mode) or as UTF-8 `ReadOnlySpan<byte>` members (string mode). Object-like and function-like macros are recursively expanded, and C-style casts in macro values are stripped before the numeric check, so chains like `SDL_BUTTON_MASK(SDL_BUTTON_LEFT)` and values like `((SDL_AudioDeviceID) 0xFFFFFFFFu)` resolve cleanly.
23
26
  - **String Handling**: Provides both raw pointer and helper string methods for `char*` returns
24
27
  - **Struct Generation**: Creates explicit layout structs with proper field offsets
25
28
  - **Union Support**: Converts C unions to C# structs with `LayoutKind.Explicit` and field offsets
@@ -420,7 +423,10 @@ CsBindingGenerator/
420
423
  ## Limitations
421
424
 
422
425
  - Variadic functions are not supported (skipped)
423
- - Complex macros with expressions are not extracted
426
+ - Macro expansion is textual, not a full C preprocessor: token-pasting (`##`),
427
+ stringizing (`#`), and multi-line backslash continuations are not handled.
428
+ Multi-token type names inside casts are recognized (`unsigned int`, `long long`),
429
+ but pointer casts (`(int*)`) are not stripped.
424
430
  - Bitfields in structs are not supported
425
431
  - Function pointers are mapped to `nint`
426
432
  - Requires manual handling of callbacks
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '1.0.1.dev38'
22
- __version_tuple__ = version_tuple = (1, 0, 1, 'dev38')
21
+ __version__ = version = '1.0.1.dev40'
22
+ __version_tuple__ = version_tuple = (1, 0, 1, 'dev40')
23
23
 
24
- __commit_id__ = commit_id = 'g88b2816f6'
24
+ __commit_id__ = commit_id = 'g7677f88ad'
@@ -12,12 +12,23 @@ from .type_mapper import TypeMapper
12
12
  class CodeGenerator:
13
13
  """Generates C# code from libclang AST nodes"""
14
14
 
15
- def __init__(self, type_mapper: TypeMapper, visibility: str = "public", skip_variadic: bool = False):
15
+ def __init__(
16
+ self,
17
+ type_mapper: TypeMapper,
18
+ visibility: str = "public",
19
+ skip_variadic: bool = False,
20
+ utf8_byte_overloads: bool = False,
21
+ ):
16
22
  self.type_mapper = type_mapper
17
23
  self.anonymous_enum_counter = 0
18
24
  self.visibility = visibility
19
25
  self.skip_variadic = skip_variadic
20
26
  self.has_variadic_functions = False
27
+ # When True, generate_function emits a parallel `byte*`-param overload alongside
28
+ # the primary `[LibraryImport]` for any function whose original signature contains
29
+ # a `string?` parameter. Lets callers pass pre-encoded UTF-8 buffers without the
30
+ # managed-string → UTF-8 re-encode round trip.
31
+ self.utf8_byte_overloads = utf8_byte_overloads
21
32
 
22
33
  def generate_function(self, cursor, library_name: str) -> str:
23
34
  """Generate C# LibraryImport for a function"""
@@ -100,6 +111,40 @@ class CodeGenerator:
100
111
  code = f""" [LibraryImport("{library_name}", EntryPoint = "{original_func_name}", StringMarshalling = StringMarshalling.Utf8)]
101
112
  [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
102
113
  {return_marshal} {self.visibility} static partial {result_type} {func_name}({params_str});
114
+ """
115
+
116
+ # Optional byte*-param overload. Emitted only when (a) the opt-in flag is set,
117
+ # (b) the function isn't variadic, and (c) at least one parameter was mapped to
118
+ # `string?`. The overload swaps every `string?` param for `byte*`, letting callers
119
+ # pass pre-encoded UTF-8 buffers (u8 literals, pinned spans) without re-encoding.
120
+ # The C# compiler picks between the two overloads by argument type at the call site.
121
+ if self.utf8_byte_overloads and not is_variadic_for_generation:
122
+ byte_params: list[str] = []
123
+ has_string_param = False
124
+ for i, arg in enumerate(cursor.get_arguments()):
125
+ arg_type = self.type_mapper.map_type(arg.type, is_return_type=False)
126
+ if arg_type is None:
127
+ # If the primary emit accepted these params, the secondary should too.
128
+ # Skip silently rather than emit a half-built overload.
129
+ byte_params = []
130
+ has_string_param = False
131
+ break
132
+ arg_name = arg.spelling or f"param{i}"
133
+ arg_name = self._escape_keyword(arg_name)
134
+ if arg_type == "string?":
135
+ has_string_param = True
136
+ byte_params.append(f"byte* {arg_name}")
137
+ elif arg_type == "bool":
138
+ byte_params.append(f"[MarshalAs(UnmanagedType.I1)] {arg_type} {arg_name}")
139
+ else:
140
+ byte_params.append(f"{arg_type} {arg_name}")
141
+
142
+ if has_string_param:
143
+ byte_params_str = ", ".join(byte_params)
144
+ code += f"""
145
+ [LibraryImport("{library_name}", EntryPoint = "{original_func_name}", StringMarshalling = StringMarshalling.Utf8)]
146
+ [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
147
+ {return_marshal} {self.visibility} static partial {result_type} {func_name}({byte_params_str});
103
148
  """
104
149
 
105
150
  # Add helper function for char* return types (skip for variadic functions)
@@ -20,6 +20,7 @@ class BindingConfig:
20
20
  visibility: str = "public"
21
21
  global_constants: list[tuple[str, str, str, bool]] = field(default_factory=list)
22
22
  global_defines: list[tuple[str, str | None]] = field(default_factory=list)
23
+ utf8_byte_overloads: bool = False
23
24
 
24
25
 
25
26
  def parse_config_file(config_path):
@@ -84,19 +85,31 @@ def parse_config_file(config_path):
84
85
 
85
86
  # Get global constants (macros to extract)
86
87
  # These are stored as a list of (name, pattern, type, is_flags) tuples
87
- # They will be applied to all libraries during processing
88
+ # They will be applied to all libraries during processing.
89
+ # The `name` attribute is required for numeric constants groups (which become a
90
+ # named C# enum) but optional for `type="string"` groups (which emit each macro
91
+ # as a member of the library's static class with no wrapper type).
88
92
  for const in root.findall("constants"):
89
93
  const_name = const.get("name")
90
94
  const_pattern = const.get("pattern")
91
- const_type = const.get("type", "uint") # Default to uint
92
- const_flags = const.get("flags", "false").lower() == "true" # Default to false
95
+ const_type = const.get("type", "uint").strip()
96
+ const_flags = const.get("flags", "false").lower() == "true"
93
97
 
94
- if not const_name:
95
- raise ValueError("Constants element missing 'name' attribute")
96
98
  if not const_pattern:
97
99
  raise ValueError("Constants element missing 'pattern' attribute")
100
+ if const_type != "string" and not const_name:
101
+ raise ValueError("Constants element missing 'name' attribute")
102
+
103
+ config.global_constants.append(
104
+ ((const_name or "").strip(), const_pattern.strip(), const_type, const_flags)
105
+ )
98
106
 
99
- config.global_constants.append((const_name.strip(), const_pattern.strip(), const_type.strip(), const_flags))
107
+ # Opt-in: for every function that has at least one `string?` parameter (mapped
108
+ # from a C char* arg), emit a parallel byte*-param overload alongside the primary
109
+ # P/Invoke. Lets callers pass pre-encoded UTF-8 buffers (u8 literals, pinned spans)
110
+ # without the round trip through Encoding.UTF8 → marshaller → native.
111
+ if root.find("utf8-byte-overloads") is not None:
112
+ config.utf8_byte_overloads = True
100
113
 
101
114
  for library in root.findall("library"):
102
115
  library_name = library.get("name")
@@ -60,51 +60,293 @@ class CSharpBindingsGenerator:
60
60
  self.source_file = None
61
61
 
62
62
  def _extract_macros_from_file(self, file_path: str, patterns: list[str]) -> dict[str, str]:
63
- """Extract #define macros from a header file that match the given patterns
63
+ """Numeric-only extractor (legacy signature, retained for internal tests).
64
64
 
65
- Args:
66
- file_path: Path to the header file
67
- patterns: List of regex patterns to match macro names
65
+ Forwards to `_extract_typed_macros_from_file` treating every pattern as
66
+ ``numeric``, and strips the kind from each entry so the return shape stays
67
+ ``{name: value}``.
68
+ """
69
+ typed = [(p, "numeric") for p in patterns]
70
+ result = self._extract_typed_macros_from_file(file_path, typed)
71
+ return {name: value for name, (value, _kind) in result.items()}
72
+
73
+ def _extract_typed_macros_from_file(
74
+ self,
75
+ file_path: str,
76
+ typed_patterns: list[tuple[str, str]],
77
+ ) -> dict[str, tuple[str, str]]:
78
+ """Extract ``#define`` macros from a header, dispatching by pattern kind.
79
+
80
+ ``typed_patterns`` is a list of ``(regex, kind)`` pairs where ``kind`` is either
81
+ ``"string"`` or ``"numeric"``. Each macro is tested against the patterns for its
82
+ kind:
83
+
84
+ - ``"string"``: the macro body must be a single C string literal (``"..."``).
85
+ No expansion is done — string macros that reference other identifiers are
86
+ out of scope. The stored value is the literal including the bounding quotes.
87
+ - ``"numeric"``: the body is expanded against the file's full macro table,
88
+ C casts are stripped, and the result must pass `_is_numeric_macro_value`.
89
+
90
+ Returns a dict ``{name: (value, kind)}`` so callers can route emit decisions
91
+ (enum vs. UTF-8 property) without re-classifying.
92
+ """
93
+ macros: dict[str, tuple[str, str]] = {}
94
+ table = self._scan_macros(file_path)
95
+
96
+ numeric_patterns = [p for p, k in typed_patterns if k != "string"]
97
+ string_patterns = [p for p, k in typed_patterns if k == "string"]
98
+
99
+ for name, (params, body) in table.items():
100
+ if params is not None:
101
+ continue # function-like macros stay in the table only as expansion targets
102
+
103
+ wants_numeric = any(re.fullmatch(p, name) for p in numeric_patterns)
104
+ wants_string = any(re.fullmatch(p, name) for p in string_patterns)
105
+
106
+ # Prefer the string check first when the macro body literally looks like a
107
+ # quoted string — that way `<constants type="string">` doesn't accidentally
108
+ # lose to a wider `numeric` pattern that happens to also match the name.
109
+ if wants_string and self._is_string_macro_value(body):
110
+ macros[name] = (body, "string")
111
+ continue
112
+
113
+ if wants_numeric:
114
+ value = self._expand_macros(body, table)
115
+ value = self._strip_c_casts(value)
116
+
117
+ # Legacy single-arg cast-macro form `WRAP(value)` (e.g. SDL_UINT64_C(0x123)).
118
+ # Run AFTER expansion so we only fall back when expansion didn't replace
119
+ # the wrapper.
120
+ cast_match = re.match(r'^\w+\((.*)\)$', value)
121
+ if cast_match:
122
+ value = cast_match.group(1).strip()
68
123
 
69
- Returns:
70
- Dict mapping macro names to their values
124
+ if self._is_numeric_macro_value(value):
125
+ macros[name] = (value, "numeric")
126
+
127
+ return macros
128
+
129
+ @staticmethod
130
+ def _is_string_macro_value(value: str) -> bool:
131
+ """True if `value` is a single C string literal (e.g. `"hello"`).
132
+
133
+ Backslash escapes are honored so that `"a\"b"` is recognized as one literal,
134
+ not a quote-balanced pair. Concatenated literals like `"a" "b"` are rejected;
135
+ we don't try to fuse them.
136
+ """
137
+ if len(value) < 2 or value[0] != '"' or value[-1] != '"':
138
+ return False
139
+ i = 1
140
+ end = len(value) - 1
141
+ while i < end:
142
+ if value[i] == '\\' and i + 1 < end:
143
+ i += 2
144
+ elif value[i] == '"':
145
+ return False
146
+ else:
147
+ i += 1
148
+ return True
149
+
150
+ def _scan_macros(self, file_path: str) -> dict[str, tuple[list[str] | None, str]]:
151
+ """Pass-1 scan: turn every `#define` in a file into a lookup table.
152
+
153
+ Returns a dict from macro name to `(params, body)` where:
154
+ - object-like macros (`#define NAME body`) have `params=None`
155
+ - function-like macros (`#define NAME(arg1, arg2) body`) have `params=[...]`
156
+
157
+ Bodies are stripped of trailing `/* ... */` comments and trailing commas to
158
+ match the legacy single-pass behavior. Multi-line continuations (backslash-
159
+ newline) are not handled — same limitation the previous scanner had.
71
160
  """
72
- macros = {}
161
+ # Function-like has to be tried first because its `NAME(` opening would otherwise
162
+ # be consumed by the object-like `\w+` group and leave `(args) body` as the value.
163
+ func_re = re.compile(r'^\s*#\s*define\s+(\w+)\(([^)]*)\)\s+(.+?)(?://.*)?$')
164
+ obj_re = re.compile(r'^\s*#\s*define\s+(\w+)\s+(.+?)(?://.*)?$')
165
+ table: dict[str, tuple[list[str] | None, str]] = {}
73
166
 
74
167
  try:
75
168
  with open(file_path, 'r', encoding='utf-8') as f:
76
169
  for line in f:
77
- # Look for #define directives with simple numeric values
78
- # Pattern: #define NAME VALUE
79
- match = re.match(r'^\s*#\s*define\s+(\w+)\s+(.+?)(?://.*)?$', line)
80
- if match:
81
- macro_name = match.group(1)
82
- macro_value = match.group(2).strip()
83
-
84
- # Strip C-style comments (/**< ... */ or /* ... */)
85
- macro_value = re.sub(r'/\*.*?\*/', '', macro_value).strip()
86
-
87
- # Strip trailing commas
88
- macro_value = macro_value.rstrip(',')
89
-
90
- # Strip C cast macros like SDL_UINT64_C(0x...) and extract the value
91
- cast_match = re.match(r'^\w+\((.*)\)$', macro_value)
92
- if cast_match:
93
- macro_value = cast_match.group(1).strip()
94
-
95
- # Only capture macros with numeric-looking values or simple expressions
96
- # Skip macros that reference other identifiers (which would need evaluation)
97
- if self._is_numeric_macro_value(macro_value):
98
- # Check if this macro matches any of the patterns
99
- for pattern in patterns:
100
- if re.fullmatch(pattern, macro_name):
101
- macros[macro_name] = macro_value
102
- break
103
- except Exception as e:
104
- # If we can't read the file, just skip it
170
+ fm = func_re.match(line)
171
+ if fm:
172
+ name = fm.group(1)
173
+ params = [p.strip() for p in fm.group(2).split(',') if p.strip()]
174
+ body = self._clean_macro_body(fm.group(3))
175
+ table[name] = (params, body)
176
+ continue
177
+ om = obj_re.match(line)
178
+ if om:
179
+ name = om.group(1)
180
+ body = self._clean_macro_body(om.group(2))
181
+ table[name] = (None, body)
182
+ except Exception:
105
183
  pass
106
184
 
107
- return macros
185
+ return table
186
+
187
+ @staticmethod
188
+ def _clean_macro_body(body: str) -> str:
189
+ body = body.strip()
190
+ body = re.sub(r'/\*.*?\*/', '', body).strip()
191
+ return body.rstrip(',')
192
+
193
+ def _expand_macros(self, value: str, macros: dict, max_depth: int = 8) -> str:
194
+ """Iteratively substitute identifier and function-call references in `value`
195
+ until the value stops changing or we hit `max_depth`.
196
+
197
+ The depth cap is a hard stop against self-referential macros (`#define FOO FOO`)
198
+ and mutually-referential pairs; we'd rather return a partially-expanded value the
199
+ numeric check rejects than hang.
200
+ """
201
+ for _ in range(max_depth):
202
+ new_value = self._expand_once(value, macros)
203
+ if new_value == value:
204
+ return value
205
+ value = new_value
206
+ return value
207
+
208
+ def _expand_once(self, value: str, macros: dict) -> str:
209
+ """One substitution pass over `value`.
210
+
211
+ Walks the string left-to-right. When we hit an identifier, decide:
212
+ - if it's followed by `(`, treat it as a function-like macro call and substitute
213
+ the body with the args bound to the parameter names;
214
+ - otherwise treat it as an object-like macro reference and substitute its body.
215
+
216
+ Identifiers inside `"..."` string literals are skipped so that string-valued
217
+ macros aren't corrupted by accidental substitution.
218
+ """
219
+ ident_re = re.compile(r'[A-Za-z_]\w*')
220
+ out: list[str] = []
221
+ i = 0
222
+ n = len(value)
223
+
224
+ while i < n:
225
+ ch = value[i]
226
+
227
+ if ch == '"':
228
+ # Copy a string literal verbatim, honoring backslash escapes so that an
229
+ # escaped `\"` doesn't terminate the string prematurely.
230
+ j = i + 1
231
+ while j < n:
232
+ if value[j] == '\\' and j + 1 < n:
233
+ j += 2
234
+ elif value[j] == '"':
235
+ j += 1
236
+ break
237
+ else:
238
+ j += 1
239
+ out.append(value[i:j])
240
+ i = j
241
+ continue
242
+
243
+ if ch.isalpha() or ch == '_':
244
+ m = ident_re.match(value, i)
245
+ assert m is not None # the leading-char check above guarantees this
246
+ name = m.group(0)
247
+ end = i + len(name)
248
+
249
+ if end < n and value[end] == '(':
250
+ # Possible function-like call.
251
+ close = self._find_matching_paren(value, end)
252
+ if close is not None and name in macros and macros[name][0] is not None:
253
+ params, body = macros[name]
254
+ args = self._split_macro_args(value[end + 1:close])
255
+ if len(args) == len(params):
256
+ out.append(self._substitute_params(body, params, args))
257
+ i = close + 1
258
+ continue
259
+ # Not a known function-like macro (or arity mismatch): leave it alone.
260
+ out.append(name)
261
+ i = end
262
+ continue
263
+
264
+ # Bare identifier.
265
+ if name in macros and macros[name][0] is None:
266
+ out.append(macros[name][1])
267
+ else:
268
+ out.append(name)
269
+ i = end
270
+ else:
271
+ out.append(ch)
272
+ i += 1
273
+
274
+ return ''.join(out)
275
+
276
+ @staticmethod
277
+ def _find_matching_paren(s: str, open_idx: int) -> int | None:
278
+ """Return the index of the `)` that closes the `(` at `open_idx`, or None
279
+ if the parens never balance."""
280
+ depth = 0
281
+ for i in range(open_idx, len(s)):
282
+ if s[i] == '(':
283
+ depth += 1
284
+ elif s[i] == ')':
285
+ depth -= 1
286
+ if depth == 0:
287
+ return i
288
+ return None
289
+
290
+ @staticmethod
291
+ def _split_macro_args(args_str: str) -> list[str]:
292
+ """Split a comma-separated function-like macro argument list, respecting paren
293
+ depth so that `(a, b)` inside an argument is not split into two args."""
294
+ if args_str.strip() == '':
295
+ return []
296
+ args: list[str] = []
297
+ buf: list[str] = []
298
+ depth = 0
299
+ for ch in args_str:
300
+ if ch == ',' and depth == 0:
301
+ args.append(''.join(buf).strip())
302
+ buf = []
303
+ else:
304
+ if ch == '(':
305
+ depth += 1
306
+ elif ch == ')':
307
+ depth -= 1
308
+ buf.append(ch)
309
+ args.append(''.join(buf).strip())
310
+ return args
311
+
312
+ @staticmethod
313
+ def _substitute_params(body: str, params: list[str], args: list[str]) -> str:
314
+ """Replace every whole-word occurrence of each parameter name in `body` with the
315
+ corresponding argument text. Identifiers inside string literals are not skipped
316
+ here because function-like macro bodies that contain strings AND reference params
317
+ in them are vanishingly rare in C and unsupported."""
318
+ mapping = dict(zip(params, args))
319
+
320
+ def repl(m: re.Match) -> str:
321
+ name = m.group(0)
322
+ return mapping[name] if name in mapping else name
323
+
324
+ return re.sub(r'\b[A-Za-z_]\w*\b', repl, body)
325
+
326
+ def _strip_c_casts(self, value: str) -> str:
327
+ """Strip C-style casts `(IDENT)` from a macro value when they sit in front of a
328
+ numeric token.
329
+
330
+ The lookahead is what makes this safe: we only remove `(name)` when the next
331
+ non-space character is a digit, opening paren, minus, or bitwise NOT — i.e. the
332
+ start of a numeric expression that the cast is converting. Parens around a bare
333
+ identifier (e.g. `(x)+1`) are left alone, and parens around a number (e.g. `(1)`)
334
+ are not casts so the leading-letter requirement on the identifier skips them.
335
+ """
336
+ # Match a `(IDENT)` or `(IDENT IDENT ...)` cast where each token is whitespace-
337
+ # separated. Covers `(uint32_t)`, `(unsigned int)`, `(long long)`, etc. We do not
338
+ # try to handle pointer casts (`(int*)`) — those contain `*` and the numeric check
339
+ # would reject the surrounding expression anyway.
340
+ cast_pattern = re.compile(
341
+ r'\(\s*[A-Za-z_]\w*(?:\s+[A-Za-z_]\w*)*\s*\)\s*(?=[\d(\-~])'
342
+ )
343
+ # Loop until stable: nested casts like `((Foo)(Bar)0)` need a couple of passes.
344
+ prev = None
345
+ result = value
346
+ while prev != result:
347
+ prev = result
348
+ result = cast_pattern.sub('', result)
349
+ return result
108
350
 
109
351
  def _is_numeric_macro_value(self, value: str) -> bool:
110
352
  """Check if a macro value looks numeric (number, cast, or simple expression)
@@ -537,6 +779,7 @@ class CSharpBindingsGenerator:
537
779
  visibility: str = "public",
538
780
  global_constants: Optional[list[tuple[str, str, str, bool]]] = None,
539
781
  global_defines: Optional[list[tuple[str, Optional[str]]]] = None,
782
+ utf8_byte_overloads: bool = False,
540
783
  ) -> dict[str, str]:
541
784
  """Generate C# bindings from C header file(s)
542
785
 
@@ -556,8 +799,10 @@ class CSharpBindingsGenerator:
556
799
  # Store visibility setting
557
800
  self.visibility = visibility
558
801
 
559
- # Initialize code generator with visibility and skip_variadic flag
560
- self.code_generator = CodeGenerator(self.type_mapper, visibility, skip_variadic)
802
+ # Initialize code generator with visibility, skip_variadic, and byte-overload flag
803
+ self.code_generator = CodeGenerator(
804
+ self.type_mapper, visibility, skip_variadic, utf8_byte_overloads
805
+ )
561
806
 
562
807
  # Store library class names
563
808
  self.library_class_names = library_class_names or {}
@@ -670,10 +915,13 @@ class CSharpBindingsGenerator:
670
915
  if library_name not in self.captured_macros:
671
916
  self.captured_macros[library_name] = {}
672
917
 
673
- # Collect all patterns from global constants
674
- patterns = []
918
+ # Tag each pattern with its emission kind so the per-file scanner can
919
+ # filter accordingly. Anything other than "string" routes to the numeric
920
+ # path (existing behavior).
921
+ typed_patterns: list[tuple[str, str]] = []
675
922
  for const_name, const_pattern, const_type, const_flags in self.global_constants:
676
- patterns.append(const_pattern)
923
+ kind = "string" if const_type == "string" else "numeric"
924
+ typed_patterns.append((const_pattern, kind))
677
925
 
678
926
  # Extract macros from all files in the translation unit (not just the main header)
679
927
  # This includes all #included files, which is where macros like SDL_WINDOW_* live
@@ -685,13 +933,13 @@ class CSharpBindingsGenerator:
685
933
  files_set.add(file_path)
686
934
  for child in cursor.get_children():
687
935
  collect_files(child, files_set)
688
-
936
+
689
937
  all_files = set()
690
938
  collect_files(tu.cursor, all_files)
691
-
939
+
692
940
  # Extract macros from all non-system files
693
941
  for file_path in all_files:
694
- file_macros = self._extract_macros_from_file(file_path, patterns)
942
+ file_macros = self._extract_typed_macros_from_file(file_path, typed_patterns)
695
943
  self.captured_macros[library_name].update(file_macros)
696
944
 
697
945
  if self.captured_macros[library_name]:
@@ -737,36 +985,65 @@ class CSharpBindingsGenerator:
737
985
  """
738
986
  self._add_to_library_collection(self.generated_enums, library, code)
739
987
 
740
- # Generate enums from captured macros using global constants
988
+ # Generate enums or UTF-8 string members from captured macros using global constants
741
989
  for library_name in self.captured_macros:
742
990
  for const_name, const_pattern, const_type, const_flags in self.global_constants:
743
- # Get all macros matching this pattern
744
- matching_macros = {}
745
- for macro_name, macro_value in self.captured_macros[library_name].items():
746
- if re.fullmatch(const_pattern, macro_name):
747
- matching_macros[macro_name] = macro_value
748
-
749
- if matching_macros:
750
- # Apply rename rules to the enum name and member names
751
- enum_name = self.type_mapper.apply_rename(const_name)
752
-
753
- # Build enum members with renamed names
754
- members = []
755
- for macro_name, macro_value in sorted(matching_macros.items()):
991
+ wants_string = const_type == "string"
992
+
993
+ # Get all macros matching this pattern, filtering by kind so that
994
+ # a numeric constants group can't accidentally pick up a string macro
995
+ # (or vice-versa) when their name patterns overlap.
996
+ matching_macros: dict[str, str] = {}
997
+ for macro_name, (macro_value, kind) in self.captured_macros[library_name].items():
998
+ if not re.fullmatch(const_pattern, macro_name):
999
+ continue
1000
+ if wants_string and kind != "string":
1001
+ continue
1002
+ if not wants_string and kind != "numeric":
1003
+ continue
1004
+ matching_macros[macro_name] = macro_value
1005
+
1006
+ if not matching_macros:
1007
+ continue
1008
+
1009
+ if wants_string:
1010
+ # Each macro lands as a ReadOnlySpan<byte> member directly on the
1011
+ # library's static class. We use the fully-qualified type so we don't
1012
+ # need to add `using System;` to every generated file.
1013
+ for macro_name, raw_string in sorted(matching_macros.items()):
756
1014
  renamed_member = self.type_mapper.apply_rename(macro_name)
757
- members.append(f" {renamed_member} = unchecked(({const_type})({macro_value})),")
1015
+ prop = (
1016
+ f" {self.visibility} static System.ReadOnlySpan<byte> "
1017
+ f"{renamed_member} => {raw_string}u8;\n"
1018
+ )
1019
+ self._add_to_library_collection(
1020
+ self.generated_functions, library_name, prop
1021
+ )
1022
+ continue
1023
+
1024
+ # Numeric (enum) path — unchanged from before.
1025
+ # Apply rename rules to the enum name and member names
1026
+ enum_name = self.type_mapper.apply_rename(const_name)
1027
+
1028
+ # Build enum members with renamed names
1029
+ members = []
1030
+ for macro_name, macro_value in sorted(matching_macros.items()):
1031
+ renamed_member = self.type_mapper.apply_rename(macro_name)
1032
+ members.append(
1033
+ f" {renamed_member} = unchecked(({const_type})({macro_value})),"
1034
+ )
758
1035
 
759
- members_str = "\n".join(members)
1036
+ members_str = "\n".join(members)
760
1037
 
761
- # Generate enum with specified type and optional [Flags] attribute
762
- flags_attr = "[Flags]\n" if const_flags else ""
763
- type_clause = f" : {const_type}" if const_type != "int" else ""
764
- code = f"""{flags_attr}{self.visibility} enum {enum_name}{type_clause}
1038
+ # Generate enum with specified type and optional [Flags] attribute
1039
+ flags_attr = "[Flags]\n" if const_flags else ""
1040
+ type_clause = f" : {const_type}" if const_type != "int" else ""
1041
+ code = f"""{flags_attr}{self.visibility} enum {enum_name}{type_clause}
765
1042
  {{
766
1043
  {members_str}
767
1044
  }}
768
1045
  """
769
- self._add_to_library_collection(self.generated_enums, library_name, code)
1046
+ self._add_to_library_collection(self.generated_enums, library_name, code)
770
1047
 
771
1048
  return self._generate_multi_file_output(output)
772
1049