@redpanda-data/docs-extensions-and-macros 4.11.1 → 4.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/doc-tools.js +201 -10
- package/package.json +3 -1
- package/tools/property-extractor/COMPUTED_CONSTANTS.md +173 -0
- package/tools/property-extractor/Makefile +12 -1
- package/tools/property-extractor/README.adoc +828 -97
- package/tools/property-extractor/compare-properties.js +38 -13
- package/tools/property-extractor/constant_resolver.py +610 -0
- package/tools/property-extractor/file_pair.py +42 -0
- package/tools/property-extractor/generate-handlebars-docs.js +41 -8
- package/tools/property-extractor/helpers/gt.js +9 -0
- package/tools/property-extractor/helpers/includes.js +17 -0
- package/tools/property-extractor/helpers/index.js +3 -0
- package/tools/property-extractor/helpers/isEnterpriseEnum.js +24 -0
- package/tools/property-extractor/helpers/renderPropertyExample.js +6 -5
- package/tools/property-extractor/overrides.json +248 -0
- package/tools/property-extractor/parser.py +254 -32
- package/tools/property-extractor/property_bag.py +40 -0
- package/tools/property-extractor/property_extractor.py +1417 -430
- package/tools/property-extractor/requirements.txt +1 -0
- package/tools/property-extractor/templates/property-backup.hbs +161 -0
- package/tools/property-extractor/templates/property.hbs +104 -49
- package/tools/property-extractor/templates/topic-property-backup.hbs +148 -0
- package/tools/property-extractor/templates/topic-property.hbs +72 -34
- package/tools/property-extractor/tests/test_known_values.py +617 -0
- package/tools/property-extractor/tests/transformers_test.py +81 -6
- package/tools/property-extractor/topic_property_extractor.py +23 -10
- package/tools/property-extractor/transformers.py +2191 -369
- package/tools/property-extractor/type_definition_extractor.py +669 -0
- package/tools/redpanda-connect/helpers/renderConnectFields.js +33 -1
- package/tools/redpanda-connect/report-delta.js +132 -9
- package/tools/property-extractor/definitions.json +0 -245
package/tools/property-extractor/type_definition_extractor.py (new file):

@@ -0,0 +1,669 @@
#!/usr/bin/env python3
"""
Dynamic C++ Type Definition Extractor

This module dynamically extracts type definitions from Redpanda C++ source code:
- Struct and class definitions with their fields
- Enum definitions with their values
- Type aliases (using/typedef)

The extracted definitions are formatted into a JSON schema-like dictionary for use in property documentation generation.
"""

import os
import re
import logging
from pathlib import Path

logger = logging.getLogger(__name__)


class TypeDefinitionExtractor:
    """
    Extracts C++ type definitions from source code to build a dynamic definitions dictionary.

    This automatically discovers:
    - Structs used in properties (model::broker_endpoint, config::tls_config, etc.)
    - Enums and their values (model::compression, model::cleanup_policy_bitflags, etc.)
    - Nested type definitions
    """

    def __init__(self, source_path):
        """
        Initialize the extractor.

        Args:
            source_path (str): Path to Redpanda source directory
        """
        self.source_path = Path(source_path)
        self.definitions = {}
        self.enum_cache = {}
        self.struct_cache = {}

    def extract_all_definitions(self):
        """
        Extract all type definitions from the source tree.

        Returns:
            dict: Dictionary of {type_name: definition} in JSON schema format
        """
        logger.info("🔍 Extracting type definitions from C++ source...")

        # Scan these directories for type definitions
        # These are relative to the source_path provided
        search_dirs = [
            'model',
            'config',
            'net',
            'kafka',
            'pandaproxy',
            'security',
            'utils',
        ]

        for search_dir in search_dirs:
            full_path = self.source_path / search_dir
            if not full_path.exists():
                logger.debug(f"Skipping non-existent directory: {search_dir}")
                continue

            self._scan_directory(full_path, search_dir)

        logger.info(f"✅ Extracted {len(self.definitions)} type definitions")
        logger.info(f"  - {len(self.enum_cache)} enums")
        logger.info(f"  - {len(self.struct_cache)} structs/classes")

        return self.definitions
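For orientation, a minimal sketch of the shape of the dictionary returned by extract_all_definitions() above (type names, values, and paths here are hypothetical; the actual entries depend on the scanned source tree):

# Illustrative only -- not part of the packaged module.
definitions = {
    "model::example_enum": {
        "type": "enum",
        "enum": ["value_a", "value_b"],
        "defined_in": "model/example.h",
    },
    "config::example_struct": {
        "type": "object",
        "properties": {"host": {"type": "string"}, "port": {"type": "integer"}},
        "defined_in": "config/example.h",
    },
    "model::example_alias": {
        "type": "integer",
        "alias_for": "named_type<int32_t, struct example_tag>",
        "minimum": -2147483648,
        "maximum": 2147483647,
        "defined_in": "model/example.h",
    },
}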
    def _scan_directory(self, directory, relative_path):
        """
        Recursively scan a directory for C++ header files and extract definitions.

        Args:
            directory (Path): Directory to scan
            relative_path (str): Relative path for source references
        """
        for header_file in directory.rglob('*.h'):
            try:
                self._extract_from_file(header_file, relative_path)
            except Exception as e:
                logger.debug(f"Error processing {header_file}: {e}")

    def _extract_from_file(self, file_path, relative_path):
        """
        Extract type definitions from a single C++ header file.

        Args:
            file_path (Path): Path to header file
            relative_path (str): Relative path for source references
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            logger.debug(f"Cannot read {file_path}: {e}")
            return

        # Extract enums first (they're simpler)
        self._extract_enums(content, file_path, relative_path)

        # Then extract type aliases (using/typedef)
        self._extract_type_aliases(content, file_path, relative_path)

        # Then extract struct/class definitions
        self._extract_structs(content, file_path, relative_path)

    def _extract_enums(self, content, file_path, relative_path):
        """
        Extract enum definitions and their values.

        Pattern matches:
        - enum class name : type { value1, value2, ... };
        - enum name { value1, value2, ... };
        """
        # Pattern for enum class with optional underlying type
        # Handle both simple types (uint8_t) and qualified types (std::uint16_t)
        enum_pattern = re.compile(
            r'enum\s+(?:class\s+)?(\w+)\s*(?::\s*[\w:]+)?\s*\{([^}]+)\}',
            re.MULTILINE | re.DOTALL
        )

        for match in enum_pattern.finditer(content):
            enum_name = match.group(1)
            enum_body = match.group(2)

            # Extract enum values (handle comments and assignments)
            # Remove comments FIRST before splitting by comma to avoid issues with commas in comments
            cleaned_body = re.sub(r'//.*$', '', enum_body, flags=re.MULTILINE)
            cleaned_body = re.sub(r'/\*.*?\*/', '', cleaned_body, flags=re.DOTALL)

            values = []
            for line in cleaned_body.split(','):
                line = line.strip()

                if not line:
                    continue

                # Extract value name (before = if assignment exists)
                value_match = re.match(r'^(\w+)', line)
                if value_match:
                    value_name = value_match.group(1)
                    values.append(value_name)

            if values:
                # Try to determine the namespace/qualified name
                namespace = self._extract_namespace(content, match.start())
                qualified_name = f"{namespace}::{enum_name}" if namespace else enum_name

                # Look for a corresponding _to_string() conversion function
                string_mappings = self._extract_enum_to_string_mappings(content, enum_name, values, file_path)

                definition = {
                    "type": "enum",
                    "enum": values,
                    "defined_in": str(file_path.relative_to(self.source_path))
                }

                # If we found string mappings, add them to the definition
                if string_mappings:
                    definition["enum_string_mappings"] = string_mappings
                    # Use mapped strings as the enum values for documentation
                    definition["enum"] = [string_mappings.get(v, v) for v in values]
                    logger.debug(f"Found {len(string_mappings)} string mappings for enum: {qualified_name}")

                self.definitions[qualified_name] = definition
                self.enum_cache[qualified_name] = definition["enum"]

                logger.debug(f"Found enum: {qualified_name} with {len(values)} values")

    def _extract_enum_to_string_mappings(self, content, enum_name, enum_values, file_path=None):
        """
        Extract enum-to-string conversion mappings from multiple C++ patterns.

        Looks for patterns like:
        1. _to_string() function:
           const char* write_caching_mode_to_string(write_caching_mode s) {
               switch (s) {
               case write_caching_mode::default_true:
                   return "true";
               }
           }

        2. operator<< overload:
           std::ostream& operator<<(std::ostream& os, timestamp_type ts) {
               switch (ts) {
               case timestamp_type::append_time:
                   return os << "LogAppendTime";
               }
           }

        3. string_switch pattern (operator>> or parse functions):
           ts_type = string_switch<timestamp_type>(s)
             .match("LogAppendTime", timestamp_type::append_time)
             .match("CreateTime", timestamp_type::create_time);

        Args:
            content: The file content to search
            enum_name: Name of the enum
            enum_values: List of enum value names
            file_path: Optional Path object of the file being processed

        Returns:
            dict: Mapping of enum values to their string representations
        """
        mappings = {}

        # Helper function to search for mappings in content
        def search_content(search_content):
            found_mappings = {}

            # Pattern 1: Look for _to_string() function
            to_string_pattern = rf'{enum_name}_to_string\s*\([^)]+\)\s*\{{([^}}]+(?:\{{[^}}]*\}}[^}}]*)*)\}}'
            match = re.search(to_string_pattern, search_content, re.MULTILINE | re.DOTALL)
            if match:
                function_body = match.group(1)
                case_pattern = rf'case\s+(?:{enum_name}::)?(\w+)\s*:\s*(?:.*?return\s*"([^"]+)"|.*?return\s*std::string_view\{{"([^"]+)"\}})'
                for case_match in re.finditer(case_pattern, function_body, re.MULTILINE | re.DOTALL):
                    enum_value = case_match.group(1)
                    string_value = case_match.group(2) or case_match.group(3)
                    if enum_value in enum_values and string_value:
                        found_mappings[enum_value] = string_value

            # Pattern 2: Look for operator<< overload
            operator_pattern = rf'operator<<\s*\([^,]+,\s*(?:const\s+)?{enum_name}\s+\w+\)\s*\{{([^}}]+(?:\{{[^}}]*\}}[^}}]*)*)\}}'
            match = re.search(operator_pattern, search_content, re.MULTILINE | re.DOTALL)
            if match:
                function_body = match.group(1)
                case_pattern = rf'case\s+(?:{enum_name}::)?(\w+)\s*:\s*.*?(?:os|o)\s*<<\s*"([^"]+)"'
                for case_match in re.finditer(case_pattern, function_body, re.MULTILINE | re.DOTALL):
                    enum_value = case_match.group(1)
                    string_value = case_match.group(2)
                    if enum_value in enum_values and string_value:
                        found_mappings[enum_value] = string_value

            # Pattern 3: Look for string_switch pattern
            string_switch_pattern = rf'string_switch<{enum_name}>\s*\([^)]+\)((?:\s*\.match\s*\([^)]+\))+)'
            for switch_match in re.finditer(string_switch_pattern, search_content, re.MULTILINE | re.DOTALL):
                matches_block = switch_match.group(1)
                match_pattern = r'\.match\s*\(\s*"([^"]+)"\s*,\s*(?:' + enum_name + r'::)?(\w+)\s*\)'
                for match_call in re.finditer(match_pattern, matches_block):
                    string_value = match_call.group(1)
                    enum_value = match_call.group(2)
                    if enum_value in enum_values and string_value:
                        found_mappings[enum_value] = string_value

            # Pattern 4: Look for to_string_view() or to_string() standalone functions
            # Handles: constexpr std::string_view to_string_view(enum_name v) { switch(v) { case enum_name::value: return "string"; } }
            to_string_view_pattern = rf'(?:constexpr\s+)?(?:std::string_view|const\s+char\*|ss::sstring)\s+to_string(?:_view)?\s*\(\s*{enum_name}\s+\w+\s*\)\s*\{{([^}}]+(?:\{{[^}}]*\}}[^}}]*)*)\}}'
            match = re.search(to_string_view_pattern, search_content, re.MULTILINE | re.DOTALL)
            if match:
                function_body = match.group(1)
                # Match: case enum_name::value: return "string";
                case_pattern = rf'case\s+{enum_name}::(\w+)\s*:\s*return\s+"([^"]+)"'
                for case_match in re.finditer(case_pattern, function_body, re.MULTILINE | re.DOTALL):
                    enum_value = case_match.group(1)
                    string_value = case_match.group(2)
                    if enum_value in enum_values and string_value:
                        found_mappings[enum_value] = string_value

            return found_mappings

        # First try the current file content
        mappings = search_content(content)

        # If no mappings found and we have a file path, search related files
        if not mappings and file_path:
            files_to_search = []

            # If this is a .h file, look for corresponding .cc file
            if file_path.suffix == '.h':
                cc_path = file_path.with_suffix('.cc')
                if cc_path.exists():
                    files_to_search.append(cc_path)

            # Also look for parent directory's main .cc file (e.g., model/model.cc)
            parent_dir = file_path.parent
            parent_cc = parent_dir / f"{parent_dir.name}.cc"
            if parent_cc.exists() and parent_cc != cc_path:
                files_to_search.append(parent_cc)

            # Search each related file
            for search_file in files_to_search:
                try:
                    with open(search_file, 'r', encoding='utf-8', errors='ignore') as f:
                        file_content = f.read()
                    file_mappings = search_content(file_content)
                    if file_mappings:
                        mappings.update(file_mappings)
                        logger.debug(f"Found {len(file_mappings)} string mappings for {enum_name} in {search_file.name}")
                        break  # Stop after finding mappings
                except Exception as e:
                    logger.debug(f"Could not read {search_file}: {e}")

        # Log the found mappings
        for enum_value, string_value in mappings.items():
            logger.debug(f"Mapped {enum_name}::{enum_value} -> \"{string_value}\"")

        return mappings
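As a concrete illustration of what the string-mapping helper above returns, here is a minimal sketch built from the operator<< example in its docstring (the C++ snippet and the call are illustrative, not taken from the package's tests):

# Illustrative only -- not part of the packaged module.
cpp = '''
std::ostream& operator<<(std::ostream& os, timestamp_type ts) {
    switch (ts) {
    case timestamp_type::append_time:
        return os << "LogAppendTime";
    case timestamp_type::create_time:
        return os << "CreateTime";
    }
}
'''
extractor = TypeDefinitionExtractor(".")
mappings = extractor._extract_enum_to_string_mappings(
    cpp, "timestamp_type", ["append_time", "create_time"])
# mappings == {"append_time": "LogAppendTime", "create_time": "CreateTime"}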
    def _extract_type_aliases(self, content, file_path, relative_path):
        """
        Extract type aliases (using/typedef declarations).

        Pattern matches:
        - using name = type;
        - using name = named_type<underlying_type, ...>;
        - typedef type name;
        """
        # Pattern for 'using' declarations
        # Matches: using node_id = named_type<int32_t, ...>;
        #          using my_type = std::string;
        using_pattern = re.compile(
            r'using\s+(\w+)\s*=\s*(.+?);',
            re.MULTILINE
        )

        for match in using_pattern.finditer(content):
            alias_name = match.group(1)
            alias_type = match.group(2).strip()

            # Try to determine the underlying type
            json_type = self._resolve_alias_type(alias_type)

            if json_type:
                # Try to determine the namespace/qualified name
                namespace = self._extract_namespace(content, match.start())
                qualified_name = f"{namespace}::{alias_name}" if namespace else alias_name

                definition = {
                    "type": json_type,
                    "defined_in": str(file_path.relative_to(self.source_path)),
                    "alias_for": alias_type
                }

                # Add min/max for integer types
                if json_type == "integer":
                    if "int32_t" in alias_type:
                        definition["minimum"] = -2147483648
                        definition["maximum"] = 2147483647
                    elif "int64_t" in alias_type:
                        definition["minimum"] = -9223372036854775808
                        definition["maximum"] = 9223372036854775807
                    elif "uint32_t" in alias_type:
                        definition["minimum"] = 0
                        definition["maximum"] = 4294967295
                    elif "uint64_t" in alias_type:
                        definition["minimum"] = 0
                        definition["maximum"] = 18446744073709551615

                self.definitions[qualified_name] = definition
                logger.debug(f"Found type alias: {qualified_name} = {alias_type} → {json_type}")

    def _resolve_alias_type(self, alias_type):
        """
        Resolve a C++ type alias to a JSON schema type.

        Args:
            alias_type (str): The C++ type expression (e.g., "named_type<int32_t, ...>")

        Returns:
            str: JSON schema type (integer, string, etc.) or None if unknown
        """
        alias_type = alias_type.strip()

        # Handle named_type<T, ...> pattern - extract the underlying type
        named_type_match = re.match(r'named_type<\s*([^,>]+)', alias_type)
        if named_type_match:
            underlying_type = named_type_match.group(1).strip()
            return self._cpp_type_to_json_type(underlying_type)

        # Handle direct type aliases
        return self._cpp_type_to_json_type(alias_type)
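To make the alias handling above concrete, a small sketch of how the resolver maps a couple of right-hand sides (the node_id alias mirrors the comment in the code; the second call is a generic illustration):

# Illustrative only -- not part of the packaged module.
ex = TypeDefinitionExtractor(".")
ex._resolve_alias_type("named_type<int32_t, struct node_id_tag>")  # -> "integer"
ex._resolve_alias_type("ss::sstring")                               # -> "string"
# _extract_type_aliases() then records the alias with the int32_t bounds
# (minimum -2147483648, maximum 2147483647), as shown above.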
    def _extract_structs(self, content, file_path, relative_path):
        """
        Extract struct/class definitions and their fields.

        Pattern matches:
        - struct name { field_type field_name; ... };
        - class name { public: field_type field_name; ... };
        """
        # Pattern for struct/class declaration (without capturing body)
        # Handles: struct name { ... }, class name { ... }
        # With optional: final, override keywords and inheritance
        struct_decl_pattern = re.compile(
            r'(?:struct|class)\s+(\w+)\s*(?:final|override)?\s*(?::\s*[^{]+)?\s*\{',
            re.MULTILINE
        )

        for match in struct_decl_pattern.finditer(content):
            struct_name = match.group(1)

            # Skip template definitions (too complex for now)
            if '<' in struct_name or 'template' in content[max(0, match.start()-50):match.start()]:
                continue

            # Use brace-counting to extract the complete body
            body_start = match.end()
            struct_body = self._extract_braced_content(content, body_start)

            if not struct_body:
                continue

            # Extract fields
            properties = self._extract_fields(struct_body)

            if properties:
                # Try to determine the namespace/qualified name
                namespace = self._extract_namespace(content, match.start())
                qualified_name = f"{namespace}::{struct_name}" if namespace else struct_name

                definition = {
                    "type": "object",
                    "properties": properties,
                    "defined_in": str(file_path.relative_to(self.source_path))
                }

                self.definitions[qualified_name] = definition
                self.struct_cache[qualified_name] = properties

                logger.debug(f"Found struct: {qualified_name} with {len(properties)} fields")

    def _extract_braced_content(self, content, start_pos):
        """
        Extract content within matching braces using brace-counting.

        Args:
            content (str): Full file content
            start_pos (int): Position right after opening brace

        Returns:
            str: Content between braces (not including the braces themselves)
        """
        brace_count = 1
        pos = start_pos

        while brace_count > 0 and pos < len(content):
            if content[pos] == '{':
                brace_count += 1
            elif content[pos] == '}':
                brace_count -= 1
            pos += 1

        if brace_count == 0:
            # Successfully found matching brace
            return content[start_pos:pos-1]

        return ""

    def _extract_fields(self, struct_body):
        """
        Extract field definitions from a struct/class body.
        Extracts all fields (including private) and public accessor methods.
        Private fields will be filtered out when outputting to JSON.

        Extracts:
        - All data members (including private fields starting with _)
        - Public const methods that return a value (simple accessors like `host()`, `port()`)

        Returns:
            dict: {field_name: field_definition}
        """
        properties = {}

        # Track current access level (structs default to public, classes to private)
        # We'll assume public for simplicity since most config types use structs
        current_access = 'public'

        # Split body into lines to track access specifiers
        lines = struct_body.split('\n')

        # Pattern for access specifiers
        access_pattern = re.compile(r'^\s*(public|private|protected)\s*:')

        # Pattern for field declarations
        # Matches: type field_name; or type field_name{default};
        field_pattern = re.compile(
            r'([\w:]+(?:<[^>]+>)?)\s+(\w+)\s*(?:\{[^}]*\})?;'
        )

        # Pattern for getter methods (accessor methods)
        # Matches: type name() const { return _name; } or const type& name() const;
        getter_pattern = re.compile(
            r'(?:const\s+)?([\w:]+(?:<[^>]+>)?)\s*(?:&)?\s+(\w+)\s*\(\s*\)\s*const'
        )

        for line in lines:
            # Check for access specifier
            access_match = access_pattern.match(line)
            if access_match:
                current_access = access_match.group(1)
                continue

            # Extract getter methods only from public sections
            if current_access == 'public':
                # Skip lines with friend or operator declarations
                if 'friend' in line or 'operator' in line:
                    continue

                # Look for getter methods (public accessor methods)
                getter_match = getter_pattern.search(line)
                if getter_match:
                    return_type = getter_match.group(1).strip()
                    method_name = getter_match.group(2).strip()

                    # Skip special methods and single-letter names (likely from multiline parsing)
                    if method_name in ('operator', 'get', 'begin', 'end', 'size', 'empty') or len(method_name) == 1:
                        continue

                    # Skip methods with common getter prefixes (get_, is_, has_, can_, should_)
                    # We only want simple accessors like host(), port(), family()
                    # Not complex getters like get_crl_file(), is_enabled(), etc.
                    if any(method_name.startswith(prefix) for prefix in ['get_', 'is_', 'has_', 'can_', 'should_']):
                        continue

                    # Convert C++ type to JSON schema type
                    json_type = self._cpp_type_to_json_type(return_type)

                    # Use method name as field name (e.g., host() becomes "host")
                    properties[method_name] = {"type": json_type}
                    continue

            # Extract field declarations from all sections (public and private)
            field_match = field_pattern.search(line)
            if field_match:
                field_type = field_match.group(1).strip()
                field_name = field_match.group(2).strip()

                # Skip non-data members
                if field_name in ('public', 'private', 'protected', 'static', 'const'):
                    continue

                # Convert C++ type to JSON schema type
                json_type = self._cpp_type_to_json_type(field_type)

                properties[field_name] = {"type": json_type}

        return properties
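A minimal sketch of what the field extractor above produces for a simple struct body (the fields are hypothetical, loosely in the spirit of model::broker_endpoint):

# Illustrative only -- not part of the packaged module.
body = """
    ss::sstring name;
    net::unresolved_address address;
"""
fields = TypeDefinitionExtractor(".")._extract_fields(body)
# fields == {"name": {"type": "string"}, "address": {"type": "object"}}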
    def _extract_namespace(self, content, position):
        """
        Extract the namespace at a given position in the file.

        Args:
            content (str): File content
            position (int): Position in the file

        Returns:
            str: Namespace (e.g., "model" or "config::tls")
        """
        # Look backwards from position to find namespace declaration
        preceding = content[:position]

        # Find all namespace declarations before this position
        namespace_pattern = re.compile(r'namespace\s+(\w+)\s*\{')
        namespaces = []

        for match in namespace_pattern.finditer(preceding):
            ns_name = match.group(1)
            # Check if we're still inside this namespace by tracking brace depth
            # Start with depth=1 (we entered the namespace with its opening brace)
            after_ns = content[match.end():position]
            brace_depth = 1

            for char in after_ns:
                if char == '{':
                    brace_depth += 1
                elif char == '}':
                    brace_depth -= 1
                    if brace_depth == 0:
                        # Namespace was closed before reaching current position
                        break

            if brace_depth > 0:
                # Still inside this namespace
                namespaces.append(ns_name)

        return '::'.join(namespaces) if namespaces else ''

    def _cpp_type_to_json_type(self, cpp_type):
        """
        Convert a C++ type to a JSON schema type.

        Args:
            cpp_type (str): C++ type name

        Returns:
            str: JSON schema type (object, string, integer, boolean, array)
        """
        cpp_type = cpp_type.strip()

        # Remove const, reference, pointer qualifiers
        cpp_type = re.sub(r'\bconst\b', '', cpp_type)
        cpp_type = re.sub(r'[&*]', '', cpp_type)
        cpp_type = cpp_type.strip()

        # Map common C++ types to JSON types
        if cpp_type in ('bool', 'boolean'):
            return 'boolean'

        if cpp_type in ('int', 'int32_t', 'int64_t', 'uint32_t', 'uint64_t',
                        'size_t', 'long', 'short', 'unsigned'):
            return 'integer'

        if cpp_type in ('float', 'double'):
            return 'number'

        if 'string' in cpp_type.lower() or cpp_type == 'ss::sstring':
            return 'string'

        if 'vector' in cpp_type or 'array' in cpp_type:
            return 'array'

        if 'optional' in cpp_type:
            # Extract inner type from std::optional<T>
            inner_match = re.match(r'std::optional<(.+)>', cpp_type)
            if inner_match:
                inner_type = inner_match.group(1)
                return self._cpp_type_to_json_type(inner_type)

        # Handle empty types (edge case from complex parsing)
        if not cpp_type:
            return 'string'

        # Default to object for complex types
        if '::' in cpp_type or cpp_type[0].isupper():
            return 'object'

        # Unknown type - default to string
        return 'string'


def extract_definitions_from_source(source_path):
    """
    Convenience function to extract all type definitions from Redpanda source.

    Args:
        source_path (str): Path to Redpanda source directory

    Returns:
        dict: Dictionary of type definitions in JSON schema format
    """
    extractor = TypeDefinitionExtractor(source_path)
    return extractor.extract_all_definitions()


if __name__ == "__main__":
    import sys
    import json

    if len(sys.argv) < 2:
        print("Usage: python3 type_definition_extractor.py <redpanda_source_path>")
        sys.exit(1)

    logging.basicConfig(level=logging.INFO)

    source_path = sys.argv[1]
    definitions = extract_definitions_from_source(source_path)

    print(json.dumps(definitions, indent=2))
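Besides the CLI entry point above, the module can be imported directly; a minimal usage sketch (the source path is an assumption -- point it at the checkout directory that contains the model/, config/, net/, ... subtrees scanned above):

# Illustrative only -- not part of the packaged module.
import json
from type_definition_extractor import extract_definitions_from_source

definitions = extract_definitions_from_source("redpanda/src/v")  # path is hypothetical
print(json.dumps(definitions, indent=2))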