@redpanda-data/docs-extensions-and-macros 4.11.1 → 4.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/bin/doc-tools.js +201 -10
  2. package/package.json +3 -1
  3. package/tools/property-extractor/COMPUTED_CONSTANTS.md +173 -0
  4. package/tools/property-extractor/Makefile +12 -1
  5. package/tools/property-extractor/README.adoc +828 -97
  6. package/tools/property-extractor/compare-properties.js +38 -13
  7. package/tools/property-extractor/constant_resolver.py +610 -0
  8. package/tools/property-extractor/file_pair.py +42 -0
  9. package/tools/property-extractor/generate-handlebars-docs.js +41 -8
  10. package/tools/property-extractor/helpers/gt.js +9 -0
  11. package/tools/property-extractor/helpers/includes.js +17 -0
  12. package/tools/property-extractor/helpers/index.js +3 -0
  13. package/tools/property-extractor/helpers/isEnterpriseEnum.js +24 -0
  14. package/tools/property-extractor/helpers/renderPropertyExample.js +6 -5
  15. package/tools/property-extractor/overrides.json +248 -0
  16. package/tools/property-extractor/parser.py +254 -32
  17. package/tools/property-extractor/property_bag.py +40 -0
  18. package/tools/property-extractor/property_extractor.py +1417 -430
  19. package/tools/property-extractor/requirements.txt +1 -0
  20. package/tools/property-extractor/templates/property-backup.hbs +161 -0
  21. package/tools/property-extractor/templates/property.hbs +104 -49
  22. package/tools/property-extractor/templates/topic-property-backup.hbs +148 -0
  23. package/tools/property-extractor/templates/topic-property.hbs +72 -34
  24. package/tools/property-extractor/tests/test_known_values.py +617 -0
  25. package/tools/property-extractor/tests/transformers_test.py +81 -6
  26. package/tools/property-extractor/topic_property_extractor.py +23 -10
  27. package/tools/property-extractor/transformers.py +2191 -369
  28. package/tools/property-extractor/type_definition_extractor.py +669 -0
  29. package/tools/redpanda-connect/helpers/renderConnectFields.js +33 -1
  30. package/tools/redpanda-connect/report-delta.js +132 -9
  31. package/tools/property-extractor/definitions.json +0 -245
@@ -0,0 +1,669 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Dynamic C++ Type Definition Extractor
4
+
5
+ This module dynamically extracts type definitions from Redpanda C++ source code:
6
+ - Struct and class definitions with their fields
7
+ - Enum definitions with their values
8
+ - Type aliases (using/typedef)
9
+
10
+ The extracted definitions are formatted into a JSON schema-like dictionary for use in property documentation generation.
11
+ """
12
+
13
+ import os
14
+ import re
15
+ import logging
16
+ from pathlib import Path
17
+
18
logger = logging.getLogger(__name__)


class TypeDefinitionExtractor:
    """
    Extract C++ type definitions from source code into a definitions dictionary.

    The extractor is regex-based (it is not a C++ parser) and discovers:
    - Structs/classes and their fields or simple public accessors
    - Enums and their values (optionally mapped to their string forms)
    - Type aliases declared with ``using``

    Results accumulate in ``self.definitions`` keyed by the namespace-qualified
    type name (for example ``model::compression``).
    """

    # Scalar C++ spellings that map to the JSON schema "integer" type.
    _INTEGER_TYPES = frozenset({
        'int', 'long', 'short', 'unsigned', 'size_t',
        'int8_t', 'int16_t', 'int32_t', 'int64_t',
        'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
    })

    # Inclusive (minimum, maximum) for each fixed-width integer type.
    _FIXED_WIDTH_INT_BOUNDS = {
        'int8_t': (-128, 127),
        'uint8_t': (0, 255),
        'int16_t': (-32768, 32767),
        'uint16_t': (0, 65535),
        'int32_t': (-2147483648, 2147483647),
        'uint32_t': (0, 4294967295),
        'int64_t': (-9223372036854775808, 9223372036854775807),
        'uint64_t': (0, 18446744073709551615),
    }

    # Word boundaries matter: a plain substring test for "int32_t" would also
    # hit "uint32_t" and assign signed bounds to an unsigned type.
    _FIXED_WIDTH_INT_RE = re.compile(r'\b(u?int(?:8|16|32|64)_t)\b')

    # Statement keywords that the field regex can mistake for a type name,
    # e.g. "return _host;" inside an inline method body.
    _NON_TYPE_KEYWORDS = frozenset({
        'return', 'co_return', 'throw', 'delete', 'goto', 'new',
        'using', 'typedef', 'friend', 'namespace', 'case', 'else',
    })

    def __init__(self, source_path):
        """
        Initialize the extractor.

        Args:
            source_path (str): Path to Redpanda source directory
        """
        self.source_path = Path(source_path)
        self.definitions = {}
        self.enum_cache = {}
        self.struct_cache = {}

    def extract_all_definitions(self):
        """
        Extract all type definitions from the source tree.

        Returns:
            dict: Dictionary of {type_name: definition} in JSON schema format
        """
        logger.info("🔍 Extracting type definitions from C++ source...")

        # Directories to scan, relative to the source_path provided.
        search_dirs = [
            'model',
            'config',
            'net',
            'kafka',
            'pandaproxy',
            'security',
            'utils',
        ]

        for search_dir in search_dirs:
            full_path = self.source_path / search_dir
            if not full_path.exists():
                logger.debug(f"Skipping non-existent directory: {search_dir}")
                continue

            self._scan_directory(full_path, search_dir)

        logger.info(f"✅ Extracted {len(self.definitions)} type definitions")
        logger.info(f" - {len(self.enum_cache)} enums")
        logger.info(f" - {len(self.struct_cache)} structs/classes")

        return self.definitions

    def _scan_directory(self, directory, relative_path):
        """
        Recursively scan a directory for C++ header files and extract definitions.

        Args:
            directory (Path): Directory to scan
            relative_path (str): Relative path for source references
        """
        for header_file in directory.rglob('*.h'):
            # Best effort: one unparsable header must not abort the whole scan.
            try:
                self._extract_from_file(header_file, relative_path)
            except Exception as e:
                logger.debug(f"Error processing {header_file}: {e}")

    def _extract_from_file(self, file_path, relative_path):
        """
        Extract type definitions from a single C++ header file.

        Files that cannot be read or decoded as UTF-8 are skipped.

        Args:
            file_path (Path): Path to header file
            relative_path (str): Relative path for source references
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            logger.debug(f"Cannot read {file_path}: {e}")
            return

        # Order matters when names collide: later extractions overwrite
        # earlier ones in self.definitions (enums, then aliases, then structs).
        self._extract_enums(content, file_path, relative_path)
        self._extract_type_aliases(content, file_path, relative_path)
        self._extract_structs(content, file_path, relative_path)

    def _extract_enums(self, content, file_path, relative_path):
        """
        Extract enum definitions and their values.

        Pattern matches:
        - enum class name : type { value1, value2, ... };
        - enum name { value1, value2, ... };

        Args:
            content (str): File content
            file_path (Path): Path of the file being processed
            relative_path (str): Relative path for source references (unused)
        """
        # Optional underlying type may be simple (uint8_t) or qualified
        # (std::uint16_t).
        enum_pattern = re.compile(
            r'enum\s+(?:class\s+)?(\w+)\s*(?::\s*[\w:]+)?\s*\{([^}]+)\}',
            re.MULTILINE | re.DOTALL
        )

        for match in enum_pattern.finditer(content):
            enum_name = match.group(1)
            enum_body = match.group(2)

            # Strip comments BEFORE splitting on commas, so commas inside
            # comments do not produce bogus enumerators.
            cleaned_body = re.sub(r'//.*$', '', enum_body, flags=re.MULTILINE)
            cleaned_body = re.sub(r'/\*.*?\*/', '', cleaned_body, flags=re.DOTALL)

            values = []
            for entry in cleaned_body.split(','):
                # The leading identifier is the enumerator name; anything
                # after it (such as "= 3") is ignored.
                value_match = re.match(r'^(\w+)', entry.strip())
                if value_match:
                    values.append(value_match.group(1))

            if not values:
                continue

            namespace = self._extract_namespace(content, match.start())
            qualified_name = f"{namespace}::{enum_name}" if namespace else enum_name

            # Look for a corresponding enum-to-string conversion.
            string_mappings = self._extract_enum_to_string_mappings(content, enum_name, values, file_path)

            definition = {
                "type": "enum",
                "enum": values,
                "defined_in": str(file_path.relative_to(self.source_path))
            }

            if string_mappings:
                definition["enum_string_mappings"] = string_mappings
                # Use mapped strings as the enum values for documentation.
                definition["enum"] = [string_mappings.get(v, v) for v in values]
                logger.debug(f"Found {len(string_mappings)} string mappings for enum: {qualified_name}")

            self.definitions[qualified_name] = definition
            self.enum_cache[qualified_name] = definition["enum"]

            logger.debug(f"Found enum: {qualified_name} with {len(values)} values")

    def _extract_enum_to_string_mappings(self, content, enum_name, enum_values, file_path=None):
        """
        Extract enum-to-string conversion mappings from multiple C++ patterns.

        Looks for patterns like:
        1. <enum>_to_string() function:
               case write_caching_mode::default_true:
                   return "true";
        2. operator<< overload:
               case timestamp_type::append_time:
                   return os << "LogAppendTime";
        3. string_switch pattern:
               string_switch<timestamp_type>(s)
                   .match("LogAppendTime", timestamp_type::append_time)
        4. to_string_view() / to_string() free function taking the enum.

        The current file is searched first. Only if nothing is found there are
        the sibling ``.cc`` file and ``<dir>/<dir>.cc`` searched, stopping at
        the first file that yields a mapping.

        Args:
            content: The file content to search
            enum_name: Name of the enum
            enum_values: List of enum value names
            file_path: Optional Path object of the file being processed

        Returns:
            dict: Mapping of enum values to their string representations
        """

        def find_mappings(text):
            """Run all four patterns over *text*; later patterns overwrite earlier hits."""
            found_mappings = {}

            # Pattern 1: <enum>_to_string() function
            to_string_pattern = rf'{enum_name}_to_string\s*\([^)]+\)\s*\{{([^}}]+(?:\{{[^}}]*\}}[^}}]*)*)\}}'
            match = re.search(to_string_pattern, text, re.MULTILINE | re.DOTALL)
            if match:
                function_body = match.group(1)
                case_pattern = rf'case\s+(?:{enum_name}::)?(\w+)\s*:\s*(?:.*?return\s*"([^"]+)"|.*?return\s*std::string_view\{{"([^"]+)"\}})'
                for case_match in re.finditer(case_pattern, function_body, re.MULTILINE | re.DOTALL):
                    enum_value = case_match.group(1)
                    string_value = case_match.group(2) or case_match.group(3)
                    if enum_value in enum_values and string_value:
                        found_mappings[enum_value] = string_value

            # Pattern 2: operator<< overload
            operator_pattern = rf'operator<<\s*\([^,]+,\s*(?:const\s+)?{enum_name}\s+\w+\)\s*\{{([^}}]+(?:\{{[^}}]*\}}[^}}]*)*)\}}'
            match = re.search(operator_pattern, text, re.MULTILINE | re.DOTALL)
            if match:
                function_body = match.group(1)
                case_pattern = rf'case\s+(?:{enum_name}::)?(\w+)\s*:\s*.*?(?:os|o)\s*<<\s*"([^"]+)"'
                for case_match in re.finditer(case_pattern, function_body, re.MULTILINE | re.DOTALL):
                    enum_value = case_match.group(1)
                    string_value = case_match.group(2)
                    if enum_value in enum_values and string_value:
                        found_mappings[enum_value] = string_value

            # Pattern 3: string_switch<enum>(...).match("str", enum::value)...
            string_switch_pattern = rf'string_switch<{enum_name}>\s*\([^)]+\)((?:\s*\.match\s*\([^)]+\))+)'
            for switch_match in re.finditer(string_switch_pattern, text, re.MULTILINE | re.DOTALL):
                matches_block = switch_match.group(1)
                match_pattern = r'\.match\s*\(\s*"([^"]+)"\s*,\s*(?:' + enum_name + r'::)?(\w+)\s*\)'
                for match_call in re.finditer(match_pattern, matches_block):
                    string_value = match_call.group(1)
                    enum_value = match_call.group(2)
                    if enum_value in enum_values and string_value:
                        found_mappings[enum_value] = string_value

            # Pattern 4: to_string_view() or to_string() standalone functions
            to_string_view_pattern = rf'(?:constexpr\s+)?(?:std::string_view|const\s+char\*|ss::sstring)\s+to_string(?:_view)?\s*\(\s*{enum_name}\s+\w+\s*\)\s*\{{([^}}]+(?:\{{[^}}]*\}}[^}}]*)*)\}}'
            match = re.search(to_string_view_pattern, text, re.MULTILINE | re.DOTALL)
            if match:
                function_body = match.group(1)
                case_pattern = rf'case\s+{enum_name}::(\w+)\s*:\s*return\s+"([^"]+)"'
                for case_match in re.finditer(case_pattern, function_body, re.MULTILINE | re.DOTALL):
                    enum_value = case_match.group(1)
                    string_value = case_match.group(2)
                    if enum_value in enum_values and string_value:
                        found_mappings[enum_value] = string_value

            return found_mappings

        # First try the current file content.
        mappings = find_mappings(content)

        # If nothing was found and we know where the file lives, try the
        # implementation files that conventionally hold the conversions.
        if not mappings and file_path:
            files_to_search = []

            # cc_path stays None for non-header inputs so the comparison
            # below never touches an unbound name.
            cc_path = None
            if file_path.suffix == '.h':
                cc_path = file_path.with_suffix('.cc')
                if cc_path.exists():
                    files_to_search.append(cc_path)

            # Also look for the parent directory's main .cc file
            # (e.g., model/model.cc).
            parent_dir = file_path.parent
            parent_cc = parent_dir / f"{parent_dir.name}.cc"
            if parent_cc.exists() and parent_cc != cc_path:
                files_to_search.append(parent_cc)

            for search_file in files_to_search:
                try:
                    with open(search_file, 'r', encoding='utf-8', errors='ignore') as f:
                        file_content = f.read()
                    file_mappings = find_mappings(file_content)
                    if file_mappings:
                        mappings.update(file_mappings)
                        logger.debug(f"Found {len(file_mappings)} string mappings for {enum_name} in {search_file.name}")
                        break  # Stop after finding mappings
                except Exception as e:
                    logger.debug(f"Could not read {search_file}: {e}")

        for enum_value, string_value in mappings.items():
            logger.debug(f"Mapped {enum_name}::{enum_value} -> \"{string_value}\"")

        return mappings

    @classmethod
    def _integer_bounds(cls, alias_type):
        """
        Return (minimum, maximum) for the first fixed-width integer type named
        in *alias_type*, or None when it names none.
        """
        match = cls._FIXED_WIDTH_INT_RE.search(alias_type)
        if match is None:
            return None
        return cls._FIXED_WIDTH_INT_BOUNDS[match.group(1)]

    def _extract_type_aliases(self, content, file_path, relative_path):
        """
        Extract type aliases declared with ``using``.

        Pattern matches:
        - using name = type;
        - using name = named_type<underlying_type, ...>;

        ``typedef`` declarations are not matched by this method.

        Args:
            content (str): File content
            file_path (Path): Path of the file being processed
            relative_path (str): Relative path for source references (unused)
        """
        # Matches: using node_id = named_type<int32_t, ...>;
        #          using my_type = std::string;
        using_pattern = re.compile(
            r'using\s+(\w+)\s*=\s*(.+?);',
            re.MULTILINE
        )

        for match in using_pattern.finditer(content):
            alias_name = match.group(1)
            alias_type = match.group(2).strip()

            json_type = self._resolve_alias_type(alias_type)
            if not json_type:
                continue

            namespace = self._extract_namespace(content, match.start())
            qualified_name = f"{namespace}::{alias_name}" if namespace else alias_name

            definition = {
                "type": json_type,
                "defined_in": str(file_path.relative_to(self.source_path)),
                "alias_for": alias_type
            }

            # Add min/max for fixed-width integer types. The lookup is
            # word-bounded so unsigned types get unsigned bounds.
            if json_type == "integer":
                bounds = self._integer_bounds(alias_type)
                if bounds is not None:
                    definition["minimum"], definition["maximum"] = bounds

            self.definitions[qualified_name] = definition
            logger.debug(f"Found type alias: {qualified_name} = {alias_type} → {json_type}")

    def _resolve_alias_type(self, alias_type):
        """
        Resolve a C++ type alias to a JSON schema type.

        Args:
            alias_type (str): The C++ type expression (e.g., "named_type<int32_t, ...>")

        Returns:
            str: JSON schema type (integer, string, etc.)
        """
        alias_type = alias_type.strip()

        # named_type<T, ...> wraps T; classify the wrapped type instead.
        named_type_match = re.match(r'named_type<\s*([^,>]+)', alias_type)
        if named_type_match:
            return self._cpp_type_to_json_type(named_type_match.group(1).strip())

        # Direct alias.
        return self._cpp_type_to_json_type(alias_type)

    def _extract_structs(self, content, file_path, relative_path):
        """
        Extract struct/class definitions and their fields.

        Pattern matches:
        - struct name { field_type field_name; ... };
        - class name { public: field_type field_name; ... };

        Types for which no fields are found are not recorded.

        Args:
            content (str): File content
            file_path (Path): Path of the file being processed
            relative_path (str): Relative path for source references (unused)
        """
        # Matches only the declaration head; the body is found by brace
        # counting so nested braces are handled.
        struct_decl_pattern = re.compile(
            r'(?:struct|class)\s+(\w+)\s*(?:final|override)?\s*(?::\s*[^{]+)?\s*\{',
            re.MULTILINE
        )

        for match in struct_decl_pattern.finditer(content):
            struct_name = match.group(1)

            # Skip template definitions (too complex for now): look for the
            # keyword in the 50 characters before the declaration.
            if 'template' in content[max(0, match.start() - 50):match.start()]:
                continue

            struct_body = self._extract_braced_content(content, match.end())
            if not struct_body:
                continue

            properties = self._extract_fields(struct_body)
            if not properties:
                continue

            namespace = self._extract_namespace(content, match.start())
            qualified_name = f"{namespace}::{struct_name}" if namespace else struct_name

            definition = {
                "type": "object",
                "properties": properties,
                "defined_in": str(file_path.relative_to(self.source_path))
            }

            self.definitions[qualified_name] = definition
            self.struct_cache[qualified_name] = properties

            logger.debug(f"Found struct: {qualified_name} with {len(properties)} fields")

    def _extract_braced_content(self, content, start_pos):
        """
        Extract content within matching braces using brace-counting.

        Braces inside string literals or comments are counted like any other.

        Args:
            content (str): Full file content
            start_pos (int): Position right after opening brace

        Returns:
            str: Content between braces (not including the braces themselves),
                or "" when the closing brace is never found
        """
        brace_count = 1
        pos = start_pos

        while brace_count > 0 and pos < len(content):
            if content[pos] == '{':
                brace_count += 1
            elif content[pos] == '}':
                brace_count -= 1
            pos += 1

        if brace_count == 0:
            # pos is one past the matching '}', so exclude that brace.
            return content[start_pos:pos - 1]

        return ""

    def _extract_fields(self, struct_body):
        """
        Extract field definitions from a struct/class body.

        Extracts:
        - Data members from every access section (including private ones)
        - Public const zero-argument methods (simple accessors like `host()`)

        Statements whose first word is a keyword (for example `return _x;`
        inside an inline method body) are not treated as fields.

        Args:
            struct_body (str): Text between the type's outer braces

        Returns:
            dict: {field_name: field_definition}
        """
        properties = {}

        # Structs default to public and classes to private; the body alone
        # does not say which this is, so public is assumed.
        current_access = 'public'

        access_pattern = re.compile(r'^\s*(public|private|protected)\s*:')

        # Matches: type field_name; or type field_name{default};
        field_pattern = re.compile(
            r'([\w:]+(?:<[^>]+>)?)\s+(\w+)\s*(?:\{[^}]*\})?;'
        )

        # Matches: type name() const { return _name; } or const type& name() const;
        getter_pattern = re.compile(
            r'(?:const\s+)?([\w:]+(?:<[^>]+>)?)\s*(?:&)?\s+(\w+)\s*\(\s*\)\s*const'
        )

        for line in struct_body.split('\n'):
            access_match = access_pattern.match(line)
            if access_match:
                current_access = access_match.group(1)
                continue

            # Accessor methods are only taken from public sections.
            if current_access == 'public':
                if 'friend' in line or 'operator' in line:
                    continue

                getter_match = getter_pattern.search(line)
                if getter_match:
                    return_type = getter_match.group(1).strip()
                    method_name = getter_match.group(2).strip()

                    # Skip container-style methods and single-letter names
                    # (likely artifacts of multi-line declarations).
                    if method_name in ('operator', 'get', 'begin', 'end', 'size', 'empty') or len(method_name) == 1:
                        continue

                    # Only plain accessors like host(), port() are wanted,
                    # not get_crl_file(), is_enabled(), etc.
                    if method_name.startswith(('get_', 'is_', 'has_', 'can_', 'should_')):
                        continue

                    # Use method name as field name (host() becomes "host").
                    properties[method_name] = {"type": self._cpp_type_to_json_type(return_type)}
                    continue

            # Data members are taken from all sections (public and private).
            field_match = field_pattern.search(line)
            if not field_match:
                continue

            field_type = field_match.group(1).strip()
            field_name = field_match.group(2).strip()

            # "return _host;" and similar statements look like "type name;".
            if field_type in self._NON_TYPE_KEYWORDS:
                continue

            # Skip non-data members.
            if field_name in ('public', 'private', 'protected', 'static', 'const'):
                continue

            properties[field_name] = {"type": self._cpp_type_to_json_type(field_type)}

        return properties

    def _extract_namespace(self, content, position):
        """
        Extract the namespace enclosing a given position in the file.

        Handles both classic nesting (``namespace a { namespace b {``) and
        C++17 nested definitions (``namespace a::b {``). Anonymous namespaces
        contribute nothing to the result.

        Args:
            content (str): File content
            position (int): Position in the file

        Returns:
            str: Namespace (e.g., "model" or "config::tls"), "" if none
        """
        preceding = content[:position]

        # [\w:]+ rather than \w+ so "namespace a::b {" is recognised.
        namespace_pattern = re.compile(r'namespace\s+([\w:]+)\s*\{')
        namespaces = []

        for match in namespace_pattern.finditer(preceding):
            # Depth starts at 1 for the namespace's own opening brace; if it
            # reaches 0 before `position`, the namespace was already closed.
            brace_depth = 1
            for char in content[match.end():position]:
                if char == '{':
                    brace_depth += 1
                elif char == '}':
                    brace_depth -= 1
                    if brace_depth == 0:
                        break

            if brace_depth > 0:
                namespaces.append(match.group(1))

        return '::'.join(namespaces)

    def _cpp_type_to_json_type(self, cpp_type):
        """
        Convert a C++ type to a JSON schema type.

        Checks run from most to least specific: scalars, then optional<T>
        (classified as T), then containers, then strings. Containers are
        tested before strings so that ``std::vector<ss::sstring>`` is an
        array rather than a string.

        Args:
            cpp_type (str): C++ type name

        Returns:
            str: JSON schema type (object, string, integer, number, boolean, array)
        """
        # Remove const, reference, pointer qualifiers.
        cpp_type = re.sub(r'\bconst\b', '', cpp_type.strip())
        cpp_type = re.sub(r'[&*]', '', cpp_type).strip()

        # Empty types are an edge case from complex parsing.
        if not cpp_type:
            return 'string'

        # Scalars may be spelled with a std:: qualifier (std::int32_t).
        bare = cpp_type[len('std::'):] if cpp_type.startswith('std::') else cpp_type

        if bare in ('bool', 'boolean'):
            return 'boolean'

        if bare in self._INTEGER_TYPES:
            return 'integer'

        if bare in ('float', 'double'):
            return 'number'

        # optional<T> documents as T.
        optional_match = re.match(r'(?:std::)?optional<(.+)>', cpp_type)
        if optional_match:
            return self._cpp_type_to_json_type(optional_match.group(1))

        if 'vector' in cpp_type or 'array' in cpp_type:
            return 'array'

        if 'string' in cpp_type.lower():
            return 'string'

        # Default to object for qualified or capitalised (complex) types.
        if '::' in cpp_type or cpp_type[0].isupper():
            return 'object'

        # Unknown type - default to string.
        return 'string'
640
+
641
+
642
def extract_definitions_from_source(source_path):
    """
    Build a TypeDefinitionExtractor for *source_path* and run a full extraction.

    Args:
        source_path (str): Path to Redpanda source directory

    Returns:
        dict: Dictionary of type definitions in JSON schema format
    """
    return TypeDefinitionExtractor(source_path).extract_all_definitions()
654
+
655
+
656
if __name__ == "__main__":
    import json
    import sys

    # Command-line entry point: expects the Redpanda source path as the
    # first argument and prints the extracted definitions as JSON.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python3 type_definition_extractor.py <redpanda_source_path>")
        sys.exit(1)

    logging.basicConfig(level=logging.INFO)

    print(json.dumps(extract_definitions_from_source(cli_args[0]), indent=2))