@redpanda-data/docs-extensions-and-macros 4.8.0 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. package/bin/doc-tools.js +88 -53
  2. package/package.json +1 -1
  3. package/tools/property-extractor/Makefile +62 -34
  4. package/tools/property-extractor/generate-handlebars-docs.js +344 -0
  5. package/tools/property-extractor/helpers/and.js +10 -0
  6. package/tools/property-extractor/helpers/eq.js +9 -0
  7. package/tools/property-extractor/helpers/formatPropertyValue.js +128 -0
  8. package/tools/property-extractor/helpers/formatUnits.js +26 -0
  9. package/tools/property-extractor/helpers/index.js +13 -0
  10. package/tools/property-extractor/helpers/join.js +18 -0
  11. package/tools/property-extractor/helpers/ne.js +9 -0
  12. package/tools/property-extractor/helpers/not.js +8 -0
  13. package/tools/property-extractor/helpers/or.js +10 -0
  14. package/tools/property-extractor/helpers/renderPropertyExample.js +42 -0
  15. package/tools/property-extractor/package-lock.json +77 -0
  16. package/tools/property-extractor/package.json +6 -0
  17. package/tools/property-extractor/property_extractor.py +1163 -20
  18. package/tools/property-extractor/requirements.txt +1 -0
  19. package/tools/property-extractor/templates/deprecated-properties.hbs +25 -0
  20. package/tools/property-extractor/templates/deprecated-property.hbs +7 -0
  21. package/tools/property-extractor/templates/property-page.hbs +22 -0
  22. package/tools/property-extractor/templates/property.hbs +70 -0
  23. package/tools/property-extractor/templates/topic-property.hbs +59 -0
  24. package/tools/property-extractor/transformers.py +80 -4
  25. package/tools/property-extractor/json-to-asciidoc/generate_docs.py +0 -491
@@ -1,9 +1,65 @@
1
1
  #!/usr/bin/env python3
2
+ """
3
+ Redpanda Configuration Property Extractor
4
+
5
+ This script extracts configuration properties from Redpanda's C++ source code and generates
6
+ JSON schema definitions with proper type resolution and default value expansion.
7
+
8
+ SPECIAL HANDLING FOR one_or_many_property TYPES:
9
+
10
+ Redpanda uses a custom C++ type called `one_or_many_property<T>` for configuration properties
11
+ that can accept either a single value or an array of values. Examples include:
12
+
13
+ - admin: one_or_many_property<model::broker_endpoint>
14
+ - admin_api_tls: one_or_many_property<endpoint_tls_config>
15
+ - kafka_api_tls: one_or_many_property<endpoint_tls_config>
16
+
17
+ These properties allow flexible configuration syntax:
18
+ Single value: admin: {address: "127.0.0.1", port: 9644}
19
+ Array syntax: admin: [{address: "127.0.0.1", port: 9644}, {address: "0.0.0.0", port: 9645}]
20
+
21
+ PROCESSING PIPELINE:
22
+
23
+ 1. **Property Detection & Transformation** (transformers.py):
24
+ - IsArrayTransformer detects one_or_many_property<T> declarations
25
+ - Marks these properties as type="array" with items.type extracted from T
26
+ - TypeTransformer extracts inner types from template declarations
27
+
28
+ 2. **Type Resolution & Default Expansion** (property_extractor.py):
29
+ - resolve_type_and_default() converts C++ types to JSON schema types
30
+ - Expands C++ constructor defaults to structured JSON objects
31
+ - Ensures array-type properties have array defaults (wraps single objects in arrays)
32
+
33
+ 3. **Documentation Generation** (generate-handlebars-docs.js):
34
+ - Properly formats array defaults as [{ }] instead of { }
35
+ - Displays correct types in documentation (array vs object)
36
+
37
+ EXAMPLE TRANSFORMATION:
38
+
39
+ C++ Source:
40
+ one_or_many_property<model::broker_endpoint> admin(
41
+ *this, "admin", "Network address for Admin API",
42
+ {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))}
43
+ );
44
+
45
+ JSON Output:
46
+ "admin": {
47
+ "type": "array",
48
+ "items": {"type": "object"},
49
+ "default": [{"address": "127.0.0.1", "port": 9644}]
50
+ }
51
+
52
+ Documentation Output:
53
+ Type: array
54
+ Default: [{address: "127.0.0.1", port: 9644}]
55
+ """
2
56
  import logging
3
57
  import sys
4
58
  import os
5
59
  import json
6
60
  import re
61
+ import yaml
62
+ from copy import deepcopy
7
63
 
8
64
  from pathlib import Path
9
65
  from file_pair import FilePair
@@ -13,9 +69,138 @@ from parser import build_treesitter_cpp_library, extract_properties_from_file_pa
13
69
  from property_bag import PropertyBag
14
70
  from transformers import *
15
71
 
72
+ # Import topic property extractor
73
+ try:
74
+ from topic_property_extractor import TopicPropertyExtractor
75
+ except ImportError:
76
+ # TopicPropertyExtractor not available, will skip topic property extraction
77
+ TopicPropertyExtractor = None
78
+
16
79
  logger = logging.getLogger("viewer")
17
80
 
18
81
 
82
def _cpp_constant_patterns(cpp_type, symbol):
    """
    Build the regexes used to find the string literal behind a C++ constant.

    Args:
        cpp_type: Declared C++ type of the constant (e.g., "model::topic")
        symbol: Unqualified identifier (e.g., "kafka_audit_logging_topic")

    Returns:
        List of regex strings, in the order they are tried; each one captures
        the string literal in group 1
    """
    # `<type> <symbol>("literal")`, optionally preceded by `const` / `inline const`
    constructed = cpp_type + r'\s+' + symbol + r'\s*\(\s*"([^"]+)"\s*\)'
    # `std::string_view <symbol>() { return "literal"`, optionally `inline`
    getter = r'std::string_view\s+' + symbol + r'\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"'
    return [
        r'inline\s+const\s+' + constructed,
        r'const\s+' + constructed,
        constructed,
        getter,
        r'inline\s+' + getter,
    ]


def _search_cpp_file(path, patterns):
    """
    Return group 1 of the first pattern that matches the file at `path`.

    Read failures are logged at debug level and treated as "no match" so that
    one unreadable file does not abort the whole lookup.

    Args:
        path: File to read (decoded as UTF-8)
        patterns: Regex strings tried in order

    Returns:
        The captured string, or None if nothing matched or the file could not be read
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files
        logger.debug(f"Error reading {path}: {e}")
        return None

    for pattern in patterns:
        match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
        if match:
            return match.group(1)
    return None


def resolve_cpp_function_call(function_name):
    """
    Dynamically resolve C++ function calls to their return values by searching the source code.

    Only the names listed in `known_constants` below are supported. The
    Redpanda checkout is looked up in a fixed list of candidate directories
    (relative ones are resolved against the current working directory). The
    files in `expected_files` are searched first; if none of them matches,
    every .h/.cc file under src/v/model is searched.

    Args:
        function_name: The C++ function name (e.g., "model::kafka_audit_logging_topic")

    Returns:
        The resolved string value or None if not found
    """
    # Qualified name -> (declared C++ type, unqualified symbol)
    known_constants = {
        'model::kafka_audit_logging_topic': ('model::topic', 'kafka_audit_logging_topic'),
        'model::kafka_consumer_offsets_topic': ('model::topic', 'kafka_consumer_offsets_topic'),
        'model::kafka_internal_namespace': ('model::ns', 'kafka_internal_namespace'),
    }
    # Files tried first, relative to the Redpanda source root
    expected_files = [
        'src/v/model/namespace.h',
        'src/v/model/namespace.cc',
        'src/v/model/kafka_namespace.h',
    ]

    if function_name not in known_constants:
        logger.debug(f"No search patterns defined for function: {function_name}")
        return None

    patterns = _cpp_constant_patterns(*known_constants[function_name])

    # Standard locations used by the property extractor
    redpanda_source_paths = [
        'tmp/redpanda',  # Current directory
        '../tmp/redpanda',  # Parent directory
        'tools/property-extractor/tmp/redpanda',  # From project root
        os.path.join(os.getcwd(), 'tools', 'property-extractor', 'tmp', 'redpanda'),
    ]
    redpanda_source = next(
        (path for path in redpanda_source_paths if os.path.exists(path)), None
    )
    if not redpanda_source:
        logger.warning(f"Could not find Redpanda source directory to resolve function: {function_name}")
        return None

    # Targeted search in the files where the constants are expected to live
    for file_path in expected_files:
        full_path = os.path.join(redpanda_source, file_path)
        if not os.path.exists(full_path):
            continue
        resolved_value = _search_cpp_file(full_path, patterns)
        if resolved_value is not None:
            logger.debug(f"Resolved {function_name}() -> '{resolved_value}' from {file_path}")
            return resolved_value

    # If not found in specific files, do a broader search
    logger.debug(f"Function {function_name} not found in expected files, doing broader search...")

    model_dir = os.path.join(redpanda_source, 'src', 'v', 'model')
    if os.path.exists(model_dir):
        for root, _dirs, files in os.walk(model_dir):
            for file in files:
                if not file.endswith(('.h', '.cc')):
                    continue
                file_path = os.path.join(root, file)
                resolved_value = _search_cpp_file(file_path, patterns)
                if resolved_value is not None:
                    logger.debug(f"Resolved {function_name}() -> '{resolved_value}' from {file_path}")
                    return resolved_value

    logger.warning(f"Could not resolve function call: {function_name}()")
    return None
202
+
203
+
19
204
  def validate_paths(options):
20
205
  path = options.path
21
206
 
@@ -128,23 +313,910 @@ def transform_files_with_properties(files_with_properties):
128
313
 
129
314
  # The definitions.json file contains type definitions that the extractor uses to standardize and centralize type information. Earlier versions of merge_properties_and_definitions replaced any property type (or, for arrays, item type) found in the definitions with a JSON pointer (such as #/definitions/<type>); the function now leaves the resolved types and defaults untouched. The final JSON output includes both a properties section and a definitions section, so that consumers of the output can look up the full type information.
def merge_properties_and_definitions(properties, definitions):
    """
    Bundle the extracted properties and the type definitions into one mapping.

    The property entries are passed through untouched: resolved types and
    defaults are not replaced with "#/definitions/..." references.

    Args:
        properties: Mapping of property name to extracted property data
        definitions: Mapping of type name to type definition

    Returns:
        Dict with the keys "properties" and "definitions", holding the two
        arguments as given
    """
    return {"properties": properties, "definitions": definitions}
146
318
 
147
319
 
320
def apply_property_overrides(properties, overrides, overrides_file_path=None):
    """
    Layer hand-written documentation overrides on top of the extracted properties.

    For every entry under overrides["properties"] whose name also exists in
    `properties`, these keys are copied onto the property in place:

    - description: replaces the auto-extracted description
    - version: version information showing when the property was introduced
    - example: AsciiDoc example text produced by _process_example_override,
      which accepts an "example" string or list of lines, an "example_file"
      path, or an "example_yaml" mapping (title / description / config);
      only stored when the result is non-empty
    - default: replaces the auto-extracted default value

    Override entries naming a property that is not in `properties` are skipped.

    Args:
        properties: Dictionary of extracted properties from C++ source
        overrides: Dictionary loaded from overrides JSON file (may be empty or None)
        overrides_file_path: Path to the overrides file (for resolving relative example_file paths)

    Returns:
        The same properties dictionary, with overrides applied
    """
    if not overrides or "properties" not in overrides:
        return properties

    for name, spec in overrides["properties"].items():
        if name not in properties:
            continue
        target = properties[name]

        # Plain copies first, in the same order the keys are documented
        for key in ("description", "version"):
            if key in spec:
                target[key] = spec[key]

        # Example text may come from any of the supported input formats
        example_text = _process_example_override(spec, overrides_file_path)
        if example_text:
            target["example"] = example_text

        if "default" in spec:
            target["default"] = spec["default"]

    return properties
388
+
389
+
390
+ def _process_example_override(override, overrides_file_path=None):
391
+ """
392
+ Process example overrides in various user-friendly formats.
393
+
394
+ Supports multiple input formats for examples:
395
+ 1. Direct string: "example": "content"
396
+ 2. Multi-line array: "example": ["line1", "line2", ...]
397
+ 3. External file: "example_file": "path/to/file"
398
+ 4. Auto-formatted YAML: "example_yaml": {...}
399
+
400
+ Args:
401
+ override: Dictionary containing override data for a property
402
+ overrides_file_path: Path to the overrides file (for resolving relative paths)
403
+
404
+ Returns:
405
+ Processed AsciiDoc example content as string, or None if no example found
406
+ """
407
+ # Format 1: Direct AsciiDoc string
408
+ if "example" in override:
409
+ example = override["example"]
410
+ if isinstance(example, str):
411
+ return example
412
+ elif isinstance(example, list):
413
+ # Format 2: Multi-line array - join with newlines
414
+ return "\n".join(example)
415
+
416
+ # Format 3: External file reference
417
+ if "example_file" in override:
418
+ file_path = override["example_file"]
419
+
420
+ # Support both absolute and relative paths
421
+ if not os.path.isabs(file_path):
422
+ # Build search paths starting with the overrides file directory
423
+ search_paths = []
424
+
425
+ # If we have the overrides file path, try relative to its directory first
426
+ if overrides_file_path:
427
+ overrides_dir = os.path.dirname(overrides_file_path)
428
+ search_paths.append(os.path.join(overrides_dir, file_path))
429
+
430
+ # Then try common locations relative to current working directory
431
+ search_paths.extend([
432
+ file_path,
433
+ os.path.join("examples", file_path),
434
+ os.path.join("docs-data", file_path),
435
+ os.path.join("__tests__", "docs-data", file_path)
436
+ ])
437
+
438
+ found_path = None
439
+ for search_path in search_paths:
440
+ if os.path.exists(search_path):
441
+ found_path = search_path
442
+ break
443
+
444
+ if found_path:
445
+ file_path = found_path
446
+ else:
447
+ print(f"Warning: Example file not found: {override['example_file']}")
448
+ print(f"Searched in: {', '.join(search_paths)}")
449
+ return None
450
+
451
+ try:
452
+ with open(file_path, 'r', encoding='utf-8') as f:
453
+ return f.read().strip()
454
+ except Exception as e:
455
+ print(f"Error reading example file {file_path}: {e}")
456
+ return None
457
+
458
+ # Format 4: Auto-formatted YAML configuration
459
+ if "example_yaml" in override:
460
+ yaml_data = override["example_yaml"]
461
+ title = yaml_data.get("title", "Example")
462
+ description = yaml_data.get("description", "")
463
+ config = yaml_data.get("config", {})
464
+
465
+ # Build AsciiDoc content
466
+ lines = [f".{title}"]
467
+ if description:
468
+ lines.append(f"{description}\n")
469
+
470
+ lines.extend([
471
+ "[,yaml]",
472
+ "----"
473
+ ])
474
+
475
+ # Convert config to YAML and add to lines
476
+ try:
477
+ yaml_content = yaml.dump(config, default_flow_style=False, indent=2)
478
+ lines.append(yaml_content.rstrip())
479
+ except Exception as e:
480
+ import traceback
481
+ logger.error(f"Error formatting YAML config: {e}")
482
+ logger.debug(f"Full traceback:\n{traceback.format_exc()}")
483
+ return None
484
+
485
+ lines.append("----")
486
+
487
+ return "\n".join(lines)
488
+
489
+ return None
490
+
491
+
492
def add_config_scope(properties):
    """
    Tag every property with a config_scope, updating the entries in place.

    The scope is chosen as follows:
    - 'topic' when is_topic_property is truthy (checked first)
    - 'cluster' when defined_in == src/v/config/configuration.cc
    - 'broker' when defined_in == src/v/config/node_config.cc
    - None for anything else, including a missing defined_in

    Returns the same properties mapping that was passed in.
    """
    scope_by_source = (
        ("src/v/config/configuration.cc", "cluster"),
        ("src/v/config/node_config.cc", "broker"),
    )

    for entry in properties.values():
        # Topic properties win regardless of where they are defined
        if entry.get("is_topic_property", False):
            entry["config_scope"] = "topic"
            continue

        source = entry.get("defined_in", "")
        entry["config_scope"] = next(
            (scope for path, scope in scope_by_source if source == path),
            None,
        )

    return properties
512
+
513
+
514
+ def resolve_type_and_default(properties, definitions):
515
+ """
516
+ Resolve type references and expand default values for all properties.
517
+
518
+ This function performs several critical transformations:
519
+
520
+ 1. **Type Resolution**: Converts C++ type names to JSON schema types
521
+ - model::broker_endpoint -> "object"
522
+ - std::string -> "string"
523
+ - Handles both direct type names and JSON pointer references (#/definitions/...)
524
+
525
+ 2. **Default Value Expansion**: Transforms C++ constructor syntax to JSON objects
526
+ - model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))
527
+ -> {address: "127.0.0.1", port: 9644}
528
+
529
+ 3. **Array Default Handling**: Ensures one_or_many_property defaults are arrays
530
+ - For properties with type="array", wraps single object defaults in arrays
531
+ - Converts empty object strings "{}" to empty arrays []
532
+
533
+ This is essential for one_or_many_property types like 'admin' which should show:
534
+ - Type: array
535
+ - Default: [{address: "127.0.0.1", port: 9644}] (not just {address: ...})
536
+ """
537
+ import ast
538
+ import re
539
+
540
+ def resolve_definition_type(defn):
541
+ """Recursively resolve $ref pointers to get the actual type definition."""
542
+ # Recursively resolve $ref
543
+ while isinstance(defn, dict) and "$ref" in defn:
544
+ ref = defn["$ref"]
545
+ ref_name = ref.split("/")[-1]
546
+ defn = definitions.get(ref_name, defn)
547
+ return defn
548
+
549
+ def parse_constructor(s):
550
+ """Parse C++ constructor syntax into type name and arguments."""
551
+ s = s.strip()
552
+ if s.startswith("{") and s.endswith("}"):
553
+ s = s[1:-1].strip()
554
+ match = re.match(r'([a-zA-Z0-9_:]+)\((.*)\)', s)
555
+ if not match:
556
+ # Primitive or enum
557
+ if s.startswith('"') and s.endswith('"'):
558
+ return None, [ast.literal_eval(s)]
559
+ try:
560
+ return None, [int(s)]
561
+ except Exception:
562
+ return None, [s]
563
+ type_name, arg_str = match.groups()
564
+ args = []
565
+ depth = 0
566
+ current = ''
567
+ in_string = False
568
+ for c in arg_str:
569
+ if c == '"' and (not current or current[-1] != '\\'):
570
+ in_string = not in_string
571
+ if c == ',' and depth == 0 and not in_string:
572
+ if current.strip():
573
+ args.append(current.strip())
574
+ current = ''
575
+ else:
576
+ if c == '(' and not in_string:
577
+ depth += 1
578
+ elif c == ')' and not in_string:
579
+ depth -= 1
580
+ current += c
581
+ if current.strip():
582
+ args.append(current.strip())
583
+ return type_name, args
584
+
585
+ def process_cpp_patterns(arg_str):
586
+ """
587
+ Process specific C++ patterns to user-friendly values.
588
+
589
+ Handles:
590
+ - net::unresolved_address("127.0.0.1", 9092) -> expands based on type definition
591
+ - std::nullopt -> null
592
+ - fips_mode_flag::disabled -> "disabled"
593
+ - model::kafka_audit_logging_topic() -> dynamically looked up from source
594
+ """
595
+ arg_str = arg_str.strip()
596
+
597
+ # Handle std::nullopt -> null
598
+ if arg_str == "std::nullopt":
599
+ return "null"
600
+
601
+ # Handle C++ function calls that return constant values
602
+ # Dynamically look up function return values from the source code
603
+ function_call_match = re.match(r'([a-zA-Z0-9_:]+)\(\)', arg_str)
604
+ if function_call_match:
605
+ function_name = function_call_match.group(1)
606
+ resolved_value = resolve_cpp_function_call(function_name)
607
+ if resolved_value is not None:
608
+ return f'"{resolved_value}"'
609
+
610
+ # Handle enum-like patterns (such as fips_mode_flag::disabled -> "disabled")
611
+ enum_match = re.match(r'[a-zA-Z0-9_:]+::([a-zA-Z0-9_]+)', arg_str)
612
+ if enum_match:
613
+ enum_value = enum_match.group(1)
614
+ return f'"{enum_value}"'
615
+
616
+ # Handle default constructors and their default values
617
+ # This handles cases where C++ default constructors are used but should map to specific values
618
+
619
+ # Pattern 1: Full constructor syntax like config::leaders_preference{}
620
+ constructor_patterns = {
621
+ r'config::leaders_preference\{\}': '"none"', # Based on C++ code analysis
622
+ r'std::chrono::seconds\{0\}': '0',
623
+ r'std::chrono::milliseconds\{0\}': '0',
624
+ r'model::timeout_clock::duration\{\}': '0',
625
+ r'config::data_directory_path\{\}': '""',
626
+ r'std::optional<[^>]+>\{\}': 'null', # Empty optional
627
+ }
628
+
629
+ for pattern, replacement in constructor_patterns.items():
630
+ if re.match(pattern, arg_str):
631
+ return replacement
632
+
633
+ # Pattern 2: Truncated type names that likely came from default constructors
634
+ # These are cases where tree-sitter parsing truncated "config::type{}" to just "type"
635
+ truncated_patterns = {
636
+ 'leaders_preference': '"none"', # config::leaders_preference{} -> none
637
+ 'data_directory_path': '""', # config::data_directory_path{} -> empty string
638
+ 'timeout_clock_duration': '0', # model::timeout_clock::duration{} -> 0
639
+ 'log_level': '"info"', # Default log level
640
+ 'compression_type': '"none"', # Default compression
641
+ }
642
+
643
+ # Check if arg_str is exactly one of these truncated patterns
644
+ if arg_str in truncated_patterns:
645
+ return truncated_patterns[arg_str]
646
+
647
+ # Pattern 3: Handle remaining default constructor syntax generically
648
+ generic_constructor_match = re.match(r'[a-zA-Z0-9_:]+\{\}', arg_str)
649
+ if generic_constructor_match:
650
+ # For unknown constructors, try to infer a reasonable default
651
+ type_name = arg_str[:-2] # Remove the {}
652
+ if 'duration' in type_name.lower() or 'time' in type_name.lower():
653
+ return '0'
654
+ elif 'path' in type_name.lower() or 'directory' in type_name.lower():
655
+ return '""'
656
+ elif 'optional' in type_name.lower():
657
+ return 'null'
658
+ else:
659
+ return '""' # Conservative default to empty string
660
+
661
+ # Handle string concatenation with + operator (such as "128_kib + 1")
662
+ if " + " in arg_str:
663
+ return f'"{arg_str}"'
664
+
665
+ return arg_str
666
+
667
+ def expand_default(type_name, default_str):
668
+ """
669
+ Expand C++ default values into structured JSON objects.
670
+
671
+ For array types with initializer list syntax like:
672
+ {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))}
673
+
674
+ This creates: [{address: "127.0.0.1", port: 9644}]
675
+ """
676
+ # Handle non-string defaults
677
+ if not isinstance(default_str, str):
678
+ return default_str
679
+
680
+ # Apply C++ pattern processing for simple cases (not complex constructor calls)
681
+ if not ("(" in default_str and "::" in default_str):
682
+ processed = process_cpp_patterns(default_str)
683
+ if processed != default_str:
684
+ # Pattern was processed, return the result
685
+ if processed == "null":
686
+ return None
687
+ elif processed.startswith('"') and processed.endswith('"'):
688
+ return ast.literal_eval(processed)
689
+ else:
690
+ return processed
691
+
692
+ type_def = resolve_definition_type(definitions.get(type_name, {}))
693
+ if "enum" in type_def:
694
+ return default_str
695
+ # If it has properties but no explicit type, it's an object
696
+ if type_def.get("type") == "object" or (type_def.get("properties") and not type_def.get("type")):
697
+ tname, args = parse_constructor(default_str)
698
+ if tname is None:
699
+ return default_str
700
+
701
+ props = list(type_def["properties"].keys())
702
+ result = {}
703
+
704
+ # For each constructor argument, try to expand it and map to the correct property
705
+ for i, prop in enumerate(props):
706
+ prop_def = type_def["properties"][prop]
707
+ if "$ref" in prop_def:
708
+ sub_type = prop_def["$ref"].split("/")[-1]
709
+ else:
710
+ sub_type = prop_def.get("type")
711
+
712
+ if i < len(args):
713
+ arg = args[i]
714
+ # Check if this argument is a nested constructor call
715
+ if "(" in arg and "::" in arg:
716
+ # Parse the nested constructor
717
+ nested_tname, nested_args = parse_constructor(arg)
718
+ if nested_tname and nested_tname in definitions:
719
+ # Get the definition for the nested type
720
+ nested_type_def = resolve_definition_type(definitions.get(nested_tname, {}))
721
+ nested_props = list(nested_type_def.get("properties", {}).keys())
722
+
723
+ # Expand the nested constructor by mapping its arguments to its properties
724
+ nested_result = {}
725
+ for j, nested_prop in enumerate(nested_props):
726
+ nested_prop_def = nested_type_def["properties"][nested_prop]
727
+ if j < len(nested_args):
728
+ nested_arg = nested_args[j]
729
+ # Apply simple C++ pattern processing to the argument
730
+ processed_nested_arg = process_cpp_patterns(nested_arg)
731
+
732
+ # Convert the processed argument based on the property type
733
+ if nested_prop_def.get("type") == "string":
734
+ if processed_nested_arg.startswith('"') and processed_nested_arg.endswith('"'):
735
+ nested_result[nested_prop] = ast.literal_eval(processed_nested_arg)
736
+ else:
737
+ nested_result[nested_prop] = processed_nested_arg
738
+ elif nested_prop_def.get("type") == "integer":
739
+ try:
740
+ nested_result[nested_prop] = int(processed_nested_arg)
741
+ except ValueError:
742
+ nested_result[nested_prop] = processed_nested_arg
743
+ elif nested_prop_def.get("type") == "boolean":
744
+ nested_result[nested_prop] = processed_nested_arg.lower() == "true"
745
+ else:
746
+ nested_result[nested_prop] = processed_nested_arg
747
+ else:
748
+ nested_result[nested_prop] = None
749
+
750
+ # Now we have the expanded nested object, we need to map it to the parent object's properties
751
+ # This is where the type-aware mapping happens
752
+
753
+ # Special case: if the nested type is net::unresolved_address and parent is broker_endpoint
754
+ if nested_tname == "net::unresolved_address" and type_name == "model::broker_endpoint":
755
+ # Map net::unresolved_address properties to broker_endpoint
756
+ # Only map the fields that actually exist in the net::unresolved_address
757
+ result["address"] = nested_result.get("address")
758
+ result["port"] = nested_result.get("port")
759
+ break
760
+ else:
761
+ # General case: if we have a single nested constructor argument,
762
+ # try to merge its properties into the parent
763
+ if i == 0 and len(args) == 1:
764
+ result.update(nested_result)
765
+ # Set remaining properties to None
766
+ for remaining_prop in props[i+1:]:
767
+ if remaining_prop not in result:
768
+ result[remaining_prop] = None
769
+ break
770
+ else:
771
+ # Map the nested object to the current property
772
+ result[prop] = nested_result
773
+ else:
774
+ # Fallback: recursively expand with the expected property type
775
+ expanded_arg = expand_default(sub_type, arg)
776
+ result[prop] = expanded_arg
777
+ else:
778
+ # Simple value, parse based on the property type
779
+ # First apply C++ pattern processing
780
+ processed_arg = process_cpp_patterns(arg)
781
+
782
+ if sub_type == "string":
783
+ # If processed_arg is already quoted, use ast.literal_eval, otherwise keep as is
784
+ if processed_arg.startswith('"') and processed_arg.endswith('"'):
785
+ result[prop] = ast.literal_eval(processed_arg)
786
+ else:
787
+ result[prop] = processed_arg
788
+ elif sub_type == "integer":
789
+ try:
790
+ result[prop] = int(processed_arg)
791
+ except ValueError:
792
+ # If conversion fails, keep as string (might be processed C++ pattern)
793
+ result[prop] = processed_arg
794
+ elif sub_type == "boolean":
795
+ result[prop] = processed_arg.lower() == "true"
796
+ else:
797
+ result[prop] = processed_arg
798
+ else:
799
+ result[prop] = None
800
+ return result
801
+ elif type_def.get("type") == "array":
802
+ # Handle array defaults with C++ initializer list syntax like {model::broker_endpoint(...)}
803
+ # This is specifically important for one_or_many_property types that use initializer lists
804
+ # in their C++ defaults but should produce JSON arrays in the output.
805
+ #
806
+ # Example transformation:
807
+ # C++: {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))}
808
+ # JSON: [{"address": "127.0.0.1", "port": 9644, "name": "127.0.0.1:9644"}]
809
+ if isinstance(default_str, str) and default_str.strip().startswith("{") and default_str.strip().endswith("}"):
810
+ # This is an initializer list, parse the elements
811
+ initializer_content = default_str.strip()[1:-1].strip() # Remove outer braces
812
+ if initializer_content:
813
+ # Parse multiple comma-separated elements
814
+ elements = []
815
+ current_element = ""
816
+ paren_depth = 0
817
+ in_quotes = False
818
+
819
+ # Parse elements while respecting nested parentheses and quoted strings
820
+ for char in initializer_content:
821
+ if char == '"' and (not current_element or current_element[-1] != '\\'):
822
+ in_quotes = not in_quotes
823
+
824
+ if not in_quotes:
825
+ if char == '(':
826
+ paren_depth += 1
827
+ elif char == ')':
828
+ paren_depth -= 1
829
+ elif char == ',' and paren_depth == 0:
830
+ # Found a top-level comma, this is a separator
831
+ if current_element.strip():
832
+ elements.append(current_element.strip())
833
+ current_element = ""
834
+ continue
835
+
836
+ current_element += char
837
+
838
+ # Add the last element
839
+ if current_element.strip():
840
+ elements.append(current_element.strip())
841
+
842
+ # Try to determine the item type from the type_def
843
+ items_def = type_def.get("items", {})
844
+ if "$ref" in items_def:
845
+ item_type_name = items_def["$ref"].split("/")[-1]
846
+ else:
847
+ item_type_name = items_def.get("type", "string") # Default to string for arrays
848
+
849
+ # Process each element
850
+ result_array = []
851
+ for element_str in elements:
852
+ # Check if this element is a function call that needs resolution
853
+ if "::" in element_str and element_str.endswith("()"):
854
+ # This is a function call, resolve it
855
+ resolved_value = process_cpp_patterns(element_str)
856
+ if resolved_value.startswith('"') and resolved_value.endswith('"'):
857
+ # Remove quotes from resolved string values
858
+ result_array.append(ast.literal_eval(resolved_value))
859
+ else:
860
+ result_array.append(resolved_value)
861
+ elif element_str.startswith('"') and element_str.endswith('"'):
862
+ # This is a quoted string, parse it
863
+ result_array.append(ast.literal_eval(element_str))
864
+ elif item_type_name == "string":
865
+ # For string items, expand using the item type (might be constructor)
866
+ expanded_element = expand_default(item_type_name, element_str)
867
+ result_array.append(expanded_element)
868
+ else:
869
+ # For other types, expand using the item type
870
+ expanded_element = expand_default(item_type_name, element_str)
871
+ result_array.append(expanded_element)
872
+
873
+ return result_array
874
+ else:
875
+ return []
876
+ else:
877
+ return default_str
878
+ else:
879
+ return default_str
880
+
881
+ for prop in properties.values():
882
+ t = prop.get("type")
883
+ ref_name = None
884
+
885
+ # Handle both JSON pointer references and direct type names
886
+ if isinstance(t, str):
887
+ if t.startswith("#/definitions/"):
888
+ ref_name = t.split("/")[-1]
889
+ elif t in definitions:
890
+ ref_name = t
891
+
892
+ if ref_name and ref_name in definitions:
893
+ defn = definitions.get(ref_name)
894
+ if defn:
895
+ resolved = resolve_definition_type(defn)
896
+ # Always set type to the resolved type string (object, string, etc.)
897
+ resolved_type = resolved.get("type")
898
+ if resolved_type in ("object", "string", "integer", "boolean", "array", "number"):
899
+ prop["type"] = resolved_type
900
+ else:
901
+ prop["type"] = "object" # fallback for complex types
902
+ # Expand default if possible
903
+ if "default" in prop and prop["default"] is not None:
904
+ expanded = expand_default(ref_name, prop["default"])
905
+ prop["default"] = expanded
906
+
907
+ # Handle case where default is already an object with nested constructors
908
+ elif prop.get("type") == "object" and isinstance(prop.get("default"), dict):
909
+ default_obj = prop["default"]
910
+ for field_name, field_value in default_obj.items():
911
+ if isinstance(field_value, str) and "::" in field_value and "(" in field_value:
912
+ # This field contains a nested constructor, try to expand it
913
+ tname, args = parse_constructor(field_value)
914
+ if tname and tname in definitions:
915
+ expanded = expand_default(tname, field_value)
916
+ if isinstance(expanded, dict):
917
+ # Update the existing object fields with the expanded values
918
+ for exp_key, exp_value in expanded.items():
919
+ if exp_key in default_obj:
920
+ default_obj[exp_key] = exp_value
921
+ # Remove the field that contained the constructor
922
+ # unless it's supposed to remain (like 'name' field)
923
+ # For now, let's replace entire default with expanded version
924
+ prop["default"] = expanded
925
+ break
926
+
927
+ # Handle case where property type is array and default contains C++ constructor syntax
928
+ # This is a backup mechanism for cases where the expand_default function above
929
+ # didn't catch array initialization patterns. It specifically looks for properties
930
+ # that are already marked as array type but still have string defaults with
931
+ # C++ constructor syntax that need expansion.
932
+ elif prop.get("type") == "array" and isinstance(prop.get("default"), str):
933
+ default_str = prop["default"]
934
+ if default_str.strip().startswith("{") and default_str.strip().endswith("}"):
935
+ # This is an initializer list for an array, expand it using the same logic as expand_default
936
+ initializer_content = default_str.strip()[1:-1].strip() # Remove outer braces
937
+ if initializer_content:
938
+ # Parse multiple comma-separated elements
939
+ elements = []
940
+ current_element = ""
941
+ paren_depth = 0
942
+ in_quotes = False
943
+
944
+ # Parse elements while respecting nested parentheses and quoted strings
945
+ for char in initializer_content:
946
+ if char == '"' and (not current_element or current_element[-1] != '\\'):
947
+ in_quotes = not in_quotes
948
+
949
+ if not in_quotes:
950
+ if char == '(':
951
+ paren_depth += 1
952
+ elif char == ')':
953
+ paren_depth -= 1
954
+ elif char == ',' and paren_depth == 0:
955
+ # Found a top-level comma, this is a separator
956
+ if current_element.strip():
957
+ elements.append(current_element.strip())
958
+ current_element = ""
959
+ continue
960
+
961
+ current_element += char
962
+
963
+ # Add the last element
964
+ if current_element.strip():
965
+ elements.append(current_element.strip())
966
+
967
+ # Get the item type from the property definition
968
+ items_type = prop.get("items", {}).get("type", "string")
969
+
970
+ # Process each element
971
+ result_array = []
972
+ for element_str in elements:
973
+ # Check if this element is a function call that needs resolution
974
+ if "::" in element_str and element_str.endswith("()"):
975
+ # This is a function call, resolve it
976
+ resolved_value = process_cpp_patterns(element_str)
977
+ if resolved_value.startswith('"') and resolved_value.endswith('"'):
978
+ # Remove quotes from resolved string values
979
+ result_array.append(ast.literal_eval(resolved_value))
980
+ else:
981
+ result_array.append(resolved_value)
982
+ elif element_str.startswith('"') and element_str.endswith('"'):
983
+ # This is a quoted string, parse it
984
+ result_array.append(ast.literal_eval(element_str))
985
+ elif items_type in definitions:
986
+ # For complex types, expand using the item type
987
+ expanded_element = expand_default(items_type, element_str)
988
+ result_array.append(expanded_element)
989
+ else:
990
+ # For simple types, just use the element as-is (likely a string)
991
+ result_array.append(element_str)
992
+
993
+ prop["default"] = result_array
994
+ else:
995
+ prop["default"] = []
996
+
997
+ # Handle array properties where the default is a single object but should be an array
998
+ # This is crucial for one_or_many_property types that are detected as arrays
999
+ # but have defaults that were parsed as single objects by the transformers.
1000
+ #
1001
+ # Background: The transformer chain processes defaults before type resolution,
1002
+ # so a property like admin with default {model::broker_endpoint(...)} gets
1003
+ # expanded to {address: "127.0.0.1", port: 9644} (single object).
1004
+ # But since admin is one_or_many_property<model::broker_endpoint>, it should
1005
+ # be an array: [{address: "127.0.0.1", port: 9644}]
1006
+ if prop.get("type") == "array":
1007
+ default = prop.get("default")
1008
+ if isinstance(default, dict):
1009
+ # If we have an array type but the default is a single object, wrap it in an array
1010
+ # This handles cases like admin: {address: "127.0.0.1", port: 9644} -> [{address: ...}]
1011
+ prop["default"] = [default]
1012
+ elif isinstance(default, str) and default.strip() == "{}":
1013
+ # Empty object string should become empty array for array types
1014
+ # This handles cases like admin_api_tls: "{}" -> []
1015
+ prop["default"] = []
1016
+
1017
+ # Also handle array item types
1018
+ if prop.get("type") == "array" and "items" in prop:
1019
+ items_type = prop["items"].get("type")
1020
+ if isinstance(items_type, str) and items_type in definitions:
1021
+ item_defn = definitions.get(items_type)
1022
+ if item_defn:
1023
+ resolved_item = resolve_definition_type(item_defn)
1024
+ resolved_item_type = resolved_item.get("type")
1025
+ if resolved_item_type in ("object", "string", "integer", "boolean", "array", "number"):
1026
+ prop["items"]["type"] = resolved_item_type
1027
+ else:
1028
+ prop["items"]["type"] = "object" # fallback for complex types
1029
+
1030
+ # Final pass: apply C++ pattern processing to any remaining unprocessed defaults
1031
+ for prop in properties.values():
1032
+ if "default" in prop:
1033
+ default_value = prop["default"]
1034
+
1035
+ if isinstance(default_value, str):
1036
+ # Process string defaults
1037
+ processed = process_cpp_patterns(default_value)
1038
+ if processed != default_value:
1039
+ if processed == "null":
1040
+ prop["default"] = None
1041
+ elif isinstance(processed, str) and processed.startswith('"') and processed.endswith('"'):
1042
+ prop["default"] = ast.literal_eval(processed)
1043
+ else:
1044
+ prop["default"] = processed
1045
+
1046
+ elif isinstance(default_value, list):
1047
+ # Process array defaults - apply C++ pattern processing to each element
1048
+ processed_array = []
1049
+ for item in default_value:
1050
+ if isinstance(item, dict):
1051
+ # Process each field in the object
1052
+ processed_item = {}
1053
+ for field_name, field_value in item.items():
1054
+ if isinstance(field_value, str) and "::" in field_value and "(" in field_value:
1055
+ # This field contains a C++ constructor pattern - try to expand it using type definitions
1056
+ tname, args = parse_constructor(field_value)
1057
+ if tname and tname in definitions:
1058
+ # Get the definition for the nested type and expand the constructor
1059
+ nested_type_def = resolve_definition_type(definitions.get(tname, {}))
1060
+ if nested_type_def.get("properties"):
1061
+ nested_props = list(nested_type_def["properties"].keys())
1062
+ nested_result = {}
1063
+
1064
+ # Map constructor arguments to type properties
1065
+ for j, nested_prop in enumerate(nested_props):
1066
+ nested_prop_def = nested_type_def["properties"][nested_prop]
1067
+ if j < len(args):
1068
+ nested_arg = args[j]
1069
+ processed_nested_arg = process_cpp_patterns(nested_arg)
1070
+
1071
+ # Convert based on property type
1072
+ if nested_prop_def.get("type") == "string":
1073
+ if processed_nested_arg.startswith('"') and processed_nested_arg.endswith('"'):
1074
+ nested_result[nested_prop] = ast.literal_eval(processed_nested_arg)
1075
+ else:
1076
+ nested_result[nested_prop] = processed_nested_arg
1077
+ elif nested_prop_def.get("type") == "integer":
1078
+ try:
1079
+ nested_result[nested_prop] = int(processed_nested_arg)
1080
+ except ValueError:
1081
+ nested_result[nested_prop] = processed_nested_arg
1082
+ elif nested_prop_def.get("type") == "boolean":
1083
+ nested_result[nested_prop] = processed_nested_arg.lower() == "true"
1084
+ else:
1085
+ nested_result[nested_prop] = processed_nested_arg
1086
+ else:
1087
+ nested_result[nested_prop] = None
1088
+
1089
+ # For special case of net::unresolved_address inside broker_authn_endpoint
1090
+ if tname == "net::unresolved_address":
1091
+ # Replace the entire object with expanded net::unresolved_address values
1092
+ # Only include the fields that are actually defined in the type
1093
+ processed_item.update(nested_result)
1094
+ break # Don't process other fields since we replaced the whole object
1095
+ else:
1096
+ processed_item[field_name] = nested_result
1097
+ else:
1098
+ # Fallback to simple pattern processing
1099
+ processed_field = process_cpp_patterns(field_value)
1100
+ if processed_field == "null":
1101
+ processed_item[field_name] = None
1102
+ elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'):
1103
+ processed_item[field_name] = ast.literal_eval(processed_field)
1104
+ else:
1105
+ processed_item[field_name] = processed_field
1106
+ else:
1107
+ # Fallback to simple pattern processing
1108
+ processed_field = process_cpp_patterns(field_value)
1109
+ if processed_field == "null":
1110
+ processed_item[field_name] = None
1111
+ elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'):
1112
+ processed_item[field_name] = ast.literal_eval(processed_field)
1113
+ else:
1114
+ processed_item[field_name] = processed_field
1115
+ elif isinstance(field_value, str):
1116
+ # Simple string field - apply C++ pattern processing
1117
+ processed_field = process_cpp_patterns(field_value)
1118
+ if processed_field == "null":
1119
+ processed_item[field_name] = None
1120
+ elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'):
1121
+ processed_item[field_name] = ast.literal_eval(processed_field)
1122
+ else:
1123
+ processed_item[field_name] = processed_field
1124
+ else:
1125
+ processed_item[field_name] = field_value
1126
+ processed_array.append(processed_item)
1127
+ else:
1128
+ # Non-object array item
1129
+ if isinstance(item, str):
1130
+ processed_item = process_cpp_patterns(item)
1131
+ if processed_item == "null":
1132
+ processed_array.append(None)
1133
+ elif isinstance(processed_item, str) and processed_item.startswith('"') and processed_item.endswith('"'):
1134
+ processed_array.append(ast.literal_eval(processed_item))
1135
+ else:
1136
+ processed_array.append(processed_item)
1137
+ else:
1138
+ processed_array.append(item)
1139
+ prop["default"] = processed_array
1140
+
1141
+ elif isinstance(default_value, dict):
1142
+ # Process object defaults - apply C++ pattern processing to each field
1143
+ processed_object = {}
1144
+ for field_name, field_value in default_value.items():
1145
+ if isinstance(field_value, str):
1146
+ processed_field = process_cpp_patterns(field_value)
1147
+ if processed_field == "null":
1148
+ processed_object[field_name] = None
1149
+ elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'):
1150
+ processed_object[field_name] = ast.literal_eval(processed_field)
1151
+ else:
1152
+ processed_object[field_name] = processed_field
1153
+ else:
1154
+ processed_object[field_name] = field_value
1155
+ prop["default"] = processed_object
1156
+
1157
+ # Handle unresolved C++ types
1158
+ prop_type = prop.get("type")
1159
+ if isinstance(prop_type, str):
1160
+ # Check if it's an unresolved C++ type (contains :: or ends with >)
1161
+ if ("::" in prop_type or prop_type.endswith(">") or
1162
+ prop_type.endswith("_t") or prop_type.startswith("std::")):
1163
+ # Default unresolved C++ types to string, unless they look like numbers
1164
+ if any(word in prop_type.lower() for word in ["int", "long", "short", "double", "float", "number"]):
1165
+ prop["type"] = "integer"
1166
+ elif any(word in prop_type.lower() for word in ["bool"]):
1167
+ prop["type"] = "boolean"
1168
+ else:
1169
+ prop["type"] = "string"
1170
+
1171
+ return properties
1172
+
1173
+
1174
def extract_topic_properties(source_path):
    """
    Collect topic-level properties and reshape them into the standard properties layout.

    Args:
        source_path: Path to the Redpanda source code

    Returns:
        Dictionary keyed by property name, each entry tagged with config_scope: "topic".
        An empty dictionary is returned when TopicPropertyExtractor is unavailable
        or when any exception is raised during extraction.
    """
    # The extractor name is None when it is not available; bail out early in that case.
    if TopicPropertyExtractor is None:
        logging.warning("TopicPropertyExtractor not available, skipping topic property extraction")
        return {}

    try:
        raw_topic_data = TopicPropertyExtractor(source_path).extract_topic_properties()
        topic_properties = raw_topic_data.get("topic_properties", {})

        # Build the standard-format entries, leaving out anything flagged as a no-op.
        converted_properties = {
            name: {
                "name": name,
                "description": details.get("description", ""),
                "type": details.get("type", "string"),
                "config_scope": "topic",
                "source_file": details.get("source_file", ""),
                "corresponding_cluster_property": details.get("corresponding_cluster_property", ""),
                "acceptable_values": details.get("acceptable_values", ""),
                "is_deprecated": False,
                "is_topic_property": True,
            }
            for name, details in topic_properties.items()
            if not details.get("is_noop", False)
        }

        # Count the skipped no-op entries purely for the log line below.
        noop_count = sum(1 for entry in topic_properties.values() if entry.get('is_noop', False))
        logging.info(f"Extracted {len(converted_properties)} topic properties (excluding {noop_count} no-op properties)")
        return converted_properties

    except Exception as e:
        # Best-effort step: a failure here is logged and reported to the caller as "no topic properties".
        logging.error(f"Failed to extract topic properties: {e}")
        return {}
1218
+
1219
+
148
1220
  def main():
149
1221
  import argparse
150
1222
 
@@ -170,6 +1242,13 @@ def main():
170
1242
  help="File to store the JSON output. If no file is provided, the JSON will be printed to the standard output",
171
1243
  )
172
1244
 
1245
+ arg_parser.add_argument(
1246
+ "--enhanced-output",
1247
+ type=str,
1248
+ required=False,
1249
+ help="File to store the enhanced JSON output with overrides applied (such as 'dev-properties.json')",
1250
+ )
1251
+
173
1252
  arg_parser.add_argument(
174
1253
  "--definitions",
175
1254
  type=str,
@@ -178,6 +1257,13 @@ def main():
178
1257
  help='JSON file with the type definitions. This file will be merged in the output under the "definitions" field',
179
1258
  )
180
1259
 
1260
+ arg_parser.add_argument(
1261
+ "--overrides",
1262
+ type=str,
1263
+ required=False,
1264
+ help='Optional JSON file with property description overrides',
1265
+ )
1266
+
181
1267
  arg_parser.add_argument("-v", "--verbose", action="store_true")
182
1268
 
183
1269
  return arg_parser
@@ -208,6 +1294,16 @@ def main():
208
1294
  logging.error(f"Failed to parse definitions file: {e}")
209
1295
  sys.exit(1)
210
1296
 
1297
+ # Load property overrides if provided
1298
+ overrides = None
1299
+ if options.overrides:
1300
+ try:
1301
+ with open(options.overrides) as f:
1302
+ overrides = json.load(f)
1303
+ except Exception as e:
1304
+ logging.error(f"Failed to load overrides file: {e}")
1305
+ sys.exit(1)
1306
+
211
1307
  treesitter_dir = os.path.join(os.getcwd(), "tree-sitter/tree-sitter-cpp")
212
1308
  destination_path = os.path.join(treesitter_dir, "tree-sitter-cpp.so")
213
1309
 
@@ -219,25 +1315,72 @@ def main():
219
1315
  treesitter_dir, destination_path
220
1316
  )
221
1317
 
1318
+
222
1319
  files_with_properties = get_files_with_properties(
223
1320
  file_pairs, treesitter_parser, cpp_language
224
1321
  )
225
1322
  properties = transform_files_with_properties(files_with_properties)
226
- properties_and_definitions = merge_properties_and_definitions(
227
- properties, definitions
1323
+
1324
+ # Extract topic properties and add them to the main properties dictionary
1325
+ topic_properties = extract_topic_properties(options.path)
1326
+ if topic_properties:
1327
+ properties.update(topic_properties)
1328
+ logging.info(f"Added {len(topic_properties)} topic properties to the main properties collection")
1329
+
1330
+ # First, create the original properties without overrides for the base JSON output
1331
+ # 1. Add config_scope field based on which source file defines the property
1332
+ original_properties = add_config_scope(deepcopy(properties))
1333
+
1334
+ # 2. Resolve type references and expand default values for original properties
1335
+ original_properties = resolve_type_and_default(original_properties, definitions)
1336
+
1337
+ # Generate original properties JSON (without overrides)
1338
+ original_properties_and_definitions = merge_properties_and_definitions(
1339
+ original_properties, definitions
228
1340
  )
1341
+ original_json_output = json.dumps(original_properties_and_definitions, indent=4, sort_keys=True)
1342
+
1343
+ # Now create enhanced properties with overrides applied
1344
+ # 1. Apply any description overrides from external override files
1345
+ enhanced_properties = apply_property_overrides(deepcopy(properties), overrides, options.overrides)
1346
+
1347
+ # 2. Add config_scope field based on which source file defines the property
1348
+ enhanced_properties = add_config_scope(enhanced_properties)
1349
+
1350
+ # 3. Resolve type references and expand default values
1351
+ # This step converts:
1352
+ # - C++ type names (model::broker_endpoint) to JSON schema types (object)
1353
+ # - C++ constructor defaults to structured JSON objects
1354
+ # - Single object defaults to arrays for one_or_many_property types
1355
+ enhanced_properties = resolve_type_and_default(enhanced_properties, definitions)
229
1356
 
230
- json_output = json.dumps(properties_and_definitions, indent=4, sort_keys=True)
1357
+ # Generate enhanced properties JSON (with overrides)
1358
+ enhanced_properties_and_definitions = merge_properties_and_definitions(
1359
+ enhanced_properties, definitions
1360
+ )
1361
+ enhanced_json_output = json.dumps(enhanced_properties_and_definitions, indent=4, sort_keys=True)
231
1362
 
1363
+ # Write original properties file (for backward compatibility)
232
1364
  if options.output:
233
1365
  try:
234
1366
  with open(options.output, "w+") as json_file:
235
- json_file.write(json_output)
1367
+ json_file.write(original_json_output)
1368
+ print(f"✅ Original properties JSON generated at {options.output}")
236
1369
  except IOError as e:
237
- logging.error(f"Failed to write output file: {e}")
1370
+ logging.error(f"Failed to write original output file: {e}")
238
1371
  sys.exit(1)
239
1372
  else:
240
- print(json_output)
1373
+ print(original_json_output)
1374
+
1375
+ # Write enhanced properties file (with overrides applied)
1376
+ if options.enhanced_output:
1377
+ try:
1378
+ with open(options.enhanced_output, "w+") as json_file:
1379
+ json_file.write(enhanced_json_output)
1380
+ print(f"✅ Enhanced properties JSON (with overrides) generated at {options.enhanced_output}")
1381
+ except IOError as e:
1382
+ logging.error(f"Failed to write enhanced output file: {e}")
1383
+ sys.exit(1)
241
1384
 
242
1385
  if __name__ == "__main__":
243
1386
  main()