@redpanda-data/docs-extensions-and-macros 4.8.0 → 4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/bin/doc-tools.js +236 -54
  2. package/package.json +1 -1
  3. package/tools/property-extractor/Makefile +68 -50
  4. package/tools/property-extractor/cloud_config.py +594 -0
  5. package/tools/property-extractor/compare-properties.js +378 -0
  6. package/tools/property-extractor/generate-handlebars-docs.js +444 -0
  7. package/tools/property-extractor/helpers/and.js +10 -0
  8. package/tools/property-extractor/helpers/eq.js +9 -0
  9. package/tools/property-extractor/helpers/formatPropertyValue.js +128 -0
  10. package/tools/property-extractor/helpers/formatUnits.js +26 -0
  11. package/tools/property-extractor/helpers/index.js +13 -0
  12. package/tools/property-extractor/helpers/join.js +18 -0
  13. package/tools/property-extractor/helpers/ne.js +9 -0
  14. package/tools/property-extractor/helpers/not.js +8 -0
  15. package/tools/property-extractor/helpers/or.js +10 -0
  16. package/tools/property-extractor/helpers/renderPropertyExample.js +42 -0
  17. package/tools/property-extractor/package-lock.json +77 -0
  18. package/tools/property-extractor/package.json +6 -0
  19. package/tools/property-extractor/parser.py +27 -1
  20. package/tools/property-extractor/property_extractor.py +1428 -49
  21. package/tools/property-extractor/requirements.txt +2 -0
  22. package/tools/property-extractor/templates/deprecated-properties.hbs +25 -0
  23. package/tools/property-extractor/templates/deprecated-property.hbs +7 -0
  24. package/tools/property-extractor/templates/property-cloud.hbs +105 -0
  25. package/tools/property-extractor/templates/property-page.hbs +22 -0
  26. package/tools/property-extractor/templates/property.hbs +85 -0
  27. package/tools/property-extractor/templates/topic-property-cloud.hbs +97 -0
  28. package/tools/property-extractor/templates/topic-property.hbs +73 -0
  29. package/tools/property-extractor/transformers.py +178 -6
  30. package/tools/property-extractor/json-to-asciidoc/generate_docs.py +0 -491
@@ -1,9 +1,66 @@
1
1
  #!/usr/bin/env python3
2
+ """
3
+ Redpanda Configuration Property Extractor
4
+
5
+ This script extracts configuration properties from Redpanda's C++ source code and generates
6
+ JSON schema definitions with proper type resolution and default value expansion.
7
+
8
+ SPECIAL HANDLING FOR one_or_many_property TYPES:
9
+
10
+ Redpanda uses a custom C++ type called `one_or_many_property<T>` for configuration properties
11
+ that can accept either a single value or an array of values. Examples include:
12
+
13
+ - admin: one_or_many_property<model::broker_endpoint>
14
+ - admin_api_tls: one_or_many_property<endpoint_tls_config>
15
+ - kafka_api_tls: one_or_many_property<endpoint_tls_config>
16
+
17
+ These properties allow flexible configuration syntax:
18
+ Single value: admin: {address: "127.0.0.1", port: 9644}
19
+ Array syntax: admin: [{address: "127.0.0.1", port: 9644}, {address: "0.0.0.0", port: 9645}]
20
+
21
+ PROCESSING PIPELINE:
22
+
23
+ 1. **Property Detection & Transformation** (transformers.py):
24
+ - IsArrayTransformer detects one_or_many_property<T> declarations
25
+ - Marks these properties as type="array" with items.type extracted from T
26
+ - TypeTransformer extracts inner types from template declarations
27
+
28
+ 2. **Type Resolution & Default Expansion** (property_extractor.py):
29
+ - resolve_type_and_default() converts C++ types to JSON schema types
30
+ - Expands C++ constructor defaults to structured JSON objects
31
+ - Ensures array-type properties have array defaults (wraps single objects in arrays)
32
+
33
+ 3. **Documentation Generation** (generate-handlebars-docs.js):
34
+ - Properly formats array defaults as [{ }] instead of { }
35
+ - Displays correct types in documentation (array vs object)
36
+
37
+ EXAMPLE TRANSFORMATION:
38
+
39
+ C++ Source:
40
+ one_or_many_property<model::broker_endpoint> admin(
41
+ *this, "admin", "Network address for Admin API",
42
+ {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))}
43
+ );
44
+
45
+ JSON Output:
46
+ "admin": {
47
+ "type": "array",
48
+ "items": {"type": "object"},
49
+ "default": [{"address": "127.0.0.1", "port": 9644}]
50
+ }
51
+
52
+ Documentation Output:
53
+ Type: array
54
+ Default: [{address: "127.0.0.1", port: 9644}]
55
+ """
2
56
  import logging
3
57
  import sys
4
58
  import os
5
59
  import json
6
60
  import re
61
+ import yaml
62
+ import ast
63
+ from copy import deepcopy
7
64
 
8
65
  from pathlib import Path
9
66
  from file_pair import FilePair
@@ -13,9 +70,258 @@ from parser import build_treesitter_cpp_library, extract_properties_from_file_pa
13
70
  from property_bag import PropertyBag
14
71
  from transformers import *
15
72
 
73
+ # Import topic property extractor
74
+ try:
75
+ from topic_property_extractor import TopicPropertyExtractor
76
+ except ImportError:
77
+ # TopicPropertyExtractor not available, will skip topic property extraction
78
+ TopicPropertyExtractor = None
79
+
80
+ # Import cloud configuration support
81
+ try:
82
+ from cloud_config import fetch_cloud_config, add_cloud_support_metadata
83
+ # Configure cloud_config logger to suppress INFO logs by default
84
+ import logging
85
+ logging.getLogger('cloud_config').setLevel(logging.WARNING)
86
+ except ImportError as e:
87
+ # Cloud configuration support not available due to missing dependencies
88
+ logging.warning(f"Cloud configuration support not available: {e}")
89
+ fetch_cloud_config = None
90
+ add_cloud_support_metadata = None
91
+
16
92
  logger = logging.getLogger("viewer")
17
93
 
18
94
 
95
+ def process_enterprise_value(enterprise_str):
96
+ """
97
+ Convert a raw C++ "enterprise" expression into a JSON-friendly value.
98
+
99
+ Accepts a string extracted from C++ sources and returns a value suitable for JSON
100
+ serialization: a Python list for initializer-lists, a simple string for enum-like
101
+ tokens, a Python boolean for boolean literals, a quoted string unchanged, or a human-readable hint for
102
+ lambda-based expressions.
103
+
104
+ The function applies pattern matching in the following order (order is significant):
105
+ 1. std::vector<...>{...} initializer lists → Python list (quoted strings are unescaped,
106
+ unqualified enum tokens are reduced to their final identifier).
107
+ 2. C++ scoped enum-like tokens (foo::bar::BAZ) → "BAZ".
108
+ 3. Lambda expressions (strings starting with "[](" and ending with "}") → a short
109
+ human-readable hint such as "Enterprise feature enabled" or context-specific text.
110
+ 4. Simple literal values → "true"/"false" become Python booleans; "OIDC" and quoted strings are returned as-is.
111
+
112
+ Parameters:
113
+ enterprise_str (str): Raw C++ expression text to be converted.
114
+
115
+ Returns:
116
+ Union[str, bool, list]: A JSON-serializable representation of the input.
117
+ """
118
+ enterprise_str = enterprise_str.strip()
119
+
120
+ # FIRST: Handle std::vector initialization patterns (highest priority)
121
+ # This must come before enum processing because vectors can contain enums
122
+ # Tolerate optional whitespace around braces
123
+ vector_match = re.match(r'std::vector<[^>]+>\s*\{\s*([^}]*)\s*\}', enterprise_str)
124
+ if vector_match:
125
+ content = vector_match.group(1).strip()
126
+ if not content:
127
+ return []
128
+
129
+ # Parse the content as a list of values
130
+ values = []
131
+ current_value = ""
132
+ in_quotes = False
133
+
134
+ for char in content:
135
+ if char == '"' and (not current_value or current_value[-1] != '\\'):
136
+ in_quotes = not in_quotes
137
+ current_value += char
138
+ elif char == ',' and not in_quotes:
139
+ if current_value.strip():
140
+ # Clean up the value
141
+ value = current_value.strip()
142
+ if value.startswith('"') and value.endswith('"'):
143
+ values.append(ast.literal_eval(value))
144
+ else:
145
+ # Handle enum values in the vector
146
+ enum_match = re.match(r'[a-zA-Z0-9_:]+::([a-zA-Z0-9_]+)', value)
147
+ if enum_match:
148
+ values.append(enum_match.group(1))
149
+ else:
150
+ values.append(value)
151
+ current_value = ""
152
+ else:
153
+ current_value += char
154
+
155
+ # Add the last value
156
+ if current_value.strip():
157
+ value = current_value.strip()
158
+ if value.startswith('"') and value.endswith('"'):
159
+ values.append(ast.literal_eval(value))
160
+ else:
161
+ # Handle enum values in the vector
162
+ enum_match = re.match(r'[a-zA-Z0-9_:]+::([a-zA-Z0-9_]+)', value)
163
+ if enum_match:
164
+ values.append(enum_match.group(1))
165
+ else:
166
+ values.append(value)
167
+
168
+ return values
169
+
170
+ # SECOND: Handle enum-like patterns (extract the last part after ::)
171
+ enum_match = re.match(r'[a-zA-Z0-9_:]+::([a-zA-Z0-9_]+)', enterprise_str)
172
+ if enum_match:
173
+ enum_value = enum_match.group(1)
174
+ return enum_value
175
+
176
+ # THIRD: Handle C++ lambda expressions - these usually indicate "any non-default value"
177
+ if enterprise_str.startswith("[](") and enterprise_str.endswith("}"):
178
+ # For lambda expressions, try to extract meaningful info from the logic
179
+ if "leaders_preference" in enterprise_str:
180
+ return "Any rack preference (not `none`)"
181
+ else:
182
+ return "Enterprise feature enabled"
183
+
184
+ # FOURTH: Handle simple values with proper JSON types
185
+ # Convert boolean literals to actual boolean values for JSON compatibility
186
+ if enterprise_str == "true":
187
+ return True
188
+ elif enterprise_str == "false":
189
+ return False
190
+ elif enterprise_str == "OIDC" or enterprise_str.startswith('"'):
191
+ return enterprise_str
192
+
193
+ # Fallback: return the original value
194
+ return enterprise_str
195
+
196
+
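For orientation, a minimal doctest-style sketch of how this helper maps C++ enterprise expressions (the input strings are illustrative, not taken verbatim from the source):

    >>> process_enterprise_value('std::vector<ss::sstring>{"GSSAPI", "OAUTHBEARER"}')
    ['GSSAPI', 'OAUTHBEARER']
    >>> process_enterprise_value('tls_version::v1_3')
    'v1_3'
    >>> process_enterprise_value('true')
    True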
197
+ def resolve_cpp_function_call(function_name):
198
+ """
199
+ Resolve certain small, known C++ zero-argument functions to their literal return values by searching Redpanda source files.
200
+
201
+ This function looks up predefined search patterns for well-known functions (currently a small set under `model::*`), locates a local Redpanda source tree from several commonly used paths, and scans the listed files (and, if needed, the broader model directory) for a regex match that captures the string returned by the function. If a match is found the captured string is returned; if the source tree cannot be found or no match is located the function returns None.
202
+
203
+ Parameters:
204
+ function_name (str): Fully-qualified C++ function name to resolve (e.g., "model::kafka_audit_logging_topic").
205
+
206
+ Returns:
207
+ str or None: The resolved literal string returned by the C++ function, or None when unresolved (source not found or no matching pattern).
208
+
209
+ Notes:
210
+ - The function performs filesystem I/O and regex-based source searching; it does not raise on read errors but logs and continues.
211
+ - Only a small, hard-coded set of function names/patterns is supported; unknown names immediately return None.
212
+ """
213
+ # Map function names to likely search patterns and file locations
214
+ search_patterns = {
215
+ 'model::kafka_audit_logging_topic': {
216
+ 'patterns': [
217
+ r'inline\s+const\s+model::topic\s+kafka_audit_logging_topic\s*\(\s*"([^"]+)"\s*\)',
218
+ r'const\s+model::topic\s+kafka_audit_logging_topic\s*\(\s*"([^"]+)"\s*\)',
219
+ r'model::topic\s+kafka_audit_logging_topic\s*\(\s*"([^"]+)"\s*\)',
220
+ r'std::string_view\s+kafka_audit_logging_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"',
221
+ r'inline\s+std::string_view\s+kafka_audit_logging_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"'
222
+ ],
223
+ 'files': ['src/v/model/namespace.h', 'src/v/model/namespace.cc', 'src/v/model/kafka_namespace.h']
224
+ },
225
+ 'model::kafka_consumer_offsets_topic': {
226
+ 'patterns': [
227
+ r'inline\s+const\s+model::topic\s+kafka_consumer_offsets_topic\s*\(\s*"([^"]+)"\s*\)',
228
+ r'const\s+model::topic\s+kafka_consumer_offsets_topic\s*\(\s*"([^"]+)"\s*\)',
229
+ r'model::topic\s+kafka_consumer_offsets_topic\s*\(\s*"([^"]+)"\s*\)',
230
+ r'std::string_view\s+kafka_consumer_offsets_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"',
231
+ r'inline\s+std::string_view\s+kafka_consumer_offsets_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"'
232
+ ],
233
+ 'files': ['src/v/model/namespace.h', 'src/v/model/namespace.cc', 'src/v/model/kafka_namespace.h']
234
+ },
235
+ 'model::kafka_internal_namespace': {
236
+ 'patterns': [
237
+ r'inline\s+const\s+model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)',
238
+ r'const\s+model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)',
239
+ r'model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)',
240
+ r'std::string_view\s+kafka_internal_namespace\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"',
241
+ r'inline\s+std::string_view\s+kafka_internal_namespace\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"'
242
+ ],
243
+ 'files': ['src/v/model/namespace.h', 'src/v/model/namespace.cc', 'src/v/model/kafka_namespace.h']
244
+ }
245
+ }
246
+
247
+ # Check if we have search patterns for this function
248
+ if function_name not in search_patterns:
249
+ logger.debug(f"No search patterns defined for function: {function_name}")
250
+ return None
251
+
252
+ config = search_patterns[function_name]
253
+
254
+ # Try to find the Redpanda source directory
255
+ # Look for it in the standard locations used by the property extractor
256
+ redpanda_source_paths = [
257
+ 'tmp/redpanda', # Current directory
258
+ '../tmp/redpanda', # Parent directory
259
+ 'tools/property-extractor/tmp/redpanda', # From project root
260
+ os.path.join(os.getcwd(), 'tools', 'property-extractor', 'tmp', 'redpanda')
261
+ ]
262
+
263
+ redpanda_source = None
264
+ for path in redpanda_source_paths:
265
+ if os.path.exists(path):
266
+ redpanda_source = path
267
+ break
268
+
269
+ if not redpanda_source:
270
+ logger.warning(f"Could not find Redpanda source directory to resolve function: {function_name}")
271
+ return None
272
+
273
+ # Search in the specified files
274
+ for file_path in config['files']:
275
+ full_path = os.path.join(redpanda_source, file_path)
276
+ if not os.path.exists(full_path):
277
+ continue
278
+
279
+ try:
280
+ with open(full_path, 'r', encoding='utf-8') as f:
281
+ content = f.read()
282
+
283
+ # Try each pattern
284
+ for pattern in config['patterns']:
285
+ match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
286
+ if match:
287
+ resolved_value = match.group(1)
288
+ logger.debug(f"Resolved {function_name}() -> '{resolved_value}' from {file_path}")
289
+ return resolved_value
290
+
291
+ except Exception as e:
292
+ logger.debug(f"Error reading {full_path}: {e}")
293
+ continue
294
+
295
+ # If not found in specific files, do a broader search
296
+ logger.debug(f"Function {function_name} not found in expected files, doing broader search...")
297
+
298
+ # Search more broadly in the model directory
299
+ model_dir = os.path.join(redpanda_source, 'src', 'v', 'model')
300
+ if os.path.exists(model_dir):
301
+ for root, dirs, files in os.walk(model_dir):
302
+ for file in files:
303
+ if file.endswith('.h') or file.endswith('.cc'):
304
+ file_path = os.path.join(root, file)
305
+ try:
306
+ with open(file_path, 'r', encoding='utf-8') as f:
307
+ content = f.read()
308
+
309
+ # Try patterns for this file
310
+ for pattern in config['patterns']:
311
+ match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
312
+ if match:
313
+ resolved_value = match.group(1)
314
+ logger.debug(f"Resolved {function_name}() -> '{resolved_value}' from {file_path}")
315
+ return resolved_value
316
+
317
+ except Exception as e:
318
+ logger.debug(f"Error reading {file_path}: {e}")
319
+ continue
320
+
321
+ logger.warning(f"Could not resolve function call: {function_name}()")
322
+ return None
323
+
324
+
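A rough usage sketch, assuming a local checkout exists under one of the tmp/redpanda search paths (the function name must be one of the three hard-coded entries above):

    # Returns the literal captured from src/v/model/ (for example the audit-log topic
    # name), or None when the source tree or a matching declaration is not found.
    topic_name = resolve_cpp_function_call("model::kafka_audit_logging_topic")
    if topic_name is None:
        logger.warning("Leaving default unresolved; source tree unavailable")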
19
325
  def validate_paths(options):
20
326
  path = options.path
21
327
 
@@ -128,57 +434,1049 @@ def transform_files_with_properties(files_with_properties):
128
434
 
129
435
  # The definitions.json file contains type definitions that the extractor uses to standardize and centralize type information. After extracting and transforming the properties from the source code, the function merge_properties_and_definitions looks up each property's type in the definitions. If a property's type (or the type of its items, in the case of arrays) matches one of the definitions, the transformer replaces that type with a JSON pointer ( such as #/definitions/<type>) to the corresponding entry in definitions.json. The final JSON output then includes both a properties section (with types now referencing the definitions) and a definitions section, so that consumers of the output can easily resolve the full type information.
130
436
  def merge_properties_and_definitions(properties, definitions):
131
- for name in properties:
132
- property = properties[name]
133
- # guard against missing "type"
134
- prop_type = property.get("type")
135
- if prop_type and prop_type in definitions:
136
- properties[name]["type"] = "#/definitions/{prop_type}"
137
- elif (
138
- prop_type == "array"
139
- and property.get("items", {}).get("type") in definitions
140
- ):
141
- properties[name]["items"]["type"] = (
142
- f"#/definitions/{property['items']['type']}"
143
- )
144
-
437
+ # Do not overwrite the resolved type/default with a reference. Just return the resolved properties and definitions.
145
438
  return dict(properties=properties, definitions=definitions)
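With this change the top-level output keeps the resolved types in place rather than replacing them with JSON pointers; its shape is roughly (content illustrative):

    # {
    #   "properties":  {"admin": {"type": "array", "default": [...]}, ...},
    #   "definitions": {"<type name>": {"type": "object", ...}, ...}
    # }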
146
439
 
147
440
 
441
+ def apply_property_overrides(properties, overrides, overrides_file_path=None):
442
+ """
443
+ Apply property overrides from the overrides JSON file to enhance property documentation.
444
+
445
+ This function allows customizing property documentation by providing overrides for:
446
+
447
+ 1. description: Override the auto-extracted property description with custom text
448
+ 2. version: Add version information showing when the property was introduced
449
+ 3. example: Add AsciiDoc example sections with flexible input formats (see below)
450
+ 4. default: Override the auto-extracted default value
451
+ 5. related_topics: Add an array of related topic links for cross-referencing
452
+ 6. config_scope: Specify the scope for new properties ("topic", "cluster", "broker")
453
+ 7. type: Specify the type for new properties
454
+
455
+ Properties that don't exist in the extracted source can be created from overrides.
456
+ This is useful for topic properties or other configurations that aren't auto-detected.
457
+
458
+ Multiple example input formats are supported for user convenience:
459
+
460
+ 1. Direct AsciiDoc string:
461
+ "example": ".Example\n[,yaml]\n----\nredpanda:\n property_name: value\n----"
462
+
463
+ 2. Multi-line array (each element becomes a line):
464
+ "example": [
465
+ ".Example",
466
+ "[,yaml]",
467
+ "----",
468
+ "redpanda:",
469
+ " property_name: value",
470
+ "----"
471
+ ]
472
+
473
+ 3. External file reference:
474
+ "example_file": "examples/property_name.adoc"
475
+
476
+ 4. Auto-formatted YAML with title and description:
477
+ "example_yaml": {
478
+ "title": "Example Configuration",
479
+ "description": "This shows how to configure the property.",
480
+ "config": {
481
+ "redpanda": {
482
+ "property_name": "value"
483
+ }
484
+ }
485
+ }
486
+
487
+ Args:
488
+ properties: Dictionary of extracted properties from C++ source
489
+ overrides: Dictionary loaded from overrides JSON file
490
+ overrides_file_path: Path to the overrides file (for resolving relative example_file paths)
491
+
492
+ Returns:
493
+ Updated properties dictionary with overrides applied and new properties created
494
+ """
495
+ if overrides and "properties" in overrides:
496
+ for prop, override in overrides["properties"].items():
497
+ if prop in properties:
498
+ # Apply overrides to existing properties
499
+ _apply_override_to_existing_property(properties[prop], override, overrides_file_path)
500
+ else:
501
+ # Create new property from override
502
+ logger.info(f"Creating new property from override: {prop}")
503
+ properties[prop] = _create_property_from_override(prop, override, overrides_file_path)
504
+ return properties
505
+
506
+
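A minimal wiring sketch, assuming an overrides file that follows the format described in the docstring (the property name is hypothetical):

    with open("overrides.json") as f:
        overrides = json.load(f)
    # e.g. {"properties": {"example_topic_flag": {"type": "boolean", "config_scope": "topic"}}}
    properties = apply_property_overrides(properties, overrides, "overrides.json")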
507
+ def _apply_override_to_existing_property(property_dict, override, overrides_file_path):
508
+ """Apply overrides to an existing property."""
509
+ # Apply description override
510
+ if "description" in override:
511
+ property_dict["description"] = override["description"]
512
+
513
+ # Apply version override (introduced in version)
514
+ if "version" in override:
515
+ property_dict["version"] = override["version"]
516
+
517
+ # Apply example override with multiple input format support
518
+ example_content = _process_example_override(override, overrides_file_path)
519
+ if example_content:
520
+ property_dict["example"] = example_content
521
+
522
+ # Apply default override
523
+ if "default" in override:
524
+ property_dict["default"] = override["default"]
525
+
526
+ # Apply type override
527
+ if "type" in override:
528
+ property_dict["type"] = override["type"]
529
+
530
+ # Apply config_scope override
531
+ if "config_scope" in override:
532
+ property_dict["config_scope"] = override["config_scope"]
533
+
534
+ # Apply related_topics override
535
+ if "related_topics" in override:
536
+ if isinstance(override["related_topics"], list):
537
+ property_dict["related_topics"] = override["related_topics"]
538
+ else:
539
+ logger.warning(f"related_topics for property must be an array")
540
+
541
+
542
+ def _create_property_from_override(prop_name, override, overrides_file_path):
543
+ """Create a new property from override specification."""
544
+ # Create base property structure
545
+ new_property = {
546
+ "name": prop_name,
547
+ "description": override.get("description", f"Configuration property: {prop_name}"),
548
+ "type": override.get("type", "string"),
549
+ "default": override.get("default", None),
550
+ "defined_in": "override", # Mark as override-created
551
+ "config_scope": override.get("config_scope", "topic"), # Default to topic for new properties
552
+ "is_topic_property": override.get("config_scope", "topic") == "topic",
553
+ "is_deprecated": override.get("is_deprecated", False),
554
+ "visibility": override.get("visibility", "user")
555
+ }
556
+
557
+ # Add version if specified
558
+ if "version" in override:
559
+ new_property["version"] = override["version"]
560
+
561
+ # Add example if specified
562
+ example_content = _process_example_override(override, overrides_file_path)
563
+ if example_content:
564
+ new_property["example"] = example_content
565
+
566
+ # Add related_topics if specified
567
+ if "related_topics" in override:
568
+ if isinstance(override["related_topics"], list):
569
+ new_property["related_topics"] = override["related_topics"]
570
+ else:
571
+ logger.warning(f"related_topics for property '{prop_name}' must be an array")
572
+
573
+ # Add any other custom fields from override
574
+ for key, value in override.items():
575
+ if key not in ["description", "type", "default", "config_scope", "version",
576
+ "example", "example_file", "example_yaml", "related_topics",
577
+ "is_deprecated", "visibility"]:
578
+ new_property[key] = value
579
+
580
+ return new_property
581
+
582
+
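For example, a hypothetical override describing a property that does not exist in the extracted C++ sources:

    override = {
        "description": "Illustrative topic-level flag.",
        "type": "boolean",
        "default": False,
        "config_scope": "topic",
    }
    new_prop = _create_property_from_override("example_topic_flag", override, None)
    # new_prop["defined_in"] == "override" and new_prop["is_topic_property"] is True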
583
+ def _process_example_override(override, overrides_file_path=None):
584
+ """
585
+ Process example overrides in various user-friendly formats.
586
+
587
+ Supports multiple input formats for examples:
588
+ 1. Direct string: "example": "content"
589
+ 2. Multi-line array: "example": ["line1", "line2", ...]
590
+ 3. External file: "example_file": "path/to/file"
591
+ 4. Auto-formatted YAML: "example_yaml": {...}
592
+
593
+ Args:
594
+ override: Dictionary containing override data for a property
595
+ overrides_file_path: Path to the overrides file (for resolving relative paths)
596
+
597
+ Returns:
598
+ Processed AsciiDoc example content as string, or None if no example found
599
+ """
600
+ # Format 1: Direct AsciiDoc string
601
+ if "example" in override:
602
+ example = override["example"]
603
+ if isinstance(example, str):
604
+ return example
605
+ elif isinstance(example, list):
606
+ # Format 2: Multi-line array - join with newlines
607
+ return "\n".join(example)
608
+
609
+ # Format 3: External file reference
610
+ if "example_file" in override:
611
+ file_path = override["example_file"]
612
+
613
+ # Support both absolute and relative paths
614
+ if not os.path.isabs(file_path):
615
+ # Build search paths starting with the overrides file directory
616
+ search_paths = []
617
+
618
+ # If we have the overrides file path, try relative to its directory first
619
+ if overrides_file_path:
620
+ overrides_dir = os.path.dirname(overrides_file_path)
621
+ search_paths.append(os.path.join(overrides_dir, file_path))
622
+
623
+ # Then try common locations relative to current working directory
624
+ search_paths.extend([
625
+ file_path,
626
+ os.path.join("examples", file_path),
627
+ os.path.join("docs-data", file_path),
628
+ os.path.join("__tests__", "docs-data", file_path)
629
+ ])
630
+
631
+ found_path = None
632
+ for search_path in search_paths:
633
+ if os.path.exists(search_path):
634
+ found_path = search_path
635
+ break
636
+
637
+ if found_path:
638
+ file_path = found_path
639
+ else:
640
+ logger.warning(f"Example file not found: {override['example_file']}")
641
+ logger.warning(f"Searched in: {', '.join(search_paths)}")
642
+ return None
643
+
644
+ try:
645
+ with open(file_path, 'r', encoding='utf-8') as f:
646
+ return f.read().strip()
647
+ except Exception as e:
648
+ logger.error(f"Error reading example file {file_path}: {e}")
649
+ return None
650
+
651
+ # Format 4: Auto-formatted YAML configuration
652
+ if "example_yaml" in override:
653
+ yaml_data = override["example_yaml"]
654
+ title = yaml_data.get("title", "Example")
655
+ description = yaml_data.get("description", "")
656
+ config = yaml_data.get("config", {})
657
+
658
+ # Build AsciiDoc content
659
+ lines = [f".{title}"]
660
+ if description:
661
+ lines.append(f"{description}\n")
662
+
663
+ lines.extend([
664
+ "[,yaml]",
665
+ "----"
666
+ ])
667
+
668
+ # Convert config to YAML and add to lines
669
+ try:
670
+ yaml_content = yaml.dump(config, default_flow_style=False, indent=2)
671
+ lines.append(yaml_content.rstrip())
672
+ except Exception as e:
673
+ import traceback
674
+ logger.error(f"Error formatting YAML config: {e}")
675
+ logger.debug(f"Full traceback:\n{traceback.format_exc()}")
676
+ return None
677
+
678
+ lines.append("----")
679
+
680
+ return "\n".join(lines)
681
+
682
+ return None
683
+
684
+
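As a concrete sketch of the example_yaml format (property name illustrative), the following override renders to an AsciiDoc block:

    override = {"example_yaml": {
        "title": "Enable the flag",
        "config": {"redpanda": {"example_topic_flag": True}},
    }}
    print(_process_example_override(override))
    # .Enable the flag
    # [,yaml]
    # ----
    # redpanda:
    #   example_topic_flag: true
    # ----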
685
+ def add_config_scope(properties):
686
+ """
687
+ Add a config_scope field to each property based on its defined_in value or property type.
688
+ 'cluster' if defined_in == src/v/config/configuration.cc
689
+ 'broker' if defined_in == src/v/config/node_config.cc
690
+ 'topic' if is_topic_property == True
691
+
692
+ For override-created properties, preserve existing config_scope if already set.
693
+ """
694
+ for prop in properties.values():
695
+ # Check if this is a topic property first
696
+ if prop.get("is_topic_property", False):
697
+ prop["config_scope"] = "topic"
698
+ else:
699
+ # For override-created properties, preserve existing config_scope if set
700
+ if prop.get("defined_in") == "override" and prop.get("config_scope") is not None:
701
+ # Keep the existing config_scope from override
702
+ pass
703
+ else:
704
+ defined_in = prop.get("defined_in", "")
705
+ if defined_in == "src/v/config/configuration.cc":
706
+ prop["config_scope"] = "cluster"
707
+ elif defined_in == "src/v/config/node_config.cc":
708
+ prop["config_scope"] = "broker"
709
+ else:
710
+ prop["config_scope"] = None
711
+ return properties
712
+
713
+
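A small sketch of the resulting assignment (property names are illustrative):

    props = {
        "a_cluster_setting": {"defined_in": "src/v/config/configuration.cc"},
        "a_broker_setting": {"defined_in": "src/v/config/node_config.cc"},
        "a_topic_setting": {"is_topic_property": True},
    }
    add_config_scope(props)
    # config_scope becomes "cluster", "broker", and "topic" respectively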
714
+ def resolve_type_and_default(properties, definitions):
715
+ """
716
+ Resolve JSON Schema types and expand C++-style default values for all properties.
717
+
718
+ This function:
719
+ - Resolves type references found in `properties` against `definitions` (supports "$ref" and direct type names) and normalizes property "type" to a JSON Schema primitive ("object", "string", "integer", "boolean", "array", "number") with sensible fallbacks.
720
+ - Expands C++ constructor/initializer syntax and common C++ patterns appearing in default values into JSON-compatible Python values (e.g., nested constructor calls -> dicts, initializer lists -> lists, `std::nullopt` -> None, enum-like tokens -> strings).
721
+ - Ensures array-typed properties (including one_or_many_property cases) have array defaults: single-object defaults are wrapped into a one-element list and "{}" string defaults become [].
722
+ - Updates array item type information when item types reference definitions.
723
+ - Applies a final pass to convert any remaining C++-patterned defaults and to transform any `enterprise_value` strings via process_enterprise_value.
724
+
725
+ Parameters:
726
+ properties (dict): Mapping of property names to property metadata dictionaries. Each property may include keys like "type", "default", "items", and "enterprise_value".
727
+ definitions (dict): Mapping of type names to JSON Schema definition dictionaries used to resolve $ref targets and to infer property shapes when expanding constructors.
728
+
729
+ Returns:
730
+ dict: The same `properties` mapping after in-place normalization and expansion of types and defaults.
731
+ """
732
+ import ast
733
+ import re
734
+
735
+ def resolve_definition_type(defn):
736
+ """Recursively resolve $ref pointers to get the actual type definition."""
737
+ # Recursively resolve $ref
738
+ while isinstance(defn, dict) and "$ref" in defn:
739
+ ref = defn["$ref"]
740
+ ref_name = ref.split("/")[-1]
741
+ defn = definitions.get(ref_name, defn)
742
+ return defn
743
+
744
+ def parse_constructor(s):
745
+ """Parse C++ constructor syntax into type name and arguments."""
746
+ s = s.strip()
747
+ if s.startswith("{") and s.endswith("}"):
748
+ s = s[1:-1].strip()
749
+ match = re.match(r'([a-zA-Z0-9_:]+)\((.*)\)', s)
750
+ if not match:
751
+ # Primitive or enum
752
+ if s.startswith('"') and s.endswith('"'):
753
+ return None, [ast.literal_eval(s)]
754
+ try:
755
+ return None, [int(s)]
756
+ except Exception:
757
+ return None, [s]
758
+ type_name, arg_str = match.groups()
759
+ args = []
760
+ depth = 0
761
+ current = ''
762
+ in_string = False
763
+ for c in arg_str:
764
+ if c == '"' and (not current or current[-1] != '\\'):
765
+ in_string = not in_string
766
+ if c == ',' and depth == 0 and not in_string:
767
+ if current.strip():
768
+ args.append(current.strip())
769
+ current = ''
770
+ else:
771
+ if c == '(' and not in_string:
772
+ depth += 1
773
+ elif c == ')' and not in_string:
774
+ depth -= 1
775
+ current += c
776
+ if current.strip():
777
+ args.append(current.strip())
778
+ return type_name, args
779
+
780
+ def process_cpp_patterns(arg_str):
781
+ """
782
+ Process specific C++ patterns to user-friendly values.
783
+
784
+ Handles:
785
+ - net::unresolved_address("127.0.0.1", 9092) -> expands based on type definition
786
+ - std::nullopt -> null
787
+ - fips_mode_flag::disabled -> "disabled"
788
+ - model::kafka_audit_logging_topic() -> dynamically looked up from source
789
+ """
790
+ arg_str = arg_str.strip()
791
+
792
+ # Handle std::nullopt -> null
793
+ if arg_str == "std::nullopt":
794
+ return "null"
795
+
796
+ # Handle C++ function calls that return constant values
797
+ # Dynamically look up function return values from the source code
798
+ function_call_match = re.match(r'([a-zA-Z0-9_:]+)\(\)', arg_str)
799
+ if function_call_match:
800
+ function_name = function_call_match.group(1)
801
+ resolved_value = resolve_cpp_function_call(function_name)
802
+ if resolved_value is not None:
803
+ return f'"{resolved_value}"'
804
+
805
+ # Handle enum-like patterns (such as fips_mode_flag::disabled -> "disabled")
806
+ enum_match = re.match(r'[a-zA-Z0-9_:]+::([a-zA-Z0-9_]+)', arg_str)
807
+ if enum_match:
808
+ enum_value = enum_match.group(1)
809
+ return f'"{enum_value}"'
810
+
811
+ # Handle default constructors and their default values
812
+ # This handles cases where C++ default constructors are used but should map to specific values
813
+
814
+ # Pattern 1: Full constructor syntax like config::leaders_preference{}
815
+ constructor_patterns = {
816
+ r'config::leaders_preference\{\}': '"none"', # Based on C++ code analysis
817
+ r'std::chrono::seconds\{0\}': '0',
818
+ r'std::chrono::milliseconds\{0\}': '0',
819
+ r'model::timeout_clock::duration\{\}': '0',
820
+ r'config::data_directory_path\{\}': '""',
821
+ r'std::optional<[^>]+>\{\}': 'null', # Empty optional
822
+ }
823
+
824
+ for pattern, replacement in constructor_patterns.items():
825
+ if re.match(pattern, arg_str):
826
+ return replacement
827
+
828
+ # Pattern 2: Truncated type names that likely came from default constructors
829
+ # These are cases where tree-sitter parsing truncated "config::type{}" to just "type"
830
+ truncated_patterns = {
831
+ 'leaders_preference': '"none"', # config::leaders_preference{} -> none
832
+ 'data_directory_path': '""', # config::data_directory_path{} -> empty string
833
+ 'timeout_clock_duration': '0', # model::timeout_clock::duration{} -> 0
834
+ 'log_level': '"info"', # Default log level
835
+ 'compression_type': '"none"', # Default compression
836
+ }
837
+
838
+ # Check if arg_str is exactly one of these truncated patterns
839
+ if arg_str in truncated_patterns:
840
+ return truncated_patterns[arg_str]
841
+
842
+ # Pattern 3: Handle remaining default constructor syntax generically
843
+ generic_constructor_match = re.match(r'[a-zA-Z0-9_:]+\{\}', arg_str)
844
+ if generic_constructor_match:
845
+ # For unknown constructors, try to infer a reasonable default
846
+ type_name = arg_str[:-2] # Remove the {}
847
+ if 'duration' in type_name.lower() or 'time' in type_name.lower():
848
+ return '0'
849
+ elif 'path' in type_name.lower() or 'directory' in type_name.lower():
850
+ return '""'
851
+ elif 'optional' in type_name.lower():
852
+ return 'null'
853
+ else:
854
+ return '""' # Conservative default to empty string
855
+
856
+ # Handle string concatenation with + operator (such as "128_kib + 1")
857
+ if " + " in arg_str:
858
+ return f'"{arg_str}"'
859
+
860
+ return arg_str
861
+
862
+ def expand_default(type_name, default_str):
863
+ """
864
+ Expand C++ default values into structured JSON objects.
865
+
866
+ For array types with initializer list syntax like:
867
+ {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))}
868
+
869
+ This creates: [{address: "127.0.0.1", port: 9644}]
870
+ """
871
+ # Handle non-string defaults
872
+ if not isinstance(default_str, str):
873
+ return default_str
874
+
875
+ # Apply C++ pattern processing for simple cases (not complex constructor calls)
876
+ if not ("(" in default_str and "::" in default_str):
877
+ processed = process_cpp_patterns(default_str)
878
+ if processed != default_str:
879
+ # Pattern was processed, return the result
880
+ if processed == "null":
881
+ return None
882
+ elif processed.startswith('"') and processed.endswith('"'):
883
+ return ast.literal_eval(processed)
884
+ else:
885
+ return processed
886
+
887
+ type_def = resolve_definition_type(definitions.get(type_name, {}))
888
+ if "enum" in type_def:
889
+ return default_str
890
+ # If it has properties but no explicit type, it's an object
891
+ if type_def.get("type") == "object" or (type_def.get("properties") and not type_def.get("type")):
892
+ tname, args = parse_constructor(default_str)
893
+ if tname is None:
894
+ return default_str
895
+
896
+ props = list(type_def["properties"].keys())
897
+ result = {}
898
+
899
+ # For each constructor argument, try to expand it and map to the correct property
900
+ for i, prop in enumerate(props):
901
+ prop_def = type_def["properties"][prop]
902
+ if "$ref" in prop_def:
903
+ sub_type = prop_def["$ref"].split("/")[-1]
904
+ else:
905
+ sub_type = prop_def.get("type")
906
+
907
+ if i < len(args):
908
+ arg = args[i]
909
+ # Check if this argument is a nested constructor call
910
+ if "(" in arg and "::" in arg:
911
+ # Parse the nested constructor
912
+ nested_tname, nested_args = parse_constructor(arg)
913
+ if nested_tname and nested_tname in definitions:
914
+ # Get the definition for the nested type
915
+ nested_type_def = resolve_definition_type(definitions.get(nested_tname, {}))
916
+ nested_props = list(nested_type_def.get("properties", {}).keys())
917
+
918
+ # Expand the nested constructor by mapping its arguments to its properties
919
+ nested_result = {}
920
+ for j, nested_prop in enumerate(nested_props):
921
+ nested_prop_def = nested_type_def["properties"][nested_prop]
922
+ if j < len(nested_args):
923
+ nested_arg = nested_args[j]
924
+ # Apply simple C++ pattern processing to the argument
925
+ processed_nested_arg = process_cpp_patterns(nested_arg)
926
+
927
+ # Convert the processed argument based on the property type
928
+ if nested_prop_def.get("type") == "string":
929
+ if processed_nested_arg.startswith('"') and processed_nested_arg.endswith('"'):
930
+ nested_result[nested_prop] = ast.literal_eval(processed_nested_arg)
931
+ else:
932
+ nested_result[nested_prop] = processed_nested_arg
933
+ elif nested_prop_def.get("type") == "integer":
934
+ try:
935
+ nested_result[nested_prop] = int(processed_nested_arg)
936
+ except ValueError:
937
+ nested_result[nested_prop] = processed_nested_arg
938
+ elif nested_prop_def.get("type") == "boolean":
939
+ nested_result[nested_prop] = processed_nested_arg.lower() == "true"
940
+ else:
941
+ nested_result[nested_prop] = processed_nested_arg
942
+ else:
943
+ nested_result[nested_prop] = None
944
+
945
+ # Now we have the expanded nested object, we need to map it to the parent object's properties
946
+ # This is where the type-aware mapping happens
947
+
948
+ # Special case: if the nested type is net::unresolved_address and parent is broker_endpoint
949
+ if nested_tname == "net::unresolved_address" and type_name == "model::broker_endpoint":
950
+ # Map net::unresolved_address properties to broker_endpoint
951
+ # Only map the fields that actually exist in the net::unresolved_address
952
+ result["address"] = nested_result.get("address")
953
+ result["port"] = nested_result.get("port")
954
+ break
955
+ else:
956
+ # General case: if we have a single nested constructor argument,
957
+ # try to merge its properties into the parent
958
+ if i == 0 and len(args) == 1:
959
+ result.update(nested_result)
960
+ # Set remaining properties to None
961
+ for remaining_prop in props[i+1:]:
962
+ if remaining_prop not in result:
963
+ result[remaining_prop] = None
964
+ break
965
+ else:
966
+ # Map the nested object to the current property
967
+ result[prop] = nested_result
968
+ else:
969
+ # Fallback: recursively expand with the expected property type
970
+ expanded_arg = expand_default(sub_type, arg)
971
+ result[prop] = expanded_arg
972
+ else:
973
+ # Simple value, parse based on the property type
974
+ # First apply C++ pattern processing
975
+ processed_arg = process_cpp_patterns(arg)
976
+
977
+ if sub_type == "string":
978
+ # If processed_arg is already quoted, use ast.literal_eval, otherwise keep as is
979
+ if processed_arg.startswith('"') and processed_arg.endswith('"'):
980
+ result[prop] = ast.literal_eval(processed_arg)
981
+ else:
982
+ result[prop] = processed_arg
983
+ elif sub_type == "integer":
984
+ try:
985
+ result[prop] = int(processed_arg)
986
+ except ValueError:
987
+ # If conversion fails, keep as string (might be processed C++ pattern)
988
+ result[prop] = processed_arg
989
+ elif sub_type == "boolean":
990
+ result[prop] = processed_arg.lower() == "true"
991
+ else:
992
+ result[prop] = processed_arg
993
+ else:
994
+ result[prop] = None
995
+ return result
996
+ elif type_def.get("type") == "array":
997
+ # Handle array defaults with C++ initializer list syntax like {model::broker_endpoint(...)}
998
+ # This is specifically important for one_or_many_property types that use initializer lists
999
+ # in their C++ defaults but should produce JSON arrays in the output.
1000
+ #
1001
+ # Example transformation:
1002
+ # C++: {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))}
1003
+ # JSON: [{"address": "127.0.0.1", "port": 9644, "name": "127.0.0.1:9644"}]
1004
+ if isinstance(default_str, str) and default_str.strip().startswith("{") and default_str.strip().endswith("}"):
1005
+ # This is an initializer list, parse the elements
1006
+ initializer_content = default_str.strip()[1:-1].strip() # Remove outer braces
1007
+ if initializer_content:
1008
+ # Parse multiple comma-separated elements
1009
+ elements = []
1010
+ current_element = ""
1011
+ paren_depth = 0
1012
+ in_quotes = False
1013
+
1014
+ # Parse elements while respecting nested parentheses and quoted strings
1015
+ for char in initializer_content:
1016
+ if char == '"' and (not current_element or current_element[-1] != '\\'):
1017
+ in_quotes = not in_quotes
1018
+
1019
+ if not in_quotes:
1020
+ if char == '(':
1021
+ paren_depth += 1
1022
+ elif char == ')':
1023
+ paren_depth -= 1
1024
+ elif char == ',' and paren_depth == 0:
1025
+ # Found a top-level comma, this is a separator
1026
+ if current_element.strip():
1027
+ elements.append(current_element.strip())
1028
+ current_element = ""
1029
+ continue
1030
+
1031
+ current_element += char
1032
+
1033
+ # Add the last element
1034
+ if current_element.strip():
1035
+ elements.append(current_element.strip())
1036
+
1037
+ # Try to determine the item type from the type_def
1038
+ items_def = type_def.get("items", {})
1039
+ if "$ref" in items_def:
1040
+ item_type_name = items_def["$ref"].split("/")[-1]
1041
+ else:
1042
+ item_type_name = items_def.get("type", "string") # Default to string for arrays
1043
+
1044
+ # Process each element
1045
+ result_array = []
1046
+ for element_str in elements:
1047
+ # Check if this element is a function call that needs resolution
1048
+ if "::" in element_str and element_str.endswith("()"):
1049
+ # This is a function call, resolve it
1050
+ resolved_value = process_cpp_patterns(element_str)
1051
+ if resolved_value.startswith('"') and resolved_value.endswith('"'):
1052
+ # Remove quotes from resolved string values
1053
+ result_array.append(ast.literal_eval(resolved_value))
1054
+ else:
1055
+ result_array.append(resolved_value)
1056
+ elif element_str.startswith('"') and element_str.endswith('"'):
1057
+ # This is a quoted string, parse it
1058
+ result_array.append(ast.literal_eval(element_str))
1059
+ elif item_type_name == "string":
1060
+ # For string items, expand using the item type (might be constructor)
1061
+ expanded_element = expand_default(item_type_name, element_str)
1062
+ result_array.append(expanded_element)
1063
+ else:
1064
+ # For other types, expand using the item type
1065
+ expanded_element = expand_default(item_type_name, element_str)
1066
+ result_array.append(expanded_element)
1067
+
1068
+ return result_array
1069
+ else:
1070
+ return []
1071
+ else:
1072
+ return default_str
1073
+ else:
1074
+ return default_str
1075
+
1076
+ for prop in properties.values():
1077
+ t = prop.get("type")
1078
+ ref_name = None
1079
+
1080
+ # Handle both JSON pointer references and direct type names
1081
+ if isinstance(t, str):
1082
+ if t.startswith("#/definitions/"):
1083
+ ref_name = t.split("/")[-1]
1084
+ elif t in definitions:
1085
+ ref_name = t
1086
+
1087
+ if ref_name and ref_name in definitions:
1088
+ defn = definitions.get(ref_name)
1089
+ if defn:
1090
+ resolved = resolve_definition_type(defn)
1091
+ # Always set type to the resolved type string (object, string, etc.)
1092
+ resolved_type = resolved.get("type")
1093
+ if resolved_type in ("object", "string", "integer", "boolean", "array", "number"):
1094
+ prop["type"] = resolved_type
1095
+ else:
1096
+ prop["type"] = "object" # fallback for complex types
1097
+ # Expand default if possible
1098
+ if "default" in prop and prop["default"] is not None:
1099
+ expanded = expand_default(ref_name, prop["default"])
1100
+ prop["default"] = expanded
1101
+
1102
+ # Handle case where default is already an object with nested constructors
1103
+ elif prop.get("type") == "object" and isinstance(prop.get("default"), dict):
1104
+ default_obj = prop["default"]
1105
+ for field_name, field_value in default_obj.items():
1106
+ if isinstance(field_value, str) and "::" in field_value and "(" in field_value:
1107
+ # This field contains a nested constructor, try to expand it
1108
+ tname, args = parse_constructor(field_value)
1109
+ if tname and tname in definitions:
1110
+ expanded = expand_default(tname, field_value)
1111
+ if isinstance(expanded, dict):
1112
+ # Update the existing object fields with the expanded values
1113
+ for exp_key, exp_value in expanded.items():
1114
+ if exp_key in default_obj:
1115
+ default_obj[exp_key] = exp_value
1116
+ # Remove the field that contained the constructor
1117
+ # unless it's supposed to remain (like 'name' field)
1118
+ # For now, let's replace entire default with expanded version
1119
+ prop["default"] = expanded
1120
+ break
1121
+
1122
+ # Handle case where property type is array and default contains C++ constructor syntax
1123
+ # This is a backup mechanism for cases where the expand_default function above
1124
+ # didn't catch array initialization patterns. It specifically looks for properties
1125
+ # that are already marked as array type but still have string defaults with
1126
+ # C++ constructor syntax that need expansion.
1127
+ elif prop.get("type") == "array" and isinstance(prop.get("default"), str):
1128
+ default_str = prop["default"]
1129
+ if default_str.strip().startswith("{") and default_str.strip().endswith("}"):
1130
+ # This is an initializer list for an array, expand it using the same logic as expand_default
1131
+ initializer_content = default_str.strip()[1:-1].strip() # Remove outer braces
1132
+ if initializer_content:
1133
+ # Parse multiple comma-separated elements
1134
+ elements = []
1135
+ current_element = ""
1136
+ paren_depth = 0
1137
+ in_quotes = False
1138
+
1139
+ # Parse elements while respecting nested parentheses and quoted strings
1140
+ for char in initializer_content:
1141
+ if char == '"' and (not current_element or current_element[-1] != '\\'):
1142
+ in_quotes = not in_quotes
1143
+
1144
+ if not in_quotes:
1145
+ if char == '(':
1146
+ paren_depth += 1
1147
+ elif char == ')':
1148
+ paren_depth -= 1
1149
+ elif char == ',' and paren_depth == 0:
1150
+ # Found a top-level comma, this is a separator
1151
+ if current_element.strip():
1152
+ elements.append(current_element.strip())
1153
+ current_element = ""
1154
+ continue
1155
+
1156
+ current_element += char
1157
+
1158
+ # Add the last element
1159
+ if current_element.strip():
1160
+ elements.append(current_element.strip())
1161
+
1162
+ # Get the item type from the property definition
1163
+ items_type = prop.get("items", {}).get("type", "string")
1164
+
1165
+ # Process each element
1166
+ result_array = []
1167
+ for element_str in elements:
1168
+ # Check if this element is a function call that needs resolution
1169
+ if "::" in element_str and element_str.endswith("()"):
1170
+ # This is a function call, resolve it
1171
+ resolved_value = process_cpp_patterns(element_str)
1172
+ if resolved_value.startswith('"') and resolved_value.endswith('"'):
1173
+ # Remove quotes from resolved string values
1174
+ result_array.append(ast.literal_eval(resolved_value))
1175
+ else:
1176
+ result_array.append(resolved_value)
1177
+ elif element_str.startswith('"') and element_str.endswith('"'):
1178
+ # This is a quoted string, parse it
1179
+ result_array.append(ast.literal_eval(element_str))
1180
+ elif items_type in definitions:
1181
+ # For complex types, expand using the item type
1182
+ expanded_element = expand_default(items_type, element_str)
1183
+ result_array.append(expanded_element)
1184
+ else:
1185
+ # For simple types, just use the element as-is (likely a string)
1186
+ result_array.append(element_str)
1187
+
1188
+ prop["default"] = result_array
1189
+ else:
1190
+ prop["default"] = []
1191
+
1192
+ # Handle array properties where the default is a single object but should be an array
1193
+ # This is crucial for one_or_many_property types that are detected as arrays
1194
+ # but have defaults that were parsed as single objects by the transformers.
1195
+ #
1196
+ # Background: The transformer chain processes defaults before type resolution,
1197
+ # so a property like admin with default {model::broker_endpoint(...)} gets
1198
+ # expanded to {address: "127.0.0.1", port: 9644} (single object).
1199
+ # But since admin is one_or_many_property<model::broker_endpoint>, it should
1200
+ # be an array: [{address: "127.0.0.1", port: 9644}]
1201
+ if prop.get("type") == "array":
1202
+ default = prop.get("default")
1203
+ if isinstance(default, dict):
1204
+ # If we have an array type but the default is a single object, wrap it in an array
1205
+ # This handles cases like admin: {address: "127.0.0.1", port: 9644} -> [{address: ...}]
1206
+ prop["default"] = [default]
1207
+ elif isinstance(default, str) and default.strip() == "{}":
1208
+ # Empty object string should become empty array for array types
1209
+ # This handles cases like admin_api_tls: "{}" -> []
1210
+ prop["default"] = []
1211
+
1212
+ # Also handle array item types
1213
+ if prop.get("type") == "array" and "items" in prop:
1214
+ items_type = prop["items"].get("type")
1215
+ if isinstance(items_type, str) and items_type in definitions:
1216
+ item_defn = definitions.get(items_type)
1217
+ if item_defn:
1218
+ resolved_item = resolve_definition_type(item_defn)
1219
+ resolved_item_type = resolved_item.get("type")
1220
+ if resolved_item_type in ("object", "string", "integer", "boolean", "array", "number"):
1221
+ prop["items"]["type"] = resolved_item_type
1222
+ else:
1223
+ prop["items"]["type"] = "object" # fallback for complex types
1224
+
1225
+ # Final pass: apply C++ pattern processing to any remaining unprocessed defaults
1226
+ for prop in properties.values():
1227
+ if "default" in prop:
1228
+ default_value = prop["default"]
1229
+
1230
+ if isinstance(default_value, str):
1231
+ # Process string defaults
1232
+ processed = process_cpp_patterns(default_value)
1233
+ if processed != default_value:
1234
+ if processed == "null":
1235
+ prop["default"] = None
1236
+ elif isinstance(processed, str) and processed.startswith('"') and processed.endswith('"'):
1237
+ prop["default"] = ast.literal_eval(processed)
1238
+ else:
1239
+ prop["default"] = processed
1240
+
1241
+ elif isinstance(default_value, list):
1242
+ # Process array defaults - apply C++ pattern processing to each element
1243
+ processed_array = []
1244
+ for item in default_value:
1245
+ if isinstance(item, dict):
1246
+ # Process each field in the object
1247
+ processed_item = {}
1248
+ for field_name, field_value in item.items():
1249
+ if isinstance(field_value, str) and "::" in field_value and "(" in field_value:
1250
+ # This field contains a C++ constructor pattern - try to expand it using type definitions
1251
+ tname, args = parse_constructor(field_value)
1252
+ if tname and tname in definitions:
1253
+ # Get the definition for the nested type and expand the constructor
1254
+ nested_type_def = resolve_definition_type(definitions.get(tname, {}))
1255
+ if nested_type_def.get("properties"):
1256
+ nested_props = list(nested_type_def["properties"].keys())
1257
+ nested_result = {}
1258
+
1259
+ # Map constructor arguments to type properties
1260
+ for j, nested_prop in enumerate(nested_props):
1261
+ nested_prop_def = nested_type_def["properties"][nested_prop]
1262
+ if j < len(args):
1263
+ nested_arg = args[j]
1264
+ processed_nested_arg = process_cpp_patterns(nested_arg)
1265
+
1266
+ # Convert based on property type
1267
+ if nested_prop_def.get("type") == "string":
1268
+ if processed_nested_arg.startswith('"') and processed_nested_arg.endswith('"'):
1269
+ nested_result[nested_prop] = ast.literal_eval(processed_nested_arg)
1270
+ else:
1271
+ nested_result[nested_prop] = processed_nested_arg
1272
+ elif nested_prop_def.get("type") == "integer":
1273
+ try:
1274
+ nested_result[nested_prop] = int(processed_nested_arg)
1275
+ except ValueError:
1276
+ nested_result[nested_prop] = processed_nested_arg
1277
+ elif nested_prop_def.get("type") == "boolean":
1278
+ nested_result[nested_prop] = processed_nested_arg.lower() == "true"
1279
+ else:
1280
+ nested_result[nested_prop] = processed_nested_arg
1281
+ else:
1282
+ nested_result[nested_prop] = None
1283
+
1284
+ # For special case of net::unresolved_address inside broker_authn_endpoint
1285
+ if tname == "net::unresolved_address":
1286
+ # Replace the entire object with expanded net::unresolved_address values
1287
+ # Only include the fields that are actually defined in the type
1288
+ processed_item.update(nested_result)
1289
+ break # Don't process other fields since we replaced the whole object
1290
+ else:
1291
+ processed_item[field_name] = nested_result
1292
+ else:
1293
+ # Fallback to simple pattern processing
1294
+ processed_field = process_cpp_patterns(field_value)
1295
+ if processed_field == "null":
1296
+ processed_item[field_name] = None
1297
+ elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'):
1298
+ processed_item[field_name] = ast.literal_eval(processed_field)
1299
+ else:
1300
+ processed_item[field_name] = processed_field
1301
+ else:
1302
+ # Fallback to simple pattern processing
1303
+ processed_field = process_cpp_patterns(field_value)
1304
+ if processed_field == "null":
1305
+ processed_item[field_name] = None
1306
+ elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'):
1307
+ processed_item[field_name] = ast.literal_eval(processed_field)
1308
+ else:
1309
+ processed_item[field_name] = processed_field
1310
+ elif isinstance(field_value, str):
1311
+ # Simple string field - apply C++ pattern processing
1312
+ processed_field = process_cpp_patterns(field_value)
1313
+ if processed_field == "null":
1314
+ processed_item[field_name] = None
1315
+ elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'):
1316
+ processed_item[field_name] = ast.literal_eval(processed_field)
1317
+ else:
1318
+ processed_item[field_name] = processed_field
1319
+ else:
1320
+ processed_item[field_name] = field_value
1321
+ processed_array.append(processed_item)
1322
+ else:
1323
+ # Non-object array item
1324
+ if isinstance(item, str):
1325
+ processed_item = process_cpp_patterns(item)
1326
+ if processed_item == "null":
1327
+ processed_array.append(None)
1328
+ elif isinstance(processed_item, str) and processed_item.startswith('"') and processed_item.endswith('"'):
1329
+ processed_array.append(ast.literal_eval(processed_item))
1330
+ else:
1331
+ processed_array.append(processed_item)
1332
+ else:
1333
+ processed_array.append(item)
1334
+ prop["default"] = processed_array
1335
+
1336
+ elif isinstance(default_value, dict):
1337
+ # Process object defaults - apply C++ pattern processing to each field
1338
+ processed_object = {}
1339
+ for field_name, field_value in default_value.items():
1340
+ if isinstance(field_value, str):
1341
+ processed_field = process_cpp_patterns(field_value)
1342
+ if processed_field == "null":
1343
+ processed_object[field_name] = None
1344
+ elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'):
1345
+ processed_object[field_name] = ast.literal_eval(processed_field)
1346
+ else:
1347
+ processed_object[field_name] = processed_field
1348
+ else:
1349
+ processed_object[field_name] = field_value
1350
+ prop["default"] = processed_object
1351
+
1352
+ # Handle unresolved C++ types
1353
+ prop_type = prop.get("type")
1354
+ if isinstance(prop_type, str):
1355
+ # Check if it's an unresolved C++ type (contains :: or ends with >)
1356
+ if ("::" in prop_type or prop_type.endswith(">") or
1357
+ prop_type.endswith("_t") or prop_type.startswith("std::")):
1358
+ # Default unresolved C++ types to string, unless they look like numbers
1359
+ if any(word in prop_type.lower() for word in ["int", "long", "short", "double", "float", "number"]):
1360
+ prop["type"] = "integer"
1361
+ elif any(word in prop_type.lower() for word in ["bool"]):
1362
+ prop["type"] = "boolean"
1363
+ else:
1364
+ prop["type"] = "string"
1365
+
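For reference, the fallback above reduces to roughly this mapping (a sketch, not the exact code path; note that float-like names also land on integer):

    def fallback_json_type(cpp_type: str) -> str:
        name = cpp_type.lower()
        if any(word in name for word in ("int", "long", "short", "double", "float", "number")):
            return "integer"   # float/double names also end up here, mirroring the code above
        if "bool" in name:
            return "boolean"
        return "string"

    assert fallback_json_type("uint32_t") == "integer"             # ends with "_t", name contains "int"
    assert fallback_json_type("std::filesystem::path") == "string"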
1366
+ # Final pass: process enterprise values
1367
+ for prop in properties.values():
1368
+ if "enterprise_value" in prop:
1369
+ enterprise_value = prop["enterprise_value"]
1370
+ if isinstance(enterprise_value, str):
1371
+ processed_enterprise = process_enterprise_value(enterprise_value)
1372
+ prop["enterprise_value"] = processed_enterprise
1373
+
1374
+ return properties
1375
+
1376
+
1377
+ def extract_topic_properties(source_path):
1378
+ """
1379
+ Extract topic properties and convert them to the standard properties format.
1380
+
1381
+ Args:
1382
+ source_path: Path to the Redpanda source code
1383
+
1384
+ Returns:
1385
+ Dictionary of topic properties in the standard format with config_scope: "topic"
1386
+ """
1387
+ if TopicPropertyExtractor is None:
1388
+ logging.warning("TopicPropertyExtractor not available, skipping topic property extraction")
1389
+ return {}
1390
+
1391
+ try:
1392
+ extractor = TopicPropertyExtractor(source_path)
1393
+ topic_data = extractor.extract_topic_properties()
1394
+ topic_properties = topic_data.get("topic_properties", {})
1395
+
1396
+ # Convert topic properties to the standard properties format
1397
+ converted_properties = {}
1398
+ for prop_name, prop_data in topic_properties.items():
1399
+ # Skip no-op properties
1400
+ if prop_data.get("is_noop", False):
1401
+ continue
1402
+
1403
+ converted_properties[prop_name] = {
1404
+ "name": prop_name,
1405
+ "description": prop_data.get("description", ""),
1406
+ "type": prop_data.get("type", "string"),
1407
+ "config_scope": "topic",
1408
+ "source_file": prop_data.get("source_file", ""),
1409
+ "corresponding_cluster_property": prop_data.get("corresponding_cluster_property", ""),
1410
+ "acceptable_values": prop_data.get("acceptable_values", ""),
1411
+ "is_deprecated": False,
1412
+ "is_topic_property": True
1413
+ }
1414
+
1415
+ logging.info(f"Extracted {len(converted_properties)} topic properties (excluding {len([p for p in topic_properties.values() if p.get('is_noop', False)])} no-op properties)")
1416
+ return converted_properties
1417
+
1418
+ except Exception as e:
1419
+ logging.error(f"Failed to extract topic properties: {e}")
1420
+ return {}
1421
+
1422
+
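For reference, each converted record comes out shaped like the following; the field names are taken from the mapping above, while the property name, path, and values are illustrative placeholders:

    example_topic_property = {
        "example.topic.property": {                      # placeholder property name
            "name": "example.topic.property",
            "description": "Extracted description text",
            "type": "string",
            "config_scope": "topic",
            "source_file": "<path/to/source_file.cc>",   # hypothetical
            "corresponding_cluster_property": "example_cluster_property",
            "acceptable_values": "",
            "is_deprecated": False,
            "is_topic_property": True,
        }
    }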
148
1423
  def main():
1424
+ """
1425
+ CLI entry point that extracts Redpanda configuration properties from C++ sources and emits JSON outputs.
1426
+
1427
+ Runs a full extraction and transformation pipeline:
1428
+ - Parses command-line options (required: --path). Optional flags include --recursive, --output, --enhanced-output, --definitions, --overrides, --cloud-support, and --verbose.
1429
+ - Validates input paths and collects header/.cc file pairs.
1430
+ - Initializes Tree-sitter C++ parser and extracts configuration properties from source files (optionally augmented with topic properties).
1431
+ - Produces two outputs:
1432
+ - Original properties JSON: resolved types, expanded C++ defaults, added config_scope, and optional cloud metadata.
1433
+ - Enhanced properties JSON: same as original but with overrides applied before final resolution.
1434
+ - If --cloud-support is requested, attempts to fetch cloud configuration and add cloud metadata; this requires the cloud_config integration and network access (also requires GITHUB_TOKEN for private access). If cloud support is requested but dependencies are missing, the process will exit with an error.
1435
+ - Writes JSON to files when --output and/or --enhanced-output are provided; otherwise prints the original JSON to stdout.
1436
+ - Exits with non-zero status on fatal errors (missing files, parse errors, missing Tree-sitter parser, I/O failures, or missing cloud dependencies when requested).
1437
+
1438
+ Side effects:
1439
+ - Reads and writes files, may call external cloud config fetchers, logs to the configured logger, and may call sys.exit() on fatal conditions.
1440
+ """
149
1441
  import argparse
150
1442
 
151
1443
  def generate_options():
1444
+ """
1445
+ Create and return an argparse.ArgumentParser preconfigured for the property extractor CLI.
1446
+
1447
+ The parser understands the following options:
1448
+ - --path (required): path to the Redpanda source directory to scan.
1449
+ - --recursive: scan the path recursively.
1450
+ - --output: file path to write the JSON output (stdout if omitted).
1451
+ - --enhanced-output: file path to write the enhanced JSON output with overrides applied.
1452
+ - --definitions: JSON file containing type definitions (defaults to a definitions.json co-located with this module).
1453
+ - --overrides: optional JSON file with property description/metadata overrides.
1454
+ - --cloud-support: enable fetching cloud metadata from the cloudv2 repository (requires GITHUB_TOKEN and external dependencies such as pyyaml and requests).
1455
+ - -v / --verbose: enable verbose (DEBUG-level) logging.
1456
+
1457
+ Returns:
1458
+ argparse.ArgumentParser: Parser configured with the above options.
1459
+ """
152
1460
  arg_parser = argparse.ArgumentParser(
153
- description="Extract all properties from the Redpanda's source code and generate a JSON output with their definitions"
154
- )
155
- arg_parser.add_argument(
156
- "--path",
157
- type=str,
158
- required=True,
159
- help="Path to the Redpanda's source dir to extract the properties",
160
- )
161
-
162
- arg_parser.add_argument(
163
- "--recursive", action="store_true", help="Scan the path recursively"
1461
+ description="Internal property extraction tool - use doc-tools.js for user interface"
164
1462
  )
165
-
166
- arg_parser.add_argument(
167
- "--output",
168
- type=str,
169
- required=False,
170
- help="File to store the JSON output. If no file is provided, the JSON will be printed to the standard output",
171
- )
172
-
173
- arg_parser.add_argument(
174
- "--definitions",
175
- type=str,
176
- required=False,
177
- default=os.path.dirname(os.path.realpath(__file__)) + "/definitions.json",
178
- help='JSON file with the type definitions. This file will be merged in the output under the "definitions" field',
179
- )
180
-
181
- arg_parser.add_argument("-v", "--verbose", action="store_true")
1463
+ # Core required parameters
1464
+ arg_parser.add_argument("--path", type=str, required=True, help="Path to Redpanda source directory")
1465
+ arg_parser.add_argument("--recursive", action="store_true", help="Scan path recursively")
1466
+
1467
+ # Output options
1468
+ arg_parser.add_argument("--output", type=str, help="JSON output file path")
1469
+ arg_parser.add_argument("--enhanced-output", type=str, help="Enhanced JSON output file path")
1470
+
1471
+ # Data sources
1472
+ arg_parser.add_argument("--definitions", type=str,
1473
+ default=os.path.dirname(os.path.realpath(__file__)) + "/definitions.json",
1474
+ help="Type definitions JSON file")
1475
+ arg_parser.add_argument("--overrides", type=str, help="Property overrides JSON file")
1476
+
1477
+ # Feature flags (set by Makefile from environment variables)
1478
+ arg_parser.add_argument("--cloud-support", action="store_true", help="Enable cloud metadata")
1479
+ arg_parser.add_argument("-v", "--verbose", action="store_true", help="Verbose logging")
182
1480
 
183
1481
  return arg_parser
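As a quick usage sketch, the parser built above accepts an invocation along these lines; paths are hypothetical, and in the module the factory is only reachable through main():

    parser = generate_options()
    opts = parser.parse_args([
        "--path", "redpanda/src",                           # required
        "--recursive",
        "--output", "gen/properties.json",
        "--enhanced-output", "gen/properties-enhanced.json",
        "--overrides", "overrides.json",
        "--cloud-support",                                   # needs GITHUB_TOKEN plus pyyaml/requests
        "--verbose",
    ])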
184
1482
 
@@ -187,8 +1485,10 @@ def main():
187
1485
 
188
1486
  if options.verbose:
189
1487
  logging.basicConfig(level="DEBUG")
1488
+ # Keep the cloud_config logger at INFO even when the root logger runs at DEBUG
1489
+ logging.getLogger('cloud_config').setLevel(logging.INFO)
190
1490
  else:
191
- logging.basicConfig(level="INFO")
1491
+ logging.basicConfig(level="WARNING") # Suppress INFO logs by default
192
1492
 
193
1493
  validate_paths(options)
194
1494
 
@@ -208,6 +1508,16 @@ def main():
208
1508
  logging.error(f"Failed to parse definitions file: {e}")
209
1509
  sys.exit(1)
210
1510
 
1511
+ # Load property overrides if provided
1512
+ overrides = None
1513
+ if options.overrides:
1514
+ try:
1515
+ with open(options.overrides) as f:
1516
+ overrides = json.load(f)
1517
+ except Exception as e:
1518
+ logging.error(f"Failed to load overrides file: {e}")
1519
+ sys.exit(1)
1520
+
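The overrides file's exact schema is defined by apply_property_overrides (not shown in this hunk); a plausible shape, purely as an assumption, is a map from property name to the fields being replaced:

    overrides_example = {
        "example_property": {                              # placeholder property name
            "description": "Hand-written description that replaces the extracted one."
        }
    }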
211
1521
  treesitter_dir = os.path.join(os.getcwd(), "tree-sitter/tree-sitter-cpp")
212
1522
  destination_path = os.path.join(treesitter_dir, "tree-sitter-cpp.so")
213
1523
 
@@ -219,25 +1529,94 @@ def main():
219
1529
  treesitter_dir, destination_path
220
1530
  )
221
1531
 
1532
+
222
1533
  files_with_properties = get_files_with_properties(
223
1534
  file_pairs, treesitter_parser, cpp_language
224
1535
  )
225
1536
  properties = transform_files_with_properties(files_with_properties)
226
- properties_and_definitions = merge_properties_and_definitions(
227
- properties, definitions
1537
+
1538
+ # Extract topic properties and add them to the main properties dictionary
1539
+ topic_properties = extract_topic_properties(options.path)
1540
+ if topic_properties:
1541
+ properties.update(topic_properties)
1542
+ logging.info(f"Added {len(topic_properties)} topic properties to the main properties collection")
1543
+
1544
+ # First, create the original properties without overrides for the base JSON output
1545
+ # 1. Add config_scope field based on which source file defines the property
1546
+ original_properties = add_config_scope(deepcopy(properties))
1547
+
1548
+ # 2. Fetch cloud configuration and add cloud support metadata if requested
1549
+ # Check both CLI flag and environment variable (CLOUD_SUPPORT=1 from Makefile)
1550
+ cloud_support_enabled = options.cloud_support or os.environ.get('CLOUD_SUPPORT') == '1'
1551
+ cloud_config = None
1552
+ if cloud_support_enabled:
1553
+ if fetch_cloud_config and add_cloud_support_metadata:
1554
+ logging.info("Cloud support enabled, fetching cloud configuration...")
1555
+ cloud_config = fetch_cloud_config() # This will raise an exception if it fails
1556
+ original_properties = add_cloud_support_metadata(original_properties, cloud_config)
1557
+ logging.info(f"✅ Cloud support metadata applied successfully using configuration version {cloud_config.version}")
1558
+ else:
1559
+ logging.error("❌ Cloud support requested but cloud_config module not available")
1560
+ logging.error("This indicates missing Python dependencies for cloud configuration")
1561
+ logging.error("Install required packages: pip install pyyaml requests")
1562
+ logging.error("Or if using a virtual environment, activate it first")
1563
+ sys.exit(1)
1564
+
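Both switches checked above can also be exercised from the environment; a minimal sketch of the Makefile-style route (the token value is a placeholder):

    import os

    os.environ["CLOUD_SUPPORT"] = "1"          # same effect as passing --cloud-support
    os.environ["GITHUB_TOKEN"] = "<token>"     # placeholder; needed for private cloudv2 access per the docstrings above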
1565
+ # 3. Resolve type references and expand default values for original properties
1566
+ original_properties = resolve_type_and_default(original_properties, definitions)
1567
+
1568
+ # Generate original properties JSON (without overrides)
1569
+ original_properties_and_definitions = merge_properties_and_definitions(
1570
+ original_properties, definitions
228
1571
  )
1572
+ original_json_output = json.dumps(original_properties_and_definitions, indent=4, sort_keys=True)
1573
+
1574
+ # Now create enhanced properties with overrides applied
1575
+ # 1. Apply any description overrides from external override files
1576
+ enhanced_properties = apply_property_overrides(deepcopy(properties), overrides, options.overrides)
1577
+
1578
+ # 2. Add config_scope field based on which source file defines the property
1579
+ enhanced_properties = add_config_scope(enhanced_properties)
1580
+
1581
+ # 3. Add cloud support metadata if requested
1582
+ if cloud_config:
1583
+ enhanced_properties = add_cloud_support_metadata(enhanced_properties, cloud_config)
1584
+ logging.info("✅ Cloud support metadata applied to enhanced properties")
1585
+
1586
+ # 4. Resolve type references and expand default values
1587
+ # This step converts:
1588
+ # - C++ type names (model::broker_endpoint) to JSON schema types (object)
1589
+ # - C++ constructor defaults to structured JSON objects
1590
+ # - Single object defaults to arrays for one_or_many_property types
1591
+ enhanced_properties = resolve_type_and_default(enhanced_properties, definitions)
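To make the last bullet concrete, a hypothetical one_or_many property whose extracted default is a single endpoint object leaves this step wrapped in a one-element array (field names and values are illustrative):

    before = {"type": "array", "items": {"type": "object"},
              "default": {"address": "0.0.0.0", "port": 8080}}
    after  = {"type": "array", "items": {"type": "object"},
              "default": [{"address": "0.0.0.0", "port": 8080}]}   # single object promoted to an array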
229
1592
 
230
- json_output = json.dumps(properties_and_definitions, indent=4, sort_keys=True)
1593
+ # Generate enhanced properties JSON (with overrides)
1594
+ enhanced_properties_and_definitions = merge_properties_and_definitions(
1595
+ enhanced_properties, definitions
1596
+ )
1597
+ enhanced_json_output = json.dumps(enhanced_properties_and_definitions, indent=4, sort_keys=True)
231
1598
 
1599
+ # Write original properties file (for backward compatibility)
232
1600
  if options.output:
233
1601
  try:
234
1602
  with open(options.output, "w+") as json_file:
235
- json_file.write(json_output)
1603
+ json_file.write(original_json_output)
1604
+ print(f"✅ Original properties JSON generated at {options.output}")
236
1605
  except IOError as e:
237
- logging.error(f"Failed to write output file: {e}")
1606
+ logging.error(f"Failed to write original output file: {e}")
238
1607
  sys.exit(1)
239
1608
  else:
240
- print(json_output)
1609
+ print(original_json_output)
1610
+
1611
+ # Write enhanced properties file (with overrides applied)
1612
+ if options.enhanced_output:
1613
+ try:
1614
+ with open(options.enhanced_output, "w+") as json_file:
1615
+ json_file.write(enhanced_json_output)
1616
+ print(f"✅ Enhanced properties JSON (with overrides) generated at {options.enhanced_output}")
1617
+ except IOError as e:
1618
+ logging.error(f"Failed to write enhanced output file: {e}")
1619
+ sys.exit(1)
241
1620
 
242
1621
  if __name__ == "__main__":
243
1622
  main()