@redpanda-data/docs-extensions-and-macros 4.11.0 → 4.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/doc-tools.js +4 -2
- package/extensions/convert-to-markdown.js +17 -1
- package/package.json +3 -1
- package/tools/property-extractor/COMPUTED_CONSTANTS.md +173 -0
- package/tools/property-extractor/Makefile +12 -1
- package/tools/property-extractor/README.adoc +828 -97
- package/tools/property-extractor/compare-properties.js +38 -13
- package/tools/property-extractor/constant_resolver.py +610 -0
- package/tools/property-extractor/file_pair.py +42 -0
- package/tools/property-extractor/generate-handlebars-docs.js +41 -8
- package/tools/property-extractor/helpers/gt.js +9 -0
- package/tools/property-extractor/helpers/includes.js +17 -0
- package/tools/property-extractor/helpers/index.js +3 -0
- package/tools/property-extractor/helpers/isEnterpriseEnum.js +24 -0
- package/tools/property-extractor/helpers/renderPropertyExample.js +6 -5
- package/tools/property-extractor/overrides.json +248 -0
- package/tools/property-extractor/parser.py +254 -32
- package/tools/property-extractor/property_bag.py +40 -0
- package/tools/property-extractor/property_extractor.py +1417 -430
- package/tools/property-extractor/requirements.txt +1 -0
- package/tools/property-extractor/templates/property-backup.hbs +161 -0
- package/tools/property-extractor/templates/property.hbs +104 -49
- package/tools/property-extractor/templates/topic-property-backup.hbs +148 -0
- package/tools/property-extractor/templates/topic-property.hbs +72 -34
- package/tools/property-extractor/tests/test_known_values.py +617 -0
- package/tools/property-extractor/tests/transformers_test.py +81 -6
- package/tools/property-extractor/topic_property_extractor.py +23 -10
- package/tools/property-extractor/transformers.py +2191 -369
- package/tools/property-extractor/type_definition_extractor.py +669 -0
- package/tools/property-extractor/definitions.json +0 -245
|
@@ -65,12 +65,308 @@ from copy import deepcopy
|
|
|
65
65
|
from pathlib import Path
|
|
66
66
|
from file_pair import FilePair
|
|
67
67
|
from tree_sitter import Language, Parser
|
|
68
|
+
import operator
|
|
69
|
+
import re
|
|
68
70
|
|
|
69
71
|
from parser import build_treesitter_cpp_library, extract_properties_from_file_pair
|
|
70
72
|
from property_bag import PropertyBag
|
|
71
73
|
from transformers import *
|
|
74
|
+
from constant_resolver import ConstantResolver
|
|
75
|
+
|
|
76
|
+
# Compiled regex patterns for performance optimization
|
|
77
|
+
VECTOR_PATTERN = re.compile(r'std::vector<[^>]+>\s*\{\s*([^}]*)\s*\}')
|
|
78
|
+
ENUM_PATTERN = re.compile(r'^[a-zA-Z0-9_:]+::([a-zA-Z0-9_]+)$') # Match full qualified identifier, not followed by constructors
|
|
79
|
+
CONSTRUCTOR_PATTERN = re.compile(r'([a-zA-Z0-9_:]+)\((.*)\)')
|
|
80
|
+
BRACED_CONSTRUCTOR_PATTERN = re.compile(r'([a-zA-Z0-9_:]+)\{(.*)\}')
|
|
81
|
+
DIGIT_SEPARATOR_PATTERN = re.compile(r"(?<=\d)'(?=\d)")
|
|
82
|
+
FUNCTION_CALL_PATTERN = re.compile(r'([a-zA-Z0-9_:]+)\(\)')
|
|
83
|
+
CHRONO_PATTERN = re.compile(r'std::chrono::([a-zA-Z]+)\s*\{\s*(\d+)\s*\}')
|
|
84
|
+
CHRONO_PAREN_PATTERN = re.compile(r'(?:std::)?chrono::([a-zA-Z]+)\s*\(\s*([^)]+)\s*\)')
|
|
85
|
+
TIME_UNIT_PATTERN = re.compile(r'(\d+)\s*(min|s|ms|h)')
|
|
86
|
+
ADDRESS_PATTERN = re.compile(r'net::unresolved_address\s*\(\s*"?([^",]+)"?\s*,\s*([^)]+)\)')
|
|
87
|
+
KEYVAL_PATTERN = re.compile(r"'([^']+)':\s*'([^']+)'")
|
|
88
|
+
IDENTIFIER_PATTERN = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
|
|
89
|
+
SSTRING_PATTERN = re.compile(r'ss::sstring\{([a-zA-Z_][a-zA-Z0-9_]*)\}')
|
|
90
|
+
UNDERSCORE_PREFIX_PATTERN = re.compile(r"^_")
|
|
91
|
+
|
|
92
|
+
class ConstexprCache:
    """
    Cache for C++ constexpr identifier and function lookups to avoid repeated filesystem walks.

    The cache is filled by a single walk over selected Redpanda source directories.
    Every string constant and zero-argument string-returning function that matches one
    of the patterns below is recorded, and later lookups are plain dictionary reads.

    Attributes:
        constexpr_cache (dict): identifier -> string literal
        function_cache (dict): function name (simple and namespace-qualified) -> string literal
        is_built (bool): True once a source tree has been scanned
        redpanda_source (str or None): Path of the source tree the cache was built from
    """

    # File extensions that are scanned for definitions
    _SOURCE_EXTENSIONS = ('.h', '.cc', '.hpp', '.cpp')

    # Sub-directories of <source>/src/v that are walked
    _SEARCH_SUBDIRS = ('model', 'config', 'kafka', 'security', 'pandaproxy')

    # Constexpr identifier patterns: group(1) = identifier, group(2) = string literal.
    # Some patterns overlap; the list and its order are kept as-is because the last
    # matching pattern decides the cached value when an identifier is defined twice.
    _CONSTEXPR_PATTERNS = (
        re.compile(r'inline\s+constexpr\s+std::string_view\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\{\s*"([^"]+)"\s*\}'),
        re.compile(r'constexpr\s+std::string_view\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\{\s*"([^"]+)"\s*\}'),
        re.compile(r'inline\s+constexpr\s+auto\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"([^"]+)"'),
        re.compile(r'constexpr\s+auto\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"([^"]+)"'),
        re.compile(r'static\s+constexpr\s+std::string_view\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\{\s*"([^"]+)"\s*\}'),
        re.compile(r'static\s+inline\s+constexpr\s+std::string_view\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\{\s*"([^"]+)"\s*\}'),
    )

    # General patterns for string-valued functions/objects:
    # group(1) = name, group(2) = string literal.
    _FUNCTION_PATTERNS = (
        # Pattern: inline constexpr std::string_view name { "value" }
        re.compile(r'inline\s+constexpr\s+std::string_view\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\{\s*"([^"]+)"\s*\}'),
        # Pattern: constexpr std::string_view name { "value" }
        re.compile(r'constexpr\s+std::string_view\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\{\s*"([^"]+)"\s*\}'),
        # Pattern: inline std::string_view name() { return "value"; }
        re.compile(r'inline\s+std::string_view\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"'),
        # Pattern: std::string_view name() { return "value"; }
        re.compile(r'std::string_view\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"'),
        # Pattern: inline const model::topic name("value")
        re.compile(r'inline\s+const\s+model::topic\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*"([^"]+)"\s*\)'),
        # Pattern: const model::topic name("value")
        re.compile(r'const\s+model::topic\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*"([^"]+)"\s*\)'),
        # Pattern: inline const model::ns name("value")
        re.compile(r'inline\s+const\s+model::ns\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*"([^"]+)"\s*\)'),
        # Pattern: const model::ns name("value")
        re.compile(r'const\s+model::ns\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*"([^"]+)"\s*\)'),
    )

    # Named namespace opening, including the C++17 nested form "namespace a::b {"
    _NAMESPACE_PATTERN = re.compile(r'namespace\s+(\w+(?:::\w+)*)\s*\{')

    def __init__(self):
        self.constexpr_cache = {}  # identifier -> value
        self.function_cache = {}  # function_name -> value
        self.is_built = False
        self.redpanda_source = None
        # Lookups retry the build while the source is missing; warn only once
        self._missing_source_warned = False

    def build_cache(self, redpanda_source=None):
        """
        Build the cache by walking the Redpanda source tree once and extracting all constexpr definitions.

        The source path is resolved (auto-detected if needed) before checking whether
        the cache is already built, so repeated calls without arguments do not rebuild.

        Args:
            redpanda_source (str, optional): Path to Redpanda source. If None, will be auto-detected.
        """
        source = redpanda_source or find_redpanda_source()

        if not source:
            if not self._missing_source_warned:
                logger.warning("Could not find Redpanda source directory for constexpr cache")
                self._missing_source_warned = True
            return

        if self.is_built and self.redpanda_source == source:
            return  # Already built for this source

        self.redpanda_source = source
        self.constexpr_cache.clear()
        self.function_cache.clear()

        files_processed = 0
        for subdir in self._SEARCH_SUBDIRS:
            search_dir = os.path.join(source, 'src', 'v', subdir)
            if not os.path.exists(search_dir):
                continue

            for root, _dirs, files in os.walk(search_dir):
                for file_name in files:
                    if not file_name.endswith(self._SOURCE_EXTENSIONS):
                        continue

                    file_path = os.path.join(root, file_name)
                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()
                    except (OSError, UnicodeDecodeError) as e:
                        # Unreadable files are skipped; the cache is best-effort
                        logger.debug(f"Error reading {file_path} for cache: {e}")
                        continue

                    self._index_content(content)
                    files_processed += 1

        self.is_built = True
        logger.debug(f"Built constexpr cache: {len(self.constexpr_cache)} identifiers, "
                     f"{len(self.function_cache)} functions from {files_processed} files")

    def _index_content(self, content):
        """
        Record every constant and function definition found in one file's text.

        Args:
            content (str): File content
        """
        for pattern in self._CONSTEXPR_PATTERNS:
            for match in pattern.finditer(content):
                self.constexpr_cache[match.group(1)] = match.group(2)

        for pattern in self._FUNCTION_PATTERNS:
            for match in pattern.finditer(content):
                func_name = match.group(1)
                func_value = match.group(2)

                # Store with both simple name and qualified name
                self.function_cache[func_name] = func_value
                namespace = self._extract_namespace_for_function(content, match.start())
                if namespace:
                    self.function_cache[f"{namespace}::{func_name}"] = func_value

    def _extract_namespace_for_function(self, content, position):
        """
        Extract the namespace at a given position in the file.

        Every named namespace opened before the position is checked by counting
        braces between its opening brace and the position. Braces inside comments
        or string literals are counted too, so the result is a heuristic.

        Args:
            content (str): File content
            position (int): Position in the file

        Returns:
            str: Namespace (e.g., "model" or "config::tls"), or '' if none encloses the position
        """
        namespaces = []

        for match in self._NAMESPACE_PATTERN.finditer(content[:position]):
            # Start with depth=1 (we entered the namespace with its opening brace)
            brace_depth = 1

            for char in content[match.end():position]:
                if char == '{':
                    brace_depth += 1
                elif char == '}':
                    brace_depth -= 1
                    if brace_depth == 0:
                        # Namespace was closed before reaching current position
                        break

            if brace_depth > 0:
                # Still inside this namespace
                namespaces.append(match.group(1))

        return '::'.join(namespaces) if namespaces else ''

    def lookup_constexpr(self, identifier):
        """
        Look up a constexpr identifier value from the cache.

        Triggers a cache build first if none has succeeded yet.

        Args:
            identifier (str): The identifier to look up

        Returns:
            str or None: The resolved value if found, None otherwise
        """
        if not self.is_built:
            self.build_cache()

        return self.constexpr_cache.get(identifier)

    def lookup_function(self, function_name):
        """
        Look up a function call result from the cache.

        Triggers a cache build first if none has succeeded yet.

        Args:
            function_name (str): The function name to look up (simple or namespace-qualified)

        Returns:
            str or None: The resolved value if found, None otherwise
        """
        if not self.is_built:
            self.build_cache()

        return self.function_cache.get(function_name)
|
|
296
|
+
|
|
297
|
+
# Global cache instance
|
|
298
|
+
_constexpr_cache = ConstexprCache()
|
|
299
|
+
|
|
300
|
+
# Global storage for type definitions (used by transformers for enum mapping)
|
|
301
|
+
_type_definitions_cache = {}
|
|
72
302
|
|
|
73
303
|
# Import topic property extractor
|
|
304
|
+
def find_redpanda_source():
    """
    Locate the Redpanda source directory by searching standard locations.

    The candidates are tried in order and are relative to the current working
    directory, so the result depends on where the extractor is started
    (project root, tools directory, etc.). Only existing directories are
    accepted; a regular file at a candidate path is ignored.

    Returns:
        str or None: Path to the Redpanda source directory if found, None otherwise.
    """
    candidates = (
        'tmp/redpanda',  # Current directory
        '../tmp/redpanda',  # Parent directory
        'tools/property-extractor/tmp/redpanda',  # From project root
        os.path.join(os.getcwd(), 'tools', 'property-extractor', 'tmp', 'redpanda'),
    )

    return next((path for path in candidates if os.path.isdir(path)), None)
|
|
326
|
+
|
|
327
|
+
def safe_arithmetic_eval(expression):
    """
    Safely evaluate simple integer arithmetic expressions like '60 * 5' without using eval().

    Supports +, -, *, /, //, %, ** (with a bounded result size), unary +/-,
    parentheses, and chained operations such as '24 * 60 * 60'.
    The expression is parsed with the ast module and only integer literals and
    the operators above are evaluated; nothing is ever executed.

    '/' truncates toward zero after each division, so '7 / 2' is 3 and '-7 / 2' is -3.
    '//' and '%' follow Python semantics (floor division / sign of the divisor).

    Args:
        expression (str): The arithmetic expression to evaluate

    Returns:
        int: The value of the expression

    Raises:
        ValueError: If the expression contains characters outside digits, spaces,
            parentheses and the operator characters, or cannot be evaluated
            (syntax error, division by zero, exponent out of range).
    """
    import ast  # local import so this function does not depend on the module's import block

    # Only allow safe characters: digits, spaces, and basic operators
    allowed_chars = set('0123456789+-*/%() ')
    if not all(c in allowed_chars for c in expression):
        raise ValueError("Expression contains unsafe characters")

    def truncating_div(left, right):
        # Exact integer division rounding toward zero (no float round-trip)
        quotient = abs(left) // abs(right)
        return quotient if (left < 0) == (right < 0) else -quotient

    def bounded_pow(base, exponent):
        # Reject negative exponents (non-integer result) and results beyond ~4096 bits
        if exponent < 0 or base.bit_length() * exponent > 4096:
            raise ValueError("Could not safely evaluate expression")
        return base ** exponent

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: truncating_div,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: bounded_pow,
    }
    unary_ops = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def evaluate(node):
        if isinstance(node, ast.Constant) and isinstance(node.value, int):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        raise ValueError("Could not safely evaluate expression")

    # Python rejects literals like "010"; drop leading zeros so they read as decimal.
    # Stripping also avoids an "unexpected indent" error from leading whitespace.
    normalized = re.sub(r'\b0+(?=\d)', '', expression.strip())

    try:
        tree = ast.parse(normalized, mode='eval')
        return int(evaluate(tree.body))
    except (SyntaxError, ZeroDivisionError, RecursionError, MemoryError, OverflowError):
        raise ValueError("Could not safely evaluate expression") from None
|
|
369
|
+
|
|
74
370
|
try:
|
|
75
371
|
from topic_property_extractor import TopicPropertyExtractor
|
|
76
372
|
except ImportError:
|
|
@@ -116,29 +412,11 @@ def process_enterprise_value(enterprise_str):
|
|
|
116
412
|
Union[str, bool, list]: A JSON-serializable representation of the input.
|
|
117
413
|
"""
|
|
118
414
|
enterprise_str = enterprise_str.strip()
|
|
119
|
-
|
|
120
|
-
# Handle special SASL mechanism function names
|
|
121
|
-
if enterprise_str == "is_enterprise_sasl_mechanism":
|
|
122
|
-
# Dynamically look up enterprise SASL mechanisms from source
|
|
123
|
-
enterprise_mechanisms = get_enterprise_sasl_mechanisms()
|
|
124
|
-
if enterprise_mechanisms:
|
|
125
|
-
return enterprise_mechanisms
|
|
126
|
-
else:
|
|
127
|
-
# Fallback to known values if lookup fails
|
|
128
|
-
return ["GSSAPI", "OAUTHBEARER"]
|
|
129
|
-
elif enterprise_str == "is_enterprise_sasl_mechanisms_override":
|
|
130
|
-
# Get the enterprise mechanisms dynamically for a more accurate description
|
|
131
|
-
enterprise_mechanisms = get_enterprise_sasl_mechanisms()
|
|
132
|
-
if enterprise_mechanisms:
|
|
133
|
-
mechanism_list = ", ".join(enterprise_mechanisms)
|
|
134
|
-
return f"Any override containing enterprise mechanisms ({mechanism_list})."
|
|
135
|
-
else:
|
|
136
|
-
return "Any override containing enterprise mechanisms."
|
|
137
|
-
|
|
415
|
+
|
|
138
416
|
# FIRST: Handle std::vector initialization patterns (highest priority)
|
|
139
417
|
# This must come before enum processing because vectors can contain enums
|
|
140
418
|
# Tolerate optional whitespace around braces
|
|
141
|
-
vector_match =
|
|
419
|
+
vector_match = VECTOR_PATTERN.match(enterprise_str)
|
|
142
420
|
if vector_match:
|
|
143
421
|
content = vector_match.group(1).strip()
|
|
144
422
|
if not content:
|
|
@@ -161,7 +439,7 @@ def process_enterprise_value(enterprise_str):
|
|
|
161
439
|
values.append(ast.literal_eval(value))
|
|
162
440
|
else:
|
|
163
441
|
# Handle enum values in the vector
|
|
164
|
-
enum_match =
|
|
442
|
+
enum_match = ENUM_PATTERN.match(value)
|
|
165
443
|
if enum_match:
|
|
166
444
|
values.append(enum_match.group(1))
|
|
167
445
|
else:
|
|
@@ -177,7 +455,7 @@ def process_enterprise_value(enterprise_str):
|
|
|
177
455
|
values.append(ast.literal_eval(value))
|
|
178
456
|
else:
|
|
179
457
|
# Handle enum values in the vector
|
|
180
|
-
enum_match =
|
|
458
|
+
enum_match = ENUM_PATTERN.match(value)
|
|
181
459
|
if enum_match:
|
|
182
460
|
values.append(enum_match.group(1))
|
|
183
461
|
else:
|
|
@@ -186,18 +464,16 @@ def process_enterprise_value(enterprise_str):
|
|
|
186
464
|
return values
|
|
187
465
|
|
|
188
466
|
# SECOND: Handle enum-like patterns (extract the last part after ::)
|
|
189
|
-
enum_match =
|
|
467
|
+
enum_match = ENUM_PATTERN.match(enterprise_str)
|
|
190
468
|
if enum_match:
|
|
191
469
|
enum_value = enum_match.group(1)
|
|
192
470
|
return enum_value
|
|
193
471
|
|
|
194
472
|
# THIRD: Handle C++ lambda expressions - these usually indicate "any non-default value"
|
|
195
473
|
if enterprise_str.startswith("[](") and enterprise_str.endswith("}"):
|
|
196
|
-
# For lambda expressions,
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
else:
|
|
200
|
-
return "Enterprise feature enabled"
|
|
474
|
+
# For lambda expressions, return a generic message
|
|
475
|
+
# No hardcoded logic for specific properties
|
|
476
|
+
return "Enterprise feature enabled"
|
|
201
477
|
|
|
202
478
|
# FOURTH: Handle simple values with proper JSON types
|
|
203
479
|
# Convert boolean literals to actual boolean values for JSON compatibility
|
|
@@ -214,168 +490,79 @@ def process_enterprise_value(enterprise_str):
|
|
|
214
490
|
|
|
215
491
|
def resolve_cpp_function_call(function_name):
|
|
216
492
|
"""
|
|
217
|
-
Resolve
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
493
|
+
Resolve zero-argument C++ functions to their literal string return values.
|
|
494
|
+
|
|
495
|
+
Uses the pre-built ConstexprCache which dynamically extracts ALL string-returning
|
|
496
|
+
functions from source using general patterns. No hardcoded patterns needed.
|
|
497
|
+
|
|
221
498
|
Parameters:
|
|
222
|
-
function_name (str): Fully-qualified C++ function name to resolve.
|
|
223
|
-
|
|
499
|
+
function_name (str): Fully-qualified C++ function name to resolve (e.g., "model::kafka_audit_logging_topic")
|
|
500
|
+
|
|
224
501
|
Returns:
|
|
225
|
-
str or None: The literal string returned by the C++ function
|
|
502
|
+
str or None: The literal string returned by the C++ function, or None if not found in cache
|
|
226
503
|
"""
|
|
227
|
-
#
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
'
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
r'std::string_view\s+kafka_consumer_offsets_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"',
|
|
245
|
-
r'inline\s+std::string_view\s+kafka_consumer_offsets_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"'
|
|
246
|
-
],
|
|
247
|
-
'files': ['src/v/model/namespace.h', 'src/v/model/namespace.cc', 'src/v/model/kafka_namespace.h']
|
|
248
|
-
},
|
|
249
|
-
'model::kafka_internal_namespace': {
|
|
250
|
-
'patterns': [
|
|
251
|
-
r'inline\s+const\s+model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)',
|
|
252
|
-
r'const\s+model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)',
|
|
253
|
-
r'model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)',
|
|
254
|
-
r'std::string_view\s+kafka_internal_namespace\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"',
|
|
255
|
-
r'inline\s+std::string_view\s+kafka_internal_namespace\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"'
|
|
256
|
-
],
|
|
257
|
-
'files': ['src/v/model/namespace.h', 'src/v/model/namespace.cc', 'src/v/model/kafka_namespace.h']
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
# Check if we have search patterns for this function
|
|
262
|
-
if function_name not in search_patterns:
|
|
263
|
-
logger.debug(f"No search patterns defined for function: {function_name}")
|
|
264
|
-
return None
|
|
265
|
-
|
|
266
|
-
config = search_patterns[function_name]
|
|
267
|
-
|
|
268
|
-
# Try to find the Redpanda source directory
|
|
269
|
-
# Look for it in the standard locations used by the property extractor
|
|
270
|
-
redpanda_source_paths = [
|
|
271
|
-
'tmp/redpanda', # Current directory
|
|
272
|
-
'../tmp/redpanda', # Parent directory
|
|
273
|
-
'tools/property-extractor/tmp/redpanda', # From project root
|
|
274
|
-
os.path.join(os.getcwd(), 'tools', 'property-extractor', 'tmp', 'redpanda')
|
|
275
|
-
]
|
|
276
|
-
|
|
277
|
-
redpanda_source = None
|
|
278
|
-
for path in redpanda_source_paths:
|
|
279
|
-
if os.path.exists(path):
|
|
280
|
-
redpanda_source = path
|
|
281
|
-
break
|
|
282
|
-
|
|
283
|
-
if not redpanda_source:
|
|
284
|
-
logger.warning(f"Could not find Redpanda source directory to resolve function: {function_name}")
|
|
285
|
-
return None
|
|
286
|
-
|
|
287
|
-
# Search in the specified files
|
|
288
|
-
for file_path in config['files']:
|
|
289
|
-
full_path = os.path.join(redpanda_source, file_path)
|
|
290
|
-
if not os.path.exists(full_path):
|
|
291
|
-
continue
|
|
292
|
-
|
|
293
|
-
try:
|
|
294
|
-
with open(full_path, 'r', encoding='utf-8') as f:
|
|
295
|
-
content = f.read()
|
|
296
|
-
|
|
297
|
-
# Try each pattern
|
|
298
|
-
for pattern in config['patterns']:
|
|
299
|
-
match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
|
|
300
|
-
if match:
|
|
301
|
-
resolved_value = match.group(1)
|
|
302
|
-
logger.debug(f"Resolved {function_name}() -> '{resolved_value}' from {file_path}")
|
|
303
|
-
return resolved_value
|
|
304
|
-
|
|
305
|
-
except Exception as e:
|
|
306
|
-
logger.debug(f"Error reading {full_path}: {e}")
|
|
307
|
-
continue
|
|
308
|
-
|
|
309
|
-
# If not found in specific files, do a broader search
|
|
310
|
-
logger.debug(f"Function {function_name} not found in expected files, doing broader search...")
|
|
311
|
-
|
|
312
|
-
# Search more broadly in the model directory
|
|
313
|
-
model_dir = os.path.join(redpanda_source, 'src', 'v', 'model')
|
|
314
|
-
if os.path.exists(model_dir):
|
|
315
|
-
for root, dirs, files in os.walk(model_dir):
|
|
316
|
-
for file in files:
|
|
317
|
-
if file.endswith('.h') or file.endswith('.cc'):
|
|
318
|
-
file_path = os.path.join(root, file)
|
|
319
|
-
try:
|
|
320
|
-
with open(file_path, 'r', encoding='utf-8') as f:
|
|
321
|
-
content = f.read()
|
|
322
|
-
|
|
323
|
-
# Try patterns for this file
|
|
324
|
-
for pattern in config['patterns']:
|
|
325
|
-
match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
|
|
326
|
-
if match:
|
|
327
|
-
resolved_value = match.group(1)
|
|
328
|
-
logger.debug(f"Resolved {function_name}() -> '{resolved_value}' from {file_path}")
|
|
329
|
-
return resolved_value
|
|
330
|
-
|
|
331
|
-
except Exception as e:
|
|
332
|
-
logger.debug(f"Error reading {file_path}: {e}")
|
|
333
|
-
continue
|
|
334
|
-
|
|
335
|
-
logger.warning(f"Could not resolve function call: {function_name}()")
|
|
504
|
+
# Look up function in the pre-built cache
|
|
505
|
+
# The cache was populated by ConstexprCache.build_cache() with general patterns
|
|
506
|
+
# that automatically discover ALL string-returning functions
|
|
507
|
+
cached_result = _constexpr_cache.lookup_function(function_name)
|
|
508
|
+
if cached_result is not None:
|
|
509
|
+
logger.debug(f"Resolved function '{function_name}' -> '{cached_result}' from cache")
|
|
510
|
+
return cached_result
|
|
511
|
+
|
|
512
|
+
# Also try without namespace qualifier (e.g., "kafka_audit_logging_topic")
|
|
513
|
+
if '::' in function_name:
|
|
514
|
+
simple_name = function_name.split('::')[-1]
|
|
515
|
+
cached_result = _constexpr_cache.lookup_function(simple_name)
|
|
516
|
+
if cached_result is not None:
|
|
517
|
+
logger.debug(f"Resolved function '{function_name}' (as '{simple_name}') -> '{cached_result}' from cache")
|
|
518
|
+
return cached_result
|
|
519
|
+
|
|
520
|
+
logger.debug(f"Function '{function_name}' not found in cache")
|
|
336
521
|
return None
|
|
337
522
|
|
|
338
523
|
|
|
339
524
|
def resolve_constexpr_identifier(identifier):
|
|
340
525
|
"""
|
|
341
526
|
Resolve a constexpr identifier from Redpanda source code to its literal string value.
|
|
342
|
-
|
|
527
|
+
|
|
528
|
+
Uses a cache to avoid repeated filesystem walks for better performance.
|
|
343
529
|
Searches common Redpanda source locations for constexpr string or string_view definitions matching the given identifier and returns the literal if found.
|
|
344
|
-
|
|
530
|
+
|
|
345
531
|
Parameters:
|
|
346
|
-
identifier (str): The identifier name to resolve (e.g., "scram").
|
|
347
|
-
|
|
532
|
+
identifier (str): The identifier name to resolve (e.g., "scram" or "net::tls_v1_2_cipher_suites").
|
|
533
|
+
|
|
348
534
|
Returns:
|
|
349
535
|
str or None: The resolved literal string value if found, otherwise `None`.
|
|
350
536
|
"""
|
|
351
|
-
# Try
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
'
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
redpanda_source = None
|
|
360
|
-
for path in redpanda_source_paths:
|
|
361
|
-
if os.path.exists(path):
|
|
362
|
-
redpanda_source = path
|
|
363
|
-
break
|
|
364
|
-
|
|
537
|
+
# Try cache lookup first (much faster)
|
|
538
|
+
cached_result = _constexpr_cache.lookup_constexpr(identifier)
|
|
539
|
+
if cached_result is not None:
|
|
540
|
+
logger.debug(f"Resolved identifier '{identifier}' -> '{cached_result}' from cache")
|
|
541
|
+
return cached_result
|
|
542
|
+
|
|
543
|
+
# Fallback to original filesystem search for compatibility
|
|
544
|
+
redpanda_source = find_redpanda_source()
|
|
365
545
|
if not redpanda_source:
|
|
366
546
|
logger.debug(f"Could not find Redpanda source directory to resolve identifier: {identifier}")
|
|
367
547
|
return None
|
|
548
|
+
|
|
549
|
+
# Strip namespace qualifier if present (e.g., "net::tls_v1_2_cipher_suites" -> "tls_v1_2_cipher_suites")
|
|
550
|
+
search_identifier = identifier.split('::')[-1] if '::' in identifier else identifier
|
|
368
551
|
|
|
369
552
|
# Pattern to match constexpr string_view definitions
|
|
370
553
|
# Matches: inline constexpr std::string_view scram{"SCRAM"};
|
|
371
554
|
patterns = [
|
|
372
|
-
rf'inline\s+constexpr\s+std::string_view\s+{re.escape(
|
|
373
|
-
rf'constexpr\s+std::string_view\s+{re.escape(
|
|
374
|
-
rf'inline\s+constexpr\s+auto\s+{re.escape(
|
|
375
|
-
rf'constexpr\s+auto\s+{re.escape(
|
|
376
|
-
rf'static\s+constexpr\s+std::string_view\s+{re.escape(
|
|
377
|
-
rf'static\s+inline\s+constexpr\s+std::string_view\s+{re.escape(
|
|
555
|
+
rf'inline\s+constexpr\s+std::string_view\s+{re.escape(search_identifier)}\s*\{{\s*"([^"]+)"\s*\}}',
|
|
556
|
+
rf'constexpr\s+std::string_view\s+{re.escape(search_identifier)}\s*\{{\s*"([^"]+)"\s*\}}',
|
|
557
|
+
rf'inline\s+constexpr\s+auto\s+{re.escape(search_identifier)}\s*=\s*"([^"]+)"',
|
|
558
|
+
rf'constexpr\s+auto\s+{re.escape(search_identifier)}\s*=\s*"([^"]+)"',
|
|
559
|
+
rf'static\s+constexpr\s+std::string_view\s+{re.escape(search_identifier)}\s*\{{\s*"([^"]+)"\s*\}}',
|
|
560
|
+
rf'static\s+inline\s+constexpr\s+std::string_view\s+{re.escape(search_identifier)}\s*\{{\s*"([^"]+)"\s*\}}',
|
|
378
561
|
]
|
|
562
|
+
|
|
563
|
+
# Pattern for multi-line concatenated string constants (like TLS cipher suites)
|
|
564
|
+
# Matches: const std::string_view identifier = "line1"\n "line2"\n...;
|
|
565
|
+
multiline_pattern = rf'(?:const|extern\s+const)\s+std::string_view\s+{re.escape(search_identifier)}\s*=\s*((?:"[^"]*"\s*)+);'
|
|
379
566
|
|
|
380
567
|
# Search recursively through the config directory and other common locations
|
|
381
568
|
search_dirs = [
|
|
@@ -383,6 +570,7 @@ def resolve_constexpr_identifier(identifier):
|
|
|
383
570
|
os.path.join(redpanda_source, 'src', 'v', 'kafka'),
|
|
384
571
|
os.path.join(redpanda_source, 'src', 'v', 'security'),
|
|
385
572
|
os.path.join(redpanda_source, 'src', 'v', 'pandaproxy'),
|
|
573
|
+
os.path.join(redpanda_source, 'src', 'v', 'net'), # For TLS cipher suites and network constants
|
|
386
574
|
]
|
|
387
575
|
|
|
388
576
|
for search_dir in search_dirs:
|
|
@@ -399,13 +587,24 @@ def resolve_constexpr_identifier(identifier):
|
|
|
399
587
|
with open(file_path, 'r', encoding='utf-8') as f:
|
|
400
588
|
content = f.read()
|
|
401
589
|
|
|
402
|
-
# Try each pattern
|
|
590
|
+
# Try each single-line pattern first
|
|
403
591
|
for pattern in patterns:
|
|
404
592
|
match = re.search(pattern, content, re.MULTILINE)
|
|
405
593
|
if match:
|
|
406
594
|
resolved_value = match.group(1)
|
|
407
595
|
logger.debug(f"Resolved identifier '{identifier}' -> '{resolved_value}' from {file_path}")
|
|
408
596
|
return resolved_value
|
|
597
|
+
|
|
598
|
+
# Try multi-line concatenated string pattern (for TLS cipher suites, etc.)
|
|
599
|
+
multiline_match = re.search(multiline_pattern, content, re.MULTILINE | re.DOTALL)
|
|
600
|
+
if multiline_match:
|
|
601
|
+
# Extract all quoted strings and concatenate them
|
|
602
|
+
strings_section = multiline_match.group(1)
|
|
603
|
+
string_literals = re.findall(r'"([^"]*)"', strings_section)
|
|
604
|
+
if string_literals:
|
|
605
|
+
resolved_value = ''.join(string_literals)
|
|
606
|
+
logger.debug(f"Resolved multi-line identifier '{identifier}' -> '{resolved_value[:50]}...' from {file_path}")
|
|
607
|
+
return resolved_value
|
|
409
608
|
|
|
410
609
|
except (FileNotFoundError, PermissionError, OSError, UnicodeDecodeError) as e:
|
|
411
610
|
logger.debug(f"Error reading {file_path}: {e}")
|
|
@@ -415,82 +614,6 @@ def resolve_constexpr_identifier(identifier):
|
|
|
415
614
|
return None
|
|
416
615
|
|
|
417
616
|
|
|
418
|
-
def get_enterprise_sasl_mechanisms():
|
|
419
|
-
"""
|
|
420
|
-
Locate and resolve enterprise SASL mechanisms declared in Redpanda's sasl_mechanisms.h.
|
|
421
|
-
|
|
422
|
-
Searches known Redpanda source locations for an inline constexpr definition of enterprise_sasl_mechanisms,
|
|
423
|
-
extracts the identifiers, and resolves each identifier to its literal string value where possible; unresolved
|
|
424
|
-
identifiers are converted to an uppercase fallback.
|
|
425
|
-
|
|
426
|
-
Returns:
|
|
427
|
-
list or None: List of enterprise SASL mechanism strings (e.g., ["GSSAPI", "OAUTHBEARER"]),
|
|
428
|
-
or `None` if the lookup fails.
|
|
429
|
-
"""
|
|
430
|
-
# Try to find the Redpanda source directory
|
|
431
|
-
redpanda_source_paths = [
|
|
432
|
-
'tmp/redpanda', # Current directory
|
|
433
|
-
'../tmp/redpanda', # Parent directory
|
|
434
|
-
'tools/property-extractor/tmp/redpanda', # From project root
|
|
435
|
-
os.path.join(os.getcwd(), 'tools', 'property-extractor', 'tmp', 'redpanda')
|
|
436
|
-
]
|
|
437
|
-
|
|
438
|
-
redpanda_source = None
|
|
439
|
-
for path in redpanda_source_paths:
|
|
440
|
-
if os.path.exists(path):
|
|
441
|
-
redpanda_source = path
|
|
442
|
-
break
|
|
443
|
-
|
|
444
|
-
if not redpanda_source:
|
|
445
|
-
logger.debug("Could not find Redpanda source directory to resolve enterprise SASL mechanisms")
|
|
446
|
-
return None
|
|
447
|
-
|
|
448
|
-
# Look for the enterprise_sasl_mechanisms definition in sasl_mechanisms.h
|
|
449
|
-
sasl_mechanisms_file = os.path.join(redpanda_source, 'src', 'v', 'config', 'sasl_mechanisms.h')
|
|
450
|
-
|
|
451
|
-
if not os.path.exists(sasl_mechanisms_file):
|
|
452
|
-
logger.debug(f"sasl_mechanisms.h not found at {sasl_mechanisms_file}")
|
|
453
|
-
return None
|
|
454
|
-
|
|
455
|
-
try:
|
|
456
|
-
with open(sasl_mechanisms_file, 'r', encoding='utf-8') as f:
|
|
457
|
-
content = f.read()
|
|
458
|
-
|
|
459
|
-
# Pattern to match the enterprise_sasl_mechanisms array definition
|
|
460
|
-
# inline constexpr auto enterprise_sasl_mechanisms = std::to_array<std::string_view>({gssapi, oauthbearer});
|
|
461
|
-
pattern = r'inline\s+constexpr\s+auto\s+enterprise_sasl_mechanisms\s*=\s*std::to_array<[^>]+>\s*\(\s*\{\s*([^}]+)\s*\}\s*\)'
|
|
462
|
-
|
|
463
|
-
match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
|
|
464
|
-
if match:
|
|
465
|
-
# Extract the identifiers from the array (e.g., "gssapi, oauthbearer")
|
|
466
|
-
identifiers_str = match.group(1).strip()
|
|
467
|
-
|
|
468
|
-
# Split by comma and clean up whitespace
|
|
469
|
-
identifiers = [id.strip() for id in identifiers_str.split(',') if id.strip()]
|
|
470
|
-
|
|
471
|
-
# Resolve each identifier to its actual string value
|
|
472
|
-
mechanisms = []
|
|
473
|
-
for identifier in identifiers:
|
|
474
|
-
resolved_value = resolve_constexpr_identifier(identifier)
|
|
475
|
-
if resolved_value:
|
|
476
|
-
mechanisms.append(resolved_value)
|
|
477
|
-
else:
|
|
478
|
-
logger.debug(f"Could not resolve SASL mechanism identifier: {identifier}")
|
|
479
|
-
# Fallback: use the identifier name in uppercase
|
|
480
|
-
mechanisms.append(identifier.upper())
|
|
481
|
-
|
|
482
|
-
if mechanisms:
|
|
483
|
-
logger.debug(f"Resolved enterprise SASL mechanisms: {mechanisms}")
|
|
484
|
-
return mechanisms
|
|
485
|
-
else:
|
|
486
|
-
logger.debug("Could not find enterprise_sasl_mechanisms definition in sasl_mechanisms.h")
|
|
487
|
-
return None
|
|
488
|
-
|
|
489
|
-
except (OSError, UnicodeDecodeError, re.error) as e:
|
|
490
|
-
logger.debug(f"Error reading {sasl_mechanisms_file}: {e}")
|
|
491
|
-
return None
|
|
492
|
-
|
|
493
|
-
|
|
494
617
|
def validate_paths(options):
|
|
495
618
|
"""
|
|
496
619
|
Validate that required file-system paths referenced by `options` exist and exit the process on failure.
|
|
@@ -520,7 +643,7 @@ def validate_paths(options):
|
|
|
520
643
|
def get_file_pairs(options):
|
|
521
644
|
path = Path(options.path)
|
|
522
645
|
|
|
523
|
-
file_iter = path.rglob("*.h") if options.recursive else path.
|
|
646
|
+
file_iter = path.rglob("*.h") if options.recursive else path.glob("*.h")
|
|
524
647
|
|
|
525
648
|
file_pairs = []
|
|
526
649
|
|
|
@@ -573,8 +696,19 @@ def get_files_with_properties(file_pairs, treesitter_parser, cpp_language):
|
|
|
573
696
|
|
|
574
697
|
def transform_files_with_properties(files_with_properties):
|
|
575
698
|
type_transformer = TypeTransformer()
|
|
699
|
+
|
|
700
|
+
# Initialize ConstantResolver for validator enum extraction
|
|
701
|
+
redpanda_src = find_redpanda_source()
|
|
702
|
+
constant_resolver = None
|
|
703
|
+
if redpanda_src:
|
|
704
|
+
src_v_path = Path(redpanda_src) / 'src' / 'v'
|
|
705
|
+
if src_v_path.exists():
|
|
706
|
+
constant_resolver = ConstantResolver(src_v_path)
|
|
707
|
+
logger.debug(f"Initialized ConstantResolver with path: {src_v_path}")
|
|
708
|
+
|
|
576
709
|
transformers = [
|
|
577
710
|
EnterpriseTransformer(), ## this must be the first, as it modifies current data
|
|
711
|
+
ParamNormalizerTransformer(),
|
|
578
712
|
TypeTransformer(),
|
|
579
713
|
MetaParamTransformer(),
|
|
580
714
|
BasicInfoTransformer(),
|
|
@@ -585,6 +719,7 @@ def transform_files_with_properties(files_with_properties):
|
|
|
585
719
|
VisibilityTransformer(),
|
|
586
720
|
DeprecatedTransformer(),
|
|
587
721
|
IsSecretTransformer(),
|
|
722
|
+
ExampleTransformer(),
|
|
588
723
|
NumericBoundsTransformer(type_transformer),
|
|
589
724
|
DurationBoundsTransformer(type_transformer),
|
|
590
725
|
SimpleDefaultValuesTransformer(),
|
|
@@ -593,14 +728,20 @@ def transform_files_with_properties(files_with_properties):
|
|
|
593
728
|
AliasTransformer(),
|
|
594
729
|
]
|
|
595
730
|
|
|
731
|
+
# Add enum extractors if we have a constant_resolver
|
|
732
|
+
if constant_resolver:
|
|
733
|
+
transformers.append(ValidatorEnumExtractor(constant_resolver))
|
|
734
|
+
transformers.append(RuntimeValidationEnumExtractor(constant_resolver))
|
|
735
|
+
|
|
596
736
|
all_properties = PropertyBag()
|
|
597
737
|
|
|
598
738
|
for fp, properties in files_with_properties:
|
|
599
739
|
for name in properties:
|
|
600
740
|
# ignore private properties
|
|
601
|
-
if
|
|
741
|
+
if UNDERSCORE_PREFIX_PATTERN.match(name):
|
|
602
742
|
continue
|
|
603
743
|
|
|
744
|
+
|
|
604
745
|
property_definition = PropertyBag()
|
|
605
746
|
|
|
606
747
|
for transformer in transformers:
|
|
@@ -613,6 +754,143 @@ def transform_files_with_properties(files_with_properties):
|
|
|
613
754
|
return all_properties
|
|
614
755
|
|
|
615
756
|
|
|
757
|
+
def apply_transformers_to_topic_properties(topic_properties):
    """
    Apply the metadata transformers to topic properties that were extracted separately.

    Every entry is copied into a fresh PropertyBag seeded with its existing
    data, then each transformer whose ``accepts()`` returns a truthy value is
    run against it, in list order. The PropertyBag is passed both as the
    "info" argument and as the output argument of ``parse()``, so transformers
    read from and write to the same object.

    Args:
        topic_properties: Mapping of property name to property data.

    Returns:
        A new PropertyBag keyed by property name, or ``topic_properties``
        itself (unchanged) when it is empty or None.
    """
    if not topic_properties:
        return topic_properties

    # Topic properties have no source file pair; transformers receive this
    # placeholder where they would normally get one.
    mock_fp = "topic_properties"

    # Only the transformers that are relevant for topic properties
    transformers = [
        NeedsRestartTransformer(),  # Provides the needs_restart field
        VisibilityTransformer(),
        DeprecatedTransformer(),
        IsSecretTransformer(),
        ExperimentalTransformer(),
        EnterpriseTransformer(),  # Needed to set the is_enterprise field
    ]

    transformed_properties = PropertyBag()

    for prop_name, prop_data in topic_properties.items():
        property_definition = PropertyBag(prop_data)  # Start with existing data

        for transformer in transformers:
            if transformer.accepts(property_definition, mock_fp):
                transformer.parse(property_definition, property_definition, mock_fp)

        transformed_properties[prop_name] = property_definition

    # Use the module logger (not the root logger) like the rest of this file
    logger.info(f"Applied transformers to {len(transformed_properties)} topic properties")
    return transformed_properties
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
def filter_referenced_definitions(properties, definitions):
    """
    Filter definitions to only include types that are actually referenced by properties.

    Performs transitive closure: if type A references type B, both are included.
    A type counts as referenced when a dict anywhere in the walked structure has
    either a ``$ref`` of the form ``#/definitions/<name>`` or a ``c_type`` that
    is a key of ``definitions``.

    Args:
        properties: Dict of property definitions
        definitions: Dict of all type definitions

    Returns:
        dict: Filtered definitions containing only referenced types
    """
    ref_prefix = '#/definitions/'
    # Doubles as the "already visited" set, which keeps cyclic type graphs finite
    referenced = set()

    def mark_referenced(type_name):
        """Record type_name and, the first time it is seen, walk its definition."""
        if type_name in referenced:
            return
        referenced.add(type_name)
        if type_name in definitions:
            collect_references(definitions[type_name])

    def collect_references(obj):
        """Recursively collect type references from properties and definitions."""
        if isinstance(obj, dict):
            ref = obj.get('$ref')
            # Non-string $ref values are ignored instead of raising
            if isinstance(ref, str) and ref.startswith(ref_prefix):
                # Slice off only the leading prefix; str.replace would also
                # remove later occurrences inside the type name
                mark_referenced(ref[len(ref_prefix):])

            c_type = obj.get('c_type')
            if isinstance(c_type, str) and c_type and c_type in definitions:
                mark_referenced(c_type)

            # Recurse into nested objects
            for value in obj.values():
                collect_references(value)

        elif isinstance(obj, list):
            for item in obj:
                collect_references(item)

    # Collect all references from properties
    collect_references(properties)

    # Filter definitions to only referenced types
    filtered = {k: v for k, v in definitions.items() if k in referenced}

    logger.info(f"📉 Filtered definitions from {len(definitions)} to {len(filtered)} (only referenced types)")

    return filtered
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def clean_private_fields_from_definitions(definitions):
    """
    Strip underscore-prefixed fields from the ``properties`` of each definition.

    Definitions with no ``properties`` entry (or an empty one) are passed
    through untouched. A definition whose fields are all private is left out
    of the result entirely. Input definitions are not mutated; a definition
    that has properties is returned as a shallow copy carrying the filtered
    ``properties`` mapping.

    Args:
        definitions: Dictionary of type definitions

    Returns:
        Dictionary with private fields filtered out
    """
    result = {}
    removed = 0

    for name, spec in definitions.items():
        # Nothing to filter (for example enums): keep the definition as-is
        if 'properties' not in spec or not spec['properties']:
            result[name] = spec
            continue

        fields = spec['properties']
        public_fields = {}
        for field_name, field_spec in fields.items():
            if field_name.startswith('_'):
                removed += 1
            else:
                public_fields[field_name] = field_spec

        # A definition left with no public field is dropped altogether
        if not public_fields:
            continue

        updated = dict(spec)
        updated['properties'] = public_fields
        result[name] = updated

    if removed > 0:
        logger.info(f"🧹 Cleaned {removed} private fields from definitions")

    return result
|
|
892
|
+
|
|
893
|
+
|
|
616
894
|
# The definitions.json file contains type definitions that the extractor uses to standardize and centralize type information. After extracting and transforming the properties from the source code, the function merge_properties_and_definitions looks up each property's type in the definitions. If a property's type (or the type of its items, in the case of arrays) matches one of the definitions, the transformer replaces that type with a JSON pointer (such as #/definitions/<type>) to the corresponding entry in definitions.json. The final JSON output then includes both a properties section (with types now referencing the definitions) and a definitions section, so that consumers of the output can easily resolve the full type information.
|
|
617
895
|
def merge_properties_and_definitions(properties, definitions):
|
|
618
896
|
# Do not overwrite the resolved type/default with a reference. Just return the resolved properties and definitions.
|
|
@@ -621,67 +899,45 @@ def merge_properties_and_definitions(properties, definitions):
|
|
|
621
899
|
|
|
622
900
|
def apply_property_overrides(properties, overrides, overrides_file_path=None):
    """
    Apply overrides from an overrides mapping to the extracted properties, mutating and returning the properties dictionary.

    Processes entries in overrides["properties"]; for each override key the function:
    - If the key matches a property dictionary key, applies the override to that property.
    - Otherwise, searches existing properties for an entry whose `"name"` equals the override key and applies the override if found.
    - If no matching property is found, creates a new property from the override and adds it under the override key.

    How an individual override is merged, and how a new property is built, is
    delegated to `_apply_override_to_existing_property` and
    `_create_property_from_override`; `overrides_file_path` is forwarded to both.

    Parameters:
        properties (dict): Mapping of existing property entries (modified in-place).
        overrides (dict): Loaded overrides structure; only keys under "properties" are processed.
        overrides_file_path (str|None): Filesystem path of the overrides file, passed through to the helpers.

    Returns:
        dict: The same properties mapping with overrides applied and any new properties created.
    """
    if not overrides or "properties" not in overrides:
        return properties

    # Sentinel so that a falsy key (such as "") still counts as "found"
    not_found = object()

    # Tolerate an explicit null "properties" entry in the overrides file
    for prop, override in (overrides["properties"] or {}).items():
        # First check if property exists by key
        if prop in properties:
            _apply_override_to_existing_property(properties[prop], override, overrides_file_path)
            continue

        # Check if property exists by name field (handles cases where key != name)
        existing_property_key = next(
            (
                key
                for key, property_data in properties.items()
                if hasattr(property_data, 'get') and property_data.get('name') == prop
            ),
            not_found,
        )

        if existing_property_key is not not_found:
            # Found existing property by name, apply overrides to it
            logger.info(f"Applying override to existing property '{prop}' (found by name, key='{existing_property_key}')")
            _apply_override_to_existing_property(properties[existing_property_key], override, overrides_file_path)
        else:
            # Create new property from override
            logger.info(f"Creating new property from override: {prop}")
            properties[prop] = _create_property_from_override(prop, override, overrides_file_path)

    return properties
|
|
686
942
|
|
|
687
943
|
|
|
@@ -911,25 +1167,426 @@ def add_config_scope(properties):
|
|
|
911
1167
|
return properties
|
|
912
1168
|
|
|
913
1169
|
|
|
1170
|
+
def map_enum_defaults(properties):
    """
    Map enum default values to their user-facing strings using enum_string_mappings.

    For every property that has a non-empty ``enum`` list and a string
    ``default`` that is not already in that list, the ``enum_string_mappings``
    tables of all enum type definitions in ``_type_definitions_cache`` are
    searched for the raw default value.

    When several tables contain the raw value, the one whose mapped string is
    a member of the property's own ``enum`` list is preferred, so that a
    same-named raw value from an unrelated enum cannot produce a default
    outside the property's allowed values. If no candidate is in the list,
    the first match is used.

    Args:
        properties (dict): Properties with resolved types and enum constraints

    Returns:
        dict: The same mapping, with enum defaults rewritten in place
    """
    # Reading a module-level name does not need a `global` statement
    if not _type_definitions_cache:
        return properties

    # The mapping tables do not depend on the property, so collect them once
    mapping_tables = []
    for type_def in _type_definitions_cache.values():
        if type_def.get("type") != "enum":
            continue
        mappings = type_def.get("enum_string_mappings")
        if mappings:
            mapping_tables.append(mappings)

    for prop_name, prop in properties.items():
        # Skip if not an enum property or no default
        if not prop.get("enum") or "default" not in prop:
            continue

        default = prop.get("default")
        enum_values = prop.get("enum", [])

        # Skip if default is None or already in the enum list
        if default is None or default in enum_values:
            continue

        # Only raw string values can be keys of a mapping table
        if not isinstance(default, str):
            continue

        candidates = [table[default] for table in mapping_tables if default in table]
        if not candidates:
            continue

        # Prefer a mapping that lands inside this property's enum list
        mapped_value = next(
            (candidate for candidate in candidates if candidate in enum_values),
            candidates[0],
        )
        prop["default"] = mapped_value
        logger.debug(f"✓ Mapped enum default for {prop_name}: {default} → {mapped_value}")

    return properties
|
|
1223
|
+
|
|
1224
|
+
|
|
1225
|
+
def format_time_human_readable(value, unit):
    """
    Convert a numeric time value to a human-readable string.

    The value is normalised to milliseconds and expressed in the largest of
    year (365 days), week, day, hour, minute, second or millisecond that
    divides it evenly.

    Args:
        value: Numeric amount of time
        unit: Unit of ``value``: 'ms', 's', 'min', 'h' or 'd'. Any other unit
            is treated as milliseconds.

    Returns:
        Human-readable string like "1 week", "1 hour", "30 minutes". When no
        unit divides the value evenly (for example 0 or a negative value),
        the original value followed by its own unit name is returned.
    """
    # (milliseconds per unit, plural name used by the fallback)
    input_units = {
        'ms': (1, 'milliseconds'),
        's': (1000, 'seconds'),
        'min': (60 * 1000, 'minutes'),
        'h': (60 * 60 * 1000, 'hours'),
        'd': (24 * 60 * 60 * 1000, 'days'),
    }
    factor, fallback_name = input_units.get(unit, input_units['ms'])

    # Convert to milliseconds for uniform handling
    ms = value * factor

    # Time unit thresholds in milliseconds, largest first
    units = [
        (365 * 24 * 60 * 60 * 1000, 'year', 'years'),
        (7 * 24 * 60 * 60 * 1000, 'week', 'weeks'),
        (24 * 60 * 60 * 1000, 'day', 'days'),
        (60 * 60 * 1000, 'hour', 'hours'),
        (60 * 1000, 'minute', 'minutes'),
        (1000, 'second', 'seconds'),
        (1, 'millisecond', 'milliseconds'),
    ]

    # Find the largest unit that divides evenly
    for threshold, singular, plural in units:
        if ms >= threshold and ms % threshold == 0:
            count = ms // threshold
            unit_name = singular if count == 1 else plural
            return f"{int(count)} {unit_name}"

    # If no clean division, return the original with units
    return f"{value} {fallback_name}"
|
|
1265
|
+
|
|
1266
|
+
|
|
1267
|
+
def evaluate_chrono_expressions(properties):
    """
    Evaluate chrono expressions in default values and convert to numeric values.
    Also adds a human-readable version (``default_human_readable``) for templates.

    Examples:
        - "24h * 365" -> 31536000000 (for milliseconds)
        - "7 * 24h" -> 604800 (for seconds)
        - "1h" -> 3600000 (for milliseconds) or 3600 (for seconds)

    Conversion factors:
        - ms (milliseconds): 1
        - s (seconds): 1000 ms
        - min (minutes): 60000 ms
        - h (hours): 3600000 ms
        - d (days): 86400000 ms

    Only string defaults are considered. An expression is evaluated when the
    property's ``c_type`` mentions chrono/duration or the default contains a
    unit letter or an arithmetic operator. Expressions that cannot be
    evaluated are left untouched.

    The arithmetic is evaluated with a restricted AST walker (numeric
    literals, ``+ - * /``, unary sign and parentheses only) rather than
    ``eval()``, so text taken from the source tree can never execute code.
    This relies on ``ast.Constant`` for numeric literals (Python 3.8+).

    Args:
        properties: Mapping of property name to property data (modified in place).

    Returns:
        The same ``properties`` mapping.
    """
    import ast
    import math
    import operator
    import re

    # Conversion factors to milliseconds
    time_units = {
        'ms': 1,
        's': 1000,
        'min': 60 * 1000,
        'h': 60 * 60 * 1000,
        'd': 24 * 60 * 60 * 1000,
    }

    # (c_type marker, milliseconds per target unit, unit suffix), checked in order
    chrono_targets = (
        ('std::chrono::milliseconds', 1, 'ms'),
        ('std::chrono::seconds', 1000, 's'),
        ('std::chrono::minutes', 60 * 1000, 'min'),
        ('std::chrono::hours', 60 * 60 * 1000, 'h'),
    )

    # Used to express minute/hour results in seconds for the formatter
    seconds_per_unit = {'min': 60, 'h': 60 * 60}

    operand_pattern = re.compile(r'^(\d+(?:\.\d+)?)(ms|s|min|h|d)?$')
    operator_pattern = re.compile(r'(\s*[*/+\-]\s*)')

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def parse_time_value(expr):
        """Parse '24h' or '365'; return (value, has_unit) or None if it is not a number."""
        match = operand_pattern.match(expr.strip())
        if not match:
            return None

        number = float(match.group(1))
        suffix = match.group(2)
        if suffix:
            return number * time_units[suffix], True
        # Bare number - assume it's already in target units
        return number, False

    def evaluate_arithmetic(node):
        """Evaluate a parsed expression made only of numbers and + - * /."""
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is an int subclass; reject it together with str/complex/None
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError("unsupported constant")
            return value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](
                evaluate_arithmetic(node.left), evaluate_arithmetic(node.right)
            )
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate_arithmetic(node.operand))
        raise ValueError("unsupported expression")

    def evaluate_expression(expr_str):
        """Evaluate an expression with time units; return (value, has_unit) or None."""
        expr_str = expr_str.strip()

        # Handle simple cases first (just a time value)
        simple_value = parse_time_value(expr_str)
        if simple_value is not None:
            return simple_value

        # Handle expressions like "24h * 365" or "7 * 24h":
        # replace time values with their millisecond equivalents
        evaluated_tokens = []
        has_unit = False
        for token in operator_pattern.split(expr_str):
            token = token.strip()
            if not token:
                continue
            if token in ('*', '/', '+', '-'):
                evaluated_tokens.append(token)
                continue
            parsed = parse_time_value(token)
            if parsed is None:
                # Not a time value, keep as is; the evaluator decides
                evaluated_tokens.append(token)
            else:
                value, unit_seen = parsed
                has_unit = has_unit or unit_seen
                evaluated_tokens.append(str(value))

        try:
            tree = ast.parse(' '.join(evaluated_tokens), mode='eval')
            result = evaluate_arithmetic(tree.body)
            # inf/nan cannot be converted to int later on
            if not math.isfinite(result):
                return None
        except (SyntaxError, ValueError, TypeError, ZeroDivisionError,
                OverflowError, RecursionError, MemoryError):
            return None
        return result, has_unit

    converted_count = 0

    for prop_name, prop in properties.items():
        default = prop.get('default')

        # Only process string defaults
        if not isinstance(default, str):
            continue

        # c_type may be present but None
        c_type = prop.get('c_type') or ''

        # Check if it's a chrono type or looks like a time expression.
        # NOTE(review): the unit test is a plain substring check, so any string
        # containing 's', 'h' or 'd' is attempted; purely numeric strings with
        # operators (for example "2024-01-01") are evaluated as arithmetic.
        # Confirm whether non-chrono properties should be restricted further.
        is_chrono = 'chrono' in c_type or 'duration' in c_type.lower()
        has_time_expr = (
            any(unit in default for unit in ('ms', 's', 'min', 'h', 'd'))
            or any(op in default for op in ('*', '+', '-', '/'))
        )

        if not (is_chrono or has_time_expr):
            continue

        evaluated = evaluate_expression(default)
        if evaluated is None:
            continue
        amount, has_unit = evaluated

        # Pick the output unit from the C++ type; generic duration types and
        # everything else default to milliseconds
        divisor, unit = 1, 'ms'
        for marker, ms_per_unit, suffix in chrono_targets:
            if marker in c_type:
                divisor, unit = ms_per_unit, suffix
                break

        if has_unit and divisor != 1:
            # The amount is in milliseconds; convert to the target unit
            result = int(amount / divisor)
        else:
            # Either already milliseconds, or a unit-less expression that is
            # taken to be in the target unit and must not be divided again
            result = int(amount)

        prop['default'] = result

        # Add human-readable version for templates. Minutes and hours are
        # passed as seconds so the value is never misread as milliseconds.
        if unit in seconds_per_unit:
            human_readable = format_time_human_readable(result * seconds_per_unit[unit], 's')
        else:
            human_readable = format_time_human_readable(result, unit)
        prop['default_human_readable'] = human_readable

        converted_count += 1
        logger.debug(f"Evaluated chrono expression for {prop_name}: '{default}' -> {result} ({human_readable})")

    if converted_count > 0:
        logger.info(f"Evaluated {converted_count} chrono expressions in default values")

    return properties
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
def resolve_type_with_namespace(type_name, definitions):
    """
    Look up a type definition, falling back to well-known namespace prefixes.

    The name is tried exactly as given first; if that misses, it is retried
    as ``<namespace>::<type_name>`` for each of config, model, security, net,
    kafka and pandaproxy, in that order. The first hit wins.

    Args:
        type_name: Type name to resolve (may be unqualified)
        definitions: Dictionary of type definitions

    Returns:
        The matching definition, or an empty dict when nothing matches
    """
    candidates = [type_name]
    candidates.extend(
        f"{prefix}::{type_name}"
        for prefix in ('config', 'model', 'security', 'net', 'kafka', 'pandaproxy')
    )

    for candidate in candidates:
        if candidate in definitions:
            return definitions[candidate]

    # Neither the plain nor any qualified name is known
    return {}
|
|
1435
|
+
|
|
1436
|
+
|
|
914
1437
|
def resolve_type_and_default(properties, definitions):
|
|
915
1438
|
"""
|
|
916
1439
|
Normalize property types and expand C++-style default values into JSON-compatible Python structures.
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
1440
|
+
|
|
1441
|
+
============================================================================
|
|
1442
|
+
TYPE RESOLUTION SYSTEM - How C++ Types Become JSON Schema Types
|
|
1443
|
+
============================================================================
|
|
1444
|
+
|
|
1445
|
+
This function bridges C++ type system with JSON Schema by:
|
|
1446
|
+
1. Resolving definition references ($ref pointers) to actual type structures
|
|
1447
|
+
2. Expanding C++ constructors into JSON-compatible default values
|
|
1448
|
+
3. Ensuring type consistency between properties and their defaults
|
|
1449
|
+
4. Handling special array/optional type patterns from Redpanda source
|
|
1450
|
+
|
|
1451
|
+
TYPE RESOLUTION FLOW:
|
|
1452
|
+
┌─────────────────────────────────────────────────────────────────────────
|
|
1453
|
+
│ C++ Source:
|
|
1454
|
+
│ property<model::broker_endpoint> admin(...,
|
|
1455
|
+
│ model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))
|
|
1456
|
+
│ )
|
|
1457
|
+
│
|
|
1458
|
+
│ ↓ TypeTransformer (transformers.py)
|
|
1459
|
+
│ type: "broker_endpoint" (extracted from template parameter)
|
|
1460
|
+
│ default: "model::broker_endpoint(net::unresolved_address(\"127.0.0.1\", 9644))"
|
|
1461
|
+
│
|
|
1462
|
+
│ ↓ Definition Lookup (definitions dict)
|
|
1463
|
+
│ definitions["broker_endpoint"] = {
|
|
1464
|
+
│ "type": "object",
|
|
1465
|
+
│ "properties": {"address": {"type": "string"}, "port": {"type": "integer"}}
|
|
1466
|
+
│ }
|
|
1467
|
+
│
|
|
1468
|
+
│ ↓ Constructor Expansion (this function)
|
|
1469
|
+
│ type: "object" (resolved from definition)
|
|
1470
|
+
│ default: {"address": "127.0.0.1", "port": 9644} (expanded constructor)
|
|
1471
|
+
│
|
|
1472
|
+
│ ↓ JSON Output:
|
|
1473
|
+
│ "admin": {
|
|
1474
|
+
│ "type": "object",
|
|
1475
|
+
│ "properties": {...},
|
|
1476
|
+
│ "default": {"address": "127.0.0.1", "port": 9644}
|
|
1477
|
+
│ }
|
|
1478
|
+
└─────────────────────────────────────────────────────────────────────────
|
|
1479
|
+
|
|
1480
|
+
DEFINITION SYSTEM - Reusable Type Structures:
|
|
1481
|
+
┌─────────────────────────────────────────────────────────────────────────
|
|
1482
|
+
│ Purpose: Definitions centralize complex type information to avoid
|
|
1483
|
+
│ repeating structure across multiple properties.
|
|
1484
|
+
│
|
|
1485
|
+
│ Source: definitions.json contains hand-crafted JSON Schema definitions
|
|
1486
|
+
│ for Redpanda's C++ types (endpoints, durations, enums, etc.)
|
|
1487
|
+
│
|
|
1488
|
+
│ Usage in Properties:
|
|
1489
|
+
│ Before resolution: type: "broker_endpoint"
|
|
1490
|
+
│ After resolution: type: "object" + properties from definition
|
|
1491
|
+
│
|
|
1492
|
+
│ $ref Pointers:
|
|
1493
|
+
│ Some definitions use JSON Schema $ref to reference other definitions:
|
|
1494
|
+
│ {"$ref": "#/definitions/compression"} → resolve recursively
|
|
1495
|
+
│
|
|
1496
|
+
│ Definition Structure:
|
|
1497
|
+
│ {
|
|
1498
|
+
│ "compression": {
|
|
1499
|
+
│ "type": "string",
|
|
1500
|
+
│ "enum": ["gzip", "snappy", "lz4", "zstd", "none"]
|
|
1501
|
+
│ },
|
|
1502
|
+
│ "broker_endpoint": {
|
|
1503
|
+
│ "type": "object",
|
|
1504
|
+
│ "properties": {
|
|
1505
|
+
│ "address": {"type": "string"},
|
|
1506
|
+
│ "port": {"type": "integer"}
|
|
1507
|
+
│ }
|
|
1508
|
+
│ }
|
|
1509
|
+
│ }
|
|
1510
|
+
└─────────────────────────────────────────────────────────────────────────
|
|
1511
|
+
|
|
1512
|
+
CONSTRUCTOR EXPANSION - C++ to JSON Conversion:
|
|
1513
|
+
┌─────────────────────────────────────────────────────────────────────────
|
|
1514
|
+
│ SIMPLE PRIMITIVES:
|
|
1515
|
+
│ C++: 9092 → JSON: 9092
|
|
1516
|
+
│ C++: "localhost" → JSON: "localhost"
|
|
1517
|
+
│ C++: true → JSON: true
|
|
1518
|
+
│
|
|
1519
|
+
│ ENUM VALUES:
|
|
1520
|
+
│ C++: model::compression::gzip → JSON: "gzip"
|
|
1521
|
+
│ Pattern: namespace::type::value → Extract final value
|
|
1522
|
+
│
|
|
1523
|
+
│ CONSTRUCTORS:
|
|
1524
|
+
│ C++: net::unresolved_address("127.0.0.1", 9644)
|
|
1525
|
+
│ → Parse: type=unresolved_address, args=["127.0.0.1", 9644]
|
|
1526
|
+
│ → Lookup definition for "unresolved_address"
|
|
1527
|
+
│ → Match args to definition properties by position
|
|
1528
|
+
│ → Result: {"address": "127.0.0.1", "port": 9644}
|
|
1529
|
+
│
|
|
1530
|
+
│ ARRAYS:
|
|
1531
|
+
│ C++: std::vector<int>{1, 2, 3} → JSON: [1, 2, 3]
|
|
1532
|
+
│ C++: {1, 2, 3} → JSON: [1, 2, 3]
|
|
1533
|
+
│ Special: one_or_many_property wraps single values in arrays
|
|
1534
|
+
│
|
|
1535
|
+
│ CHRONO DURATIONS:
|
|
1536
|
+
│ C++: std::chrono::seconds{30} → JSON: 30 (with units in description)
|
|
1537
|
+
│ C++: std::chrono::milliseconds{5000} → JSON: 5000
|
|
1538
|
+
│
|
|
1539
|
+
│ OPTIONAL TYPES:
|
|
1540
|
+
│ C++: std::optional<int>{} → JSON: null
|
|
1541
|
+
│ C++: std::optional<int>{42} → JSON: 42
|
|
1542
|
+
└─────────────────────────────────────────────────────────────────────────
|
|
1543
|
+
|
|
1544
|
+
SPECIAL HANDLING:
|
|
1545
|
+
┌─────────────────────────────────────────────────────────────────────────
|
|
1546
|
+
│ one_or_many_property<T>:
|
|
1547
|
+
│ - Always treated as array type in JSON
|
|
1548
|
+
│ - Single default values are wrapped: {x:1} → [{x:1}]
|
|
1549
|
+
│ - Already-array defaults preserved: [{x:1}] → [{x:1}]
|
|
1550
|
+
│
|
|
1551
|
+
│ Array Items Type Resolution:
|
|
1552
|
+
│ - If items.type references a definition, resolve it:
|
|
1553
|
+
│ items.type: "endpoint_tls_config" → items: {...definition...}
|
|
1554
|
+
│ - Ensures array item validation has full type information
|
|
1555
|
+
│
|
|
1556
|
+
│ Enterprise Values:
|
|
1557
|
+
│ - enterprise_value strings expanded via process_enterprise_value()
|
|
1558
|
+
│ - Converts license restriction patterns to user-friendly strings
|
|
1559
|
+
└─────────────────────────────────────────────────────────────────────────
|
|
1560
|
+
|
|
1561
|
+
HOW TO ADD NEW TYPE DEFINITIONS:
|
|
1562
|
+
1. Identify the C++ type that needs a definition (e.g., new_endpoint_type)
|
|
1563
|
+
2. Analyze the C++ struct/class to determine JSON schema structure
|
|
1564
|
+
3. Add entry to definitions.json with appropriate JSON Schema:
|
|
1565
|
+
{
|
|
1566
|
+
"new_endpoint_type": {
|
|
1567
|
+
"type": "object",
|
|
1568
|
+
"properties": {"field1": {"type": "string"}, "field2": {"type": "integer"}}
|
|
1569
|
+
}
|
|
1570
|
+
}
|
|
1571
|
+
4. TypeTransformer will automatically extract the type name from C++
|
|
1572
|
+
5. This function will look up the definition and expand constructors
|
|
1573
|
+
6. Test with a property using the new type to verify expansion
|
|
1574
|
+
|
|
920
1575
|
Parameters:
|
|
921
|
-
properties (dict):
|
|
922
|
-
|
|
923
|
-
|
|
1576
|
+
properties (dict): Property name → metadata dict with keys: "type", "default",
|
|
1577
|
+
"items", "enterprise_value" that will be modified in-place
|
|
1578
|
+
definitions (dict): Type name → JSON Schema definition used for lookups
|
|
1579
|
+
and constructor expansion
|
|
1580
|
+
|
|
924
1581
|
Returns:
|
|
925
|
-
dict: The same `properties`
|
|
1582
|
+
dict: The same `properties` dict after in-place type normalization and
|
|
1583
|
+
default value expansion
|
|
926
1584
|
"""
|
|
927
1585
|
import ast
|
|
928
1586
|
import re
|
|
929
1587
|
|
|
930
1588
|
def resolve_definition_type(defn):
|
|
931
1589
|
"""Recursively resolve $ref pointers to get the actual type definition."""
|
|
932
|
-
# Recursively resolve $ref
|
|
933
1590
|
while isinstance(defn, dict) and "$ref" in defn:
|
|
934
1591
|
ref = defn["$ref"]
|
|
935
1592
|
ref_name = ref.split("/")[-1]
|
|
@@ -954,18 +1611,15 @@ def resolve_type_and_default(properties, definitions):
|
|
|
954
1611
|
original_s = s
|
|
955
1612
|
if s.startswith("{") and s.endswith("}"):
|
|
956
1613
|
s = s[1:-1].strip()
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
match = re.match(r'([a-zA-Z0-9_:]+)\((.*)\)', s)
|
|
1614
|
+
|
|
1615
|
+
match = CONSTRUCTOR_PATTERN.match(s)
|
|
960
1616
|
if match:
|
|
961
1617
|
type_name, arg_str = match.groups()
|
|
962
1618
|
else:
|
|
963
|
-
|
|
964
|
-
match = re.match(r'([a-zA-Z0-9_:]+)\{(.*)\}', s)
|
|
1619
|
+
match = BRACED_CONSTRUCTOR_PATTERN.match(s)
|
|
965
1620
|
if match:
|
|
966
1621
|
type_name, arg_str = match.groups()
|
|
967
1622
|
else:
|
|
968
|
-
# Primitive or enum
|
|
969
1623
|
if s.startswith('"') and s.endswith('"'):
|
|
970
1624
|
return None, [ast.literal_eval(s)]
|
|
971
1625
|
try:
|
|
@@ -997,7 +1651,7 @@ def resolve_type_and_default(properties, definitions):
|
|
|
997
1651
|
def process_cpp_patterns(arg_str):
|
|
998
1652
|
"""
|
|
999
1653
|
Convert a C++-style expression string into a JSON-friendly literal representation.
|
|
1000
|
-
|
|
1654
|
+
|
|
1001
1655
|
This function recognises common C++ patterns produced by the extractor and maps them to values suitable for JSON schema defaults and examples. Handled cases include:
|
|
1002
1656
|
- std::nullopt -> null
|
|
1003
1657
|
- zero-argument functions (e.g., model::kafka_audit_logging_topic()) resolved from source when possible
|
|
@@ -1005,30 +1659,27 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1005
1659
|
- constexpr identifiers and simple string constructors resolved to their literal strings when available
|
|
1006
1660
|
- known default constructors and truncated type names mapped to sensible defaults (e.g., duration -> 0, path -> "")
|
|
1007
1661
|
- simple heuristics for unknown constructors and concatenated expressions
|
|
1008
|
-
|
|
1662
|
+
|
|
1009
1663
|
Returns:
|
|
1010
1664
|
processed (str): A string representing the JSON-ready value (for example: '"value"', 'null', '0', or the original input when no mapping applied).
|
|
1011
1665
|
"""
|
|
1012
1666
|
arg_str = arg_str.strip()
|
|
1013
1667
|
# Remove C++ digit separators (apostrophes) that may appear in numeric literals
|
|
1014
|
-
# Example: "30'000ms" -> "30000ms"
|
|
1015
|
-
arg_str =
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
if arg_str == "std::nullopt":
|
|
1668
|
+
# Example: "30'000ms" -> "30000ms"
|
|
1669
|
+
arg_str = DIGIT_SEPARATOR_PATTERN.sub('', arg_str)
|
|
1670
|
+
|
|
1671
|
+
if arg_str == "std::nullopt" or arg_str == "nullopt":
|
|
1019
1672
|
return "null"
|
|
1020
|
-
|
|
1021
|
-
#
|
|
1022
|
-
|
|
1023
|
-
function_call_match = re.match(r'([a-zA-Z0-9_:]+)\(\)', arg_str)
|
|
1673
|
+
|
|
1674
|
+
# Dynamically resolve C++ function calls by looking up their return values in source
|
|
1675
|
+
function_call_match = FUNCTION_CALL_PATTERN.match(arg_str)
|
|
1024
1676
|
if function_call_match:
|
|
1025
1677
|
function_name = function_call_match.group(1)
|
|
1026
1678
|
resolved_value = resolve_cpp_function_call(function_name)
|
|
1027
1679
|
if resolved_value is not None:
|
|
1028
1680
|
return f'"{resolved_value}"'
|
|
1029
1681
|
|
|
1030
|
-
|
|
1031
|
-
chrono_match = re.match(r'std::chrono::([a-zA-Z]+)\s*\{\s*(\d+)\s*\}', arg_str)
|
|
1682
|
+
chrono_match = CHRONO_PATTERN.match(arg_str)
|
|
1032
1683
|
if chrono_match:
|
|
1033
1684
|
unit = chrono_match.group(1)
|
|
1034
1685
|
value = chrono_match.group(2)
|
|
@@ -1043,64 +1694,130 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1043
1694
|
short = unit_map.get(unit.lower(), unit)
|
|
1044
1695
|
return f'"{value} {short}"'
|
|
1045
1696
|
|
|
1046
|
-
# Handle
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1697
|
+
# Handle chrono literals with parentheses like chrono::milliseconds(5min) -> "5 minutes"
|
|
1698
|
+
chrono_paren_match = CHRONO_PAREN_PATTERN.match(arg_str)
|
|
1699
|
+
if chrono_paren_match:
|
|
1700
|
+
unit = chrono_paren_match.group(1)
|
|
1701
|
+
value = chrono_paren_match.group(2).strip()
|
|
1702
|
+
|
|
1703
|
+
inner_time_match = TIME_UNIT_PATTERN.match(value)
|
|
1704
|
+
if inner_time_match:
|
|
1705
|
+
num, suffix = inner_time_match.groups()
|
|
1706
|
+
inner_unit_map = {
|
|
1707
|
+
"min": "minute",
|
|
1708
|
+
"s": "second",
|
|
1709
|
+
"ms": "millisecond",
|
|
1710
|
+
"h": "hour",
|
|
1711
|
+
}
|
|
1712
|
+
base = inner_unit_map.get(suffix, suffix)
|
|
1713
|
+
if num != "1" and not base.endswith("s"):
|
|
1714
|
+
base = base + "s"
|
|
1715
|
+
return f'"{num} {base}"'
|
|
1716
|
+
|
|
1717
|
+
# Evaluate arithmetic in duration constructors (e.g., "60 * 5" -> "300 seconds")
|
|
1718
|
+
if "*" in value:
|
|
1719
|
+
try:
|
|
1720
|
+
result = safe_arithmetic_eval(value)
|
|
1721
|
+
unit_map = {
|
|
1722
|
+
'hours': 'hour',
|
|
1723
|
+
'minutes': 'minute',
|
|
1724
|
+
'seconds': 'second',
|
|
1725
|
+
'milliseconds': 'millisecond',
|
|
1726
|
+
'microseconds': 'microsecond',
|
|
1727
|
+
'nanoseconds': 'nanosecond'
|
|
1728
|
+
}
|
|
1729
|
+
base = unit_map.get(unit.lower(), unit)
|
|
1730
|
+
if result != 1 and not base.endswith("s"):
|
|
1731
|
+
base = base + "s"
|
|
1732
|
+
return f'"{result} {base}"'
|
|
1733
|
+
except (ValueError, Exception):
|
|
1734
|
+
pass
|
|
1735
|
+
|
|
1736
|
+
try:
|
|
1737
|
+
num = int(value)
|
|
1738
|
+
unit_map = {
|
|
1739
|
+
'hours': 'hour',
|
|
1740
|
+
'minutes': 'minute',
|
|
1741
|
+
'seconds': 'second',
|
|
1742
|
+
'milliseconds': 'millisecond',
|
|
1743
|
+
'microseconds': 'microsecond',
|
|
1744
|
+
'nanoseconds': 'nanosecond'
|
|
1745
|
+
}
|
|
1746
|
+
base = unit_map.get(unit.lower(), unit)
|
|
1747
|
+
if num != 1 and not base.endswith("s"):
|
|
1748
|
+
base = base + "s"
|
|
1749
|
+
return f'"{num} {base}"'
|
|
1750
|
+
except ValueError:
|
|
1751
|
+
return f'"{value} {unit}"'
|
|
1752
|
+
|
|
1753
|
+
address_match = ADDRESS_PATTERN.match(arg_str)
|
|
1754
|
+
if address_match:
|
|
1755
|
+
addr = address_match.group(1).strip().strip('"')
|
|
1756
|
+
port = address_match.group(2).strip()
|
|
1757
|
+
try:
|
|
1758
|
+
port_val = int(port)
|
|
1759
|
+
return f'"{addr}:{port_val}"'
|
|
1760
|
+
except ValueError:
|
|
1761
|
+
return f'"{addr}:{port}"'
|
|
1762
|
+
|
|
1763
|
+
keyval_match = KEYVAL_PATTERN.match(arg_str)
|
|
1764
|
+
if keyval_match:
|
|
1765
|
+
key = keyval_match.group(1)
|
|
1766
|
+
value = keyval_match.group(2)
|
|
1767
|
+
processed_value = process_cpp_patterns(value)
|
|
1768
|
+
if processed_value.startswith('"') and processed_value.endswith('"'):
|
|
1769
|
+
processed_value = processed_value[1:-1]
|
|
1770
|
+
return processed_value
|
|
1771
|
+
|
|
1772
|
+
# Extract enum value from qualified identifiers (fips_mode_flag::disabled -> "disabled")
|
|
1773
|
+
# ENUM_PATTERN uses anchors to avoid matching constructor syntax (config::type{})
|
|
1774
|
+
enum_match = ENUM_PATTERN.match(arg_str)
|
|
1050
1775
|
if enum_match:
|
|
1051
1776
|
enum_value = enum_match.group(1)
|
|
1052
1777
|
return f'"{enum_value}"'
|
|
1053
|
-
|
|
1054
|
-
#
|
|
1055
|
-
|
|
1056
|
-
if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', arg_str):
|
|
1778
|
+
|
|
1779
|
+
# Resolve constexpr identifiers by looking up their values in source files
|
|
1780
|
+
if IDENTIFIER_PATTERN.match(arg_str):
|
|
1057
1781
|
resolved_value = resolve_constexpr_identifier(arg_str)
|
|
1058
1782
|
if resolved_value is not None:
|
|
1059
1783
|
return f'"{resolved_value}"'
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
sstring_match = re.match(r'ss::sstring\{([a-zA-Z_][a-zA-Z0-9_]*)\}', arg_str)
|
|
1784
|
+
|
|
1785
|
+
sstring_match = SSTRING_PATTERN.match(arg_str)
|
|
1063
1786
|
if sstring_match:
|
|
1064
1787
|
identifier = sstring_match.group(1)
|
|
1065
1788
|
resolved_value = resolve_constexpr_identifier(identifier)
|
|
1066
1789
|
if resolved_value is not None:
|
|
1067
1790
|
return f'"{resolved_value}"'
|
|
1068
1791
|
else:
|
|
1069
|
-
# Fallback to the identifier itself
|
|
1070
1792
|
return f'"{identifier}"'
|
|
1071
|
-
|
|
1072
|
-
#
|
|
1073
|
-
#
|
|
1074
|
-
|
|
1075
|
-
# Pattern 1: Full constructor syntax like config::leaders_preference{}
|
|
1793
|
+
|
|
1794
|
+
# Map C++ default constructors to their runtime values
|
|
1795
|
+
# These patterns are derived from analyzing the C++ source implementations
|
|
1076
1796
|
constructor_patterns = {
|
|
1077
|
-
r'config::leaders_preference\{\}': '"none"', #
|
|
1797
|
+
r'config::leaders_preference\{\}': '"none"', # type_t::none is default
|
|
1078
1798
|
r'std::chrono::seconds\{0\}': '0',
|
|
1079
1799
|
r'std::chrono::milliseconds\{0\}': '0',
|
|
1080
1800
|
r'model::timeout_clock::duration\{\}': '0',
|
|
1081
1801
|
r'config::data_directory_path\{\}': '""',
|
|
1082
|
-
r'std::optional<[^>]+>\{\}': 'null',
|
|
1802
|
+
r'std::optional<[^>]+>\{\}': 'null',
|
|
1083
1803
|
}
|
|
1084
|
-
|
|
1804
|
+
|
|
1085
1805
|
for pattern, replacement in constructor_patterns.items():
|
|
1086
1806
|
if re.match(pattern, arg_str):
|
|
1087
1807
|
return replacement
|
|
1088
|
-
|
|
1089
|
-
#
|
|
1090
|
-
# These are cases where tree-sitter parsing truncated "config::type{}" to just "type"
|
|
1808
|
+
|
|
1809
|
+
# Fallback mappings for truncated type names (tree-sitter may truncate constructors)
|
|
1091
1810
|
truncated_patterns = {
|
|
1092
|
-
'leaders_preference': '"none"',
|
|
1093
|
-
'data_directory_path': '""',
|
|
1094
|
-
'timeout_clock_duration': '0',
|
|
1095
|
-
'log_level': '"info"',
|
|
1096
|
-
'compression_type': '"none"',
|
|
1811
|
+
'leaders_preference': '"none"',
|
|
1812
|
+
'data_directory_path': '""',
|
|
1813
|
+
'timeout_clock_duration': '0',
|
|
1814
|
+
'log_level': '"info"',
|
|
1815
|
+
'compression_type': '"none"',
|
|
1097
1816
|
}
|
|
1098
|
-
|
|
1099
|
-
# Check if arg_str is exactly one of these truncated patterns
|
|
1817
|
+
|
|
1100
1818
|
if arg_str in truncated_patterns:
|
|
1101
1819
|
return truncated_patterns[arg_str]
|
|
1102
|
-
|
|
1103
|
-
# Pattern 3: Handle remaining default constructor syntax generically
|
|
1820
|
+
|
|
1104
1821
|
generic_constructor_match = re.match(r'[a-zA-Z0-9_:]+\{\}', arg_str)
|
|
1105
1822
|
if generic_constructor_match:
|
|
1106
1823
|
# For unknown constructors, try to infer a reasonable default
|
|
@@ -1169,8 +1886,12 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1169
1886
|
else:
|
|
1170
1887
|
return processed_arg
|
|
1171
1888
|
|
|
1172
|
-
type_def = resolve_definition_type(
|
|
1889
|
+
type_def = resolve_definition_type(resolve_type_with_namespace(type_name, definitions))
|
|
1173
1890
|
if "enum" in type_def:
|
|
1891
|
+
# Strip C++ namespace qualifiers from enum values
|
|
1892
|
+
# e.g., model::partition_autobalancing_mode::continuous → continuous
|
|
1893
|
+
if isinstance(default_str, str) and '::' in default_str:
|
|
1894
|
+
return default_str.split('::')[-1]
|
|
1174
1895
|
return default_str
|
|
1175
1896
|
# If it has properties but no explicit type, it's an object
|
|
1176
1897
|
if type_def.get("type") == "object" or (type_def.get("properties") and not type_def.get("type")):
|
|
@@ -1180,10 +1901,12 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1180
1901
|
|
|
1181
1902
|
props = list(type_def["properties"].keys())
|
|
1182
1903
|
result = {}
|
|
1183
|
-
|
|
1904
|
+
|
|
1184
1905
|
# For each constructor argument, try to expand it and map to the correct property
|
|
1185
1906
|
for i, prop in enumerate(props):
|
|
1186
1907
|
prop_def = type_def["properties"][prop]
|
|
1908
|
+
# Strip leading underscore from private field names for public API
|
|
1909
|
+
public_prop_name = prop.lstrip('_')
|
|
1187
1910
|
if "$ref" in prop_def:
|
|
1188
1911
|
sub_type = prop_def["$ref"].split("/")[-1]
|
|
1189
1912
|
else:
|
|
@@ -1199,33 +1922,35 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1199
1922
|
# Get the definition for the nested type
|
|
1200
1923
|
nested_type_def = resolve_definition_type(definitions.get(nested_tname, {}))
|
|
1201
1924
|
nested_props = list(nested_type_def.get("properties", {}).keys())
|
|
1202
|
-
|
|
1925
|
+
|
|
1203
1926
|
# Expand the nested constructor by mapping its arguments to its properties
|
|
1204
1927
|
nested_result = {}
|
|
1205
1928
|
for j, nested_prop in enumerate(nested_props):
|
|
1206
1929
|
nested_prop_def = nested_type_def["properties"][nested_prop]
|
|
1930
|
+
# Strip leading underscore from private field names for public API
|
|
1931
|
+
public_nested_prop_name = nested_prop.lstrip('_')
|
|
1207
1932
|
if j < len(nested_args):
|
|
1208
1933
|
nested_arg = nested_args[j]
|
|
1209
1934
|
# Apply simple C++ pattern processing to the argument
|
|
1210
1935
|
processed_nested_arg = process_cpp_patterns(nested_arg)
|
|
1211
|
-
|
|
1936
|
+
|
|
1212
1937
|
# Convert the processed argument based on the property type
|
|
1213
1938
|
if nested_prop_def.get("type") == "string":
|
|
1214
1939
|
if processed_nested_arg.startswith('"') and processed_nested_arg.endswith('"'):
|
|
1215
|
-
nested_result[
|
|
1940
|
+
nested_result[public_nested_prop_name] = ast.literal_eval(processed_nested_arg)
|
|
1216
1941
|
else:
|
|
1217
|
-
nested_result[
|
|
1942
|
+
nested_result[public_nested_prop_name] = processed_nested_arg
|
|
1218
1943
|
elif nested_prop_def.get("type") == "integer":
|
|
1219
1944
|
try:
|
|
1220
|
-
nested_result[
|
|
1945
|
+
nested_result[public_nested_prop_name] = int(processed_nested_arg)
|
|
1221
1946
|
except ValueError:
|
|
1222
|
-
nested_result[
|
|
1947
|
+
nested_result[public_nested_prop_name] = processed_nested_arg
|
|
1223
1948
|
elif nested_prop_def.get("type") == "boolean":
|
|
1224
|
-
nested_result[
|
|
1949
|
+
nested_result[public_nested_prop_name] = processed_nested_arg.lower() == "true"
|
|
1225
1950
|
else:
|
|
1226
|
-
nested_result[
|
|
1951
|
+
nested_result[public_nested_prop_name] = processed_nested_arg
|
|
1227
1952
|
else:
|
|
1228
|
-
nested_result[
|
|
1953
|
+
nested_result[public_nested_prop_name] = None
|
|
1229
1954
|
|
|
1230
1955
|
# Now we have the expanded nested object, we need to map it to the parent object's properties
|
|
1231
1956
|
# This is where the type-aware mapping happens
|
|
@@ -1244,39 +1969,40 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1244
1969
|
result.update(nested_result)
|
|
1245
1970
|
# Set remaining properties to None
|
|
1246
1971
|
for remaining_prop in props[i+1:]:
|
|
1247
|
-
|
|
1248
|
-
|
|
1972
|
+
public_remaining_prop = remaining_prop.lstrip('_')
|
|
1973
|
+
if public_remaining_prop not in result:
|
|
1974
|
+
result[public_remaining_prop] = None
|
|
1249
1975
|
break
|
|
1250
1976
|
else:
|
|
1251
1977
|
# Map the nested object to the current property
|
|
1252
|
-
result[
|
|
1978
|
+
result[public_prop_name] = nested_result
|
|
1253
1979
|
else:
|
|
1254
1980
|
# Fallback: recursively expand with the expected property type
|
|
1255
1981
|
expanded_arg = expand_default(sub_type, arg)
|
|
1256
|
-
result[
|
|
1982
|
+
result[public_prop_name] = expanded_arg
|
|
1257
1983
|
else:
|
|
1258
1984
|
# Simple value, parse based on the property type
|
|
1259
1985
|
# First apply C++ pattern processing
|
|
1260
1986
|
processed_arg = process_cpp_patterns(arg)
|
|
1261
|
-
|
|
1987
|
+
|
|
1262
1988
|
if sub_type == "string":
|
|
1263
1989
|
# If processed_arg is already quoted, use ast.literal_eval, otherwise keep as is
|
|
1264
1990
|
if processed_arg.startswith('"') and processed_arg.endswith('"'):
|
|
1265
|
-
result[
|
|
1991
|
+
result[public_prop_name] = ast.literal_eval(processed_arg)
|
|
1266
1992
|
else:
|
|
1267
|
-
result[
|
|
1993
|
+
result[public_prop_name] = processed_arg
|
|
1268
1994
|
elif sub_type == "integer":
|
|
1269
1995
|
try:
|
|
1270
|
-
result[
|
|
1996
|
+
result[public_prop_name] = int(processed_arg)
|
|
1271
1997
|
except ValueError:
|
|
1272
1998
|
# If conversion fails, keep as string (might be processed C++ pattern)
|
|
1273
|
-
result[
|
|
1999
|
+
result[public_prop_name] = processed_arg
|
|
1274
2000
|
elif sub_type == "boolean":
|
|
1275
|
-
result[
|
|
2001
|
+
result[public_prop_name] = processed_arg.lower() == "true"
|
|
1276
2002
|
else:
|
|
1277
|
-
result[
|
|
2003
|
+
result[public_prop_name] = processed_arg
|
|
1278
2004
|
else:
|
|
1279
|
-
result[
|
|
2005
|
+
result[public_prop_name] = None
|
|
1280
2006
|
return result
|
|
1281
2007
|
elif type_def.get("type") == "array":
|
|
1282
2008
|
# Handle array defaults with C++ initializer list syntax like {model::broker_endpoint(...)}
|
|
@@ -1361,24 +2087,45 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1361
2087
|
for prop in properties.values():
|
|
1362
2088
|
t = prop.get("type")
|
|
1363
2089
|
ref_name = None
|
|
1364
|
-
|
|
1365
|
-
|
|
2090
|
+
|
|
2091
|
+
|
|
2092
|
+
# Handle both JSON pointer references and direct type names (including C++ types)
|
|
1366
2093
|
if isinstance(t, str):
|
|
1367
2094
|
if t.startswith("#/definitions/"):
|
|
1368
2095
|
ref_name = t.split("/")[-1]
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
2096
|
+
else:
|
|
2097
|
+
# Try to resolve the type with namespace prefixes
|
|
2098
|
+
resolved_def = resolve_type_with_namespace(t, definitions)
|
|
2099
|
+
if resolved_def:
|
|
2100
|
+
# Find the actual key name that matched
|
|
2101
|
+
if t in definitions:
|
|
2102
|
+
ref_name = t
|
|
2103
|
+
else:
|
|
2104
|
+
# Try namespace-qualified versions
|
|
2105
|
+
for namespace in ['config', 'model', 'security', 'net', 'kafka', 'pandaproxy']:
|
|
2106
|
+
qualified = f"{namespace}::{t}"
|
|
2107
|
+
if qualified in definitions:
|
|
2108
|
+
ref_name = qualified
|
|
2109
|
+
break
|
|
2110
|
+
|
|
2111
|
+
if ref_name:
|
|
2112
|
+
defn = resolve_type_with_namespace(ref_name, definitions) if ref_name not in definitions else definitions.get(ref_name)
|
|
1374
2113
|
if defn:
|
|
1375
2114
|
resolved = resolve_definition_type(defn)
|
|
1376
2115
|
# Always set type to the resolved type string (object, string, etc.)
|
|
1377
2116
|
resolved_type = resolved.get("type")
|
|
1378
|
-
|
|
2117
|
+
|
|
2118
|
+
# Special handling for enum types
|
|
2119
|
+
if resolved_type == "enum" or "enum" in resolved:
|
|
2120
|
+
# Enums are represented as strings with an enum constraint in JSON Schema
|
|
2121
|
+
prop["type"] = "string"
|
|
2122
|
+
if "enum" in resolved:
|
|
2123
|
+
prop["enum"] = resolved["enum"]
|
|
2124
|
+
elif resolved_type in ("object", "string", "integer", "boolean", "array", "number"):
|
|
1379
2125
|
prop["type"] = resolved_type
|
|
1380
2126
|
else:
|
|
1381
2127
|
prop["type"] = "object" # fallback for complex types
|
|
2128
|
+
|
|
1382
2129
|
# Expand default if possible
|
|
1383
2130
|
if "default" in prop and prop["default"] is not None:
|
|
1384
2131
|
expanded = expand_default(ref_name, prop["default"])
|
|
@@ -1494,24 +2241,70 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1494
2241
|
# This handles cases like admin_api_tls: "{}" -> []
|
|
1495
2242
|
prop["default"] = []
|
|
1496
2243
|
|
|
1497
|
-
# Also handle array item types
|
|
1498
|
-
|
|
2244
|
+
# Also handle array item types - resolve C++ type references
|
|
2245
|
+
# Note: Check for 'items' field regardless of type, since some transformers may overwrite
|
|
2246
|
+
# the type from "array" to "object" while leaving the items field behind
|
|
2247
|
+
if "items" in prop:
|
|
1499
2248
|
items_type = prop["items"].get("type")
|
|
1500
|
-
if isinstance(items_type, str)
|
|
1501
|
-
|
|
1502
|
-
if
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
2249
|
+
if isinstance(items_type, str):
|
|
2250
|
+
# Check if items_type is a C++ type that needs resolution
|
|
2251
|
+
if items_type in definitions:
|
|
2252
|
+
item_defn = definitions.get(items_type)
|
|
2253
|
+
if item_defn:
|
|
2254
|
+
resolved_item = resolve_definition_type(item_defn)
|
|
2255
|
+
resolved_item_type = resolved_item.get("type")
|
|
2256
|
+
if resolved_item_type in ("object", "string", "integer", "boolean", "array", "number"):
|
|
2257
|
+
prop["items"]["type"] = resolved_item_type
|
|
2258
|
+
else:
|
|
2259
|
+
prop["items"]["type"] = "object" # fallback for complex types
|
|
2260
|
+
# If not in definitions but looks like a C++ type, apply fallback logic
|
|
2261
|
+
elif "::" in items_type or items_type.endswith(">") or items_type.endswith("_t") or items_type.startswith("std::"):
|
|
2262
|
+
# Apply same heuristics as for unresolved property types
|
|
2263
|
+
if any(word in items_type.lower() for word in ["int", "long", "short", "double", "float", "number", "_id"]):
|
|
2264
|
+
prop["items"]["type"] = "integer"
|
|
2265
|
+
elif any(word in items_type.lower() for word in ["bool"]):
|
|
2266
|
+
prop["items"]["type"] = "boolean"
|
|
2267
|
+
elif any(word in items_type.lower() for word in ["string", "str", "path", "url", "name"]):
|
|
2268
|
+
prop["items"]["type"] = "string"
|
|
1507
2269
|
else:
|
|
1508
|
-
|
|
2270
|
+
# Default to object for complex types (config::*, model::*, etc.)
|
|
2271
|
+
prop["items"]["type"] = "object"
|
|
2272
|
+
logger.debug(f"Resolved C++ type in items: {items_type} -> {prop['items']['type']} (for property '{prop.get('name', 'unknown')}')")
|
|
1509
2273
|
|
|
1510
2274
|
# Final pass: apply C++ pattern processing to any remaining unprocessed defaults
|
|
1511
2275
|
for prop in properties.values():
|
|
1512
2276
|
if "default" in prop:
|
|
1513
2277
|
default_value = prop["default"]
|
|
1514
2278
|
|
|
2279
|
+
# Special handling for arrays containing key-value patterns like "'key': 'value'"
|
|
2280
|
+
if isinstance(default_value, list) and len(default_value) > 0:
|
|
2281
|
+
# Check if this looks like an array of key-value patterns
|
|
2282
|
+
all_keyval_patterns = True
|
|
2283
|
+
for item in default_value:
|
|
2284
|
+
if not isinstance(item, str) or not re.match(r"'[^']+'\s*:\s*'[^']+'", item):
|
|
2285
|
+
all_keyval_patterns = False
|
|
2286
|
+
break
|
|
2287
|
+
|
|
2288
|
+
if all_keyval_patterns:
|
|
2289
|
+
# Convert array of key-value strings to a single object
|
|
2290
|
+
result_object = {}
|
|
2291
|
+
for item in default_value:
|
|
2292
|
+
keyval_match = re.match(r"'([^']+)'\s*:\s*'([^']+)'", item)
|
|
2293
|
+
if keyval_match:
|
|
2294
|
+
key = keyval_match.group(1)
|
|
2295
|
+
value = keyval_match.group(2)
|
|
2296
|
+
# Process the value part
|
|
2297
|
+
processed_value = process_cpp_patterns(value)
|
|
2298
|
+
if processed_value.startswith('"') and processed_value.endswith('"'):
|
|
2299
|
+
processed_value = processed_value[1:-1] # Remove outer quotes
|
|
2300
|
+
result_object[key] = processed_value
|
|
2301
|
+
|
|
2302
|
+
# Convert the array type to object since we're now storing an object
|
|
2303
|
+
prop["default"] = result_object
|
|
2304
|
+
if prop.get("type") == "array":
|
|
2305
|
+
prop["type"] = "object"
|
|
2306
|
+
continue # Skip further processing for this property
|
|
2307
|
+
|
|
1515
2308
|
if isinstance(default_value, str):
|
|
1516
2309
|
# Process string defaults
|
|
1517
2310
|
processed = process_cpp_patterns(default_value)
|
|
@@ -1544,27 +2337,29 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1544
2337
|
# Map constructor arguments to type properties
|
|
1545
2338
|
for j, nested_prop in enumerate(nested_props):
|
|
1546
2339
|
nested_prop_def = nested_type_def["properties"][nested_prop]
|
|
2340
|
+
# Strip all leading underscores from private field names for the public API
|
|
2341
|
+
public_nested_prop_name = nested_prop.lstrip('_')
|
|
1547
2342
|
if j < len(args):
|
|
1548
2343
|
nested_arg = args[j]
|
|
1549
2344
|
processed_nested_arg = process_cpp_patterns(nested_arg)
|
|
1550
|
-
|
|
2345
|
+
|
|
1551
2346
|
# Convert based on property type
|
|
1552
2347
|
if nested_prop_def.get("type") == "string":
|
|
1553
2348
|
if processed_nested_arg.startswith('"') and processed_nested_arg.endswith('"'):
|
|
1554
|
-
nested_result[
|
|
2349
|
+
nested_result[public_nested_prop_name] = ast.literal_eval(processed_nested_arg)
|
|
1555
2350
|
else:
|
|
1556
|
-
nested_result[
|
|
2351
|
+
nested_result[public_nested_prop_name] = processed_nested_arg
|
|
1557
2352
|
elif nested_prop_def.get("type") == "integer":
|
|
1558
2353
|
try:
|
|
1559
|
-
nested_result[
|
|
2354
|
+
nested_result[public_nested_prop_name] = int(processed_nested_arg)
|
|
1560
2355
|
except ValueError:
|
|
1561
|
-
nested_result[
|
|
2356
|
+
nested_result[public_nested_prop_name] = processed_nested_arg
|
|
1562
2357
|
elif nested_prop_def.get("type") == "boolean":
|
|
1563
|
-
nested_result[
|
|
2358
|
+
nested_result[public_nested_prop_name] = processed_nested_arg.lower() == "true"
|
|
1564
2359
|
else:
|
|
1565
|
-
nested_result[
|
|
2360
|
+
nested_result[public_nested_prop_name] = processed_nested_arg
|
|
1566
2361
|
else:
|
|
1567
|
-
nested_result[
|
|
2362
|
+
nested_result[public_nested_prop_name] = None
|
|
1568
2363
|
|
|
1569
2364
|
# For special case of net::unresolved_address inside broker_authn_endpoint
|
|
1570
2365
|
if tname == "net::unresolved_address":
|
|
@@ -1655,7 +2450,93 @@ def resolve_type_and_default(properties, definitions):
|
|
|
1655
2450
|
if isinstance(enterprise_value, str):
|
|
1656
2451
|
processed_enterprise = process_enterprise_value(enterprise_value)
|
|
1657
2452
|
prop["enterprise_value"] = processed_enterprise
|
|
1658
|
-
|
|
2453
|
+
|
|
2454
|
+
# FINAL COMPREHENSIVE PASS: Ensure NO C++ types remain in the output
|
|
2455
|
+
# This catches any edge cases that earlier passes missed
|
|
2456
|
+
for prop_name, prop in properties.items():
|
|
2457
|
+
# Check property type field
|
|
2458
|
+
if isinstance(prop.get("type"), str) and ("::" in prop["type"] or prop["type"].endswith(">")):
|
|
2459
|
+
logger.warning(f"Found unresolved C++ type in property '{prop_name}': {prop['type']}")
|
|
2460
|
+
# Apply smart fallback resolution
|
|
2461
|
+
cpp_type = prop["type"]
|
|
2462
|
+
if any(word in cpp_type.lower() for word in ["int", "long", "short", "double", "float", "number", "_id"]):
|
|
2463
|
+
prop["type"] = "integer"
|
|
2464
|
+
elif any(word in cpp_type.lower() for word in ["bool"]):
|
|
2465
|
+
prop["type"] = "boolean"
|
|
2466
|
+
elif any(word in cpp_type.lower() for word in ["string", "str", "path", "url", "name"]):
|
|
2467
|
+
prop["type"] = "string"
|
|
2468
|
+
else:
|
|
2469
|
+
# Default to object for complex types (config::*, model::*, etc.)
|
|
2470
|
+
prop["type"] = "object"
|
|
2471
|
+
logger.info(f" Resolved to: {prop['type']}")
|
|
2472
|
+
|
|
2473
|
+
# Check items.type field for arrays
|
|
2474
|
+
if prop.get("type") == "array" and "items" in prop:
|
|
2475
|
+
items_type = prop["items"].get("type")
|
|
2476
|
+
if isinstance(items_type, str) and ("::" in items_type or items_type.endswith(">")):
|
|
2477
|
+
logger.warning(f"Found unresolved C++ type in property '{prop_name}' items: {items_type}")
|
|
2478
|
+
# Apply smart fallback resolution
|
|
2479
|
+
if any(word in items_type.lower() for word in ["int", "long", "short", "double", "float", "number", "_id"]):
|
|
2480
|
+
prop["items"]["type"] = "integer"
|
|
2481
|
+
elif any(word in items_type.lower() for word in ["bool"]):
|
|
2482
|
+
prop["items"]["type"] = "boolean"
|
|
2483
|
+
elif any(word in items_type.lower() for word in ["string", "str", "path", "url", "name"]):
|
|
2484
|
+
prop["items"]["type"] = "string"
|
|
2485
|
+
else:
|
|
2486
|
+
# Default to object for complex types (config::*, model::*, etc.)
|
|
2487
|
+
prop["items"]["type"] = "object"
|
|
2488
|
+
logger.info(f" Resolved to: {prop['items']['type']}")
|
|
2489
|
+
|
|
2490
|
+
# Check items.$ref field
|
|
2491
|
+
# Only warn if it's NOT a JSON pointer (valid JSON pointers start with #/)
|
|
2492
|
+
items_ref = prop["items"].get("$ref")
|
|
2493
|
+
if isinstance(items_ref, str) and "::" in items_ref:
|
|
2494
|
+
if items_ref.startswith("#/definitions/"):
|
|
2495
|
+
# This is a valid JSON pointer - extract and resolve the definition name
|
|
2496
|
+
ref_type = items_ref.split("/")[-1]
|
|
2497
|
+
if ref_type in definitions:
|
|
2498
|
+
resolved = resolve_definition_type(definitions[ref_type])
|
|
2499
|
+
resolved_type = resolved.get("type", "object")
|
|
2500
|
+
prop["items"]["type"] = resolved_type
|
|
2501
|
+
del prop["items"]["$ref"]
|
|
2502
|
+
logger.debug(f"Resolved items.$ref '{items_ref}' to '{resolved_type}' for property '{prop_name}'")
|
|
2503
|
+
else:
|
|
2504
|
+
logger.warning(f"Cannot resolve items.$ref '{items_ref}' - definition not found for property '{prop_name}'")
|
|
2505
|
+
else:
|
|
2506
|
+
# Raw C++ type name (not a JSON pointer) - this is an error
|
|
2507
|
+
logger.warning(f"Found raw C++ type in property '{prop_name}' items.$ref: {items_ref}")
|
|
2508
|
+
if items_ref in definitions:
|
|
2509
|
+
resolved = resolve_definition_type(definitions[items_ref])
|
|
2510
|
+
resolved_type = resolved.get("type", "object")
|
|
2511
|
+
prop["items"]["type"] = resolved_type
|
|
2512
|
+
del prop["items"]["$ref"]
|
|
2513
|
+
logger.info(f" Resolved to: {prop['items']['type']}")
|
|
2514
|
+
|
|
2515
|
+
# Check $ref field at property level
|
|
2516
|
+
# Only warn if it's NOT a JSON pointer
|
|
2517
|
+
prop_ref = prop.get("$ref")
|
|
2518
|
+
if isinstance(prop_ref, str) and "::" in prop_ref:
|
|
2519
|
+
if prop_ref.startswith("#/definitions/"):
|
|
2520
|
+
# This is a valid JSON pointer - extract and resolve the definition name
|
|
2521
|
+
ref_type = prop_ref.split("/")[-1]
|
|
2522
|
+
if ref_type in definitions:
|
|
2523
|
+
resolved = resolve_definition_type(definitions[ref_type])
|
|
2524
|
+
resolved_type = resolved.get("type", "object")
|
|
2525
|
+
prop["type"] = resolved_type
|
|
2526
|
+
del prop["$ref"]
|
|
2527
|
+
logger.debug(f"Resolved $ref '{prop_ref}' to '{resolved_type}' for property '{prop_name}'")
|
|
2528
|
+
else:
|
|
2529
|
+
logger.warning(f"Cannot resolve $ref '{prop_ref}' - definition not found for property '{prop_name}'")
|
|
2530
|
+
else:
|
|
2531
|
+
# Raw C++ type name (not a JSON pointer) - this is an error
|
|
2532
|
+
logger.warning(f"Found raw C++ type in property '{prop_name}' $ref: {prop_ref}")
|
|
2533
|
+
if prop_ref in definitions:
|
|
2534
|
+
resolved = resolve_definition_type(definitions[prop_ref])
|
|
2535
|
+
resolved_type = resolved.get("type", "object")
|
|
2536
|
+
prop["type"] = resolved_type
|
|
2537
|
+
del prop["$ref"]
|
|
2538
|
+
logger.info(f" Resolved to: {prop['type']}")
|
|
2539
|
+
|
|
1659
2540
|
return properties
|
|
1660
2541
|
|
|
1661
2542
|
|
|
@@ -1733,7 +2614,7 @@ def extract_topic_properties(source_path):
|
|
|
1733
2614
|
"description": prop_data.get("description", ""),
|
|
1734
2615
|
"type": prop_data.get("type", "string"),
|
|
1735
2616
|
"config_scope": "topic",
|
|
1736
|
-
"
|
|
2617
|
+
"defined_in": prop_data.get("defined_in", ""),
|
|
1737
2618
|
"corresponding_cluster_property": prop_data.get("corresponding_cluster_property", ""),
|
|
1738
2619
|
"acceptable_values": prop_data.get("acceptable_values", ""),
|
|
1739
2620
|
"is_deprecated": False,
|
|
@@ -1754,7 +2635,8 @@ def main():
|
|
|
1754
2635
|
CLI entry point that extracts Redpanda configuration properties from C++ sources and emits JSON outputs.
|
|
1755
2636
|
|
|
1756
2637
|
Runs a full extraction and transformation pipeline:
|
|
1757
|
-
- Parses command-line options (required: --path). Optional flags include --recursive, --output, --enhanced-output, --
|
|
2638
|
+
- Parses command-line options (required: --path). Optional flags include --recursive, --output, --enhanced-output, --overrides, --cloud-support, and --verbose.
|
|
2639
|
+
- The --overrides file can contain both property overrides (under "properties" key) and definition overrides (under "definitions" key).
|
|
1758
2640
|
- Validates input paths and collects header/.cc file pairs.
|
|
1759
2641
|
- Initializes Tree-sitter C++ parser and extracts configuration properties from source files (optionally augmented with topic properties).
|
|
1760
2642
|
- Produces two outputs:
|
|
@@ -1767,7 +2649,9 @@ def main():
|
|
|
1767
2649
|
Side effects:
|
|
1768
2650
|
- Reads and writes files, may call external cloud config fetchers, logs to the configured logger, and may call sys.exit() on fatal conditions.
|
|
1769
2651
|
"""
|
|
2652
|
+
global _type_definitions_cache
|
|
1770
2653
|
import argparse
|
|
2654
|
+
from pathlib import Path
|
|
1771
2655
|
|
|
1772
2656
|
def generate_options():
|
|
1773
2657
|
"""
|
|
@@ -1778,8 +2662,12 @@ def main():
|
|
|
1778
2662
|
- --recursive: scan the path recursively.
|
|
1779
2663
|
- --output: file path to write the JSON output (stdout if omitted).
|
|
1780
2664
|
- --enhanced-output: file path to write the enhanced JSON output with overrides applied.
|
|
1781
|
-
- --
|
|
1782
|
-
|
|
2665
|
+
- --overrides: optional JSON file with property and definition overrides. Structure:
|
|
2666
|
+
{
|
|
2667
|
+
"properties": { "property_name": { "description": "...", ... } },
|
|
2668
|
+
"definitions": { "type_name": { "type": "object", "properties": {...} } }
|
|
2669
|
+
}
|
|
2670
|
+
- --definitions: DEPRECATED - use overrides.json with "definitions" key instead.
|
|
1783
2671
|
- --cloud-support: enable fetching cloud metadata from the cloudv2 repository (requires GITHUB_TOKEN and external dependencies such as pyyaml and requests).
|
|
1784
2672
|
- -v / --verbose: enable verbose (DEBUG-level) logging.
|
|
1785
2673
|
|
|
@@ -1798,10 +2686,16 @@ def main():
|
|
|
1798
2686
|
arg_parser.add_argument("--enhanced-output", type=str, help="Enhanced JSON output file path")
|
|
1799
2687
|
|
|
1800
2688
|
# Data sources
|
|
1801
|
-
arg_parser.add_argument(
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
2689
|
+
arg_parser.add_argument(
|
|
2690
|
+
"--definitions",
|
|
2691
|
+
type=str,
|
|
2692
|
+
help="DEPRECATED: Type definitions JSON file (use --overrides with 'definitions' key instead)"
|
|
2693
|
+
)
|
|
2694
|
+
arg_parser.add_argument(
|
|
2695
|
+
"--overrides",
|
|
2696
|
+
type=str,
|
|
2697
|
+
help="JSON file with property and definition overrides. Format: {'properties': {...}, 'definitions': {...}}"
|
|
2698
|
+
)
|
|
1805
2699
|
|
|
1806
2700
|
# Feature flags (set by Makefile from environment variables)
|
|
1807
2701
|
arg_parser.add_argument("--cloud-support", action="store_true", help="Enable cloud metadata")
|
|
@@ -1827,26 +2721,71 @@ def main():
|
|
|
1827
2721
|
logging.error("No h/cc file pairs were found")
|
|
1828
2722
|
sys.exit(-1)
|
|
1829
2723
|
|
|
1830
|
-
|
|
2724
|
+
# DYNAMIC TYPE DEFINITION EXTRACTION
|
|
2725
|
+
# Automatically extract type definitions from C++ source code
|
|
2726
|
+
# This replaces the need for manually maintaining definitions.json
|
|
2727
|
+
logger.info("🔍 Extracting type definitions from C++ source code...")
|
|
1831
2728
|
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
2729
|
+
from type_definition_extractor import extract_definitions_from_source
|
|
2730
|
+
|
|
2731
|
+
try:
|
|
2732
|
+
# Extract definitions from the parent 'v' directory to get all subdirectories
|
|
2733
|
+
# (model, config, net, etc.) since types may be defined in different modules
|
|
2734
|
+
source_root = Path(options.path)
|
|
2735
|
+
|
|
2736
|
+
# If path points to repo root, go down to src/v
|
|
2737
|
+
if (source_root / 'src' / 'v').exists():
|
|
2738
|
+
source_root = source_root / 'src' / 'v'
|
|
2739
|
+
# If path points to a specific subdirectory, go up to the parent 'v' directory
|
|
2740
|
+
elif source_root.name in ('config', 'model', 'net', 'kafka', 'pandaproxy', 'security'):
|
|
2741
|
+
source_root = source_root.parent
|
|
2742
|
+
|
|
2743
|
+
logger.debug(f"Extracting type definitions from: {source_root}")
|
|
2744
|
+
definitions = extract_definitions_from_source(str(source_root))
|
|
2745
|
+
logger.info(f"✅ Extracted {len(definitions)} type definitions dynamically")
|
|
2746
|
+
|
|
2747
|
+
# Store definitions in global cache for transformers to access
|
|
2748
|
+
_type_definitions_cache = definitions
|
|
2749
|
+
except Exception as e:
|
|
2750
|
+
logger.warning(f"Failed to extract dynamic definitions: {e}")
|
|
2751
|
+
definitions = {}
|
|
1839
2752
|
|
|
1840
|
-
# Load property
|
|
2753
|
+
# Load overrides file (contains both property and definition overrides)
|
|
1841
2754
|
overrides = None
|
|
1842
2755
|
if options.overrides:
|
|
1843
2756
|
try:
|
|
1844
2757
|
with open(options.overrides) as f:
|
|
1845
2758
|
overrides = json.load(f)
|
|
2759
|
+
|
|
2760
|
+
# Load definition overrides from the overrides file
|
|
2761
|
+
if overrides and "definitions" in overrides:
|
|
2762
|
+
definition_overrides = overrides["definitions"]
|
|
2763
|
+
num_overrides = len(definition_overrides)
|
|
2764
|
+
definitions.update(definition_overrides)
|
|
2765
|
+
_type_definitions_cache = definitions
|
|
2766
|
+
logger.info(f"📝 Loaded {num_overrides} definition overrides from {options.overrides}")
|
|
1846
2767
|
except Exception as e:
|
|
1847
2768
|
logging.error(f"Failed to load overrides file: {e}")
|
|
1848
2769
|
sys.exit(1)
|
|
1849
2770
|
|
|
2771
|
+
# DEPRECATED: Support legacy --definitions flag for backward compatibility
|
|
2772
|
+
# Users should migrate to putting definitions in overrides.json under "definitions" key
|
|
2773
|
+
if options.definitions and os.path.exists(options.definitions):
|
|
2774
|
+
try:
|
|
2775
|
+
logger.warning("⚠️ --definitions flag is deprecated. Please move definitions to overrides.json under 'definitions' key")
|
|
2776
|
+
with open(options.definitions) as json_file:
|
|
2777
|
+
static_definitions = json.load(json_file)
|
|
2778
|
+
|
|
2779
|
+
# Merge: static overrides take precedence
|
|
2780
|
+
num_overrides = len(static_definitions)
|
|
2781
|
+
definitions.update(static_definitions)
|
|
2782
|
+
_type_definitions_cache = definitions
|
|
2783
|
+
|
|
2784
|
+
logger.info(f"📝 Loaded {num_overrides} legacy definition overrides from {options.definitions}")
|
|
2785
|
+
except json.JSONDecodeError as e:
|
|
2786
|
+
logging.error(f"Failed to parse definitions file: {e}")
|
|
2787
|
+
sys.exit(1)
|
|
2788
|
+
|
|
1850
2789
|
treesitter_dir = os.path.join(os.getcwd(), "tree-sitter/tree-sitter-cpp")
|
|
1851
2790
|
destination_path = os.path.join(treesitter_dir, "tree-sitter-cpp.so")
|
|
1852
2791
|
|
|
@@ -1858,6 +2797,11 @@ def main():
|
|
|
1858
2797
|
treesitter_dir, destination_path
|
|
1859
2798
|
)
|
|
1860
2799
|
|
|
2800
|
+
# Pre-build constexpr cache for performance
|
|
2801
|
+
# This avoids repeated filesystem walks when resolving C++ identifiers and function calls
|
|
2802
|
+
logger.info("🔧 Building constexpr identifier cache...")
|
|
2803
|
+
_constexpr_cache.build_cache(options.path)
|
|
2804
|
+
logger.info(f"✅ Cached {len(_constexpr_cache.constexpr_cache)} constexpr identifiers and {len(_constexpr_cache.function_cache)} functions")
|
|
1861
2805
|
|
|
1862
2806
|
files_with_properties = get_files_with_properties(
|
|
1863
2807
|
file_pairs, treesitter_parser, cpp_language
|
|
@@ -1867,9 +2811,28 @@ def main():
|
|
|
1867
2811
|
# Extract topic properties and add them to the main properties dictionary
|
|
1868
2812
|
topic_properties = extract_topic_properties(options.path)
|
|
1869
2813
|
if topic_properties:
|
|
2814
|
+
# Apply transformers to topic properties to ensure they get the same metadata as cluster properties
|
|
2815
|
+
topic_properties = apply_transformers_to_topic_properties(topic_properties)
|
|
1870
2816
|
properties.update(topic_properties)
|
|
1871
2817
|
logging.info(f"Added {len(topic_properties)} topic properties to the main properties collection")
|
|
1872
2818
|
|
|
2819
|
+
# Fix up corresponding_cluster_property mappings
|
|
2820
|
+
# Some cluster properties have a "_default" suffix that the extractor doesn't catch
|
|
2821
|
+
fixup_count = 0
|
|
2822
|
+
for prop_name, prop_data in properties.items():
|
|
2823
|
+
if prop_data.get('is_topic_property') and prop_data.get('corresponding_cluster_property'):
|
|
2824
|
+
cluster_prop = prop_data['corresponding_cluster_property']
|
|
2825
|
+
# Check if the mapped cluster property exists
|
|
2826
|
+
if cluster_prop not in properties:
|
|
2827
|
+
# Try the _default variant
|
|
2828
|
+
default_variant = f'{cluster_prop}_default'
|
|
2829
|
+
if default_variant in properties:
|
|
2830
|
+
prop_data['corresponding_cluster_property'] = default_variant
|
|
2831
|
+
fixup_count += 1
|
|
2832
|
+
|
|
2833
|
+
if fixup_count > 0:
|
|
2834
|
+
logging.info(f"Fixed {fixup_count} cluster property mappings by adding '_default' suffix")
|
|
2835
|
+
|
|
1873
2836
|
# First, create the original properties without overrides for the base JSON output
|
|
1874
2837
|
# 1. Add config_scope field based on which source file defines the property
|
|
1875
2838
|
original_properties = add_config_scope(deepcopy(properties))
|
|
@@ -1893,10 +2856,22 @@ def main():
|
|
|
1893
2856
|
|
|
1894
2857
|
# 3. Resolve type references and expand default values for original properties
|
|
1895
2858
|
original_properties = resolve_type_and_default(original_properties, definitions)
|
|
1896
|
-
|
|
2859
|
+
|
|
2860
|
+
# 4. Map enum default values to user-facing strings
|
|
2861
|
+
original_properties = map_enum_defaults(original_properties)
|
|
2862
|
+
|
|
2863
|
+
# 5. Evaluate chrono expressions in default values
|
|
2864
|
+
original_properties = evaluate_chrono_expressions(original_properties)
|
|
2865
|
+
|
|
2866
|
+
# 6. Filter definitions to only include referenced types (reduces bloat)
|
|
2867
|
+
filtered_definitions = filter_referenced_definitions(original_properties, definitions)
|
|
2868
|
+
|
|
2869
|
+
# 7. Clean private fields from definitions (keep JSON output clean)
|
|
2870
|
+
filtered_definitions = clean_private_fields_from_definitions(filtered_definitions)
|
|
2871
|
+
|
|
1897
2872
|
# Generate original properties JSON (without overrides)
|
|
1898
2873
|
original_properties_and_definitions = merge_properties_and_definitions(
|
|
1899
|
-
original_properties,
|
|
2874
|
+
original_properties, filtered_definitions
|
|
1900
2875
|
)
|
|
1901
2876
|
original_json_output = json.dumps(original_properties_and_definitions, indent=4, sort_keys=True)
|
|
1902
2877
|
|
|
@@ -1914,14 +2889,26 @@ def main():
|
|
|
1914
2889
|
|
|
1915
2890
|
# 4. Resolve type references and expand default values
|
|
1916
2891
|
# This step converts:
|
|
1917
|
-
# - C++ type names (model::broker_endpoint) to JSON schema types (object)
|
|
2892
|
+
# - C++ type names (model::broker_endpoint) to JSON schema types (object)
|
|
1918
2893
|
# - C++ constructor defaults to structured JSON objects
|
|
1919
2894
|
# - Single object defaults to arrays for one_or_many_property types
|
|
1920
2895
|
enhanced_properties = resolve_type_and_default(enhanced_properties, definitions)
|
|
1921
2896
|
|
|
2897
|
+
# 5. Map enum default values to user-facing strings
|
|
2898
|
+
enhanced_properties = map_enum_defaults(enhanced_properties)
|
|
2899
|
+
|
|
2900
|
+
# 6. Evaluate chrono expressions in default values
|
|
2901
|
+
enhanced_properties = evaluate_chrono_expressions(enhanced_properties)
|
|
2902
|
+
|
|
2903
|
+
# 7. Filter definitions to only include referenced types (reduces bloat)
|
|
2904
|
+
filtered_enhanced_definitions = filter_referenced_definitions(enhanced_properties, definitions)
|
|
2905
|
+
|
|
2906
|
+
# 8. Clean private fields from definitions (keep JSON output clean)
|
|
2907
|
+
filtered_enhanced_definitions = clean_private_fields_from_definitions(filtered_enhanced_definitions)
|
|
2908
|
+
|
|
1922
2909
|
# Generate enhanced properties JSON (with overrides)
|
|
1923
2910
|
enhanced_properties_and_definitions = merge_properties_and_definitions(
|
|
1924
|
-
enhanced_properties,
|
|
2911
|
+
enhanced_properties, filtered_enhanced_definitions
|
|
1925
2912
|
)
|
|
1926
2913
|
enhanced_json_output = json.dumps(enhanced_properties_and_definitions, indent=4, sort_keys=True)
|
|
1927
2914
|
|