@redpanda-data/docs-extensions-and-macros 4.11.1 → 4.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/doc-tools.js +201 -10
- package/package.json +3 -1
- package/tools/property-extractor/COMPUTED_CONSTANTS.md +173 -0
- package/tools/property-extractor/Makefile +12 -1
- package/tools/property-extractor/README.adoc +828 -97
- package/tools/property-extractor/compare-properties.js +38 -13
- package/tools/property-extractor/constant_resolver.py +610 -0
- package/tools/property-extractor/file_pair.py +42 -0
- package/tools/property-extractor/generate-handlebars-docs.js +41 -8
- package/tools/property-extractor/helpers/gt.js +9 -0
- package/tools/property-extractor/helpers/includes.js +17 -0
- package/tools/property-extractor/helpers/index.js +3 -0
- package/tools/property-extractor/helpers/isEnterpriseEnum.js +24 -0
- package/tools/property-extractor/helpers/renderPropertyExample.js +6 -5
- package/tools/property-extractor/overrides.json +248 -0
- package/tools/property-extractor/parser.py +254 -32
- package/tools/property-extractor/property_bag.py +40 -0
- package/tools/property-extractor/property_extractor.py +1417 -430
- package/tools/property-extractor/requirements.txt +1 -0
- package/tools/property-extractor/templates/property-backup.hbs +161 -0
- package/tools/property-extractor/templates/property.hbs +104 -49
- package/tools/property-extractor/templates/topic-property-backup.hbs +148 -0
- package/tools/property-extractor/templates/topic-property.hbs +72 -34
- package/tools/property-extractor/tests/test_known_values.py +617 -0
- package/tools/property-extractor/tests/transformers_test.py +81 -6
- package/tools/property-extractor/topic_property_extractor.py +23 -10
- package/tools/property-extractor/transformers.py +2191 -369
- package/tools/property-extractor/type_definition_extractor.py +669 -0
- package/tools/redpanda-connect/helpers/renderConnectFields.js +33 -1
- package/tools/redpanda-connect/report-delta.js +132 -9
- package/tools/property-extractor/definitions.json +0 -245
|
@@ -0,0 +1,610 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Resolves C++ constant references to their actual values.
|
|
4
|
+
|
|
5
|
+
For properties that use constants as default values (e.g., `ss::sstring{net::tls_v1_2_cipher_suites}`),
|
|
6
|
+
this module looks up the constant definition and extracts the actual string value.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Optional, Dict
|
|
12
|
+
import logging
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ConstantResolver:
    """Resolve C++ constant references in the Redpanda sources to literal values.

    Source files under ``source_path`` are scanned with regular expressions
    (there is no real C++ parsing), and successful lookups are memoized for
    the lifetime of the instance.
    """

    # Files searched for a constant, keyed by its namespace qualifier.
    _NAMESPACE_PATTERNS = {
        'net': ['net/tls.cc', 'net/tls.h'],
        'model': ['model/**/*.cc', 'model/**/*.h'],
        'config': ['config/**/*.cc', 'config/**/*.h'],
    }
    # Locations tried for unqualified names and for any other namespace.
    _DEFAULT_PATTERNS = ['net/**/*.cc', 'config/**/*.cc', 'model/**/*.cc']
    # Matches: static constexpr const char* name = "VALUE"
    _NAME_MEMBER_RE = re.compile(
        r'static\s+constexpr\s+const\s+char\s*\*\s*name\s*=\s*"([^"]+)"')
    _BRACE_RE = re.compile(r'[{}]')

    def __init__(self, source_path: Path):
        """
        Initialize the constant resolver with the Redpanda source directory.

        Args:
            source_path: Path to the Redpanda src/v directory
        """
        self.source_path = source_path
        # Values are str for scalar constants and list for the
        # "array:<name>" keys written by resolve_array_constant().
        self._constant_cache: Dict = {}

    @staticmethod
    def _read_text(file_path: Path, limit: int = -1) -> Optional[str]:
        """Return up to ``limit`` characters of a file, or None if unreadable.

        Undecodable bytes are dropped so one stray byte in a source file does
        not hide every constant defined in it.
        """
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read(limit)
        except OSError as e:
            logger.debug(f"Error reading {file_path}: {e}")
            return None

    @classmethod
    def _braced_body(cls, content: str, start: int) -> str:
        """Return the text between an opening brace and its matching close.

        ``start`` is the index just after the opening ``{``. Braces inside
        string literals or comments are counted too, so this is only an
        approximation; if no matching brace exists the rest of the text is
        returned.
        """
        depth = 1
        for brace in cls._BRACE_RE.finditer(content, start):
            depth += 1 if brace.group() == '{' else -1
            if depth == 0:
                return content[start:brace.start()]
        return content[start:]

    def resolve_constant(self, constant_name: str) -> Optional[str]:
        """
        Resolve a C++ constant name to its actual string value.

        Args:
            constant_name: The constant name (e.g., "net::tls_v1_2_cipher_suites" or "tls_v1_2_cipher_suites")

        Returns:
            The actual string value, or None if not found
        """
        if constant_name in self._constant_cache:
            return self._constant_cache[constant_name]

        # Everything before the last "::" is the namespace; it selects where to look.
        namespace, _, identifier = constant_name.rpartition('::')
        patterns = self._NAMESPACE_PATTERNS.get(namespace, self._DEFAULT_PATTERNS)
        value = self._search_in_files(patterns, identifier)

        if value:
            self._constant_cache[constant_name] = value
            logger.info(f"Resolved constant {constant_name} = {value[:50]}...")

        return value

    def _search_in_files(self, patterns: list[str], identifier: str) -> Optional[str]:
        """
        Search for a constant definition in files matching the given patterns.

        Args:
            patterns: Paths or glob patterns relative to the source directory (e.g., ['net/tls.cc'])
            identifier: The constant identifier (e.g., 'tls_v1_2_cipher_suites')

        Returns:
            The constant's string value, or None if not found
        """
        for pattern in patterns:
            if any(ch in pattern for ch in '*?['):
                # Sorted so the first match does not depend on directory order.
                files = sorted(self.source_path.glob(pattern))
            else:
                candidate = self.source_path / pattern
                files = [candidate] if candidate.exists() else []

            for file_path in files:
                content = self._read_text(file_path)
                if content is None:
                    continue
                value = self._extract_constant_value(content, identifier)
                if value:
                    logger.debug(f"Found {identifier} in {file_path}")
                    return value

        return None

    def resolve_array_constant(self, array_name: str) -> Optional[list]:
        """
        Resolve a C++ array constant to get its element values.

        Handles patterns like:
        - inline constexpr auto supported_sasl_mechanisms = std::to_array<std::string_view>({gssapi, scram, oauthbearer, plain});
        - constexpr std::array<std::string_view, N> array_name = {val1, val2, val3};

        Args:
            array_name: The array constant name, optionally namespace-qualified
                (e.g., "supported_sasl_mechanisms" or "config::supported_sasl_mechanisms")

        Returns:
            List of string values from the array, or None if not found
        """
        cache_key = f"array:{array_name}"
        if cache_key in self._constant_cache:
            return self._constant_cache[cache_key]

        # Definitions never carry the namespace qualifier, so match on the bare name.
        identifier = array_name.rsplit('::', 1)[-1]

        # config comes first: it is the most common home for validator arrays.
        for dir_name in ('config', 'security', 'model', 'kafka'):
            search_path = self.source_path / dir_name
            if not search_path.exists():
                continue

            for file_path in sorted(search_path.rglob('*.h')):
                content = self._read_text(file_path)
                if content is None:
                    continue
                values = self._extract_array_values(content, identifier)
                if not values:
                    continue

                resolved_values = [self._resolve_array_element(content, val) for val in values]
                self._constant_cache[cache_key] = resolved_values
                logger.debug(f"Resolved array {array_name} = {resolved_values} from {file_path}")
                return resolved_values

        logger.debug(f"Could not resolve array constant: {array_name}")
        return None

    def _resolve_array_element(self, content: str, element: str) -> str:
        """Turn one array element into its string value.

        String literals lose their quotes; identifiers are looked up in the
        same file and kept verbatim when no definition is found there.
        """
        if element.startswith('"') and element.endswith('"'):
            return element[1:-1]
        resolved = self._extract_constant_value(content, element.rsplit('::', 1)[-1])
        return resolved if resolved else element

    def _extract_array_values(self, content: str, identifier: str) -> Optional[list]:
        """
        Extract array element values from C++ source code.

        Handles patterns like:
        - std::to_array<std::string_view>({val1, val2, val3})
        - std::array<std::string_view, N> = {val1, val2, val3}

        Args:
            content: The C++ source code
            identifier: The array identifier

        Returns:
            List of element values (may be identifiers or string literals), or None if not found
        """
        name = re.escape(identifier)
        patterns = [
            # [inline] constexpr auto name = std::to_array<type>({val1, val2});
            rf'constexpr\s+auto\s+{name}\s*=\s*std::to_array<[^>]+>\s*\(\s*\{{([^}}]+)\}}\s*\)',
            # constexpr std::array<type, N> name = {val1, val2};
            rf'constexpr\s+std::array<[^>]+>\s+{name}\s*=\s*\{{([^}}]+)\}}',
        ]

        for pattern in patterns:
            match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
            if not match:
                continue
            values_str = match.group(1)
            # Drop // and /* */ comments before splitting on commas.
            values_str = re.sub(r'//.*$', '', values_str, flags=re.MULTILINE)
            values_str = re.sub(r'/\*.*?\*/', '', values_str, flags=re.DOTALL)
            return [v.strip() for v in values_str.split(',') if v.strip()]

        return None

    def _is_enterprise_file(self, file_path: Path) -> bool:
        """
        Check if a file is marked as a Redpanda Enterprise file.

        Args:
            file_path: Path to the source file

        Returns:
            True if "Redpanda Enterprise file" occurs in the first 500 characters
            (the license header); False otherwise or if the file cannot be read
        """
        header = self._read_text(file_path, 500)
        return header is not None and "Redpanda Enterprise file" in header

    def _resolve_authenticator_name(self, class_ref: str) -> Optional[dict]:
        """
        Resolve an authenticator class name constant.

        For patterns like:
        - security::scram_sha256_authenticator
        - security::oidc::sasl_authenticator

        Finds the class definition and extracts the `static constexpr const char* name` value.

        Args:
            class_ref: Qualified class name (e.g., "security::scram_sha256_authenticator")

        Returns:
            Dict with 'value' and 'is_enterprise' keys, or None if not found
        """
        parts = class_ref.split('::')
        class_name = parts[-1]
        namespaces = parts[:-1]

        base_dir = self.source_path / 'security'
        if not base_dir.exists():
            return None

        # For security::oidc::x look in security/oidc first, so a same-named
        # class elsewhere under security/ cannot win; then fall back to all of
        # security/. The leading namespace is skipped, as base_dir stands for it.
        nested_dir = base_dir
        for ns in namespaces[1:]:
            nested_dir = nested_dir / ns
        search_dirs = [base_dir]
        if nested_dir != base_dir and nested_dir.exists():
            search_dirs.insert(0, nested_dir)

        # Standard class/struct declaration, up to its opening brace.
        class_re = re.compile(rf'(?:struct|class)\s+{re.escape(class_name)}\b[^{{;]*\{{')
        # Template specialization (for scram authenticators).
        stem = class_name.replace("_authenticator", "")
        template_re = re.compile(rf'struct\s+\w+<[^>]*{re.escape(stem)}[^>]*>\s*\{{')

        seen = set()
        for search_dir in search_dirs:
            for header_file in sorted(search_dir.rglob('*.h')):
                if header_file in seen:
                    continue
                seen.add(header_file)

                content = self._read_text(header_file)
                if content is None:
                    continue

                for decl_re in (class_re, template_re):
                    decl = decl_re.search(content)
                    if not decl:
                        continue
                    # Look only inside this declaration's braces so the name
                    # of a following class is never picked up by mistake.
                    body = self._braced_body(content, decl.end())
                    name_match = self._NAME_MEMBER_RE.search(body)
                    if name_match:
                        value = name_match.group(1)
                        is_enterprise = self._is_enterprise_file(header_file)
                        logger.debug(f"Resolved authenticator {class_ref}::name → '{value}' (enterprise={is_enterprise}) from {header_file}")
                        return {"value": value, "is_enterprise": is_enterprise}

        logger.debug(f"Could not resolve authenticator name for: {class_ref}")
        return None

    def _extract_constant_value(self, content: str, identifier: str) -> Optional[str]:
        """
        Extract a constant's string value from C++ source code.

        Handles formats like:
        - const std::string_view identifier = "value";
        - constexpr std::string_view identifier = "value";
        - const ss::sstring identifier = "value";
        - inline constexpr std::string_view identifier{"value"};
        - Multi-line concatenated strings

        Args:
            content: The C++ source code
            identifier: The constant identifier

        Returns:
            The extracted string value, or None if not found
        """
        name = re.escape(identifier)

        # Brace initialization: [inline] constexpr std::string_view name{"VALUE"};
        brace_match = re.search(
            rf'constexpr\s+std::string_view\s+{name}\s*\{{\s*"([^"]+)"\s*\}}', content)
        if brace_match:
            return brace_match.group(1)

        # Assignment: const/constexpr type name = "value" ["more" ...];
        assign_pattern = rf'''
            (?:const|constexpr|extern\s+const|inline\s+constexpr)\s+  # const qualifier
            (?:std::string_view|ss::sstring|std::string)\s+           # type
            {name}\s*                                                 # identifier
            =\s*                                                      # equals
            (                                                         # capture group for value
                "(?:[^"\\]|\\.)*"                                     # first string literal
                (?:\s*\n\s*"(?:[^"\\]|\\.)*")*                        # optional continuation strings
            )
            \s*;                                                      # semicolon
        '''
        assign_match = re.search(assign_pattern, content, re.VERBOSE | re.DOTALL)
        if not assign_match:
            return None

        # Adjacent literals are concatenated, as the C++ compiler would do.
        string_literals = re.findall(r'"([^"\\]*(?:\\.[^"\\]*)*)"', assign_match.group(1))
        if not string_literals:
            return None
        result = ''.join(string_literals)

        if '\\' not in result:
            return result
        try:
            # Encoding as latin-1 with backslashreplace lets unicode_escape
            # decode the escapes without garbling non-ASCII characters.
            return result.encode('latin-1', 'backslashreplace').decode('unicode_escape')
        except UnicodeDecodeError:
            # Malformed escape sequence: keep the raw text rather than fail.
            return result
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def resolve_property_default(default_value: str, resolver: "ConstantResolver") -> str:
    """
    Resolve a property default value that might be a constant reference.

    Handles patterns like:
    - "literal string" -> returns as-is
    - net::tls_v1_2_cipher_suites -> resolves to actual value
    - ss::sstring{net::tls_v1_2_cipher_suites} -> extracts and resolves

    Args:
        default_value: The default value string from the parser
        resolver: ConstantResolver instance (anything with a
            ``resolve_constant(name)`` method returning a str or None)

    Returns:
        The resolved string value. Empty or non-string input is returned
        unchanged. A constructor expression whose argument cannot be resolved
        yields the bare argument; any other unresolved value is returned as-is.
    """
    if not default_value or not isinstance(default_value, str):
        return default_value

    # If it's already a quoted string literal, return as-is
    if default_value.startswith('"') and default_value.endswith('"'):
        return default_value

    # Constructor wrapping a single constant: ss::sstring{constant}.
    # fullmatch, so that a longer expression which merely starts with such a
    # constructor is not replaced wholesale by the constant's value.
    constructor_match = re.fullmatch(r'\s*[\w:]+\{\s*([\w:]+)\s*\}\s*', default_value)
    if constructor_match:
        constant_name = constructor_match.group(1)
        # One lookup is enough: retrying the same name as a plain identifier
        # would repeat the identical search.
        return resolver.resolve_constant(constant_name) or constant_name

    # Plain identifier that looks like a constant (qualified, or all lowercase)
    if re.match(r'^[\w:]+$', default_value) and ('::' in default_value or default_value.islower()):
        resolved = resolver.resolve_constant(default_value)
        if resolved:
            return resolved

    return default_value
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def _validator_body(content: str, start: int) -> str:
    """Return the text from ``start`` (just after a ``{``) to its matching ``}``.

    Braces in string literals and comments are counted as well, so this is an
    approximation; if no matching brace exists the rest of ``content`` is returned.
    """
    depth = 1
    for brace in re.compile(r'[{}]').finditer(content, start):
        depth += 1 if brace.group() == '{' else -1
        if depth == 0:
            return content[start:brace.start()]
    return content[start:]


def resolve_validator_enum_constraint(validator_name: str, resolver: "ConstantResolver") -> Optional[list]:
    """
    Extract enum constraint values from a validator function.

    For validators like validate_sasl_mechanisms, this function:
    1. Finds the validator function in validators.cc
    2. Parses it to find what constant array it validates against (e.g., supported_sasl_mechanisms)
    3. Resolves that array to get the actual enum values
    4. Checks for enterprise values (e.g., enterprise_sasl_mechanisms)

    Args:
        validator_name: Name of the validator function (e.g., "validate_sasl_mechanisms")
        resolver: ConstantResolver instance

    Returns:
        List of dicts with 'value' and 'is_enterprise' keys, or None if not found
    """
    validators_file = resolver.source_path / 'config' / 'validators.cc'
    if not validators_file.exists():
        logger.debug(f"validators.cc not found at {validators_file}")
        return None

    try:
        with open(validators_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        logger.debug(f"Error reading validators.cc: {e}")
        return None

    # Locate `validate_name(...) {` and take everything up to the matching
    # closing brace, so checks that follow a nested block are still seen.
    header = re.search(rf'\b{re.escape(validator_name)}\s*\([^)]*\)\s*\{{', content)
    if not header:
        logger.debug(f"Validator function {validator_name} not found")
        return None
    func_body = _validator_body(content, header.end())

    # Look for patterns like:
    # - std::ranges::contains(supported_sasl_mechanisms, m)
    # - std::find.*(...array.begin())
    # - std::count.*(...array.begin())
    constraint_patterns = [
        r'std::ranges::contains\s*\(\s*([\w:]+)\s*,',
        r'std::find.*\(\s*([\w:]+)\.begin\(\)',
        r'std::count.*\(\s*([\w:]+)\.begin\(\)',
    ]

    constraint_array = None
    for pattern in constraint_patterns:
        match = re.search(pattern, func_body)
        if match:
            constraint_array = match.group(1)
            logger.debug(f"Found constraint array '{constraint_array}' in validator {validator_name}")
            break

    if not constraint_array:
        logger.debug(f"No constraint array found in {validator_name}")
        return None

    # The call site may qualify the array (config::supported_x), but its
    # definition never does, so resolve the unqualified name.
    array_name = constraint_array.rsplit('::', 1)[-1]
    enum_values = resolver.resolve_array_constant(array_name)
    if not enum_values:
        return None

    # Check if there's an enterprise version of this array
    # Pattern: supported_X -> enterprise_X
    enterprise_array = array_name.replace('supported_', 'enterprise_')
    enterprise_values = []
    if enterprise_array != array_name:
        enterprise_values = resolver.resolve_array_constant(enterprise_array) or []
        if enterprise_values:
            logger.debug(f"Found enterprise array '{enterprise_array}' with values: {enterprise_values}")

    # Build result with enterprise metadata
    results = [
        {"value": value, "is_enterprise": value in enterprise_values}
        for value in enum_values
    ]

    logger.info(f"Extracted {len(results)} enum values from validator {validator_name}: {[r['value'] for r in results]}")
    return results
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def resolve_runtime_validation_enum_constraint(property_name: str, defined_in: str, resolver: "ConstantResolver") -> Optional[list]:
    """
    Extract enum constraint values from runtime validation functions.

    For properties without constructor validators, this searches for validation
    functions that check the property value against constants.

    Pattern example (kafka/client/configuration.cc:validate_sasl_properties):
        if (
          mechanism != security::scram_sha256_authenticator::name
          && mechanism != security::scram_sha512_authenticator::name
          && mechanism != security::oidc::sasl_authenticator::name) {
            throw std::invalid_argument(...);
        }

    Args:
        property_name: Name of the property (e.g., "sasl_mechanism")
        defined_in: Path where property is defined (e.g., "src/v/kafka/client/configuration.cc")
        resolver: ConstantResolver instance

    Returns:
        List of dicts with 'value' and 'is_enterprise' keys, or None if not
        found (including when ``defined_in`` is not located under src/v)
    """
    # resolver.source_path already points at src/v, so drop that prefix.
    try:
        relative_path = Path(defined_in).relative_to('src/v')
    except ValueError:
        logger.debug(f"Property source {defined_in} is not under src/v")
        return None
    source_file = resolver.source_path / relative_path

    if not source_file.exists():
        logger.debug(f"Source file not found: {source_file}")
        return None

    try:
        with open(source_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        logger.debug(f"Error reading {source_file}: {e}")
        return None

    # Candidate validators: functions whose name contains "validate" or "check".
    func_decl_pattern = r'(?:void|bool|std::optional<[\w:]+>)\s+(\w*validate\w*|\w*check\w*)\s*\('
    # Comparisons such as: mechanism != security::x_authenticator::name
    comparison_pattern = r'(\w+)\s*!=\s*([\w:]+::name)'
    name_suffix = '::name'

    for func_match in re.finditer(func_decl_pattern, content, re.IGNORECASE):
        func_name = func_match.group(1)

        # Only consider functions that are called somewhere in this file with
        # the property accessor as an argument: func_name(...property_name()...)
        func_call_pattern = rf'{re.escape(func_name)}\s*\([^;)]*\b\w*\.?{re.escape(property_name)}\s*\('
        if not re.search(func_call_pattern, content):
            logger.debug(f"Skipping validation function {func_name} - property {property_name} not passed to this function")
            continue

        logger.debug(f"Found validation function {func_name} that validates property {property_name}")

        # Inspect a fixed-size window starting at the function declaration.
        chunk = content[func_match.start():func_match.start() + 2000]
        matches = re.findall(comparison_pattern, chunk)
        if not matches:
            continue

        # The parameter name is unknown, so group the comparisons by the
        # compared variable and take the one with the most comparisons.
        by_param = {}
        for param_name, constant_ref in matches:
            by_param.setdefault(param_name, []).append(constant_ref)
        most_common_param = max(by_param, key=lambda k: len(by_param[k]))
        constant_refs = by_param[most_common_param]

        logger.debug(f"Found {len(constant_refs)} comparisons in function {func_name} for parameter {most_common_param}")

        enum_values = []
        for constant_ref in constant_refs:
            # Remove only the trailing ::name to obtain the class reference.
            class_ref = constant_ref[:-len(name_suffix)]

            # Search for the class definition and extract the name constant
            result = resolver._resolve_authenticator_name(class_ref)
            if result:
                enum_values.append(result)
                logger.debug(f"Resolved {constant_ref} → '{result['value']}' (enterprise={result['is_enterprise']})")

        if enum_values:
            logger.info(f"Extracted {len(enum_values)} enum values from runtime validation for {property_name}: {[v['value'] for v in enum_values]}")
            return enum_values

    logger.debug(f"No runtime validation enum constraint found for {property_name}")
    return None
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from tree_sitter import Language, Parser
|
|
3
|
+
|
|
1
4
|
class FilePair:
|
|
2
5
|
def __init__(self, header, implementation) -> None:
|
|
3
6
|
self.header = header
|
|
@@ -5,3 +8,42 @@ class FilePair:
|
|
|
5
8
|
|
|
6
9
|
def __repr__(self) -> str:
|
|
7
10
|
return f"(header={self.header}, implementation={self.implementation})"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_files_with_properties(src_dir, cpp_language_lib_path):
    """
    Walk ``src_dir`` and extract properties from every C++ header that has a
    sibling implementation file.

    A header ``name.h`` is paired with ``name.cc`` or, failing that,
    ``name.cpp`` from the same directory; headers without either are skipped.
    Pairs whose extraction raises are reported on stdout and skipped.

    Args:
        src_dir: Root directory to walk
        cpp_language_lib_path: Path handed to tree_sitter ``Language`` together
            with the language name "cpp"

    Returns:
        A list of (FilePair, properties) tuples, one per pair that yielded a
        non-empty result
    """
    from parser import extract_properties_from_file_pair

    language = Language(cpp_language_lib_path, "cpp")
    cpp_parser = Parser()
    cpp_parser.set_language(language)

    collected = []

    for dirpath, _dirnames, filenames in os.walk(src_dir):
        for header_name in filenames:
            if not header_name.endswith(".h"):
                continue

            stem = os.path.splitext(header_name)[0]

            # Prefer name.cc over name.cpp when both exist.
            impl_name = None
            for candidate in (f"{stem}.cc", f"{stem}.cpp"):
                if candidate in filenames:
                    impl_name = candidate
                    break
            if impl_name is None:
                continue

            pair = FilePair(
                os.path.join(dirpath, header_name),
                os.path.join(dirpath, impl_name),
            )

            try:
                props = extract_properties_from_file_pair(cpp_parser, language, pair)
                if props and len(props) > 0:
                    collected.append((pair, props))
            except Exception as e:
                print(f"[WARN] Failed to extract from {pair}: {e}")

    return collected
|