@redpanda-data/docs-extensions-and-macros 4.11.0 → 4.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/bin/doc-tools.js +4 -2
  2. package/extensions/convert-to-markdown.js +17 -1
  3. package/package.json +3 -1
  4. package/tools/property-extractor/COMPUTED_CONSTANTS.md +173 -0
  5. package/tools/property-extractor/Makefile +12 -1
  6. package/tools/property-extractor/README.adoc +828 -97
  7. package/tools/property-extractor/compare-properties.js +38 -13
  8. package/tools/property-extractor/constant_resolver.py +610 -0
  9. package/tools/property-extractor/file_pair.py +42 -0
  10. package/tools/property-extractor/generate-handlebars-docs.js +41 -8
  11. package/tools/property-extractor/helpers/gt.js +9 -0
  12. package/tools/property-extractor/helpers/includes.js +17 -0
  13. package/tools/property-extractor/helpers/index.js +3 -0
  14. package/tools/property-extractor/helpers/isEnterpriseEnum.js +24 -0
  15. package/tools/property-extractor/helpers/renderPropertyExample.js +6 -5
  16. package/tools/property-extractor/overrides.json +248 -0
  17. package/tools/property-extractor/parser.py +254 -32
  18. package/tools/property-extractor/property_bag.py +40 -0
  19. package/tools/property-extractor/property_extractor.py +1417 -430
  20. package/tools/property-extractor/requirements.txt +1 -0
  21. package/tools/property-extractor/templates/property-backup.hbs +161 -0
  22. package/tools/property-extractor/templates/property.hbs +104 -49
  23. package/tools/property-extractor/templates/topic-property-backup.hbs +148 -0
  24. package/tools/property-extractor/templates/topic-property.hbs +72 -34
  25. package/tools/property-extractor/tests/test_known_values.py +617 -0
  26. package/tools/property-extractor/tests/transformers_test.py +81 -6
  27. package/tools/property-extractor/topic_property_extractor.py +23 -10
  28. package/tools/property-extractor/transformers.py +2191 -369
  29. package/tools/property-extractor/type_definition_extractor.py +669 -0
  30. package/tools/property-extractor/definitions.json +0 -245
@@ -0,0 +1,610 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Resolves C++ constant references to their actual values.
4
+
5
+ For properties that use constants as default values (e.g., `ss::sstring{net::tls_v1_2_cipher_suites}`),
6
+ this module looks up the constant definition and extracts the actual string value.
7
+ """
8
+
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Optional, Dict
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class ConstantResolver:
    """
    Resolve C++ constant references to their values by scanning source files.

    All lookups are regex based and best effort: files that cannot be read are
    skipped and a constant that cannot be located yields ``None``. Successful
    lookups are cached on the instance.
    """

    # Files searched for a string constant, keyed by the namespace qualifier of its name.
    _NAMESPACE_PATTERNS = {
        'net': ['net/tls.cc', 'net/tls.h'],
        'model': ['model/**/*.cc', 'model/**/*.h'],
        'config': ['config/**/*.cc', 'config/**/*.h'],
    }
    # Searched when the name is unqualified or its namespace is not listed above.
    _FALLBACK_PATTERNS = ['net/**/*.cc', 'config/**/*.cc', 'model/**/*.cc']
    # Directories whose headers are scanned, in this order, for array constants.
    _ARRAY_SEARCH_DIRS = ('config', 'security', 'model', 'kafka')
    # Matches: static constexpr const char* name = "VALUE"
    _NAME_MEMBER_RE = re.compile(r'static\s+constexpr\s+const\s+char\s*\*\s*name\s*=\s*"([^"]+)"')

    def __init__(self, source_path: Path):
        """
        Initialize the constant resolver with the Redpanda source directory.

        Args:
            source_path: Path to the Redpanda src/v directory
        """
        self.source_path = source_path
        # String constants and array constants are cached separately so each
        # cache holds a single value type.
        self._constant_cache: Dict[str, str] = {}
        self._array_cache: Dict[str, list] = {}

    @staticmethod
    def _read_source(file_path: Path) -> str:
        """Read a source file as UTF-8, dropping bytes that are not valid UTF-8."""
        return file_path.read_text(encoding='utf-8', errors='ignore')

    @staticmethod
    def _decode_cpp_escapes(text: str) -> str:
        """
        Decode backslash escape sequences (``\\n``, ``\\t``, ``\\x41`` ...) in a literal.

        The text is round-tripped through latin-1 with ``backslashreplace`` so
        that non-ASCII characters survive the ``unicode_escape`` decode instead
        of being reinterpreted byte by byte. Text with a malformed escape is
        returned unchanged.
        """
        if '\\' not in text:
            return text
        try:
            return text.encode('latin-1', 'backslashreplace').decode('unicode_escape')
        except UnicodeDecodeError:
            return text

    def resolve_constant(self, constant_name: str) -> Optional[str]:
        """
        Resolve a C++ constant name to its actual string value.

        Args:
            constant_name: The constant name (e.g., "net::tls_v1_2_cipher_suites" or "tls_v1_2_cipher_suites")

        Returns:
            The actual string value, or None if not found
        """
        if constant_name in self._constant_cache:
            return self._constant_cache[constant_name]

        # Everything before the last '::' is the namespace; it selects the files to scan.
        namespace, _, identifier = constant_name.rpartition('::')
        patterns = self._NAMESPACE_PATTERNS.get(namespace, self._FALLBACK_PATTERNS)
        value = self._search_in_files(patterns, identifier)

        if value:
            self._constant_cache[constant_name] = value
            logger.info(f"Resolved constant {constant_name} = {value[:50]}...")

        return value

    def _search_in_files(self, patterns: list[str], identifier: str) -> Optional[str]:
        """
        Search for a constant definition in files matching the given patterns.

        Args:
            patterns: List of file patterns to search (e.g., ['net/tls.cc']);
                entries containing '**' are treated as globs relative to the source path
            identifier: The constant identifier (e.g., 'tls_v1_2_cipher_suites')

        Returns:
            The constant's string value, or None if not found
        """
        for pattern in patterns:
            if '**' in pattern:
                # Sorted so the winning file does not depend on directory listing order.
                files = sorted(self.source_path.glob(pattern))
            else:
                file_path = self.source_path / pattern
                files = [file_path] if file_path.exists() else []

            for file_path in files:
                try:
                    content = self._read_source(file_path)
                    value = self._extract_constant_value(content, identifier)
                except (OSError, ValueError) as e:
                    logger.debug(f"Error reading {file_path}: {e}")
                    continue

                if value:
                    logger.debug(f"Found {identifier} in {file_path}")
                    return value

        return None

    def resolve_array_constant(self, array_name: str) -> Optional[list]:
        """
        Resolve a C++ array constant to get its element values.

        Handles patterns like:
        - inline constexpr auto supported_sasl_mechanisms = std::to_array<std::string_view>({gssapi, scram, oauthbearer, plain});
        - constexpr std::array<std::string_view, N> array_name = {val1, val2, val3};

        Args:
            array_name: The array constant name (e.g., "supported_sasl_mechanisms");
                a namespace qualifier, if present, is ignored when matching declarations

        Returns:
            List of string values from the array, or None if not found
        """
        if array_name in self._array_cache:
            return self._array_cache[array_name]

        # Declarations use the bare identifier, so a qualified reference such as
        # "ns::name" could never match unless the qualifier is dropped.
        identifier = array_name.rsplit('::', 1)[-1]

        for dir_name in self._ARRAY_SEARCH_DIRS:
            search_path = self.source_path / dir_name
            if not search_path.exists():
                continue

            for file_path in sorted(search_path.rglob('*.h')):
                try:
                    content = self._read_source(file_path)
                    values = self._extract_array_values(content, identifier)
                    if not values:
                        continue

                    resolved_values = []
                    for val in values:
                        if val.startswith('"') and val.endswith('"'):
                            # Already a string literal
                            resolved_values.append(val.strip('"'))
                        else:
                            # Element is an identifier: look for its definition in the
                            # same file and keep the raw identifier when there is none.
                            resolved = self._extract_constant_value(content, val)
                            resolved_values.append(resolved if resolved else val)
                except (OSError, ValueError) as e:
                    logger.debug(f"Error reading {file_path}: {e}")
                    continue

                self._array_cache[array_name] = resolved_values
                logger.debug(f"Resolved array {array_name} = {resolved_values} from {file_path}")
                return resolved_values

        logger.debug(f"Could not resolve array constant: {array_name}")
        return None

    def _extract_array_values(self, content: str, identifier: str) -> Optional[list]:
        """
        Extract array element values from C++ source code.

        Handles patterns like:
        - std::to_array<std::string_view>({val1, val2, val3})
        - std::array<std::string_view, N> = {val1, val2, val3}

        Args:
            content: The C++ source code
            identifier: The array identifier

        Returns:
            List of element values (may be identifiers or string literals), or None if not found
        """
        escaped = re.escape(identifier)
        patterns = [
            # inline constexpr auto name = std::to_array<type>({val1, val2});
            rf'inline\s+constexpr\s+auto\s+{escaped}\s*=\s*std::to_array<[^>]+>\s*\(\s*\{{([^}}]+)\}}\s*\)',
            # constexpr auto name = std::to_array<type>({val1, val2});
            rf'constexpr\s+auto\s+{escaped}\s*=\s*std::to_array<[^>]+>\s*\(\s*\{{([^}}]+)\}}\s*\)',
            # constexpr std::array<type, N> name = {val1, val2};
            rf'constexpr\s+std::array<[^>]+>\s+{escaped}\s*=\s*\{{([^}}]+)\}}',
        ]

        for pattern in patterns:
            match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
            if not match:
                continue

            # Drop // and /* */ comments before splitting the initializer list.
            values_str = re.sub(r'//.*$', '', match.group(1), flags=re.MULTILINE)
            values_str = re.sub(r'/\*.*?\*/', '', values_str, flags=re.DOTALL)
            return [v.strip() for v in values_str.split(',') if v.strip()]

        return None

    def _is_enterprise_file(self, file_path: Path) -> bool:
        """
        Check if a file is marked as a Redpanda Enterprise file.

        Args:
            file_path: Path to the source file

        Returns:
            True if "Redpanda Enterprise file" appears in the first 500 characters
            (the license header); False otherwise or when the file cannot be read
        """
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                header = f.read(500)
        except (OSError, ValueError):
            return False
        return "Redpanda Enterprise file" in header

    def _resolve_authenticator_name(self, class_ref: str) -> Optional[dict]:
        """
        Resolve an authenticator class name constant.

        For patterns like:
        - security::scram_sha256_authenticator
        - security::oidc::sasl_authenticator

        Finds the class definition and extracts the `static constexpr const char* name` value.

        Args:
            class_ref: Qualified class name (e.g., "security::scram_sha256_authenticator")

        Returns:
            Dict with 'value' and 'is_enterprise' keys, or None if not found
        """
        class_name = class_ref.split('::')[-1]

        # The recursive scan below already visits every subdirectory (for example
        # security/oidc), so nested namespaces need no separate pass.
        search_path = self.source_path / 'security'
        if not search_path.exists():
            return None

        # Standard declaration: "class name {", "struct name final : base {", ...
        # ';' is excluded so a forward declaration cannot run on to a later brace.
        class_re = re.compile(rf'(?:struct|class)\s+{re.escape(class_name)}\b[^{{;]*\{{')
        # Template specialization (for scram authenticators): struct x<... stem ...> {
        stem = class_name.replace("_authenticator", "")
        template_re = re.compile(rf'struct\s+\w+<[^>]*{re.escape(stem)}[^>]*>\s*\{{')

        for header_file in sorted(search_path.rglob('*.h')):
            try:
                content = self._read_source(header_file)
            except (OSError, ValueError) as e:
                logger.debug(f"Error reading {header_file}: {e}")
                continue

            # Rather than matching braces, look for the name member in a fixed-size
            # window of text following the declaration.
            for declaration_re, window in ((class_re, 3000), (template_re, 2000)):
                declaration = declaration_re.search(content)
                if not declaration:
                    continue

                start_pos = declaration.end()
                name_match = self._NAME_MEMBER_RE.search(content[start_pos:start_pos + window])
                if name_match:
                    value = name_match.group(1)
                    is_enterprise = self._is_enterprise_file(header_file)
                    logger.debug(f"Resolved authenticator {class_ref}::name → '{value}' (enterprise={is_enterprise}) from {header_file}")
                    return {"value": value, "is_enterprise": is_enterprise}

        logger.debug(f"Could not resolve authenticator name for: {class_ref}")
        return None

    def _extract_constant_value(self, content: str, identifier: str) -> Optional[str]:
        """
        Extract a constant's string value from C++ source code.

        Handles formats like:
        - const std::string_view identifier = "value";
        - constexpr std::string_view identifier = "value";
        - const ss::sstring identifier = "value";
        - inline constexpr std::string_view identifier{"value"};
        - Multi-line concatenated strings

        Backslash escape sequences in the literal are decoded in every form.

        Args:
            content: The C++ source code
            identifier: The constant identifier

        Returns:
            The extracted string value, or None if not found
        """
        escaped = re.escape(identifier)

        # Brace initialisation: [inline] constexpr std::string_view name{"VALUE"};
        # The inline form is tried first, then the plain constexpr form.
        brace_patterns = (
            rf'inline\s+constexpr\s+std::string_view\s+{escaped}\s*\{{\s*"((?:[^"\\]|\\.)+)"\s*\}}',
            rf'constexpr\s+std::string_view\s+{escaped}\s*\{{\s*"((?:[^"\\]|\\.)+)"\s*\}}',
        )
        for brace_pattern in brace_patterns:
            match = re.search(brace_pattern, content)
            if match:
                return self._decode_cpp_escapes(match.group(1))

        # Assignment: const/constexpr type identifier = "value" (with possible concatenation)
        pattern = rf'''
            \b(?:const|constexpr|extern\s+const|inline\s+constexpr)\s+   # const qualifier
            (?:std::string_view|ss::sstring|std::string)\s+             # type
            {escaped}\s*                                                 # identifier
            =\s*                                                         # equals
            (                                                            # capture group for value
                "(?:[^"\\]|\\.)*"                                        # first string literal
                (?:\s*\n\s*"(?:[^"\\]|\\.)*")*                           # optional continuation strings
            )
            \s*;                                                         # semicolon
        '''
        match = re.search(pattern, content, re.VERBOSE | re.DOTALL)
        if not match:
            return None

        # Adjacent literals are concatenated, as the C++ compiler would do.
        string_literals = re.findall(r'"([^"\\]*(?:\\.[^"\\]*)*)"', match.group(1))
        if not string_literals:
            return None

        return self._decode_cpp_escapes(''.join(string_literals))
373
+
374
+
375
def resolve_property_default(default_value: str, resolver: "ConstantResolver") -> str:
    """
    Resolve a property default value that might be a constant reference.

    Handles patterns like:
    - "literal string" -> returns as-is
    - net::tls_v1_2_cipher_suites -> resolves to actual value
    - ss::sstring{net::tls_v1_2_cipher_suites} -> extracts and resolves

    Args:
        default_value: The default value string from the parser
        resolver: ConstantResolver instance

    Returns:
        The resolved string value. Empty or non-string input is returned
        unchanged. A ``type{constant}`` wrapper whose constant cannot be
        resolved is reduced to the bare constant name.
    """
    if not default_value or not isinstance(default_value, str):
        return default_value

    # If it's already a quoted string literal, return as-is
    if default_value.startswith('"') and default_value.endswith('"'):
        return default_value

    # Check if it's a constructor with a constant: ss::sstring{constant}
    constructor_match = re.match(r'[\w:]+\{([\w:]+)\}', default_value)
    if constructor_match:
        constant_name = constructor_match.group(1)
        # A failed lookup is final: asking the resolver for the same name again
        # would only repeat the source scan, so return the bare name directly.
        return resolver.resolve_constant(constant_name) or constant_name

    # Check if it's a plain identifier that looks like a constant
    looks_like_constant = '::' in default_value or default_value.islower()
    if looks_like_constant and re.match(r'^[\w:]+$', default_value):
        resolved = resolver.resolve_constant(default_value)
        if resolved:
            return resolved

    return default_value
415
+
416
+
417
def _extract_braced_body(text: str, start: int) -> Optional[str]:
    """
    Return the text between an already-consumed ``{`` and its matching ``}``.

    Args:
        text: The C++ source code
        start: Index just past the opening brace

    Returns:
        The enclosed text, or None when the braces never balance. Braces that
        appear inside string literals or comments are counted like any other.
    """
    depth = 1
    for brace in re.compile(r'[{}]').finditer(text, start):
        depth += 1 if brace.group() == '{' else -1
        if depth == 0:
            return text[start:brace.start()]
    return None


def resolve_validator_enum_constraint(validator_name: str, resolver: ConstantResolver) -> Optional[list]:
    """
    Extract enum constraint values from a validator function.

    For validators like validate_sasl_mechanisms, this function:
    1. Finds the validator function in validators.cc
    2. Parses it to find what constant array it validates against (e.g., supported_sasl_mechanisms)
    3. Resolves that array to get the actual enum values
    4. Checks for enterprise values (e.g., enterprise_sasl_mechanisms)

    Args:
        validator_name: Name of the validator function (e.g., "validate_sasl_mechanisms")
        resolver: ConstantResolver instance

    Returns:
        List of dicts with 'value' and 'is_enterprise' keys, or None if not found
    """
    # Find validators.cc in the config directory
    validators_file = resolver.source_path / 'config' / 'validators.cc'
    if not validators_file.exists():
        logger.debug(f"validators.cc not found at {validators_file}")
        return None

    try:
        with open(validators_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        logger.debug(f"Error reading validators.cc: {e}")
        return None

    # Locate "validate_name(...) {" and take everything up to the matching
    # closing brace, so nested blocks inside the validator are kept whole.
    signature = re.search(rf'\b{re.escape(validator_name)}\s*\([^)]*\)\s*\{{', content)
    func_body = _extract_braced_body(content, signature.end()) if signature else None
    if func_body is None:
        logger.debug(f"Validator function {validator_name} not found")
        return None

    # Look for patterns like:
    # - std::ranges::contains(supported_sasl_mechanisms, m)
    # - std::find.*(...array.begin())
    # - std::count.*(...array.begin())
    constraint_patterns = [
        r'std::ranges::contains\s*\(\s*([\w:]+)\s*,',
        r'std::find.*\(\s*([\w:]+)\.begin\(\)',
        r'std::count.*\(\s*([\w:]+)\.begin\(\)',
    ]

    constraint_array = None
    for pattern in constraint_patterns:
        match = re.search(pattern, func_body)
        if match:
            constraint_array = match.group(1)
            logger.debug(f"Found constraint array '{constraint_array}' in validator {validator_name}")
            break

    if not constraint_array:
        logger.debug(f"No constraint array found in {validator_name}")
        return None

    # The validator may reference the array with a namespace qualifier, but its
    # declaration uses the bare identifier, so look it up by the last component.
    array_name = constraint_array.rsplit('::', 1)[-1]

    enum_values = resolver.resolve_array_constant(array_name)
    if not enum_values:
        return None

    # Check if there's an enterprise version of this array
    # Pattern: supported_X -> enterprise_X
    enterprise_array = array_name.replace('supported_', 'enterprise_')
    enterprise_values = []

    if enterprise_array != array_name:
        enterprise_values = resolver.resolve_array_constant(enterprise_array) or []
        if enterprise_values:
            logger.debug(f"Found enterprise array '{enterprise_array}' with values: {enterprise_values}")

    # Build result with enterprise metadata
    results = [
        {"value": value, "is_enterprise": value in enterprise_values}
        for value in enum_values
    ]

    logger.info(f"Extracted {len(results)} enum values from validator {validator_name}: {[r['value'] for r in results]}")
    return results
506
+
507
+
508
def resolve_runtime_validation_enum_constraint(property_name: str, defined_in: str, resolver: ConstantResolver) -> Optional[list]:
    """
    Extract enum constraint values from runtime validation functions.

    For properties without constructor validators, this searches for validation
    functions that check the property value against constants.

    Pattern example (kafka/client/configuration.cc:validate_sasl_properties):
        if (
          mechanism != security::scram_sha256_authenticator::name
          && mechanism != security::scram_sha512_authenticator::name
          && mechanism != security::oidc::sasl_authenticator::name) {
            throw std::invalid_argument(...);
        }

    Args:
        property_name: Name of the property (e.g., "sasl_mechanism")
        defined_in: Path where property is defined (e.g., "src/v/kafka/client/configuration.cc")
        resolver: ConstantResolver instance

    Returns:
        List of dicts with 'value' and 'is_enterprise' keys, or None if not found
        (including when ``defined_in`` is not a path under ``src/v``)
    """
    # Map the repository-relative path onto the resolver's source directory.
    # relative_to() raises ValueError for a path outside src/v; treat that like
    # any other "not found" case instead of letting it propagate.
    try:
        relative_path = Path(defined_in).relative_to('src/v')
    except (TypeError, ValueError):
        logger.debug(f"Source file not found: {defined_in}")
        return None

    source_file = resolver.source_path / relative_path
    if not source_file.exists():
        logger.debug(f"Source file not found: {source_file}")
        return None

    try:
        with open(source_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        logger.debug(f"Error reading {source_file}: {e}")
        return None

    # Look for validation/check functions
    func_decl_pattern = r'(?:void|bool|std::optional<[\w:]+>)\s+(\w*validate\w*|\w*check\w*)\s*\('
    # Comparisons such as: mechanism != security::scram_sha256_authenticator::name
    # The parameter name is captured because it is not known in advance.
    comparison_pattern = r'(\w+)\s*!=\s*([\w:]+::name)\b'
    name_suffix = '::name'

    for func_match in re.finditer(func_decl_pattern, content, re.IGNORECASE):
        func_name = func_match.group(1)

        # Only consider functions that are called somewhere in this file with
        # the property accessor as an argument: func_name(...property_name()...)
        func_call_pattern = rf'{re.escape(func_name)}\s*\([^;)]*\b\w*\.?{re.escape(property_name)}\s*\('
        if not re.search(func_call_pattern, content):
            logger.debug(f"Skipping validation function {func_name} - property {property_name} not passed to this function")
            continue

        logger.debug(f"Found validation function {func_name} that validates property {property_name}")

        # Inspect a fixed-size window of text starting at the function declaration.
        func_start = func_match.start()
        chunk = content[func_start:func_start + 2000]

        matches = re.findall(comparison_pattern, chunk)
        if not matches:
            continue

        # Group the compared constants by the variable they are compared against.
        by_param = {}
        for param_name, constant_ref in matches:
            by_param.setdefault(param_name, []).append(constant_ref)

        # Process the parameter with the most comparisons (likely the one we want)
        most_common_param = max(by_param, key=lambda k: len(by_param[k]))
        constant_refs = by_param[most_common_param]

        logger.debug(f"Found {len(constant_refs)} comparisons in function {func_name} for parameter {most_common_param}")

        # Resolve each constant to its actual value
        enum_values = []
        for constant_ref in constant_refs:
            # Drop only the trailing "::name" to get the class/struct reference.
            class_ref = constant_ref[:-len(name_suffix)]

            # Search for the class definition and extract the name constant
            result = resolver._resolve_authenticator_name(class_ref)
            if result:
                enum_values.append(result)
                logger.debug(f"Resolved {constant_ref} → '{result['value']}' (enterprise={result['is_enterprise']})")

        if enum_values:
            logger.info(f"Extracted {len(enum_values)} enum values from runtime validation for {property_name}: {[v['value'] for v in enum_values]}")
            return enum_values

    logger.debug(f"No runtime validation enum constraint found for {property_name}")
    return None
@@ -1,3 +1,6 @@
1
+ import os
2
+ from tree_sitter import Language, Parser
3
+
1
4
  class FilePair:
2
5
  def __init__(self, header, implementation) -> None:
3
6
  self.header = header
@@ -5,3 +8,42 @@ class FilePair:
5
8
 
6
9
  def __repr__(self) -> str:
7
10
  return f"(header={self.header}, implementation={self.implementation})"
11
+
12
+
13
def get_files_with_properties(src_dir, cpp_language_lib_path):
    """
    Walk ``src_dir`` and extract properties from each C++ header/implementation pair.

    A header ``name.h`` is paired with ``name.cc`` or, failing that, ``name.cpp``
    from the same directory; headers without such a sibling are skipped. When
    extraction raises for a pair, a ``[WARN]`` line is printed and the walk
    continues with the next header.

    Returns a list of (FilePair, PropertyBag) tuples, one for every pair whose
    extraction produced a non-empty result.
    """
    # Imported here rather than at module level, as in the original code.
    from parser import extract_properties_from_file_pair

    language = Language(cpp_language_lib_path, "cpp")
    cpp_parser = Parser()
    cpp_parser.set_language(language)

    collected = []

    for directory, _, names in os.walk(src_dir):
        for name in names:
            if not name.endswith(".h"):
                continue

            stem = os.path.splitext(name)[0]

            # Take the first implementation file that exists next to the header.
            implementation = None
            for extension in (".cc", ".cpp"):
                candidate = f"{stem}{extension}"
                if candidate in names:
                    implementation = os.path.join(directory, candidate)
                    break
            if not implementation:
                continue

            pair = FilePair(os.path.join(directory, name), implementation)

            try:
                props = extract_properties_from_file_pair(cpp_parser, language, pair)
                if props and len(props) > 0:
                    collected.append((pair, props))
            except Exception as e:
                print(f"[WARN] Failed to extract from {pair}: {e}")

    return collected