@redpanda-data/docs-extensions-and-macros 4.11.0 → 4.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/bin/doc-tools.js +4 -2
  2. package/extensions/convert-to-markdown.js +17 -1
  3. package/package.json +3 -1
  4. package/tools/property-extractor/COMPUTED_CONSTANTS.md +173 -0
  5. package/tools/property-extractor/Makefile +12 -1
  6. package/tools/property-extractor/README.adoc +828 -97
  7. package/tools/property-extractor/compare-properties.js +38 -13
  8. package/tools/property-extractor/constant_resolver.py +610 -0
  9. package/tools/property-extractor/file_pair.py +42 -0
  10. package/tools/property-extractor/generate-handlebars-docs.js +41 -8
  11. package/tools/property-extractor/helpers/gt.js +9 -0
  12. package/tools/property-extractor/helpers/includes.js +17 -0
  13. package/tools/property-extractor/helpers/index.js +3 -0
  14. package/tools/property-extractor/helpers/isEnterpriseEnum.js +24 -0
  15. package/tools/property-extractor/helpers/renderPropertyExample.js +6 -5
  16. package/tools/property-extractor/overrides.json +248 -0
  17. package/tools/property-extractor/parser.py +254 -32
  18. package/tools/property-extractor/property_bag.py +40 -0
  19. package/tools/property-extractor/property_extractor.py +1417 -430
  20. package/tools/property-extractor/requirements.txt +1 -0
  21. package/tools/property-extractor/templates/property-backup.hbs +161 -0
  22. package/tools/property-extractor/templates/property.hbs +104 -49
  23. package/tools/property-extractor/templates/topic-property-backup.hbs +148 -0
  24. package/tools/property-extractor/templates/topic-property.hbs +72 -34
  25. package/tools/property-extractor/tests/test_known_values.py +617 -0
  26. package/tools/property-extractor/tests/transformers_test.py +81 -6
  27. package/tools/property-extractor/topic_property_extractor.py +23 -10
  28. package/tools/property-extractor/transformers.py +2191 -369
  29. package/tools/property-extractor/type_definition_extractor.py +669 -0
  30. package/tools/property-extractor/definitions.json +0 -245
@@ -4,19 +4,99 @@ from property_bag import PropertyBag
4
4
  from copy import deepcopy
5
5
  import itertools as it
6
6
  import re
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ def _is_config_property_type(type_str):
12
+ """
13
+ Check if a C++ type is a Redpanda configuration property.
14
+
15
+ Returns True only for property wrapper types like:
16
+ - property<T>
17
+ - bounded_property<T>
18
+ - deprecated_property<T>
19
+ - one_or_many_property<T>
20
+ - enum_property<T>
21
+ - enterprise<property<T>>
22
+ - enterprise<bounded_property<T>>
23
+
24
+ Returns False for internal structs like:
25
+ - connection_cfg, consumer_cfg, ack_level, proxy_request, etc.
26
+ - Primitive types: ss::sstring, iobuf, std::vector<T>, etc.
27
+ """
28
+ if not type_str:
29
+ return False
30
+
31
+ type_str = type_str.strip()
32
+
33
+ # Known property wrapper types (defined first for exclusion check)
34
+ PROPERTY_WRAPPERS = [
35
+ 'property<',
36
+ 'bounded_property<',
37
+ 'deprecated_property<',
38
+ 'one_or_many_property<',
39
+ 'enum_property<',
40
+ 'retention_duration_property',
41
+ 'development_feature_property<',
42
+ 'hidden_when_default_property<',
43
+ ]
44
+
45
+ # Explicitly exclude common non-property types
46
+ # (unless they're wrapped in a property type)
47
+ NON_PROPERTY_TYPES = [
48
+ 'ss::sstring',
49
+ 'sstring',
50
+ 'iobuf',
51
+ 'std::string',
52
+ 'std::vector<',
53
+ 'std::optional<',
54
+ 'std::chrono::',
55
+ 'model::node_id',
56
+ 'net::unresolved_address',
57
+ 'serde::envelope<',
58
+ ]
59
+
60
+ # Check if type contains any property wrapper
61
+ has_property_wrapper = any(
62
+ wrapper in type_str or type_str.startswith(wrapper.replace('<', ''))
63
+ for wrapper in PROPERTY_WRAPPERS
64
+ )
65
+
66
+ # Quick rejection of known non-property types (unless wrapped in property)
67
+ if not has_property_wrapper:
68
+ for non_prop in NON_PROPERTY_TYPES:
69
+ if non_prop in type_str:
70
+ return False
71
+
72
+ # Check for direct property wrapper usage
73
+ if any(type_str.startswith(wrapper.replace('<', '')) or wrapper in type_str
74
+ for wrapper in PROPERTY_WRAPPERS):
75
+ return True
76
+
77
+ # Check for enterprise wrapper containing property types
78
+ if type_str.startswith('enterprise<'):
79
+ return any(wrapper in type_str for wrapper in PROPERTY_WRAPPERS)
80
+
81
+ return False
82
+
7
83
 
8
84
 
9
85
  HEADER_QUERY = """
10
86
  (field_declaration
11
- type: (_) @type
12
- (#match? @type ".*property.*")
13
- declarator: (_) @name
87
+ type: [
88
+ (type_identifier)
89
+ (template_type)
90
+ (qualified_identifier)
91
+ ] @type
92
+ declarator: (field_identifier) @name
14
93
  ) @declaration
15
94
  """
16
95
 
96
+
17
97
  # Tree-sitter query for extracting C++ property constructor arguments and enterprise values
18
98
  #
19
- # - Enhanced to capture all expression types including:
99
+ # - Capture all expression types including:
20
100
  # * call_expression: Handles function calls like model::kafka_audit_logging_topic()
21
101
  # * template_instantiation: Handles template syntax like std::vector<ss::sstring>{...}
22
102
  # * concatenated_string: Handles C++ string concatenation with +
@@ -71,6 +151,21 @@ def get_file_contents(path):
71
151
 
72
152
 
73
153
  def parse_cpp_header(treesitter_parser, cpp_language, source_code):
154
+ """
155
+ Parses a C++ configuration header file to extract property declarations
156
+ and classify them by type (enterprise, deprecated, bounded, etc.).
157
+
158
+ Detects and annotates:
159
+ - is_enterprise
160
+ - is_deprecated
161
+ - is_bounded
162
+ - is_enum
163
+ - is_one_or_many
164
+ - is_enterprise_wrapper
165
+ - base_property_type (the inner C++ type, if extractable)
166
+ - property_kinds (list of wrapper kinds, e.g. ['enterprise', 'bounded'])
167
+ """
168
+
74
169
  query = cpp_language.query(HEADER_QUERY)
75
170
  tree = treesitter_parser.parse(source_code)
76
171
 
@@ -83,11 +178,51 @@ def parse_cpp_header(treesitter_parser, cpp_language, source_code):
83
178
  for node, label in captures:
84
179
  if label == "name":
85
180
  property_name = node.text.decode("utf-8")
181
+
182
+ # Validate this is a config property type - skip internal structs
183
+ if not _is_config_property_type(current_type):
184
+ logger.debug(f"Skipping non-property field '{property_name}' with type '{current_type}'")
185
+ current_type = None
186
+ current_declaration = None
187
+ continue
188
+
86
189
  properties[property_name]["name_in_file"] = property_name
87
190
  properties[property_name]["type"] = current_type
88
191
  properties[property_name]["declaration"] = current_declaration
192
+
193
+ t = current_type or ""
194
+
195
+ # --- Detect property wrapper kinds dynamically ---
196
+ wrapper_kinds = [
197
+ "enterprise",
198
+ "deprecated_property",
199
+ "bounded_property",
200
+ "enum_property",
201
+ "one_or_many_property",
202
+ "property",
203
+ ]
204
+
205
+ property_kinds = [k for k in wrapper_kinds if k in t]
206
+
207
+ # --- Flags for common wrappers ---
208
+ properties[property_name]["is_enterprise"] = "enterprise<" in t
209
+ properties[property_name]["is_deprecated"] = "deprecated_property" in t
210
+ properties[property_name]["is_bounded"] = "bounded_property" in t
211
+ properties[property_name]["is_enum"] = "enum_property" in t
212
+ properties[property_name]["is_one_or_many"] = "one_or_many_property" in t
213
+ properties[property_name]["is_enterprise_wrapper"] = t.strip().startswith("enterprise<")
214
+ properties[property_name]["property_kinds"] = property_kinds
215
+
216
+ # --- Extract inner property type (first 'property<...>' match; nested template arguments are cut at the first '>') ---
217
+ base_match = re.search(r'property<\s*([^>]+)\s*>', t)
218
+ if base_match:
219
+ properties[property_name]["base_property_type"] = base_match.group(1).strip()
220
+ else:
221
+ properties[property_name]["base_property_type"] = None
222
+
89
223
  current_type = None
90
224
  current_declaration = None
225
+
91
226
  elif label == "type":
92
227
  current_type = node.text.decode("utf-8")
93
228
  elif label == "declaration":
@@ -99,8 +234,21 @@ def parse_cpp_header(treesitter_parser, cpp_language, source_code):
99
234
  def __unquote_string(value):
100
235
  # placeholder to keep escaped double quotes (e.g. \"name\")
101
236
  escaped_quotes_placeholder = "$$$___quote___$$$"
237
+
238
+ # Handle C++ raw string literals: R"(content)" or R"delimiter(content)delimiter"
239
+ # First try simple case without delimiter: R"(content)"
240
+ simple_raw_match = re.match(r'^R"[(](.*)[)]"\s*$', value.strip(), re.DOTALL)
241
+ if simple_raw_match:
242
+ return simple_raw_match.group(1)
243
+
244
+ # Handle raw string with custom delimiter: R"delimiter(content)delimiter"
245
+ delimited_raw_match = re.match(r'^R"([^(]+)[(](.*)[)]\1"\s*$', value.strip(), re.DOTALL)
246
+ if delimited_raw_match:
247
+ return delimited_raw_match.group(2)
248
+
249
+ # Handle regular quoted strings
102
250
  return re.sub(
103
- r'^R?"([^"]*)"\s*$',
251
+ r'^"([^"]*)"\s*$',
104
252
  "\\1",
105
253
  re.sub(
106
254
  '\\\\"',
@@ -176,56 +324,130 @@ def __normalize_param(param, node, treesitter_parser, cpp_language, source_code)
176
324
 
177
325
 
178
326
  def parse_cpp_source(treesitter_parser, cpp_language, source_code):
327
+ """
328
+ Parse C++ source file and extract constructor arguments for each config field.
329
+
330
+ For each field initializer like:
331
+
332
+ core_balancing_continuous(
333
+ *this,
334
+ true,
335
+ false,
336
+ "core_balancing_continuous",
337
+ "If set to ...",
338
+ meta{ ... },
339
+ true,
340
+ property<bool>::noop_validator,
341
+ legacy_default<bool>{false, legacy_version{16}})
342
+
343
+ we produce:
344
+
345
+ parameters["core_balancing_continuous"]["params"] = [
346
+ { "value": "true", "type": "true" },
347
+ { "value": "false", "type": "false" },
348
+ { "value": "core_balancing_continuous", "type": "string_literal" },
349
+ { "value": "If set to ...", "type": "string_literal" },
350
+ { "value": { ... }, "type": "initializer_list" },
351
+ { "value": "true", "type": "true" },
352
+ { "value": "property<bool>::noop_validator", "type": "call_expression" },
353
+ { "value": "legacy_default<bool>{false, legacy_version{16}}", "type": "_" },
354
+ ]
355
+
356
+ (the initial `*this` is intentionally skipped).
357
+ """
179
358
  query = cpp_language.query(SOURCE_QUERY)
180
359
  tree = treesitter_parser.parse(source_code)
181
-
182
360
  captures = query.captures(tree.root_node)
183
361
 
184
- current_parameter = None
185
- state = "read_field"
186
-
187
362
  parameters = PropertyBag()
363
+ current_field = None
364
+ seen_first_argument = False
188
365
 
189
- for i in captures:
190
- node = i[0]
191
- if node.type == "field_initializer":
192
- state = "read_field"
193
-
194
- if state == "read_field" or node.type == "field_identifier":
195
- if node.type != "field_identifier":
196
- continue
197
- current_parameter = node.text.decode("utf-8")
198
- parameters[current_parameter] = PropertyBag()
199
- parameters[current_parameter]["params"] = []
200
- state = "skip_until_pointer"
201
- elif state == "skip_until_pointer":
202
- if node.type != "pointer_expression":
203
- continue
204
- state = "read_parameters"
205
- elif state == "read_parameters":
206
- param = dict(value=node.text.decode("utf-8"), type=node.type)
366
+ for node, label in captures:
367
+ # Start of a new field initializer
368
+ if label == "field" and node.type == "field_identifier":
369
+ current_field = node.text.decode("utf-8")
370
+ parameters[current_field] = PropertyBag()
371
+ parameters[current_field]["params"] = []
372
+ seen_first_argument = False
373
+ continue
374
+
375
+ # Individual arguments for the current field
376
+ if label == "argument" and current_field is not None:
377
+ raw_value = node.text.decode("utf-8").strip()
378
+
379
+ # Skip the first argument if it's the context pointer (*this)
380
+ if not seen_first_argument:
381
+ seen_first_argument = True
382
+ if raw_value == "*this":
383
+ continue # do not store *this
384
+ # If the first argument is not *this (weird edge case), fall through and record it.
385
+
386
+ param = dict(value=raw_value, type=node.type or "_")
207
387
  normalized_param = __normalize_param(
208
388
  param, node, treesitter_parser, cpp_language, source_code
209
389
  )
210
390
 
211
391
  if normalized_param:
212
- parameters[current_parameter]["params"].append(normalized_param)
392
+ parameters[current_field]["params"].append(normalized_param)
213
393
 
214
394
  return parameters
215
395
 
216
396
 
397
+
217
398
  def __merge_header_and_source_properties(header_properties, source_properties):
399
+ """
400
+ Merge header-based property metadata (types, wrappers, flags)
401
+ with source-based initialization parameters.
402
+
403
+ This function ensures:
404
+ - Header-only metadata like 'is_enterprise', 'base_property_type', etc.
405
+ are always preserved.
406
+ - Source-derived data like 'params' are merged without overwriting
407
+ header metadata.
408
+ - Missing source entries still return valid PropertyBags with only header info.
409
+ """
218
410
  properties = deepcopy(header_properties)
219
411
 
220
- for key in header_properties.keys():
412
+ for key, header_entry in header_properties.items():
413
+ merged = deepcopy(header_entry)
414
+
221
415
  if key in source_properties:
222
- properties[key].update(source_properties[key])
416
+ # Merge parameter list
417
+ source_entry = source_properties[key]
418
+ for k, v in source_entry.items():
419
+ # If the key doesn't exist in header, copy it over
420
+ # Otherwise, keep header's metadata (type flags, etc.)
421
+ if k not in merged:
422
+ merged[k] = v
423
+ elif k == "params":
424
+ # Always take params from source
425
+ merged["params"] = v
426
+
223
427
  else:
224
- return PropertyBag()
428
+ # No source info → ensure params is at least an empty list
429
+ merged["params"] = merged.get("params", [])
430
+
431
+ # Reinforce that header metadata should not be lost
432
+ for meta_key in [
433
+ "type",
434
+ "declaration",
435
+ "is_enterprise",
436
+ "is_deprecated",
437
+ "is_bounded",
438
+ "is_enum",
439
+ "is_one_or_many",
440
+ "is_enterprise_wrapper",
441
+ "base_property_type",
442
+ "property_kinds",
443
+ ]:
444
+ if meta_key not in merged and meta_key in header_entry:
445
+ merged[meta_key] = header_entry[meta_key]
446
+
447
+ properties[key] = merged
225
448
 
226
449
  return properties
227
450
 
228
-
229
451
  def extract_properties_from_file_pair(
230
452
  treesitter_parser, cpp_language, file_pair: FilePair
231
453
  ):
@@ -1,4 +1,44 @@
1
1
  class PropertyBag(dict):
2
+ """
3
+ A recursive, auto-expanding dictionary used throughout the configuration parser.
4
+
5
+ This class behaves like a normal Python `dict`, but when you access a missing key,
6
+ it automatically creates and inserts another `PropertyBag` at that key instead of
7
+ raising a `KeyError`.
8
+
9
+ This makes it convenient for building up deeply nested structures incrementally
10
+ without having to check whether intermediate keys already exist.
11
+
12
+ Example
13
+ -------
14
+ >>> props = PropertyBag()
15
+ >>> props["core_balancing_continuous"]["type"] = "property<bool>"
16
+ >>> props
17
+ {'core_balancing_continuous': {'type': 'property<bool>'}}
18
+
19
+ How it works
20
+ ------------
21
+ - The __missing__ method is called automatically by dict.__getitem__()
22
+ when a requested key is not present.
23
+ - Instead of raising KeyError, we insert and return a new PropertyBag(),
24
+ enabling seamless nested assignment.
25
+
26
+ Typical usage in the parser
27
+ ----------------------------
28
+ PropertyBag is used to accumulate data while parsing:
29
+ - Configuration property declarations from the header file.
30
+ - Constructor argument lists from the C++ source file.
31
+ - Metadata fields from nested initializer lists.
32
+
33
+ Because the parser doesn’t know in advance which keys will appear,
34
+ this auto-expanding structure keeps the code simple and robust:
35
+
36
+ parameters[field]["params"] = []  # assign the list first; a missing key yields a PropertyBag, not a list
37
+ header_properties[name]["type"] = cpp_type
38
+
39
+ Both of these lines work safely even if `field` or `name` did not previously exist.
40
+ """
41
+
2
42
  def __missing__(self, key):
3
43
  self[key] = PropertyBag()
4
44
  return self[key]