@redpanda-data/docs-extensions-and-macros 4.11.0 → 4.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/doc-tools.js +4 -2
- package/extensions/convert-to-markdown.js +17 -1
- package/package.json +3 -1
- package/tools/property-extractor/COMPUTED_CONSTANTS.md +173 -0
- package/tools/property-extractor/Makefile +12 -1
- package/tools/property-extractor/README.adoc +828 -97
- package/tools/property-extractor/compare-properties.js +38 -13
- package/tools/property-extractor/constant_resolver.py +610 -0
- package/tools/property-extractor/file_pair.py +42 -0
- package/tools/property-extractor/generate-handlebars-docs.js +41 -8
- package/tools/property-extractor/helpers/gt.js +9 -0
- package/tools/property-extractor/helpers/includes.js +17 -0
- package/tools/property-extractor/helpers/index.js +3 -0
- package/tools/property-extractor/helpers/isEnterpriseEnum.js +24 -0
- package/tools/property-extractor/helpers/renderPropertyExample.js +6 -5
- package/tools/property-extractor/overrides.json +248 -0
- package/tools/property-extractor/parser.py +254 -32
- package/tools/property-extractor/property_bag.py +40 -0
- package/tools/property-extractor/property_extractor.py +1417 -430
- package/tools/property-extractor/requirements.txt +1 -0
- package/tools/property-extractor/templates/property-backup.hbs +161 -0
- package/tools/property-extractor/templates/property.hbs +104 -49
- package/tools/property-extractor/templates/topic-property-backup.hbs +148 -0
- package/tools/property-extractor/templates/topic-property.hbs +72 -34
- package/tools/property-extractor/tests/test_known_values.py +617 -0
- package/tools/property-extractor/tests/transformers_test.py +81 -6
- package/tools/property-extractor/topic_property_extractor.py +23 -10
- package/tools/property-extractor/transformers.py +2191 -369
- package/tools/property-extractor/type_definition_extractor.py +669 -0
- package/tools/property-extractor/definitions.json +0 -245
|
@@ -4,19 +4,99 @@ from property_bag import PropertyBag
|
|
|
4
4
|
from copy import deepcopy
|
|
5
5
|
import itertools as it
|
|
6
6
|
import re
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
# Wrapper names that mark a C++ field as a Redpanda configuration property.
# Entries ending in '<' are matched as template names; the entry without '<'
# ('retention_duration_property') is matched as a bare name.
_PROPERTY_WRAPPERS = (
    'property<',
    'bounded_property<',
    'deprecated_property<',
    'one_or_many_property<',
    'enum_property<',
    'retention_duration_property',
    'development_feature_property<',
    'hidden_when_default_property<',
)

# The same names with the template bracket removed, used for prefix matching.
_PROPERTY_WRAPPER_PREFIXES = tuple(
    wrapper.replace('<', '') for wrapper in _PROPERTY_WRAPPERS
)


def _is_config_property_type(type_str):
    """
    Check if a C++ type is a Redpanda configuration property.

    A type counts as a property when, after stripping surrounding whitespace,
    it either starts with a known wrapper name or contains a known wrapper
    anywhere in the string. The containment test is what accepts nested forms
    such as:
    - enterprise<property<T>>
    - enterprise<bounded_property<T>>

    Everything else is rejected, for example:
    - connection_cfg, consumer_cfg, ack_level, proxy_request
    - ss::sstring, iobuf, std::vector<T>, std::optional<T>

    NOTE: matching is purely textual. Any type whose text starts with a
    wrapper name (even without '<') or embeds 'property<' is accepted.

    Earlier revisions also carried an explicit deny-list of primitive types
    and a dedicated 'enterprise<' branch. Neither could change the result
    (both only ran when no wrapper was present, where the answer is already
    False), so they were removed without altering behaviour.

    Args:
        type_str: Text of the C++ type, or None/empty.

    Returns:
        True if the type text matches a property wrapper, otherwise False.
    """
    if not type_str:
        return False

    type_str = type_str.strip()

    # str.startswith accepts a tuple, so one call covers every prefix.
    if type_str.startswith(_PROPERTY_WRAPPER_PREFIXES):
        return True

    return any(wrapper in type_str for wrapper in _PROPERTY_WRAPPERS)
|
|
82
|
+
|
|
7
83
|
|
|
8
84
|
|
|
9
85
|
HEADER_QUERY = """
|
|
10
86
|
(field_declaration
|
|
11
|
-
type:
|
|
12
|
-
|
|
13
|
-
|
|
87
|
+
type: [
|
|
88
|
+
(type_identifier)
|
|
89
|
+
(template_type)
|
|
90
|
+
(qualified_identifier)
|
|
91
|
+
] @type
|
|
92
|
+
declarator: (field_identifier) @name
|
|
14
93
|
) @declaration
|
|
15
94
|
"""
|
|
16
95
|
|
|
96
|
+
|
|
17
97
|
# Tree-sitter query for extracting C++ property constructor arguments and enterprise values
|
|
18
98
|
#
|
|
19
|
-
# -
|
|
99
|
+
# - Capture all expression types including:
|
|
20
100
|
# * call_expression: Handles function calls like model::kafka_audit_logging_topic()
|
|
21
101
|
# * template_instantiation: Handles template syntax like std::vector<ss::sstring>{...}
|
|
22
102
|
# * concatenated_string: Handles C++ string concatenation with +
|
|
@@ -71,6 +151,21 @@ def get_file_contents(path):
|
|
|
71
151
|
|
|
72
152
|
|
|
73
153
|
def parse_cpp_header(treesitter_parser, cpp_language, source_code):
|
|
154
|
+
"""
|
|
155
|
+
Parses a C++ configuration header file to extract property declarations
|
|
156
|
+
and classify them by type (enterprise, deprecated, bounded, etc.).
|
|
157
|
+
|
|
158
|
+
Detects and annotates:
|
|
159
|
+
- is_enterprise
|
|
160
|
+
- is_deprecated
|
|
161
|
+
- is_bounded
|
|
162
|
+
- is_enum
|
|
163
|
+
- is_one_or_many
|
|
164
|
+
- is_enterprise_wrapper
|
|
165
|
+
- base_property_type (the inner C++ type, if extractable)
|
|
166
|
+
- property_kinds (list of wrapper kinds, e.g. ['enterprise', 'bounded'])
|
|
167
|
+
"""
|
|
168
|
+
|
|
74
169
|
query = cpp_language.query(HEADER_QUERY)
|
|
75
170
|
tree = treesitter_parser.parse(source_code)
|
|
76
171
|
|
|
@@ -83,11 +178,51 @@ def parse_cpp_header(treesitter_parser, cpp_language, source_code):
|
|
|
83
178
|
for node, label in captures:
|
|
84
179
|
if label == "name":
|
|
85
180
|
property_name = node.text.decode("utf-8")
|
|
181
|
+
|
|
182
|
+
# Validate this is a config property type - skip internal structs
|
|
183
|
+
if not _is_config_property_type(current_type):
|
|
184
|
+
logger.debug(f"Skipping non-property field '{property_name}' with type '{current_type}'")
|
|
185
|
+
current_type = None
|
|
186
|
+
current_declaration = None
|
|
187
|
+
continue
|
|
188
|
+
|
|
86
189
|
properties[property_name]["name_in_file"] = property_name
|
|
87
190
|
properties[property_name]["type"] = current_type
|
|
88
191
|
properties[property_name]["declaration"] = current_declaration
|
|
192
|
+
|
|
193
|
+
t = current_type or ""
|
|
194
|
+
|
|
195
|
+
# --- Detect property wrapper kinds dynamically ---
|
|
196
|
+
wrapper_kinds = [
|
|
197
|
+
"enterprise",
|
|
198
|
+
"deprecated_property",
|
|
199
|
+
"bounded_property",
|
|
200
|
+
"enum_property",
|
|
201
|
+
"one_or_many_property",
|
|
202
|
+
"property",
|
|
203
|
+
]
|
|
204
|
+
|
|
205
|
+
property_kinds = [k for k in wrapper_kinds if k in t]
|
|
206
|
+
|
|
207
|
+
# --- Flags for common wrappers ---
|
|
208
|
+
properties[property_name]["is_enterprise"] = "enterprise<" in t
|
|
209
|
+
properties[property_name]["is_deprecated"] = "deprecated_property" in t
|
|
210
|
+
properties[property_name]["is_bounded"] = "bounded_property" in t
|
|
211
|
+
properties[property_name]["is_enum"] = "enum_property" in t
|
|
212
|
+
properties[property_name]["is_one_or_many"] = "one_or_many_property" in t
|
|
213
|
+
properties[property_name]["is_enterprise_wrapper"] = t.strip().startswith("enterprise<")
|
|
214
|
+
properties[property_name]["property_kinds"] = property_kinds
|
|
215
|
+
|
|
216
|
+
# --- Extract inner property type (recursively handles nesting) ---
|
|
217
|
+
base_match = re.search(r'property<\s*([^>]+)\s*>', t)
|
|
218
|
+
if base_match:
|
|
219
|
+
properties[property_name]["base_property_type"] = base_match.group(1).strip()
|
|
220
|
+
else:
|
|
221
|
+
properties[property_name]["base_property_type"] = None
|
|
222
|
+
|
|
89
223
|
current_type = None
|
|
90
224
|
current_declaration = None
|
|
225
|
+
|
|
91
226
|
elif label == "type":
|
|
92
227
|
current_type = node.text.decode("utf-8")
|
|
93
228
|
elif label == "declaration":
|
|
@@ -99,8 +234,21 @@ def parse_cpp_header(treesitter_parser, cpp_language, source_code):
|
|
|
99
234
|
def __unquote_string(value):
|
|
100
235
|
# placeholder to keep escaped double quotes (e.g. \"name\")
|
|
101
236
|
escaped_quotes_placeholder = "$$$___quote___$$$"
|
|
237
|
+
|
|
238
|
+
# Handle C++ raw string literals: R"(content)" or R"delimiter(content)delimiter"
|
|
239
|
+
# First try simple case without delimiter: R"(content)"
|
|
240
|
+
simple_raw_match = re.match(r'^R"[(](.*)[)]"\s*$', value.strip(), re.DOTALL)
|
|
241
|
+
if simple_raw_match:
|
|
242
|
+
return simple_raw_match.group(1)
|
|
243
|
+
|
|
244
|
+
# Handle raw string with custom delimiter: R"delimiter(content)delimiter"
|
|
245
|
+
delimited_raw_match = re.match(r'^R"([^(]+)[(](.*)[)]\1"\s*$', value.strip(), re.DOTALL)
|
|
246
|
+
if delimited_raw_match:
|
|
247
|
+
return delimited_raw_match.group(2)
|
|
248
|
+
|
|
249
|
+
# Handle regular quoted strings
|
|
102
250
|
return re.sub(
|
|
103
|
-
r'^
|
|
251
|
+
r'^"([^"]*)"\s*$',
|
|
104
252
|
"\\1",
|
|
105
253
|
re.sub(
|
|
106
254
|
'\\\\"',
|
|
@@ -176,56 +324,130 @@ def __normalize_param(param, node, treesitter_parser, cpp_language, source_code)
|
|
|
176
324
|
|
|
177
325
|
|
|
178
326
|
def parse_cpp_source(treesitter_parser, cpp_language, source_code):
    """
    Collect the constructor arguments of every config field initializer.

    The source is parsed with tree-sitter and queried with SOURCE_QUERY.
    Captures are consumed in order:

    - a "field" capture on a field_identifier node starts a new entry,
      ``parameters[<field name>]["params"] = []``;
    - each following "argument" capture is turned into
      ``{"value": <stripped argument text>, "type": <node type or "_">}``,
      passed through __normalize_param, and appended to that entry when the
      normalized result is truthy.

    For an initializer such as

        core_balancing_continuous(
            *this,
            true,
            "core_balancing_continuous",
            "If set to ...",
            meta{ ... })

    the leading ``*this`` is dropped, so the recorded list starts with the
    entry for ``true``. If the first argument is anything other than
    ``*this`` it is recorded like every other argument.

    Returns:
        A PropertyBag mapping each field name to a PropertyBag holding its
        "params" list.
    """
    field_query = cpp_language.query(SOURCE_QUERY)
    syntax_tree = treesitter_parser.parse(source_code)

    parameters = PropertyBag()
    active_field = None
    awaiting_first_argument = True

    for node, label in field_query.captures(syntax_tree.root_node):
        # A field identifier opens a fresh initializer.
        if label == "field" and node.type == "field_identifier":
            active_field = node.text.decode("utf-8")
            parameters[active_field] = PropertyBag(params=[])
            awaiting_first_argument = True
            continue

        # Only arguments that belong to an already opened field are relevant.
        if label != "argument" or active_field is None:
            continue

        argument_text = node.text.decode("utf-8").strip()

        if awaiting_first_argument:
            awaiting_first_argument = False
            # The context pointer is never stored.
            if argument_text == "*this":
                continue

        normalized = __normalize_param(
            {"value": argument_text, "type": node.type or "_"},
            node,
            treesitter_parser,
            cpp_language,
            source_code,
        )

        if normalized:
            parameters[active_field]["params"].append(normalized)

    return parameters
|
|
215
395
|
|
|
216
396
|
|
|
397
|
+
|
|
217
398
|
def __merge_header_and_source_properties(header_properties, source_properties):
    """
    Merge header-based property metadata (types, wrappers, flags)
    with source-based initialization parameters.

    The result contains exactly the keys of ``header_properties``; entries
    that exist only in ``source_properties`` are ignored. For each property:

    - every header field is kept as-is (header metadata always wins);
    - source fields that the header entry lacks are copied over;
    - "params" is always taken from the source entry when it provides one;
    - "params" is guaranteed to exist afterwards, defaulting to an empty
      list when neither side supplies it.

    Neither input is modified, and the result shares no mutable objects with
    them: the header is deep-copied once and source values are deep-copied as
    they are merged in.

    Args:
        header_properties: Mapping of property name to header metadata.
        source_properties: Mapping of property name to source-derived data
            (typically ``{"params": [...]}``).

    Returns:
        A deep copy of ``header_properties`` with source data merged in.
    """
    # A single deep copy is enough: the copied entries are then updated in
    # place. Only inner dicts gain keys, so iterating the outer dict is safe.
    properties = deepcopy(header_properties)

    for key, merged in properties.items():
        # Membership test first so an auto-expanding mapping is not grown
        # by probing for a key it does not have.
        if key in source_properties:
            for field, value in source_properties[key].items():
                if field == "params" or field not in merged:
                    merged[field] = deepcopy(value)

        # Callers can rely on "params" being present whether or not a source
        # entry existed.
        merged.setdefault("params", [])

    return properties
|
|
227
450
|
|
|
228
|
-
|
|
229
451
|
def extract_properties_from_file_pair(
|
|
230
452
|
treesitter_parser, cpp_language, file_pair: FilePair
|
|
231
453
|
):
|
|
@@ -1,4 +1,44 @@
|
|
|
1
1
|
class PropertyBag(dict):
    """
    A recursive, auto-expanding dictionary used throughout the configuration parser.

    This class behaves like a normal Python `dict`, but when you read a missing key
    with `bag[key]`, it creates and inserts another `PropertyBag` at that key instead
    of raising a `KeyError`.

    This makes it convenient for building up deeply nested structures incrementally
    without having to check whether intermediate keys already exist.

    Example
    -------
    >>> props = PropertyBag()
    >>> props["core_balancing_continuous"]["type"] = "property<bool>"
    >>> props
    {'core_balancing_continuous': {'type': 'property<bool>'}}

    How it works
    ------------
    - The __missing__ method is called automatically by dict.__getitem__()
      when a requested key is not present.
    - Instead of raising KeyError, we insert and return a new PropertyBag(),
      enabling seamless nested assignment.

    Caveats
    -------
    - A missing key always materialises as a PropertyBag, never as a list.
      A list-valued field must be assigned before it is appended to:

          parameters[field]["params"] = []
          parameters[field]["params"].append(param)

      Calling `.append` on a key that was never assigned raises
      AttributeError, because the auto-created value is a PropertyBag.
    - Merely reading `bag[key]` inserts the key. Use `key in bag` or
      `bag.get(key)` to probe without modifying the bag; neither goes through
      __missing__.
    """

    def __missing__(self, key):
        # Store the new child before returning it so that later lookups of
        # the same key yield the same object.
        child = PropertyBag()
        self[key] = child
        return child
|