@redpanda-data/docs-extensions-and-macros 4.7.4 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/doc-tools.js CHANGED
@@ -777,6 +777,41 @@ automation
      process.exit(0);
    });
 
+ automation
+   .command('topic-property-docs')
+   .description('Generate JSON and AsciiDoc documentation for Redpanda topic configuration properties')
+   .option('--tag <tag>', 'Git tag or branch to extract from', 'dev')
+   .option('--diff <oldTag>', 'Also diff autogenerated topic properties from <oldTag> → <tag>')
+   .action((options) => {
+     verifyPropertyDependencies();
+
+     const newTag = options.tag;
+     const oldTag = options.diff;
+     const cwd = path.resolve(__dirname, '../tools/property-extractor');
+     const make = (tag) => {
+       console.log(`⏳ Building topic property docs for ${tag}…`);
+       const r = spawnSync('make', ['topic-properties', `TAG=${tag}`], { cwd, stdio: 'inherit' });
+       if (r.error) {
+         console.error(`❌ ${r.error.message}`);
+         process.exit(1);
+       }
+       if (r.status !== 0) process.exit(r.status);
+     };
+
+     if (oldTag) {
+       const oldDir = path.join('autogenerated', oldTag, 'properties');
+       if (!fs.existsSync(oldDir)) make(oldTag);
+     }
+
+     make(newTag);
+
+     if (oldTag) {
+       diffDirs('properties', oldTag, newTag);
+     }
+
+     process.exit(0);
+   });
+
  automation
    .command('rpk-docs')
    .description('Generate AsciiDoc documentation for rpk CLI commands')
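
For orientation: the new subcommand shells out to the property extractor's Makefile, and when `--diff` is passed it builds the old tag first so both output directories exist before diffing. A hedged usage sketch follows; the `doc-tools automation` invocation shape and the tag values are assumptions for illustration, not taken from the diff:

    doc-tools automation topic-property-docs --tag v25.1.1
    doc-tools automation topic-property-docs --tag v25.1.1 --diff v24.3.1
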
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@redpanda-data/docs-extensions-and-macros",
-   "version": "4.7.4",
+   "version": "4.8.0",
    "description": "Antora extensions and macros developed for Redpanda documentation.",
    "keywords": [
      "antora",
package/tools/property-extractor/Makefile CHANGED
@@ -1,4 +1,16 @@
- .PHONY: build venv clean redpanda-git treesitter generate-docs check
+ .PHONY: build venv clean redpanda-git treesitter topic-properties generate-docs check
+
+ # --- Main build: venv, fetch code, build parser, extract & docgen ---
+ build: venv redpanda-git treesitter
+ 	@echo "🔧 Building with Redpanda tag: $(TAG)"
+ 	@mkdir -p $(TOOL_ROOT)/gen
+ 	@cd $(TOOL_ROOT) && \
+ 		$(PYTHON) -W ignore::FutureWarning property_extractor.py \
+ 			--recursive \
+ 			--path $(REDPANDA_SRC) \
+ 			--output gen/properties-output.json
+ 	@echo "✅ Cluster properties JSON generated at $(TOOL_ROOT)/gen/properties-output.json"
+ 	@$(MAKE) generate-docs
 
  # Default tag (can be overridden via `make TAG=v25.1.1`)
  TAG ?= dev
@@ -25,18 +37,6 @@ PYTHON := $(VENV)/bin/python
  OUTPUT_DIR := $(REPO_ROOT)/autogenerated/$(TAG)/properties
  TREE_SITTER := npx tree-sitter
 
- # --- Main build: venv, fetch code, build parser, extract & docgen ---
- build: venv redpanda-git treesitter
- 	@echo "🔧 Building with Redpanda tag: $(TAG)"
- 	@mkdir -p $(TOOL_ROOT)/gen
- 	@cd $(TOOL_ROOT) && \
- 		$(PYTHON) -W ignore::FutureWarning property_extractor.py \
- 			--recursive \
- 			--path $(REDPANDA_SRC) \
- 			--output gen/properties-output.json
- 	@echo "✅ JSON generated at $(TOOL_ROOT)/gen/properties-output.json"
- 	@$(MAKE) generate-docs
-
  # --- Ensure Python venv & dependencies ---
  venv: $(TOOL_ROOT)/requirements.txt
  	@if [ ! -d "$(VENV)" ]; then \
@@ -103,3 +103,15 @@ check:
  	@echo "VENV: $(VENV)"
  	@echo "PYTHON: $(PYTHON)"
  	@echo "OUTPUT_DIR: $(OUTPUT_DIR)"
+
+ # --- Extract topic properties ---
+ topic-properties: venv redpanda-git treesitter
+ 	@echo "🔧 Extracting topic properties with Redpanda tag: $(TAG)"
+ 	@mkdir -p $(TOOL_ROOT)/gen
+ 	@mkdir -p "$(OUTPUT_DIR)"
+ 	@cd $(TOOL_ROOT) && \
+ 		$(PYTHON) topic_property_extractor.py \
+ 			--source-path $(REDPANDA_SRC) \
+ 			--output-json "$(OUTPUT_DIR)/topic-properties-output.json" \
+ 			--output-adoc "$(OUTPUT_DIR)/topic-properties.adoc"
+ 	@echo "✅ Topic properties extracted"
@@ -14,6 +14,7 @@ ERROR_FOLDER_NAME = "error"
  OUTPUT_FILE_BROKER = "broker-properties.adoc"
  OUTPUT_FILE_CLUSTER = "cluster-properties.adoc"
  OUTPUT_FILE_CLOUD = "object-storage-properties.adoc"
+ OUTPUT_FILE_TOPIC = "topic-properties.adoc"
  OUTPUT_FILE_DEPRECATED = os.path.join("deprecated", "partials", "deprecated-properties.adoc")
  ALL_PROPERTIES_FILE = "all_properties.txt"
 
@@ -66,6 +67,20 @@ CLUSTER_CONFIG_INTRO = (
  )
  CLUSTER_CONFIG_TITLE = "== Cluster configuration\n\n"
 
+ TOPIC_PAGE_TITLE = (
+     "= Topic Configuration Properties\n"
+     ":page-aliases: reference:topic-properties.adoc\n"
+     ":description: Reference of topic configuration properties.\n\n"
+ )
+
+ TOPIC_INTRO = (
+     "A topic-level property sets a Redpanda or Kafka configuration for a particular topic.\n\n"
+     "Many topic-level properties have corresponding xref:manage:cluster-maintenance/cluster-property-configuration.adoc[cluster properties] that set a default value for all topics of a cluster. To customize the value for a topic, you can set a topic-level property that overrides the value of the corresponding cluster property.\n\n"
+     "NOTE: All topic properties take effect immediately after being set.\n\n"
+ )
+
+ TOPIC_CONFIG_TITLE = "== Topic configuration\n\n"
+
  CLOUD_PAGE_TITLE = (
      "= Object Storage Properties\n"
      ":description: Reference of object storage properties.\n\n"
@@ -92,7 +107,8 @@ DEFINED_IN_MAPPING = {
      "src/v/pandaproxy/schema_registry/configuration.cc": "schema reg",
      "src/v/pandaproxy/rest/configuration.cc": "http proxy",
      "src/v/kafka/client/configuration.cc": "http client",
-     "src/v/config/configuration.cc": "cluster"
+     "src/v/config/configuration.cc": "cluster",
+     "src/v/kafka/server/handlers/topics/types.cc": "topic"
  }
 
  SUFFIX_TO_UNIT = {
@@ -339,6 +355,7 @@ def main():
      kafka_client_content = []
      cluster_config_content = []
      cloud_config_content = []
+     topic_config_content = []
      deprecated_broker_content = []
      deprecated_cluster_content = []
      all_properties = []
@@ -388,6 +405,7 @@ def main():
          "http client": kafka_client_content,
          "cluster": cluster_config_content,
          "cloud": cloud_config_content,
+         "topic": topic_config_content,
      }
      if group in group_mapping:
          group_mapping[group].append(property_doc)
@@ -423,6 +441,12 @@ def main():
          + CLOUD_CONFIG_TITLE
          + "".join(cloud_config_content)
      )
+     topic_page = (
+         TOPIC_PAGE_TITLE
+         + TOPIC_INTRO
+         + TOPIC_CONFIG_TITLE
+         + "".join(topic_config_content)
+     )
      deprecated_page = (
          DEPRECATED_PROPERTIES_TITLE
          + DEPRECATED_PROPERTIES_INTRO
@@ -436,6 +460,7 @@ def main():
      write_data_to_file(page_folder, OUTPUT_FILE_BROKER, broker_page)
      write_data_to_file(page_folder, OUTPUT_FILE_CLUSTER, cluster_page)
      write_data_to_file(page_folder, OUTPUT_FILE_CLOUD, cloud_page)
+     write_data_to_file(page_folder, OUTPUT_FILE_TOPIC, topic_page)
      write_data_to_file(page_folder, OUTPUT_FILE_DEPRECATED, deprecated_page)
      write_data_to_file(output_dir, ALL_PROPERTIES_FILE, "\n".join(all_properties))
 
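To make the one-line mapping change above concrete: a property's defining source file selects which page group it lands in, so properties defined in the Kafka topics handler now flow into the new topic page. A minimal sketch of that routing follows; the `group_for` helper and its "cluster" fallback are hypothetical, only the mapping entries come from the diff:

# Hypothetical sketch of the routing implied by DEFINED_IN_MAPPING.
DEFINED_IN_MAPPING = {
    "src/v/config/configuration.cc": "cluster",
    "src/v/kafka/server/handlers/topics/types.cc": "topic",
}

def group_for(defined_in: str) -> str:
    # The fallback group is an assumption for this sketch, not taken from the script.
    return DEFINED_IN_MAPPING.get(defined_in, "cluster")

assert group_for("src/v/kafka/server/handlers/topics/types.cc") == "topic"
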
package/tools/property-extractor/topic_property_extractor.py ADDED
@@ -0,0 +1,630 @@
+ #!/usr/bin/env python3
+ import os
+ import re
+ import json
+ import argparse
+ from pathlib import Path
+ import sys
+ from typing import Dict, List, Optional
+
+
+ class TopicPropertyExtractor:
+     def __init__(self, source_path: str):
+         self.source_path = Path(source_path)
+         self.topic_properties = {}
+         self.cluster_mappings = {}
+         self.enum_values = {}
+         self.noop_properties = set()
+
+     def extract_topic_properties(self) -> Dict:
+         """Extract topic property constants from source files"""
+
+         # Step 1: Discover all topic property constants
+         self._discover_topic_properties()
+
+         # Step 2: Find enum definitions for acceptable values
+         self._discover_enum_values()
+
+         # Step 3: Discover no-op properties
+         self._discover_noop_properties()
+
+         # Step 4: Discover cluster property mappings from source code
+         self._discover_cluster_mappings()
+
+         # Step 5: Match properties with their validators and mappings
+         self._correlate_properties_with_data()
+
+         return {
+             "topic_properties": self.topic_properties,
+             "cluster_mappings": self.cluster_mappings,
+             "enum_values": self.enum_values,
+             "noop_properties": list(self.noop_properties)
+         }
+
+     def _discover_topic_properties(self):
+         """Dynamically discover all topic property constants from source files"""
+
+         # Priority files - parse these first with the most comprehensive patterns
+         priority_files = [
+             "src/v/kafka/server/handlers/topics/types.h",
+             "src/v/kafka/protocol/topic_properties.h",
+             "src/v/cluster/topic_properties.h",
+         ]
+
+         for file_pattern in priority_files:
+             file_path = self.source_path / file_pattern
+             if file_path.exists():
+                 self._parse_topic_properties_from_file(file_path)
+
+         # Comprehensive search - scan all header files that might contain properties
+         search_patterns = [
+             "src/**/*topic*.h",
+             "src/**/*kafka*.h",
+             "src/**/*handler*.h",
+             "src/**/*config*.h",
+             "src/**/*property*.h",
+         ]
+
+         scanned_files = set()
+         for pattern in search_patterns:
+             for header_file in self.source_path.glob(pattern):
+                 if header_file not in scanned_files:
+                     scanned_files.add(header_file)
+                     self._scan_file_for_topic_properties(header_file)
+
+         # Also scan the specific types.h file that we know contains many properties
+         types_files = list(self.source_path.glob("src/**/types.h"))
+         for types_file in types_files:
+             if types_file not in scanned_files:
+                 self._scan_file_for_topic_properties(types_file)
+
+     def _parse_topic_properties_from_file(self, file_path: Path):
+         """Parse topic property constants from a specific file"""
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 content = f.read()
+
+             # Multiple patterns to catch all possible property definitions
+             patterns = [
+                 # Pattern 1: inline constexpr std::string_view topic_property_xxx = "yyy";
+                 r'inline\s+constexpr\s+std::string_view\s+topic_property_(\w+)\s*=\s*"([^"]+)"\s*;',
+                 # Pattern 2: constexpr std::string_view topic_property_xxx = "yyy";
+                 r'constexpr\s+std::string_view\s+topic_property_(\w+)\s*=\s*"([^"]+)"\s*;',
+                 # Pattern 3: const std::string topic_property_xxx = "yyy";
+                 r'const\s+std::string\s+topic_property_(\w+)\s*=\s*"([^"]+)"\s*;',
+                 # Pattern 4: static const char* topic_property_xxx = "yyy";
+                 r'static\s+const\s+char\*\s+topic_property_(\w+)\s*=\s*"([^"]+)"\s*;',
+             ]
+
+             total_matches = 0
+             for pattern in patterns:
+                 matches = re.findall(pattern, content)
+                 total_matches += len(matches)
+
+                 for var_name, property_name in matches:
+                     # Only add if not already found (prefer inline constexpr definitions)
+                     if property_name not in self.topic_properties:
+                         self.topic_properties[property_name] = {
+                             "variable_name": f"topic_property_{var_name}",
+                             "property_name": property_name,
+                             "source_file": str(file_path.relative_to(self.source_path)),
+                             "description": "",
+                             "type": self._determine_property_type(property_name),
+                             "acceptable_values": None,
+                             "corresponding_cluster_property": None,
+                             "is_noop": False  # Will be updated later in _correlate_properties_with_data
+                         }
+             print(f"Found {total_matches} topic properties in {file_path}")
+         except Exception as e:
+             print(f"Error reading {file_path}: {e}")
+
+     def _scan_file_for_topic_properties(self, file_path: Path):
+         """Scan any file for topic_property_ constants"""
+         try:
+             with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                 content = f.read()
+
+             # Enhanced patterns to catch all property definitions
+             patterns = [
+                 # Pattern 1: inline constexpr std::string_view topic_property_xxx = "yyy";
+                 r'inline\s+constexpr\s+std::string_view\s+topic_property_(\w+)\s*=\s*"([^"]+)"\s*;',
+                 # Pattern 2: constexpr std::string_view topic_property_xxx = "yyy";
+                 r'constexpr\s+std::string_view\s+topic_property_(\w+)\s*=\s*"([^"]+)"\s*;',
+                 # Pattern 3: topic_property_xxx = "yyy" (simple assignment)
+                 r'topic_property_(\w+)\s*=\s*"([^"]+)"',
+                 # Pattern 4: const std::string topic_property_xxx = "yyy";
+                 r'const\s+std::string\s+topic_property_(\w+)\s*=\s*"([^"]+)"\s*;',
+                 # Pattern 5: Look for string literals that look like topic properties
+                 r'"((?:redpanda\.|cleanup\.|compression\.|segment\.|flush\.|delete\.|replication\.|write\.|min\.|max\.|confluent\.)[^"]+)"'
+             ]
+
+             for pattern in patterns:
+                 matches = re.findall(pattern, content)
+
+                 for match in matches:
+                     if len(match) == 2:
+                         # Regular patterns with var_name and property_name
+                         var_name, property_name = match
+                     else:
+                         # String literal pattern - generate var_name from property_name
+                         property_name = match
+                         var_name = re.sub(r'[^a-zA-Z0-9_]', '_', property_name)
+                         var_name = re.sub(r'_+', '_', var_name).strip('_')
+
+                     # Validate this looks like a real topic property
+                     if self._is_valid_topic_property(property_name) and property_name not in self.topic_properties:
+                         self.topic_properties[property_name] = {
+                             "variable_name": f"topic_property_{var_name}",
+                             "property_name": property_name,
+                             "source_file": str(file_path.relative_to(self.source_path)),
+                             "description": "",
+                             "type": self._determine_property_type(property_name),
+                             "acceptable_values": None,
+                             "corresponding_cluster_property": None,
+                             "is_noop": False  # Will be updated later in _correlate_properties_with_data
+                         }
+         except Exception as e:
+             print(f"Debug: Skipping {file_path}: {e}", file=sys.stderr)
+
+     def _discover_enum_values(self):
+         """Discover enum definitions that correspond to topic property acceptable values"""
+
+         # Key enum files for topic property validation
+         enum_files = [
+             "src/v/model/compression.h",
+             "src/v/model/fundamental.h",
+             "src/v/model/timestamp.h",
+         ]
+
+         for file_pattern in enum_files:
+             file_path = self.source_path / file_pattern
+             if file_path.exists():
+                 self._parse_enums_from_file(file_path)
+
+         # Also search other model files for enums
+         for header_file in self.source_path.glob("src/v/model/**/*.h"):
+             self._scan_file_for_enums(header_file)
+
+     def _discover_noop_properties(self):
+         """Discover no-op properties from the allowlist_topic_noop_confs array"""
+
+         # Look for the allowlist in types.h file
+         types_file = self.source_path / "src/v/kafka/server/handlers/topics/types.h"
+         if not types_file.exists():
+             print("Warning: types.h file not found for no-op property detection")
+             return
+
+         try:
+             with open(types_file, 'r', encoding='utf-8') as f:
+                 content = f.read()
+
+             # Pattern to match the allowlist_topic_noop_confs array
+             # Looks for the array declaration and captures all string literals within it
+             pattern = r'allowlist_topic_noop_confs\s*=\s*\{([^}]+)\}'
+             match = re.search(pattern, content, re.DOTALL)
+
+             if match:
+                 array_content = match.group(1)
+                 # Extract all quoted strings from the array
+                 string_pattern = r'"([^"]+)"'
+                 noop_properties = re.findall(string_pattern, array_content)
+
+                 self.noop_properties = set(noop_properties)
+                 print(f"Found {len(self.noop_properties)} no-op properties")
+             else:
+                 print("Warning: allowlist_topic_noop_confs array not found in types.h")
+
+         except Exception as e:
+             print(f"Error reading no-op properties from {types_file}: {e}")
+
+     def _parse_enums_from_file(self, file_path: Path):
+         """Parse enum definitions from a file"""
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 content = f.read()
+
+             # Pattern for enum class definitions
+             enum_pattern = r'enum\s+class\s+(\w+)\s*[^{]*{([^}]+)}'
+             enum_matches = re.findall(enum_pattern, content, re.DOTALL)
+
+             for enum_name, enum_body in enum_matches:
+                 values = self._extract_enum_values(enum_body)
+                 if values:
+                     self.enum_values[enum_name] = {
+                         "source_file": str(file_path.relative_to(self.source_path)),
+                         "values": values
+                     }
+
+             # Pattern for regular enums too
+             regular_enum_pattern = r'enum\s+(\w+)\s*{([^}]+)}'
+             regular_matches = re.findall(regular_enum_pattern, content, re.DOTALL)
+
+             for enum_name, enum_body in regular_matches:
+                 values = self._extract_enum_values(enum_body)
+                 if values:
+                     self.enum_values[enum_name] = {
+                         "source_file": str(file_path.relative_to(self.source_path)),
+                         "values": values
+                     }
+
+         except Exception as e:
+             print(f"Error parsing enums from {file_path}: {e}")
+
+     def _scan_file_for_enums(self, file_path: Path):
+         """Scan any file for enum definitions"""
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 content = f.read()
+
+             # Look for enum class definitions
+             enum_pattern = r'enum\s+class\s+(\w+)\s*[^{]*{([^}]+)}'
+             matches = re.findall(enum_pattern, content, re.DOTALL)
+
+             for enum_name, enum_body in matches:
+                 if enum_name not in self.enum_values:
+                     values = self._extract_enum_values(enum_body)
+                     if values:
+                         self.enum_values[enum_name] = {
+                             "source_file": str(file_path.relative_to(self.source_path)),
+                             "values": values
+                         }
+         except Exception as e:
+             print(f"Debug: Error scanning enums in {file_path}: {e}", file=sys.stderr)
+
+     def _is_valid_topic_property(self, prop_name: str) -> bool:
+         """Validate that a string looks like a real topic property"""
+
+         # Must be non-empty and reasonable length
+         if not prop_name or len(prop_name) < 3 or len(prop_name) > 100:
+             return False
+
+         # Must contain only valid characters for topic properties
+         if not re.match(r'^[a-zA-Z][a-zA-Z0-9._-]*$', prop_name):
+             return False
+
+         # Known topic property prefixes/patterns
+         valid_patterns = [
+             r'^redpanda\.',
+             r'^cleanup\.policy$',
+             r'^compression\.type$',
+             r'^segment\.',
+             r'^flush\.',
+             r'^delete\.',
+             r'^replication\.factor$',
+             r'^write\.caching$',
+             r'^min\.',
+             r'^max\.',
+             r'^confluent\.',
+             r'.*\.ms$',
+             r'.*\.bytes$',
+             r'.*\.ratio$',
+         ]
+
+         return any(re.match(pattern, prop_name, re.IGNORECASE) for pattern in valid_patterns)
+
+     def _determine_property_type(self, property_name: str) -> str:
+         """Determine the type of a property based on its name and usage patterns"""
+
+         # Type mapping based on property name patterns
+         if any(keyword in property_name for keyword in ["caching", "recovery", "read", "write", "delete"]):
+             if property_name in ["write.caching", "redpanda.remote.recovery", "redpanda.remote.write",
+                                  "redpanda.remote.read", "redpanda.remote.delete", "redpanda.remote.readreplica"]:
+                 return "boolean"
+
+         elif any(suffix in property_name for suffix in [".bytes", ".ms", ".factor", ".lag.ms"]):
+             return "integer"
+
+         elif "ratio" in property_name:
+             return "number"
+
+         elif property_name in ["cleanup.policy", "compression.type", "message.timestamp.type"]:
+             return "string"  # enum-based strings
+
+         # Default to string for unknown properties
+         return "string"
+
+     def _extract_enum_values(self, enum_body: str) -> List[str]:
+         """Extract enum value names from enum body"""
+         values = []
+
+         # Pattern to match enum value declarations (handle various formats)
+         value_patterns = [
+             r'(\w+)\s*=\s*[^,}]+', # name = value
+             r'(\w+)\s*,',          # name,
+             r'(\w+)\s*}'           # name}
+         ]
+
+         for pattern in value_patterns:
+             matches = re.findall(pattern, enum_body)
+             for match in matches:
+                 if match and match not in values and not match.isdigit():
+                     values.append(match)
+
+         return values
+
+     def _discover_cluster_mappings(self):
+         """Discover topic-to-cluster property mappings from source code"""
+
+         # Search in configuration and handler files for mappings
+         search_patterns = [
+             "src/v/config/**/*.cc",
+             "src/v/config/**/*.h",
+             "src/v/kafka/server/handlers/**/*.cc",
+             "src/v/kafka/server/handlers/**/*.h",
+             "src/v/cluster/**/*.cc",
+             "src/v/cluster/**/*.h"
+         ]
+
+         mapping_candidates = {}
+
+         for pattern in search_patterns:
+             for file_path in self.source_path.glob(pattern):
+                 if file_path.is_file():
+                     candidates = self._find_mappings_in_file(file_path)
+                     mapping_candidates.update(candidates)
+
+         # Process mapping candidates to find correlations
+         self._process_mapping_candidates(mapping_candidates)
+
+     def _find_mappings_in_file(self, file_path: Path) -> Dict[str, str]:
+         """Find potential topic-to-cluster property mappings in a file"""
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 content = f.read()
+
+             mappings = {}
+
+             # Pattern 1: Look for configuration property definitions with proper cluster prop names
+             # Example: config.get("log_cleanup_policy") or similar patterns
+             config_patterns = [
+                 r'config\.get\("([^"]+)"\)',  # config.get("property_name")
+                 r'\.([a-z_]+(?:_[a-z]+)*)\(', # method calls like .retention_bytes(
+                 r'([a-z_]+(?:_[a-z]+)*)\s*=', # assignments like retention_bytes =
+             ]
+
+             for pattern in config_patterns:
+                 matches = re.findall(pattern, content)
+                 for match in matches:
+                     # Only consider names that look like cluster properties
+                     if self._looks_like_cluster_property(match):
+                         # Try to correlate with topic properties
+                         topic_prop = self._correlate_cluster_to_topic_property(match)
+                         if topic_prop and topic_prop in self.topic_properties:
+                             mappings[topic_prop] = match
+
+             return mappings
+
+         except Exception as e:
+             print(f"Debug: Error finding mappings in {file_path}: {e}", file=sys.stderr)
+             return {}
+
+     def _looks_like_cluster_property(self, prop_name: str) -> bool:
+         """Check if a name looks like a cluster property"""
+         # Cluster properties typically have specific patterns
+         cluster_patterns = [
+             r'^[a-z]+(_[a-z]+)*$',               # snake_case like log_cleanup_policy
+             r'.*_default$',                      # ends with _default
+             r'.*_(ms|bytes|ratio|type|policy)$', # ends with common suffixes
+         ]
+
+         return any(re.match(pattern, prop_name) for pattern in cluster_patterns) and len(prop_name) > 4
+
+     def _correlate_cluster_to_topic_property(self, cluster_prop: str) -> Optional[str]:
+         """Try to correlate a cluster property name to a topic property"""
+
+         # Known correlation patterns
+         correlations = {
+             "log_cleanup_policy": "cleanup.policy",
+             "log_compression_type": "compression.type",
+             "log_retention_ms": "retention.ms",
+             "retention_bytes": "retention.bytes",
+             "log_segment_ms": "segment.ms",
+             "log_segment_size": "segment.bytes",
+             "log_message_timestamp_type": "message.timestamp.type",
+             "kafka_batch_max_bytes": "max.message.bytes",
+             "default_topic_replication": "replication.factor",
+             "write_caching_default": "write.caching",
+         }
+
+         # Direct lookup first
+         if cluster_prop in correlations:
+             return correlations[cluster_prop]
+
+         # Remove common prefixes/suffixes
+         cleaned = cluster_prop
+         if cleaned.startswith("log_"):
+             cleaned = cleaned[4:]
+         if cleaned.endswith("_default"):
+             cleaned = cleaned[:-8]
+         if cleaned.endswith("_ms"):
+             cleaned = cleaned[:-3] + ".ms"
+         if cleaned.endswith("_bytes"):
+             cleaned = cleaned[:-6] + ".bytes"
+         if cleaned.endswith("_policy"):
+             cleaned = cleaned[:-7] + ".policy"
+         if cleaned.endswith("_type"):
+             cleaned = cleaned[:-5] + ".type"
+
+         # Convert snake_case to dot.case
+         topic_candidate = cleaned.replace("_", ".")
+
+         if topic_candidate in self.topic_properties:
+             return topic_candidate
+
+         return None
+
+     def _process_mapping_candidates(self, mapping_candidates: Dict[str, str]):
+         """Process and validate mapping candidates"""
+         for topic_prop, cluster_prop in mapping_candidates.items():
+             if topic_prop in self.topic_properties:
+                 self.cluster_mappings[topic_prop] = cluster_prop
+
+     def _resolve_topic_property_name(self, var_name: str) -> Optional[str]:
+         """Resolve topic_property_xxx variable to actual property name"""
+         for prop_name, prop_data in self.topic_properties.items():
+             if prop_data["variable_name"] == f"topic_property_{var_name}":
+                 return prop_name
+         return None
+
+     def _correlate_properties_with_data(self):
+         """Correlate topic properties with their acceptable values and cluster mappings"""
+
+         for prop_name, prop_data in self.topic_properties.items():
+             # Update cluster mapping if found
+             if prop_name in self.cluster_mappings:
+                 prop_data["corresponding_cluster_property"] = self.cluster_mappings[prop_name]
+
+             # Mark as no-op if found in the allowlist
+             prop_data["is_noop"] = prop_name in self.noop_properties
+
+             # Update acceptable values based on property type
+             prop_data["acceptable_values"] = self._determine_acceptable_values(prop_name, prop_data)
+
+     def _determine_acceptable_values(self, prop_name: str, prop_data: Dict) -> str:
+         """Determine acceptable values for a property based on runtime analysis"""
+
+         # Check if it's an enum-based property
+         if "compression" in prop_name:
+             if "compression" in self.enum_values:
+                 values = self.enum_values["compression"]["values"]
+                 # Filter out special values like 'count', 'producer'
+                 filtered_values = [v for v in values if v not in ['count', 'producer']]
+                 return f"[`{'`, `'.join(filtered_values)}`]"
+
+         elif "cleanup.policy" in prop_name:
+             if "cleanup_policy_bitflags" in self.enum_values:
+                 values = self.enum_values["cleanup_policy_bitflags"]["values"]
+                 # Convert enum names to policy names
+                 policy_values = []
+                 for v in values:
+                     if v == "deletion":
+                         policy_values.append("delete")
+                     elif v == "compaction":
+                         policy_values.append("compact")
+                 if policy_values:
+                     policy_values.append("compact,delete")  # Combined policy
+                     return f"[`{'`, `'.join(policy_values)}`]"
+
+         elif "timestamp.type" in prop_name:
+             return "[`CreateTime`, `LogAppendTime`]"
+
+         elif prop_data.get("type") == "boolean":
+             return "[`true`, `false`]"
+
+         # For numeric properties, determine format based on type and name
+         elif prop_data.get("type") == "number" and "ratio" in prop_name:
+             return "[`0`, `1.0`]"
+         elif prop_data.get("type") == "integer":
+             if ".factor" in prop_name:
+                 return "integer (1 or greater)"
+             elif ".bytes" in prop_name:
+                 return "bytes (integer)"
+             elif ".ms" in prop_name:
+                 return "milliseconds (integer)"
+             else:
+                 return "integer"
+
+         return ""  # Default to empty if unknown
+
+     def generate_topic_properties_adoc(self, output_path: str):
+         """Generate topic-properties.adoc file"""
+
+         adoc_content = """= Topic Configuration Properties
+ :page-aliases: reference:topic-properties.adoc
+ :description: Reference of topic configuration properties.
+
+ A topic-level property sets a Redpanda or Kafka configuration for a particular topic.
+
+ Many topic-level properties have corresponding xref:manage:cluster-maintenance/cluster-property-configuration.adoc[cluster properties] that set a default value for all topics of a cluster. To customize the value for a topic, you can set a topic-level property that overrides the value of the corresponding cluster property.
+
+ For information on how to configure topic properties, see xref:manage:cluster-maintenance/topic-property-configuration.adoc[].
+
+ NOTE: All topic properties take effect immediately after being set.
+
+ == Topic property mappings
+
+ |===
+ | Topic property | Corresponding cluster property
+
+ """
+
+         # Add table rows ONLY for properties with cluster mappings and exclude no-ops
+         for prop_name, prop_data in sorted(self.topic_properties.items()):
+             cluster_prop = prop_data.get("corresponding_cluster_property")
+             is_noop = prop_data.get("is_noop", False)
+             if cluster_prop and not is_noop:  # Only include if there's a cluster mapping and not a no-op
+                 anchor = prop_name.replace(".", "").replace("-", "").lower()
+                 adoc_content += f"| <<{anchor},`{prop_name}`>>\n"
+                 adoc_content += f"| xref:./cluster-properties.adoc#{cluster_prop}[`{cluster_prop}`]\n\n"
+
+         adoc_content += """|===
+
+ == Topic properties
+
+ """
+
+         # Add individual property documentation - ONLY include properties with cluster mappings and exclude no-ops
+         for prop_name, prop_data in sorted(self.topic_properties.items()):
+             cluster_prop = prop_data.get("corresponding_cluster_property")
+             is_noop = prop_data.get("is_noop", False)
+
+             # Skip properties without cluster mappings or no-op properties
+             if not cluster_prop or is_noop:
+                 continue
+
+             anchor = prop_name.replace(".", "").replace("-", "").lower()
+             acceptable_values = prop_data.get("acceptable_values", "")
+             prop_type = prop_data.get("type", "string")
+
+             adoc_content += f"""
+ [[{anchor}]]
+ === {prop_name}
+
+ *Type:* {prop_type}
+
+ """
+             if acceptable_values:
+                 adoc_content += f"*Accepted values:* {acceptable_values}\n\n"
+
+             adoc_content += "*Default:* null\n\n"
+             adoc_content += f"*Related cluster property:* xref:./cluster-properties.adoc#{cluster_prop}[`{cluster_prop}`]\n\n"
+             adoc_content += "---\n\n"
+
+         # Write the file
+         output_dir = os.path.dirname(output_path)
+         if output_dir:  # Only create directory if there's a path
+             os.makedirs(output_dir, exist_ok=True)
+         with open(output_path, 'w', encoding='utf-8') as f:
+             f.write(adoc_content)
+
+         print(f"Generated topic properties documentation: {output_path}")
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="Extract topic properties from Redpanda source code")
+     parser.add_argument("--source-path", required=True, help="Path to Redpanda source code")
+     parser.add_argument("--output-json", help="Output JSON file path")
+     parser.add_argument("--output-adoc", help="Output AsciiDoc file path")
+
+     args = parser.parse_args()
+
+     extractor = TopicPropertyExtractor(args.source_path)
+     result = extractor.extract_topic_properties()
+
+     # Calculate properties that will be included in documentation (non-no-op with cluster mappings)
+     documented_props = [prop for prop, data in result['topic_properties'].items()
+                         if data.get('corresponding_cluster_property') and not data.get('is_noop', False)]
+
+     print(f"Found {len(result['topic_properties'])} total properties ({len(documented_props)} documented, {len(result['noop_properties'])} no-op)")
+
+     if args.output_json:
+         with open(args.output_json, 'w', encoding='utf-8') as f:
+             json.dump(result, f, indent=2)
+         print(f"Topic properties JSON saved to: {args.output_json}")
+
+     if args.output_adoc:
+         extractor.generate_topic_properties_adoc(args.output_adoc)
+
+
+ if __name__ == "__main__":
+     main()
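
As a closing usage sketch: the script accepts the same flags the Makefile passes, so it can be run stand-alone (paths illustrative), or driven as a library the way `main()` does; the import below assumes you run from the property-extractor directory:

    python topic_property_extractor.py --source-path /path/to/redpanda \
        --output-json gen/topic-properties-output.json \
        --output-adoc gen/topic-properties.adoc

# Equivalent library use, mirroring main():
from topic_property_extractor import TopicPropertyExtractor

extractor = TopicPropertyExtractor("/path/to/redpanda")  # source path is illustrative
result = extractor.extract_topic_properties()
extractor.generate_topic_properties_adoc("gen/topic-properties.adoc")
print(f"{len(result['topic_properties'])} properties discovered")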