@redpanda-data/docs-extensions-and-macros 4.12.2 → 4.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,12 +9,15 @@ from typing import Dict, List, Optional
9
9
 
10
10
 
11
11
  class TopicPropertyExtractor:
12
- def __init__(self, source_path: str):
12
+ def __init__(self, source_path: str, cluster_properties: Optional[Dict] = None):
13
13
  self.source_path = Path(source_path)
14
14
  self.topic_properties = {}
15
15
  self.cluster_mappings = {}
16
16
  self.enum_values = {}
17
17
  self.noop_properties = set()
18
+ self.dynamic_correlations = {} # Cache for dynamically discovered mappings
19
+ self.alternate_mappings = {} # topic_prop -> alternate_cluster_prop for conditional mappings
20
+ self.cluster_properties = cluster_properties or {} # Cluster properties for default value lookup
18
21
 
19
22
  def extract_topic_properties(self) -> Dict:
20
23
  """Extract topic property constants from source files"""
@@ -27,11 +30,11 @@ class TopicPropertyExtractor:
27
30
 
28
31
  # Step 3: Discover no-op properties
29
32
  self._discover_noop_properties()
30
-
31
- # Step 4: Discover cluster property mappings from source code
32
- self._discover_cluster_mappings()
33
-
34
- # Step 5: Match properties with their validators and mappings
33
+
34
+ # Step 4: Discover cluster property mappings from source code (now using dynamic extraction)
35
+ # Note: Dynamic extraction happens in _correlate_properties_with_data
36
+
37
+ # Step 5: Match properties with their validators and mappings (includes dynamic extraction)
35
38
  self._correlate_properties_with_data()
36
39
 
37
40
  return {
@@ -107,12 +110,16 @@ class TopicPropertyExtractor:
107
110
  self.topic_properties[property_name] = {
108
111
  "variable_name": f"topic_property_{var_name}",
109
112
  "property_name": property_name,
113
+ "name": property_name, # Used by generate-handlebars-docs.js
110
114
  "defined_in": str(file_path.relative_to(self.source_path)),
111
115
  "description": "",
112
116
  "type": self._determine_property_type(property_name),
113
117
  "acceptable_values": None,
114
118
  "corresponding_cluster_property": None,
115
- "is_noop": False # Will be updated later in _correlate_properties_with_data
119
+ "default": None, # Will be populated from cluster property if available
120
+ "is_noop": False, # Will be updated later in _correlate_properties_with_data
121
+ "is_topic_property": True,
122
+ "config_scope": "topic"
116
123
  }
117
124
  print(f"Found {total_matches} topic properties in {file_path}")
118
125
  except Exception as e:
@@ -160,12 +167,16 @@ class TopicPropertyExtractor:
160
167
  self.topic_properties[property_name] = {
161
168
  "variable_name": f"topic_property_{var_name}",
162
169
  "property_name": property_name,
170
+ "name": property_name, # Used by generate-handlebars-docs.js
163
171
  "defined_in": str(file_path.relative_to(self.source_path)),
164
172
  "description": "",
165
173
  "type": self._determine_property_type(property_name),
166
174
  "acceptable_values": None,
167
175
  "corresponding_cluster_property": None,
168
- "is_noop": False # Will be updated later in _correlate_properties_with_data
176
+ "default": None, # Will be populated from cluster property if available
177
+ "is_noop": False, # Will be updated later in _correlate_properties_with_data
178
+ "is_topic_property": True,
179
+ "config_scope": "topic"
169
180
  }
170
181
  except Exception as e:
171
182
  print(f"Debug: Skipping {file_path}: {e}", file=sys.stderr)
@@ -220,7 +231,154 @@ class TopicPropertyExtractor:
220
231
 
221
232
  except Exception as e:
222
233
  print(f"Error reading no-op properties from {types_file}: {e}")
223
-
234
+
235
+ def _extract_mappings_from_config_response_utils(self) -> Dict[str, Dict[str, str]]:
236
+ """
237
+ Dynamically extract cluster-to-topic property mappings from config_response_utils.cc
238
+
239
+ Parses add_topic_config_if_requested() calls to extract:
240
+ - Cluster property names from config::shard_local_cfg().PROPERTY.name()
241
+ - Topic property constants like topic_property_retention_duration
242
+
243
+ Returns a mapping dict: {"topic.property": {"primary": "cluster_property", "alternate": "alt_cluster_property"}}
244
+ """
245
+ config_utils_file = self.source_path / "src/v/kafka/server/handlers/configs/config_response_utils.cc"
246
+
247
+ if not config_utils_file.exists():
248
+ print(f"Warning: config_response_utils.cc not found at {config_utils_file}")
249
+ return {}
250
+
251
+ try:
252
+ with open(config_utils_file, 'r', encoding='utf-8') as f:
253
+ content = f.read()
254
+ except Exception as e:
255
+ print(f"Error reading config_response_utils.cc: {e}")
256
+ return {}
257
+
258
+ # Build a reverse lookup of topic property constants to their actual values
259
+ # topic_property_retention_duration -> "retention.ms"
260
+ topic_const_to_value = {}
261
+ for prop_name, prop_value in self.topic_properties.items():
262
+ # Search for the constant name pattern: topic_property_XXX = "prop_name"
263
+ # We need to reverse lookup: given prop_name, find topic_property_XXX
264
+ const_name = self._find_topic_property_constant_name(prop_name)
265
+ if const_name:
266
+ topic_const_to_value[const_name] = prop_name
267
+
268
+ mappings = {}
269
+
270
+ # Pattern to find add_topic_config_if_requested calls (just the start)
271
+ # We'll then search forward from each match to find the relevant properties
272
+ pattern = r'add_topic_config_if_requested\s*\('
273
+
274
+ # Find all function call starts
275
+ function_starts = [match.start() for match in re.finditer(pattern, content)]
276
+
277
+ for i, start_pos in enumerate(function_starts):
278
+ # Determine the search range: from this call to the next call (or end of file)
279
+ if i + 1 < len(function_starts):
280
+ end_pos = function_starts[i + 1]
281
+ else:
282
+ end_pos = min(start_pos + 2000, len(content))
283
+
284
+ # Extract the text of this function call
285
+ call_text = content[start_pos:end_pos]
286
+
287
+ # First, find where the topic property constant appears
288
+ topic_const_match = re.search(
289
+ r'topic_property_([a-z_]+(?:_[a-z0-9]+)*)',
290
+ call_text
291
+ )
292
+
293
+ if not topic_const_match:
294
+ continue
295
+
296
+ # Search for cluster properties in the full call_text
297
+ # We need to find the end of the current add_topic_config_if_requested call
298
+ # to avoid picking up properties from the next call
299
+ # The call ends at the closing ");", so search for that
300
+ call_end_match = re.search(r'\)\s*;', call_text)
301
+ if call_end_match:
302
+ # Limit our search to just this function call
303
+ search_text = call_text[:call_end_match.end()]
304
+ else:
305
+ # Fallback: limit to the first 1500 characters
306
+ search_text = call_text[:1500]
307
+
308
+ cluster_prop_matches = re.findall(
309
+ r'config::shard_local_cfg\(\)\s*\.\s*([a-z_]+(?:_[a-z0-9]+)*)\s*\.\s*(name|desc)\s*\(\)',
310
+ search_text,
311
+ re.DOTALL
312
+ )
313
+
314
+ # Note: We intentionally do NOT use metadata_cache.get_default_XXX() as a fallback
315
+ # because those methods often return computed values (like record_key_schema_id_validation)
316
+ # that are NOT actual cluster properties that users can configure.
317
+ # Only properties explicitly defined in config::shard_local_cfg() are real cluster properties.
318
+
319
+ if cluster_prop_matches:
320
+ topic_const = f"topic_property_{topic_const_match.group(1)}"
321
+
322
+ # Resolve the constant to its actual string value
323
+ if topic_const in topic_const_to_value:
324
+ topic_prop = topic_const_to_value[topic_const]
325
+
326
+ # Extract unique cluster property names
327
+ cluster_props = list(dict.fromkeys([match[0] for match in cluster_prop_matches]))
328
+
329
+ if len(cluster_props) == 1:
330
+ # Single mapping
331
+ mappings[topic_prop] = {"primary": cluster_props[0]}
332
+ print(f" Found mapping: {cluster_props[0]} -> {topic_prop}")
333
+ elif len(cluster_props) > 1:
334
+ # Conditional mapping - store primary and alternate
335
+ # Use the second one as primary (typically the 'else' case is the default)
336
+ primary = cluster_props[1] if len(cluster_props) > 1 else cluster_props[0]
337
+ alternate = cluster_props[0]
338
+ mappings[topic_prop] = {"primary": primary, "alternate": alternate}
339
+ print(f" Found conditional mapping: {primary} (or {alternate}) -> {topic_prop}")
340
+ else:
341
+ # Debug: constant not found in lookup
342
+ if cluster_prop_matches:
343
+ print(f" Debug: Unresolved constant {topic_const} for cluster props {cluster_prop_matches}")
344
+
345
+ print(f"Dynamically discovered {len(mappings)} cluster property mappings")
346
+ return mappings
347
+
348
+ def _find_topic_property_constant_name(self, prop_value: str) -> Optional[str]:
349
+ """
350
+ Given a topic property value like 'retention.ms', find its constant name
351
+ like 'topic_property_retention_duration'
352
+ """
353
+ # Search in header files for the constant definition
354
+ header_files = [
355
+ "src/v/kafka/protocol/topic_properties.h",
356
+ "src/v/kafka/server/handlers/topics/types.h",
357
+ ]
358
+
359
+ for header_file in header_files:
360
+ file_path = self.source_path / header_file
361
+ if not file_path.exists():
362
+ continue
363
+
364
+ try:
365
+ with open(file_path, 'r', encoding='utf-8') as f:
366
+ content = f.read()
367
+
368
+ # Pattern: topic_property_XXX = "prop_value"
369
+ pattern = rf'topic_property_(\w+)\s*=\s*"{re.escape(prop_value)}"'
370
+ match = re.search(pattern, content)
371
+
372
+ if match:
373
+ const_name = f"topic_property_{match.group(1)}"
374
+ return const_name
375
+
376
+ except Exception as e:
377
+ print(f"Error searching {file_path}: {e}")
378
+ continue
379
+
380
+ return None
381
+
224
382
  def _parse_enums_from_file(self, file_path: Path):
225
383
  """Parse enum definitions from a file"""
226
384
  try:
@@ -441,46 +599,18 @@ class TopicPropertyExtractor:
441
599
 
442
600
  def _correlate_cluster_to_topic_property(self, cluster_prop: str) -> Optional[str]:
443
601
  """Try to correlate a cluster property name to a topic property"""
444
-
445
- # Known correlation patterns
446
- correlations = {
447
- "log_cleanup_policy": "cleanup.policy",
448
- "log_compression_type": "compression.type",
449
- "log_retention_ms": "retention.ms",
450
- "retention_bytes": "retention.bytes",
451
- "log_segment_ms": "segment.ms",
452
- "log_segment_size": "segment.bytes",
453
- "log_message_timestamp_type": "message.timestamp.type",
454
- "kafka_batch_max_bytes": "max.message.bytes",
455
- "default_topic_replication": "replication.factor",
456
- "write_caching_default": "write.caching",
457
- }
458
-
459
- # Direct lookup first
460
- if cluster_prop in correlations:
461
- return correlations[cluster_prop]
462
-
463
- # Remove common prefixes/suffixes
464
- cleaned = cluster_prop
465
- if cleaned.startswith("log_"):
466
- cleaned = cleaned[4:]
467
- if cleaned.endswith("_default"):
468
- cleaned = cleaned[:-8]
469
- if cleaned.endswith("_ms"):
470
- cleaned = cleaned[:-3] + ".ms"
471
- if cleaned.endswith("_bytes"):
472
- cleaned = cleaned[:-6] + ".bytes"
473
- if cleaned.endswith("_policy"):
474
- cleaned = cleaned[:-7] + ".policy"
475
- if cleaned.endswith("_type"):
476
- cleaned = cleaned[:-5] + ".type"
477
-
478
- # Convert snake_case to dot.case
479
- topic_candidate = cleaned.replace("_", ".")
480
-
481
- if topic_candidate in self.topic_properties:
482
- return topic_candidate
483
-
602
+
603
+ # Lazy load dynamic correlations on first call
604
+ if not self.dynamic_correlations:
605
+ print("Extracting cluster property mappings dynamically from source code...")
606
+ self.dynamic_correlations = self._extract_mappings_from_config_response_utils()
607
+
608
+ # Use ONLY dynamically discovered mappings (no fallback heuristics)
609
+ # This ensures we only use accurate mappings from source code
610
+ if cluster_prop in self.dynamic_correlations:
611
+ return self.dynamic_correlations[cluster_prop]
612
+
613
+ # No fallback - if it wasn't discovered dynamically, don't guess
484
614
  return None
485
615
 
486
616
  def _process_mapping_candidates(self, mapping_candidates: Dict[str, str]):
@@ -495,18 +625,77 @@ class TopicPropertyExtractor:
495
625
  if prop_data["variable_name"] == f"topic_property_{var_name}":
496
626
  return prop_name
497
627
  return None
498
-
628
+
629
+ def _is_object_storage_property(self, prop_name: str) -> bool:
630
+ """
631
+ Determines if a cluster property is related to object storage.
632
+ This matches the logic from generate-handlebars-docs.js
633
+ """
634
+ return (
635
+ 'cloud_storage' in prop_name or
636
+ 's3_' in prop_name or
637
+ 'azure_' in prop_name or
638
+ 'gcs_' in prop_name or
639
+ 'archival_' in prop_name or
640
+ 'remote_' in prop_name or
641
+ 'tiered_' in prop_name
642
+ )
643
+
644
+ def _get_cluster_property_doc_file(self, prop_name: str) -> str:
645
+ """
646
+ Determines which documentation file a cluster property should link to.
647
+ Returns either 'cluster-properties.adoc' or 'object-storage-properties.adoc'
648
+ """
649
+ if self._is_object_storage_property(prop_name):
650
+ return 'object-storage-properties.adoc'
651
+ return 'cluster-properties.adoc'
652
+
499
653
  def _correlate_properties_with_data(self):
500
654
  """Correlate topic properties with their acceptable values and cluster mappings"""
501
-
655
+
656
+ # First, populate cluster_mappings from dynamically discovered mappings
657
+ if not self.dynamic_correlations:
658
+ print("Extracting cluster property mappings dynamically from source code...")
659
+ self.dynamic_correlations = self._extract_mappings_from_config_response_utils()
660
+
661
+ # Apply dynamic correlations to properties
662
+ for topic_prop, mapping_info in self.dynamic_correlations.items():
663
+ if topic_prop in self.topic_properties:
664
+ primary = mapping_info.get("primary")
665
+ alternate = mapping_info.get("alternate")
666
+
667
+ if primary:
668
+ self.cluster_mappings[topic_prop] = primary
669
+
670
+ if alternate:
671
+ self.alternate_mappings[topic_prop] = alternate
672
+
502
673
  for prop_name, prop_data in self.topic_properties.items():
503
674
  # Update cluster mapping if found
504
675
  if prop_name in self.cluster_mappings:
505
- prop_data["corresponding_cluster_property"] = self.cluster_mappings[prop_name]
506
-
676
+ cluster_prop = self.cluster_mappings[prop_name]
677
+ prop_data["corresponding_cluster_property"] = cluster_prop
678
+ prop_data["cluster_property_doc_file"] = self._get_cluster_property_doc_file(cluster_prop)
679
+
680
+ # Populate default value from cluster property if available
681
+ if self.cluster_properties:
682
+ cluster_props = self.cluster_properties.get("properties", {})
683
+ if cluster_prop in cluster_props:
684
+ cluster_prop_data = cluster_props[cluster_prop]
685
+ prop_data["default"] = cluster_prop_data.get("default")
686
+ # Also copy default_human_readable if available
687
+ if "default_human_readable" in cluster_prop_data:
688
+ prop_data["default_human_readable"] = cluster_prop_data["default_human_readable"]
689
+
690
+ # Add alternate cluster property if this has conditional mapping
691
+ if prop_name in self.alternate_mappings:
692
+ alternate_prop = self.alternate_mappings[prop_name]
693
+ prop_data["alternate_cluster_property"] = alternate_prop
694
+ prop_data["alternate_cluster_property_doc_file"] = self._get_cluster_property_doc_file(alternate_prop)
695
+
507
696
  # Mark as no-op if found in the allowlist
508
697
  prop_data["is_noop"] = prop_name in self.noop_properties
509
-
698
+
510
699
  # Update acceptable values based on property type
511
700
  prop_data["acceptable_values"] = self._determine_acceptable_values(prop_name, prop_data)
512
701
 
@@ -594,19 +783,19 @@ NOTE: All topic properties take effect immediately after being set.
594
783
 
595
784
  """
596
785
 
597
- # Add individual property documentation - ONLY include properties with cluster mappings and exclude no-ops
786
+ # Add individual property documentation - include ALL non-no-op properties
598
787
  for prop_name, prop_data in sorted(self.topic_properties.items()):
599
788
  cluster_prop = prop_data.get("corresponding_cluster_property")
600
789
  is_noop = prop_data.get("is_noop", False)
601
-
602
- # Skip properties without cluster mappings or no-op properties
603
- if not cluster_prop or is_noop:
790
+
791
+ # Skip only no-op properties
792
+ if is_noop:
604
793
  continue
605
-
794
+
606
795
  anchor = prop_name.replace(".", "").replace("-", "").lower()
607
796
  acceptable_values = prop_data.get("acceptable_values", "")
608
797
  prop_type = prop_data.get("type", "string")
609
-
798
+
610
799
  adoc_content += f"""
611
800
  [[{anchor}]]
612
801
  === {prop_name}
@@ -617,9 +806,13 @@ NOTE: All topic properties take effect immediately after being set.
617
806
  # If the property type is boolean, never include an Accepted values section
618
807
  if acceptable_values and str(prop_type).lower() not in ("boolean", "bool"):
619
808
  adoc_content += f"*Accepted values:* {acceptable_values}\n\n"
620
-
809
+
621
810
  adoc_content += "*Default:* null\n\n"
622
- adoc_content += f"*Related cluster property:* xref:./cluster-properties.adoc#{cluster_prop}[`{cluster_prop}`]\n\n"
811
+
812
+ # Only show related cluster property if one exists
813
+ if cluster_prop:
814
+ adoc_content += f"*Related cluster property:* xref:./cluster-properties.adoc#{cluster_prop}[`{cluster_prop}`]\n\n"
815
+
623
816
  adoc_content += "---\n\n"
624
817
 
625
818
  # Write the file
@@ -637,10 +830,21 @@ def main():
637
830
  parser.add_argument("--source-path", required=True, help="Path to Redpanda source code")
638
831
  parser.add_argument("--output-json", help="Output JSON file path")
639
832
  parser.add_argument("--output-adoc", help="Output AsciiDoc file path")
640
-
833
+ parser.add_argument("--cluster-properties-json", help="Path to cluster properties JSON file for default value lookup")
834
+
641
835
  args = parser.parse_args()
642
-
643
- extractor = TopicPropertyExtractor(args.source_path)
836
+
837
+ # Load cluster properties if provided
838
+ cluster_properties = None
839
+ if args.cluster_properties_json:
840
+ try:
841
+ with open(args.cluster_properties_json, 'r', encoding='utf-8') as f:
842
+ cluster_properties = json.load(f)
843
+ print(f"Loaded cluster properties from: {args.cluster_properties_json}")
844
+ except Exception as e:
845
+ print(f"Warning: Failed to load cluster properties: {e}")
846
+
847
+ extractor = TopicPropertyExtractor(args.source_path, cluster_properties)
644
848
  result = extractor.extract_topic_properties()
645
849
 
646
850
  # Calculate properties that will be included in documentation (non-no-op with cluster mappings)