@redpanda-data/docs-extensions-and-macros 4.2.5 → 4.4.0

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
Files changed (45)
  1. package/README.adoc +184 -21
  2. package/bin/doc-tools.js +328 -0
  3. package/cli-utils/add-caret-external-links.py +68 -0
  4. package/cli-utils/beta-from-antora.js +27 -0
  5. package/cli-utils/generate-cluster-docs.sh +83 -0
  6. package/cli-utils/install-test-dependencies.sh +158 -0
  7. package/cli-utils/python-venv.sh +20 -0
  8. package/cli-utils/start-cluster.sh +53 -0
  9. package/docker-compose/bootstrap.yml +67 -0
  10. package/docker-compose/docker-compose.yml +414 -0
  11. package/docker-compose/generate-profiles.yaml +77 -0
  12. package/docker-compose/rpk-profile.yaml +24 -0
  13. package/docker-compose/transactions-schema.json +37 -0
  14. package/docker-compose/transactions.md +46 -0
  15. package/docker-compose/transform/README.adoc +73 -0
  16. package/docker-compose/transform/go.mod +5 -0
  17. package/docker-compose/transform/go.sum +2 -0
  18. package/docker-compose/transform/regex.wasm +0 -0
  19. package/docker-compose/transform/transform.go +122 -0
  20. package/docker-compose/transform/transform.yaml +33 -0
  21. package/extension-utils/compute-out.js +38 -0
  22. package/extension-utils/create-asciidoc-file.js +15 -0
  23. package/macros/data-template.js +591 -0
  24. package/package.json +21 -4
  25. package/tools/docusaurus-to-antora-conversion-scripts/convert-docs.sh +114 -0
  26. package/tools/docusaurus-to-antora-conversion-scripts/get-file-changes.sh +9 -0
  27. package/tools/docusaurus-to-antora-conversion-scripts/post-process-asciidoc.js +63 -0
  28. package/tools/docusaurus-to-antora-conversion-scripts/pre-process-markdown.js +108 -0
  29. package/tools/fetch-from-github.js +63 -0
  30. package/tools/gen-rpk-ascii.py +477 -0
  31. package/tools/get-console-version.js +53 -0
  32. package/tools/get-redpanda-version.js +53 -0
  33. package/tools/metrics/metrics.py +199 -0
  34. package/tools/metrics/requirements.txt +1 -0
  35. package/tools/property-extractor/Makefile +99 -0
  36. package/tools/property-extractor/README.adoc +206 -0
  37. package/tools/property-extractor/definitions.json +245 -0
  38. package/tools/property-extractor/file_pair.py +7 -0
  39. package/tools/property-extractor/json-to-asciidoc/generate_docs.py +460 -0
  40. package/tools/property-extractor/parser.py +224 -0
  41. package/tools/property-extractor/property_bag.py +4 -0
  42. package/tools/property-extractor/property_extractor.py +243 -0
  43. package/tools/property-extractor/requirements.txt +2 -0
  44. package/tools/property-extractor/tests/transformers_test.py +376 -0
  45. package/tools/property-extractor/transformers.py +397 -0
package/tools/property-extractor/json-to-asciidoc/generate_docs.py (new file)
@@ -0,0 +1,460 @@
+ import json
+ import os
+ import re
+ import argparse
+
+ # --- Constants for Paths and Filenames ---
+ INPUT_JSON_PATH = "gen/"
+ INPUT_JSON_FILE = "properties-output.json"
+
+ OUTPUT_DIR_DEFAULT = "output"
+ PAGE_FOLDER_NAME = "pages"
+ ERROR_FOLDER_NAME = "error"
+
+ OUTPUT_FILE_BROKER = "broker-properties.adoc"
+ OUTPUT_FILE_CLUSTER = "cluster-properties.adoc"
+ OUTPUT_FILE_CLOUD = "object-storage-properties.adoc"
+ OUTPUT_FILE_DEPRECATED = os.path.join("deprecated", "partials", "deprecated-properties.adoc")
+ ALL_PROPERTIES_FILE = "all_properties.txt"
+
+ ERROR_FILE_DESCRIPTION = "empty_description.txt"
+ ERROR_FILE_TYPE = "empty_type.txt"
+ ERROR_FILE_MAX_WITHOUT_MIN = "max_without_min.txt"
+ ERROR_FILE_MIN_WITHOUT_MAX = "min_without_max.txt"
+
+ # --- Static Documentation Strings ---
+ BROKER_PAGE_TITLE = (
+     "= Broker Configuration Properties\n"
+     ":page-aliases: reference:node-properties.adoc, reference:node-configuration-sample.adoc\n"
+     ":description: Reference of broker configuration properties.\n\n"
+ )
+ BROKER_INTRO = (
+     "Broker configuration properties are applied individually to each broker in a cluster. "
+     "You can find and modify these properties in the `redpanda.yaml` configuration file.\n\n"
+     "For information on how to edit broker properties, see xref:manage:cluster-maintenance/node-property-configuration.adoc[].\n\n"
+     "NOTE: All broker properties require that you restart Redpanda for any update to take effect.\n\n"
+ )
+ BROKER_TITLE = "== Broker configuration\n\n"
+
+ SCHEMA_REGISTRY_TITLE = "== Schema Registry\n\n"
+ PANDAPROXY_TITLE = "== HTTP Proxy\n\n"
+ KAFKA_CLIENT_TITLE = "== HTTP Proxy Client\n\n"
+
+ SCHEMA_REGISTRY_INTRO = (
+     "The Schema Registry provides configuration properties to help you enable producers and consumers "
+     "to share information needed to serialize and deserialize producer and consumer messages.\n\n"
+     "For information on how to edit broker properties for the Schema Registry, see xref:manage:cluster-maintenance/node-property-configuration.adoc[].\n\n"
+ )
+ PANDAPROXY_INTRO = (
+     "Redpanda HTTP Proxy allows access to your data through a REST API. For example, you can list topics or brokers, "
+     "get events, produce events, subscribe to events from topics using consumer groups, and commit offsets for a consumer.\n\n"
+     "See xref:develop:http-proxy.adoc[]\n\n"
+ )
+ KAFKA_CLIENT_INTRO = "Configuration options for HTTP Proxy Client.\n\n"
+
+ CLUSTER_PAGE_TITLE = (
+     "= Cluster Configuration Properties\n"
+     ":page-aliases: reference:tunable-properties.adoc, reference:cluster-properties.adoc\n"
+     ":description: Cluster configuration properties list.\n\n"
+ )
+ CLUSTER_CONFIG_INTRO = (
+     "Cluster configuration properties are the same for all brokers in a cluster, and are set at the cluster level.\n\n"
+     "For information on how to edit cluster properties, see xref:manage:cluster-maintenance/cluster-property-configuration.adoc[] "
+     "or xref:manage:kubernetes/k-cluster-property-configuration.adoc[].\n\n"
+     "NOTE: Some cluster properties require that you restart the cluster for any updates to take effect. "
+     "See the specific property details to identify whether or not a restart is required.\n\n"
+ )
+ CLUSTER_CONFIG_TITLE = "== Cluster configuration\n\n"
+
+ CLOUD_PAGE_TITLE = (
+     "= Object Storage Properties\n"
+     ":description: Reference of object storage properties.\n\n"
+ )
+ CLOUD_CONFIG_INTRO = (
+     "Object storage properties are a type of cluster property. For information on how to edit cluster properties, "
+     "see xref:manage:cluster-maintenance/cluster-property-configuration.adoc[].\n\n"
+     "NOTE: Some object storage properties require that you restart the cluster for any updates to take effect. "
+     "See the specific property details to identify whether or not a restart is required.\n\n"
+ )
+ CLOUD_CONFIG_TITLE = (
+     "== Object storage configuration\n\n"
+     "Object storage properties should only be set if you enable xref:manage:tiered-storage.adoc[Tiered Storage].\n\n"
+ )
+
+ DEPRECATED_PROPERTIES_TITLE = "\n== Configuration properties\n\n"
+ DEPRECATED_PROPERTIES_INTRO = "This is an exhaustive list of all the deprecated properties.\n\n"
+ DEPRECATED_BROKER_TITLE = "=== Broker properties\n\n"
+ DEPRECATED_CLUSTER_TITLE = "=== Cluster properties\n\n"
+
+ # --- Mapping Constants ---
+ DEFINED_IN_MAPPING = {
+     "src/v/config/node_config.cc": "broker",
+     "src/v/pandaproxy/schema_registry/configuration.cc": "schema reg",
+     "src/v/pandaproxy/rest/configuration.cc": "http proxy",
+     "src/v/kafka/client/configuration.cc": "http client",
+     "src/v/config/configuration.cc": "cluster"
+ }
+
+ SUFFIX_TO_UNIT = {
+     "ms": "milliseconds",
+     "sec": "seconds",  # Code is not always consistent when using seconds.
+     "seconds": "seconds",
+     "bytes": "bytes",
+     "buf": "bytes",
+     "partitions": "number of partitions per topic",
+     "percent": "percent",
+     "bps": "bytes per second",
+     "fraction": "fraction"
+ }
+
+ # --- Utility Functions ---
+ def parse_arguments():
+     parser = argparse.ArgumentParser(
+         description="Generate documentation from properties JSON"
+     )
+     parser.add_argument(
+         "--output-dir",
+         type=str,
+         required=True,
+         help="Directory to save the generated documentation",
+     )
+     return parser.parse_args()
+
+ def ensure_directory_exists(directory):
+     os.makedirs(directory, exist_ok=True)
+
+ def load_json(input_path, input_file):
+     try:
+         with open(os.path.join(input_path, input_file), "r", encoding="utf-8") as json_file:
+             return json.load(json_file)
+     except FileNotFoundError:
+         print(f"Error: The file '{input_file}' does not exist.")
+         return {}
+     except json.JSONDecodeError as e:
+         print(f"Error: Failed to parse JSON in '{input_file}': {str(e)}")
+         return {}
+
+ def process_defaults(input_string, suffix):
+     # Test for ip:port in vector
+     vector_match = re.search(
+         r'std::vector<net::unresolved_address>\(\{\{("([\d.]+)",\s*(\d+))\}\}\)', input_string
+     )
+     if vector_match:
+         ip = vector_match.group(2)
+         port = vector_match.group(3)
+         return [f"{ip}:{port}"]
+
+     # Test for ip:port in single-string
+     broker_match = re.search(r'net::unresolved_address\("([\d.]+)",\s*(\d+)\)', input_string)
+     if broker_match:
+         ip = broker_match.group(1)
+         port = broker_match.group(2)
+         return f"{ip}:{port}"
+
+     # Handle single time units: seconds, milliseconds, hours, minutes
+     time_match = re.search(r"(\d+)(ms|s|min|h)", input_string)
+     # Handle complex time expressions like '24h*365'
+     complex_match = re.search(r"(\d+)(h|min|s|ms)\s*\*\s*(\d+)", input_string)
+     # Handle std::chrono::time expressions
+     chrono_match = re.search(r"std::chrono::(\w+)[\{\(](\d+)[\)\}]", input_string)
+
+     if time_match:
+         value = int(time_match.group(1))
+         unit = time_match.group(2)
+         if suffix == "ms":
+             if unit == "ms":
+                 return value
+             elif unit == "s":
+                 return value * 1000
+             elif unit == "min":
+                 return value * 60 * 1000
+             elif unit == "h":
+                 return value * 60 * 60 * 1000
+         elif suffix == "sec":
+             if unit == "s":
+                 return value
+             elif unit == "min":
+                 return value * 60
+             elif unit == "h":
+                 return value * 60 * 60
+             elif unit == "ms":
+                 return value / 1000
+
+     if complex_match:
+         value = int(complex_match.group(1))
+         unit = complex_match.group(2)
+         multiplier = int(complex_match.group(3))
+         if suffix == "ms":
+             if unit == "h":
+                 return value * 60 * 60 * 1000 * multiplier
+             elif unit == "min":
+                 return value * 60 * 1000 * multiplier
+             elif unit == "s":
+                 return value * 1000 * multiplier
+             elif unit == "ms":
+                 return value * multiplier
+         elif suffix == "sec":
+             if unit == "h":
+                 return value * 60 * 60 * multiplier
+             elif unit == "min":
+                 return value * 60 * multiplier
+             elif unit == "s":
+                 return value * multiplier
+             elif unit == "ms":
+                 return (value * multiplier) / 1000
+
+     if chrono_match:
+         chrono_unit = chrono_match.group(1)
+         chrono_value = int(chrono_match.group(2))
+         chrono_conversion = {
+             "milliseconds": 1,
+             "seconds": 1000,
+             "minutes": 60 * 1000,
+             "hours": 60 * 60 * 1000,
+             "days": 24 * 60 * 60 * 1000,
+             "weeks": 7 * 24 * 60 * 60 * 1000,
+         }
+         if suffix == "ms":
+             return chrono_value * chrono_conversion.get(chrono_unit, 1)
+         elif suffix == "sec":
+             if chrono_unit == "milliseconds":
+                 return chrono_value / 1000
+             else:
+                 return (chrono_value * chrono_conversion.get(chrono_unit, 1)) / 1000
+
+     # Return the original string if no pattern matches
+     return input_string
+
+ def generate_property_doc(key, value):
+     """
+     Generate documentation string for a single property.
+     Returns None if required fields are missing.
+     """
+     description = value.get("description", "").strip()
+     prop_type = value.get("type", "").strip()
+     if not description or not prop_type:
+         return None
+
+     # Capitalize first letter and ensure a period at the end.
+     description = description[0].upper() + description[1:]
+     if not description.endswith('.'):
+         description += '.'
+
+     lines = [f"=== {value.get('name')}\n\n", f"{description}\n\n"]
+
+     property_suffix = value.get("name").split('_')[-1]
+     if property_suffix in SUFFIX_TO_UNIT:
+         lines.append(f"*Unit:* {SUFFIX_TO_UNIT[property_suffix]}\n\n")
+
+     # For non-broker properties (node_config.cc indicates broker), add restart info.
+     if value.get("defined_in") != "src/v/config/node_config.cc":
+         restart = "Yes" if value.get("needs_restart", False) else "No"
+         lines.append(f"*Requires restart:* {restart}\n\n")
+
+     if "gets_restored" in value:
+         restored = "Yes" if value.get("gets_restored", False) else "No"
+         lines.append(f"*Gets restored during cluster restore:* {restored}\n\n")
+
+     visibility = value.get("visibility") or "user"
+     lines.append(f"*Visibility:* `{visibility}`\n\n")
+
+     if prop_type in ["string", "array", "number", "boolean", "integer"]:
+         lines.append(f"*Type:* {prop_type}\n\n")
+
+     if value.get("maximum") is not None and value.get("minimum") is not None:
+         lines.append(
+             f"*Accepted values:* [`{value.get('minimum')}`, `{value.get('maximum')}`]\n\n"
+         )
+
+     default = value.get("default")
+     if default is None or default == "":
+         default_str = "null"
+     elif isinstance(default, bool):
+         default_str = "true" if default else "false"
+     else:
+         default_str = str(default).replace("'", "").lower()
+     default_str = process_defaults(default_str, property_suffix)
+     lines.append(f"*Default:* `{default_str}`\n\n")
+     lines.append("---\n\n")
+     return "".join(lines)
+
+ def write_data_to_file(output_dir, filename, data):
+     file_path = os.path.join(output_dir, filename)
+     ensure_directory_exists(os.path.dirname(file_path))
+     try:
+         with open(file_path, "w+", encoding="utf-8") as output:
+             output.write(data)
+         print(f"Data written to {file_path} successfully.")
+         return True
+     except Exception as e:
+         print(f"Error writing data to {filename}: {str(e)}")
+         return False
+
+ def write_error_file(output_dir, filename, error_content, total_properties):
+     file_path = os.path.join(output_dir, filename)
+     ensure_directory_exists(os.path.dirname(file_path))
+     try:
+         if os.path.exists(file_path):
+             os.remove(file_path)
+         if error_content:
+             error_content = error_content.rstrip("\n")
+             with open(file_path, "w+", encoding="utf-8") as output:
+                 output.write(error_content)
+             error_count = len(error_content.split("\n"))
+             if error_count > 0:
+                 empty_name = filename.replace("empty_", "").replace(".txt", "")
+                 error_type = (
+                     "deprecated properties"
+                     if empty_name == "deprecated_properties"
+                     else f"properties with empty {empty_name}"
+                 )
+                 error_percentage = round((error_count / total_properties) * 100, 2)
+                 print(
+                     f"You have {error_count} {error_type}. Percentage of errors: {error_percentage}%. Data written in '{filename}'."
+                 )
+     except Exception as e:
+         print(f"Error writing error data to '{filename}': {str(e)}")
+
+ # --- Main Processing ---
+ def main():
+     args = parse_arguments()
+     output_dir = args.output_dir
+     page_folder = os.path.join(output_dir, PAGE_FOLDER_NAME)
+     error_folder = os.path.join(output_dir, ERROR_FOLDER_NAME)
+
+     data = load_json(INPUT_JSON_PATH, INPUT_JSON_FILE)
+     properties = data.get("properties", {})
+     total_properties = len(properties)
+
+     # Accumulators for property documentation and error logs.
+     broker_config_content = []
+     schema_registry_content = []
+     pandaproxy_content = []
+     kafka_client_content = []
+     cluster_config_content = []
+     cloud_config_content = []
+     deprecated_broker_content = []
+     deprecated_cluster_content = []
+     all_properties = []
+     empty_description_errors = []
+     empty_type_errors = []
+     max_without_min_errors = []
+     min_without_max_errors = []
+     deprecated_properties_errors = []
+
+     for key, value in properties.items():
+         all_properties.append(key)
+         group = None
+         if key.startswith("cloud_"):
+             group = "cloud"
+         else:
+             group = DEFINED_IN_MAPPING.get(value.get("defined_in"))
+
+         # Handle deprecated properties.
+         if value.get("is_deprecated") is True:
+             deprecated_properties_errors.append(key)
+             if group == "broker":
+                 deprecated_broker_content.append(f"- {key}\n\n")
+             elif group in ["cluster", "cloud"]:
+                 deprecated_cluster_content.append(f"- {key}\n\n")
+             continue
+
+         # Log errors for missing description or type.
+         if not value.get("description", "").strip():
+             empty_description_errors.append(key)
+         if not value.get("type", "").strip():
+             empty_type_errors.append(key)
+
+         # Check for max/min inconsistencies.
+         if value.get("maximum") is not None and value.get("minimum") is None:
+             max_without_min_errors.append(key)
+         if value.get("minimum") is not None and value.get("maximum") is None:
+             min_without_max_errors.append(key)
+
+         property_doc = generate_property_doc(key, value)
+         if property_doc is None:
+             continue
+
+         group_mapping = {
+             "broker": broker_config_content,
+             "schema reg": schema_registry_content,
+             "http proxy": pandaproxy_content,
+             "http client": kafka_client_content,
+             "cluster": cluster_config_content,
+             "cloud": cloud_config_content,
+         }
+         if group in group_mapping:
+             group_mapping[group].append(property_doc)
+
+     # Construct final documentation pages.
+     broker_page = (
+         BROKER_PAGE_TITLE
+         + BROKER_INTRO
+         + BROKER_TITLE
+         + "".join(broker_config_content)
+         + "\n\n"
+         + SCHEMA_REGISTRY_TITLE
+         + SCHEMA_REGISTRY_INTRO
+         + "".join(schema_registry_content)
+         + "\n\n"
+         + PANDAPROXY_TITLE
+         + PANDAPROXY_INTRO
+         + "".join(pandaproxy_content)
+         + "\n\n"
+         + KAFKA_CLIENT_TITLE
+         + KAFKA_CLIENT_INTRO
+         + "".join(kafka_client_content)
+     )
+     cluster_page = (
+         CLUSTER_PAGE_TITLE
+         + CLUSTER_CONFIG_INTRO
+         + CLUSTER_CONFIG_TITLE
+         + "".join(cluster_config_content)
+     )
+     cloud_page = (
+         CLOUD_PAGE_TITLE
+         + CLOUD_CONFIG_INTRO
+         + CLOUD_CONFIG_TITLE
+         + "".join(cloud_config_content)
+     )
+     deprecated_page = (
+         DEPRECATED_PROPERTIES_TITLE
+         + DEPRECATED_PROPERTIES_INTRO
+         + DEPRECATED_BROKER_TITLE
+         + "".join(deprecated_broker_content)
+         + DEPRECATED_CLUSTER_TITLE
+         + "".join(deprecated_cluster_content)
+     )
+
+     # Write output files.
+     write_data_to_file(page_folder, OUTPUT_FILE_BROKER, broker_page)
+     write_data_to_file(page_folder, OUTPUT_FILE_CLUSTER, cluster_page)
+     write_data_to_file(page_folder, OUTPUT_FILE_CLOUD, cloud_page)
+     write_data_to_file(page_folder, OUTPUT_FILE_DEPRECATED, deprecated_page)
+     write_data_to_file(output_dir, ALL_PROPERTIES_FILE, "\n".join(all_properties))
+
+     # Write error files.
+     write_error_file(
+         error_folder, ERROR_FILE_DESCRIPTION, "\n".join(empty_description_errors), total_properties
+     )
+     write_error_file(
+         error_folder, ERROR_FILE_TYPE, "\n".join(empty_type_errors), total_properties
+     )
+     write_error_file(
+         error_folder, ERROR_FILE_MAX_WITHOUT_MIN, "\n".join(max_without_min_errors), total_properties
+     )
+     write_error_file(
+         error_folder, ERROR_FILE_MIN_WITHOUT_MAX, "\n".join(min_without_max_errors), total_properties
+     )
+     write_error_file(
+         error_folder, "deprecated_properties.txt", "\n".join(deprecated_properties_errors), total_properties
+     )
+
+     # Print summary.
+     print(f"Total properties read: {total_properties}")
+     print(f"Total Broker properties: {len(broker_config_content)}")
+     print(f"Total Cluster properties: {len(cluster_config_content)}")
+     print(f"Total Cloud properties: {len(cloud_config_content)}")
+
+ if __name__ == "__main__":
+     main()
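For reference, here is a minimal sketch (not part of the diff) of how generate_property_doc renders one entry from gen/properties-output.json into AsciiDoc. The property name and field values below are hypothetical; only the object shape mirrors what load_json() reads.

    # Hypothetical input entry, shaped like the objects under "properties"
    # in gen/properties-output.json:
    sample = {
        "name": "example_fetch_timeout_ms",        # hypothetical property name
        "description": "time to wait before a fetch request is aborted",
        "type": "integer",
        "defined_in": "src/v/config/configuration.cc",
        "needs_restart": False,
        "visibility": "tunable",
        "default": "10ms",
    }
    print(generate_property_doc(sample["name"], sample))
    # Prints an AsciiDoc block: a "=== example_fetch_timeout_ms" heading, the
    # capitalized description, "*Unit:* milliseconds" (from the _ms suffix),
    # "*Requires restart:* No", "*Visibility:* `tunable`", "*Type:* integer",
    # and "*Default:* `10`" (process_defaults converts "10ms" for an "ms"
    # suffix), followed by a "---" separator.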
package/tools/property-extractor/parser.py (new file)
@@ -0,0 +1,224 @@
+ from file_pair import FilePair
+ from tree_sitter import Language
+ from property_bag import PropertyBag
+ from copy import deepcopy
+ import itertools as it
+ import re
+
+
+ HEADER_QUERY = """
+ (field_declaration
+     type: (_) @type
+     (#match? @type ".*property.*")
+     declarator: (_) @name
+ ) @declaration
+ """
+
+ SOURCE_QUERY = """
+ (field_initializer_list
+     (field_initializer
+         (field_identifier) @field
+         (argument_list (_) @argument)? @arguments
+     ) @field
+ )
+ """
+
+ INITIALIZER_LIST_QUERY = """
+ (initializer_list
+     (initializer_pair
+         designator: (_
+             (field_identifier) @name)
+         value: (_) @value
+     ))
+ """
+
+ MAX_INITIALIZER_LIST_DEPTH = 10
+
+
+ def get_file_contents(path):
+     contents = ""
+
+     with open(path, "rb") as f:
+         contents = f.read()
+
+     return contents
+
+
+ def parse_cpp_header(treesitter_parser, cpp_language, source_code):
+     query = cpp_language.query(HEADER_QUERY)
+     tree = treesitter_parser.parse(source_code)
+
+     captures = query.captures(tree.root_node)
+     properties = PropertyBag()
+
+     current_declaration = None
+     current_type = None
+
+     for node, label in captures:
+         if label == "name":
+             property_name = node.text.decode("utf-8")
+             properties[property_name]["name_in_file"] = property_name
+             properties[property_name]["type"] = current_type
+             properties[property_name]["declaration"] = current_declaration
+             current_type = None
+             current_declaration = None
+         elif label == "type":
+             current_type = node.text.decode("utf-8")
+         elif label == "declaration":
+             current_declaration = node.text.decode("utf-8")
+
+     return properties
+
+
+ def __unquote_string(value):
+     # placeholder to keep escaped double quotes (e.g. \"name\")
+     escaped_quotes_placeholder = "$$$___quote___$$$"
+     return re.sub(
+         r'^R?"([^"]*)"\s*$',
+         "\\1",
+         re.sub(
+             '\\\\"',
+             escaped_quotes_placeholder,
+             value.strip().replace('\\\\"', escaped_quotes_placeholder),
+         ),
+     ).replace(escaped_quotes_placeholder, '"')
+
+
+ def normalize_string(value):
+     return __unquote_string(value)
+
+
+ def __normalize_concatenated_string(value):
+     return "".join(
+         __unquote_string(s)
+         for s in it.filterfalse(lambda r: re.search("^//", r), value.split("\n"))
+     )
+
+
+ def __normalize_initializer_list(
+     value, node, treesitter_parser, cpp_language, source_code
+ ):
+     query = cpp_language.query(INITIALIZER_LIST_QUERY)
+     tree = treesitter_parser.parse(source_code)
+
+     captures = query.captures(
+         tree.root_node, start_point=node.start_point, end_point=node.end_point
+     )
+
+     if len(captures) == 0:
+         return value.replace("\n", "")
+
+     current_field = None
+     fields = PropertyBag()
+     for c in captures:
+         list_node = c[0]
+         capture_label = c[1]
+
+         if capture_label == "name":
+             current_field = list_node.text.decode("utf-8")
+         else:
+             param = dict(value=list_node.text.decode("utf-8"), type=list_node.type)
+             fields[current_field] = (
+                 __normalize_param(
+                     param, list_node, treesitter_parser, cpp_language, source_code
+                 )["value"]
+                 if current_field
+                 else node.text.decode("utf-8")
+             )
+             current_field = None
+
+     return fields
+
+
+ def __normalize_param(param, node, treesitter_parser, cpp_language, source_code):
+     if param["type"] == "comment":
+         return
+
+     if param["type"] == "string_literal" or param["type"] == "raw_string_literal":
+         param["value"] = normalize_string(param["value"])
+     elif param["type"] == "concatenated_string":
+         param["value"] = __normalize_concatenated_string(param["value"])
+         param["type"] = "string_literal"
+     elif param["type"] == "initializer_list":
+         param["value"] = __normalize_initializer_list(
+             param["value"], node, treesitter_parser, cpp_language, source_code
+         )
+     else:
+         param["value"] = param["value"].replace("\n", "")
+
+     return param
+
+
+ def parse_cpp_source(treesitter_parser, cpp_language, source_code):
+     query = cpp_language.query(SOURCE_QUERY)
+     tree = treesitter_parser.parse(source_code)
+
+     captures = query.captures(tree.root_node)
+
+     current_parameter = None
+     state = "read_field"
+
+     parameters = PropertyBag()
+
+     for i in captures:
+         node = i[0]
+         if node.type == "field_initializer":
+             state = "read_field"
+
+         if state == "read_field" or node.type == "field_identifier":
+             if node.type != "field_identifier":
+                 continue
+             current_parameter = node.text.decode("utf-8")
+             parameters[current_parameter] = PropertyBag()
+             parameters[current_parameter]["params"] = []
+             state = "skip_until_pointer"
+         elif state == "skip_until_pointer":
+             if node.type != "pointer_expression":
+                 continue
+             state = "read_parameters"
+         elif state == "read_parameters":
+             param = dict(value=node.text.decode("utf-8"), type=node.type)
+             normalized_param = __normalize_param(
+                 param, node, treesitter_parser, cpp_language, source_code
+             )
+
+             if normalized_param:
+                 parameters[current_parameter]["params"].append(normalized_param)
+
+     return parameters
+
+
+ def __merge_header_and_source_properties(header_properties, source_properties):
+     properties = deepcopy(header_properties)
+
+     for key in header_properties.keys():
+         if key in source_properties:
+             properties[key].update(source_properties[key])
+         else:
+             return PropertyBag()
+
+     return properties
+
+
+ def extract_properties_from_file_pair(
+     treesitter_parser, cpp_language, file_pair: FilePair
+ ):
+     header_properties = parse_cpp_header(
+         treesitter_parser, cpp_language, get_file_contents(file_pair.header)
+     )
+
+     if len(header_properties) == 0:
+         return PropertyBag()
+
+     source_properties = parse_cpp_source(
+         treesitter_parser, cpp_language, get_file_contents(file_pair.implementation)
+     )
+
+     if len(source_properties) == 0:
+         return PropertyBag()
+
+     return __merge_header_and_source_properties(header_properties, source_properties)
+
+
+ def build_treesitter_cpp_library(src_dir, destination_path):
+     Language.build_library(destination_path, [src_dir])
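This module targets the older py-tree-sitter API that still provides Language.build_library and Parser.set_language. Below is a hedged usage sketch (not part of the package) that drives the two parsing passes directly; it assumes a local checkout of the tree-sitter-cpp grammar, and the Redpanda source paths are illustrative only.

    from tree_sitter import Language, Parser

    # Import from the parser.py module added above.
    from parser import (
        build_treesitter_cpp_library,
        get_file_contents,
        parse_cpp_header,
        parse_cpp_source,
    )

    # Assumption: the tree-sitter-cpp grammar is cloned at ./tree-sitter-cpp.
    build_treesitter_cpp_library("tree-sitter-cpp", "build/cpp.so")
    cpp_language = Language("build/cpp.so", "cpp")
    ts_parser = Parser()
    ts_parser.set_language(cpp_language)

    # Illustrative header/implementation pair from the Redpanda source tree.
    header_props = parse_cpp_header(
        ts_parser, cpp_language, get_file_contents("src/v/config/configuration.h")
    )
    source_props = parse_cpp_source(
        ts_parser, cpp_language, get_file_contents("src/v/config/configuration.cc")
    )
    # extract_properties_from_file_pair() runs these same two passes on a
    # FilePair and merges each property's header declaration with the
    # constructor arguments found in the implementation file.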
package/tools/property-extractor/property_bag.py (new file)
@@ -0,0 +1,4 @@
+ class PropertyBag(dict):
+     def __missing__(self, key):
+         self[key] = PropertyBag()
+         return self[key]
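PropertyBag is an autovivifying dict: missing keys are created as nested bags on first access, which is what lets parser.py assign properties[name]["type"] without initializing intermediate dicts. A two-line illustration (not from the package, key name is arbitrary):

    bag = PropertyBag()
    bag["example_property"]["type"] = "property<int>"  # intermediate bag created on demand
    print(bag)  # {'example_property': {'type': 'property<int>'}}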