rolfedh-doc-utils 0.1.40.tar.gz → 0.1.42.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/PKG-INFO +1 -1
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/converter_deflist.py +4 -25
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/detector.py +14 -4
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/convert_callouts_interactive.py +10 -1
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/convert_callouts_to_deflist.py +6 -2
- rolfedh_doc_utils-0.1.42/convert_id_attributes_to_ids.py +229 -0
- rolfedh_doc_utils-0.1.42/doc_utils/insert_abstract_role.py +220 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/version.py +1 -1
- rolfedh_doc_utils-0.1.42/insert_abstract_role.py +163 -0
- rolfedh_doc_utils-0.1.42/insert_procedure_title.py +257 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/pyproject.toml +5 -2
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/PKG-INFO +1 -1
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/SOURCES.txt +4 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/entry_points.txt +3 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/top_level.txt +3 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/LICENSE +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/README.md +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/archive_unused_files.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/__init__.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/converter_bullets.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/converter_comments.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/table_parser.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/check_published_links.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/check_source_directives.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/convert_freemarker_to_asciidoc.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/convert_tables_to_deflists.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/__init__.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/convert_freemarker_to_asciidoc.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/duplicate_content.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/duplicate_includes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/extract_link_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/file_utils.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/format_asciidoc_spacing.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/inventory_conditionals.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/missing_source_directive.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/replace_link_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/scannability.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/spinner.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/topic_map_parser.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/unused_adoc.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/unused_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/validate_links.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/version_check.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/warnings_report.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils_cli.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/extract_link_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/find_duplicate_content.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/find_duplicate_includes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/find_unused_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/format_asciidoc_spacing.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/inventory_conditionals.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/replace_link_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/requires.txt +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/setup.cfg +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/setup.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_archive_unused_files.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_auto_discovery.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_cli_entry_points.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_convert_tables_to_deflists.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_extract_link_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_file_utils.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_fixture_archive_unused_files.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_fixture_archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_fixture_check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_inventory_conditionals.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_parse_exclude_list.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_replace_link_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_symlink_handling.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_table_callout_conversion.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_table_parser.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_topic_map_parser.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_unused_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_validate_links.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_version_check.py +0 -0
- {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/validate_links.py +0 -0
callout_lib/converter_deflist.py

@@ -41,41 +41,20 @@ class DefListConverter:
         if table_title:
             # Remove leading dot and trailing period if present
             title_text = table_title.lstrip('.').rstrip('.')
-            lines = [f'
+            lines = [f'{title_text}, where:']
         else:
-            lines = ['
+            lines = ['where:']
 
         # Process each group (which may contain one or more callouts)
         for group in callout_groups:
             code_line = group.code_line
             callout_nums = group.callout_numbers
 
-            # COMMENTED OUT: User-replaceable value detection causes false positives
-            # with Java generics (e.g., <MyEntity, Integer>) and other valid syntax
-            # that uses angle brackets. Always use the full code line as the term.
-            #
-            # # Check if this is a user-replaceable value (contains angle brackets but not heredoc)
-            # # User values are single words/phrases in angle brackets like <my-value>
-            # user_values = DefListConverter.USER_VALUE_PATTERN.findall(code_line)
-            #
-            # if user_values and len(user_values) == 1 and len(code_line) < 100:
-            #     # This looks like a user-replaceable value placeholder
-            #     # Format the value (ensure it has angle brackets)
-            #     user_value = user_values[0]
-            #     if not user_value.startswith('<'):
-            #         user_value = f'<{user_value}>'
-            #     if not user_value.endswith('>'):
-            #         user_value = f'{user_value}>'
-            #     term = f'`{user_value}`'
-            # else:
-            #     # This is a code line - strip whitespace before wrapping in backticks
-            #     term = f'`{code_line.strip()}`'
-
             # Always use the full code line - strip whitespace before wrapping in backticks
             term = f'`{code_line.strip()}`'
 
-            # Add
-            lines.append('')
+            # Add continuation marker before each definition term
+            lines.append('+')
             lines.append(f'{term}::')
 
             # Add explanations for all callouts in this group
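Note: with this change, a converted definition list opens with a "where:" lead-in and places a `+` list-continuation marker before each term, so the list attaches to the preceding code block in AsciiDoc. A minimal sketch of the new emission logic; the Group class below is a stand-in for the package's callout-group objects, and per-callout explanations are elided:

    from dataclasses import dataclass

    @dataclass
    class Group:                # stand-in for the real callout group type
        code_line: str

    def emit_deflist(callout_groups, table_title=None):
        if table_title:
            title_text = table_title.lstrip('.').rstrip('.')
            lines = [f'{title_text}, where:']
        else:
            lines = ['where:']
        for group in callout_groups:
            term = f'`{group.code_line.strip()}`'  # always the full code line
            lines.append('+')                      # continuation marker before each term
            lines.append(f'{term}::')
            # ...explanations for each callout in the group follow here
        return lines

    print(emit_deflist([Group('url=<jdbc-url>')], '.Connection properties.'))
    # ['Connection properties, where:', '+', '`url=<jdbc-url>`::']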
callout_lib/detector.py

@@ -281,8 +281,8 @@ class CalloutDetector:
         explanations = {}
         i = start_line + 1  # Start after the closing delimiter
 
-        # Skip blank lines
-        while i < len(lines) and (not lines[i].strip() or lines[i].strip()
+        # Skip blank lines, continuation markers (+), and {nbsp} spacers
+        while i < len(lines) and (not lines[i].strip() or lines[i].strip() in ('+', '{nbsp}')):
             i += 1
 
         # Collect consecutive callout explanation lines
@@ -298,8 +298,18 @@ class CalloutDetector:
         # Continue until we hit a blank line, a new callout, or certain patterns
         while i < len(lines):
             line = lines[i]
-
-
+            stripped = line.strip()
+            # Stop if we hit:
+            # - blank line
+            # - new callout explanation
+            # - list start marker [start=N]
+            # - standalone + (list continuation that attaches to parent)
+            # - admonition block start [NOTE], [IMPORTANT], [WARNING], [TIP], [CAUTION]
+            if (not stripped or
+                    self.CALLOUT_EXPLANATION.match(line) or
+                    line.startswith('[start=') or
+                    stripped == '+' or
+                    stripped in ('[NOTE]', '[IMPORTANT]', '[WARNING]', '[TIP]', '[CAUTION]')):
                 break
             # Add continuation line preserving original formatting
             explanation_lines.append(line)
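Note: taken together, the two detector hunks make explanation discovery tolerant of `+` continuation markers and `{nbsp}` spacer lines, and make collection stop at list and admonition boundaries instead of running past them. A standalone sketch of the new stop test; CALLOUT_EXPLANATION below is a stand-in for the class's existing regex, whose exact pattern this diff does not show:

    import re

    CALLOUT_EXPLANATION = re.compile(r'^<\d+>\s')   # assumed shape: "<1> text"
    ADMONITIONS = ('[NOTE]', '[IMPORTANT]', '[WARNING]', '[TIP]', '[CAUTION]')

    def stops_collection(line: str) -> bool:
        stripped = line.strip()
        return (not stripped
                or bool(CALLOUT_EXPLANATION.match(line))
                or line.startswith('[start=')
                or stripped == '+'
                or stripped in ADMONITIONS)

    assert stops_collection('[NOTE]')
    assert stops_collection('<2> Sets the port.')
    assert not stops_collection('continued explanation text')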
convert_callouts_interactive.py

@@ -381,10 +381,19 @@ class InteractiveCalloutConverter:
             )
         else:
             # Remove old explanations, add new list
+            # Find where explanations actually start (skip {nbsp} and + markers to preserve them)
+            explanation_start_line = block.end_line + 1
+            while explanation_start_line < len(new_lines) and (
+                not new_lines[explanation_start_line].strip() or
+                new_lines[explanation_start_line].strip() in ('+', '{nbsp}')
+            ):
+                explanation_start_line += 1
+
             new_section = (
                 new_lines[:content_start] +
                 converted_content +
-                [new_lines[content_end]] +
+                [new_lines[content_end]] +  # Keep closing delimiter
+                new_lines[content_end + 1:explanation_start_line] +  # Preserve {nbsp} and + markers
                 output_list +
                 new_lines[explanation_end + 1:]
             )
convert_callouts_to_deflist.py

@@ -245,9 +245,13 @@ class CalloutConverter:
             # Table format: preserve content between code block and table start
             explanation_start_line = self.detector.last_table.start_line
         else:
-            # List format: skip blank lines
+            # List format: skip blank lines, {nbsp} spacers, and + continuation markers
+            # These will be preserved in the output via the slice below
             explanation_start_line = block.end_line + 1
-            while explanation_start_line < len(new_lines) and
+            while explanation_start_line < len(new_lines) and (
+                not new_lines[explanation_start_line].strip() or
+                new_lines[explanation_start_line].strip() in ('+', '{nbsp}')
+            ):
                 explanation_start_line += 1
 
         # Build the new section
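Note: both converters now pair the skip loop with a slice that re-inserts the skipped lines, so `{nbsp}` spacers and `+` markers between the code block and its explanations survive the rewrite. A toy illustration of that slicing; the list contents and indices here are invented for the example:

    new_lines = ['----', 'code <1>', '----', '+', '{nbsp}', '<1> old explanation', 'after']
    content_end = 2                 # index of the closing '----'
    explanation_start_line = 5      # first line past the '+' / '{nbsp}' spacers
    explanation_end = 5             # last line of the old explanations
    output_list = ['+', '`code`::', 'new explanation']

    rebuilt = (new_lines[:content_end + 1] +
               new_lines[content_end + 1:explanation_start_line] +  # keeps '+', '{nbsp}'
               output_list +
               new_lines[explanation_end + 1:])
    print(rebuilt)
    # ['----', 'code <1>', '----', '+', '{nbsp}', '+', '`code`::', 'new explanation', 'after']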
rolfedh_doc_utils-0.1.42/convert_id_attributes_to_ids.py (new file)

@@ -0,0 +1,229 @@
+#!/usr/bin/env python3
+"""
+convert-id-attributes-to-ids - Convert :id: attribute definitions to AsciiDoc [id="..."] anchors.
+
+This script recursively scans a directory for AsciiDoc files and replaces instances of
+`:id: <id_value>` with `[id="<id_value>_{context}"]`.
+
+Optionally, with --clean-up, it also removes related boilerplate lines:
+- // define ID as an attribute
+- // assign ID conditionally, followed by header
+- include::{modules}/common/id.adoc[]
+"""
+
+import argparse
+import os
+import re
+import sys
+from pathlib import Path
+
+from doc_utils.version_check import check_version_on_startup
+from doc_utils.version import __version__
+from doc_utils.spinner import Spinner
+
+
+def find_adoc_files(directory: Path) -> list[Path]:
+    """Recursively find all .adoc files in a directory."""
+    adoc_files = []
+    for root, dirs, files in os.walk(directory, followlinks=False):
+        # Skip hidden directories and common non-content directories
+        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ('node_modules', '__pycache__')]
+        for file in files:
+            if file.endswith('.adoc'):
+                adoc_files.append(Path(root) / file)
+    return adoc_files
+
+
+def convert_id_attributes(content: str, clean_up: bool = False) -> tuple[str, int, int]:
+    """
+    Convert :id: attributes to [id="..._{context}"] format.
+
+    Args:
+        content: The file content to process
+        clean_up: If True, also remove boilerplate lines
+
+    Returns:
+        Tuple of (modified_content, id_replacements_count, cleanup_removals_count)
+    """
+    lines = content.split('\n')
+    new_lines = []
+    id_replacements = 0
+    cleanup_removals = 0
+
+    # Patterns for clean-up (flexible matching for variations)
+    cleanup_patterns = [
+        re.compile(r'^\s*//\s*define ID as an attribute', re.IGNORECASE),
+        re.compile(r'^\s*//\s*assign.*ID conditionally', re.IGNORECASE),
+        re.compile(r'^\s*include::\{modules\}/common/id\.adoc\[\]'),
+    ]
+
+    # Pattern to match :id: <value>
+    id_pattern = re.compile(r'^:id:\s*(.+?)\s*$')
+
+    for line in lines:
+        # Check if this is an :id: line
+        id_match = id_pattern.match(line)
+        if id_match:
+            id_value = id_match.group(1)
+            new_line = f'[id="{id_value}_{{context}}"]'
+            new_lines.append(new_line)
+            id_replacements += 1
+            continue
+
+        # Check if clean-up is enabled and line matches cleanup patterns
+        if clean_up:
+            should_remove = False
+            for pattern in cleanup_patterns:
+                if pattern.search(line):
+                    should_remove = True
+                    cleanup_removals += 1
+                    break
+            if should_remove:
+                continue
+
+        new_lines.append(line)
+
+    return '\n'.join(new_lines), id_replacements, cleanup_removals
+
+
+def process_file(file_path: Path, dry_run: bool = False, clean_up: bool = False) -> tuple[int, int]:
+    """
+    Process a single AsciiDoc file.
+
+    Returns:
+        Tuple of (id_replacements, cleanup_removals)
+    """
+    try:
+        content = file_path.read_text(encoding='utf-8')
+    except Exception as e:
+        print(f"  Error reading {file_path}: {e}")
+        return 0, 0
+
+    new_content, id_replacements, cleanup_removals = convert_id_attributes(content, clean_up)
+
+    if id_replacements > 0 or cleanup_removals > 0:
+        if not dry_run:
+            try:
+                file_path.write_text(new_content, encoding='utf-8')
+            except Exception as e:
+                print(f"  Error writing {file_path}: {e}")
+                return 0, 0
+
+    return id_replacements, cleanup_removals
+
+
+def main():
+    # Check for updates (non-blocking)
+    check_version_on_startup()
+
+    parser = argparse.ArgumentParser(
+        description='Convert :id: attribute definitions to AsciiDoc [id="..._{context}"] anchors.'
+    )
+    parser.add_argument(
+        'directory',
+        nargs='?',
+        default='.',
+        help='Directory to scan for .adoc files (default: current directory)'
+    )
+    parser.add_argument(
+        '--dry-run', '-n',
+        action='store_true',
+        help='Show what would be changed without making actual modifications'
+    )
+    parser.add_argument(
+        '--clean-up',
+        action='store_true',
+        help='Also remove ID-related boilerplate lines (comments and include directives)'
+    )
+    parser.add_argument(
+        '--verbose', '-v',
+        action='store_true',
+        help='Show detailed output for each file processed'
+    )
+    parser.add_argument(
+        '--version',
+        action='version',
+        version=f'%(prog)s {__version__}'
+    )
+
+    args = parser.parse_args()
+
+    # Resolve directory path
+    directory = Path(args.directory).resolve()
+
+    if not directory.exists():
+        print(f"Error: Directory not found: {directory}")
+        sys.exit(1)
+
+    if not directory.is_dir():
+        print(f"Error: Not a directory: {directory}")
+        sys.exit(1)
+
+    mode_str = "DRY RUN MODE - " if args.dry_run else ""
+    print(f"{mode_str}Scanning directory: {directory}")
+
+    if args.clean_up:
+        print("Clean-up mode enabled: will remove ID-related boilerplate lines")
+
+    # Find all AsciiDoc files
+    spinner = Spinner("Searching for .adoc files")
+    spinner.start()
+    adoc_files = find_adoc_files(directory)
+    spinner.stop(f"Found {len(adoc_files)} .adoc files")
+
+    if not adoc_files:
+        print("No AsciiDoc files found.")
+        sys.exit(0)
+
+    if args.dry_run:
+        print("\n*** DRY RUN MODE - No files will be modified ***\n")
+
+    # Process each file
+    total_id_replacements = 0
+    total_cleanup_removals = 0
+    files_modified = 0
+
+    spinner = Spinner(f"Processing {len(adoc_files)} files")
+    spinner.start()
+
+    for file_path in adoc_files:
+        id_replacements, cleanup_removals = process_file(file_path, args.dry_run, args.clean_up)
+
+        if id_replacements > 0 or cleanup_removals > 0:
+            files_modified += 1
+            total_id_replacements += id_replacements
+            total_cleanup_removals += cleanup_removals
+
+            if args.verbose:
+                rel_path = file_path.relative_to(directory)
+                changes = []
+                if id_replacements > 0:
+                    changes.append(f"{id_replacements} ID conversion(s)")
+                if cleanup_removals > 0:
+                    changes.append(f"{cleanup_removals} line(s) removed")
+                print(f"  {rel_path}: {', '.join(changes)}")
+
+    spinner.stop(f"Processed {len(adoc_files)} files")
+
+    # Summary
+    print(f"\nSummary:")
+    if args.dry_run:
+        print(f"  Files that would be modified: {files_modified}")
+        print(f"  :id: attributes that would be converted: {total_id_replacements}")
+        if args.clean_up:
+            print(f"  Boilerplate lines that would be removed: {total_cleanup_removals}")
+        print("\nRun without --dry-run to apply changes.")
+    else:
+        print(f"  Files modified: {files_modified}")
+        print(f"  :id: attributes converted: {total_id_replacements}")
+        if args.clean_up:
+            print(f"  Boilerplate lines removed: {total_cleanup_removals}")
+
+        if total_id_replacements == 0:
+            print("\nNo :id: attributes found to convert.")
+        else:
+            print("\nConversion complete!")
+
+
+if __name__ == '__main__':
+    main()
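Note: the conversion core of the new script is a pure function over file content, so it can be exercised directly. A quick check of convert_id_attributes, assuming the script is importable from the working directory; the AsciiDoc content is invented for the example:

    from convert_id_attributes_to_ids import convert_id_attributes

    before = (
        "// define ID as an attribute\n"
        ":id: con-example-module\n"
        "include::{modules}/common/id.adoc[]\n"
        "= Example module\n"
    )

    after, converted, removed = convert_id_attributes(before, clean_up=True)
    assert converted == 1 and removed == 2
    assert after.startswith('[id="con-example-module_{context}"]')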
rolfedh_doc_utils-0.1.42/doc_utils/insert_abstract_role.py (new file)

@@ -0,0 +1,220 @@
+"""
+Insert abstract role - ensures AsciiDoc files have [role="_abstract"] above the first paragraph.
+
+Core logic for adding the [role="_abstract"] attribute required for DITA short description conversion.
+"""
+
+import re
+from pathlib import Path
+from typing import List, Tuple, Optional
+
+
+def find_first_paragraph_after_title(lines: List[str]) -> Optional[int]:
+    """
+    Find the line index of the first paragraph after the document title.
+
+    The first paragraph is the first non-empty line that:
+    - Comes after a level 1 heading (= Title)
+    - Is not an attribute definition (starts with :)
+    - Is not a comment (starts with //)
+    - Is not a block attribute (starts with [)
+    - Is not another heading
+
+    Args:
+        lines: List of lines from the file (without trailing newlines)
+
+    Returns:
+        Line index of the first paragraph, or None if not found
+    """
+    title_found = False
+    title_index = -1
+
+    for i, line in enumerate(lines):
+        # Check for level 1 heading (document title)
+        if re.match(r'^=\s+[^=]', line):
+            title_found = True
+            title_index = i
+            continue
+
+        # Only look for first paragraph after we've found the title
+        if not title_found:
+            continue
+
+        # Skip empty lines
+        if re.match(r'^\s*$', line):
+            continue
+
+        # Skip attribute definitions
+        if re.match(r'^:', line):
+            continue
+
+        # Skip comments (single line)
+        if re.match(r'^//', line):
+            continue
+
+        # Skip block attributes like [role=...], [id=...], etc.
+        if re.match(r'^\[', line):
+            continue
+
+        # Skip other headings
+        if re.match(r'^=+\s+', line):
+            continue
+
+        # Skip include directives
+        if re.match(r'^include::', line):
+            continue
+
+        # This is the first paragraph
+        return i
+
+    return None
+
+
+def has_abstract_role(lines: List[str], paragraph_index: int) -> bool:
+    """
+    Check if there's already a [role="_abstract"] before the paragraph.
+
+    Args:
+        lines: List of lines from the file
+        paragraph_index: Index of the first paragraph
+
+    Returns:
+        True if [role="_abstract"] already exists before the paragraph
+    """
+    # Look at the lines immediately before the paragraph
+    for i in range(paragraph_index - 1, -1, -1):
+        line = lines[i].strip()
+
+        # Skip empty lines
+        if not line:
+            continue
+
+        # Found abstract role
+        if re.match(r'^\[role=["\']_abstract["\']\]$', line):
+            return True
+
+        # If we hit any other non-empty content, stop looking
+        # (could be attribute, heading, etc.)
+        break
+
+    return False
+
+
+def process_file(file_path: Path, dry_run: bool = False, verbose: bool = False) -> Tuple[bool, List[str]]:
+    """
+    Process a single AsciiDoc file to add [role="_abstract"] if needed.
+
+    Args:
+        file_path: Path to the file to process
+        dry_run: If True, show what would be changed without modifying
+        verbose: If True, show detailed output
+
+    Returns:
+        Tuple of (changes_made, messages) where messages is a list of verbose output
+    """
+    messages = []
+
+    if verbose:
+        messages.append(f"Processing: {file_path}")
+
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+    except (IOError, UnicodeDecodeError) as e:
+        raise IOError(f"Error reading {file_path}: {e}")
+
+    # Remove trailing newlines from lines for processing
+    lines = [line.rstrip('\n\r') for line in lines]
+
+    # Find the first paragraph after the title
+    paragraph_index = find_first_paragraph_after_title(lines)
+
+    if paragraph_index is None:
+        if verbose:
+            messages.append("  No paragraph found after title")
+        return False, messages
+
+    # Check if abstract role already exists
+    if has_abstract_role(lines, paragraph_index):
+        if verbose:
+            messages.append("  [role=\"_abstract\"] already present")
+        return False, messages
+
+    # Insert [role="_abstract"] before the first paragraph
+    # We need to add it with a blank line before it if there isn't one
+    new_lines = lines[:paragraph_index]
+
+    # Check if we need to add a blank line before the role
+    if paragraph_index > 0 and lines[paragraph_index - 1].strip():
+        new_lines.append('')
+
+    new_lines.append('[role="_abstract"]')
+    new_lines.extend(lines[paragraph_index:])
+
+    if verbose:
+        preview = lines[paragraph_index][:60] + "..." if len(lines[paragraph_index]) > 60 else lines[paragraph_index]
+        messages.append(f"  Adding [role=\"_abstract\"] before line {paragraph_index + 1}: {preview}")
+
+    if not dry_run:
+        try:
+            with open(file_path, 'w', encoding='utf-8') as f:
+                for line in new_lines:
+                    f.write(line + '\n')
+        except IOError as e:
+            raise IOError(f"Error writing {file_path}: {e}")
+
+    return True, messages
+
+
+def find_adoc_files(path: Path, exclude_dirs: List[str] = None, exclude_files: List[str] = None) -> List[Path]:
+    """
+    Find all .adoc files in the given path.
+
+    Args:
+        path: File or directory path to search
+        exclude_dirs: List of directory paths to exclude
+        exclude_files: List of file paths to exclude
+
+    Returns:
+        List of Path objects for .adoc files
+    """
+    exclude_dirs = exclude_dirs or []
+    exclude_files = exclude_files or []
+
+    # Normalize exclusion paths to absolute
+    exclude_dirs_abs = [Path(d).resolve() for d in exclude_dirs]
+    exclude_files_abs = [Path(f).resolve() for f in exclude_files]
+
+    adoc_files = []
+
+    if path.is_file():
+        if path.suffix == '.adoc':
+            path_abs = path.resolve()
+            if path_abs not in exclude_files_abs:
+                adoc_files.append(path)
+    elif path.is_dir():
+        for adoc_path in path.rglob('*.adoc'):
+            # Skip symlinks
+            if adoc_path.is_symlink():
+                continue
+
+            path_abs = adoc_path.resolve()
+
+            # Check if file is excluded
+            if path_abs in exclude_files_abs:
+                continue
+
+            # Check if any parent directory is excluded
+            skip = False
+            for exclude_dir in exclude_dirs_abs:
+                try:
+                    path_abs.relative_to(exclude_dir)
+                    skip = True
+                    break
+                except ValueError:
+                    pass
+
+            if not skip:
+                adoc_files.append(adoc_path)
+
+    return sorted(adoc_files)
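Note: doc_utils/insert_abstract_role.py appears to carry the reusable core, with the top-level insert_abstract_role.py (+163 lines, hunk not shown above) as the CLI entry point. A dry-run usage sketch of the public functions; the directory names are invented for the example:

    from pathlib import Path
    from doc_utils.insert_abstract_role import find_adoc_files, process_file

    for adoc in find_adoc_files(Path('modules'), exclude_dirs=['modules/archive']):
        would_change, messages = process_file(adoc, dry_run=True, verbose=True)
        for msg in messages:
            print(msg)
        if would_change:
            print(f'  -> would insert [role="_abstract"] into {adoc}')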