rolfedh-doc-utils 0.1.40__tar.gz → 0.1.42__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/PKG-INFO +1 -1
  2. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/converter_deflist.py +4 -25
  3. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/detector.py +14 -4
  4. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/convert_callouts_interactive.py +10 -1
  5. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/convert_callouts_to_deflist.py +6 -2
  6. rolfedh_doc_utils-0.1.42/convert_id_attributes_to_ids.py +229 -0
  7. rolfedh_doc_utils-0.1.42/doc_utils/insert_abstract_role.py +220 -0
  8. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/version.py +1 -1
  9. rolfedh_doc_utils-0.1.42/insert_abstract_role.py +163 -0
  10. rolfedh_doc_utils-0.1.42/insert_procedure_title.py +257 -0
  11. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/pyproject.toml +5 -2
  12. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/PKG-INFO +1 -1
  13. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/SOURCES.txt +4 -0
  14. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/entry_points.txt +3 -0
  15. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/top_level.txt +3 -0
  16. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/LICENSE +0 -0
  17. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/README.md +0 -0
  18. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/archive_unused_files.py +0 -0
  19. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/archive_unused_images.py +0 -0
  20. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/__init__.py +0 -0
  21. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/converter_bullets.py +0 -0
  22. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/converter_comments.py +0 -0
  23. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/callout_lib/table_parser.py +0 -0
  24. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/check_published_links.py +0 -0
  25. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/check_scannability.py +0 -0
  26. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/check_source_directives.py +0 -0
  27. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/convert_freemarker_to_asciidoc.py +0 -0
  28. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/convert_tables_to_deflists.py +0 -0
  29. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/__init__.py +0 -0
  30. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/convert_freemarker_to_asciidoc.py +0 -0
  31. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/duplicate_content.py +0 -0
  32. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/duplicate_includes.py +0 -0
  33. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/extract_link_attributes.py +0 -0
  34. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/file_utils.py +0 -0
  35. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/format_asciidoc_spacing.py +0 -0
  36. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/inventory_conditionals.py +0 -0
  37. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/missing_source_directive.py +0 -0
  38. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/replace_link_attributes.py +0 -0
  39. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/scannability.py +0 -0
  40. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/spinner.py +0 -0
  41. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/topic_map_parser.py +0 -0
  42. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/unused_adoc.py +0 -0
  43. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/unused_attributes.py +0 -0
  44. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/unused_images.py +0 -0
  45. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/validate_links.py +0 -0
  46. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/version_check.py +0 -0
  47. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils/warnings_report.py +0 -0
  48. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/doc_utils_cli.py +0 -0
  49. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/extract_link_attributes.py +0 -0
  50. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/find_duplicate_content.py +0 -0
  51. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/find_duplicate_includes.py +0 -0
  52. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/find_unused_attributes.py +0 -0
  53. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/format_asciidoc_spacing.py +0 -0
  54. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/inventory_conditionals.py +0 -0
  55. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/replace_link_attributes.py +0 -0
  56. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
  57. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/rolfedh_doc_utils.egg-info/requires.txt +0 -0
  58. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/setup.cfg +0 -0
  59. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/setup.py +0 -0
  60. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_archive_unused_files.py +0 -0
  61. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_archive_unused_images.py +0 -0
  62. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_auto_discovery.py +0 -0
  63. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_check_scannability.py +0 -0
  64. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_cli_entry_points.py +0 -0
  65. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_convert_tables_to_deflists.py +0 -0
  66. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_extract_link_attributes.py +0 -0
  67. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_file_utils.py +0 -0
  68. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_fixture_archive_unused_files.py +0 -0
  69. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_fixture_archive_unused_images.py +0 -0
  70. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_fixture_check_scannability.py +0 -0
  71. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_inventory_conditionals.py +0 -0
  72. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_parse_exclude_list.py +0 -0
  73. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_replace_link_attributes.py +0 -0
  74. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_symlink_handling.py +0 -0
  75. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_table_callout_conversion.py +0 -0
  76. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_table_parser.py +0 -0
  77. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_topic_map_parser.py +0 -0
  78. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_unused_attributes.py +0 -0
  79. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_validate_links.py +0 -0
  80. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/tests/test_version_check.py +0 -0
  81. {rolfedh_doc_utils-0.1.40 → rolfedh_doc_utils-0.1.42}/validate_links.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.40
3
+ Version: 0.1.42
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -41,41 +41,20 @@ class DefListConverter:
41
41
  if table_title:
42
42
  # Remove leading dot and trailing period if present
43
43
  title_text = table_title.lstrip('.').rstrip('.')
44
- lines = [f'\n{title_text}, where:']
44
+ lines = [f'{title_text}, where:']
45
45
  else:
46
- lines = ['\nwhere:']
46
+ lines = ['where:']
47
47
 
48
48
  # Process each group (which may contain one or more callouts)
49
49
  for group in callout_groups:
50
50
  code_line = group.code_line
51
51
  callout_nums = group.callout_numbers
52
52
 
53
- # COMMENTED OUT: User-replaceable value detection causes false positives
54
- # with Java generics (e.g., <MyEntity, Integer>) and other valid syntax
55
- # that uses angle brackets. Always use the full code line as the term.
56
- #
57
- # # Check if this is a user-replaceable value (contains angle brackets but not heredoc)
58
- # # User values are single words/phrases in angle brackets like <my-value>
59
- # user_values = DefListConverter.USER_VALUE_PATTERN.findall(code_line)
60
- #
61
- # if user_values and len(user_values) == 1 and len(code_line) < 100:
62
- # # This looks like a user-replaceable value placeholder
63
- # # Format the value (ensure it has angle brackets)
64
- # user_value = user_values[0]
65
- # if not user_value.startswith('<'):
66
- # user_value = f'<{user_value}>'
67
- # if not user_value.endswith('>'):
68
- # user_value = f'{user_value}>'
69
- # term = f'`{user_value}`'
70
- # else:
71
- # # This is a code line - strip whitespace before wrapping in backticks
72
- # term = f'`{code_line.strip()}`'
73
-
74
53
  # Always use the full code line - strip whitespace before wrapping in backticks
75
54
  term = f'`{code_line.strip()}`'
76
55
 
77
- # Add blank line before each term
78
- lines.append('')
56
+ # Add continuation marker before each definition term
57
+ lines.append('+')
79
58
  lines.append(f'{term}::')
80
59
 
81
60
  # Add explanations for all callouts in this group
@@ -281,8 +281,8 @@ class CalloutDetector:
281
281
  explanations = {}
282
282
  i = start_line + 1 # Start after the closing delimiter
283
283
 
284
- # Skip blank lines and continuation markers (+)
285
- while i < len(lines) and (not lines[i].strip() or lines[i].strip() == '+'):
284
+ # Skip blank lines, continuation markers (+), and {nbsp} spacers
285
+ while i < len(lines) and (not lines[i].strip() or lines[i].strip() in ('+', '{nbsp}')):
286
286
  i += 1
287
287
 
288
288
  # Collect consecutive callout explanation lines
@@ -298,8 +298,18 @@ class CalloutDetector:
298
298
  # Continue until we hit a blank line, a new callout, or certain patterns
299
299
  while i < len(lines):
300
300
  line = lines[i]
301
- # Stop if we hit a blank line, new callout, or list start marker
302
- if not line.strip() or self.CALLOUT_EXPLANATION.match(line) or line.startswith('[start='):
301
+ stripped = line.strip()
302
+ # Stop if we hit:
303
+ # - blank line
304
+ # - new callout explanation
305
+ # - list start marker [start=N]
306
+ # - standalone + (list continuation that attaches to parent)
307
+ # - admonition block start [NOTE], [IMPORTANT], [WARNING], [TIP], [CAUTION]
308
+ if (not stripped or
309
+ self.CALLOUT_EXPLANATION.match(line) or
310
+ line.startswith('[start=') or
311
+ stripped == '+' or
312
+ stripped in ('[NOTE]', '[IMPORTANT]', '[WARNING]', '[TIP]', '[CAUTION]')):
303
313
  break
304
314
  # Add continuation line preserving original formatting
305
315
  explanation_lines.append(line)
@@ -381,10 +381,19 @@ class InteractiveCalloutConverter:
381
381
  )
382
382
  else:
383
383
  # Remove old explanations, add new list
384
+ # Find where explanations actually start (skip {nbsp} and + markers to preserve them)
385
+ explanation_start_line = block.end_line + 1
386
+ while explanation_start_line < len(new_lines) and (
387
+ not new_lines[explanation_start_line].strip() or
388
+ new_lines[explanation_start_line].strip() in ('+', '{nbsp}')
389
+ ):
390
+ explanation_start_line += 1
391
+
384
392
  new_section = (
385
393
  new_lines[:content_start] +
386
394
  converted_content +
387
- [new_lines[content_end]] +
395
+ [new_lines[content_end]] + # Keep closing delimiter
396
+ new_lines[content_end + 1:explanation_start_line] + # Preserve {nbsp} and + markers
388
397
  output_list +
389
398
  new_lines[explanation_end + 1:]
390
399
  )
@@ -245,9 +245,13 @@ class CalloutConverter:
245
245
  # Table format: preserve content between code block and table start
246
246
  explanation_start_line = self.detector.last_table.start_line
247
247
  else:
248
- # List format: skip blank lines after code block
248
+ # List format: skip blank lines, {nbsp} spacers, and + continuation markers
249
+ # These will be preserved in the output via the slice below
249
250
  explanation_start_line = block.end_line + 1
250
- while explanation_start_line < len(new_lines) and not new_lines[explanation_start_line].strip():
251
+ while explanation_start_line < len(new_lines) and (
252
+ not new_lines[explanation_start_line].strip() or
253
+ new_lines[explanation_start_line].strip() in ('+', '{nbsp}')
254
+ ):
251
255
  explanation_start_line += 1
252
256
 
253
257
  # Build the new section
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ convert-id-attributes-to-ids - Convert :id: attribute definitions to AsciiDoc [id="..."] anchors.
4
+
5
+ This script recursively scans a directory for AsciiDoc files and replaces instances of
6
+ `:id: <id_value>` with `[id="<id_value>_{context}"]`.
7
+
8
+ Optionally, with --clean-up, it also removes related boilerplate lines:
9
+ - // define ID as an attribute
10
+ - // assign ID conditionally, followed by header
11
+ - include::{modules}/common/id.adoc[]
12
+ """
13
+
14
+ import argparse
15
+ import os
16
+ import re
17
+ import sys
18
+ from pathlib import Path
19
+
20
+ from doc_utils.version_check import check_version_on_startup
21
+ from doc_utils.version import __version__
22
+ from doc_utils.spinner import Spinner
23
+
24
+
25
def find_adoc_files(directory: Path) -> list[Path]:
    """Recursively collect every ``.adoc`` file under *directory*.

    Hidden directories (names starting with ``.``) as well as ``node_modules``
    and ``__pycache__`` are pruned from the walk, and symlinked directories
    are not followed.

    Args:
        directory: Root directory to scan.

    Returns:
        Sorted list of paths to the ``.adoc`` files found, so files are
        processed and reported in a stable order on every run.
    """
    skipped_dirs = ('node_modules', '__pycache__')
    adoc_files = []
    for root, dirs, files in os.walk(directory, followlinks=False):
        # Prune in place so os.walk does not descend into skipped directories.
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in skipped_dirs]
        adoc_files.extend(Path(root) / name for name in files if name.endswith('.adoc'))
    # os.walk yields entries in arbitrary filesystem order; sort for determinism.
    return sorted(adoc_files)
35
+
36
+
37
# Matches ":id: <value>". The value must start with a non-space character so
# an empty attribute (":id:" followed only by whitespace) is left untouched
# instead of being turned into an anchor with a blank ID.
_ID_ATTRIBUTE = re.compile(r'^:id:\s*(\S.*?)\s*$')

# Boilerplate lines removed when clean-up is requested (flexible matching for
# variations in spacing and capitalisation).
_CLEANUP_PATTERNS = (
    re.compile(r'^\s*//\s*define ID as an attribute', re.IGNORECASE),
    re.compile(r'^\s*//\s*assign.*ID conditionally', re.IGNORECASE),
    re.compile(r'^\s*include::\{modules\}/common/id\.adoc\[\]'),
)


def convert_id_attributes(content: str, clean_up: bool = False) -> tuple[str, int, int]:
    """
    Convert :id: attributes to [id="..._{context}"] format.

    Every line of the form ``:id: <value>`` is replaced with
    ``[id="<value>_{context}"]``. Lines are split on ``\\n``; a trailing
    ``\\r`` (CRLF input) is preserved on rewritten lines so the line-ending
    style of the content stays uniform.

    Args:
        content: The file content to process
        clean_up: If True, also remove boilerplate lines (the ID-related
            comments and the ``include::{modules}/common/id.adoc[]`` directive)

    Returns:
        Tuple of (modified_content, id_replacements_count, cleanup_removals_count)
    """
    converted = []
    id_replacements = 0
    cleanup_removals = 0

    for line in content.split('\n'):
        id_match = _ID_ATTRIBUTE.match(line)
        if id_match:
            # split('\n') leaves the '\r' of a CRLF ending on the line and the
            # pattern's trailing \s* swallows it; add it back explicitly.
            line_end = '\r' if line.endswith('\r') else ''
            converted.append(f'[id="{id_match.group(1)}_{{context}}"]{line_end}')
            id_replacements += 1
            continue

        # Boilerplate lines are dropped entirely (no blank line is left behind).
        if clean_up and any(pattern.search(line) for pattern in _CLEANUP_PATTERNS):
            cleanup_removals += 1
            continue

        converted.append(line)

    return '\n'.join(converted), id_replacements, cleanup_removals
87
+
88
+
89
def process_file(file_path: Path, dry_run: bool = False, clean_up: bool = False) -> tuple[int, int]:
    """
    Process a single AsciiDoc file.

    The file is read as UTF-8, passed through convert_id_attributes(), and
    written back only when something changed and dry_run is False.
    Read/write failures are reported on stderr and treated as "no changes"
    so that one bad file does not abort the whole run.

    Args:
        file_path: Path of the file to process
        dry_run: If True, compute the counts but do not modify the file
        clean_up: If True, also remove ID-related boilerplate lines

    Returns:
        Tuple of (id_replacements, cleanup_removals); (0, 0) if the file
        could not be read or written
    """
    try:
        content = file_path.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        # Only I/O and decoding problems are expected here; anything else is
        # a programming error and should surface.
        print(f" Error reading {file_path}: {e}", file=sys.stderr)
        return 0, 0

    new_content, id_replacements, cleanup_removals = convert_id_attributes(content, clean_up)

    changed = id_replacements > 0 or cleanup_removals > 0
    if changed and not dry_run:
        try:
            file_path.write_text(new_content, encoding='utf-8')
        except (OSError, UnicodeError) as e:
            print(f" Error writing {file_path}: {e}", file=sys.stderr)
            return 0, 0

    return id_replacements, cleanup_removals
113
+
114
+
115
def main():
    """Command-line entry point: parse arguments, convert files, print a summary."""
    # Check for updates before doing any work
    check_version_on_startup()

    parser = argparse.ArgumentParser(
        description='Convert :id: attribute definitions to AsciiDoc [id="..._{context}"] anchors.'
    )
    parser.add_argument(
        'directory', nargs='?', default='.',
        help='Directory to scan for .adoc files (default: current directory)',
    )
    parser.add_argument(
        '--dry-run', '-n', action='store_true',
        help='Show what would be changed without making actual modifications',
    )
    parser.add_argument(
        '--clean-up', action='store_true',
        help='Also remove ID-related boilerplate lines (comments and include directives)',
    )
    parser.add_argument(
        '--verbose', '-v', action='store_true',
        help='Show detailed output for each file processed',
    )
    parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}')

    args = parser.parse_args()

    # Validate the target directory up front
    directory = Path(args.directory).resolve()
    if not directory.exists():
        print(f"Error: Directory not found: {directory}")
        sys.exit(1)
    if not directory.is_dir():
        print(f"Error: Not a directory: {directory}")
        sys.exit(1)

    banner_prefix = "DRY RUN MODE - " if args.dry_run else ""
    print(f"{banner_prefix}Scanning directory: {directory}")

    if args.clean_up:
        print("Clean-up mode enabled: will remove ID-related boilerplate lines")

    # Discover the files to work on
    search_spinner = Spinner("Searching for .adoc files")
    search_spinner.start()
    adoc_files = find_adoc_files(directory)
    search_spinner.stop(f"Found {len(adoc_files)} .adoc files")

    if not adoc_files:
        print("No AsciiDoc files found.")
        sys.exit(0)

    if args.dry_run:
        print("\n*** DRY RUN MODE - No files will be modified ***\n")

    # Convert each file, accumulating totals for the summary
    total_id_replacements = 0
    total_cleanup_removals = 0
    files_modified = 0

    work_spinner = Spinner(f"Processing {len(adoc_files)} files")
    work_spinner.start()

    for file_path in adoc_files:
        id_replacements, cleanup_removals = process_file(file_path, args.dry_run, args.clean_up)

        # Nothing changed in this file: nothing to count or report
        if id_replacements <= 0 and cleanup_removals <= 0:
            continue

        files_modified += 1
        total_id_replacements += id_replacements
        total_cleanup_removals += cleanup_removals

        if args.verbose:
            rel_path = file_path.relative_to(directory)
            details = [
                text
                for count, text in (
                    (id_replacements, f"{id_replacements} ID conversion(s)"),
                    (cleanup_removals, f"{cleanup_removals} line(s) removed"),
                )
                if count > 0
            ]
            print(f" {rel_path}: {', '.join(details)}")

    work_spinner.stop(f"Processed {len(adoc_files)} files")

    # Summary
    print("\nSummary:")
    if args.dry_run:
        print(f" Files that would be modified: {files_modified}")
        print(f" :id: attributes that would be converted: {total_id_replacements}")
        if args.clean_up:
            print(f" Boilerplate lines that would be removed: {total_cleanup_removals}")
        print("\nRun without --dry-run to apply changes.")
        return

    print(f" Files modified: {files_modified}")
    print(f" :id: attributes converted: {total_id_replacements}")
    if args.clean_up:
        print(f" Boilerplate lines removed: {total_cleanup_removals}")

    closing_message = (
        "\nNo :id: attributes found to convert."
        if total_id_replacements == 0
        else "\nConversion complete!"
    )
    print(closing_message)
226
+
227
+
228
+ if __name__ == '__main__':
229
+ main()
@@ -0,0 +1,220 @@
1
+ """
2
+ Insert abstract role - ensures AsciiDoc files have [role="_abstract"] above the first paragraph.
3
+
4
+ Core logic for adding the [role="_abstract"] attribute required for DITA short description conversion.
5
+ """
6
+
7
+ import re
8
+ from pathlib import Path
9
+ from typing import List, Tuple, Optional
10
+
11
+
12
def find_first_paragraph_after_title(lines: List[str]) -> Optional[int]:
    """
    Find the line index of the first paragraph after the document title.

    The first paragraph is the first non-empty line that:
    - Comes after a level 1 heading (= Title)
    - Is not an attribute definition (starts with :)
    - Is not a comment (a // line, or any line inside a //// comment block)
    - Is not a block attribute (starts with [)
    - Is not another heading
    - Is not an include directive (starts with include::)

    Lines inside a comment block are ignored everywhere, so a commented-out
    "= Title" is not mistaken for the document title either.

    Args:
        lines: List of lines from the file (without trailing newlines)

    Returns:
        Line index of the first paragraph, or None if not found
    """
    title_found = False
    # Delimiter (e.g. '////') of the comment block currently open, if any.
    open_comment_delimiter = None

    for i, line in enumerate(lines):
        stripped = line.rstrip()

        if open_comment_delimiter is not None:
            # Inside a comment block: skip everything up to and including the
            # closing delimiter, which must match the opening one.
            if stripped == open_comment_delimiter:
                open_comment_delimiter = None
            continue

        # A line of four or more slashes opens a comment block.
        if re.match(r'^/{4,}$', stripped):
            open_comment_delimiter = stripped
            continue

        # Check for level 1 heading (document title)
        if re.match(r'^=\s+[^=]', line):
            title_found = True
            continue

        # Only look for first paragraph after we've found the title
        if not title_found:
            continue

        # Skip empty lines
        if re.match(r'^\s*$', line):
            continue

        # Skip attribute definitions, single-line comments, block attributes
        # like [role=...] / [id=...], and include directives
        if line.startswith((':', '//', '[', 'include::')):
            continue

        # Skip other headings
        if re.match(r'^=+\s+', line):
            continue

        # This is the first paragraph
        return i

    return None
71
+
72
+
73
def has_abstract_role(lines: List[str], paragraph_index: int) -> bool:
    """
    Check if there's already a [role="_abstract"] before the paragraph.

    The search walks upwards from the paragraph, skipping blank lines and
    other block attribute lines (for example [id="..."]), because several
    attribute lines can be stacked above one paragraph. It stops at the first
    line that is neither blank nor a block attribute line.

    Args:
        lines: List of lines from the file
        paragraph_index: Index of the first paragraph

    Returns:
        True if [role="_abstract"] already exists before the paragraph
    """
    for i in range(paragraph_index - 1, -1, -1):
        line = lines[i].strip()

        # Skip empty lines
        if not line:
            continue

        # Found abstract role
        if re.match(r'^\[role=["\']_abstract["\']\]$', line):
            return True

        # Another block attribute line stacked above the paragraph: the role
        # may sit above it, so keep looking.
        if line.startswith('[') and line.endswith(']'):
            continue

        # Any other content (attribute entry, heading, comment, ...) ends the
        # run of lines attached to the paragraph.
        break

    return False
101
+
102
+
103
def process_file(file_path: Path, dry_run: bool = False, verbose: bool = False) -> Tuple[bool, List[str]]:
    """
    Process a single AsciiDoc file to add [role="_abstract"] if needed.

    The role line is inserted directly above the first paragraph after the
    document title, preceded by a blank line when the line above the
    paragraph is not already blank. The file is rewritten with every line
    terminated by a single newline character.

    Args:
        file_path: Path to the file to process
        dry_run: If True, show what would be changed without modifying
        verbose: If True, show detailed output

    Returns:
        Tuple of (changes_made, messages) where messages is a list of verbose
        output. changes_made is also True in dry-run mode when a change would
        have been made.

    Raises:
        IOError: If the file cannot be read (including decoding errors) or
            written; the underlying exception is attached as the cause.
    """
    messages = []

    if verbose:
        messages.append(f"Processing: {file_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            raw_lines = f.readlines()
    except (IOError, UnicodeDecodeError) as e:
        # Chain the cause so the original traceback is not lost.
        raise IOError(f"Error reading {file_path}: {e}") from e

    # Remove trailing newlines from lines for processing
    lines = [line.rstrip('\n\r') for line in raw_lines]

    # Find the first paragraph after the title
    paragraph_index = find_first_paragraph_after_title(lines)

    if paragraph_index is None:
        if verbose:
            messages.append(" No paragraph found after title")
        return False, messages

    # Check if abstract role already exists
    if has_abstract_role(lines, paragraph_index):
        if verbose:
            messages.append(" [role=\"_abstract\"] already present")
        return False, messages

    # Insert [role="_abstract"] before the first paragraph, separated from
    # whatever precedes it by a blank line.
    new_lines = lines[:paragraph_index]
    if paragraph_index > 0 and lines[paragraph_index - 1].strip():
        new_lines.append('')
    new_lines.append('[role="_abstract"]')
    new_lines.extend(lines[paragraph_index:])

    if verbose:
        paragraph = lines[paragraph_index]
        # Keep the message short: show at most the first 60 characters.
        preview = paragraph[:60] + "..." if len(paragraph) > 60 else paragraph
        messages.append(f" Adding [role=\"_abstract\"] before line {paragraph_index + 1}: {preview}")

    if not dry_run:
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.writelines(line + '\n' for line in new_lines)
        except IOError as e:
            raise IOError(f"Error writing {file_path}: {e}") from e

    return True, messages
167
+
168
+
169
def find_adoc_files(path: Path, exclude_dirs: List[str] = None, exclude_files: List[str] = None) -> List[Path]:
    """
    Collect the .adoc files designated by a file or directory path.

    A file path yields that file when it has the .adoc suffix and is not
    excluded. A directory path is searched recursively; symlinked files,
    excluded files, and anything located under an excluded directory are
    left out. Exclusions are compared on resolved (absolute) paths.

    Args:
        path: File or directory path to search
        exclude_dirs: List of directory paths to exclude
        exclude_files: List of file paths to exclude

    Returns:
        Sorted list of Path objects for .adoc files
    """
    # Resolve the exclusions once so they can be compared with resolved candidates
    excluded_dirs = [Path(d).resolve() for d in (exclude_dirs or [])]
    excluded_files = {Path(f).resolve() for f in (exclude_files or [])}

    def _is_excluded(resolved: Path) -> bool:
        # Excluded either explicitly, or by sitting at/below an excluded directory
        if resolved in excluded_files:
            return True
        return any(
            resolved == directory or directory in resolved.parents
            for directory in excluded_dirs
        )

    if path.is_file():
        # Only the explicit file list applies to a single-file argument
        if path.suffix == '.adoc' and path.resolve() not in excluded_files:
            return [path]
        return []

    if not path.is_dir():
        return []

    matches = [
        candidate
        for candidate in path.rglob('*.adoc')
        if not candidate.is_symlink() and not _is_excluded(candidate.resolve())
    ]
    return sorted(matches)
@@ -1,7 +1,7 @@
1
1
  """Version information for doc-utils."""
2
2
 
3
3
  # This should match the version in pyproject.toml
4
- __version__ = "0.1.40"
4
+ __version__ = "0.1.42"
5
5
 
6
6
  def get_version():
7
7
  """Return the current version string."""