rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (52)
  1. archive_unused_files.py +18 -5
  2. archive_unused_images.py +9 -2
  3. callout_lib/__init__.py +22 -0
  4. callout_lib/converter_bullets.py +103 -0
  5. callout_lib/converter_comments.py +295 -0
  6. callout_lib/converter_deflist.py +134 -0
  7. callout_lib/detector.py +364 -0
  8. callout_lib/table_parser.py +804 -0
  9. check_published_links.py +1083 -0
  10. check_scannability.py +6 -0
  11. check_source_directives.py +101 -0
  12. convert_callouts_interactive.py +567 -0
  13. convert_callouts_to_deflist.py +628 -0
  14. convert_freemarker_to_asciidoc.py +288 -0
  15. convert_tables_to_deflists.py +479 -0
  16. doc_utils/convert_freemarker_to_asciidoc.py +708 -0
  17. doc_utils/duplicate_content.py +409 -0
  18. doc_utils/duplicate_includes.py +347 -0
  19. doc_utils/extract_link_attributes.py +618 -0
  20. doc_utils/format_asciidoc_spacing.py +285 -0
  21. doc_utils/insert_abstract_role.py +220 -0
  22. doc_utils/inventory_conditionals.py +164 -0
  23. doc_utils/missing_source_directive.py +211 -0
  24. doc_utils/replace_link_attributes.py +187 -0
  25. doc_utils/spinner.py +119 -0
  26. doc_utils/unused_adoc.py +150 -22
  27. doc_utils/unused_attributes.py +218 -6
  28. doc_utils/unused_images.py +81 -9
  29. doc_utils/validate_links.py +576 -0
  30. doc_utils/version.py +8 -0
  31. doc_utils/version_check.py +243 -0
  32. doc_utils/warnings_report.py +237 -0
  33. doc_utils_cli.py +158 -0
  34. extract_link_attributes.py +120 -0
  35. find_duplicate_content.py +209 -0
  36. find_duplicate_includes.py +198 -0
  37. find_unused_attributes.py +84 -6
  38. format_asciidoc_spacing.py +134 -0
  39. insert_abstract_role.py +163 -0
  40. inventory_conditionals.py +53 -0
  41. replace_link_attributes.py +214 -0
  42. rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
  43. rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
  44. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
  45. rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
  46. rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
  47. validate_links.py +213 -0
  48. rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
  49. rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
  50. rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
  51. rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
  52. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
doc_utils/missing_source_directive.py ADDED
@@ -0,0 +1,211 @@
+ # doc_utils/missing_source_directive.py
+
+ """
+ Detects code blocks (----) that are missing [source] directive on the preceding line.
+
+ This module provides functionality to scan AsciiDoc files for code blocks that lack
+ proper source directives, which can cause issues with AsciiDoc-to-DocBook XML conversion.
+ """
+
+ import os
+ import re
+
+ def is_code_block_start(line):
+     """Check if line is a code block delimiter (4 or more dashes)"""
+     return re.match(r'^-{4,}$', line.strip())
+
+ def has_source_directive(line):
+     """Check if line contains [source] directive"""
+     # Match [source], [source,lang], [source, lang], etc.
+     return re.match(r'^\[source[\s,]', line.strip())
+
+ def is_empty_or_whitespace(line):
+     """Check if line is empty or contains only whitespace"""
+     return len(line.strip()) == 0
+
+ def scan_file(filepath):
+     """
+     Scan a single AsciiDoc file for missing [source] directives.
+
+     Args:
+         filepath: Path to the AsciiDoc file to scan
+
+     Returns:
+         List of issue dictionaries containing line_num, prev_line_num, and prev_line
+     """
+     issues = []
+
+     try:
+         with open(filepath, 'r', encoding='utf-8') as f:
+             lines = f.readlines()
+
+         in_code_block = False
+
+         for i, line in enumerate(lines, start=1):
+             # Check if current line is a code block delimiter
+             if is_code_block_start(line):
+                 if not in_code_block:
+                     # This is the START of a code block
+                     prev_line_num = i - 1
+                     prev_line = lines[prev_line_num - 1].rstrip() if prev_line_num > 0 else ""
+
+                     # Check if [source] exists in previous lines (within last 3 lines)
+                     # This handles cases where there's a title between [source] and ----
+                     has_source_in_context = False
+                     for lookback in range(1, min(4, i)):
+                         check_line = lines[i - lookback - 1].strip()
+                         if has_source_directive(check_line):
+                             has_source_in_context = True
+                             break
+                         # Stop looking if we hit an empty line or structural element
+                         if not check_line or check_line.startswith(('=', '----')):
+                             break
+
+                     # Only flag if:
+                     # 1. No [source] directive in recent context
+                     # 2. Previous line is not empty (which might be valid formatting)
+                     if (not has_source_in_context and
+                             not is_empty_or_whitespace(prev_line)):
+
+                         # Additional heuristic: check if previous line looks like it should have [source]
+                         # Skip if previous line is a title, comment, or other structural element
+                         prev_stripped = prev_line.strip()
+
+                         # Skip common valid patterns
+                         if prev_stripped.startswith(('=', '//', 'NOTE:', 'TIP:', 'WARNING:', 'IMPORTANT:', 'CAUTION:')):
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is already an attribute block (but not [source])
+                         if prev_stripped.startswith('[') and prev_stripped.endswith(']'):
+                             # It's some other attribute like [id], [role], etc., might be intentional
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is just a plus sign (continuation)
+                         if prev_stripped == '+':
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is a block title (starts with .)
+                         if prev_stripped.startswith('.') and len(prev_stripped) > 1:
+                             # This might be a title for a source block that's defined earlier
+                             # Check if there's a [source] before the title
+                             if i >= 3:
+                                 two_lines_back = lines[i - 3].strip()
+                                 if has_source_directive(two_lines_back):
+                                     in_code_block = True
+                                     continue
+
+                         issues.append({
+                             'line_num': i,
+                             'prev_line_num': prev_line_num,
+                             'prev_line': prev_line[:80]  # Truncate for display
+                         })
+
+                     in_code_block = True
+                 else:
+                     # This is the END of a code block
+                     in_code_block = False
+
+     except Exception as e:
+         raise IOError(f"Error reading {filepath}: {e}")
+
+     return issues
+
+ def fix_file(filepath, issues):
+     """
+     Insert [source] directives for missing code blocks.
+
+     Args:
+         filepath: Path to the AsciiDoc file to fix
+         issues: List of issue dictionaries from scan_file()
+
+     Returns:
+         True if successful, False otherwise
+     """
+     try:
+         with open(filepath, 'r', encoding='utf-8') as f:
+             lines = f.readlines()
+
+         # Sort issues by line number in reverse order so we can insert from bottom to top
+         # This prevents line number shifts from affecting subsequent insertions
+         sorted_issues = sorted(issues, key=lambda x: x['line_num'], reverse=True)
+
+         for issue in sorted_issues:
+             line_num = issue['line_num']
+             # Insert [source] directive before the ---- line (at line_num - 1, which is index line_num - 1)
+             insert_index = line_num - 1
+             lines.insert(insert_index, '[source]\n')
+
+         # Write the modified content back to the file
+         with open(filepath, 'w', encoding='utf-8') as f:
+             f.writelines(lines)
+
+         return True
+
+     except Exception as e:
+         raise IOError(f"Error fixing {filepath}: {e}")
+
+ def find_missing_source_directives(scan_dir='.', auto_fix=False):
+     """
+     Scan directory for AsciiDoc files with missing [source] directives.
+
+     Args:
+         scan_dir: Directory to scan (default: current directory)
+         auto_fix: If True, automatically insert [source] directives
+
+     Returns:
+         Dictionary with statistics:
+         - total_issues: Total number of issues found
+         - files_with_issues: Number of files with issues
+         - files_fixed: Number of files successfully fixed (if auto_fix=True)
+         - file_details: List of dictionaries with file paths and their issues
+     """
+     if not os.path.isdir(scan_dir):
+         raise ValueError(f"Directory '{scan_dir}' does not exist")
+
+     total_issues = 0
+     files_with_issues = 0
+     files_fixed = 0
+     file_details = []
+
+     # Find all .adoc files (excluding symbolic links)
+     adoc_files = []
+     for root, dirs, files in os.walk(scan_dir):
+         for filename in files:
+             if filename.endswith('.adoc'):
+                 filepath = os.path.join(root, filename)
+                 # Skip symbolic links
+                 if not os.path.islink(filepath):
+                     adoc_files.append(filepath)
+
+     for filepath in sorted(adoc_files):
+         issues = scan_file(filepath)
+
+         if issues:
+             files_with_issues += 1
+             total_issues += len(issues)
+
+             file_info = {
+                 'filepath': filepath,
+                 'issues': issues,
+                 'fixed': False
+             }
+
+             if auto_fix:
+                 try:
+                     if fix_file(filepath, issues):
+                         files_fixed += 1
+                         file_info['fixed'] = True
+                 except Exception as e:
+                     file_info['error'] = str(e)
+
+             file_details.append(file_info)
+
+     return {
+         'total_issues': total_issues,
+         'files_with_issues': files_with_issues,
+         'files_fixed': files_fixed,
+         'file_details': file_details
+     }
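The module above exposes find_missing_source_directives() as its entry point; it returns a summary dictionary rather than printing anything itself. The following is a minimal sketch of how a caller might drive it, assuming the module is importable as doc_utils.missing_source_directive from the installed wheel; the 'docs' directory and the report formatting are illustrative, not something this diff confirms.

# Illustrative driver; the import path and the 'docs' directory are assumptions.
from doc_utils.missing_source_directive import find_missing_source_directives

# Dry scan first: report issues without modifying any files.
report = find_missing_source_directives(scan_dir='docs', auto_fix=False)

print(f"{report['total_issues']} code blocks missing [source] "
      f"across {report['files_with_issues']} files")

for detail in report['file_details']:
    for issue in detail['issues']:
        # line_num points at the '----' delimiter; prev_line shows what precedes it.
        print(f"{detail['filepath']}:{issue['line_num']} (after: {issue['prev_line']})")

# A second run with auto_fix=True inserts a bare [source] line above each flagged block.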
doc_utils/replace_link_attributes.py ADDED
@@ -0,0 +1,187 @@
+ """
+ Replace AsciiDoc attributes within link URLs with their actual values.
+
+ This module finds and replaces attribute references (like {attribute-name}) that appear
+ in the URL portion of AsciiDoc link macros (link: and xref:) with their resolved values
+ from attributes.adoc. Link text is preserved unchanged.
+ """
+
+ import re
+ from pathlib import Path
+ from typing import Dict, List, Tuple, Optional
+
+
+ def find_attributes_files(root_dir: Path) -> List[Path]:
+     """Find all attributes.adoc files in the repository."""
+     attributes_files = []
+
+     for path in root_dir.rglob('**/attributes.adoc'):
+         # Skip hidden directories and common build directories
+         parts = path.parts
+         if any(part.startswith('.') or part in ['target', 'build', 'node_modules'] for part in parts):
+             continue
+         attributes_files.append(path)
+
+     return attributes_files
+
+
+ def load_attributes(attributes_file: Path) -> Dict[str, str]:
+     """Load attribute definitions from an attributes.adoc file."""
+     attributes = {}
+
+     with open(attributes_file, 'r', encoding='utf-8') as f:
+         for line in f:
+             # Match attribute definitions
+             # Format: :attribute-name: value
+             match = re.match(r'^:([a-zA-Z0-9_-]+):\s*(.*)$', line)
+             if match:
+                 attr_name = match.group(1)
+                 attr_value = match.group(2).strip()
+                 attributes[attr_name] = attr_value
+
+     return attributes
+
+
+ def resolve_nested_attributes(attributes: Dict[str, str], max_iterations: int = 10) -> Dict[str, str]:
+     """Resolve nested attribute references within attribute values."""
+     for _ in range(max_iterations):
+         changes_made = False
+
+         for attr_name, attr_value in attributes.items():
+             # Find all attribute references in the value
+             refs = re.findall(r'\{([a-zA-Z0-9_-]+)\}', attr_value)
+
+             for ref in refs:
+                 if ref in attributes:
+                     new_value = attr_value.replace(f'{{{ref}}}', attributes[ref])
+                     if new_value != attr_value:
+                         attributes[attr_name] = new_value
+                         changes_made = True
+                         attr_value = new_value
+
+         if not changes_made:
+             break
+
+     return attributes
+
+
+ def replace_link_attributes_in_file(file_path: Path, attributes: Dict[str, str], dry_run: bool = False, macro_type: str = 'both') -> int:
+     """
+     Replace attribute references within link macros in a single file.
+
+     Args:
+         file_path: Path to the file to process
+         attributes: Dictionary of attribute definitions
+         dry_run: Preview changes without modifying files
+         macro_type: Type of macros to process - 'link', 'xref', or 'both' (default: 'both')
+
+     Returns: Number of replacements made
+     """
+     with open(file_path, 'r', encoding='utf-8') as f:
+         content = f.read()
+
+     original_content = content
+     replacement_count = 0
+
+     # Find all link macros containing attributes in the URL portion only
+     # Match link: and xref: macros, capturing URL and text separately
+     link_patterns = []
+
+     if macro_type in ('link', 'both'):
+         # link:url[text] - replace only in URL portion
+         link_patterns.append((r'link:([^[\]]*)\[([^\]]*)\]', 'link'))
+
+     if macro_type in ('xref', 'both'):
+         # xref:target[text] - replace only in target portion
+         link_patterns.append((r'xref:([^[\]]*)\[([^\]]*)\]', 'xref'))
+
+     # Handle empty text cases based on macro type
+     if macro_type == 'both':
+         link_patterns.append((r'(link|xref):([^[\]]*)\[\]', 'empty_text'))
+     elif macro_type == 'link':
+         link_patterns.append((r'(link):([^[\]]*)\[\]', 'empty_text'))
+     elif macro_type == 'xref':
+         link_patterns.append((r'(xref):([^[\]]*)\[\]', 'empty_text'))
+
+     for pattern, link_type in link_patterns:
+         matches = list(re.finditer(pattern, content))
+
+         # Process matches in reverse order to maintain string positions
+         for match in reversed(matches):
+             if link_type == 'empty_text':
+                 # For links with empty text []
+                 macro_type = match.group(1)  # 'link' or 'xref'
+                 url_part = match.group(2)
+                 text_part = ''
+
+                 # Check if URL contains attributes
+                 if re.search(r'\{[a-zA-Z0-9_-]+\}', url_part):
+                     modified_url = url_part
+
+                     # Replace attributes only in URL
+                     attr_matches = re.findall(r'\{([a-zA-Z0-9_-]+)\}', url_part)
+                     for attr_name in attr_matches:
+                         if attr_name in attributes:
+                             attr_pattern = re.escape(f'{{{attr_name}}}')
+                             modified_url = re.sub(attr_pattern, attributes[attr_name], modified_url)
+                             replacement_count += 1
+
+                     if modified_url != url_part:
+                         # Reconstruct the link with modified URL
+                         modified = f'{macro_type}:{modified_url}[]'
+                         start = match.start()
+                         end = match.end()
+                         content = content[:start] + modified + content[end:]
+             else:
+                 # For links with text
+                 url_part = match.group(1)
+                 text_part = match.group(2)
+
+                 # Check if URL contains attributes
+                 if re.search(r'\{[a-zA-Z0-9_-]+\}', url_part):
+                     modified_url = url_part
+
+                     # Replace attributes only in URL
+                     attr_matches = re.findall(r'\{([a-zA-Z0-9_-]+)\}', url_part)
+                     for attr_name in attr_matches:
+                         if attr_name in attributes:
+                             attr_pattern = re.escape(f'{{{attr_name}}}')
+                             modified_url = re.sub(attr_pattern, attributes[attr_name], modified_url)
+                             replacement_count += 1
+
+                     if modified_url != url_part:
+                         # Reconstruct the link with modified URL but original text
+                         if link_type == 'link':
+                             modified = f'link:{modified_url}[{text_part}]'
+                         else:  # xref
+                             modified = f'xref:{modified_url}[{text_part}]'
+
+                         start = match.start()
+                         end = match.end()
+                         content = content[:start] + modified + content[end:]
+
+     # Write changes if not in dry-run mode
+     if content != original_content:
+         if not dry_run:
+             with open(file_path, 'w', encoding='utf-8') as f:
+                 f.write(content)
+
+         return replacement_count
+
+     return 0
+
+
+ def find_adoc_files(root_dir: Path, exclude_dirs: Optional[set] = None) -> List[Path]:
+     """Find all *.adoc files in the repository."""
+     if exclude_dirs is None:
+         exclude_dirs = {'.git', 'target', 'build', 'node_modules'}
+
+     adoc_files = []
+
+     for path in root_dir.rglob('*.adoc'):
+         # Check if any part of the path is in exclude_dirs
+         parts = set(path.parts)
+         if not parts.intersection(exclude_dirs):
+             adoc_files.append(path)
+
+     return adoc_files
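Taken together, the helpers above form a small pipeline: locate attributes.adoc files, load and flatten their definitions, then rewrite attribute references inside link:/xref: URLs while leaving the link text alone. A rough sketch of that flow, assuming the module is importable as doc_utils.replace_link_attributes; the repository root and the dry-run preview are illustrative choices.

# Illustrative wiring of the helpers above; the import path is an assumption.
from pathlib import Path

from doc_utils.replace_link_attributes import (
    find_adoc_files,
    find_attributes_files,
    load_attributes,
    replace_link_attributes_in_file,
    resolve_nested_attributes,
)

root = Path('.')

# Merge every attributes.adoc found in the tree, then flatten nested references.
attributes = {}
for attr_file in find_attributes_files(root):
    attributes.update(load_attributes(attr_file))
attributes = resolve_nested_attributes(attributes)

# Preview replacements in link/xref URLs without writing anything (dry_run=True).
total = 0
for adoc in find_adoc_files(root):
    total += replace_link_attributes_in_file(adoc, attributes, dry_run=True, macro_type='both')

print(f"Would replace {total} attribute references in link/xref URLs")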
doc_utils/spinner.py ADDED
@@ -0,0 +1,119 @@
+ """
+ Spinner utility for showing progress during long-running operations.
+
+ This module provides a simple spinner that can be used by all doc-utils tools
+ to indicate that processing is in progress.
+ """
+
+ import sys
+ import time
+ import threading
+ from typing import Optional
+
+
+ class Spinner:
+     """A simple spinner to show progress during long operations."""
+
+     FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
+
+     def __init__(self, message: str = "Processing"):
+         """
+         Initialize the spinner with a message.
+
+         Args:
+             message: The message to display alongside the spinner
+         """
+         self.message = message
+         self.spinning = False
+         self.thread: Optional[threading.Thread] = None
+         self.frame_index = 0
+
+     def _spin(self):
+         """Internal method that runs in a separate thread to animate the spinner."""
+         while self.spinning:
+             frame = self.FRAMES[self.frame_index % len(self.FRAMES)]
+             sys.stdout.write(f'\r{frame} {self.message}...')
+             sys.stdout.flush()
+             self.frame_index += 1
+             time.sleep(0.1)
+
+     def start(self):
+         """Start the spinner animation."""
+         if not self.spinning:
+             self.spinning = True
+             self.thread = threading.Thread(target=self._spin)
+             self.thread.daemon = True
+             self.thread.start()
+
+     def stop(self, final_message: Optional[str] = None, success: bool = True):
+         """
+         Stop the spinner animation.
+
+         Args:
+             final_message: Optional message to display after stopping
+             success: Whether the operation was successful (affects the symbol shown)
+         """
+         if self.spinning:
+             self.spinning = False
+             if self.thread:
+                 self.thread.join()
+
+             # Clear the spinner line completely
+             sys.stdout.write('\r' + ' ' * 80 + '\r')
+
+             # Write final message if provided
+             if final_message:
+                 symbol = '✓' if success else '✗'
+                 sys.stdout.write(f'{symbol} {final_message}\n')
+
+             sys.stdout.flush()
+
+     def __enter__(self):
+         """Context manager entry - start the spinner."""
+         self.start()
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Context manager exit - stop the spinner."""
+         success = exc_type is None
+         self.stop(success=success)
+         return False
+
+
+ def with_spinner(message: str = "Processing"):
+     """
+     Decorator to add a spinner to a function.
+
+     Usage:
+         @with_spinner("Loading data")
+         def load_data():
+             # ... long running operation
+             return data
+     """
+     def decorator(func):
+         def wrapper(*args, **kwargs):
+             spinner = Spinner(message)
+             spinner.start()
+             try:
+                 result = func(*args, **kwargs)
+                 spinner.stop(success=True)
+                 return result
+             except Exception as e:
+                 spinner.stop(success=False)
+                 raise e
+         return wrapper
+     return decorator
+
+
+ # Convenience functions for common operations
+ def show_progress(message: str = "Processing", total: Optional[int] = None):
+     """
+     Show progress with optional item count.
+
+     Args:
+         message: The base message to display
+         total: Optional total number of items being processed
+     """
+     if total:
+         return Spinner(f"{message} ({total} items)")
+     return Spinner(message)
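The class supports three usage styles that the module itself documents: a context manager, the with_spinner decorator, and the show_progress helper for an item-count message. A short illustrative sketch, assuming the module is importable as doc_utils.spinner; the time.sleep calls stand in for real work.

import time

# Import path assumed from the package layout shown in this diff.
from doc_utils.spinner import Spinner, show_progress, with_spinner

# Context-manager style: the spinner starts on entry and clears itself on exit.
with Spinner("Scanning AsciiDoc files"):
    time.sleep(2)

# Decorator style: wraps a long-running function.
@with_spinner("Building link report")
def build_report():
    time.sleep(2)
    return {"links": 42}

report = build_report()

# Convenience helper: include an item count, then stop with a summary line.
spinner = show_progress("Validating links", total=120)
spinner.start()
time.sleep(2)
spinner.stop(final_message="120 links validated", success=True)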