rolfedh-doc-utils 0.1.35__py3-none-any.whl → 0.1.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
+ # doc_utils/missing_source_directive.py
+
+ """
+ Detects code blocks (----) that are missing [source] directive on the preceding line.
+
+ This module provides functionality to scan AsciiDoc files for code blocks that lack
+ proper source directives, which can cause issues with AsciiDoc-to-DocBook XML conversion.
+ """
+
+ import os
+ import re
+
+ def is_code_block_start(line):
+     """Check if line is a code block delimiter (4 or more dashes)"""
+     return re.match(r'^-{4,}$', line.strip())
+
+ def has_source_directive(line):
+     """Check if line contains [source] directive"""
+     # Match [source], [source,lang], [source, lang], etc.
+     return re.match(r'^\[source[\]\s,]', line.strip())
+
+ def is_empty_or_whitespace(line):
+     """Check if line is empty or contains only whitespace"""
+     return len(line.strip()) == 0
+
+ def scan_file(filepath):
+     """
+     Scan a single AsciiDoc file for missing [source] directives.
+
+     Args:
+         filepath: Path to the AsciiDoc file to scan
+
+     Returns:
+         List of issue dictionaries containing line_num, prev_line_num, and prev_line
+     """
+     issues = []
+
+     try:
+         with open(filepath, 'r', encoding='utf-8') as f:
+             lines = f.readlines()
+
+         in_code_block = False
+
+         for i, line in enumerate(lines, start=1):
+             # Check if current line is a code block delimiter
+             if is_code_block_start(line):
+                 if not in_code_block:
+                     # This is the START of a code block
+                     prev_line_num = i - 1
+                     prev_line = lines[prev_line_num - 1].rstrip() if prev_line_num > 0 else ""
+
+                     # Check if [source] exists in previous lines (within last 3 lines)
+                     # This handles cases where there's a title between [source] and ----
+                     has_source_in_context = False
+                     for lookback in range(1, min(4, i)):
+                         check_line = lines[i - lookback - 1].strip()
+                         if has_source_directive(check_line):
+                             has_source_in_context = True
+                             break
+                         # Stop looking if we hit an empty line or structural element
+                         if not check_line or check_line.startswith(('=', '----')):
+                             break
+
+                     # Only flag if:
+                     # 1. No [source] directive in recent context
+                     # 2. Previous line is not empty (which might be valid formatting)
+                     if (not has_source_in_context and
+                             not is_empty_or_whitespace(prev_line)):
+
+                         # Additional heuristic: check if previous line looks like it should have [source]
+                         # Skip if previous line is a title, comment, or other structural element
+                         prev_stripped = prev_line.strip()
+
+                         # Skip common valid patterns
+                         if prev_stripped.startswith(('=', '//', 'NOTE:', 'TIP:', 'WARNING:', 'IMPORTANT:', 'CAUTION:')):
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is already an attribute block (but not [source])
+                         if prev_stripped.startswith('[') and prev_stripped.endswith(']'):
+                             # It's some other attribute like [id], [role], etc., might be intentional
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is just a plus sign (continuation)
+                         if prev_stripped == '+':
+                             in_code_block = True
+                             continue
+
+                         # Skip if previous line is a block title (starts with .)
+                         if prev_stripped.startswith('.') and len(prev_stripped) > 1:
+                             # This might be a title for a source block that's defined earlier
+                             # Check if there's a [source] before the title
+                             if i >= 3:
+                                 two_lines_back = lines[i - 3].strip()
+                                 if has_source_directive(two_lines_back):
+                                     in_code_block = True
+                                     continue
+
+                         issues.append({
+                             'line_num': i,
+                             'prev_line_num': prev_line_num,
+                             'prev_line': prev_line[:80]  # Truncate for display
+                         })
+
+                     in_code_block = True
+                 else:
+                     # This is the END of a code block
+                     in_code_block = False
+
+     except Exception as e:
+         raise IOError(f"Error reading {filepath}: {e}")
+
+     return issues
+
+ def fix_file(filepath, issues):
+     """
+     Insert [source] directives for missing code blocks.
+
+     Args:
+         filepath: Path to the AsciiDoc file to fix
+         issues: List of issue dictionaries from scan_file()
+
+     Returns:
+         True if successful, False otherwise
+     """
+     try:
+         with open(filepath, 'r', encoding='utf-8') as f:
+             lines = f.readlines()
+
+         # Sort issues by line number in reverse order so we can insert from bottom to top
+         # This prevents line number shifts from affecting subsequent insertions
+         sorted_issues = sorted(issues, key=lambda x: x['line_num'], reverse=True)
+
+         for issue in sorted_issues:
+             line_num = issue['line_num']
+             # Insert [source] immediately before the ---- line (list index line_num - 1)
+             insert_index = line_num - 1
+             lines.insert(insert_index, '[source]\n')
+
+         # Write the modified content back to the file
+         with open(filepath, 'w', encoding='utf-8') as f:
+             f.writelines(lines)
+
+         return True
+
+     except Exception as e:
+         raise IOError(f"Error fixing {filepath}: {e}")
+
+ def find_missing_source_directives(scan_dir='.', auto_fix=False):
+     """
+     Scan directory for AsciiDoc files with missing [source] directives.
+
+     Args:
+         scan_dir: Directory to scan (default: current directory)
+         auto_fix: If True, automatically insert [source] directives
+
+     Returns:
+         Dictionary with statistics:
+         - total_issues: Total number of issues found
+         - files_with_issues: Number of files with issues
+         - files_fixed: Number of files successfully fixed (if auto_fix=True)
+         - file_details: List of dictionaries with file paths and their issues
+     """
+     if not os.path.isdir(scan_dir):
+         raise ValueError(f"Directory '{scan_dir}' does not exist")
+
+     total_issues = 0
+     files_with_issues = 0
+     files_fixed = 0
+     file_details = []
+
+     # Find all .adoc files (excluding symbolic links)
+     adoc_files = []
+     for root, dirs, files in os.walk(scan_dir):
+         for filename in files:
+             if filename.endswith('.adoc'):
+                 filepath = os.path.join(root, filename)
+                 # Skip symbolic links
+                 if not os.path.islink(filepath):
+                     adoc_files.append(filepath)
+
+     for filepath in sorted(adoc_files):
+         issues = scan_file(filepath)
+
+         if issues:
+             files_with_issues += 1
+             total_issues += len(issues)
+
+             file_info = {
+                 'filepath': filepath,
+                 'issues': issues,
+                 'fixed': False
+             }
+
+             if auto_fix:
+                 try:
+                     if fix_file(filepath, issues):
+                         files_fixed += 1
+                         file_info['fixed'] = True
+                 except Exception as e:
+                     file_info['error'] = str(e)
+
+             file_details.append(file_info)
+
+     return {
+         'total_issues': total_issues,
+         'files_with_issues': files_with_issues,
+         'files_fixed': files_fixed,
+         'file_details': file_details
+     }
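
The hunk above is the library half of the new checker; a top-level check_source_directives.py wrapper and a check-source-directives console script appear further down in the RECORD and entry-point changes. As a minimal usage sketch, not part of the diff itself, the module can also be driven directly from Python once the 0.1.38 wheel is installed; it uses only the functions and dictionary keys shown above:

# Illustrative only; assumes the wheel is installed so the module imports as doc_utils.missing_source_directive.
from doc_utils.missing_source_directive import find_missing_source_directives

# Report-only pass over the current directory; pass auto_fix=True to insert [source] lines in place.
stats = find_missing_source_directives(scan_dir='.', auto_fix=False)

print(f"{stats['total_issues']} issue(s) across {stats['files_with_issues']} file(s)")
for detail in stats['file_details']:
    for issue in detail['issues']:
        # Each issue records the ---- delimiter line and the non-empty line that precedes it.
        print(f"{detail['filepath']}:{issue['line_num']} preceded by "
              f"line {issue['prev_line_num']}: {issue['prev_line']!r}")
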
doc_utils/version.py CHANGED
@@ -1,7 +1,7 @@
  """Version information for doc-utils."""

  # This should match the version in pyproject.toml
- __version__ = "0.1.35"
+ __version__ = "0.1.37"

  def get_version():
      """Return the current version string."""
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: rolfedh-doc-utils
- Version: 0.1.35
+ Version: 0.1.38
  Summary: CLI tools for AsciiDoc documentation projects
  Author: Rolfe Dlugy-Hegwer
  License: MIT License
@@ -94,7 +94,8 @@ doc-utils --version # Show version

  | Tool | Description | Usage |
  |------|-------------|-------|
- | **`validate-links`** | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
+ | **`validate-links`** | Validates links in AsciiDoc source files, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
+ | **`check-published-links`** | Validates links on published HTML docs using linkchecker, with URL rewriting for misresolved paths | `check-published-links https://docs.example.com/guide/` |
  | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
  | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
  | **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
@@ -1,8 +1,11 @@
  archive_unused_files.py,sha256=YKYPtuBHEZcsyQSwSYxSYvw9v9Mh6Of8MqT53A5bM44,2438
  archive_unused_images.py,sha256=EvPhMIwp6_AHKtuNYQ663q6biXBeXaqf88NzWrhvtIE,2029
+ check_published_links.py,sha256=nk07prV6xHVqVrYCy2Eb8BWkjkgJBhczk8U0E-KeIvA,43258
  check_scannability.py,sha256=O6ROr-e624jVPvPpASpsWo0gTfuCFpA2mTSX61BjAEI,5478
+ check_source_directives.py,sha256=JiIvn_ph9VKPMH4zg-aSsuIGQZcnI_imj7rZLLE04L8,3660
  convert_callouts_interactive.py,sha256=4PjiVIOWxNJiJLQuBHT3x6rE46-hgfFHSaoo5quYIs8,22889
  convert_callouts_to_deflist.py,sha256=BoqW5_GkQ-KqNzn4vmE6lsQosrPV0lkB-bfAx3dzyMw,25886
+ convert_tables_to_deflists.py,sha256=PIP6xummuMqC3aSzahKKRBYahes_j5ZpHp_-k6BjurY,15599
  doc_utils_cli.py,sha256=J3CE7cTDDCRGkhAknYejNWHhk5t9YFGt27WDVfR98Xk,5111
  extract_link_attributes.py,sha256=wR2SmR2la-jR6DzDbas2PoNONgRZ4dZ6aqwzkwEv8Gs,3516
  find_unused_attributes.py,sha256=77CxFdm72wj6SO81w-auMdDjnvF83jWy_qaM7DsAtBw,4263
@@ -19,6 +22,7 @@ doc_utils/__init__.py,sha256=qqZR3lohzkP63soymrEZPBGzzk6-nFzi4_tSffjmu_0,74
  doc_utils/extract_link_attributes.py,sha256=U0EvPZReJQigNfbT-icBsVT6Li64hYki5W7MQz6qqbc,22743
  doc_utils/file_utils.py,sha256=fpTh3xx759sF8sNocdn_arsP3KAv8XA6cTQTAVIZiZg,4247
  doc_utils/format_asciidoc_spacing.py,sha256=RL2WU_dG_UfGL01LnevcyJfKsvYy_ogNyeoVX-Fyqks,13579
+ doc_utils/missing_source_directive.py,sha256=X3Acn0QJTk6XjmBXhGus5JAjlIitCiicCRE3fslifyw,8048
  doc_utils/replace_link_attributes.py,sha256=gmAs68_njBqEz-Qni-UGgeYEDTMxlTWk_IOm76FONNE,7279
  doc_utils/scannability.py,sha256=XwlmHqDs69p_V36X7DLjPTy0DUoLszSGqYjJ9wE-3hg,982
  doc_utils/spinner.py,sha256=lJg15qzODiKoR0G6uFIk2BdVNgn9jFexoTRUMrjiWvk,3554
@@ -27,12 +31,12 @@ doc_utils/unused_adoc.py,sha256=LPQWPGEOizXECxepk7E_5cjTVvKn6RXQYTWG97Ps5VQ,9077
  doc_utils/unused_attributes.py,sha256=OHyAdaBD7aNo357B0SLBN5NC_jNY5TWXMwgtfJNh3X8,7621
  doc_utils/unused_images.py,sha256=hL8Qrik9QCkVh54eBLuNczRS9tMnsqIEfavNamM1UeQ,5664
  doc_utils/validate_links.py,sha256=iBGXnwdeLlgIT3fo3v01ApT5k0X2FtctsvkrE6E3VMk,19610
- doc_utils/version.py,sha256=rh_oI-y8ZbMOpFC_a1Qkwb1Tq4FPWdm-vTVRUBqtHGU,203
+ doc_utils/version.py,sha256=zVnktTYITGhLqPNoyXbSnWi5bQassZ3M9S4LgDCGD-E,203
  doc_utils/version_check.py,sha256=-31Y6AN0KGi_CUCAVOOhf6bPO3r7SQIXPxxeffLAF0w,7535
  doc_utils/warnings_report.py,sha256=20yfwqBjOprfFhQwCujbcsvjJCbHHhmH84uAujm-y-o,8877
- rolfedh_doc_utils-0.1.35.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
- rolfedh_doc_utils-0.1.35.dist-info/METADATA,sha256=hsWjS4apZYXM4Qk38o3KOhMBEaqe55gc9e5QGZqAYEc,8325
- rolfedh_doc_utils-0.1.35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- rolfedh_doc_utils-0.1.35.dist-info/entry_points.txt,sha256=vL_LlLKOiurRzchrq8iRUQG19Xi9lSAFVZGjO-xyErk,577
- rolfedh_doc_utils-0.1.35.dist-info/top_level.txt,sha256=J4xtr3zoyCip27b3GnticFVZoyz5HHtgGqHQ-SZONCA,265
- rolfedh_doc_utils-0.1.35.dist-info/RECORD,,
+ rolfedh_doc_utils-0.1.38.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
+ rolfedh_doc_utils-0.1.38.dist-info/METADATA,sha256=gkxP50nyURTB-KhxNcT9rlDP77xVOXaRts6F1J6Dvus,8520
+ rolfedh_doc_utils-0.1.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ rolfedh_doc_utils-0.1.38.dist-info/entry_points.txt,sha256=SpOivexG_Bx1MWn4yC1UUXhttcXqZG851kejOAa3RFQ,744
+ rolfedh_doc_utils-0.1.38.dist-info/top_level.txt,sha256=FdhF5liH3KqypcFiLU-3GJdXJTIGwJO_UBAcOnC1yTo,338
+ rolfedh_doc_utils-0.1.38.dist-info/RECORD,,
@@ -1,9 +1,12 @@
  [console_scripts]
  archive-unused-files = archive_unused_files:main
  archive-unused-images = archive_unused_images:main
+ check-published-links = check_published_links:main
  check-scannability = check_scannability:main
+ check-source-directives = check_source_directives:main
  convert-callouts-interactive = convert_callouts_interactive:main
  convert-callouts-to-deflist = convert_callouts_to_deflist:main
+ convert-tables-to-deflists = convert_tables_to_deflists:main
  doc-utils = doc_utils_cli:main
  extract-link-attributes = extract_link_attributes:main
  find-unused-attributes = find_unused_attributes:main
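
For context, a hedged sketch (not part of the package): once the 0.1.38 wheel is installed, the three new [console_scripts] entries above are discoverable through the standard importlib.metadata entry-point API, for example:

# Illustrative only; uses the Python 3.10+ importlib.metadata selection API.
from importlib.metadata import entry_points

eps = entry_points(group='console_scripts')
for name in ('check-published-links', 'check-source-directives', 'convert-tables-to-deflists'):
    ep = eps[name]            # e.g. value 'check_source_directives:main'
    print(name, '->', ep.value)
    # ep.load() returns the module-level main() callable that the console script invokes
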
@@ -1,9 +1,12 @@
  archive_unused_files
  archive_unused_images
  callout_lib
+ check_published_links
  check_scannability
+ check_source_directives
  convert_callouts_interactive
  convert_callouts_to_deflist
+ convert_tables_to_deflists
  doc_utils
  doc_utils_cli
  extract_link_attributes