rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archive_unused_files.py +18 -5
- archive_unused_images.py +9 -2
- callout_lib/__init__.py +22 -0
- callout_lib/converter_bullets.py +103 -0
- callout_lib/converter_comments.py +295 -0
- callout_lib/converter_deflist.py +134 -0
- callout_lib/detector.py +364 -0
- callout_lib/table_parser.py +804 -0
- check_published_links.py +1083 -0
- check_scannability.py +6 -0
- check_source_directives.py +101 -0
- convert_callouts_interactive.py +567 -0
- convert_callouts_to_deflist.py +628 -0
- convert_freemarker_to_asciidoc.py +288 -0
- convert_tables_to_deflists.py +479 -0
- doc_utils/convert_freemarker_to_asciidoc.py +708 -0
- doc_utils/duplicate_content.py +409 -0
- doc_utils/duplicate_includes.py +347 -0
- doc_utils/extract_link_attributes.py +618 -0
- doc_utils/format_asciidoc_spacing.py +285 -0
- doc_utils/insert_abstract_role.py +220 -0
- doc_utils/inventory_conditionals.py +164 -0
- doc_utils/missing_source_directive.py +211 -0
- doc_utils/replace_link_attributes.py +187 -0
- doc_utils/spinner.py +119 -0
- doc_utils/unused_adoc.py +150 -22
- doc_utils/unused_attributes.py +218 -6
- doc_utils/unused_images.py +81 -9
- doc_utils/validate_links.py +576 -0
- doc_utils/version.py +8 -0
- doc_utils/version_check.py +243 -0
- doc_utils/warnings_report.py +237 -0
- doc_utils_cli.py +158 -0
- extract_link_attributes.py +120 -0
- find_duplicate_content.py +209 -0
- find_duplicate_includes.py +198 -0
- find_unused_attributes.py +84 -6
- format_asciidoc_spacing.py +134 -0
- insert_abstract_role.py +163 -0
- inventory_conditionals.py +53 -0
- replace_link_attributes.py +214 -0
- rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
- rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
- rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
- rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
- validate_links.py +213 -0
- rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
- rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
- rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
- rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,618 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Extract link and xref macros containing attributes into attribute definitions.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, List, Set, Tuple, Optional
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
import unicodedata
|
|
12
|
+
|
|
13
|
+
from .spinner import Spinner
|
|
14
|
+
from .validate_links import LinkValidator
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def find_attribute_files(base_path: str = '.') -> List[str]:
    """Locate candidate AsciiDoc attribute files below ``base_path``.

    Every regular file whose name matches one of the conventional
    attribute-file glob patterns is collected, at any depth.

    Args:
        base_path: Directory to search recursively (default: current directory).

    Returns:
        Sorted list of unique paths, each relative to ``base_path``.
    """
    root = Path(base_path)
    name_globs = (
        '**/common-attributes.adoc',
        '**/attributes.adoc',
        '**/*-attributes.adoc',
        '**/attributes-*.adoc',
        '**/common_attributes.adoc',
        '**/_common-attributes.adoc',
    )

    # Several globs can hit the same file (e.g. common-attributes.adoc also
    # matches *-attributes.adoc); a set collapses those duplicates.
    hits = {
        str(candidate.relative_to(root))
        for name_glob in name_globs
        for candidate in root.glob(name_glob)
        if candidate.is_file()
    }
    return sorted(hits)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def select_attribute_file(attribute_files: List[str]) -> str:
    """Prompt the user to pick one entry from ``attribute_files``.

    A numbered menu is printed and the user is asked for a number. An
    out-of-range number triggers a re-prompt. Non-numeric input, end of
    input, or Ctrl-C cancels the selection.

    Args:
        attribute_files: Candidate file paths to choose from.

    Returns:
        The chosen path, or ``None`` when the list is empty or the
        selection was cancelled.
    """
    if not attribute_files:
        return None

    option_count = len(attribute_files)

    print("\nMultiple attribute files found. Please select one:")
    for number, candidate in enumerate(attribute_files, 1):
        print(f" {number}. {candidate}")

    while True:
        try:
            answer = input(f"\nEnter your choice (1-{option_count}): ").strip()
            position = int(answer) - 1
            if not 0 <= position < option_count:
                print(f"Please enter a number between 1 and {option_count}")
                continue
            return attribute_files[position]
        except (ValueError, EOFError, KeyboardInterrupt):
            print("\nOperation cancelled.")
            return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def load_existing_attributes(file_path: str) -> Dict[str, str]:
    """Read ``:name: value`` attribute definitions from an AsciiDoc file.

    Only lines that start with ``:name:`` followed by a non-empty value are
    recognised. Names and values are stripped of surrounding whitespace.
    When a name is defined more than once, the last definition wins.

    Args:
        file_path: Path of the attribute file to read.

    Returns:
        Mapping of attribute name to value; empty if the file does not exist.
    """
    if not os.path.exists(file_path):
        return {}

    definition = re.compile(r'^:([^:]+):\s*(.+)$')
    loaded = {}

    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            hit = definition.match(raw_line)
            if hit is None:
                continue
            name, value = hit.groups()
            loaded[name.strip()] = value.strip()

    return loaded
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def find_link_macros(file_path: str, macro_type: str = 'both') -> List[Tuple[str, str, str, int]]:
    """
    Find all link: and xref: macros containing attributes in their URLs.

    A macro is reported only when its target (the part between the macro
    prefix and the opening ``[``) contains at least one ``{attribute}``
    reference. Matching is done line by line and is case-insensitive.

    Args:
        file_path: Path to the file to scan
        macro_type: Type of macros to find - 'link', 'xref', or 'both' (default: 'both').
            Any other value selects no pattern and yields an empty list.

    Returns list of tuples: (full_macro, url, link_text, line_number).
    Results are ordered by line; within one line, link: matches come
    before xref: matches.
    """
    # Build and compile the patterns once, not once per line of the file.
    # Matches: (link|xref):url[text] where url contains {attribute}
    pattern_sources = []
    if macro_type in ('link', 'both'):
        pattern_sources.append(r'(link:([^[\]]*\{[^}]+\}[^[\]]*)\[([^\]]*)\])')
    if macro_type in ('xref', 'both'):
        pattern_sources.append(r'(xref:([^[\]]*\{[^}]+\}[^[\]]*)\[([^\]]*)\])')
    compiled_patterns = [re.compile(source, re.IGNORECASE) for source in pattern_sources]

    macros = []

    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            for pattern in compiled_patterns:
                macros.extend(
                    (match.group(1), match.group(2), match.group(3), line_num)
                    for match in pattern.finditer(line)
                )

    return macros
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def generate_attribute_name(url: str, existing_attrs: Set[str], counter: int) -> str:
    """Derive a unique ``link-...`` attribute name from a URL.

    The protocol prefix and any ``{attribute}`` references are dropped.
    If the remainder contains a slash, only the first segment (dots turned
    into hyphens) and the last segment up to its first dot are kept. The
    result is reduced to lowercase letters, digits and single hyphens,
    capped at 30 characters and prefixed with ``link-``. If that name is
    already taken, ``-1``, ``-2``, ... is appended until it is free.

    Args:
        url: Macro target the name is derived from.
        existing_attrs: Names that are already in use.
        counter: Accepted for interface compatibility; not read by this function.

    Returns:
        An attribute name that is not in ``existing_attrs``.
    """
    # Strip the protocol, then the attribute references.
    stem = re.sub(r'^[^:]+://', '', url) if '://' in url else url
    stem = re.sub(r'\{[^}]+\}', '', stem)

    # Keep only the leading segment and the trailing segment's base name.
    if '/' in stem:
        segments = stem.split('/')
        host = segments[0].replace('.', '-')
        leaf = segments[-1].split('.')[0]
        stem = f"{host}-{leaf}" if leaf else host

    # Normalise to a lowercase hyphenated slug of at most 30 characters.
    slug = re.sub(r'[^a-zA-Z0-9-]', '-', stem)
    slug = re.sub(r'-+', '-', slug).strip('-').lower()[:30]

    # Append an increasing numeric suffix until the name is unused.
    base = f"link-{slug}"
    candidate = base
    attempt = 0
    while candidate in existing_attrs:
        attempt += 1
        candidate = f"{base}-{attempt}"

    return candidate
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def group_macros_by_url(macros: List[Tuple[str, str, str, str, int]]) -> Dict[str, List[Tuple[str, str, str, int]]]:
    """
    Bucket macro occurrences by their URL, keeping every link text variation.

    Args:
        macros: List[(file_path, full_macro, url, link_text, line_number)]

    Returns: Dict[url, List[(file_path, link_text, full_macro, line_number)]]
    (a defaultdict; URLs and occurrences keep their input order)
    """
    grouped = defaultdict(list)

    for occurrence in macros:
        source_file, macro_text, target, label, lineno = occurrence
        # Note the reordering: link text comes before the full macro in the output.
        grouped[target].append((source_file, label, macro_text, lineno))

    return grouped
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def select_link_text(url: str, variations: List[Tuple[str, str, str, int]], interactive: bool = True) -> str:
    """
    Choose the link text to use for a URL that may appear with several texts.

    If every occurrence uses the same text, that text is returned without
    prompting. Otherwise, in non-interactive mode the text used in the most
    locations wins (the first one seen on a tie). In interactive mode the
    user picks from a numbered menu or types a custom text. Non-numeric
    input, end of input, or Ctrl-C falls back to the most common text.

    Args:
        url: The URL being processed (shown in the prompt only).
        variations: List[(file_path, link_text, full_macro, line_number)]
        interactive: Whether to prompt when more than one text exists.
    """
    # Map each distinct text to the "file:line" locations that use it.
    sightings = {}
    for source_file, label, _macro, lineno in variations:
        sightings.setdefault(label, []).append(f"{source_file}:{lineno}")

    def most_frequent():
        # max() keeps the first entry on ties, i.e. the earliest text seen.
        return max(sightings.items(), key=lambda entry: len(entry[1]))[0]

    if len(sightings) == 1:
        return next(iter(sightings))

    if not interactive:
        return most_frequent()

    print(f"\nMultiple link text variations found for URL: {url}")
    print("Please select the preferred text:")

    options = list(sightings.items())
    custom_slot = len(options) + 1  # menu number of the "custom text" entry

    for number, (label, places) in enumerate(options, 1):
        print(f"\n {number}. \"{label}\"")
        print(f" Used in: {', '.join(places[:3])}")
        if len(places) > 3:
            print(f" ... and {len(places) - 3} more locations")

    print(f"\n {custom_slot}. Enter custom text")

    while True:
        try:
            answer = input(f"\nEnter your choice (1-{custom_slot}): ").strip()
            picked = int(answer)

            if 1 <= picked < custom_slot:
                return options[picked - 1][0]
            if picked != custom_slot:
                print(f"Please enter a number between 1 and {custom_slot}")
                continue

            typed = input("Enter custom link text: ").strip()
            if typed:
                return typed
            print("Text cannot be empty. Please try again.")
        except (ValueError, EOFError, KeyboardInterrupt):
            print("\nUsing most common text variation.")
            return most_frequent()
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def collect_all_macros(scan_dirs: List[str] = None, macro_type: str = 'both', exclude_files: List[str] = None) -> List[Tuple[str, str, str, str, int]]:
    """
    Collect all link/xref macros with attributes from all .adoc files.

    Each scan directory is walked recursively. Hidden subdirectories (names
    starting with '.', which includes .archive) are pruned from the walk.
    The decision is made per directory name, so a scan directory that lives
    under a hidden parent, or is reached through '..', is still scanned.

    Args:
        scan_dirs: Directories to scan (default: current directory)
        macro_type: Type of macros to find - 'link', 'xref', or 'both' (default: 'both')
        exclude_files: List of file paths to exclude from scanning (typically all attributes files).
            None and empty entries are ignored; paths are compared in absolute form.

    Returns: List[(file_path, full_macro, url, link_text, line_number)]
    """
    if scan_dirs is None:
        scan_dirs = ['.']

    # Normalize all exclude file paths so relative and absolute spellings compare equal
    exclude_paths = {os.path.abspath(path) for path in (exclude_files or []) if path}

    all_macros = []

    for scan_dir in scan_dirs:
        for root, dirs, files in os.walk(scan_dir):
            # Prune hidden directories in place so os.walk never descends into
            # them. Testing the directory name, not a substring of the whole
            # path, avoids skipping everything when the scan path itself has
            # a dot-directory or '..' in it, and works with any separator.
            dirs[:] = [name for name in dirs if not name.startswith('.')]

            for file in files:
                if not file.endswith('.adoc'):
                    continue
                file_path = os.path.join(root, file)
                # Skip any attributes files to prevent self-referencing
                if os.path.abspath(file_path) in exclude_paths:
                    continue
                all_macros.extend(
                    (file_path, full_macro, url, link_text, line_num)
                    for full_macro, url, link_text, line_num in find_link_macros(file_path, macro_type)
                )

    return all_macros
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def create_attributes(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
                      existing_attrs: Dict[str, str],
                      interactive: bool = True) -> Tuple[Dict[str, str], Dict[str, str]]:
    """
    Build attribute definitions for URLs that have none yet and record reuse.

    A URL counts as already covered when it occurs as a substring of any
    existing attribute value; the first such attribute is recorded and no
    new attribute is created for that URL. For every other URL a link text
    is selected, a unique name is generated, and the value is assembled as
    ``<macro>:<url>[<text>]``. The macro type comes from the first recorded
    occurrence ('xref' if it starts with 'xref:', otherwise 'link').

    Args:
        url_groups: Dict[url, List[(file_path, link_text, full_macro, line_number)]]
        existing_attrs: Attribute name -> value already present in the attribute file.
        interactive: Passed through to select_link_text.

    Returns: Tuple[new_attributes, existing_matching_attributes]
    """
    created = {}
    reused = {}
    taken_names = set(existing_attrs)
    sequence = 0

    for url, variations in url_groups.items():
        # First existing attribute whose value contains this URL, if any.
        known = next(
            ((name, value) for name, value in existing_attrs.items() if url in value),
            None,
        )
        if known is not None:
            known_name, known_value = known
            reused[known_name] = known_value
            if known_name:
                print(f"URL already has attribute {{{known_name}}}: {url}")
                continue

        link_text = select_link_text(url, variations, interactive)

        # Names created earlier in this run are reserved as well.
        sequence += 1
        attr_name = generate_attribute_name(url, taken_names | set(created), sequence)

        # variations[0][2] is the full macro text of the first occurrence.
        kind = 'xref' if variations[0][2].startswith('xref:') else 'link'

        attr_value = f"{kind}:{url}[{link_text}]"
        created[attr_name] = attr_value

        print(f"Created attribute: :{attr_name}: {attr_value}")

    return created, reused
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def update_attribute_file(file_path: str, new_attributes: Dict[str, str], dry_run: bool = False):
    """Append new attribute definitions to the attribute file.

    The definitions are written in name order under an
    ``// Extracted link attributes`` comment. If the file already has
    content, a newline is written first. Missing parent directories are
    created. With ``dry_run`` the planned additions are only printed.

    Args:
        file_path: Attribute file to append to (created if absent).
        new_attributes: Attribute name -> value to add; nothing happens if empty.
        dry_run: Print what would be added without touching the file system.
    """
    if not new_attributes:
        print("No new attributes to add.")
        return

    if dry_run:
        print(f"\n[DRY RUN] Would add {len(new_attributes)} attributes to {file_path}:")
        for attr_name, attr_value in new_attributes.items():
            print(f" :{attr_name}: {attr_value}")
        return

    # A bare file name has no directory part; fall back to the current directory.
    parent_dir = os.path.dirname(file_path) or '.'
    os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, 'a', encoding='utf-8') as out:
        pieces = []
        # Nothing has been written through `out` yet, so this is the size
        # the file had before this call.
        if os.path.getsize(file_path) > 0:
            pieces.append('\n')
        pieces.append('// Extracted link attributes\n')
        pieces.extend(
            f":{attr_name}: {attr_value}\n"
            for attr_name, attr_value in sorted(new_attributes.items())
        )
        out.write(''.join(pieces))

    print(f"Added {len(new_attributes)} attributes to {file_path}")
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def replace_macros_with_attributes(file_updates: Dict[str, List[Tuple[str, str]]], dry_run: bool = False):
    """
    Rewrite files so link/xref macros become attribute references.

    Every occurrence of each old macro text in a file is replaced with its
    attribute reference, in the order the replacements are listed. With
    ``dry_run`` only a preview (at most three replacements per file) is
    printed and no file is read or written.

    file_updates: Dict[file_path, List[(old_macro, attribute_ref)]]
    """
    for file_path, replacements in file_updates.items():
        if dry_run:
            print(f"\n[DRY RUN] Would update {file_path}:")
            for old_macro, attr_ref in replacements[:3]:
                print(f" Replace: {old_macro}")
                print(f" With: {attr_ref}")
            not_shown = len(replacements) - 3
            if not_shown > 0:
                print(f" ... and {not_shown} more replacements")
            continue

        with open(file_path, 'r', encoding='utf-8') as source:
            text = source.read()

        for old_macro, attr_ref in replacements:
            text = text.replace(old_macro, attr_ref)

        with open(file_path, 'w', encoding='utf-8') as sink:
            sink.write(text)

        print(f"Updated {file_path}: {len(replacements)} replacements")
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def prepare_file_updates(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
                         attribute_mapping: Dict[str, str]) -> Dict[str, List[Tuple[str, str]]]:
    """
    Work out, per file, which macro texts should become which attribute references.

    Only attributes whose value starts with ``link:<url>[`` or
    ``xref:<url>[`` take part. A URL group is mapped when its URL is
    exactly equal to such an attribute's URL. If several attributes share
    a URL, the one listed last in ``attribute_mapping`` is used.

    Args:
        url_groups: Dict[url, List[(file_path, link_text, full_macro, line_number)]]
        attribute_mapping: Attribute name -> attribute value.

    Returns: Dict[file_path, List[(old_macro, attribute_ref)]]
    """
    # Reverse lookup: URL embedded in an attribute value -> attribute name.
    macro_target = re.compile(r'(?:link|xref):([^\[]+)\[')
    name_for_url = {}
    for attr_name, attr_value in attribute_mapping.items():
        found = macro_target.match(attr_value)
        if found:
            name_for_url[found.group(1)] = attr_name

    # Pair every occurrence of a mapped URL with its {attribute} reference.
    updates = defaultdict(list)
    for url, variations in url_groups.items():
        if url not in name_for_url:
            continue
        reference = f"{{{name_for_url[url]}}}"
        for source_file, _text, macro_text, _lineno in variations:
            updates[source_file].append((macro_text, reference))

    return dict(updates)
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def validate_link_attributes(attributes_file: str, fail_on_broken: bool = False) -> bool:
    """
    Check the URLs of link-* attributes in an attribute file.

    Only lines of the form ``:link-<name>: http(s)://...`` are considered,
    i.e. the value must begin with the URL itself. Each URL is passed to
    LinkValidator.validate_url; a falsy result or any exception marks the
    attribute as broken. Broken attributes are listed with their line number.

    Args:
        attributes_file: Attribute file to inspect.
        fail_on_broken: Report failure when at least one URL is broken.

    Returns: True if validation passes (no broken links or fail_on_broken is False), False otherwise.
    A missing file counts as passing.
    """
    if not os.path.exists(attributes_file):
        return True  # No file to validate yet

    print(f"\nValidating links in {attributes_file}...")
    spinner = Spinner("Validating link attributes")
    spinner.start()

    # Gather name -> (url, line number) for every :link-*: URL definition.
    url_definition = re.compile(r'^:(link-[a-zA-Z0-9_-]+):\s*(https?://[^\s]+)')
    link_attributes = {}
    with open(attributes_file, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            found = url_definition.match(line)
            if found:
                link_attributes[found.group(1)] = (found.group(2).strip(), line_num)

    if not link_attributes:
        spinner.stop("No link attributes to validate")
        return True

    validator = LinkValidator(timeout=10, retry=2, parallel=5)
    broken_links = []

    for attr_name, (url, line_num) in link_attributes.items():
        try:
            failed = not validator.validate_url(url)
        except Exception:
            # A validator error is reported the same way as a broken link.
            failed = True
        if failed:
            broken_links.append((attr_name, url, line_num))

    total = len(link_attributes)
    broken = len(broken_links)
    spinner.stop(f"Validated {total} link attributes: {total - broken} valid, {broken} broken")

    if not broken_links:
        return True

    print("\n⚠️ Broken link attributes found:")
    for attr_name, url, line_num in broken_links:
        print(f" Line {line_num}: :{attr_name}: {url}")

    if fail_on_broken:
        print("\nStopping extraction due to broken links (--fail-on-broken)")
        return False

    print("\nContinuing with extraction despite broken links...")
    return True
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def extract_link_attributes(attributes_file: str = None,
                            scan_dirs: List[str] = None,
                            interactive: bool = True,
                            dry_run: bool = False,
                            validate_links: bool = False,
                            fail_on_broken: bool = False,
                            macro_type: str = 'both') -> bool:
    """
    Main function to extract link attributes.

    Workflow, in order:
      1. Resolve the attribute file (given, auto-detected, or chosen by the user).
      2. Optionally validate the URLs already defined in it.
      3. Load its existing attributes.
      4. Scan .adoc files for link/xref macros whose URL contains an
         attribute reference, skipping every detected attribute file.
      5. Group the macros by URL and create attributes for URLs that have none.
      6. Optionally validate the http(s) URLs of the new attributes.
      7. Append the new attributes to the attribute file and replace the
         macros in the scanned files with attribute references.

    Args:
        attributes_file: Path to attributes file. When falsy, attribute files
            are searched for under the current directory.
        scan_dirs: Directories to scan
        interactive: Enable interactive mode (passed to create_attributes)
        dry_run: Preview changes without modifying files
        validate_links: Validate URLs before extraction
        fail_on_broken: Exit if broken links found
        macro_type: Type of macros to process - 'link', 'xref', or 'both' (default: 'both')

    Returns: True if successful, False otherwise (no attribute file chosen,
    or broken links found while fail_on_broken is set)

    Raises:
        KeyError: if macro_type is not 'link', 'xref', or 'both'.
    """
    # Find or confirm attributes file
    if not attributes_file:
        # Called with its default base path ('.'), independent of scan_dirs.
        found_files = find_attribute_files()

        if not found_files:
            print("No attribute files found.")
            # NOTE: this prompt is issued regardless of the `interactive` flag.
            response = input("Create common-attributes.adoc? (y/n): ").strip().lower()
            if response == 'y':
                attributes_file = 'common-attributes.adoc'
            else:
                print("Please specify an attribute file with --attributes-file")
                return False
        elif len(found_files) == 1:
            attributes_file = found_files[0]
            print(f"Using attribute file: {attributes_file}")
        else:
            # Several candidates: let the user choose; a falsy result means cancelled.
            attributes_file = select_attribute_file(found_files)
            if not attributes_file:
                return False

    # Validate existing link attributes if requested
    if validate_links:
        if not validate_link_attributes(attributes_file, fail_on_broken):
            return False

    # Load existing attributes
    spinner = Spinner("Loading existing attributes")
    spinner.start()
    existing_attrs = load_existing_attributes(attributes_file)
    spinner.stop(f"Loaded {len(existing_attrs)} existing attributes")

    # Find all attributes files to exclude from processing
    # (searched from the current directory, like the detection above)
    all_attribute_files = find_attribute_files()

    # Notify user about excluded files if there are multiple
    if len(all_attribute_files) > 1:
        print(f"Excluding {len(all_attribute_files)} attributes files from processing:")
        for f in all_attribute_files:
            print(f" - {f}")

    # Collect all macros, excluding ALL attributes files
    # The dict lookup doubles as validation: an unknown macro_type raises KeyError here.
    macro_desc = {'link': 'link', 'xref': 'xref', 'both': 'link and xref'}[macro_type]
    spinner = Spinner(f"Scanning for {macro_desc} macros with attributes")
    spinner.start()
    all_macros = collect_all_macros(scan_dirs, macro_type, exclude_files=all_attribute_files)
    spinner.stop()

    if not all_macros:
        # Nothing to do is treated as success.
        print(f"No {macro_desc} macros with attributes found.")
        return True

    print(f"Found {len(all_macros)} {macro_desc} macros with attributes")

    # Group by URL
    spinner = Spinner("Grouping macros by URL")
    spinner.start()
    url_groups = group_macros_by_url(all_macros)
    spinner.stop(f"Grouped into {len(url_groups)} unique URLs")

    # Create new attributes and track existing ones
    new_attributes, existing_matching_attributes = create_attributes(url_groups, existing_attrs, interactive)

    if not new_attributes and not existing_matching_attributes:
        print("No new attributes to create and no existing attributes match found URLs.")
        return True

    # Validate new attributes before writing if requested
    # (skipped entirely in dry-run mode)
    if validate_links and not dry_run and new_attributes:
        print("\nValidating new link attributes...")
        spinner = Spinner("Validating new URLs")
        spinner.start()

        validator = LinkValidator(timeout=10, retry=2, parallel=5)
        broken_new = []

        for attr_name, attr_value in new_attributes.items():
            # Extract URL from attribute value (could be link: or xref:)
            # Only values containing an http(s) URL are checked; others are skipped.
            url_match = re.search(r'(https?://[^\[]+)', attr_value)
            if url_match:
                url = url_match.group(1).strip()
                try:
                    if not validator.validate_url(url):
                        broken_new.append((attr_name, url))
                except Exception:
                    # A validator error is reported the same way as a broken URL.
                    broken_new.append((attr_name, url))

        spinner.stop(f"Validated {len(new_attributes)} new attributes")

        if broken_new:
            print("\n⚠️ Broken URLs in new attributes:")
            for attr_name, url in broken_new:
                print(f" :{attr_name}: {url}")

            if fail_on_broken:
                # Abort before anything has been written.
                print("\nStopping due to broken URLs in new attributes (--fail-on-broken)")
                return False

    # Update attribute file (only if there are new attributes)
    if new_attributes:
        update_attribute_file(attributes_file, new_attributes, dry_run)

    # Prepare file updates (include both new and existing matching attributes)
    # On a name clash the new attribute's value overrides the existing one.
    all_attributes = {**existing_attrs, **new_attributes}
    file_updates = prepare_file_updates(url_groups, all_attributes)

    # Replace macros
    if file_updates:
        spinner = Spinner(f"Updating {len(file_updates)} files")
        spinner.start()
        # In dry-run mode this only prints a preview; the stop message below
        # is emitted either way.
        replace_macros_with_attributes(file_updates, dry_run)
        spinner.stop(f"Updated {len(file_updates)} files")

    if dry_run:
        print("\n[DRY RUN] No files were modified. Run without --dry-run to apply changes.")
    else:
        total_processed = len(new_attributes) + len(existing_matching_attributes)
        if new_attributes and existing_matching_attributes:
            print(f"\nSuccessfully processed {total_processed} link attributes:")
            print(f" - Created {len(new_attributes)} new attributes")
            print(f" - Replaced macros using {len(existing_matching_attributes)} existing attributes")
        elif new_attributes:
            print(f"\nSuccessfully extracted {len(new_attributes)} link attributes")
        elif existing_matching_attributes:
            print(f"\nSuccessfully replaced macros using {len(existing_matching_attributes)} existing link attributes")

    return True
|