rolfedh-doc-utils 0.1.9__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {rolfedh_doc_utils-0.1.9/rolfedh_doc_utils.egg-info → rolfedh_doc_utils-0.1.10}/PKG-INFO +3 -2
  2. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/README.md +2 -1
  3. rolfedh_doc_utils-0.1.10/doc_utils/extract_link_attributes.py +453 -0
  4. rolfedh_doc_utils-0.1.10/extract_link_attributes.py +93 -0
  5. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/pyproject.toml +3 -2
  6. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10/rolfedh_doc_utils.egg-info}/PKG-INFO +3 -2
  7. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/rolfedh_doc_utils.egg-info/SOURCES.txt +3 -0
  8. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/rolfedh_doc_utils.egg-info/entry_points.txt +1 -0
  9. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/rolfedh_doc_utils.egg-info/top_level.txt +1 -0
  10. rolfedh_doc_utils-0.1.10/tests/test_extract_link_attributes.py +363 -0
  11. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/LICENSE +0 -0
  12. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/archive_unused_files.py +0 -0
  13. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/archive_unused_images.py +0 -0
  14. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/check_scannability.py +0 -0
  15. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/__init__.py +0 -0
  16. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/file_utils.py +0 -0
  17. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/format_asciidoc_spacing.py +0 -0
  18. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/replace_link_attributes.py +0 -0
  19. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/scannability.py +0 -0
  20. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/topic_map_parser.py +0 -0
  21. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/unused_adoc.py +0 -0
  22. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/unused_attributes.py +0 -0
  23. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/doc_utils/unused_images.py +0 -0
  24. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/find_unused_attributes.py +0 -0
  25. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/format_asciidoc_spacing.py +0 -0
  26. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/replace_link_attributes.py +0 -0
  27. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
  28. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/rolfedh_doc_utils.egg-info/requires.txt +0 -0
  29. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/setup.cfg +0 -0
  30. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/setup.py +0 -0
  31. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_archive_unused_files.py +0 -0
  32. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_archive_unused_images.py +0 -0
  33. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_auto_discovery.py +0 -0
  34. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_check_scannability.py +0 -0
  35. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_cli_entry_points.py +0 -0
  36. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_file_utils.py +0 -0
  37. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_fixture_archive_unused_files.py +0 -0
  38. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_fixture_archive_unused_images.py +0 -0
  39. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_fixture_check_scannability.py +0 -0
  40. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_parse_exclude_list.py +0 -0
  41. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_symlink_handling.py +0 -0
  42. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_topic_map_parser.py +0 -0
  43. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.10}/tests/test_unused_attributes.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.9
3
+ Version: 0.1.10
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -79,7 +79,8 @@ pip install -e .
79
79
 
80
80
  | Tool | Description | Usage |
81
81
  |------|-------------|-------|
82
- | **`replace-link-attributes`** | Resolves Vale LinkAttribute violations by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
82
+ | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
83
+ | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
83
84
  | **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
84
85
  | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
85
86
  | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
@@ -46,7 +46,8 @@ pip install -e .
46
46
 
47
47
  | Tool | Description | Usage |
48
48
  |------|-------------|-------|
49
- | **`replace-link-attributes`** | Resolves Vale LinkAttribute violations by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
49
+ | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
50
+ | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
50
51
  | **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
51
52
  | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
52
53
  | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
@@ -0,0 +1,453 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Extract link and xref macros containing attributes into attribute definitions.
4
+ """
5
+
6
+ import os
7
+ import re
8
+ from pathlib import Path
9
+ from typing import Dict, List, Set, Tuple, Optional
10
+ from collections import defaultdict
11
+ import unicodedata
12
+
13
+
14
def find_attribute_files(base_path: str = '.') -> List[str]:
    """Return the sorted, de-duplicated paths of likely attribute files.

    ``base_path`` is searched recursively for the conventional attribute-file
    names listed below. Only regular files are reported, and every path is
    expressed relative to ``base_path``.
    """
    name_globs = (
        '**/common-attributes.adoc',
        '**/attributes.adoc',
        '**/*-attributes.adoc',
        '**/attributes-*.adoc',
        '**/common_attributes.adoc',
        '**/_common-attributes.adoc',
    )
    root = Path(base_path)

    # A file can satisfy several globs (e.g. common-attributes.adoc also
    # matches *-attributes.adoc); the set keeps a single entry for it.
    matches = {
        str(candidate.relative_to(root))
        for name_glob in name_globs
        for candidate in root.glob(name_glob)
        if candidate.is_file()
    }
    return sorted(matches)
43
+
44
+
45
def select_attribute_file(attribute_files: List[str]) -> Optional[str]:
    """Prompt the user to pick one attribute file from a numbered list.

    Args:
        attribute_files: Candidate file paths to offer.

    Returns:
        The chosen path, or None when the list is empty or the user cancels
        the prompt (end of input or Ctrl-C).
    """
    if not attribute_files:
        return None

    total = len(attribute_files)
    print("\nMultiple attribute files found. Please select one:")
    for number, candidate in enumerate(attribute_files, 1):
        print(f" {number}. {candidate}")

    while True:
        # Only a closed stdin or Ctrl-C counts as cancelling the selection.
        try:
            choice = input(f"\nEnter your choice (1-{total}): ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nOperation cancelled.")
            return None

        # A non-numeric or empty answer is treated like an out-of-range
        # number: explain the valid range and ask again instead of aborting.
        try:
            index = int(choice) - 1
        except ValueError:
            index = -1

        if 0 <= index < total:
            return attribute_files[index]
        print(f"Please enter a number between 1 and {total}")
65
+
66
+
67
def load_existing_attributes(file_path: str) -> Dict[str, str]:
    """Parse ``:name: value`` attribute definitions from a file.

    Returns a mapping of attribute name to value, both stripped of
    surrounding whitespace. A missing file yields an empty mapping, lines
    without a value are ignored, and a later definition of the same name
    replaces an earlier one.
    """
    if not os.path.exists(file_path):
        return {}

    definition = re.compile(r'^:([^:]+):\s*(.+)$')
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = (definition.match(raw_line) for raw_line in handle)
        # The comprehension is evaluated while the file is still open.
        return {
            hit.group(1).strip(): hit.group(2).strip()
            for hit in parsed
            if hit
        }
83
+
84
+
85
def find_link_macros(file_path: str) -> List[Tuple[str, str, str, int]]:
    """Scan a file for link:/xref: macros whose target contains ``{attribute}``.

    Returns a list of ``(full_macro, url, link_text, line_number)`` tuples.
    Line numbers are 1-based. Within a line, all ``link:`` matches are
    reported before any ``xref:`` matches. Matching is case-insensitive.
    """
    # Shape matched: (link|xref):url[text], where url holds at least one
    # {attribute} reference and no square brackets.
    link_re = re.compile(r'(link:([^[\]]*\{[^}]+\}[^[\]]*)\[([^\]]*)\])', re.IGNORECASE)
    xref_re = re.compile(r'(xref:([^[\]]*\{[^}]+\}[^[\]]*)\[([^\]]*)\])', re.IGNORECASE)

    found = []
    with open(file_path, 'r', encoding='utf-8') as source:
        for line_number, text in enumerate(source, 1):
            for macro_re in (link_re, xref_re):
                found.extend(
                    (hit.group(1), hit.group(2), hit.group(3), line_number)
                    for hit in macro_re.finditer(text)
                )
    return found
110
+
111
+
112
def generate_attribute_name(url: str, existing_attrs: Set[str], counter: int) -> str:
    """Derive a unique ``link-...`` attribute name from a URL.

    The name is built from the host part and the last path component of the
    URL, with any ``{attribute}`` references removed. The result is
    lower-cased, reduced to letters, digits and single hyphens, and capped at
    30 characters.

    Args:
        url: Macro target, possibly containing ``{attribute}`` references.
        existing_attrs: Names already taken; the result never collides with
            them.
        counter: Used as the base name when nothing usable is left of the URL
            (for example a URL that is only ``{base-url}``).

    Returns:
        A name of the form ``link-<base>``, with ``-<n>`` appended when needed
        to make it unique.
    """
    base_name = url

    # Drop the scheme ("https://") so the host is the first component.
    if '://' in url:
        base_name = re.sub(r'^[^:]+://', '', url)

    # Attribute references carry no information useful for naming.
    base_name = re.sub(r'\{[^}]+\}', '', base_name)

    # Combine the host with the extension-less last path component.
    if '/' in base_name:
        parts = base_name.split('/')
        domain_part = parts[0].replace('.', '-')
        path_part = parts[-1].split('.')[0] if parts[-1] else ''
        base_name = f"{domain_part}-{path_part}" if path_part else domain_part

    # Normalise to lower-case letters, digits and single hyphens.
    base_name = re.sub(r'[^a-zA-Z0-9-]', '-', base_name)
    base_name = re.sub(r'-+', '-', base_name)
    base_name = base_name.strip('-').lower()

    # Cap the length; the cut can land right after a hyphen, so trim it.
    base_name = base_name[:30].rstrip('-')

    # Without this fallback the name would be the bare prefix "link-".
    if not base_name:
        base_name = str(counter)

    original_name = f"link-{base_name}"
    attr_name = original_name
    suffix = 1
    while attr_name in existing_attrs:
        attr_name = f"{original_name}-{suffix}"
        suffix += 1

    return attr_name
156
+
157
+
158
def group_macros_by_url(macros: List[Tuple[str, str, str, str, int]]) -> Dict[str, List[Tuple[str, str, str, int]]]:
    """Bucket macro occurrences by their target URL.

    Each input tuple is ``(file_path, full_macro, url, link_text, line_number)``.
    The result maps every URL to its occurrences, reordered as
    ``(file_path, link_text, full_macro, line_number)`` and kept in input order.
    """
    grouped = defaultdict(list)

    for entry in macros:
        source_file, macro_text, target, label, line_no = entry
        occurrence = (source_file, label, macro_text, line_no)
        grouped[target].append(occurrence)

    return grouped
170
+
171
+
172
def select_link_text(url: str, variations: List[Tuple[str, str, str, int]], interactive: bool = True) -> str:
    """Choose the link text to use for a URL that appears with several texts.

    ``variations`` holds ``(file_path, link_text, full_macro, line_number)``
    tuples. When every occurrence shares one text, that text is returned
    without prompting. In non-interactive mode the text used in the most
    locations wins, and ties go to the text seen first. In interactive mode
    the user picks a listed text or types a custom one. Invalid or aborted
    input falls back to the most-used text.
    """
    # Map each distinct text to the "file:line" locations that use it.
    locations_by_text = {}
    for source_file, label, _, line_no in variations:
        locations_by_text.setdefault(label, []).append(f"{source_file}:{line_no}")

    if len(locations_by_text) == 1:
        return next(iter(locations_by_text))

    def most_used() -> str:
        winner = max(locations_by_text.items(), key=lambda item: len(item[1]))
        return winner[0]

    if not interactive:
        return most_used()

    print(f"\nMultiple link text variations found for URL: {url}")
    print("Please select the preferred text:")

    options = list(locations_by_text.items())
    custom_choice = len(options) + 1
    for number, (label, places) in enumerate(options, 1):
        print(f"\n {number}. \"{label}\"")
        print(f" Used in: {', '.join(places[:3])}")
        if len(places) > 3:
            print(f" ... and {len(places) - 3} more locations")

    print(f"\n {custom_choice}. Enter custom text")

    while True:
        try:
            answer = input(f"\nEnter your choice (1-{custom_choice}): ").strip()
            picked = int(answer) - 1

            if 0 <= picked < len(options):
                return options[picked][0]

            if picked != len(options):
                print(f"Please enter a number between 1 and {custom_choice}")
                continue

            typed = input("Enter custom link text: ").strip()
            if typed:
                return typed
            print("Text cannot be empty. Please try again.")
        except (ValueError, EOFError, KeyboardInterrupt):
            print("\nUsing most common text variation.")
            return most_used()
226
+
227
+
228
def collect_all_macros(scan_dirs: List[str] = None) -> List[Tuple[str, str, str, str, int]]:
    """Collect link/xref macros with attributes from all .adoc files.

    Args:
        scan_dirs: Directories to walk recursively. Defaults to the current
            directory when None.

    Returns:
        A list of ``(file_path, full_macro, url, link_text, line_number)``
        tuples, one per macro found by ``find_link_macros``.

    Hidden directories (names starting with ``.``, which includes
    ``.archive``) below each scan root are not visited. Only directory names
    found during the walk are checked, so a scan root whose own path contains
    a ``..`` or hidden component is still scanned.
    """
    if scan_dirs is None:
        scan_dirs = ['.']

    all_macros = []

    for scan_dir in scan_dirs:
        for root, dirs, files in os.walk(scan_dir):
            # Editing ``dirs`` in place stops os.walk from descending into
            # the removed entries.
            dirs[:] = [name for name in dirs if not name.startswith('.')]

            for file in files:
                if not file.endswith('.adoc'):
                    continue
                file_path = os.path.join(root, file)
                for full_macro, url, link_text, line_num in find_link_macros(file_path):
                    all_macros.append((file_path, full_macro, url, link_text, line_num))

    return all_macros
253
+
254
+
255
def create_attributes(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
                      existing_attrs: Dict[str, str],
                      interactive: bool = True) -> Dict[str, str]:
    """Build attribute definitions for URLs that do not have one yet.

    A URL is skipped, with a notice, when it occurs inside the value of any
    existing attribute. For every other URL a link text is chosen, a unique
    name is generated, and the value ``<link|xref>:<url>[<text>]`` is stored.
    The macro type is taken from the first occurrence of the URL.

    Returns a mapping of new attribute name to attribute value.
    """
    created = {}
    known_names = set(existing_attrs.keys())
    sequence = 1

    for url, variations in url_groups.items():
        # First existing attribute whose value contains this URL, if any.
        owner = next(
            (name for name, value in existing_attrs.items() if url in value),
            None,
        )
        if owner:
            print(f"URL already has attribute {{{owner}}}: {url}")
            continue

        chosen_text = select_link_text(url, variations, interactive)

        taken = known_names | set(created.keys())
        new_name = generate_attribute_name(url, taken, sequence)
        sequence += 1

        # variations[0][2] is the full macro text of the first occurrence.
        if variations[0][2].startswith('xref:'):
            macro_type = 'xref'
        else:
            macro_type = 'link'

        new_value = f"{macro_type}:{url}[{chosen_text}]"
        created[new_name] = new_value

        print(f"Created attribute: :{new_name}: {new_value}")

    return created
297
+
298
+
299
def update_attribute_file(file_path: str, new_attributes: Dict[str, str], dry_run: bool = False):
    """Append new attribute definitions to the attribute file.

    Does nothing beyond printing a notice when ``new_attributes`` is empty.
    With ``dry_run`` the planned additions are printed and no file is
    touched. Otherwise the parent directory is created if needed and the
    attributes are appended, sorted by name, under a
    ``// Extracted link attributes`` comment. A blank line is written first
    when the file already has content.
    """
    if not new_attributes:
        print("No new attributes to add.")
        return

    if dry_run:
        print(f"\n[DRY RUN] Would add {len(new_attributes)} attributes to {file_path}:")
        for name, value in new_attributes.items():
            print(f" :{name}: {value}")
        return

    # A bare file name has no directory part; fall back to the current dir.
    parent_dir = os.path.dirname(file_path) or '.'
    os.makedirs(parent_dir, exist_ok=True)

    lines = ['// Extracted link attributes\n']
    lines.extend(f":{name}: {value}\n" for name, value in sorted(new_attributes.items()))

    with open(file_path, 'a', encoding='utf-8') as out:
        # Measured before anything is written, so this is the prior size.
        if os.path.getsize(file_path) > 0:
            lines.insert(0, '\n')
        out.writelines(lines)

    print(f"Added {len(new_attributes)} attributes to {file_path}")
323
+
324
+
325
def replace_macros_with_attributes(file_updates: Dict[str, List[Tuple[str, str]]], dry_run: bool = False):
    """Rewrite files so link/xref macros become attribute references.

    ``file_updates`` maps each file path to ``(old_macro, attribute_ref)``
    pairs. Every occurrence of ``old_macro`` in the file is replaced. With
    ``dry_run`` only a preview of up to three replacements per file is
    printed and nothing is written.
    """
    preview_limit = 3

    for target_path, swaps in file_updates.items():
        if dry_run:
            print(f"\n[DRY RUN] Would update {target_path}:")
            for macro, reference in swaps[:preview_limit]:
                print(f" Replace: {macro}")
                print(f" With: {reference}")
            remaining = len(swaps) - preview_limit
            if remaining > 0:
                print(f" ... and {remaining} more replacements")
            continue

        with open(target_path, 'r', encoding='utf-8') as src:
            text = src.read()

        for macro, reference in swaps:
            text = text.replace(macro, reference)

        with open(target_path, 'w', encoding='utf-8') as dst:
            dst.write(text)

        print(f"Updated {target_path}: {len(swaps)} replacements")
354
+
355
+
356
def prepare_file_updates(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
                         attribute_mapping: Dict[str, str]) -> Dict[str, List[Tuple[str, str]]]:
    """Work out, per file, which macros to replace with which attribute.

    ``attribute_mapping`` maps attribute names to values of the form
    ``link:<url>[...]`` or ``xref:<url>[...]``; values of any other shape are
    ignored. Every macro occurrence whose URL matches one of those values is
    paired with the ``{attribute-name}`` reference.

    Returns a plain dict of ``file_path -> [(old_macro, attribute_ref), ...]``
    containing only files that have at least one replacement.
    """
    value_url = re.compile(r'(?:link|xref):([^\[]+)\[')

    # Reverse lookup: URL -> attribute name. A later attribute with the same
    # URL replaces an earlier one.
    attr_for_url = {}
    for name, value in attribute_mapping.items():
        hit = value_url.match(value)
        if hit:
            attr_for_url[hit.group(1)] = name

    updates = defaultdict(list)
    for url, variations in url_groups.items():
        if url not in attr_for_url:
            continue
        reference = f"{{{attr_for_url[url]}}}"
        for source_file, _, macro_text, _ in variations:
            updates[source_file].append((macro_text, reference))

    return dict(updates)
382
+
383
+
384
def extract_link_attributes(attributes_file: str = None,
                            scan_dirs: List[str] = None,
                            interactive: bool = True,
                            dry_run: bool = False) -> bool:
    """Run the whole extraction workflow.

    Steps: resolve the attribute file (auto-discovering or prompting when none
    is given), load its existing attributes, scan for link/xref macros that
    contain attributes, create attribute definitions for new URLs, append
    them to the attribute file, and replace the macros with attribute
    references.

    Returns False when no attribute file could be determined, and True
    otherwise, including when there is nothing to do.
    """
    # Resolve which attribute file to use.
    if not attributes_file:
        candidates = find_attribute_files()

        if len(candidates) > 1:
            attributes_file = select_attribute_file(candidates)
            if not attributes_file:
                return False
        elif candidates:
            attributes_file = candidates[0]
            print(f"Using attribute file: {attributes_file}")
        else:
            print("No attribute files found.")
            answer = input("Create common-attributes.adoc? (y/n): ").strip().lower()
            if answer != 'y':
                print("Please specify an attribute file with --attributes-file")
                return False
            attributes_file = 'common-attributes.adoc'

    existing_attrs = load_existing_attributes(attributes_file)
    print(f"Loaded {len(existing_attrs)} existing attributes")

    print("\nScanning for link and xref macros with attributes...")
    found_macros = collect_all_macros(scan_dirs)
    if not found_macros:
        print("No link or xref macros with attributes found.")
        return True
    print(f"Found {len(found_macros)} link/xref macros with attributes")

    url_groups = group_macros_by_url(found_macros)
    print(f"Grouped into {len(url_groups)} unique URLs")

    new_attributes = create_attributes(url_groups, existing_attrs, interactive)
    if not new_attributes:
        print("No new attributes to create.")
        return True

    update_attribute_file(attributes_file, new_attributes, dry_run)

    # Replacement considers existing and new attributes; new ones win on a
    # name clash.
    combined = {**existing_attrs, **new_attributes}
    replace_macros_with_attributes(prepare_file_updates(url_groups, combined), dry_run)

    if dry_run:
        print("\n[DRY RUN] No files were modified. Run without --dry-run to apply changes.")
    else:
        print(f"\nSuccessfully extracted {len(new_attributes)} link attributes")

    return True
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Extract link and xref macros containing attributes into attribute definitions.
4
+
5
+ This tool finds all link: and xref: macros whose URLs contain attributes,
6
+ creates attribute definitions for them, and replaces the macros with
7
+ attribute references.
8
+ """
9
+
10
+ import argparse
11
+ import sys
12
+ from doc_utils.extract_link_attributes import extract_link_attributes
13
+
14
+
15
def main():
    """Parse command-line options and run the extract-link-attributes tool.

    Exits with status 1 when the extraction reports failure, is interrupted,
    or raises. A traceback is printed only when --verbose is given.
    """
    usage_examples = """
Examples:
  # Interactive mode with auto-discovery
  extract-link-attributes

  # Specify attribute file
  extract-link-attributes --attributes-file common-attributes.adoc

  # Non-interactive mode (uses most common link text)
  extract-link-attributes --non-interactive

  # Dry run to preview changes
  extract-link-attributes --dry-run

  # Scan specific directories
  extract-link-attributes --scan-dir modules --scan-dir assemblies
"""
    parser = argparse.ArgumentParser(
        description='Extract link and xref macros containing attributes into attribute definitions',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    parser.add_argument(
        '--attributes-file',
        help='Path to the attributes file to update (auto-discovered if not specified)',
    )
    parser.add_argument(
        '--scan-dir',
        action='append',
        help='Directory to scan for .adoc files (can be used multiple times, default: current directory)',
    )
    parser.add_argument(
        '--non-interactive',
        action='store_true',
        help='Non-interactive mode: automatically use most common link text for variations',
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Preview changes without modifying files',
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose output',
    )

    options = parser.parse_args()

    try:
        succeeded = extract_link_attributes(
            attributes_file=options.attributes_file,
            scan_dirs=options.scan_dir,
            interactive=not options.non_interactive,
            dry_run=options.dry_run,
        )
    except KeyboardInterrupt:
        print("\nOperation cancelled.")
        sys.exit(1)
    except Exception as error:
        print(f"Error: {error}", file=sys.stderr)
        if options.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)

    # Every handler above exits, so this runs only after a normal return.
    if not succeeded:
        sys.exit(1)
90
+
91
+
92
+ if __name__ == '__main__':
93
+ main()
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rolfedh-doc-utils"
7
- version = "0.1.9"
7
+ version = "0.1.10"
8
8
  description = "CLI tools for AsciiDoc documentation projects"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -21,10 +21,11 @@ archive-unused-images = "archive_unused_images:main"
21
21
  find-unused-attributes = "find_unused_attributes:main"
22
22
  format-asciidoc-spacing = "format_asciidoc_spacing:main"
23
23
  replace-link-attributes = "replace_link_attributes:main"
24
+ extract-link-attributes = "extract_link_attributes:main"
24
25
 
25
26
  [tool.setuptools.packages.find]
26
27
  where = ["."]
27
28
  include = ["doc_utils*"]
28
29
 
29
30
  [tool.setuptools]
30
- py-modules = ["find_unused_attributes", "check_scannability", "archive_unused_files", "archive_unused_images", "format_asciidoc_spacing", "replace_link_attributes"]
31
+ py-modules = ["find_unused_attributes", "check_scannability", "archive_unused_files", "archive_unused_images", "format_asciidoc_spacing", "replace_link_attributes", "extract_link_attributes"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.9
3
+ Version: 0.1.10
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -79,7 +79,8 @@ pip install -e .
79
79
 
80
80
  | Tool | Description | Usage |
81
81
  |------|-------------|-------|
82
- | **`replace-link-attributes`** | Resolves Vale LinkAttribute violations by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
82
+ | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
83
+ | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
83
84
  | **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
84
85
  | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
85
86
  | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
@@ -3,12 +3,14 @@ README.md
3
3
  archive_unused_files.py
4
4
  archive_unused_images.py
5
5
  check_scannability.py
6
+ extract_link_attributes.py
6
7
  find_unused_attributes.py
7
8
  format_asciidoc_spacing.py
8
9
  pyproject.toml
9
10
  replace_link_attributes.py
10
11
  setup.py
11
12
  doc_utils/__init__.py
13
+ doc_utils/extract_link_attributes.py
12
14
  doc_utils/file_utils.py
13
15
  doc_utils/format_asciidoc_spacing.py
14
16
  doc_utils/replace_link_attributes.py
@@ -28,6 +30,7 @@ tests/test_archive_unused_images.py
28
30
  tests/test_auto_discovery.py
29
31
  tests/test_check_scannability.py
30
32
  tests/test_cli_entry_points.py
33
+ tests/test_extract_link_attributes.py
31
34
  tests/test_file_utils.py
32
35
  tests/test_fixture_archive_unused_files.py
33
36
  tests/test_fixture_archive_unused_images.py
@@ -2,6 +2,7 @@
2
2
  archive-unused-files = archive_unused_files:main
3
3
  archive-unused-images = archive_unused_images:main
4
4
  check-scannability = check_scannability:main
5
+ extract-link-attributes = extract_link_attributes:main
5
6
  find-unused-attributes = find_unused_attributes:main
6
7
  format-asciidoc-spacing = format_asciidoc_spacing:main
7
8
  replace-link-attributes = replace_link_attributes:main
@@ -2,6 +2,7 @@ archive_unused_files
2
2
  archive_unused_images
3
3
  check_scannability
4
4
  doc_utils
5
+ extract_link_attributes
5
6
  find_unused_attributes
6
7
  format_asciidoc_spacing
7
8
  replace_link_attributes
@@ -0,0 +1,363 @@
1
+ #!/usr/bin/env python3
2
+ """Tests for extract_link_attributes module."""
3
+
4
+ import os
5
+ import tempfile
6
+ import shutil
7
+ from pathlib import Path
8
+ import pytest
9
+ from doc_utils.extract_link_attributes import (
10
+ find_attribute_files,
11
+ find_link_macros,
12
+ generate_attribute_name,
13
+ group_macros_by_url,
14
+ select_link_text,
15
+ load_existing_attributes,
16
+ create_attributes,
17
+ prepare_file_updates,
18
+ extract_link_attributes
19
+ )
20
+
21
+
22
class TestFindAttributeFiles:
    """Checks for attribute-file discovery through find_attribute_files."""

    def test_find_common_attribute_files(self, tmp_path):
        """Attribute files in the root and in a subdirectory are all reported."""
        # Lay out three root-level files plus one nested under modules/.
        nested_dir = tmp_path / "modules"
        nested_dir.mkdir()
        for target in (
            tmp_path / "common-attributes.adoc",
            tmp_path / "attributes.adoc",
            tmp_path / "custom-attributes.adoc",
            nested_dir / "module-attributes.adoc",
        ):
            target.touch()

        found = find_attribute_files(str(tmp_path))

        assert len(found) == 4
        # Results are expected as paths relative to the scanned root.
        for expected in (
            "common-attributes.adoc",
            "attributes.adoc",
            "custom-attributes.adoc",
            "modules/module-attributes.adoc",
        ):
            assert expected in found

    def test_no_attribute_files(self, tmp_path):
        """An empty directory yields an empty list."""
        assert find_attribute_files(str(tmp_path)) == []
48
+
49
+
50
class TestFindLinkMacros:
    """Checks for link/xref macro detection through find_link_macros."""

    def test_find_link_with_attributes(self, tmp_path):
        """Only macros whose URL contains an attribute reference are returned."""
        document = """
= Test Document

See link:https://example.com/{product-version}/guide.html[User Guide] for details.

Also check xref:{base-url}/intro.html[Introduction].

Regular link without attributes: link:https://example.com/static.html[Static Link]
"""
        source = tmp_path / "test.adoc"
        source.write_text(document)

        macros = find_link_macros(str(source))

        # The static link carries no attribute, so two macros remain.
        assert len(macros) == 2
        link_macro, xref_macro = macros

        # Each entry is indexed as: full macro, URL, link text, line number.
        assert link_macro[0] == "link:https://example.com/{product-version}/guide.html[User Guide]"
        assert link_macro[1] == "https://example.com/{product-version}/guide.html"
        assert link_macro[2] == "User Guide"
        assert link_macro[3] == 4  # Line number (accounting for empty first line)

        assert xref_macro[0] == "xref:{base-url}/intro.html[Introduction]"
        assert xref_macro[1] == "{base-url}/intro.html"
        assert xref_macro[2] == "Introduction"
        assert xref_macro[3] == 6  # Line number

    def test_no_macros_with_attributes(self, tmp_path):
        """A file whose only link has no attribute produces no results."""
        document = """
= Test Document

Regular link: link:https://example.com/guide.html[Guide]
"""
        source = tmp_path / "test.adoc"
        source.write_text(document)

        assert len(find_link_macros(str(source))) == 0
92
+
93
+
94
class TestGenerateAttributeName:
    """Checks for attribute-name derivation through generate_attribute_name."""

    def test_generate_from_url_with_domain(self):
        """A name built from a full URL is 'link-' prefixed and reflects host or path."""
        generated = generate_attribute_name(
            "https://docs.example.com/{version}/guide.html", set(), 1
        )

        assert generated.startswith("link-")
        assert "docs-example" in generated or "guide" in generated

    def test_generate_unique_names(self):
        """A name already in the existing set is never handed out again."""
        taken = {"link-example-com-guide"}

        generated = generate_attribute_name("https://example.com/guide.html", taken, 1)

        assert generated != "link-example-com-guide"
        assert generated.startswith("link-")

    def test_clean_special_characters(self):
        """Spaces, slashes and dots from the URL do not survive into the name."""
        generated = generate_attribute_name(
            "https://example.com/path with spaces/{attr}/file.html", set(), 1
        )

        # Should not contain spaces or special chars
        for forbidden in (" ", "/", "."):
            assert forbidden not in generated
128
+
129
+
130
class TestGroupMacrosByUrl:
    """Checks for URL-based grouping through group_macros_by_url."""

    def test_group_same_urls(self):
        """Macros sharing a URL land in the same group, keyed by that URL."""
        # Tuple layout used here: (file, full macro, url, link text, line number).
        found_macros = [
            ("file1.adoc", "link:url1[Text1]", "url1", "Text1", 1),
            ("file2.adoc", "link:url1[Text2]", "url1", "Text2", 5),
            ("file3.adoc", "link:url2[Text3]", "url2", "Text3", 10),
        ]

        grouped = group_macros_by_url(found_macros)

        assert len(grouped) == 2
        for url, expected_size in (("url1", 2), ("url2", 1)):
            assert url in grouped
            assert len(grouped[url]) == expected_size
148
+
149
+
150
class TestSelectLinkText:
    """Checks for link-text selection through select_link_text."""

    def test_single_variation(self):
        """When every occurrence uses the same text, that text is returned."""
        occurrences = [
            ("file1.adoc", "Same Text", "link:url[Same Text]", 1),
            ("file2.adoc", "Same Text", "link:url[Same Text]", 2),
        ]

        chosen = select_link_text("url", occurrences, interactive=False)

        assert chosen == "Same Text"

    def test_non_interactive_most_common(self):
        """Without prompting, the most frequently used text wins."""
        # "Text A" appears twice, "Text B" once.
        occurrences = [
            ("file1.adoc", "Text A", "link:url[Text A]", 1),
            ("file2.adoc", "Text B", "link:url[Text B]", 2),
            ("file3.adoc", "Text A", "link:url[Text A]", 3),
        ]

        chosen = select_link_text("url", occurrences, interactive=False)

        assert chosen == "Text A"  # Most common
175
+
176
+
177
class TestLoadExistingAttributes:
    """Checks for attribute-file parsing through load_existing_attributes."""

    def test_load_attributes(self, tmp_path):
        """Every ':name: value' line is loaded; the comment line is not counted."""
        definitions = """
// Common attributes
:product-name: My Product
:version: 1.0
:docs-url: https://docs.example.com
:link-guide: link:https://example.com/guide.html[User Guide]
"""
        source = tmp_path / "attributes.adoc"
        source.write_text(definitions)

        loaded = load_existing_attributes(str(source))

        assert len(loaded) == 4
        expected_pairs = (
            ("product-name", "My Product"),
            ("version", "1.0"),
            ("docs-url", "https://docs.example.com"),
            ("link-guide", "link:https://example.com/guide.html[User Guide]"),
        )
        for attr_name, attr_value in expected_pairs:
            assert loaded[attr_name] == attr_value

    def test_load_nonexistent_file(self, tmp_path):
        """A missing file results in an empty mapping."""
        missing = tmp_path / "missing.adoc"
        assert load_existing_attributes(str(missing)) == {}
203
+
204
+
205
class TestCreateAttributes:
    """Checks for attribute creation through create_attributes."""

    def test_create_new_attributes(self):
        """Each URL without an existing attribute gets a new attribute value."""
        link_url = "https://example.com/{version}/guide.html"
        xref_url = "{base-url}/intro.html"
        grouped = {
            link_url: [
                ("file1.adoc", "Guide", "link:https://example.com/{version}/guide.html[Guide]", 1)
            ],
            xref_url: [
                ("file2.adoc", "Intro", "xref:{base-url}/intro.html[Intro]", 2)
            ],
        }

        created = create_attributes(grouped, {}, interactive=False)

        assert len(created) == 2
        # Check that attributes were created
        values = list(created.values())
        assert any("link:https://example.com/{version}/guide.html[Guide]" in value for value in values)
        assert any("xref:{base-url}/intro.html[Intro]" in value for value in values)

    def test_skip_existing_urls(self, capsys):
        """A URL already covered by an attribute is skipped and reported on stdout."""
        grouped = {
            "https://example.com/guide.html": [
                ("file1.adoc", "Guide", "link:https://example.com/guide.html[Guide]", 1)
            ]
        }
        already_defined = {
            "existing-link": "link:https://example.com/guide.html[Existing Guide]"
        }

        created = create_attributes(grouped, already_defined, interactive=False)

        assert len(created) == 0
        printed = capsys.readouterr().out
        assert "already has attribute" in printed
244
+
245
+
246
class TestPrepareFileUpdates:
    """Checks for replacement planning through prepare_file_updates."""

    def test_prepare_updates(self):
        """Replacements are collected per file as (macro, attribute reference) pairs."""
        # url1 occurs in both files; url2 occurs only in file1.
        grouped = {
            "url1": [
                ("file1.adoc", "Text1", "link:url1[Text1]", 1),
                ("file2.adoc", "Text1", "link:url1[Text1]", 2),
            ],
            "url2": [
                ("file1.adoc", "Text2", "link:url2[Text2]", 3),
            ],
        }
        name_to_macro = {
            "link-1": "link:url1[Text1]",
            "link-2": "link:url2[Text2]",
        }

        planned = prepare_file_updates(grouped, name_to_macro)

        assert len(planned) == 2
        assert "file1.adoc" in planned
        assert "file2.adoc" in planned
        assert len(planned["file1.adoc"]) == 2  # Two replacements in file1
        assert len(planned["file2.adoc"]) == 1  # One replacement in file2

        # Check replacement pairs
        first_file_pairs = planned["file1.adoc"]
        for pair in (
            ("link:url1[Text1]", "{link-1}"),
            ("link:url2[Text2]", "{link-2}"),
        ):
            assert pair in first_file_pairs
277
+
278
+
279
class TestExtractLinkAttributesIntegration:
    """Integration tests for the main extract_link_attributes function.

    Each test builds a small documentation tree under ``tmp_path`` and runs
    from inside it. The working directory is changed with
    ``monkeypatch.chdir`` so it is restored automatically at teardown and
    does not leak into later tests.
    """

    def test_full_extraction_dry_run(self, tmp_path, capsys, monkeypatch):
        """Test full extraction process in dry-run mode.

        Expects a success result, a summary on stdout, and no changes on disk.
        """
        # Create test structure
        modules_dir = tmp_path / "modules"
        modules_dir.mkdir()

        # Create attribute file
        attr_file = tmp_path / "common-attributes.adoc"
        attr_file.write_text(":existing-attr: value\n")

        # Create test .adoc file with links: two macros share one URL
        # (with different link text) and a third points at another URL.
        test_file = modules_dir / "test.adoc"
        original_content = """
= Test Module

See link:https://docs.example.com/{version}/guide.html[Installation Guide] for setup.

Also check link:https://docs.example.com/{version}/guide.html[Setup Guide] for config.

And xref:{base-url}/intro.html[Introduction] for overview.
"""
        test_file.write_text(original_content)

        # Run extraction in dry-run mode from inside the temporary tree.
        monkeypatch.chdir(tmp_path)
        result = extract_link_attributes(
            attributes_file=str(attr_file),
            scan_dirs=[str(modules_dir)],
            interactive=False,
            dry_run=True,
        )

        assert result is True

        # Check output
        captured = capsys.readouterr()
        assert "Found 3 link/xref macros" in captured.out
        assert "Grouped into 2 unique URLs" in captured.out
        assert "[DRY RUN]" in captured.out

        # Verify no files were modified: neither the attribute file nor the
        # scanned module may change in a dry run.
        assert attr_file.read_text() == ":existing-attr: value\n"
        assert test_file.read_text() == original_content

    def test_full_extraction_with_write(self, tmp_path, monkeypatch):
        """Test full extraction process with actual file updates.

        Expects the attribute file to gain the extracted link definition and
        the module file to reference it instead of the inline macro.
        """
        # Create test structure
        modules_dir = tmp_path / "modules"
        modules_dir.mkdir()

        # Create attribute file
        attr_file = tmp_path / "common-attributes.adoc"
        attr_file.write_text(":existing-attr: value\n")

        # Create test .adoc file
        test_file = modules_dir / "test.adoc"
        original_content = """
= Test Module

See link:https://example.com/{version}/guide.html[Guide] for info.
"""
        test_file.write_text(original_content)

        # Run extraction from inside the temporary tree.
        monkeypatch.chdir(tmp_path)
        result = extract_link_attributes(
            attributes_file=str(attr_file),
            scan_dirs=[str(modules_dir)],
            interactive=False,
            dry_run=False,
        )

        assert result is True

        # Check that attribute file was updated (existing content preserved).
        attr_content = attr_file.read_text()
        assert ":existing-attr: value" in attr_content
        assert "// Extracted link attributes" in attr_content
        assert "link:https://example.com/{version}/guide.html[Guide]" in attr_content

        # Check that .adoc file was updated
        updated_content = test_file.read_text()
        assert "link:https://example.com/{version}/guide.html[Guide]" not in updated_content
        assert "{link-" in updated_content  # Should have attribute reference