rolfedh-doc-utils 0.1.9__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {rolfedh_doc_utils-0.1.9/rolfedh_doc_utils.egg-info → rolfedh_doc_utils-0.1.11}/PKG-INFO +5 -3
  2. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/README.md +4 -2
  3. rolfedh_doc_utils-0.1.11/doc_utils/extract_link_attributes.py +453 -0
  4. rolfedh_doc_utils-0.1.11/doc_utils/unused_attributes.py +138 -0
  5. rolfedh_doc_utils-0.1.11/doc_utils/validate_links.py +576 -0
  6. rolfedh_doc_utils-0.1.11/extract_link_attributes.py +93 -0
  7. rolfedh_doc_utils-0.1.11/find_unused_attributes.py +82 -0
  8. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/pyproject.toml +4 -2
  9. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11/rolfedh_doc_utils.egg-info}/PKG-INFO +5 -3
  10. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/rolfedh_doc_utils.egg-info/SOURCES.txt +7 -1
  11. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/rolfedh_doc_utils.egg-info/entry_points.txt +2 -0
  12. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/rolfedh_doc_utils.egg-info/top_level.txt +2 -0
  13. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_cli_entry_points.py +13 -6
  14. rolfedh_doc_utils-0.1.11/tests/test_extract_link_attributes.py +363 -0
  15. rolfedh_doc_utils-0.1.11/tests/test_validate_links.py +385 -0
  16. rolfedh_doc_utils-0.1.11/validate_links.py +202 -0
  17. rolfedh_doc_utils-0.1.9/doc_utils/unused_attributes.py +0 -50
  18. rolfedh_doc_utils-0.1.9/find_unused_attributes.py +0 -41
  19. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/LICENSE +0 -0
  20. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/archive_unused_files.py +0 -0
  21. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/archive_unused_images.py +0 -0
  22. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/check_scannability.py +0 -0
  23. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/doc_utils/__init__.py +0 -0
  24. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/doc_utils/file_utils.py +0 -0
  25. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/doc_utils/format_asciidoc_spacing.py +0 -0
  26. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/doc_utils/replace_link_attributes.py +0 -0
  27. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/doc_utils/scannability.py +0 -0
  28. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/doc_utils/topic_map_parser.py +0 -0
  29. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/doc_utils/unused_adoc.py +0 -0
  30. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/doc_utils/unused_images.py +0 -0
  31. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/format_asciidoc_spacing.py +0 -0
  32. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/replace_link_attributes.py +0 -0
  33. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
  34. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/rolfedh_doc_utils.egg-info/requires.txt +0 -0
  35. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/setup.cfg +0 -0
  36. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/setup.py +0 -0
  37. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_archive_unused_files.py +0 -0
  38. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_archive_unused_images.py +0 -0
  39. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_auto_discovery.py +0 -0
  40. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_check_scannability.py +0 -0
  41. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_file_utils.py +0 -0
  42. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_fixture_archive_unused_files.py +0 -0
  43. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_fixture_archive_unused_images.py +0 -0
  44. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_fixture_check_scannability.py +0 -0
  45. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_parse_exclude_list.py +0 -0
  46. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_symlink_handling.py +0 -0
  47. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_topic_map_parser.py +0 -0
  48. {rolfedh_doc_utils-0.1.9 → rolfedh_doc_utils-0.1.11}/tests/test_unused_attributes.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -79,8 +79,10 @@ pip install -e .
79
79
 
80
80
  | Tool | Description | Usage |
81
81
  |------|-------------|-------|
82
- | **`replace-link-attributes`** | Resolves Vale LinkAttribute violations by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
83
- | **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
82
+ | **`validate-links`** [EXPERIMENTAL] | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
83
+ | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
84
+ | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
85
+ | **`format-asciidoc-spacing`** [EXPERIMENTAL] | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
84
86
  | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
85
87
  | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
86
88
  | **`archive-unused-images`** | Finds and archives unreferenced images | `archive-unused-images` (preview)<br>`archive-unused-images --archive` (execute) |
@@ -46,8 +46,10 @@ pip install -e .
46
46
 
47
47
  | Tool | Description | Usage |
48
48
  |------|-------------|-------|
49
- | **`replace-link-attributes`** | Resolves Vale LinkAttribute violations by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
50
- | **`format-asciidoc-spacing`** | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
49
+ | **`validate-links`** [EXPERIMENTAL] | Validates all links in documentation, with URL transposition for preview environments | `validate-links --transpose "https://prod--https://preview"` |
50
+ | **`extract-link-attributes`** | Extracts link/xref macros with attributes into reusable definitions | `extract-link-attributes --dry-run` |
51
+ | **`replace-link-attributes`** | Resolves Vale LinkAttribute issues by replacing attributes in link URLs | `replace-link-attributes --dry-run` |
52
+ | **`format-asciidoc-spacing`** [EXPERIMENTAL] | Standardizes spacing after headings and around includes | `format-asciidoc-spacing --dry-run modules/` |
51
53
  | **`check-scannability`** | Analyzes readability (sentence/paragraph length) | `check-scannability --max-words 25` |
52
54
  | **`archive-unused-files`** | Finds and archives unreferenced .adoc files | `archive-unused-files` (preview)<br>`archive-unused-files --archive` (execute) |
53
55
  | **`archive-unused-images`** | Finds and archives unreferenced images | `archive-unused-images` (preview)<br>`archive-unused-images --archive` (execute) |
@@ -0,0 +1,453 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Extract link and xref macros containing attributes into attribute definitions.
4
+ """
5
+
6
+ import os
7
+ import re
8
+ from pathlib import Path
9
+ from typing import Dict, List, Set, Tuple, Optional
10
+ from collections import defaultdict
11
+ import unicodedata
12
+
13
+
14
def find_attribute_files(base_path: str = '.') -> List[str]:
    """Locate candidate AsciiDoc attribute files under *base_path*.

    Searches a fixed set of conventional filename patterns, removes
    duplicates (a file can match several patterns), and returns the
    paths relative to *base_path*, sorted alphabetically.
    """
    patterns = (
        '**/common-attributes.adoc',
        '**/attributes.adoc',
        '**/*-attributes.adoc',
        '**/attributes-*.adoc',
        '**/common_attributes.adoc',
        '**/_common-attributes.adoc',
    )

    root = Path(base_path)
    matches = [
        str(candidate.relative_to(root))
        for pattern in patterns
        for candidate in root.glob(pattern)
        if candidate.is_file()
    ]

    # dict.fromkeys drops duplicates while keeping first-seen order;
    # the final sort makes the result deterministic regardless.
    return sorted(dict.fromkeys(matches))
43
+
44
+
45
def select_attribute_file(attribute_files: List[str]) -> Optional[str]:
    """Interactively pick one attribute file from *attribute_files*.

    Args:
        attribute_files: Candidate paths to choose from.

    Returns:
        The chosen path, or None when the list is empty or the user
        cancels (EOF / Ctrl-C / repeated invalid input followed by cancel).
        The annotation is Optional[str] because None is a real outcome.
    """
    if not attribute_files:
        return None

    print("\nMultiple attribute files found. Please select one:")
    for i, file_path in enumerate(attribute_files, 1):
        print(f"  {i}. {file_path}")

    while True:
        try:
            choice = input(f"\nEnter your choice (1-{len(attribute_files)}): ").strip()
            index = int(choice) - 1
            if 0 <= index < len(attribute_files):
                return attribute_files[index]
            else:
                print(f"Please enter a number between 1 and {len(attribute_files)}")
        except (ValueError, EOFError, KeyboardInterrupt):
            # Any non-numeric input or interrupt aborts the selection.
            print("\nOperation cancelled.")
            return None
65
+
66
+
67
def load_existing_attributes(file_path: str) -> Dict[str, str]:
    """Parse ``:name: value`` attribute definitions from *file_path*.

    Returns an empty mapping when the file does not exist; otherwise a
    dict of attribute name -> value, both stripped of whitespace.
    """
    if not os.path.exists(file_path):
        return {}

    definition = re.compile(r'^:([^:]+):\s*(.+)$')
    attributes: Dict[str, str] = {}
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            parsed = definition.match(raw_line)
            if parsed:
                attributes[parsed.group(1).strip()] = parsed.group(2).strip()
    return attributes
83
+
84
+
85
def find_link_macros(file_path: str) -> List[Tuple[str, str, str, int]]:
    """
    Find all link: and xref: macros whose URL contains an {attribute}.

    Args:
        file_path: Path of the .adoc file to scan.

    Returns:
        List of (full_macro, url, link_text, line_number) tuples.  On any
        given line, link: matches are reported before xref: matches,
        matching the original two-pass scan order.
    """
    # Compile the patterns once instead of rebuilding the pattern list
    # (and re-looking-up the regex cache) for every line of the file.
    # A macro is (link|xref):<url>[<text>] where <url> holds {attribute}.
    patterns = [
        re.compile(r'(link:([^[\]]*\{[^}]+\}[^[\]]*)\[([^\]]*)\])', re.IGNORECASE),
        re.compile(r'(xref:([^[\]]*\{[^}]+\}[^[\]]*)\[([^\]]*)\])', re.IGNORECASE),
    ]

    macros: List[Tuple[str, str, str, int]] = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            for pattern in patterns:
                for match in pattern.finditer(line):
                    macros.append((match.group(1), match.group(2), match.group(3), line_num))

    return macros
110
+
111
+
112
def generate_attribute_name(url: str, existing_attrs: Set[str], counter: int) -> str:
    """Derive a unique, human-readable attribute name from *url*.

    Args:
        url: The macro URL; may contain {attribute} references.
        existing_attrs: Names already taken; the result will not collide.
        counter: Unused; retained for backward compatibility with callers.

    Returns:
        A name of the form ``link-<slug>``, suffixed ``-1``, ``-2``, ...
        when the base name is already taken.
    """
    base_name = url

    # Drop the protocol so the slug starts at the host.
    if '://' in url:
        base_name = re.sub(r'^[^:]+://', '', url)

    # Attribute references carry no naming information.
    base_name = re.sub(r'\{[^}]+\}', '', base_name)

    # Prefer "<domain>-<last path component>" when a path is present.
    if '/' in base_name:
        parts = base_name.split('/')
        if len(parts) > 1:
            domain_part = parts[0].replace('.', '-')
            path_part = parts[-1].split('.')[0] if parts[-1] else ''
            base_name = f"{domain_part}-{path_part}" if path_part else domain_part

    # Slugify: non-alphanumerics to dashes, collapse runs, trim, lowercase.
    base_name = re.sub(r'[^a-zA-Z0-9-]', '-', base_name)
    base_name = re.sub(r'-+', '-', base_name)
    base_name = base_name.strip('-').lower()

    # Keep names manageable.
    if len(base_name) > 30:
        base_name = base_name[:30]

    # A URL made entirely of attribute references slugifies to the empty
    # string, which previously produced the dangling name "link-".
    if not base_name:
        base_name = 'url'

    # Uniquify against everything already defined.
    attr_name = f"link-{base_name}"
    original_name = attr_name
    suffix = 1
    while attr_name in existing_attrs:
        attr_name = f"{original_name}-{suffix}"
        suffix += 1

    return attr_name
156
+
157
+
158
def group_macros_by_url(macros: List[Tuple[str, str, str, str, int]]) -> Dict[str, List[Tuple[str, str, str, int]]]:
    """Bucket macro occurrences by their URL.

    Args:
        macros: (file_path, full_macro, url, link_text, line_number) tuples.

    Returns:
        Mapping of url -> list of (file_path, link_text, full_macro,
        line_number) occurrences, preserving input order within each URL.
    """
    grouped: Dict[str, List[Tuple[str, str, str, int]]] = defaultdict(list)
    for path, macro, url, text, line_no in macros:
        grouped[url].append((path, text, macro, line_no))
    return grouped
170
+
171
+
172
def select_link_text(url: str, variations: List[Tuple[str, str, str, int]], interactive: bool = True) -> str:
    """Pick the link text to use for *url* among its recorded occurrences.

    Args:
        url: The URL shared by all occurrences.
        variations: (file_path, link_text, full_macro, line_number) tuples.
        interactive: When True, prompt the user if several distinct texts
            exist; otherwise silently pick the most widely used one.
    """
    # Map each distinct text to the "file:line" locations using it,
    # preserving first-seen order (matters for tie-breaking below).
    occurrences: Dict[str, List[str]] = {}
    for path, text, _, line_no in variations:
        occurrences.setdefault(text, []).append(f"{path}:{line_no}")

    if len(occurrences) == 1:
        # Single variation: nothing to choose.
        return next(iter(occurrences))

    if not interactive:
        # Most widely used text wins; ties go to the first seen.
        return max(occurrences.items(), key=lambda item: len(item[1]))[0]

    print(f"\nMultiple link text variations found for URL: {url}")
    print("Please select the preferred text:")

    choices = list(occurrences.items())
    for number, (text, locations) in enumerate(choices, 1):
        print(f"\n {number}. \"{text}\"")
        print(f" Used in: {', '.join(locations[:3])}")
        if len(locations) > 3:
            print(f" ... and {len(locations) - 3} more locations")

    print(f"\n {len(choices) + 1}. Enter custom text")

    while True:
        try:
            answer = input(f"\nEnter your choice (1-{len(choices) + 1}): ").strip()
            picked = int(answer) - 1

            if 0 <= picked < len(choices):
                return choices[picked][0]
            elif picked == len(choices):
                custom_text = input("Enter custom link text: ").strip()
                if custom_text:
                    return custom_text
                print("Text cannot be empty. Please try again.")
            else:
                print(f"Please enter a number between 1 and {len(choices) + 1}")
        except (ValueError, EOFError, KeyboardInterrupt):
            # Cancelled: fall back to the non-interactive choice.
            print("\nUsing most common text variation.")
            return max(occurrences.items(), key=lambda item: len(item[1]))[0]
226
+
227
+
228
def collect_all_macros(scan_dirs: List[str] = None) -> List[Tuple[str, str, str, str, int]]:
    """
    Collect all link/xref macros with attributes from all .adoc files.

    Args:
        scan_dirs: Directories to walk; defaults to the current directory.

    Returns:
        List of (file_path, full_macro, url, link_text, line_number) tuples.
    """
    if scan_dirs is None:
        scan_dirs = ['.']

    all_macros: List[Tuple[str, str, str, str, int]] = []

    for scan_dir in scan_dirs:
        for root, dirs, files in os.walk(scan_dir):
            # Prune hidden directories (".git", ".archive", ...) in place so
            # os.walk never descends into them.  The previous substring test
            # ('/.' in root) skipped their files only after walking the whole
            # hidden tree.
            dirs[:] = [d for d in dirs if not d.startswith('.')]

            for file in files:
                if file.endswith('.adoc'):
                    file_path = os.path.join(root, file)
                    for full_macro, url, link_text, line_num in find_link_macros(file_path):
                        all_macros.append((file_path, full_macro, url, link_text, line_num))

    return all_macros
253
+
254
+
255
def create_attributes(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
                      existing_attrs: Dict[str, str],
                      interactive: bool = True) -> Dict[str, str]:
    """
    Create new attribute definitions, one per unique URL.

    Args:
        url_groups: url -> occurrences, as built by group_macros_by_url.
        existing_attrs: Already-defined attributes (name -> value).
        interactive: Passed through to select_link_text.

    Returns:
        Mapping of new attribute names to their link/xref definition values.
    """
    new_attributes: Dict[str, str] = {}
    taken_names = set(existing_attrs.keys())
    counter = 1

    for url, variations in url_groups.items():
        # Skip URLs already covered by an existing attribute.  NOTE(review):
        # this is a substring test, so a very short URL could match an
        # unrelated attribute value — confirm this looseness is intended.
        already = next((name for name, value in existing_attrs.items() if url in value), None)
        if already:
            print(f"URL already has attribute {{{already}}}: {url}")
            continue

        link_text = select_link_text(url, variations, interactive)

        attr_name = generate_attribute_name(url, taken_names | set(new_attributes.keys()), counter)
        counter += 1

        # The new attribute reproduces the original macro flavour.
        sample_macro = variations[0][2]  # full_macro of the first occurrence
        macro_type = 'xref' if sample_macro.startswith('xref:') else 'link'

        attr_value = f"{macro_type}:{url}[{link_text}]"
        new_attributes[attr_name] = attr_value
        print(f"Created attribute: :{attr_name}: {attr_value}")

    return new_attributes
297
+
298
+
299
def update_attribute_file(file_path: str, new_attributes: Dict[str, str], dry_run: bool = False):
    """Append *new_attributes* to the attribute file (or preview them).

    Args:
        file_path: Attribute file to append to; created if missing.
        new_attributes: name -> value definitions to write, sorted by name.
        dry_run: When True, print what would be added without writing.
    """
    if not new_attributes:
        print("No new attributes to add.")
        return

    if dry_run:
        print(f"\n[DRY RUN] Would add {len(new_attributes)} attributes to {file_path}:")
        for attr_name, attr_value in new_attributes.items():
            print(f" :{attr_name}: {attr_value}")
        return

    # Create the parent directory if needed ('' means the cwd).
    parent = os.path.dirname(file_path) or '.'
    os.makedirs(parent, exist_ok=True)

    with open(file_path, 'a', encoding='utf-8') as f:
        # Separate from any existing content with a blank line.
        if os.path.getsize(file_path) > 0:
            f.write('\n')
        f.write('// Extracted link attributes\n')
        for attr_name, attr_value in sorted(new_attributes.items()):
            f.write(f":{attr_name}: {attr_value}\n")

    print(f"Added {len(new_attributes)} attributes to {file_path}")
323
+
324
+
325
def replace_macros_with_attributes(file_updates: Dict[str, List[Tuple[str, str]]], dry_run: bool = False):
    """
    Replace link/xref macros with their attribute references.

    Args:
        file_updates: file_path -> list of (old_macro, attribute_ref) pairs.
        dry_run: When True, only print a preview of the first replacements.
    """
    for file_path, replacements in file_updates.items():
        if dry_run:
            print(f"\n[DRY RUN] Would update {file_path}:")
            for old_macro, attr_ref in replacements[:3]:
                print(f" Replace: {old_macro}")
                print(f" With: {attr_ref}")
            if len(replacements) > 3:
                print(f" ... and {len(replacements) - 3} more replacements")
            continue

        # Read, substitute every occurrence of each macro, write back.
        target = Path(file_path)
        text = target.read_text(encoding='utf-8')
        for old_macro, attr_ref in replacements:
            text = text.replace(old_macro, attr_ref)
        target.write_text(text, encoding='utf-8')

        print(f"Updated {file_path}: {len(replacements)} replacements")
354
+
355
+
356
def prepare_file_updates(url_groups: Dict[str, List[Tuple[str, str, str, int]]],
                         attribute_mapping: Dict[str, str]) -> Dict[str, List[Tuple[str, str]]]:
    """
    Plan the macro -> {attribute} substitutions for each file.

    Args:
        url_groups: url -> occurrences, as built by group_macros_by_url.
        attribute_mapping: attribute name -> "link:...[...]" definition.

    Returns:
        file_path -> list of (old_macro, attribute_reference) pairs.
    """
    # Invert the mapping: pull the URL back out of each attribute value.
    url_to_attr: Dict[str, str] = {}
    for attr_name, attr_value in attribute_mapping.items():
        parsed = re.match(r'(?:link|xref):([^\[]+)\[', attr_value)
        if parsed:
            url_to_attr[parsed.group(1)] = attr_name

    planned: Dict[str, List[Tuple[str, str]]] = defaultdict(list)
    for url, variations in url_groups.items():
        attr_name = url_to_attr.get(url)
        if attr_name is None:
            continue
        for file_path, _, full_macro, _ in variations:
            planned[file_path].append((full_macro, f"{{{attr_name}}}"))

    return dict(planned)
382
+
383
+
384
def extract_link_attributes(attributes_file: str = None,
                            scan_dirs: List[str] = None,
                            interactive: bool = True,
                            dry_run: bool = False) -> bool:
    """
    Drive the full extraction workflow.

    Steps: locate/confirm the attributes file, scan for macros, group them
    by URL, mint attribute definitions, then rewrite the source files.

    Returns:
        True on success (including "nothing to do"); False when no
        attributes file could be determined.
    """
    # Resolve the attributes file: auto-detect, confirm, or ask.
    if not attributes_file:
        found_files = find_attribute_files()

        if not found_files:
            print("No attribute files found.")
            response = input("Create common-attributes.adoc? (y/n): ").strip().lower()
            if response != 'y':
                print("Please specify an attribute file with --attributes-file")
                return False
            attributes_file = 'common-attributes.adoc'
        elif len(found_files) == 1:
            attributes_file = found_files[0]
            print(f"Using attribute file: {attributes_file}")
        else:
            attributes_file = select_attribute_file(found_files)
            if not attributes_file:
                return False

    existing_attrs = load_existing_attributes(attributes_file)
    print(f"Loaded {len(existing_attrs)} existing attributes")

    print("\nScanning for link and xref macros with attributes...")
    all_macros = collect_all_macros(scan_dirs)

    if not all_macros:
        print("No link or xref macros with attributes found.")
        return True

    print(f"Found {len(all_macros)} link/xref macros with attributes")

    url_groups = group_macros_by_url(all_macros)
    print(f"Grouped into {len(url_groups)} unique URLs")

    new_attributes = create_attributes(url_groups, existing_attrs, interactive)

    if not new_attributes:
        print("No new attributes to create.")
        return True

    update_attribute_file(attributes_file, new_attributes, dry_run)

    # Include existing attributes too, so URLs that were already defined
    # are also mapped when planning replacements.
    file_updates = prepare_file_updates(url_groups, {**existing_attrs, **new_attributes})
    replace_macros_with_attributes(file_updates, dry_run)

    if dry_run:
        print("\n[DRY RUN] No files were modified. Run without --dry-run to apply changes.")
    else:
        print(f"\nSuccessfully extracted {len(new_attributes)} link attributes")

    return True
@@ -0,0 +1,138 @@
1
+ """
2
+ Module for finding unused AsciiDoc attributes.
3
+
4
+ Functions:
5
+ - parse_attributes_file: Parse attribute names from an attributes.adoc file.
6
+ - find_adoc_files: Recursively find all .adoc files in a directory (ignoring symlinks).
7
+ - scan_for_attribute_usage: Find which attributes are used in a set of .adoc files.
8
+ - find_unused_attributes: Main function to return unused attributes.
9
+ - find_attributes_files: Find all potential attributes files in the repository.
10
+ """
11
+
12
+ import os
13
+ import re
14
+ from pathlib import Path
15
+ from typing import Set, List, Optional
16
+
17
def parse_attributes_file(attr_file: str) -> Set[str]:
    """Return the set of attribute names defined in *attr_file*.

    Lines are stripped first, so indented ``:name:`` definitions count.

    Raises:
        FileNotFoundError: The path does not exist.
        ValueError: The path is not a regular file, or cannot be decoded
            as UTF-8.
        PermissionError: The file is not readable.
    """
    if not os.path.exists(attr_file):
        raise FileNotFoundError(f"Attributes file not found: {attr_file}")

    if not os.path.isfile(attr_file):
        raise ValueError(f"Path is not a file: {attr_file}")

    names: Set[str] = set()
    definition = re.compile(r'^:([\w-]+):')
    try:
        with open(attr_file, 'r', encoding='utf-8') as handle:
            for raw in handle:
                found = definition.match(raw.strip())
                if found:
                    names.add(found.group(1))
    except PermissionError:
        raise PermissionError(f"Permission denied reading file: {attr_file}")
    except UnicodeDecodeError as e:
        raise ValueError(f"Unable to read file (encoding issue): {attr_file}\n{str(e)}")

    return names
40
+
41
def find_adoc_files(root_dir: str) -> List[str]:
    """Recursively list all non-symlink .adoc files under *root_dir*."""
    found: List[str] = []
    # followlinks=False: never descend through symlinked directories.
    for dirpath, _dirnames, filenames in os.walk(root_dir, followlinks=False):
        for name in filenames:
            if not name.endswith('.adoc'):
                continue
            candidate = os.path.join(dirpath, name)
            # Symlinked files are also excluded.
            if not os.path.islink(candidate):
                found.append(candidate)
    return found
50
+
51
def scan_for_attribute_usage(adoc_files: List[str], attributes: Set[str]) -> Set[str]:
    """Return the subset of *attributes* referenced as ``{name}`` in any file."""
    reference = re.compile(r'\{([\w-]+)\}')
    used: Set[str] = set()
    for path in adoc_files:
        with open(path, 'r', encoding='utf-8') as handle:
            for line in handle:
                # Only count references to known attribute names.
                used.update(name for name in reference.findall(line) if name in attributes)
    return used
61
+
62
def find_attributes_files(root_dir: str = '.') -> List[str]:
    """Find candidate attributes*.adoc files under *root_dir*.

    Paths containing a hidden component or a common build-output
    directory are skipped.  Returns de-duplicated paths in sorted order.
    """
    patterns = (
        '**/attributes.adoc',
        '**/attributes*.adoc',
        '**/*attributes.adoc',
        '**/*-attributes.adoc',
    )
    skip_names = {'target', 'build', 'node_modules', '.archive'}

    root = Path(root_dir)
    found: List[str] = []
    for pattern in patterns:
        for path in root.glob(pattern):
            # Reject any path with a hidden or build-output component.
            if any(part.startswith('.') or part in skip_names for part in path.parts):
                continue
            as_str = str(path)
            if as_str not in found:
                found.append(as_str)

    found.sort()
    return found
84
+
85
+
86
def select_attributes_file(attributes_files: List[str]) -> Optional[str]:
    """Interactively choose an attributes file from *attributes_files*.

    Returns the selected (or user-supplied) path, or None when the list
    is empty, the user quits, or a supplied path does not exist.
    """
    if not attributes_files:
        return None

    if len(attributes_files) == 1:
        # Single candidate: confirm it, or accept a custom path.
        print(f"Found attributes file: {attributes_files[0]}")
        response = input("Use this file? (y/n): ").strip().lower()
        if response == 'y':
            return attributes_files[0]
        response = input("Enter the path to your attributes file: ").strip()
        if os.path.exists(response) and os.path.isfile(response):
            return response
        print(f"Error: File not found: {response}")
        return None

    # Several candidates: numbered menu plus a custom-path escape hatch.
    print("\nFound multiple attributes files:")
    for i, file_path in enumerate(attributes_files, 1):
        print(f" {i}. {file_path}")
    print(f" {len(attributes_files) + 1}. Enter custom path")

    while True:
        response = input(f"\nSelect option (1-{len(attributes_files) + 1}) or 'q' to quit: ").strip()
        if response.lower() == 'q':
            return None

        try:
            choice = int(response)
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue

        if 1 <= choice <= len(attributes_files):
            return attributes_files[choice - 1]
        if choice == len(attributes_files) + 1:
            response = input("Enter the path to your attributes file: ").strip()
            if os.path.exists(response) and os.path.isfile(response):
                return response
            # A bad custom path re-enters the menu loop, as before.
            print(f"Error: File not found: {response}")
        else:
            print(f"Invalid choice. Please enter a number between 1 and {len(attributes_files) + 1}")
131
+
132
+
133
def find_unused_attributes(attr_file: str, adoc_root: str = '.') -> List[str]:
    """Return, sorted, the attributes defined in *attr_file* that are never
    referenced by any .adoc file under *adoc_root*."""
    defined = parse_attributes_file(attr_file)
    referenced = scan_for_attribute_usage(find_adoc_files(adoc_root), defined)
    return sorted(defined - referenced)