rolfedh-doc-utils 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ """
2
+ Replace AsciiDoc attributes within link URLs with their actual values.
3
+
4
+ This module finds and replaces attribute references (like {attribute-name}) that appear
5
+ in the URL portion of AsciiDoc link macros (link: and xref:) with their resolved values
6
+ from attributes.adoc. Link text is preserved unchanged.
7
+ """
8
+
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Dict, List, Tuple, Optional
12
+
13
+
14
def find_attributes_files(root_dir: Path) -> List[Path]:
    """Find all attributes.adoc files below ``root_dir``.

    Files that sit under a hidden directory (a name starting with ``.``) or
    under a common build directory (``target``, ``build``, ``node_modules``)
    are skipped. Only the path components *below* ``root_dir`` are inspected,
    so the location of the repository itself (for example a checkout under
    ``~/.cache`` or ``/srv/build``, or a root given as ``..``) never causes
    every file to be filtered out.

    Args:
        root_dir: Directory to search recursively.

    Returns:
        Paths of the matching files, in the order ``Path.rglob`` yields them.
    """
    skipped_dirs = {'target', 'build', 'node_modules'}
    attributes_files = []

    # rglob() is already recursive, so no explicit '**/' prefix is needed.
    for path in root_dir.rglob('attributes.adoc'):
        # Judge only the components below root_dir, not the root's own path.
        relative_parts = path.relative_to(root_dir).parts
        if any(part.startswith('.') or part in skipped_dirs for part in relative_parts):
            continue
        attributes_files.append(path)

    return attributes_files
26
+
27
+
28
def load_attributes(attributes_file: Path) -> Dict[str, str]:
    """Parse ``:name: value`` attribute entries from an AsciiDoc file.

    Lines that do not look like an attribute entry are ignored. Values are
    stripped of surrounding whitespace, and when a name is defined more than
    once the last definition wins.

    Args:
        attributes_file: Path of the file to read (decoded as UTF-8).

    Returns:
        Mapping of attribute name to its raw value.
    """
    definition = re.compile(r'^:([a-zA-Z0-9_-]+):\s*(.*)$')
    attributes = {}

    with open(attributes_file, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            found = definition.match(raw_line)
            if found is None:
                continue
            name, value = found.groups()
            attributes[name] = value.strip()

    return attributes
43
+
44
+
45
def resolve_nested_attributes(attributes: Dict[str, str], max_iterations: int = 10) -> Dict[str, str]:
    """Expand ``{name}`` references that appear inside attribute values.

    The mapping is updated in place and also returned. Passes over the
    mapping repeat until a pass changes nothing or ``max_iterations`` passes
    have run, which bounds the work for circular definitions. References to
    names that are not in the mapping are left as written.
    """
    reference = re.compile(r'\{([a-zA-Z0-9_-]+)\}')

    for _ in range(max_iterations):
        dirty = False

        for name in attributes:
            current = attributes[name]

            # The reference list is taken once, from the value as it stood
            # when this attribute was reached in the current pass.
            for ref_name in reference.findall(current):
                if ref_name not in attributes:
                    continue
                expanded = current.replace('{' + ref_name + '}', attributes[ref_name])
                if expanded == current:
                    continue
                attributes[name] = current = expanded
                dirty = True

        if not dirty:
            break

    return attributes
66
+
67
+
68
def _expand_url_attributes(url: str, attributes: Dict[str, str]) -> Tuple[str, int]:
    """Return ``url`` with known ``{name}`` references expanded, and how many were expanded."""
    expanded = 0

    def expand(ref):
        nonlocal expanded
        name = ref.group(1)
        if name not in attributes:
            # Unknown attribute: leave the reference exactly as written.
            return ref.group(0)
        expanded += 1
        return attributes[name]

    # A callable replacement is inserted literally, so attribute values that
    # contain backslashes (e.g. Windows paths) are never treated as regex
    # replacement templates.
    return re.sub(r'\{([a-zA-Z0-9_-]+)\}', expand, url), expanded


def replace_link_attributes_in_file(file_path: Path, attributes: Dict[str, str], dry_run: bool = False) -> int:
    """Replace attribute references within link macros in a single file.

    Every ``link:target[text]`` and ``xref:target[text]`` macro (including the
    empty-text form ``[]``) is inspected. ``{name}`` references found in the
    target portion are replaced by ``attributes[name]``; the link text and
    references to unknown attributes are left untouched. Each reference is
    expanded once, so values that themselves contain references should be
    resolved before calling this function.

    The file is read and written without newline translation, so its
    existing line endings are preserved.

    Args:
        file_path: File to process (UTF-8).
        attributes: Mapping of attribute name to replacement value.
        dry_run: When true, compute the result but do not write the file.

    Returns:
        The number of attribute references replaced, or 0 when the file
        content would not change.
    """
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        original_content = f.read()

    replacement_count = 0

    def rewrite_macro(macro):
        nonlocal replacement_count
        macro_type, url_part, text_part = macro.groups()
        modified_url, expanded = _expand_url_attributes(url_part, attributes)
        replacement_count += expanded
        # Rebuild the macro with the new target but the original link text.
        return f'{macro_type}:{modified_url}[{text_part}]'

    content = re.sub(r'(link|xref):([^[\]]*)\[([^\]]*)\]', rewrite_macro, original_content)

    if content == original_content:
        return 0

    # Write changes if not in dry-run mode
    if not dry_run:
        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            f.write(content)

    return replacement_count
153
+
154
+
155
def find_adoc_files(root_dir: Path, exclude_dirs: Optional[set] = None) -> List[Path]:
    """Find all ``*.adoc`` files below ``root_dir``.

    A file is skipped when any component of its path *relative to*
    ``root_dir`` is listed in ``exclude_dirs``. The root's own location is
    not considered, so a repository checked out under a directory named,
    say, ``build`` is still scanned.

    Args:
        root_dir: Directory to search recursively.
        exclude_dirs: Names to exclude; defaults to ``.git``, ``target``,
            ``build`` and ``node_modules``.

    Returns:
        Paths of the matching files, in the order ``Path.rglob`` yields them.
    """
    if exclude_dirs is None:
        exclude_dirs = {'.git', 'target', 'build', 'node_modules'}

    adoc_files = []

    for path in root_dir.rglob('*.adoc'):
        # Compare only the components below root_dir against the exclusions.
        relative_parts = set(path.relative_to(root_dir).parts)
        if relative_parts.isdisjoint(exclude_dirs):
            adoc_files.append(path)

    return adoc_files
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Extract link and xref macros containing attributes into attribute definitions.
4
+
5
+ This tool finds all link: and xref: macros whose URLs contain attributes,
6
+ creates attribute definitions for them, and replaces the macros with
7
+ attribute references.
8
+ """
9
+
10
+ import argparse
11
+ import sys
12
+ from doc_utils.extract_link_attributes import extract_link_attributes
13
+
14
+
15
def main():
    """Run the extract-link-attributes command-line tool.

    Parses the command-line options, passes them to
    ``extract_link_attributes`` and exits with status 1 when that call
    reports failure, is interrupted with Ctrl-C, or raises. ``--verbose``
    additionally prints the traceback of an unexpected error.
    """
    parser = argparse.ArgumentParser(
        description='Extract link and xref macros containing attributes into attribute definitions',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Interactive mode with auto-discovery
extract-link-attributes

# Specify attribute file
extract-link-attributes --attributes-file common-attributes.adoc

# Non-interactive mode (uses most common link text)
extract-link-attributes --non-interactive

# Dry run to preview changes
extract-link-attributes --dry-run

# Scan specific directories
extract-link-attributes --scan-dir modules --scan-dir assemblies
"""
    )

    # Options are registered in this order so the generated help is stable.
    option_table = (
        (('--attributes-file',), {
            'help': 'Path to the attributes file to update (auto-discovered if not specified)',
        }),
        (('--scan-dir',), {
            'action': 'append',
            'help': 'Directory to scan for .adoc files (can be used multiple times, default: current directory)',
        }),
        (('--non-interactive',), {
            'action': 'store_true',
            'help': 'Non-interactive mode: automatically use most common link text for variations',
        }),
        (('--dry-run',), {
            'action': 'store_true',
            'help': 'Preview changes without modifying files',
        }),
        (('-v', '--verbose'), {
            'action': 'store_true',
            'help': 'Enable verbose output',
        }),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args()

    try:
        succeeded = extract_link_attributes(
            attributes_file=args.attributes_file,
            scan_dirs=args.scan_dir,
            interactive=not args.non_interactive,
            dry_run=args.dry_run
        )

        if not succeeded:
            sys.exit(1)

    except KeyboardInterrupt:
        print("\nOperation cancelled.")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        if args.verbose:
            # Imported lazily: only needed when a traceback is requested.
            import traceback
            traceback.print_exc()
        sys.exit(1)
90
+
91
+
92
+ if __name__ == '__main__':
93
+ main()
@@ -1,13 +1,15 @@
1
1
  #!/usr/bin/env python3
2
+ """
3
+ format-asciidoc-spacing - Format AsciiDoc spacing.
2
4
 
3
- """Format AsciiDoc spacing - ensures blank lines after headings and around include directives"""
5
+ Ensures blank lines after headings and around include directives.
6
+ """
4
7
 
5
8
  import argparse
6
- import os
7
- import re
8
9
  import sys
9
10
  from pathlib import Path
10
- from typing import List, Tuple
11
+
12
+ from doc_utils.format_asciidoc_spacing import process_file, find_adoc_files
11
13
 
12
14
 
13
15
  # Colors for output
@@ -23,224 +25,6 @@ def print_colored(message: str, color: str = Colors.NC) -> None:
23
25
  print(f"{color}{message}{Colors.NC}")
24
26
 
25
27
 
26
def process_file(file_path: Path, dry_run: bool = False, verbose: bool = False) -> bool:
    """
    Process a single AsciiDoc file to fix spacing issues.

    The file is scanned line by line and blank lines are inserted around
    conditional blocks (ifdef/ifndef ... endif), after headings, and around
    include directives (treating a comment or attribute line directly above
    an include as belonging to it). Consecutive blank lines are collapsed
    before writing, and the file is rewritten with '\\n' line endings.

    Args:
        file_path: Path to the file to process
        dry_run: If True, show what would be changed without modifying
        verbose: If True, show detailed output

    Returns:
        True if changes were made (or would be made in dry-run), False
        otherwise. False is also returned when the file cannot be read or
        written; the error is reported via print_colored.
    """
    # NOTE(review): print_colored and Colors are not defined in this block;
    # presumably they are module-level helpers of the same script.
    if verbose:
        print(f"Processing: {file_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (IOError, UnicodeDecodeError) as e:
        print_colored(f"Error reading {file_path}: {e}", Colors.RED)
        return False

    # Remove trailing newlines from lines for processing
    lines = [line.rstrip('\n\r') for line in lines]

    new_lines = []
    changes_made = False
    in_block = False # Track if we're inside a block (admonition, listing, etc.)
    in_conditional = False # Track if we're inside a conditional block

    for i, current_line in enumerate(lines):
        # Neighbours default to "" at the file edges, which the checks below
        # treat the same as "no line".
        prev_line = lines[i-1] if i > 0 else ""
        next_line = lines[i+1] if i + 1 < len(lines) else ""

        # Check for conditional start (ifdef:: or ifndef::)
        if re.match(r'^(ifdef::|ifndef::)', current_line):
            in_conditional = True
            # Add blank line before conditional if needed
            if (prev_line and
                    not re.match(r'^\s*$', prev_line) and
                    not re.match(r'^(ifdef::|ifndef::|endif::)', prev_line)):
                new_lines.append("")
                changes_made = True
                if verbose:
                    print(f" Added blank line before conditional block")
            new_lines.append(current_line)

        # Check for conditional end (endif::)
        elif re.match(r'^endif::', current_line):
            new_lines.append(current_line)
            # A single flag is used, so any endif:: clears it even when
            # conditionals are nested.
            in_conditional = False
            # Add blank line after conditional if needed
            if (next_line and
                    not re.match(r'^\s*$', next_line) and
                    not re.match(r'^(ifdef::|ifndef::|endif::)', next_line)):
                new_lines.append("")
                changes_made = True
                if verbose:
                    print(f" Added blank line after conditional block")

        # Check for block delimiters (====, ----, ...., ____)
        # These are used for admonitions, listing blocks, literal blocks, etc.
        # Any delimiter line flips the flag, regardless of which delimiter
        # opened the block.
        elif re.match(r'^(====+|----+|\.\.\.\.+|____+)$', current_line):
            in_block = not in_block # Toggle block state
            new_lines.append(current_line)
        # Check if current line is a heading (but not if we're in a block)
        elif not in_block and re.match(r'^=+\s+', current_line):
            new_lines.append(current_line)

            # Check if next line is not empty and not another heading
            if (next_line and
                    not re.match(r'^=+\s+', next_line) and
                    not re.match(r'^\s*$', next_line)):
                new_lines.append("")
                changes_made = True
                if verbose:
                    truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                    print(f" Added blank line after heading: {truncated}")

        # Check if current line is a comment (AsciiDoc comments start with //)
        elif re.match(r'^//', current_line):
            # Skip special handling if we're inside a conditional block
            if in_conditional:
                new_lines.append(current_line)
            else:
                # Check if next line is an include directive
                if next_line and re.match(r'^include::', next_line):
                    # This comment belongs to the include, add blank line before comment if needed
                    if (prev_line and
                            not re.match(r'^\s*$', prev_line) and
                            not re.match(r'^//', prev_line) and
                            not re.match(r'^:', prev_line)): # Don't add if previous is attribute
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            print(f" Added blank line before comment above include")
                new_lines.append(current_line)

        # Check if current line is an attribute (starts with :)
        elif re.match(r'^:', current_line):
            # Skip special handling if we're inside a conditional block
            if in_conditional:
                new_lines.append(current_line)
            else:
                # Check if next line is an include directive
                if next_line and re.match(r'^include::', next_line):
                    # This attribute belongs to the include, add blank line before attribute if needed
                    if (prev_line and
                            not re.match(r'^\s*$', prev_line) and
                            not re.match(r'^//', prev_line) and
                            not re.match(r'^:', prev_line)): # Don't add if previous is comment or attribute
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            print(f" Added blank line before attribute above include")
                new_lines.append(current_line)

        # Check if current line is an include directive
        elif re.match(r'^include::', current_line):
            # Skip special handling if we're inside a conditional block
            if in_conditional:
                new_lines.append(current_line)
            else:
                # Check if this is an attribute include (contains "attribute" in the path)
                is_attribute_include = 'attribute' in current_line.lower()

                # Find the first H1 heading among the lines that precede this
                # one, looking only at the first 10 lines of the file
                # (indices 0 .. min(i, 10) - 1).
                h1_position = -1
                for j in range(min(i, 10)):
                    if re.match(r'^=\s+', lines[j]): # H1 heading starts with single =
                        h1_position = j
                        break

                # If this is an attribute include near the H1 heading, don't add surrounding blank lines
                # ("near" means at most two lines below the heading)
                is_near_h1 = h1_position >= 0 and (i - h1_position) <= 2

                # Check if previous line is a comment or attribute (which belongs to this include)
                has_comment_above = prev_line and re.match(r'^//', prev_line)
                has_attribute_above = prev_line and re.match(r'^:', prev_line)

                # If it's an attribute include near H1, only the heading's blank line is needed
                if not (is_attribute_include and is_near_h1):
                    # Don't add blank line if there's a comment or attribute above (it was handled by the comment/attribute logic)
                    if not has_comment_above and not has_attribute_above:
                        # Add blank line before include if previous line is not empty and not an include
                        if (prev_line and
                                not re.match(r'^\s*$', prev_line) and
                                not re.match(r'^include::', prev_line)):
                            new_lines.append("")
                            changes_made = True
                            if verbose:
                                truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                                print(f" Added blank line before include: {truncated}")

                new_lines.append(current_line)

                # If it's an attribute include near H1, don't add blank line after
                if not (is_attribute_include and is_near_h1):
                    # Add blank line after include if next line exists and is not empty and not an include
                    if (next_line and
                            not re.match(r'^\s*$', next_line) and
                            not re.match(r'^include::', next_line)):
                        new_lines.append("")
                        changes_made = True
                        if verbose:
                            truncated = current_line[:50] + "..." if len(current_line) > 50 else current_line
                            print(f" Added blank line after include: {truncated}")

        else:
            new_lines.append(current_line)

    # Apply changes if any were made
    if changes_made:
        # Clean up any consecutive blank lines we may have added
        # (this also collapses runs of empty lines already present in the file)
        cleaned_lines = []
        for i, line in enumerate(new_lines):
            # Check if this is a blank line we're about to add
            if line == "":
                # Check if the previous line is also a blank line
                if i > 0 and cleaned_lines and cleaned_lines[-1] == "":
                    # Skip this blank line as we already have one
                    continue
            cleaned_lines.append(line)

        if dry_run:
            print_colored(f"Would modify: {file_path}", Colors.YELLOW)
        else:
            try:
                with open(file_path, 'w', encoding='utf-8') as f:
                    for line in cleaned_lines:
                        f.write(line + '\n')
                print_colored(f"Modified: {file_path}", Colors.GREEN)
            except IOError as e:
                print_colored(f"Error writing {file_path}: {e}", Colors.RED)
                return False
    else:
        if verbose:
            print(" No changes needed")

    return changes_made
227
-
228
-
229
def find_adoc_files(path: Path) -> List[Path]:
    """Collect the .adoc files designated by ``path``.

    A file path yields itself when its suffix is ``.adoc``; otherwise a
    warning is printed and nothing is returned. A directory is searched
    recursively. Any other path yields an empty list.
    """
    if path.is_file():
        if path.suffix != '.adoc':
            print_colored(f"Warning: {path} is not an AsciiDoc file (.adoc)", Colors.YELLOW)
            return []
        return [path]

    if path.is_dir():
        return list(path.rglob('*.adoc'))

    return []
242
-
243
-
244
28
  def main():
245
29
  """Main entry point"""
246
30
  parser = argparse.ArgumentParser(
@@ -258,7 +42,7 @@ Examples:
258
42
  %(prog)s --dry-run modules/ # Preview changes without modifying
259
43
  """
260
44
  )
261
-
45
+
262
46
  parser.add_argument(
263
47
  'path',
264
48
  nargs='?',
@@ -275,42 +59,68 @@ Examples:
275
59
  action='store_true',
276
60
  help='Show detailed output'
277
61
  )
278
-
62
+
279
63
  args = parser.parse_args()
280
-
64
+
281
65
  # Convert path to Path object
282
66
  target_path = Path(args.path)
283
-
67
+
284
68
  # Check if path exists
285
69
  if not target_path.exists():
286
70
  print_colored(f"Error: Path does not exist: {target_path}", Colors.RED)
287
71
  sys.exit(1)
288
-
72
+
289
73
  # Display dry-run mode message
290
74
  if args.dry_run:
291
75
  print_colored("DRY RUN MODE - No files will be modified", Colors.YELLOW)
292
-
76
+
293
77
  # Find all AsciiDoc files
294
78
  adoc_files = find_adoc_files(target_path)
295
-
79
+
296
80
  if not adoc_files:
81
+ if target_path.is_file():
82
+ print_colored(f"Warning: {target_path} is not an AsciiDoc file (.adoc)", Colors.YELLOW)
297
83
  print(f"Processed 0 AsciiDoc file(s)")
298
84
  print("AsciiDoc spacing formatting complete!")
299
85
  return
300
-
86
+
301
87
  # Process each file
302
88
  files_processed = 0
89
+ files_modified = 0
90
+
303
91
  for file_path in adoc_files:
304
92
  try:
305
- process_file(file_path, args.dry_run, args.verbose)
93
+ changes_made, messages = process_file(file_path, args.dry_run, args.verbose)
94
+
95
+ # Print verbose messages
96
+ if args.verbose:
97
+ for msg in messages:
98
+ print(msg)
99
+
100
+ if changes_made:
101
+ files_modified += 1
102
+ if args.dry_run:
103
+ print_colored(f"Would modify: {file_path}", Colors.YELLOW)
104
+ else:
105
+ print_colored(f"Modified: {file_path}", Colors.GREEN)
106
+ elif args.verbose:
107
+ print(f" No changes needed for: {file_path}")
108
+
306
109
  files_processed += 1
110
+
307
111
  except KeyboardInterrupt:
308
112
  print_colored("\nOperation cancelled by user", Colors.YELLOW)
309
113
  sys.exit(1)
114
+ except IOError as e:
115
+ print_colored(f"{e}", Colors.RED)
310
116
  except Exception as e:
311
117
  print_colored(f"Unexpected error processing {file_path}: {e}", Colors.RED)
312
-
118
+
313
119
  print(f"Processed {files_processed} AsciiDoc file(s)")
120
+ if args.dry_run and files_modified > 0:
121
+ print(f"Would modify {files_modified} file(s)")
122
+ elif files_modified > 0:
123
+ print(f"Modified {files_modified} file(s)")
314
124
  print("AsciiDoc spacing formatting complete!")
315
125
 
316
126