rolfedh-doc-utils 0.1.39__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
@@ -0,0 +1,347 @@
+ """
+ Core logic for finding AsciiDoc files that are included more than once.
+
+ Scans AsciiDoc files for include:: macros and identifies files that are
+ included from multiple locations, which may indicate opportunities for
+ content reuse or potential maintenance issues.
+ """
+
+ import os
+ import re
+ from collections import defaultdict
+ from dataclasses import dataclass, field
+ from pathlib import Path
+
+
+ INCLUDE_PATTERN = re.compile(r'^include::([^\[]+)\[', re.MULTILINE)
+
+ # Files commonly expected to be included in multiple places
+ DEFAULT_COMMON_INCLUDES = {
+     'attributes.adoc',
+     'common/attributes.adoc',
+     'common/revision-info.adoc',
+     '_attributes.adoc',
+ }
+
+ # Default directories to exclude
+ DEFAULT_EXCLUDE_DIRS = {'.git', '.archive', 'target', 'build', 'node_modules'}
+
+
+ @dataclass
+ class IncludeLocation:
+     """Represents where an include was found."""
+     source_file: str
+     line_number: int
+     raw_include_path: str
+
+
+ @dataclass
+ class DuplicateInclude:
+     """Represents a file that is included multiple times."""
+     resolved_path: str
+     locations: list[IncludeLocation] = field(default_factory=list)
+     is_common: bool = False
+
+     @property
+     def count(self) -> int:
+         return len(self.locations)
+
+
+ def find_includes_in_file(file_path: str) -> list[tuple[str, int]]:
+     """
+     Extract all include:: targets from an AsciiDoc file.
+
+     Returns list of (include_target, line_number) tuples.
+     """
+     includes = []
+     try:
+         with open(file_path, 'r', encoding='utf-8') as f:
+             for line_num, line in enumerate(f, 1):
+                 match = INCLUDE_PATTERN.match(line)
+                 if match:
+                     includes.append((match.group(1), line_num))
+     except (IOError, UnicodeDecodeError) as e:
+         print(f"Warning: Could not read {file_path}: {e}")
+     return includes
+
+
+ def resolve_include_path(include_target: str, source_file: str, base_dir: str) -> str:
+     """
+     Resolve an include target to a normalized path relative to base directory.
+     """
+     source_dir = os.path.dirname(source_file)
+
+     # Resolve the path relative to the source file's directory.
+     # Bare targets and explicitly relative targets ('./', '../') resolve
+     # identically here, so a single normalized join handles every case
+     # without branching.
+     resolved = os.path.normpath(os.path.join(source_dir, include_target))
+
+     # Make relative to base directory if possible
+     try:
+         resolved = os.path.relpath(resolved, base_dir)
+     except ValueError:
+         pass  # Keep absolute path if on different drive (Windows)
+
+     return resolved
+
+
+ def is_common_include(path: str, common_includes: set[str]) -> bool:
+     """Check if a path matches a common include pattern."""
+     basename = os.path.basename(path)
+     return path in common_includes or basename in common_includes
+
+
+ def collect_adoc_files(
+     directory: str,
+     exclude_dirs: set[str] | None = None,
+     exclude_files: set[str] | None = None
+ ) -> list[str]:
+     """
+     Collect all .adoc files in a directory recursively.
+
+     Args:
+         directory: Base directory to scan
+         exclude_dirs: Directory names to exclude
+         exclude_files: File names or paths to exclude
+
+     Returns:
+         List of absolute paths to .adoc files
+     """
+     exclude_dirs = DEFAULT_EXCLUDE_DIRS if exclude_dirs is None else exclude_dirs
+     exclude_files = exclude_files or set()
+
+     adoc_files = []
+     base_path = os.path.abspath(directory)
+
+     for root, dirs, files in os.walk(base_path, followlinks=False):
+         # Filter out excluded directories
+         dirs[:] = [d for d in dirs if d not in exclude_dirs]
+
+         for filename in files:
+             if not filename.endswith('.adoc'):
+                 continue
+
+             filepath = os.path.join(root, filename)
+             rel_path = os.path.relpath(filepath, base_path)
+
+             # Check exclusions
+             if filename in exclude_files or rel_path in exclude_files:
+                 continue
+
+             adoc_files.append(filepath)
+
+     return sorted(adoc_files)
+
+
+ def find_duplicate_includes(
+     directory: str,
+     exclude_dirs: set[str] | None = None,
+     exclude_files: set[str] | None = None,
+     include_common: bool = False,
+     common_includes: set[str] | None = None
+ ) -> tuple[list[DuplicateInclude], int, int]:
+     """
+     Find all files that are included more than once.
+
+     Args:
+         directory: Base directory to scan
+         exclude_dirs: Directory names to exclude
+         exclude_files: File names or paths to exclude
+         include_common: If True, include common files in results
+         common_includes: Set of paths considered "common" (expected duplicates)
+
+     Returns:
+         Tuple of (duplicates, total_files_scanned, excluded_common_count)
+     """
+     if common_includes is None:
+         common_includes = DEFAULT_COMMON_INCLUDES
+
+     # Collect all .adoc files
+     adoc_files = collect_adoc_files(directory, exclude_dirs, exclude_files)
+     base_dir = os.path.abspath(directory)
+
+     # Track includes: {resolved_path: [IncludeLocation, ...]}
+     include_map: dict[str, list[IncludeLocation]] = defaultdict(list)
+
+     for source_file in adoc_files:
+         includes = find_includes_in_file(source_file)
+         for include_target, line_num in includes:
+             resolved = resolve_include_path(include_target, source_file, base_dir)
+             rel_source = os.path.relpath(source_file, base_dir)
+
+             include_map[resolved].append(IncludeLocation(
+                 source_file=rel_source,
+                 line_number=line_num,
+                 raw_include_path=include_target
+             ))
+
+     # Find duplicates
+     duplicates = []
+     excluded_common_count = 0
+
+     for path, locations in include_map.items():
+         if len(locations) <= 1:
+             continue
+
+         is_common = is_common_include(path, common_includes)
+
+         if is_common and not include_common:
+             excluded_common_count += 1
+             continue
+
+         duplicates.append(DuplicateInclude(
+             resolved_path=path,
+             locations=locations,
+             is_common=is_common
+         ))
+
+     # Sort by count descending
+     duplicates.sort(key=lambda d: d.count, reverse=True)
+
+     return duplicates, len(adoc_files), excluded_common_count
+
+
+ def format_txt_report(
+     duplicates: list[DuplicateInclude],
+     total_files: int,
+     excluded_common: int,
+     directory: str,
+     cmd_line: str
+ ) -> str:
+     """Format results as plain text."""
+     lines = []
+
+     lines.append(f"Command: {cmd_line}")
+     lines.append(f"Directory: {os.path.abspath(directory)}")
+     lines.append(f"Files scanned: {total_files}")
+     lines.append("")
+
+     if not duplicates:
+         if excluded_common:
+             lines.append(f"No unexpected duplicates found ({excluded_common} common files excluded).")
+             lines.append("Use --include-common to see all duplicates.")
+         else:
+             lines.append("No files are included more than once.")
+         return '\n'.join(lines)
+
+     lines.append(f"Found {len(duplicates)} files included more than once:")
+     if excluded_common:
+         lines.append(f"  ({excluded_common} common files excluded; use --include-common to see all)")
+     lines.append("")
+     lines.append("=" * 70)
+
+     for i, dup in enumerate(duplicates, 1):
+         common_marker = " [COMMON]" if dup.is_common else ""
+         lines.append(f"\n[{i}] {dup.resolved_path}{common_marker}")
+         lines.append(f"  Included {dup.count} times:")
+         lines.append("-" * 50)
+
+         for loc in dup.locations:
+             lines.append(f"  - {loc.source_file}:{loc.line_number}")
+
+     return '\n'.join(lines)
+
+
+ def format_csv_report(
+     duplicates: list[DuplicateInclude],
+     total_files: int,
+     excluded_common: int,
+     directory: str,
+     cmd_line: str
+ ) -> str:
+     """Format results as CSV."""
+     lines = []
+     lines.append("Included File,Inclusion Count,Is Common,Source File,Line Number,Raw Include Path")
+
+     for dup in duplicates:
+         for loc in dup.locations:
+             lines.append(
+                 f'"{dup.resolved_path}",{dup.count},{dup.is_common},'
+                 f'"{loc.source_file}",{loc.line_number},"{loc.raw_include_path}"'
+             )
+
+     return '\n'.join(lines)
+
+
+ def format_json_report(
+     duplicates: list[DuplicateInclude],
+     total_files: int,
+     excluded_common: int,
+     directory: str,
+     cmd_line: str
+ ) -> str:
+     """Format results as JSON."""
+     import json
+
+     data = {
+         "command": cmd_line,
+         "directory": os.path.abspath(directory),
+         "files_scanned": total_files,
+         "excluded_common_count": excluded_common,
+         "duplicate_count": len(duplicates),
+         "duplicates": [
+             {
+                 "path": dup.resolved_path,
+                 "count": dup.count,
+                 "is_common": dup.is_common,
+                 "locations": [
+                     {
+                         "source_file": loc.source_file,
+                         "line_number": loc.line_number,
+                         "raw_include_path": loc.raw_include_path
+                     }
+                     for loc in dup.locations
+                 ]
+             }
+             for dup in duplicates
+         ]
+     }
+
+     return json.dumps(data, indent=2)
+
+
+ def format_md_report(
+     duplicates: list[DuplicateInclude],
+     total_files: int,
+     excluded_common: int,
+     directory: str,
+     cmd_line: str
+ ) -> str:
+     """Format results as Markdown."""
+     lines = []
+
+     lines.append("# Duplicate Includes Report")
+     lines.append("")
+     lines.append(f"**Command:** `{cmd_line}`")
+     lines.append(f"**Directory:** `{os.path.abspath(directory)}`")
+     lines.append(f"**Files scanned:** {total_files}")
+     lines.append("")
+
+     if not duplicates:
+         if excluded_common:
+             lines.append(f"No unexpected duplicates found ({excluded_common} common files excluded).")
+         else:
+             lines.append("No files are included more than once.")
+         return '\n'.join(lines)
+
+     lines.append("## Summary")
+     lines.append("")
+     lines.append(f"Found **{len(duplicates)}** files included more than once.")
+     if excluded_common:
+         lines.append(f"({excluded_common} common files excluded)")
+     lines.append("")
+
+     for i, dup in enumerate(duplicates, 1):
+         common_marker = " *(common)*" if dup.is_common else ""
+         lines.append(f"### {i}. `{dup.resolved_path}`{common_marker}")
+         lines.append("")
+         lines.append(f"Included **{dup.count}** times:")
+         lines.append("")
+
+         for loc in dup.locations:
+             lines.append(f"- `{loc.source_file}:{loc.line_number}`")
+
+         lines.append("")
+
+     return '\n'.join(lines)
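
For orientation, a minimal usage sketch of the module added above (not part of the diff; the import path doc_utils.find_duplicate_includes is an assumption, since this hunk does not show the new file's name):

    # Hypothetical import path -- the diff does not name the new file.
    from doc_utils.find_duplicate_includes import (
        find_duplicate_includes,
        format_txt_report,
    )

    duplicates, total_files, excluded = find_duplicate_includes('docs')
    print(format_txt_report(duplicates, total_files, excluded,
                            directory='docs',
                            cmd_line='find-duplicate-includes docs'))
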
@@ -0,0 +1,164 @@
+ """
+ Module for inventorying AsciiDoc conditional directives.
+
+ Functions:
+     - find_adoc_files: Recursively find all .adoc files in a directory.
+     - scan_file_for_conditionals: Scan a file for conditional directives.
+     - create_inventory: Create an inventory of all conditionals found in .adoc files.
+ """
+
+ import re
+ from datetime import datetime
+ from pathlib import Path
+ from collections import defaultdict
+ from typing import List, Tuple, Dict, Set
+
+
+ # Pattern to match AsciiDoc conditionals
+ CONDITIONAL_PATTERN = re.compile(
+     r'^(ifdef|ifndef|endif|ifeval)::(.*)$',
+     re.MULTILINE
+ )
+
+
+ def find_adoc_files(directory: Path) -> List[Path]:
+     """Find all .adoc files in the given directory recursively."""
+     return sorted(directory.rglob('*.adoc'))
+
28
+
29
+ def scan_file_for_conditionals(filepath: Path) -> List[Tuple[int, str, str]]:
30
+ """
31
+ Scan a file for conditional directives.
32
+
33
+ Args:
34
+ filepath: Path to the .adoc file to scan.
35
+
36
+ Returns:
37
+ A list of tuples: (line_number, directive_type, condition)
38
+ """
39
+ results = []
40
+ try:
41
+ content = filepath.read_text(encoding='utf-8')
42
+ for i, line in enumerate(content.splitlines(), start=1):
43
+ match = CONDITIONAL_PATTERN.match(line.strip())
44
+ if match:
45
+ directive_type = match.group(1)
46
+ condition = match.group(2)
47
+ results.append((i, directive_type, condition))
48
+ except Exception as e:
49
+ print(f"Warning: Could not read {filepath}: {e}")
50
+ return results
51
+
52
+
53
+ def create_inventory(directory: Path, output_dir: Path = None) -> Path:
54
+ """
55
+ Create an inventory of all conditionals found in .adoc files.
56
+
57
+ Args:
58
+ directory: Directory to scan for .adoc files.
59
+ output_dir: Directory to write the inventory file. Defaults to current directory.
60
+
61
+ Returns:
62
+ The path to the created inventory file.
63
+ """
64
+ if output_dir is None:
65
+ output_dir = Path.cwd()
66
+
67
+ timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
68
+ output_file = output_dir / f'inventory-{timestamp}.txt'
69
+
70
+ adoc_files = find_adoc_files(directory)
71
+
72
+ # Track statistics
73
+ stats: Dict[str, int] = defaultdict(int)
74
+ conditions_used: Dict[str, List[Tuple[Path, int]]] = defaultdict(list)
75
+ total_files_with_conditionals = 0
76
+
77
+ lines = []
78
+ lines.append("AsciiDoc Conditionals Inventory")
79
+ lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
80
+ lines.append(f"Directory: {directory.resolve()}")
81
+ lines.append("=" * 80)
82
+ lines.append("")
83
+
84
+ for filepath in adoc_files:
85
+ conditionals = scan_file_for_conditionals(filepath)
86
+ if conditionals:
87
+ total_files_with_conditionals += 1
88
+ relative_path = filepath.relative_to(directory)
89
+ lines.append(f"File: {relative_path}")
90
+ lines.append("-" * 60)
91
+
92
+ for line_num, directive, condition in conditionals:
93
+ stats[directive] += 1
94
+ # Extract the condition name (before any brackets)
95
+ cond_name = condition.split('[')[0] if condition else '(empty)'
96
+ if directive in ('ifdef', 'ifndef', 'ifeval'):
97
+ conditions_used[cond_name].append((relative_path, line_num))
98
+
99
+ lines.append(f" Line {line_num:5d}: {directive}::{condition}")
100
+
101
+ lines.append("")
102
+
103
+ # Add summary section
104
+ lines.append("=" * 80)
105
+ lines.append("SUMMARY")
106
+ lines.append("=" * 80)
107
+ lines.append("")
108
+ lines.append(f"Total .adoc files scanned: {len(adoc_files)}")
109
+ lines.append(f"Files with conditionals: {total_files_with_conditionals}")
110
+ lines.append("")
111
+ lines.append("Directive counts:")
112
+ for directive in sorted(stats.keys()):
113
+ lines.append(f" {directive}: {stats[directive]}")
114
+ lines.append(f" Total: {sum(stats.values())}")
115
+ lines.append("")
116
+
117
+ # List unique conditions
118
+ lines.append("=" * 80)
119
+ lines.append("UNIQUE CONDITIONS USED")
120
+ lines.append("=" * 80)
121
+ lines.append("")
122
+ for cond in sorted(conditions_used.keys()):
123
+ occurrences = conditions_used[cond]
124
+ lines.append(f" {cond}: {len(occurrences)} occurrences")
125
+
126
+ # Write the inventory file
127
+ output_file.write_text('\n'.join(lines), encoding='utf-8')
128
+
129
+ return output_file
+
+
+ def get_inventory_stats(directory: Path) -> Dict:
+     """
+     Get statistics about conditionals without writing a file.
+
+     Args:
+         directory: Directory to scan for .adoc files.
+
+     Returns:
+         Dictionary with statistics about conditionals found.
+     """
+     adoc_files = find_adoc_files(directory)
+
+     stats: Dict[str, int] = defaultdict(int)
+     conditions_used: Dict[str, int] = defaultdict(int)
+     files_with_conditionals: Set[Path] = set()
+
+     for filepath in adoc_files:
+         conditionals = scan_file_for_conditionals(filepath)
+         if conditionals:
+             files_with_conditionals.add(filepath)
+             for line_num, directive, condition in conditionals:
+                 stats[directive] += 1
+                 cond_name = condition.split('[')[0] if condition else '(empty)'
+                 if directive in ('ifdef', 'ifndef', 'ifeval'):
+                     conditions_used[cond_name] += 1
+
+     return {
+         'total_files': len(adoc_files),
+         'files_with_conditionals': len(files_with_conditionals),
+         'directive_counts': dict(stats),
+         'total_conditionals': sum(stats.values()),
+         'unique_conditions': dict(conditions_used),
+     }
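
A similar sketch for the conditionals inventory added above (again, the module path doc_utils.conditionals_inventory is hypothetical, since the diff does not name the file):

    from pathlib import Path
    # Hypothetical import path -- the diff does not name the new file.
    from doc_utils.conditionals_inventory import create_inventory, get_inventory_stats

    # Writes inventory-<timestamp>.txt to the current directory and returns its path.
    report_file = create_inventory(Path('docs'))

    # Same scan, but returns statistics without writing a file.
    stats = get_inventory_stats(Path('docs'))
    print(f"{stats['total_conditionals']} conditionals in "
          f"{stats['files_with_conditionals']} of {stats['total_files']} files")
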
@@ -212,3 +212,51 @@ def comment_out_unused_attributes(attr_file: str, unused_attrs: List[str]) -> int
          f.writelines(new_lines)

      return commented_count
+
+
+ def remove_unused_attributes(attr_file: str, unused_attrs: List[str] | None = None) -> int:
+     """
+     Remove unused attributes from the attributes file.
+
+     This removes lines that either:
+     - Define an attribute in the unused_attrs list, or
+     - Are already marked with "// Unused" prefix
+
+     Args:
+         attr_file: Path to the attributes file
+         unused_attrs: Optional list of unused attribute names. If None, only
+             removes lines already marked with "// Unused".
+
+     Returns:
+         Number of lines removed
+     """
+     # Read the file
+     with open(attr_file, 'r', encoding='utf-8') as f:
+         lines = f.readlines()
+
+     # Create a set for faster lookup
+     unused_set = set(unused_attrs) if unused_attrs else set()
+     removed_count = 0
+
+     # Process each line
+     new_lines = []
+     for line in lines:
+         # Check if line is already marked as unused
+         if line.startswith('// Unused '):
+             removed_count += 1
+             continue
+
+         # Check if this line defines an unused attribute
+         if unused_attrs:
+             match = re.match(r'^:([\w-]+):', line)
+             if match and match.group(1) in unused_set:
+                 removed_count += 1
+                 continue
+
+         new_lines.append(line)
+
+     # Write back to the file
+     with open(attr_file, 'w', encoding='utf-8') as f:
+         f.writelines(new_lines)
+
+     return removed_count
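
A sketch of how the new helper pairs with the existing comment_out_unused_attributes (the attribute names below are invented for illustration):

    # Two-step cleanup: mark definitions first, review, then delete the marked lines.
    unused = ['legacy-url', 'old-product-name']  # hypothetical attribute names
    comment_out_unused_attributes('common/attributes.adoc', unused)
    # ...after reviewing the "// Unused" markers:
    removed = remove_unused_attributes('common/attributes.adoc')

    # Or do it in one pass by passing the list directly:
    removed = remove_unused_attributes('common/attributes.adoc', unused)
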
doc_utils/version.py CHANGED
@@ -1,7 +1,7 @@
  """Version information for doc-utils."""

  # This should match the version in pyproject.toml
- __version__ = "0.1.39"
+ __version__ = "0.1.40"

  def get_version():
      """Return the current version string."""