rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (52)
  1. archive_unused_files.py +18 -5
  2. archive_unused_images.py +9 -2
  3. callout_lib/__init__.py +22 -0
  4. callout_lib/converter_bullets.py +103 -0
  5. callout_lib/converter_comments.py +295 -0
  6. callout_lib/converter_deflist.py +134 -0
  7. callout_lib/detector.py +364 -0
  8. callout_lib/table_parser.py +804 -0
  9. check_published_links.py +1083 -0
  10. check_scannability.py +6 -0
  11. check_source_directives.py +101 -0
  12. convert_callouts_interactive.py +567 -0
  13. convert_callouts_to_deflist.py +628 -0
  14. convert_freemarker_to_asciidoc.py +288 -0
  15. convert_tables_to_deflists.py +479 -0
  16. doc_utils/convert_freemarker_to_asciidoc.py +708 -0
  17. doc_utils/duplicate_content.py +409 -0
  18. doc_utils/duplicate_includes.py +347 -0
  19. doc_utils/extract_link_attributes.py +618 -0
  20. doc_utils/format_asciidoc_spacing.py +285 -0
  21. doc_utils/insert_abstract_role.py +220 -0
  22. doc_utils/inventory_conditionals.py +164 -0
  23. doc_utils/missing_source_directive.py +211 -0
  24. doc_utils/replace_link_attributes.py +187 -0
  25. doc_utils/spinner.py +119 -0
  26. doc_utils/unused_adoc.py +150 -22
  27. doc_utils/unused_attributes.py +218 -6
  28. doc_utils/unused_images.py +81 -9
  29. doc_utils/validate_links.py +576 -0
  30. doc_utils/version.py +8 -0
  31. doc_utils/version_check.py +243 -0
  32. doc_utils/warnings_report.py +237 -0
  33. doc_utils_cli.py +158 -0
  34. extract_link_attributes.py +120 -0
  35. find_duplicate_content.py +209 -0
  36. find_duplicate_includes.py +198 -0
  37. find_unused_attributes.py +84 -6
  38. format_asciidoc_spacing.py +134 -0
  39. insert_abstract_role.py +163 -0
  40. inventory_conditionals.py +53 -0
  41. replace_link_attributes.py +214 -0
  42. rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
  43. rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
  44. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
  45. rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
  46. rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
  47. validate_links.py +213 -0
  48. rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
  49. rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
  50. rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
  51. rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
  52. {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
doc_utils/duplicate_includes.py (new file)
@@ -0,0 +1,347 @@
+ """
+ Core logic for finding AsciiDoc files that are included more than once.
+
+ Scans AsciiDoc files for include:: macros and identifies files that are
+ included from multiple locations, which may indicate opportunities for
+ content reuse or potential maintenance issues.
+ """
+
+ import os
+ import re
+ from collections import defaultdict
+ from dataclasses import dataclass, field
+ from pathlib import Path
+
+
+ INCLUDE_PATTERN = re.compile(r'^include::([^\[]+)\[', re.MULTILINE)
+
+ # Files commonly expected to be included in multiple places
+ DEFAULT_COMMON_INCLUDES = {
+     'attributes.adoc',
+     'common/attributes.adoc',
+     'common/revision-info.adoc',
+     '_attributes.adoc',
+ }
+
+ # Default directories to exclude
+ DEFAULT_EXCLUDE_DIRS = {'.git', '.archive', 'target', 'build', 'node_modules'}
+
+
+ @dataclass
+ class IncludeLocation:
+     """Represents where an include was found."""
+     source_file: str
+     line_number: int
+     raw_include_path: str
+
+
+ @dataclass
+ class DuplicateInclude:
+     """Represents a file that is included multiple times."""
+     resolved_path: str
+     locations: list[IncludeLocation] = field(default_factory=list)
+     is_common: bool = False
+
+     @property
+     def count(self) -> int:
+         return len(self.locations)
+
+
+ def find_includes_in_file(file_path: str) -> list[tuple[str, int]]:
+     """
+     Extract all include:: targets from an AsciiDoc file.
+
+     Returns list of (include_target, line_number) tuples.
+     """
+     includes = []
+     try:
+         with open(file_path, 'r', encoding='utf-8') as f:
+             for line_num, line in enumerate(f, 1):
+                 match = re.match(r'^include::([^\[]+)\[', line)
+                 if match:
+                     includes.append((match.group(1), line_num))
+     except (IOError, UnicodeDecodeError) as e:
+         print(f"Warning: Could not read {file_path}: {e}")
+     return includes
+
+
+ def resolve_include_path(include_target: str, source_file: str, base_dir: str) -> str:
+     """
+     Resolve an include target to a normalized path relative to base directory.
+     """
+     source_dir = os.path.dirname(source_file)
+
+     # Resolve the path relative to source file's directory
+     if include_target.startswith('../') or include_target.startswith('./'):
+         resolved = os.path.normpath(os.path.join(source_dir, include_target))
+     else:
+         resolved = os.path.normpath(os.path.join(source_dir, include_target))
+
+     # Make relative to base directory if possible
+     try:
+         resolved = os.path.relpath(resolved, base_dir)
+     except ValueError:
+         pass # Keep absolute path if on different drive (Windows)
+
+     return resolved
+
+
+ def is_common_include(path: str, common_includes: set[str]) -> bool:
+     """Check if a path matches a common include pattern."""
+     basename = os.path.basename(path)
+     return path in common_includes or basename in common_includes
+
+
+ def collect_adoc_files(
+     directory: str,
+     exclude_dirs: set[str] | None = None,
+     exclude_files: set[str] | None = None
+ ) -> list[str]:
+     """
+     Collect all .adoc files in a directory recursively.
+
+     Args:
+         directory: Base directory to scan
+         exclude_dirs: Directory names to exclude
+         exclude_files: File names or paths to exclude
+
+     Returns:
+         List of absolute paths to .adoc files
+     """
+     exclude_dirs = exclude_dirs or DEFAULT_EXCLUDE_DIRS
+     exclude_files = exclude_files or set()
+
+     adoc_files = []
+     base_path = os.path.abspath(directory)
+
+     for root, dirs, files in os.walk(base_path, followlinks=False):
+         # Filter out excluded directories
+         dirs[:] = [d for d in dirs if d not in exclude_dirs]
+
+         for filename in files:
+             if not filename.endswith('.adoc'):
+                 continue
+
+             filepath = os.path.join(root, filename)
+             rel_path = os.path.relpath(filepath, base_path)
+
+             # Check exclusions
+             if filename in exclude_files or rel_path in exclude_files:
+                 continue
+
+             adoc_files.append(filepath)
+
+     return sorted(adoc_files)
+
+
+ def find_duplicate_includes(
+     directory: str,
+     exclude_dirs: set[str] | None = None,
+     exclude_files: set[str] | None = None,
+     include_common: bool = False,
+     common_includes: set[str] | None = None
+ ) -> tuple[list[DuplicateInclude], int, int]:
+     """
+     Find all files that are included more than once.
+
+     Args:
+         directory: Base directory to scan
+         exclude_dirs: Directory names to exclude
+         exclude_files: File names or paths to exclude
+         include_common: If True, include common files in results
+         common_includes: Set of paths considered "common" (expected duplicates)
+
+     Returns:
+         Tuple of (duplicates, total_files_scanned, excluded_common_count)
+     """
+     if common_includes is None:
+         common_includes = DEFAULT_COMMON_INCLUDES
+
+     # Collect all .adoc files
+     adoc_files = collect_adoc_files(directory, exclude_dirs, exclude_files)
+     base_dir = os.path.abspath(directory)
+
+     # Track includes: {resolved_path: [IncludeLocation, ...]}
+     include_map: dict[str, list[IncludeLocation]] = defaultdict(list)
+
+     for source_file in adoc_files:
+         includes = find_includes_in_file(source_file)
+         for include_target, line_num in includes:
+             resolved = resolve_include_path(include_target, source_file, base_dir)
+             rel_source = os.path.relpath(source_file, base_dir)
+
+             include_map[resolved].append(IncludeLocation(
+                 source_file=rel_source,
+                 line_number=line_num,
+                 raw_include_path=include_target
+             ))
+
+     # Find duplicates
+     duplicates = []
+     excluded_common_count = 0
+
+     for path, locations in include_map.items():
+         if len(locations) <= 1:
+             continue
+
+         is_common = is_common_include(path, common_includes)
+
+         if is_common and not include_common:
+             excluded_common_count += 1
+             continue
+
+         duplicates.append(DuplicateInclude(
+             resolved_path=path,
+             locations=locations,
+             is_common=is_common
+         ))
+
+     # Sort by count descending
+     duplicates.sort(key=lambda d: d.count, reverse=True)
+
+     return duplicates, len(adoc_files), excluded_common_count
+
+
+ def format_txt_report(
+     duplicates: list[DuplicateInclude],
+     total_files: int,
+     excluded_common: int,
+     directory: str,
+     cmd_line: str
+ ) -> str:
+     """Format results as plain text."""
+     lines = []
+
+     lines.append(f"Command: {cmd_line}")
+     lines.append(f"Directory: {os.path.abspath(directory)}")
+     lines.append(f"Files scanned: {total_files}")
+     lines.append("")
+
+     if not duplicates:
+         if excluded_common:
+             lines.append(f"No unexpected duplicates found ({excluded_common} common files excluded).")
+             lines.append("Use --include-common to see all duplicates.")
+         else:
+             lines.append("No files are included more than once.")
+         return '\n'.join(lines)
+
+     lines.append(f"Found {len(duplicates)} files included more than once:")
+     if excluded_common:
+         lines.append(f" ({excluded_common} common files excluded; use --include-common to see all)")
+     lines.append("")
+     lines.append("=" * 70)
+
+     for i, dup in enumerate(duplicates, 1):
+         common_marker = " [COMMON]" if dup.is_common else ""
+         lines.append(f"\n[{i}] {dup.resolved_path}{common_marker}")
+         lines.append(f" Included {dup.count} times:")
+         lines.append("-" * 50)
+
+         for loc in dup.locations:
+             lines.append(f" - {loc.source_file}:{loc.line_number}")
+
+     return '\n'.join(lines)
+
+
+ def format_csv_report(
+     duplicates: list[DuplicateInclude],
+     total_files: int,
+     excluded_common: int,
+     directory: str,
+     cmd_line: str
+ ) -> str:
+     """Format results as CSV."""
+     lines = []
+     lines.append("Included File,Inclusion Count,Is Common,Source File,Line Number,Raw Include Path")
+
+     for dup in duplicates:
+         for loc in dup.locations:
+             lines.append(
+                 f'"{dup.resolved_path}",{dup.count},{dup.is_common},'
+                 f'"{loc.source_file}",{loc.line_number},"{loc.raw_include_path}"'
+             )
+
+     return '\n'.join(lines)
+
+
+ def format_json_report(
+     duplicates: list[DuplicateInclude],
+     total_files: int,
+     excluded_common: int,
+     directory: str,
+     cmd_line: str
+ ) -> str:
+     """Format results as JSON."""
+     import json
+
+     data = {
+         "command": cmd_line,
+         "directory": os.path.abspath(directory),
+         "files_scanned": total_files,
+         "excluded_common_count": excluded_common,
+         "duplicate_count": len(duplicates),
+         "duplicates": [
+             {
+                 "path": dup.resolved_path,
+                 "count": dup.count,
+                 "is_common": dup.is_common,
+                 "locations": [
+                     {
+                         "source_file": loc.source_file,
+                         "line_number": loc.line_number,
+                         "raw_include_path": loc.raw_include_path
+                     }
+                     for loc in dup.locations
+                 ]
+             }
+             for dup in duplicates
+         ]
+     }
+
+     return json.dumps(data, indent=2)
+
+
+ def format_md_report(
+     duplicates: list[DuplicateInclude],
+     total_files: int,
+     excluded_common: int,
+     directory: str,
+     cmd_line: str
+ ) -> str:
+     """Format results as Markdown."""
+     lines = []
+
+     lines.append("# Duplicate Includes Report")
+     lines.append("")
+     lines.append(f"**Command:** `{cmd_line}`")
+     lines.append(f"**Directory:** `{os.path.abspath(directory)}`")
+     lines.append(f"**Files scanned:** {total_files}")
+     lines.append("")
+
+     if not duplicates:
+         if excluded_common:
+             lines.append(f"No unexpected duplicates found ({excluded_common} common files excluded).")
+         else:
+             lines.append("No files are included more than once.")
+         return '\n'.join(lines)
+
+     lines.append(f"## Summary")
+     lines.append("")
+     lines.append(f"Found **{len(duplicates)}** files included more than once.")
+     if excluded_common:
+         lines.append(f"({excluded_common} common files excluded)")
+     lines.append("")
+
+     for i, dup in enumerate(duplicates, 1):
+         common_marker = " *(common)*" if dup.is_common else ""
+         lines.append(f"### {i}. `{dup.resolved_path}`{common_marker}")
+         lines.append("")
+         lines.append(f"Included **{dup.count}** times:")
+         lines.append("")
+
+         for loc in dup.locations:
+             lines.append(f"- `{loc.source_file}:{loc.line_number}`")
+
+         lines.append("")
+
+     return '\n'.join(lines)
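For orientation, a minimal usage sketch of the new module's public API follows. The import path `doc_utils.duplicate_includes` is inferred from the file list above; the wrapper script itself is hypothetical and is not shipped in the wheel:

# usage_sketch.py - hypothetical driver, not part of the package.
# Assumes the hunk above lands in doc_utils/duplicate_includes.py,
# as entry 18 in the file list suggests.
import sys

from doc_utils.duplicate_includes import find_duplicate_includes, format_txt_report


def main() -> int:
    directory = sys.argv[1] if len(sys.argv) > 1 else "."
    # find_duplicate_includes() returns (duplicates, files_scanned,
    # excluded_common_count), per its docstring above.
    duplicates, scanned, excluded = find_duplicate_includes(directory)
    print(format_txt_report(duplicates, scanned, excluded, directory, " ".join(sys.argv)))
    # Exit non-zero when unexpected duplicates remain, so the scan can gate CI.
    return 1 if duplicates else 0


if __name__ == "__main__":
    sys.exit(main())

The tuple unpacking and argument order mirror the signatures in the hunk; swapping in format_csv_report, format_json_report, or format_md_report would change only the output format.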