rolfedh-doc-utils 0.1.4__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archive_unused_files.py +18 -5
- archive_unused_images.py +9 -2
- callout_lib/__init__.py +22 -0
- callout_lib/converter_bullets.py +103 -0
- callout_lib/converter_comments.py +295 -0
- callout_lib/converter_deflist.py +134 -0
- callout_lib/detector.py +364 -0
- callout_lib/table_parser.py +804 -0
- check_published_links.py +1083 -0
- check_scannability.py +6 -0
- check_source_directives.py +101 -0
- convert_callouts_interactive.py +567 -0
- convert_callouts_to_deflist.py +628 -0
- convert_freemarker_to_asciidoc.py +288 -0
- convert_tables_to_deflists.py +479 -0
- doc_utils/convert_freemarker_to_asciidoc.py +708 -0
- doc_utils/duplicate_content.py +409 -0
- doc_utils/duplicate_includes.py +347 -0
- doc_utils/extract_link_attributes.py +618 -0
- doc_utils/format_asciidoc_spacing.py +285 -0
- doc_utils/insert_abstract_role.py +220 -0
- doc_utils/inventory_conditionals.py +164 -0
- doc_utils/missing_source_directive.py +211 -0
- doc_utils/replace_link_attributes.py +187 -0
- doc_utils/spinner.py +119 -0
- doc_utils/unused_adoc.py +150 -22
- doc_utils/unused_attributes.py +218 -6
- doc_utils/unused_images.py +81 -9
- doc_utils/validate_links.py +576 -0
- doc_utils/version.py +8 -0
- doc_utils/version_check.py +243 -0
- doc_utils/warnings_report.py +237 -0
- doc_utils_cli.py +158 -0
- extract_link_attributes.py +120 -0
- find_duplicate_content.py +209 -0
- find_duplicate_includes.py +198 -0
- find_unused_attributes.py +84 -6
- format_asciidoc_spacing.py +134 -0
- insert_abstract_role.py +163 -0
- inventory_conditionals.py +53 -0
- replace_link_attributes.py +214 -0
- rolfedh_doc_utils-0.1.41.dist-info/METADATA +246 -0
- rolfedh_doc_utils-0.1.41.dist-info/RECORD +52 -0
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/WHEEL +1 -1
- rolfedh_doc_utils-0.1.41.dist-info/entry_points.txt +20 -0
- rolfedh_doc_utils-0.1.41.dist-info/top_level.txt +21 -0
- validate_links.py +213 -0
- rolfedh_doc_utils-0.1.4.dist-info/METADATA +0 -285
- rolfedh_doc_utils-0.1.4.dist-info/RECORD +0 -17
- rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt +0 -5
- rolfedh_doc_utils-0.1.4.dist-info/top_level.txt +0 -5
- {rolfedh_doc_utils-0.1.4.dist-info → rolfedh_doc_utils-0.1.41.dist-info}/licenses/LICENSE +0 -0
doc_utils/duplicate_includes.py (new file)
@@ -0,0 +1,347 @@
"""
Core logic for finding AsciiDoc files that are included more than once.

Scans AsciiDoc files for include:: macros and identifies files that are
included from multiple locations, which may indicate opportunities for
content reuse or potential maintenance issues.
"""

import os
import re
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path


INCLUDE_PATTERN = re.compile(r'^include::([^\[]+)\[', re.MULTILINE)

# Files commonly expected to be included in multiple places
DEFAULT_COMMON_INCLUDES = {
    'attributes.adoc',
    'common/attributes.adoc',
    'common/revision-info.adoc',
    '_attributes.adoc',
}

# Default directories to exclude
DEFAULT_EXCLUDE_DIRS = {'.git', '.archive', 'target', 'build', 'node_modules'}


@dataclass
class IncludeLocation:
    """Represents where an include was found."""
    source_file: str
    line_number: int
    raw_include_path: str


@dataclass
class DuplicateInclude:
    """Represents a file that is included multiple times."""
    resolved_path: str
    locations: list[IncludeLocation] = field(default_factory=list)
    is_common: bool = False

    @property
    def count(self) -> int:
        return len(self.locations)


def find_includes_in_file(file_path: str) -> list[tuple[str, int]]:
    """
    Extract all include:: targets from an AsciiDoc file.

    Returns list of (include_target, line_number) tuples.
    """
    includes = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                match = re.match(r'^include::([^\[]+)\[', line)
                if match:
                    includes.append((match.group(1), line_num))
    except (IOError, UnicodeDecodeError) as e:
        print(f"Warning: Could not read {file_path}: {e}")
    return includes


def resolve_include_path(include_target: str, source_file: str, base_dir: str) -> str:
    """
    Resolve an include target to a normalized path relative to base directory.
    """
    source_dir = os.path.dirname(source_file)

    # Resolve the path relative to source file's directory
    if include_target.startswith('../') or include_target.startswith('./'):
        resolved = os.path.normpath(os.path.join(source_dir, include_target))
    else:
        resolved = os.path.normpath(os.path.join(source_dir, include_target))

    # Make relative to base directory if possible
    try:
        resolved = os.path.relpath(resolved, base_dir)
    except ValueError:
        pass  # Keep absolute path if on different drive (Windows)

    return resolved


def is_common_include(path: str, common_includes: set[str]) -> bool:
    """Check if a path matches a common include pattern."""
    basename = os.path.basename(path)
    return path in common_includes or basename in common_includes


def collect_adoc_files(
    directory: str,
    exclude_dirs: set[str] | None = None,
    exclude_files: set[str] | None = None
) -> list[str]:
    """
    Collect all .adoc files in a directory recursively.

    Args:
        directory: Base directory to scan
        exclude_dirs: Directory names to exclude
        exclude_files: File names or paths to exclude

    Returns:
        List of absolute paths to .adoc files
    """
    exclude_dirs = exclude_dirs or DEFAULT_EXCLUDE_DIRS
    exclude_files = exclude_files or set()

    adoc_files = []
    base_path = os.path.abspath(directory)

    for root, dirs, files in os.walk(base_path, followlinks=False):
        # Filter out excluded directories
        dirs[:] = [d for d in dirs if d not in exclude_dirs]

        for filename in files:
            if not filename.endswith('.adoc'):
                continue

            filepath = os.path.join(root, filename)
            rel_path = os.path.relpath(filepath, base_path)

            # Check exclusions
            if filename in exclude_files or rel_path in exclude_files:
                continue

            adoc_files.append(filepath)

    return sorted(adoc_files)


def find_duplicate_includes(
    directory: str,
    exclude_dirs: set[str] | None = None,
    exclude_files: set[str] | None = None,
    include_common: bool = False,
    common_includes: set[str] | None = None
) -> tuple[list[DuplicateInclude], int, int]:
    """
    Find all files that are included more than once.

    Args:
        directory: Base directory to scan
        exclude_dirs: Directory names to exclude
        exclude_files: File names or paths to exclude
        include_common: If True, include common files in results
        common_includes: Set of paths considered "common" (expected duplicates)

    Returns:
        Tuple of (duplicates, total_files_scanned, excluded_common_count)
    """
    if common_includes is None:
        common_includes = DEFAULT_COMMON_INCLUDES

    # Collect all .adoc files
    adoc_files = collect_adoc_files(directory, exclude_dirs, exclude_files)
    base_dir = os.path.abspath(directory)

    # Track includes: {resolved_path: [IncludeLocation, ...]}
    include_map: dict[str, list[IncludeLocation]] = defaultdict(list)

    for source_file in adoc_files:
        includes = find_includes_in_file(source_file)
        for include_target, line_num in includes:
            resolved = resolve_include_path(include_target, source_file, base_dir)
            rel_source = os.path.relpath(source_file, base_dir)

            include_map[resolved].append(IncludeLocation(
                source_file=rel_source,
                line_number=line_num,
                raw_include_path=include_target
            ))

    # Find duplicates
    duplicates = []
    excluded_common_count = 0

    for path, locations in include_map.items():
        if len(locations) <= 1:
            continue

        is_common = is_common_include(path, common_includes)

        if is_common and not include_common:
            excluded_common_count += 1
            continue

        duplicates.append(DuplicateInclude(
            resolved_path=path,
            locations=locations,
            is_common=is_common
        ))

    # Sort by count descending
    duplicates.sort(key=lambda d: d.count, reverse=True)

    return duplicates, len(adoc_files), excluded_common_count


def format_txt_report(
    duplicates: list[DuplicateInclude],
    total_files: int,
    excluded_common: int,
    directory: str,
    cmd_line: str
) -> str:
    """Format results as plain text."""
    lines = []

    lines.append(f"Command: {cmd_line}")
    lines.append(f"Directory: {os.path.abspath(directory)}")
    lines.append(f"Files scanned: {total_files}")
    lines.append("")

    if not duplicates:
        if excluded_common:
            lines.append(f"No unexpected duplicates found ({excluded_common} common files excluded).")
            lines.append("Use --include-common to see all duplicates.")
        else:
            lines.append("No files are included more than once.")
        return '\n'.join(lines)

    lines.append(f"Found {len(duplicates)} files included more than once:")
    if excluded_common:
        lines.append(f"  ({excluded_common} common files excluded; use --include-common to see all)")
    lines.append("")
    lines.append("=" * 70)

    for i, dup in enumerate(duplicates, 1):
        common_marker = " [COMMON]" if dup.is_common else ""
        lines.append(f"\n[{i}] {dup.resolved_path}{common_marker}")
        lines.append(f"    Included {dup.count} times:")
        lines.append("-" * 50)

        for loc in dup.locations:
            lines.append(f"  - {loc.source_file}:{loc.line_number}")

    return '\n'.join(lines)


def format_csv_report(
    duplicates: list[DuplicateInclude],
    total_files: int,
    excluded_common: int,
    directory: str,
    cmd_line: str
) -> str:
    """Format results as CSV."""
    lines = []
    lines.append("Included File,Inclusion Count,Is Common,Source File,Line Number,Raw Include Path")

    for dup in duplicates:
        for loc in dup.locations:
            lines.append(
                f'"{dup.resolved_path}",{dup.count},{dup.is_common},'
                f'"{loc.source_file}",{loc.line_number},"{loc.raw_include_path}"'
            )

    return '\n'.join(lines)


def format_json_report(
    duplicates: list[DuplicateInclude],
    total_files: int,
    excluded_common: int,
    directory: str,
    cmd_line: str
) -> str:
    """Format results as JSON."""
    import json

    data = {
        "command": cmd_line,
        "directory": os.path.abspath(directory),
        "files_scanned": total_files,
        "excluded_common_count": excluded_common,
        "duplicate_count": len(duplicates),
        "duplicates": [
            {
                "path": dup.resolved_path,
                "count": dup.count,
                "is_common": dup.is_common,
                "locations": [
                    {
                        "source_file": loc.source_file,
                        "line_number": loc.line_number,
                        "raw_include_path": loc.raw_include_path
                    }
                    for loc in dup.locations
                ]
            }
            for dup in duplicates
        ]
    }

    return json.dumps(data, indent=2)


def format_md_report(
    duplicates: list[DuplicateInclude],
    total_files: int,
    excluded_common: int,
    directory: str,
    cmd_line: str
) -> str:
    """Format results as Markdown."""
    lines = []

    lines.append("# Duplicate Includes Report")
    lines.append("")
    lines.append(f"**Command:** `{cmd_line}`")
    lines.append(f"**Directory:** `{os.path.abspath(directory)}`")
    lines.append(f"**Files scanned:** {total_files}")
    lines.append("")

    if not duplicates:
        if excluded_common:
            lines.append(f"No unexpected duplicates found ({excluded_common} common files excluded).")
        else:
            lines.append("No files are included more than once.")
        return '\n'.join(lines)

    lines.append(f"## Summary")
    lines.append("")
    lines.append(f"Found **{len(duplicates)}** files included more than once.")
    if excluded_common:
        lines.append(f"({excluded_common} common files excluded)")
    lines.append("")

    for i, dup in enumerate(duplicates, 1):
        common_marker = " *(common)*" if dup.is_common else ""
        lines.append(f"### {i}. `{dup.resolved_path}`{common_marker}")
        lines.append("")
        lines.append(f"Included **{dup.count}** times:")
        lines.append("")

        for loc in dup.locations:
            lines.append(f"- `{loc.source_file}:{loc.line_number}`")

        lines.append("")

    return '\n'.join(lines)
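
For orientation, a minimal sketch of how the new module could be driven from a script. The import path follows the doc_utils/duplicate_includes.py entry above; the "docs" directory and the cmd_line string are example values, not anything defined by the package.

# Hypothetical usage sketch for the new duplicate_includes module;
# "docs" and the command string below are illustrative only.
from doc_utils.duplicate_includes import find_duplicate_includes, format_txt_report

duplicates, total_files, excluded_common = find_duplicate_includes(
    "docs",                # base directory to scan for .adoc files
    include_common=False,  # keep attributes.adoc-style expected duplicates out of the report
)
print(format_txt_report(duplicates, total_files, excluded_common,
                        directory="docs",
                        cmd_line="find-duplicate-includes docs"))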