rolfedh-doc-utils 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rolfedh_doc_utils-0.1.4/rolfedh_doc_utils.egg-info → rolfedh_doc_utils-0.1.6}/PKG-INFO +24 -6
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/README.md +23 -5
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/archive_unused_files.py +9 -3
- rolfedh_doc_utils-0.1.6/doc_utils/unused_adoc.py +120 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/pyproject.toml +3 -2
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6/rolfedh_doc_utils.egg-info}/PKG-INFO +24 -6
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/rolfedh_doc_utils.egg-info/SOURCES.txt +1 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/rolfedh_doc_utils.egg-info/entry_points.txt +1 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/rolfedh_doc_utils.egg-info/top_level.txt +1 -0
- rolfedh_doc_utils-0.1.6/tests/test_auto_discovery.py +146 -0
- rolfedh_doc_utils-0.1.4/doc_utils/unused_adoc.py +0 -53
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/LICENSE +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/doc_utils/__init__.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/doc_utils/file_utils.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/doc_utils/scannability.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/doc_utils/topic_map_parser.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/doc_utils/unused_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/doc_utils/unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/find_unused_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/rolfedh_doc_utils.egg-info/requires.txt +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/setup.cfg +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/setup.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_archive_unused_files.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_cli_entry_points.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_file_utils.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_fixture_archive_unused_files.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_fixture_archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_fixture_check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_parse_exclude_list.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_symlink_handling.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_topic_map_parser.py +0 -0
- {rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_unused_attributes.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rolfedh-doc-utils
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: CLI tools for AsciiDoc documentation projects
|
|
5
5
|
Author: Rolfe Dlugy-Hegwer
|
|
6
6
|
License: MIT License
|
|
@@ -99,6 +99,7 @@ The following CLI tools are installed:
|
|
|
99
99
|
* `archive-unused-files`
|
|
100
100
|
* `archive-unused-images`
|
|
101
101
|
* `find-unused-attributes`
|
|
102
|
+
* `format-asciidoc-spacing`
|
|
102
103
|
|
|
103
104
|
These tools can be run from any directory.
|
|
104
105
|
|
|
@@ -132,7 +133,7 @@ Scans `.adoc` files in the current directory to report:
|
|
|
132
133
|
* Paragraphs with too many sentences (default: 3 sentences)
|
|
133
134
|
* Supports exclusion of files and directories
|
|
134
135
|
|
|
135
|
-
➡️ See [
|
|
136
|
+
➡️ See [Scannability Checker for AsciiDoc Files](https://github.com/rolfedh/doc-utils/blob/main/check_scannability.md) for details.
|
|
136
137
|
|
|
137
138
|
---
|
|
138
139
|
|
|
@@ -144,7 +145,7 @@ Works with both:
|
|
|
144
145
|
- **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
|
|
145
146
|
- **Traditional AsciiDoc** repositories (uses `master.adoc` files)
|
|
146
147
|
|
|
147
|
-
➡️ See [
|
|
148
|
+
➡️ See [Archive Unused AsciiDoc Files](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
|
|
148
149
|
|
|
149
150
|
---
|
|
150
151
|
|
|
@@ -152,7 +153,7 @@ Works with both:
|
|
|
152
153
|
|
|
153
154
|
Finds unused image files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the current directory and optionally archives and deletes them.
|
|
154
155
|
|
|
155
|
-
➡️ See [
|
|
156
|
+
➡️ See [Archive Unused Images](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_images.md).
|
|
156
157
|
|
|
157
158
|
---
|
|
158
159
|
|
|
@@ -160,7 +161,22 @@ Finds unused image files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the
|
|
|
160
161
|
|
|
161
162
|
Scans an attributes file (e.g., `attributes.adoc`) for unused attribute definitions across all `.adoc` files in the current directory.
|
|
162
163
|
|
|
163
|
-
➡️ See [
|
|
164
|
+
➡️ See [Find Unused AsciiDoc Attributes](https://github.com/rolfedh/doc-utils/blob/main/find_unused_attributes.md).
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
### `format-asciidoc-spacing`
|
|
169
|
+
|
|
170
|
+
Ensures proper spacing in AsciiDoc files by adding blank lines after headings and around `include::` directives.
|
|
171
|
+
|
|
172
|
+
Formatting rules:
|
|
173
|
+
- Adds blank line after headings (`=`, `==`, `===`, etc.)
|
|
174
|
+
- Adds blank lines before and after `include::` directives
|
|
175
|
+
- Preserves existing spacing where appropriate
|
|
176
|
+
|
|
177
|
+
Implemented as a Python script (`format-asciidoc-spacing.py`).
|
|
178
|
+
|
|
179
|
+
➡️ See [AsciiDoc Spacing Formatter](https://github.com/rolfedh/doc-utils/blob/main/format_asciidoc_spacing.md).
|
|
164
180
|
|
|
165
181
|
## Best Practices for Safe Usage
|
|
166
182
|
|
|
@@ -191,6 +207,7 @@ To run the tools after installation:
|
|
|
191
207
|
check-scannability --help
|
|
192
208
|
archive-unused-files --help
|
|
193
209
|
find-unused-attributes attributes.adoc
|
|
210
|
+
format-asciidoc-spacing --help
|
|
194
211
|
```
|
|
195
212
|
|
|
196
213
|
Or run them directly from source:
|
|
@@ -199,6 +216,7 @@ Or run them directly from source:
|
|
|
199
216
|
python3 check_scannability.py
|
|
200
217
|
python3 archive_unused_files.py
|
|
201
218
|
python3 find_unused_attributes.py attributes.adoc
|
|
219
|
+
python3 format-asciidoc-spacing.py
|
|
202
220
|
```
|
|
203
221
|
|
|
204
222
|
### Directory/File Exclusion
|
|
@@ -278,7 +296,7 @@ Contributions are welcome! Please ensure:
|
|
|
278
296
|
- Code follows PEP 8 style guidelines
|
|
279
297
|
- Documentation is updated as needed
|
|
280
298
|
|
|
281
|
-
See [
|
|
299
|
+
See [Contributing Guidelines](https://github.com/rolfedh/doc-utils/blob/main/CONTRIBUTING.md) for more details.
|
|
282
300
|
|
|
283
301
|
## License
|
|
284
302
|
|
|
@@ -66,6 +66,7 @@ The following CLI tools are installed:
|
|
|
66
66
|
* `archive-unused-files`
|
|
67
67
|
* `archive-unused-images`
|
|
68
68
|
* `find-unused-attributes`
|
|
69
|
+
* `format-asciidoc-spacing`
|
|
69
70
|
|
|
70
71
|
These tools can be run from any directory.
|
|
71
72
|
|
|
@@ -99,7 +100,7 @@ Scans `.adoc` files in the current directory to report:
|
|
|
99
100
|
* Paragraphs with too many sentences (default: 3 sentences)
|
|
100
101
|
* Supports exclusion of files and directories
|
|
101
102
|
|
|
102
|
-
➡️ See [
|
|
103
|
+
➡️ See [Scannability Checker for AsciiDoc Files](https://github.com/rolfedh/doc-utils/blob/main/check_scannability.md) for details.
|
|
103
104
|
|
|
104
105
|
---
|
|
105
106
|
|
|
@@ -111,7 +112,7 @@ Works with both:
|
|
|
111
112
|
- **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
|
|
112
113
|
- **Traditional AsciiDoc** repositories (uses `master.adoc` files)
|
|
113
114
|
|
|
114
|
-
➡️ See [
|
|
115
|
+
➡️ See [Archive Unused AsciiDoc Files](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
|
|
115
116
|
|
|
116
117
|
---
|
|
117
118
|
|
|
@@ -119,7 +120,7 @@ Works with both:
|
|
|
119
120
|
|
|
120
121
|
Finds unused image files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the current directory and optionally archives and deletes them.
|
|
121
122
|
|
|
122
|
-
➡️ See [
|
|
123
|
+
➡️ See [Archive Unused Images](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_images.md).
|
|
123
124
|
|
|
124
125
|
---
|
|
125
126
|
|
|
@@ -127,7 +128,22 @@ Finds unused image files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the
|
|
|
127
128
|
|
|
128
129
|
Scans an attributes file (e.g., `attributes.adoc`) for unused attribute definitions across all `.adoc` files in the current directory.
|
|
129
130
|
|
|
130
|
-
➡️ See [
|
|
131
|
+
➡️ See [Find Unused AsciiDoc Attributes](https://github.com/rolfedh/doc-utils/blob/main/find_unused_attributes.md).
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
### `format-asciidoc-spacing`
|
|
136
|
+
|
|
137
|
+
Ensures proper spacing in AsciiDoc files by adding blank lines after headings and around `include::` directives.
|
|
138
|
+
|
|
139
|
+
Formatting rules:
|
|
140
|
+
- Adds blank line after headings (`=`, `==`, `===`, etc.)
|
|
141
|
+
- Adds blank lines before and after `include::` directives
|
|
142
|
+
- Preserves existing spacing where appropriate
|
|
143
|
+
|
|
144
|
+
Implemented as a Python script (`format-asciidoc-spacing.py`).
|
|
145
|
+
|
|
146
|
+
➡️ See [AsciiDoc Spacing Formatter](https://github.com/rolfedh/doc-utils/blob/main/format_asciidoc_spacing.md).
|
|
131
147
|
|
|
132
148
|
## Best Practices for Safe Usage
|
|
133
149
|
|
|
@@ -158,6 +174,7 @@ To run the tools after installation:
|
|
|
158
174
|
check-scannability --help
|
|
159
175
|
archive-unused-files --help
|
|
160
176
|
find-unused-attributes attributes.adoc
|
|
177
|
+
format-asciidoc-spacing --help
|
|
161
178
|
```
|
|
162
179
|
|
|
163
180
|
Or run them directly from source:
|
|
@@ -166,6 +183,7 @@ Or run them directly from source:
|
|
|
166
183
|
python3 check_scannability.py
|
|
167
184
|
python3 archive_unused_files.py
|
|
168
185
|
python3 find_unused_attributes.py attributes.adoc
|
|
186
|
+
python3 format-asciidoc-spacing.py
|
|
169
187
|
```
|
|
170
188
|
|
|
171
189
|
### Directory/File Exclusion
|
|
@@ -245,7 +263,7 @@ Contributions are welcome! Please ensure:
|
|
|
245
263
|
- Code follows PEP 8 style guidelines
|
|
246
264
|
- Documentation is updated as needed
|
|
247
265
|
|
|
248
|
-
See [
|
|
266
|
+
See [Contributing Guidelines](https://github.com/rolfedh/doc-utils/blob/main/CONTRIBUTING.md) for more details.
|
|
249
267
|
|
|
250
268
|
## License
|
|
251
269
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Archive Unused AsciiDoc Files
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Automatically discovers and scans 'modules' and 'assemblies' directories for AsciiDoc files
|
|
5
|
+
not referenced by any other AsciiDoc file in the project. Optionally archives and deletes them.
|
|
5
6
|
|
|
6
7
|
For full documentation and usage examples, see archive_unused_files.md in this directory.
|
|
7
8
|
"""
|
|
@@ -11,14 +12,19 @@ from doc_utils.unused_adoc import find_unused_adoc
|
|
|
11
12
|
from doc_utils.file_utils import parse_exclude_list_file
|
|
12
13
|
|
|
13
14
|
def main():
|
|
14
|
-
parser = argparse.ArgumentParser(
|
|
15
|
+
parser = argparse.ArgumentParser(
|
|
16
|
+
description='Archive unused AsciiDoc files.',
|
|
17
|
+
epilog='By default, automatically discovers all modules and assemblies directories in the repository.'
|
|
18
|
+
)
|
|
15
19
|
parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
|
|
20
|
+
parser.add_argument('--scan-dir', action='append', default=[], help='Specific directory to scan (can be used multiple times). If not specified, auto-discovers directories.')
|
|
16
21
|
parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
|
|
17
22
|
parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
|
|
18
23
|
parser.add_argument('--exclude-list', type=str, help='Path to a file containing directories or files to exclude, one per line.')
|
|
19
24
|
args = parser.parse_args()
|
|
20
25
|
|
|
21
|
-
|
|
26
|
+
# Use provided scan directories or None for auto-discovery
|
|
27
|
+
scan_dirs = args.scan_dir if args.scan_dir else None
|
|
22
28
|
archive_dir = './archive'
|
|
23
29
|
|
|
24
30
|
exclude_dirs = list(args.exclude_dir)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# doc_utils/unused_adoc.py
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from .file_utils import collect_files, write_manifest_and_archive
|
|
6
|
+
from .topic_map_parser import detect_repo_type, get_all_topic_map_references
|
|
7
|
+
|
|
8
|
+
def find_scan_directories(base_path='.', exclude_dirs=None):
|
|
9
|
+
"""
|
|
10
|
+
Automatically find all 'modules' and 'assemblies' directories in the repository.
|
|
11
|
+
|
|
12
|
+
Returns a list of paths to scan.
|
|
13
|
+
"""
|
|
14
|
+
scan_dirs = []
|
|
15
|
+
exclude_dirs = exclude_dirs or []
|
|
16
|
+
|
|
17
|
+
for root, dirs, files in os.walk(base_path):
|
|
18
|
+
# Skip symbolic links to prevent issues
|
|
19
|
+
dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))]
|
|
20
|
+
|
|
21
|
+
# Skip excluded directories
|
|
22
|
+
for exclude_dir in exclude_dirs:
|
|
23
|
+
abs_exclude = os.path.abspath(exclude_dir)
|
|
24
|
+
if os.path.abspath(root).startswith(abs_exclude):
|
|
25
|
+
dirs[:] = [] # Don't descend into excluded directories
|
|
26
|
+
break
|
|
27
|
+
|
|
28
|
+
# Skip hidden directories and common non-content directories
|
|
29
|
+
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', 'build', 'dist', 'target']]
|
|
30
|
+
|
|
31
|
+
# Look for modules and assemblies directories
|
|
32
|
+
for d in dirs:
|
|
33
|
+
if d in ['modules', 'assemblies']:
|
|
34
|
+
dir_path = os.path.join(root, d)
|
|
35
|
+
# Check if this directory or any subdirectory contains .adoc files
|
|
36
|
+
has_adoc = False
|
|
37
|
+
for subroot, subdirs, subfiles in os.walk(dir_path):
|
|
38
|
+
# Skip symbolic links
|
|
39
|
+
subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
|
|
40
|
+
if any(f.endswith('.adoc') for f in subfiles):
|
|
41
|
+
has_adoc = True
|
|
42
|
+
break
|
|
43
|
+
if has_adoc:
|
|
44
|
+
scan_dirs.append(dir_path)
|
|
45
|
+
|
|
46
|
+
# Also check for modules/rn pattern if modules exists
|
|
47
|
+
modules_dirs = [d for d in scan_dirs if os.path.basename(d) == 'modules']
|
|
48
|
+
for modules_dir in modules_dirs:
|
|
49
|
+
rn_dir = os.path.join(modules_dir, 'rn')
|
|
50
|
+
if os.path.isdir(rn_dir):
|
|
51
|
+
# Check if rn directory or subdirectories contain .adoc files
|
|
52
|
+
has_adoc = False
|
|
53
|
+
for subroot, subdirs, subfiles in os.walk(rn_dir):
|
|
54
|
+
subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
|
|
55
|
+
if any(f.endswith('.adoc') for f in subfiles):
|
|
56
|
+
has_adoc = True
|
|
57
|
+
break
|
|
58
|
+
if has_adoc:
|
|
59
|
+
scan_dirs.append(rn_dir)
|
|
60
|
+
|
|
61
|
+
return scan_dirs
|
|
62
|
+
|
|
63
|
+
def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None):
|
|
64
|
+
# Print safety warning
|
|
65
|
+
print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
|
|
66
|
+
|
|
67
|
+
# If no scan_dirs provided, auto-discover them
|
|
68
|
+
if not scan_dirs:
|
|
69
|
+
scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
|
|
70
|
+
if scan_dirs:
|
|
71
|
+
print(f"Auto-discovered directories to scan:")
|
|
72
|
+
for dir_path in sorted(scan_dirs):
|
|
73
|
+
print(f" - {dir_path}")
|
|
74
|
+
else:
|
|
75
|
+
print("No 'modules' or 'assemblies' directories found containing .adoc files.")
|
|
76
|
+
print("Please run this tool from your documentation repository root.")
|
|
77
|
+
return
|
|
78
|
+
|
|
79
|
+
# Detect repository type
|
|
80
|
+
repo_type = detect_repo_type()
|
|
81
|
+
print(f"Detected repository type: {repo_type}")
|
|
82
|
+
|
|
83
|
+
# Collect all .adoc files in scan directories
|
|
84
|
+
asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
|
|
85
|
+
|
|
86
|
+
# Track which files are referenced
|
|
87
|
+
referenced_files = set()
|
|
88
|
+
|
|
89
|
+
if repo_type == 'topic_map':
|
|
90
|
+
# For OpenShift-docs style repos, get references from topic maps
|
|
91
|
+
topic_references = get_all_topic_map_references()
|
|
92
|
+
# Convert to basenames for comparison
|
|
93
|
+
referenced_files.update(os.path.basename(ref) for ref in topic_references)
|
|
94
|
+
|
|
95
|
+
# Always scan for include:: directives in all .adoc files
|
|
96
|
+
include_pattern = re.compile(r'include::(.+?)\[')
|
|
97
|
+
adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
|
|
98
|
+
|
|
99
|
+
for file_path in adoc_files:
|
|
100
|
+
try:
|
|
101
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
102
|
+
content = f.read()
|
|
103
|
+
includes = include_pattern.findall(content)
|
|
104
|
+
# Extract just the filename from the include path
|
|
105
|
+
for include in includes:
|
|
106
|
+
# Handle both relative and absolute includes
|
|
107
|
+
include_basename = os.path.basename(include)
|
|
108
|
+
referenced_files.add(include_basename)
|
|
109
|
+
except Exception as e:
|
|
110
|
+
print(f"Warning: could not read {file_path}: {e}")
|
|
111
|
+
|
|
112
|
+
# Find unused files by comparing basenames
|
|
113
|
+
unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
|
|
114
|
+
unused_files = list(dict.fromkeys(unused_files)) # Remove duplicates
|
|
115
|
+
|
|
116
|
+
print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
|
|
117
|
+
|
|
118
|
+
return write_manifest_and_archive(
|
|
119
|
+
unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
|
|
120
|
+
)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "rolfedh-doc-utils"
|
|
7
|
-
version = "0.1.4"
|
|
7
|
+
version = "0.1.6"
|
|
8
8
|
description = "CLI tools for AsciiDoc documentation projects"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
@@ -19,10 +19,11 @@ check-scannability = "check_scannability:main"
|
|
|
19
19
|
archive-unused-files = "archive_unused_files:main"
|
|
20
20
|
archive-unused-images = "archive_unused_images:main"
|
|
21
21
|
find-unused-attributes = "find_unused_attributes:main"
|
|
22
|
+
format-asciidoc-spacing = "format_asciidoc_spacing:main"
|
|
22
23
|
|
|
23
24
|
[tool.setuptools.packages.find]
|
|
24
25
|
where = ["."]
|
|
25
26
|
include = ["doc_utils*"]
|
|
26
27
|
|
|
27
28
|
[tool.setuptools]
|
|
28
|
-
py-modules = ["find_unused_attributes", "check_scannability", "archive_unused_files", "archive_unused_images"]
|
|
29
|
+
py-modules = ["find_unused_attributes", "check_scannability", "archive_unused_files", "archive_unused_images", "format_asciidoc_spacing"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rolfedh-doc-utils
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: CLI tools for AsciiDoc documentation projects
|
|
5
5
|
Author: Rolfe Dlugy-Hegwer
|
|
6
6
|
License: MIT License
|
|
@@ -99,6 +99,7 @@ The following CLI tools are installed:
|
|
|
99
99
|
* `archive-unused-files`
|
|
100
100
|
* `archive-unused-images`
|
|
101
101
|
* `find-unused-attributes`
|
|
102
|
+
* `format-asciidoc-spacing`
|
|
102
103
|
|
|
103
104
|
These tools can be run from any directory.
|
|
104
105
|
|
|
@@ -132,7 +133,7 @@ Scans `.adoc` files in the current directory to report:
|
|
|
132
133
|
* Paragraphs with too many sentences (default: 3 sentences)
|
|
133
134
|
* Supports exclusion of files and directories
|
|
134
135
|
|
|
135
|
-
➡️ See [
|
|
136
|
+
➡️ See [Scannability Checker for AsciiDoc Files](https://github.com/rolfedh/doc-utils/blob/main/check_scannability.md) for details.
|
|
136
137
|
|
|
137
138
|
---
|
|
138
139
|
|
|
@@ -144,7 +145,7 @@ Works with both:
|
|
|
144
145
|
- **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
|
|
145
146
|
- **Traditional AsciiDoc** repositories (uses `master.adoc` files)
|
|
146
147
|
|
|
147
|
-
➡️ See [
|
|
148
|
+
➡️ See [Archive Unused AsciiDoc Files](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
|
|
148
149
|
|
|
149
150
|
---
|
|
150
151
|
|
|
@@ -152,7 +153,7 @@ Works with both:
|
|
|
152
153
|
|
|
153
154
|
Finds unused image files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the current directory and optionally archives and deletes them.
|
|
154
155
|
|
|
155
|
-
➡️ See [
|
|
156
|
+
➡️ See [Archive Unused Images](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_images.md).
|
|
156
157
|
|
|
157
158
|
---
|
|
158
159
|
|
|
@@ -160,7 +161,22 @@ Finds unused image files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the
|
|
|
160
161
|
|
|
161
162
|
Scans an attributes file (e.g., `attributes.adoc`) for unused attribute definitions across all `.adoc` files in the current directory.
|
|
162
163
|
|
|
163
|
-
➡️ See [
|
|
164
|
+
➡️ See [Find Unused AsciiDoc Attributes](https://github.com/rolfedh/doc-utils/blob/main/find_unused_attributes.md).
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
### `format-asciidoc-spacing`
|
|
169
|
+
|
|
170
|
+
Ensures proper spacing in AsciiDoc files by adding blank lines after headings and around `include::` directives.
|
|
171
|
+
|
|
172
|
+
Formatting rules:
|
|
173
|
+
- Adds blank line after headings (`=`, `==`, `===`, etc.)
|
|
174
|
+
- Adds blank lines before and after `include::` directives
|
|
175
|
+
- Preserves existing spacing where appropriate
|
|
176
|
+
|
|
177
|
+
Implemented as a Python script (`format-asciidoc-spacing.py`).
|
|
178
|
+
|
|
179
|
+
➡️ See [AsciiDoc Spacing Formatter](https://github.com/rolfedh/doc-utils/blob/main/format_asciidoc_spacing.md).
|
|
164
180
|
|
|
165
181
|
## Best Practices for Safe Usage
|
|
166
182
|
|
|
@@ -191,6 +207,7 @@ To run the tools after installation:
|
|
|
191
207
|
check-scannability --help
|
|
192
208
|
archive-unused-files --help
|
|
193
209
|
find-unused-attributes attributes.adoc
|
|
210
|
+
format-asciidoc-spacing --help
|
|
194
211
|
```
|
|
195
212
|
|
|
196
213
|
Or run them directly from source:
|
|
@@ -199,6 +216,7 @@ Or run them directly from source:
|
|
|
199
216
|
python3 check_scannability.py
|
|
200
217
|
python3 archive_unused_files.py
|
|
201
218
|
python3 find_unused_attributes.py attributes.adoc
|
|
219
|
+
python3 format-asciidoc-spacing.py
|
|
202
220
|
```
|
|
203
221
|
|
|
204
222
|
### Directory/File Exclusion
|
|
@@ -278,7 +296,7 @@ Contributions are welcome! Please ensure:
|
|
|
278
296
|
- Code follows PEP 8 style guidelines
|
|
279
297
|
- Documentation is updated as needed
|
|
280
298
|
|
|
281
|
-
See [
|
|
299
|
+
See [Contributing Guidelines](https://github.com/rolfedh/doc-utils/blob/main/CONTRIBUTING.md) for more details.
|
|
282
300
|
|
|
283
301
|
## License
|
|
284
302
|
|
|
@@ -21,6 +21,7 @@ rolfedh_doc_utils.egg-info/requires.txt
|
|
|
21
21
|
rolfedh_doc_utils.egg-info/top_level.txt
|
|
22
22
|
tests/test_archive_unused_files.py
|
|
23
23
|
tests/test_archive_unused_images.py
|
|
24
|
+
tests/test_auto_discovery.py
|
|
24
25
|
tests/test_check_scannability.py
|
|
25
26
|
tests/test_cli_entry_points.py
|
|
26
27
|
tests/test_file_utils.py
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Test automatic directory discovery for archive-unused-files."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
import pytest
|
|
6
|
+
from doc_utils.unused_adoc import find_scan_directories
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_find_scan_directories_standard_structure(tmp_path):
|
|
10
|
+
"""Test discovery with standard directory structure."""
|
|
11
|
+
|
|
12
|
+
# Create standard structure
|
|
13
|
+
(tmp_path / "modules").mkdir()
|
|
14
|
+
(tmp_path / "modules" / "test.adoc").write_text("= Test\n")
|
|
15
|
+
|
|
16
|
+
(tmp_path / "assemblies").mkdir()
|
|
17
|
+
(tmp_path / "assemblies" / "assembly.adoc").write_text("= Assembly\n")
|
|
18
|
+
|
|
19
|
+
# Find directories
|
|
20
|
+
dirs = find_scan_directories(str(tmp_path))
|
|
21
|
+
|
|
22
|
+
# Should find both directories
|
|
23
|
+
assert len(dirs) == 2
|
|
24
|
+
assert any("modules" in d for d in dirs)
|
|
25
|
+
assert any("assemblies" in d for d in dirs)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_find_scan_directories_nested_structure(tmp_path):
|
|
29
|
+
"""Test discovery with nested directory structure."""
|
|
30
|
+
|
|
31
|
+
# Create nested structure like red-hat-insights-documentation
|
|
32
|
+
downstream = tmp_path / "downstream"
|
|
33
|
+
downstream.mkdir()
|
|
34
|
+
|
|
35
|
+
(downstream / "modules").mkdir()
|
|
36
|
+
(downstream / "modules" / "test.adoc").write_text("= Test\n")
|
|
37
|
+
|
|
38
|
+
(downstream / "assemblies").mkdir()
|
|
39
|
+
(downstream / "assemblies" / "assembly.adoc").write_text("= Assembly\n")
|
|
40
|
+
|
|
41
|
+
# Find directories
|
|
42
|
+
dirs = find_scan_directories(str(tmp_path))
|
|
43
|
+
|
|
44
|
+
# Should find both nested directories
|
|
45
|
+
assert len(dirs) == 2
|
|
46
|
+
assert any("downstream/modules" in d or "downstream\\modules" in d for d in dirs)
|
|
47
|
+
assert any("downstream/assemblies" in d or "downstream\\assemblies" in d for d in dirs)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_find_scan_directories_with_rn(tmp_path):
|
|
51
|
+
"""Test discovery includes modules/rn directory when it contains .adoc files."""
|
|
52
|
+
|
|
53
|
+
# Create modules with rn subdirectory
|
|
54
|
+
modules = tmp_path / "modules"
|
|
55
|
+
modules.mkdir()
|
|
56
|
+
(modules / "test.adoc").write_text("= Test\n")
|
|
57
|
+
|
|
58
|
+
rn_dir = modules / "rn"
|
|
59
|
+
rn_dir.mkdir()
|
|
60
|
+
(rn_dir / "release-notes.adoc").write_text("= Release Notes\n")
|
|
61
|
+
|
|
62
|
+
# Find directories
|
|
63
|
+
dirs = find_scan_directories(str(tmp_path))
|
|
64
|
+
|
|
65
|
+
# Should find modules and modules/rn
|
|
66
|
+
assert len(dirs) == 2
|
|
67
|
+
assert any("modules/rn" in d or "modules\\rn" in d for d in dirs)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_find_scan_directories_empty_dirs(tmp_path):
|
|
71
|
+
"""Test that empty directories without .adoc files are not included."""
|
|
72
|
+
|
|
73
|
+
# Create directories without .adoc files
|
|
74
|
+
(tmp_path / "modules").mkdir()
|
|
75
|
+
(tmp_path / "assemblies").mkdir()
|
|
76
|
+
|
|
77
|
+
# Find directories
|
|
78
|
+
dirs = find_scan_directories(str(tmp_path))
|
|
79
|
+
|
|
80
|
+
# Should find no directories since they don't contain .adoc files
|
|
81
|
+
assert len(dirs) == 0
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_find_scan_directories_with_exclusions(tmp_path):
|
|
85
|
+
"""Test directory discovery with exclusions."""
|
|
86
|
+
|
|
87
|
+
# Create multiple module directories
|
|
88
|
+
(tmp_path / "modules").mkdir()
|
|
89
|
+
(tmp_path / "modules" / "test.adoc").write_text("= Test\n")
|
|
90
|
+
|
|
91
|
+
archived = tmp_path / "archived"
|
|
92
|
+
archived.mkdir()
|
|
93
|
+
(archived / "modules").mkdir()
|
|
94
|
+
(archived / "modules" / "old.adoc").write_text("= Old\n")
|
|
95
|
+
|
|
96
|
+
# Find directories excluding archived
|
|
97
|
+
dirs = find_scan_directories(str(tmp_path), exclude_dirs=[str(archived)])
|
|
98
|
+
|
|
99
|
+
# Should only find the non-excluded modules
|
|
100
|
+
assert len(dirs) == 1
|
|
101
|
+
assert "archived" not in dirs[0]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def test_find_scan_directories_skips_hidden(tmp_path):
|
|
105
|
+
"""Test that hidden directories are skipped."""
|
|
106
|
+
|
|
107
|
+
# Create visible modules
|
|
108
|
+
(tmp_path / "modules").mkdir()
|
|
109
|
+
(tmp_path / "modules" / "test.adoc").write_text("= Test\n")
|
|
110
|
+
|
|
111
|
+
# Create hidden directory
|
|
112
|
+
hidden = tmp_path / ".archive"
|
|
113
|
+
hidden.mkdir()
|
|
114
|
+
(hidden / "modules").mkdir()
|
|
115
|
+
(hidden / "modules" / "archived.adoc").write_text("= Archived\n")
|
|
116
|
+
|
|
117
|
+
# Find directories
|
|
118
|
+
dirs = find_scan_directories(str(tmp_path))
|
|
119
|
+
|
|
120
|
+
# Should only find visible modules
|
|
121
|
+
assert len(dirs) == 1
|
|
122
|
+
assert ".archive" not in dirs[0]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_find_scan_directories_multiple_locations(tmp_path):
|
|
126
|
+
"""Test discovery with modules/assemblies in multiple locations."""
|
|
127
|
+
|
|
128
|
+
# Create modules in multiple places
|
|
129
|
+
(tmp_path / "modules").mkdir()
|
|
130
|
+
(tmp_path / "modules" / "root.adoc").write_text("= Root\n")
|
|
131
|
+
|
|
132
|
+
content1 = tmp_path / "content1"
|
|
133
|
+
content1.mkdir()
|
|
134
|
+
(content1 / "modules").mkdir()
|
|
135
|
+
(content1 / "modules" / "content1.adoc").write_text("= Content1\n")
|
|
136
|
+
|
|
137
|
+
content2 = tmp_path / "content2"
|
|
138
|
+
content2.mkdir()
|
|
139
|
+
(content2 / "assemblies").mkdir()
|
|
140
|
+
(content2 / "assemblies" / "assembly2.adoc").write_text("= Assembly2\n")
|
|
141
|
+
|
|
142
|
+
# Find directories
|
|
143
|
+
dirs = find_scan_directories(str(tmp_path))
|
|
144
|
+
|
|
145
|
+
# Should find all three directories
|
|
146
|
+
assert len(dirs) == 3
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
# doc_utils/unused_adoc.py
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import re
|
|
5
|
-
from .file_utils import collect_files, write_manifest_and_archive
|
|
6
|
-
from .topic_map_parser import detect_repo_type, get_all_topic_map_references
|
|
7
|
-
|
|
8
|
-
def find_unused_adoc(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
|
|
9
|
-
# Print safety warning
|
|
10
|
-
print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
|
|
11
|
-
|
|
12
|
-
# Detect repository type
|
|
13
|
-
repo_type = detect_repo_type()
|
|
14
|
-
print(f"Detected repository type: {repo_type}")
|
|
15
|
-
|
|
16
|
-
# Collect all .adoc files in scan directories
|
|
17
|
-
asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
|
|
18
|
-
|
|
19
|
-
# Track which files are referenced
|
|
20
|
-
referenced_files = set()
|
|
21
|
-
|
|
22
|
-
if repo_type == 'topic_map':
|
|
23
|
-
# For OpenShift-docs style repos, get references from topic maps
|
|
24
|
-
topic_references = get_all_topic_map_references()
|
|
25
|
-
# Convert to basenames for comparison
|
|
26
|
-
referenced_files.update(os.path.basename(ref) for ref in topic_references)
|
|
27
|
-
|
|
28
|
-
# Always scan for include:: directives in all .adoc files
|
|
29
|
-
include_pattern = re.compile(r'include::(.+?)\[')
|
|
30
|
-
adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
|
|
31
|
-
|
|
32
|
-
for file_path in adoc_files:
|
|
33
|
-
try:
|
|
34
|
-
with open(file_path, 'r', encoding='utf-8') as f:
|
|
35
|
-
content = f.read()
|
|
36
|
-
includes = include_pattern.findall(content)
|
|
37
|
-
# Extract just the filename from the include path
|
|
38
|
-
for include in includes:
|
|
39
|
-
# Handle both relative and absolute includes
|
|
40
|
-
include_basename = os.path.basename(include)
|
|
41
|
-
referenced_files.add(include_basename)
|
|
42
|
-
except Exception as e:
|
|
43
|
-
print(f"Warning: could not read {file_path}: {e}")
|
|
44
|
-
|
|
45
|
-
# Find unused files by comparing basenames
|
|
46
|
-
unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
|
|
47
|
-
unused_files = list(dict.fromkeys(unused_files)) # Remove duplicates
|
|
48
|
-
|
|
49
|
-
print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
|
|
50
|
-
|
|
51
|
-
return write_manifest_and_archive(
|
|
52
|
-
unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
|
|
53
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/rolfedh_doc_utils.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_fixture_archive_unused_files.py
RENAMED
|
File without changes
|
{rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_fixture_archive_unused_images.py
RENAMED
|
File without changes
|
{rolfedh_doc_utils-0.1.4 → rolfedh_doc_utils-0.1.6}/tests/test_fixture_check_scannability.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|