rolfedh-doc-utils 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rolfedh_doc_utils-0.1.2/rolfedh_doc_utils.egg-info → rolfedh_doc_utils-0.1.4}/PKG-INFO +32 -4
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/README.md +30 -3
- rolfedh_doc_utils-0.1.4/doc_utils/topic_map_parser.py +130 -0
- rolfedh_doc_utils-0.1.4/doc_utils/unused_adoc.py +53 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/doc_utils/unused_images.py +3 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/pyproject.toml +2 -2
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4/rolfedh_doc_utils.egg-info}/PKG-INFO +32 -4
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/rolfedh_doc_utils.egg-info/SOURCES.txt +5 -0
- rolfedh_doc_utils-0.1.4/rolfedh_doc_utils.egg-info/requires.txt +1 -0
- rolfedh_doc_utils-0.1.4/setup.py +45 -0
- rolfedh_doc_utils-0.1.4/tests/test_symlink_handling.py +91 -0
- rolfedh_doc_utils-0.1.4/tests/test_topic_map_parser.py +190 -0
- rolfedh_doc_utils-0.1.2/doc_utils/unused_adoc.py +0 -24
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/LICENSE +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/archive_unused_files.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/doc_utils/__init__.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/doc_utils/file_utils.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/doc_utils/scannability.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/doc_utils/unused_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/find_unused_attributes.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/rolfedh_doc_utils.egg-info/entry_points.txt +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/rolfedh_doc_utils.egg-info/top_level.txt +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/setup.cfg +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_archive_unused_files.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_cli_entry_points.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_file_utils.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_fixture_archive_unused_files.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_fixture_archive_unused_images.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_fixture_check_scannability.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_parse_exclude_list.py +0 -0
- {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_unused_attributes.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rolfedh-doc-utils
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: CLI tools for AsciiDoc documentation projects
|
|
5
5
|
Author: Rolfe Dlugy-Hegwer
|
|
6
6
|
License: MIT License
|
|
@@ -28,6 +28,7 @@ License: MIT License
|
|
|
28
28
|
Requires-Python: >=3.8
|
|
29
29
|
Description-Content-Type: text/markdown
|
|
30
30
|
License-File: LICENSE
|
|
31
|
+
Requires-Dist: PyYAML>=6.0
|
|
31
32
|
Dynamic: license-file
|
|
32
33
|
|
|
33
34
|
# doc-utils
|
|
@@ -50,11 +51,34 @@ A set of Python utilities and CLI tools to help technical writers maintain Ascii
|
|
|
50
51
|
|
|
51
52
|
### From PyPI
|
|
52
53
|
|
|
53
|
-
|
|
54
|
+
On modern Linux distributions, you may encounter an "externally-managed-environment" error. Use one of these methods:
|
|
54
55
|
|
|
56
|
+
**Option 1: pipx (Recommended for CLI tools)**
|
|
57
|
+
```sh
|
|
58
|
+
pipx install rolfedh-doc-utils
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Option 2: pip with --user flag**
|
|
62
|
+
```sh
|
|
63
|
+
pip install --user rolfedh-doc-utils
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Option 3: Traditional pip (may require virtual environment)**
|
|
55
67
|
```sh
|
|
56
68
|
pip install rolfedh-doc-utils
|
|
57
|
-
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Upgrading
|
|
72
|
+
|
|
73
|
+
To upgrade to the latest version:
|
|
74
|
+
|
|
75
|
+
```sh
|
|
76
|
+
# If installed with pipx:
|
|
77
|
+
pipx upgrade rolfedh-doc-utils
|
|
78
|
+
|
|
79
|
+
# If installed with pip:
|
|
80
|
+
pip install --upgrade rolfedh-doc-utils # or --user flag if needed
|
|
81
|
+
```
|
|
58
82
|
|
|
59
83
|
### For Development
|
|
60
84
|
|
|
@@ -116,6 +140,10 @@ Scans `.adoc` files in the current directory to report:
|
|
|
116
140
|
|
|
117
141
|
Scans the `./modules` and `./assemblies` directories for `.adoc` files that are not referenced. Optionally archives and deletes them.
|
|
118
142
|
|
|
143
|
+
Works with both:
|
|
144
|
+
- **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
|
|
145
|
+
- **Traditional AsciiDoc** repositories (uses `master.adoc` files)
|
|
146
|
+
|
|
119
147
|
➡️ See [`archive_unused_files.md`](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
|
|
120
148
|
|
|
121
149
|
---
|
|
@@ -211,7 +239,7 @@ If you see an error like `ModuleNotFoundError: No module named 'find_unused_attr
|
|
|
211
239
|
|
|
212
240
|
1. The package isn't installed. Run:
|
|
213
241
|
```sh
|
|
214
|
-
pip install rolfedh-doc-utils
|
|
242
|
+
pipx install rolfedh-doc-utils # or pip install --user rolfedh-doc-utils
|
|
215
243
|
```
|
|
216
244
|
|
|
217
245
|
2. You're trying to run the script directly without installation. Either:
|
|
@@ -18,11 +18,34 @@ A set of Python utilities and CLI tools to help technical writers maintain Ascii
|
|
|
18
18
|
|
|
19
19
|
### From PyPI
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
On modern Linux distributions, you may encounter an "externally-managed-environment" error. Use one of these methods:
|
|
22
22
|
|
|
23
|
+
**Option 1: pipx (Recommended for CLI tools)**
|
|
24
|
+
```sh
|
|
25
|
+
pipx install rolfedh-doc-utils
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
**Option 2: pip with --user flag**
|
|
29
|
+
```sh
|
|
30
|
+
pip install --user rolfedh-doc-utils
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
**Option 3: Traditional pip (may require virtual environment)**
|
|
23
34
|
```sh
|
|
24
35
|
pip install rolfedh-doc-utils
|
|
25
|
-
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Upgrading
|
|
39
|
+
|
|
40
|
+
To upgrade to the latest version:
|
|
41
|
+
|
|
42
|
+
```sh
|
|
43
|
+
# If installed with pipx:
|
|
44
|
+
pipx upgrade rolfedh-doc-utils
|
|
45
|
+
|
|
46
|
+
# If installed with pip:
|
|
47
|
+
pip install --upgrade rolfedh-doc-utils # or --user flag if needed
|
|
48
|
+
```
|
|
26
49
|
|
|
27
50
|
### For Development
|
|
28
51
|
|
|
@@ -84,6 +107,10 @@ Scans `.adoc` files in the current directory to report:
|
|
|
84
107
|
|
|
85
108
|
Scans the `./modules` and `./assemblies` directories for `.adoc` files that are not referenced. Optionally archives and deletes them.
|
|
86
109
|
|
|
110
|
+
Works with both:
|
|
111
|
+
- **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
|
|
112
|
+
- **Traditional AsciiDoc** repositories (uses `master.adoc` files)
|
|
113
|
+
|
|
87
114
|
➡️ See [`archive_unused_files.md`](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
|
|
88
115
|
|
|
89
116
|
---
|
|
@@ -179,7 +206,7 @@ If you see an error like `ModuleNotFoundError: No module named 'find_unused_attr
|
|
|
179
206
|
|
|
180
207
|
1. The package isn't installed. Run:
|
|
181
208
|
```sh
|
|
182
|
-
pip install rolfedh-doc-utils
|
|
209
|
+
pipx install rolfedh-doc-utils # or pip install --user rolfedh-doc-utils
|
|
183
210
|
```
|
|
184
211
|
|
|
185
212
|
2. You're trying to run the script directly without installation. Either:
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# doc_utils/topic_map_parser.py
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import yaml
|
|
5
|
+
import glob
|
|
6
|
+
|
|
7
|
+
def detect_repo_type(base_path='.'):
    """
    Detect whether the repository uses topic maps (OpenShift-docs style)
    or master.adoc files (traditional style).

    Args:
        base_path: Root directory of the repository to inspect.

    Returns:
        'topic_map' - if a _topic_maps directory with .yml files exists
        'master_adoc' - if a master.adoc file is found anywhere under base_path
        'unknown' - if neither pattern is detected
    """
    topic_maps_dir = os.path.join(base_path, '_topic_maps')

    # Topic maps are checked first, so they win when both layouts are present.
    if os.path.isdir(topic_maps_dir) and glob.glob(os.path.join(topic_maps_dir, '*.yml')):
        return 'topic_map'

    for root, dirs, files in os.walk(base_path):
        # Prune symlinked directories in place so the walk never descends into
        # them; this keeps circular links from causing endless traversal.
        dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))]

        # A single hit settles the answer, so stop walking immediately instead
        # of scanning the rest of the tree.
        if 'master.adoc' in files:
            return 'master_adoc'

    return 'unknown'
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def extract_files_from_topic_map(topic_map_path):
    """
    Collect every .adoc file referenced by one topic map YAML file.

    Parsing is best-effort: if the file cannot be read or parsed, a warning
    is printed and whatever was collected up to that point is returned.

    Args:
        topic_map_path: Path of the topic map file to read.

    Returns:
        A set of file paths referenced in the topic map.
    """
    found = set()

    try:
        with open(topic_map_path, 'r', encoding='utf-8') as stream:
            # One file may contain several '---'-separated YAML documents,
            # each of which is a topic group; empty documents load as None.
            for document in yaml.safe_load_all(stream):
                if document is not None:
                    process_topic_group(document, found)
    except Exception as e:
        print(f"Warning: Could not parse topic map {topic_map_path}: {e}")

    return found
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def process_topic_group(group, referenced_files, parent_dir=''):
    """
    Recursively process a topic group to extract all file references.

    Args:
        group: Mapping parsed from a topic map. Only its 'Dir' and 'Topics'
            keys are read; anything that is not a dict is ignored.
        referenced_files: Set that is updated in place with the paths found.
        parent_dir: Directory prefix inherited from the enclosing group.

    Returns:
        None. Results are delivered through ``referenced_files``.
    """
    if not isinstance(group, dict):
        return

    # YAML scalars are not always strings ("Dir: 2024" loads as an int and a
    # bare "Dir:" as None), so normalise before calling os.path.join.
    own_dir = group.get('Dir')
    own_dir = '' if own_dir is None else str(own_dir)
    if parent_dir and own_dir:
        current_dir = os.path.join(parent_dir, own_dir)
    else:
        current_dir = own_dir or parent_dir

    topics = group.get('Topics', [])
    if not isinstance(topics, list):
        return

    for topic in topics:
        if not isinstance(topic, dict):
            continue

        # A missing or empty File names nothing; skip it rather than record a
        # bare "<dir>/.adoc" entry or crash on None.
        file_name = topic.get('File')
        if file_name is not None and file_name != '':
            file_path = str(file_name)
            if current_dir:
                file_path = os.path.join(current_dir, file_path)
            # Topic maps usually omit the extension.
            if not file_path.endswith('.adoc'):
                file_path += '.adoc'
            referenced_files.add(file_path)

        # A topic carrying its own Topics list is a sub-group. The recursive
        # call reads only the topic's Dir and Topics, so the topic's own File
        # (handled above) is not processed twice.
        if 'Topics' in topic:
            process_topic_group(topic, referenced_files, current_dir)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def get_all_topic_map_references(base_path='.'):
    """
    Gather the .adoc files referenced by every topic map of a repository.

    Args:
        base_path: Repository root; topic maps are read from the *.yml files
            directly inside its _topic_maps directory.

    Returns:
        A set of all referenced file paths (empty when there is no
        _topic_maps directory).
    """
    maps_dir = os.path.join(base_path, '_topic_maps')
    collected = set()

    if os.path.isdir(maps_dir):
        for map_path in glob.glob(os.path.join(maps_dir, '*.yml')):
            collected.update(extract_files_from_topic_map(map_path))

    return collected
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# doc_utils/unused_adoc.py
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from .file_utils import collect_files, write_manifest_and_archive
|
|
6
|
+
from .topic_map_parser import detect_repo_type, get_all_topic_map_references
|
|
7
|
+
|
|
8
|
+
def find_unused_adoc(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
    """
    Find .adoc files under ``scan_dirs`` that nothing references.

    A file counts as referenced when its basename matches the target of an
    ``include::`` directive in any .adoc file under the current directory or,
    in a topic-map repository, a file named by a topic map.

    Args:
        scan_dirs: Directories whose .adoc files are candidates.
        archive_dir: Passed through to write_manifest_and_archive.
        archive: Passed through to write_manifest_and_archive.
        exclude_dirs: Passed through to collect_files.
        exclude_files: Passed through to collect_files.

    Returns:
        Whatever write_manifest_and_archive returns for the unused files.
    """
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    repo_type = detect_repo_type()
    print(f"Detected repository type: {repo_type}")

    # Candidates: every .adoc file in the directories being scanned.
    asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)

    # Basenames of everything that is referenced from somewhere.
    referenced_names = set()
    if repo_type == 'topic_map':
        referenced_names.update(map(os.path.basename, get_all_topic_map_references()))

    # include:: directives are honoured for every repository type.
    include_re = re.compile(r'include::(.+?)\[')
    for file_path in collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # Only the file name matters; the directory part of the include
            # target is dropped.
            referenced_names.update(map(os.path.basename, include_re.findall(content)))
        except Exception as e:
            print(f"Warning: could not read {file_path}: {e}")

    # dict.fromkeys removes duplicates while keeping the original order.
    unused_files = list(dict.fromkeys(
        path for path in asciidoc_files
        if os.path.basename(path) not in referenced_names
    ))

    print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")

    return write_manifest_and_archive(
        unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
    )
|
|
@@ -7,6 +7,9 @@ from .file_utils import collect_files, write_manifest_and_archive
|
|
|
7
7
|
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
|
|
8
8
|
|
|
9
9
|
def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
|
|
10
|
+
# Print safety warning
|
|
11
|
+
print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
|
|
12
|
+
|
|
10
13
|
image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
|
|
11
14
|
adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
|
|
12
15
|
referenced_images = set()
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "rolfedh-doc-utils"
|
|
7
|
-
version = "0.1.2"
|
|
7
|
+
version = "0.1.4"
|
|
8
8
|
description = "CLI tools for AsciiDoc documentation projects"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
@@ -12,7 +12,7 @@ license = { file = "LICENSE" }
|
|
|
12
12
|
authors = [
|
|
13
13
|
{ name="Rolfe Dlugy-Hegwer" }
|
|
14
14
|
]
|
|
15
|
-
dependencies = []
|
|
15
|
+
dependencies = ["PyYAML>=6.0"]
|
|
16
16
|
|
|
17
17
|
[project.scripts]
|
|
18
18
|
check-scannability = "check_scannability:main"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rolfedh-doc-utils
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: CLI tools for AsciiDoc documentation projects
|
|
5
5
|
Author: Rolfe Dlugy-Hegwer
|
|
6
6
|
License: MIT License
|
|
@@ -28,6 +28,7 @@ License: MIT License
|
|
|
28
28
|
Requires-Python: >=3.8
|
|
29
29
|
Description-Content-Type: text/markdown
|
|
30
30
|
License-File: LICENSE
|
|
31
|
+
Requires-Dist: PyYAML>=6.0
|
|
31
32
|
Dynamic: license-file
|
|
32
33
|
|
|
33
34
|
# doc-utils
|
|
@@ -50,11 +51,34 @@ A set of Python utilities and CLI tools to help technical writers maintain Ascii
|
|
|
50
51
|
|
|
51
52
|
### From PyPI
|
|
52
53
|
|
|
53
|
-
|
|
54
|
+
On modern Linux distributions, you may encounter an "externally-managed-environment" error. Use one of these methods:
|
|
54
55
|
|
|
56
|
+
**Option 1: pipx (Recommended for CLI tools)**
|
|
57
|
+
```sh
|
|
58
|
+
pipx install rolfedh-doc-utils
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Option 2: pip with --user flag**
|
|
62
|
+
```sh
|
|
63
|
+
pip install --user rolfedh-doc-utils
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Option 3: Traditional pip (may require virtual environment)**
|
|
55
67
|
```sh
|
|
56
68
|
pip install rolfedh-doc-utils
|
|
57
|
-
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Upgrading
|
|
72
|
+
|
|
73
|
+
To upgrade to the latest version:
|
|
74
|
+
|
|
75
|
+
```sh
|
|
76
|
+
# If installed with pipx:
|
|
77
|
+
pipx upgrade rolfedh-doc-utils
|
|
78
|
+
|
|
79
|
+
# If installed with pip:
|
|
80
|
+
pip install --upgrade rolfedh-doc-utils # or --user flag if needed
|
|
81
|
+
```
|
|
58
82
|
|
|
59
83
|
### For Development
|
|
60
84
|
|
|
@@ -116,6 +140,10 @@ Scans `.adoc` files in the current directory to report:
|
|
|
116
140
|
|
|
117
141
|
Scans the `./modules` and `./assemblies` directories for `.adoc` files that are not referenced. Optionally archives and deletes them.
|
|
118
142
|
|
|
143
|
+
Works with both:
|
|
144
|
+
- **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
|
|
145
|
+
- **Traditional AsciiDoc** repositories (uses `master.adoc` files)
|
|
146
|
+
|
|
119
147
|
➡️ See [`archive_unused_files.md`](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
|
|
120
148
|
|
|
121
149
|
---
|
|
@@ -211,7 +239,7 @@ If you see an error like `ModuleNotFoundError: No module named 'find_unused_attr
|
|
|
211
239
|
|
|
212
240
|
1. The package isn't installed. Run:
|
|
213
241
|
```sh
|
|
214
|
-
pip install rolfedh-doc-utils
|
|
242
|
+
pipx install rolfedh-doc-utils # or pip install --user rolfedh-doc-utils
|
|
215
243
|
```
|
|
216
244
|
|
|
217
245
|
2. You're trying to run the script directly without installation. Either:
|
|
@@ -5,9 +5,11 @@ archive_unused_images.py
|
|
|
5
5
|
check_scannability.py
|
|
6
6
|
find_unused_attributes.py
|
|
7
7
|
pyproject.toml
|
|
8
|
+
setup.py
|
|
8
9
|
doc_utils/__init__.py
|
|
9
10
|
doc_utils/file_utils.py
|
|
10
11
|
doc_utils/scannability.py
|
|
12
|
+
doc_utils/topic_map_parser.py
|
|
11
13
|
doc_utils/unused_adoc.py
|
|
12
14
|
doc_utils/unused_attributes.py
|
|
13
15
|
doc_utils/unused_images.py
|
|
@@ -15,6 +17,7 @@ rolfedh_doc_utils.egg-info/PKG-INFO
|
|
|
15
17
|
rolfedh_doc_utils.egg-info/SOURCES.txt
|
|
16
18
|
rolfedh_doc_utils.egg-info/dependency_links.txt
|
|
17
19
|
rolfedh_doc_utils.egg-info/entry_points.txt
|
|
20
|
+
rolfedh_doc_utils.egg-info/requires.txt
|
|
18
21
|
rolfedh_doc_utils.egg-info/top_level.txt
|
|
19
22
|
tests/test_archive_unused_files.py
|
|
20
23
|
tests/test_archive_unused_images.py
|
|
@@ -25,4 +28,6 @@ tests/test_fixture_archive_unused_files.py
|
|
|
25
28
|
tests/test_fixture_archive_unused_images.py
|
|
26
29
|
tests/test_fixture_check_scannability.py
|
|
27
30
|
tests/test_parse_exclude_list.py
|
|
31
|
+
tests/test_symlink_handling.py
|
|
32
|
+
tests/test_topic_map_parser.py
|
|
28
33
|
tests/test_unused_attributes.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
PyYAML>=6.0
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Setup script for doc-utils package.
|
|
4
|
+
This file is only needed if we want to customize the installation process.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from setuptools import setup
|
|
8
|
+
from setuptools.command.install import install
|
|
9
|
+
from setuptools.command.develop import develop
|
|
10
|
+
from setuptools.command.egg_info import egg_info
|
|
11
|
+
|
|
12
|
+
def custom_post_install():
    """Print the post-installation banner with its safety reminders."""
    rule = "=" * 60
    banner = [
        "\n" + rule,
        "✅ doc-utils installed successfully!",
        "\n⚠️ IMPORTANT: Safety First",
        " • Work in a git branch (never main/master)",
        " • Run without --archive first to preview",
        " • Review changes with git diff",
        rule + "\n",
    ]
    # A single print of the joined lines emits the same text as printing each
    # line separately.
    print("\n".join(banner))
|
|
21
|
+
|
|
22
|
+
class CustomInstallCommand(install):
    """setuptools ``install`` command that prints the safety banner afterwards."""

    def run(self):
        # Run the stock install first, then show the banner.
        super().run()
        custom_post_install()
|
|
27
|
+
|
|
28
|
+
class CustomDevelopCommand(develop):
    """setuptools ``develop`` command that prints the safety banner afterwards."""

    def run(self):
        # Run the stock develop step first, then show the banner.
        super().run()
        custom_post_install()
|
|
33
|
+
|
|
34
|
+
class CustomEggInfoCommand(egg_info):
    """setuptools ``egg_info`` command; currently adds nothing to the stock behaviour."""

    def run(self):
        # Plain pass-through to the base command (no banner here).
        super().run()
|
|
38
|
+
|
|
39
|
+
# Only the command overrides are passed here; no other arguments are given
# to setup().
setup(
    cmdclass=dict(
        install=CustomInstallCommand,
        develop=CustomDevelopCommand,
        egg_info=CustomEggInfoCommand,
    ),
)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Test that topic_map_parser handles symbolic links correctly without freezing."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
import pytest
|
|
6
|
+
from doc_utils.topic_map_parser import detect_repo_type
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_detect_repo_type_with_circular_symlinks(tmp_path):
    """Test that detect_repo_type doesn't freeze with circular symbolic links."""

    # Create a directory structure with circular symlinks
    modules_dir = tmp_path / "modules"
    modules_dir.mkdir()

    # Create a master.adoc file in modules
    (modules_dir / "master.adoc").write_text("= Test Doc\n")

    # Create a circular symlink: modules/modules -> ../modules.
    # A relative target is resolved from the directory that holds the link
    # (tmp_path/modules), so "../modules" leads back to tmp_path/modules.
    # The previous "../../modules" target pointed outside tmp_path and
    # dangled, so no cycle was actually created.
    circular_link = modules_dir / "modules"
    try:
        # target_is_directory is needed for directory links on Windows and is
        # ignored elsewhere.
        os.symlink("../modules", str(circular_link), target_is_directory=True)
    except OSError:
        pytest.skip("Cannot create symbolic links on this system")

    # This should not freeze - it should skip the symlink
    repo_type = detect_repo_type(str(tmp_path))
    assert repo_type == "master_adoc"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_detect_repo_type_with_nested_circular_symlinks(tmp_path):
    """Test detection with nested directories containing circular symlinks."""

    # Create nested structure
    (tmp_path / "assemblies").mkdir()
    modules_dir = tmp_path / "modules"
    modules_dir.mkdir()

    # Create master.adoc
    (tmp_path / "master.adoc").write_text("= Main Doc\n")

    # Create archive directory with circular symlinks (similar to real case)
    archive_dir = tmp_path / ".archive" / "archived-content" / "modules"
    archive_dir.mkdir(parents=True)

    try:
        # Create circular symlink in archive: the link lives inside
        # archive_dir and "../modules" resolves back to archive_dir itself.
        # The previous "../../modules" target resolved to the non-existent
        # tmp_path/.archive/modules, leaving a dangling link and no cycle.
        os.symlink("../modules", str(archive_dir / "modules"), target_is_directory=True)
    except OSError:
        pytest.skip("Cannot create symbolic links on this system")

    # Should detect master_adoc without freezing
    repo_type = detect_repo_type(str(tmp_path))
    assert repo_type == "master_adoc"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_detect_repo_type_skips_symlink_directories(tmp_path):
    """Symlinked directories must be ignored while searching for master.adoc."""

    # Scenario 1: a real directory with master.adoc plus a link to it.
    real_dir = tmp_path / "real_modules"
    real_dir.mkdir()
    (real_dir / "master.adoc").write_text("= Real Master\n")

    linked_dir = tmp_path / "linked_modules"
    try:
        linked_dir.symlink_to(str(real_dir))
    except OSError:
        pytest.skip("Cannot create symbolic links on this system")

    # The real directory alone is enough for a positive detection.
    assert detect_repo_type(str(tmp_path)) == "master_adoc"

    # Scenario 2: master.adoc is reachable only through a symlink.
    external_dir = tmp_path / "external"
    external_dir.mkdir()
    (external_dir / "master.adoc").write_text("= External Master\n")

    only_symlink_path = tmp_path / "only_symlink_test"
    only_symlink_path.mkdir()
    (only_symlink_path / "linked_dir").symlink_to(str(external_dir))

    # Nothing may be found, because the only route goes through the link.
    assert detect_repo_type(str(only_symlink_path)) == "unknown"
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import os
|
|
3
|
+
import tempfile
|
|
4
|
+
import yaml
|
|
5
|
+
from doc_utils.topic_map_parser import (
|
|
6
|
+
detect_repo_type,
|
|
7
|
+
extract_files_from_topic_map,
|
|
8
|
+
process_topic_group,
|
|
9
|
+
get_all_topic_map_references
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestDetectRepoType:
    """detect_repo_type results for each repository layout."""

    def test_detect_topic_map_repo(self):
        """A _topic_maps directory holding a .yml file yields 'topic_map'."""
        with tempfile.TemporaryDirectory() as repo_root:
            maps_dir = os.path.join(repo_root, '_topic_maps')
            os.makedirs(maps_dir)

            # Any .yml file in the directory is sufficient.
            with open(os.path.join(maps_dir, '_topic_map.yml'), 'w') as handle:
                handle.write("---\nName: Test\nDir: test\n")

            detected = detect_repo_type(repo_root)
            assert detected == 'topic_map'

    def test_detect_master_adoc_repo(self):
        """A master.adoc file at the root yields 'master_adoc'."""
        with tempfile.TemporaryDirectory() as repo_root:
            with open(os.path.join(repo_root, 'master.adoc'), 'w') as handle:
                handle.write("= Master Document\n")

            detected = detect_repo_type(repo_root)
            assert detected == 'master_adoc'

    def test_detect_unknown_repo(self):
        """An empty directory matches no layout and yields 'unknown'."""
        with tempfile.TemporaryDirectory() as repo_root:
            detected = detect_repo_type(repo_root)
            assert detected == 'unknown'
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class TestExtractFilesFromTopicMap:
    """Tests for extract_files_from_topic_map."""

    @staticmethod
    def _extract(yaml_text):
        """Write yaml_text to a temporary topic map and return the files parsed from it."""
        with tempfile.TemporaryDirectory() as tmpdir:
            topic_map = os.path.join(tmpdir, 'topic_map.yml')
            with open(topic_map, 'w', encoding='utf-8') as f:
                f.write(yaml_text)
            # The file is closed before it is parsed: reopening a temporary
            # file by name while it is still open does not work on every
            # platform.
            return extract_files_from_topic_map(topic_map)

    def test_extract_simple_topic_map(self):
        """Test extracting files from a simple topic map."""
        files = self._extract(
            "---\n"
            "Name: Overview\n"
            "Dir: welcome\n"
            "Topics:\n"
            "- Name: Welcome\n"
            "  File: index\n"
            "- Name: Introduction\n"
            "  File: intro\n"
        )
        # Expected paths are built with os.path.join, like the parser's own.
        assert files == {
            os.path.join('welcome', 'index.adoc'),
            os.path.join('welcome', 'intro.adoc'),
        }

    def test_extract_nested_topic_map(self):
        """Test extracting files from a topic map with nested topics."""
        files = self._extract(
            "---\n"
            "Name: Architecture\n"
            "Dir: architecture\n"
            "Topics:\n"
            "- Name: Overview\n"
            "  File: index\n"
            "- Name: Components\n"
            "  Dir: components\n"
            "  Topics:\n"
            "  - Name: API Server\n"
            "    File: api-server\n"
            "  - Name: Controller\n"
            "    File: controller\n"
        )
        assert files == {
            os.path.join('architecture', 'index.adoc'),
            os.path.join('architecture', 'components', 'api-server.adoc'),
            os.path.join('architecture', 'components', 'controller.adoc'),
        }

    def test_extract_multiple_documents(self):
        """Test extracting files from a YAML file with multiple documents."""
        files = self._extract(
            "---\n"
            "Name: Overview\n"
            "Dir: welcome\n"
            "Topics:\n"
            "- Name: Welcome\n"
            "  File: index\n"
            "---\n"
            "Name: Installation\n"
            "Dir: install\n"
            "Topics:\n"
            "- Name: Quick Start\n"
            "  File: quickstart\n"
        )
        assert files == {
            os.path.join('welcome', 'index.adoc'),
            os.path.join('install', 'quickstart.adoc'),
        }
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class TestProcessTopicGroup:
    """Tests for process_topic_group."""

    def test_process_simple_group(self):
        """Test processing a simple topic group."""
        group = {
            'Name': 'Test Group',
            'Dir': 'test',
            'Topics': [
                {'Name': 'Topic 1', 'File': 'topic1'},
                {'Name': 'Topic 2', 'File': 'topic2'},
            ],
        }

        referenced_files = set()
        process_topic_group(group, referenced_files)

        # Expected paths are built with os.path.join because the parser
        # joins with the platform separator.
        assert os.path.join('test', 'topic1.adoc') in referenced_files
        assert os.path.join('test', 'topic2.adoc') in referenced_files

    def test_process_group_with_parent_dir(self):
        """Test processing a topic group with a parent directory."""
        group = {
            'Name': 'Subgroup',
            'Dir': 'sub',
            'Topics': [
                {'Name': 'Topic', 'File': 'topic'},
            ],
        }

        referenced_files = set()
        process_topic_group(group, referenced_files, parent_dir='parent')

        assert os.path.join('parent', 'sub', 'topic.adoc') in referenced_files
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class TestGetAllTopicMapReferences:
    """Tests for get_all_topic_map_references."""

    def test_get_all_references(self):
        """Test getting all references from all topic maps."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # Create _topic_maps directory
            topic_maps_dir = os.path.join(tmpdir, '_topic_maps')
            os.makedirs(topic_maps_dir)

            # Create two topic maps, each with one group holding one topic
            for n in (1, 2):
                topic_map = os.path.join(topic_maps_dir, f'map{n}.yml')
                # Explicit encoding: the parser reads topic maps as UTF-8.
                with open(topic_map, 'w', encoding='utf-8') as f:
                    f.write(
                        "---\n"
                        f"Name: Group{n}\n"
                        f"Dir: group{n}\n"
                        "Topics:\n"
                        f"- Name: Topic{n}\n"
                        f"  File: topic{n}\n"
                    )

            references = get_all_topic_map_references(tmpdir)
            # Expected paths are built with os.path.join, like the parser's own.
            assert references == {
                os.path.join('group1', 'topic1.adoc'),
                os.path.join('group2', 'topic2.adoc'),
            }
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
# doc_utils/unused_adoc.py
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import re
|
|
5
|
-
from .file_utils import collect_files, write_manifest_and_archive
|
|
6
|
-
|
|
7
|
-
def find_unused_adoc(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
|
|
8
|
-
asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
|
|
9
|
-
include_pattern = re.compile(r'include::(.+?)\[')
|
|
10
|
-
included_files = set()
|
|
11
|
-
adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
|
|
12
|
-
for file_path in adoc_files:
|
|
13
|
-
try:
|
|
14
|
-
with open(file_path, 'r', encoding='utf-8') as f:
|
|
15
|
-
content = f.read()
|
|
16
|
-
includes = include_pattern.findall(content)
|
|
17
|
-
included_files.update(os.path.basename(include) for include in includes)
|
|
18
|
-
except Exception as e:
|
|
19
|
-
print(f"Warning: could not read {file_path}: {e}")
|
|
20
|
-
unused_files = [f for f in asciidoc_files if os.path.basename(f) not in included_files]
|
|
21
|
-
unused_files = list(dict.fromkeys(unused_files))
|
|
22
|
-
return write_manifest_and_archive(
|
|
23
|
-
unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
|
|
24
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/rolfedh_doc_utils.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/rolfedh_doc_utils.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/rolfedh_doc_utils.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_fixture_archive_unused_files.py
RENAMED
|
File without changes
|
{rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_fixture_archive_unused_images.py
RENAMED
|
File without changes
|
{rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.4}/tests/test_fixture_check_scannability.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|