rolfedh-doc-utils 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,130 @@
1
+ # doc_utils/topic_map_parser.py
2
+
3
+ import os
4
+ import yaml
5
+ import glob
6
+
7
def detect_repo_type(base_path='.'):
    """
    Detect whether the repository uses topic maps (OpenShift-docs style)
    or master.adoc files (traditional style).

    Args:
        base_path: Root directory of the repository to inspect.

    Returns:
        'topic_map' - if _topic_maps directory with .yml files exists
        'master_adoc' - if master.adoc files are found
        'unknown' - if neither pattern is detected
    """
    topic_maps_dir = os.path.join(base_path, '_topic_maps')

    # Topic maps are checked first, so a repository that has both a populated
    # _topic_maps directory and master.adoc files is reported as 'topic_map'.
    if os.path.isdir(topic_maps_dir) and glob.glob(os.path.join(topic_maps_dir, '*.yml')):
        return 'topic_map'

    for root, dirs, files in os.walk(base_path):
        # Prune symbolic link directories in place so the walk never descends
        # into them (guards against link cycles).
        dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))]

        # Only existence matters, so stop walking at the first hit instead of
        # collecting every master.adoc in the tree.
        if 'master.adoc' in files:
            return 'master_adoc'

    return 'unknown'
39
+
40
+
41
def extract_files_from_topic_map(topic_map_path):
    """
    Collect every .adoc file referenced by a single topic map YAML file.

    Each non-empty YAML document in the file is handed to
    process_topic_group. If opening, parsing, or processing fails, a warning
    is printed and whatever was gathered up to that point is returned.

    Returns a set of file paths referenced in the topic map.
    """
    found = set()

    try:
        with open(topic_map_path, 'r', encoding='utf-8') as stream:
            # safe_load_all yields one object per YAML document in the file
            for entry in yaml.safe_load_all(stream):
                if entry is not None:
                    process_topic_group(entry, found)
    except Exception as e:
        print(f"Warning: Could not parse topic map {topic_map_path}: {e}")

    return found
65
+
66
+
67
def process_topic_group(group, referenced_files, parent_dir=''):
    """
    Recursively process a topic group to extract all file references.

    Args:
        group: Parsed topic-group mapping. Only its 'Dir' and 'Topics' keys
            are read; anything that is not a dict is ignored.
        referenced_files: Set that is updated in place with the path of every
            'File' entry found ('.adoc' is appended when missing).
        parent_dir: Directory prefix inherited from the enclosing group.

    Returns:
        None. Results are accumulated in referenced_files.

    Malformed entries (a 'File' or 'Dir' that is empty or not a string, a
    topic that is not a dict, a 'Topics' value that is not a list) are
    skipped instead of raising, so one bad entry does not abort the rest of
    the topic map.
    """
    if not isinstance(group, dict):
        return

    current_dir = _join_topic_dirs(parent_dir, group.get('Dir', ''))

    topics = group.get('Topics', [])
    if not isinstance(topics, list):
        return

    for topic in topics:
        if not isinstance(topic, dict):
            continue

        # A topic with a usable File value is a leaf reference.
        file_name = topic.get('File')
        if isinstance(file_name, str) and file_name:
            file_path = os.path.join(current_dir, file_name) if current_dir else file_name
            if not file_path.endswith('.adoc'):
                file_path += '.adoc'
            referenced_files.add(file_path)

        # A topic with nested Topics is a sub-group. Pass along only its Dir
        # and Topics so the recursion resolves the Dir against current_dir.
        if 'Topics' in topic:
            process_topic_group(
                {'Dir': topic.get('Dir', ''), 'Topics': topic['Topics']},
                referenced_files,
                current_dir,
            )


def _join_topic_dirs(parent_dir, child_dir):
    """Combine an inherited directory with a group's own Dir, ignoring unusable values."""
    parent = parent_dir if isinstance(parent_dir, str) else ''
    child = child_dir if isinstance(child_dir, str) else ''
    if parent and child:
        return os.path.join(parent, child)
    return parent or child
111
+
112
+
113
def get_all_topic_map_references(base_path='.'):
    """
    Gather the .adoc files referenced by every topic map under base_path.

    Looks for *.yml files directly inside <base_path>/_topic_maps and merges
    the references extracted from each one.

    Returns a set of all referenced file paths (empty when the _topic_maps
    directory does not exist).
    """
    maps_root = os.path.join(base_path, '_topic_maps')
    collected = set()

    if os.path.isdir(maps_root):
        for map_path in glob.glob(os.path.join(maps_root, '*.yml')):
            collected.update(extract_files_from_topic_map(map_path))

    return collected
doc_utils/unused_adoc.py CHANGED
@@ -3,22 +3,51 @@
3
3
  import os
4
4
  import re
5
5
  from .file_utils import collect_files, write_manifest_and_archive
6
+ from .topic_map_parser import detect_repo_type, get_all_topic_map_references
6
7
 
7
8
def find_unused_adoc(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
    """
    Report .adoc files under scan_dirs that nothing appears to reference.

    A file counts as referenced when its basename matches either a topic map
    entry (only when the repository is detected as 'topic_map') or the target
    of an include:: directive found in any .adoc file collected from the
    current directory. Matching is by basename only.

    The candidate list is handed to write_manifest_and_archive together with
    archive_dir and the archive flag; that function's return value is
    returned unchanged.
    """
    print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")

    repo_type = detect_repo_type()
    print(f"Detected repository type: {repo_type}")

    # Candidates: every .adoc file inside the requested scan directories
    asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)

    # Basenames of everything that is referenced from somewhere
    referenced_names = set()

    if repo_type == 'topic_map':
        # OpenShift-docs style: topic maps list the assemblies in use
        for ref in get_all_topic_map_references():
            referenced_names.add(os.path.basename(ref))

    # include:: directives are scanned regardless of the repository type
    include_pattern = re.compile(r'include::(.+?)\[')
    for source_path in collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files):
        try:
            with open(source_path, 'r', encoding='utf-8') as handle:
                text = handle.read()
        except Exception as e:
            print(f"Warning: could not read {source_path}: {e}")
            continue
        referenced_names.update(
            os.path.basename(target) for target in include_pattern.findall(text)
        )

    # dict.fromkeys drops duplicates while keeping first-seen order
    unused_files = list(dict.fromkeys(
        candidate for candidate in asciidoc_files
        if os.path.basename(candidate) not in referenced_names
    ))

    print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")

    return write_manifest_and_archive(
        unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
    )
@@ -7,6 +7,9 @@ from .file_utils import collect_files, write_manifest_and_archive
7
7
  IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
8
8
 
9
9
  def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
10
+ # Print safety warning
11
+ print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
12
+
10
13
  image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
11
14
  adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
12
15
  referenced_images = set()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -28,6 +28,7 @@ License: MIT License
28
28
  Requires-Python: >=3.8
29
29
  Description-Content-Type: text/markdown
30
30
  License-File: LICENSE
31
+ Requires-Dist: PyYAML>=6.0
31
32
  Dynamic: license-file
32
33
 
33
34
  # doc-utils
@@ -50,11 +51,34 @@ A set of Python utilities and CLI tools to help technical writers maintain Ascii
50
51
 
51
52
  ### From PyPI
52
53
 
53
- Install the package from PyPI:
54
+ On modern Linux distributions, you may encounter an "externally-managed-environment" error. Use one of these methods:
54
55
 
56
+ **Option 1: pipx (Recommended for CLI tools)**
57
+ ```sh
58
+ pipx install rolfedh-doc-utils
59
+ ```
60
+
61
+ **Option 2: pip with --user flag**
62
+ ```sh
63
+ pip install --user rolfedh-doc-utils
64
+ ```
65
+
66
+ **Option 3: Traditional pip (may require a virtual environment)**
55
67
  ```sh
56
68
  pip install rolfedh-doc-utils
57
- ````
69
+ ```
70
+
71
+ ### Upgrading
72
+
73
+ To upgrade to the latest version:
74
+
75
+ ```sh
76
+ # If installed with pipx:
77
+ pipx upgrade rolfedh-doc-utils
78
+
79
+ # If installed with pip:
80
+ pip install --upgrade rolfedh-doc-utils # add --user if you installed with the --user flag
81
+ ```
58
82
 
59
83
  ### For Development
60
84
 
@@ -116,6 +140,10 @@ Scans `.adoc` files in the current directory to report:
116
140
 
117
141
  Scans the `./modules` and `./assemblies` directories for `.adoc` files that are not referenced. Optionally archives and deletes them.
118
142
 
143
+ Works with both:
144
+ - **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
145
+ - **Traditional AsciiDoc** repositories (uses `master.adoc` files)
146
+
119
147
  ➡️ See [`archive_unused_files.md`](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
120
148
 
121
149
  ---
@@ -211,7 +239,7 @@ If you see an error like `ModuleNotFoundError: No module named 'find_unused_attr
211
239
 
212
240
  1. The package isn't installed. Run:
213
241
  ```sh
214
- pip install rolfedh-doc-utils
242
+ pipx install rolfedh-doc-utils # or pip install --user rolfedh-doc-utils
215
243
  ```
216
244
 
217
245
  2. You're trying to run the script directly without installation. Either:
@@ -5,12 +5,13 @@ find_unused_attributes.py,sha256=fk-K32eoCVHxoj7RiBNgSmX1arBLuwYfdSAOMc-wIx0,167
5
5
  doc_utils/__init__.py,sha256=qqZR3lohzkP63soymrEZPBGzzk6-nFzi4_tSffjmu_0,74
6
6
  doc_utils/file_utils.py,sha256=fpTh3xx759sF8sNocdn_arsP3KAv8XA6cTQTAVIZiZg,4247
7
7
  doc_utils/scannability.py,sha256=XwlmHqDs69p_V36X7DLjPTy0DUoLszSGqYjJ9wE-3hg,982
8
- doc_utils/unused_adoc.py,sha256=gvP1eClEbVebN2jXA41-bPnbVhYz6JHEIbGZCg8JD0s,1115
8
+ doc_utils/topic_map_parser.py,sha256=tKcIO1m9r2K6dvPRGue58zqMr0O2zKU1gnZMzEE3U6o,4571
9
+ doc_utils/unused_adoc.py,sha256=Bx8TH7twhtuDfxzrPLOWOzTLc5YBhIqrCNaKJJR_V38,2312
9
10
  doc_utils/unused_attributes.py,sha256=HBgmHelqearfWl3TTC2bZGiJytjLADIgiGQUNKqXXPg,1847
10
- doc_utils/unused_images.py,sha256=P9vcm00BidrLmxhjeczBtiFU-1wgfN5nCYdZjeCH1kM,1329
11
- rolfedh_doc_utils-0.1.2.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
12
- rolfedh_doc_utils-0.1.2.dist-info/METADATA,sha256=cAvrckQs2hogV3m8mpBzlkVSWBi3a5sUHxaQOi2ipKU,7389
13
- rolfedh_doc_utils-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- rolfedh_doc_utils-0.1.2.dist-info/entry_points.txt,sha256=i8LqEsp0KD4YyVI_7wQ1TMgCuag32D7gQes6bLufmtM,216
15
- rolfedh_doc_utils-0.1.2.dist-info/top_level.txt,sha256=BkaYN3KbtNvLQjs-QGBKCJb5UAtjEbC_IqxSSIN9P-w,95
16
- rolfedh_doc_utils-0.1.2.dist-info/RECORD,,
11
+ doc_utils/unused_images.py,sha256=nqn36Bbrmon2KlGlcaruNjJJvTQ8_9H0WU9GvCW7rW8,1456
12
+ rolfedh_doc_utils-0.1.4.dist-info/licenses/LICENSE,sha256=vLxtwMVOJA_hEy8b77niTkdmQI9kNJskXHq0dBS36e0,1075
13
+ rolfedh_doc_utils-0.1.4.dist-info/METADATA,sha256=lPNDY0Lu9pBKxxzCXZp5TqSe3DuBqvBHYPVFyLWpP14,8152
14
+ rolfedh_doc_utils-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ rolfedh_doc_utils-0.1.4.dist-info/entry_points.txt,sha256=i8LqEsp0KD4YyVI_7wQ1TMgCuag32D7gQes6bLufmtM,216
16
+ rolfedh_doc_utils-0.1.4.dist-info/top_level.txt,sha256=BkaYN3KbtNvLQjs-QGBKCJb5UAtjEbC_IqxSSIN9P-w,95
17
+ rolfedh_doc_utils-0.1.4.dist-info/RECORD,,