rolfedh-doc-utils 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {rolfedh_doc_utils-0.1.2/rolfedh_doc_utils.egg-info → rolfedh_doc_utils-0.1.3}/PKG-INFO +32 -4
  2. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/README.md +30 -3
  3. rolfedh_doc_utils-0.1.3/doc_utils/topic_map_parser.py +122 -0
  4. rolfedh_doc_utils-0.1.3/doc_utils/unused_adoc.py +50 -0
  5. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/pyproject.toml +2 -2
  6. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3/rolfedh_doc_utils.egg-info}/PKG-INFO +32 -4
  7. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/rolfedh_doc_utils.egg-info/SOURCES.txt +3 -0
  8. rolfedh_doc_utils-0.1.3/rolfedh_doc_utils.egg-info/requires.txt +1 -0
  9. rolfedh_doc_utils-0.1.3/tests/test_topic_map_parser.py +190 -0
  10. rolfedh_doc_utils-0.1.2/doc_utils/unused_adoc.py +0 -24
  11. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/LICENSE +0 -0
  12. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/archive_unused_files.py +0 -0
  13. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/archive_unused_images.py +0 -0
  14. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/check_scannability.py +0 -0
  15. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/doc_utils/__init__.py +0 -0
  16. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/doc_utils/file_utils.py +0 -0
  17. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/doc_utils/scannability.py +0 -0
  18. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/doc_utils/unused_attributes.py +0 -0
  19. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/doc_utils/unused_images.py +0 -0
  20. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/find_unused_attributes.py +0 -0
  21. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
  22. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/rolfedh_doc_utils.egg-info/entry_points.txt +0 -0
  23. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/rolfedh_doc_utils.egg-info/top_level.txt +0 -0
  24. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/setup.cfg +0 -0
  25. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_archive_unused_files.py +0 -0
  26. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_archive_unused_images.py +0 -0
  27. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_check_scannability.py +0 -0
  28. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_cli_entry_points.py +0 -0
  29. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_file_utils.py +0 -0
  30. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_fixture_archive_unused_files.py +0 -0
  31. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_fixture_archive_unused_images.py +0 -0
  32. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_fixture_check_scannability.py +0 -0
  33. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_parse_exclude_list.py +0 -0
  34. {rolfedh_doc_utils-0.1.2 → rolfedh_doc_utils-0.1.3}/tests/test_unused_attributes.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -28,6 +28,7 @@ License: MIT License
28
28
  Requires-Python: >=3.8
29
29
  Description-Content-Type: text/markdown
30
30
  License-File: LICENSE
31
+ Requires-Dist: PyYAML>=6.0
31
32
  Dynamic: license-file
32
33
 
33
34
  # doc-utils
@@ -50,11 +51,34 @@ A set of Python utilities and CLI tools to help technical writers maintain Ascii
50
51
 
51
52
  ### From PyPI
52
53
 
53
- Install the package from PyPI:
54
+ On modern Linux distributions, you may encounter an "externally-managed-environment" error. Use one of these methods:
54
55
 
56
+ **Option 1: pipx (Recommended for CLI tools)**
57
+ ```sh
58
+ pipx install rolfedh-doc-utils
59
+ ```
60
+
61
+ **Option 2: pip with --user flag**
62
+ ```sh
63
+ pip install --user rolfedh-doc-utils
64
+ ```
65
+
66
+ **Option 3: Traditional pip (may require virtual environment)**
55
67
  ```sh
56
68
  pip install rolfedh-doc-utils
57
- ````
69
+ ```
70
+
71
+ ### Upgrading
72
+
73
+ To upgrade to the latest version:
74
+
75
+ ```sh
76
+ # If installed with pipx:
77
+ pipx upgrade rolfedh-doc-utils
78
+
79
+ # If installed with pip:
80
+ pip install --upgrade rolfedh-doc-utils # or --user flag if needed
81
+ ```
58
82
 
59
83
  ### For Development
60
84
 
@@ -116,6 +140,10 @@ Scans `.adoc` files in the current directory to report:
116
140
 
117
141
  Scans the `./modules` and `./assemblies` directories for `.adoc` files that are not referenced. Optionally archives and deletes them.
118
142
 
143
+ Works with both:
144
+ - **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
145
+ - **Traditional AsciiDoc** repositories (uses `master.adoc` files)
146
+
119
147
  ➡️ See [`archive_unused_files.md`](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
120
148
 
121
149
  ---
@@ -211,7 +239,7 @@ If you see an error like `ModuleNotFoundError: No module named 'find_unused_attr
211
239
 
212
240
  1. The package isn't installed. Run:
213
241
  ```sh
214
- pip install rolfedh-doc-utils
242
+ pipx install rolfedh-doc-utils # or pip install --user rolfedh-doc-utils
215
243
  ```
216
244
 
217
245
  2. You're trying to run the script directly without installation. Either:
@@ -18,11 +18,34 @@ A set of Python utilities and CLI tools to help technical writers maintain Ascii
18
18
 
19
19
  ### From PyPI
20
20
 
21
- Install the package from PyPI:
21
+ On modern Linux distributions, you may encounter an "externally-managed-environment" error. Use one of these methods:
22
22
 
23
+ **Option 1: pipx (Recommended for CLI tools)**
24
+ ```sh
25
+ pipx install rolfedh-doc-utils
26
+ ```
27
+
28
+ **Option 2: pip with --user flag**
29
+ ```sh
30
+ pip install --user rolfedh-doc-utils
31
+ ```
32
+
33
+ **Option 3: Traditional pip (may require virtual environment)**
23
34
  ```sh
24
35
  pip install rolfedh-doc-utils
25
- ````
36
+ ```
37
+
38
+ ### Upgrading
39
+
40
+ To upgrade to the latest version:
41
+
42
+ ```sh
43
+ # If installed with pipx:
44
+ pipx upgrade rolfedh-doc-utils
45
+
46
+ # If installed with pip:
47
+ pip install --upgrade rolfedh-doc-utils # or --user flag if needed
48
+ ```
26
49
 
27
50
  ### For Development
28
51
 
@@ -84,6 +107,10 @@ Scans `.adoc` files in the current directory to report:
84
107
 
85
108
  Scans the `./modules` and `./assemblies` directories for `.adoc` files that are not referenced. Optionally archives and deletes them.
86
109
 
110
+ Works with both:
111
+ - **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
112
+ - **Traditional AsciiDoc** repositories (uses `master.adoc` files)
113
+
87
114
  ➡️ See [`archive_unused_files.md`](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
88
115
 
89
116
  ---
@@ -179,7 +206,7 @@ If you see an error like `ModuleNotFoundError: No module named 'find_unused_attr
179
206
 
180
207
  1. The package isn't installed. Run:
181
208
  ```sh
182
- pip install rolfedh-doc-utils
209
+ pipx install rolfedh-doc-utils # or pip install --user rolfedh-doc-utils
183
210
  ```
184
211
 
185
212
  2. You're trying to run the script directly without installation. Either:
@@ -0,0 +1,122 @@
1
+ # doc_utils/topic_map_parser.py
2
+
3
+ import os
4
+ import yaml
5
+ import glob
6
+
7
def detect_repo_type(base_path='.'):
    """
    Detect whether the repository uses topic maps (OpenShift-docs style)
    or master.adoc files (traditional style).

    Args:
        base_path: Root directory of the repository to inspect. It is
            matched literally: glob metacharacters in the path itself
            (``[``, ``*``, ``?``) are escaped before pattern matching.

    Returns:
        'topic_map' - if _topic_maps directory with .yml files exists
        'master_adoc' - if master.adoc files are found
        'unknown' - if neither pattern is detected
    """
    topic_maps_dir = os.path.join(base_path, '_topic_maps')

    # Topic maps are checked first, so they win when both layouts are present.
    if os.path.isdir(topic_maps_dir):
        # Escape the directory part so a path such as "docs[v2]" is not
        # read as a character class by glob.
        yml_pattern = os.path.join(glob.escape(topic_maps_dir), '*.yml')
        if glob.glob(yml_pattern):
            return 'topic_map'

    # os.fspath keeps path-like arguments working with glob.escape.
    escaped_base = glob.escape(os.fspath(base_path))
    master_pattern = os.path.join(escaped_base, '**', 'master.adoc')
    # iglob is lazy: the recursive walk stops at the first master.adoc.
    first_master = next(glob.iglob(master_pattern, recursive=True), None)
    if first_master is not None:
        return 'master_adoc'

    return 'unknown'
31
+
32
+
33
def extract_files_from_topic_map(topic_map_path):
    """
    Collect the .adoc files that a topic map YAML file refers to.

    Each YAML document in the file is handed to process_topic_group. If the
    file cannot be opened or parsed, a warning is printed and whatever was
    gathered before the failure is returned.

    Returns a set of file paths referenced in the topic map.
    """
    found = set()

    try:
        with open(topic_map_path, 'r', encoding='utf-8') as stream:
            # A topic map may hold several '---'-separated documents.
            for entry in yaml.safe_load_all(stream):
                if entry is not None:
                    process_topic_group(entry, found)
    except Exception as e:
        print(f"Warning: Could not parse topic map {topic_map_path}: {e}")

    return found
57
+
58
+
59
def _topic_path_part(value):
    """Return a topic-map ``Dir``/``File`` value as a string, or '' if unusable."""
    if isinstance(value, str):
        return value
    # YAML parses bare numerals (e.g. ``File: 404``) as integers. bool is an
    # int subclass and is rejected: ``yes``/``no`` are not file names.
    if isinstance(value, int) and not isinstance(value, bool):
        return str(value)
    return ''


def process_topic_group(group, referenced_files, parent_dir=''):
    """
    Recursively process a topic group to extract all file references.

    Args:
        group: One parsed topic-map entry. Only dicts are processed; its
            optional 'Dir' is appended to parent_dir and its 'Topics' list
            is walked.
        referenced_files: Set that is updated in place with every
            referenced path ('.adoc' is appended when missing).
        parent_dir: Directory prefix inherited from the enclosing group.

    'File' and 'Dir' values that are not usable path components (None,
    empty, or non-scalar) are skipped instead of raising, so one malformed
    entry does not abort the rest of the map.
    """
    if not isinstance(group, dict):
        return

    # Resolve the directory for this group relative to its parent.
    current_dir = _topic_path_part(group.get('Dir'))
    if parent_dir and current_dir:
        current_dir = os.path.join(parent_dir, current_dir)
    elif parent_dir:
        current_dir = parent_dir

    topics = group.get('Topics')
    if not isinstance(topics, list):
        return

    for topic in topics:
        if not isinstance(topic, dict):
            continue

        file_name = _topic_path_part(topic.get('File'))
        if file_name:
            file_path = os.path.join(current_dir, file_name) if current_dir else file_name
            # Topic maps list files without their extension.
            if not file_path.endswith('.adoc'):
                file_path += '.adoc'
            referenced_files.add(file_path)

        # A topic carrying its own 'Topics' is a sub-group: recursing on it
        # applies its optional 'Dir' on top of current_dir.
        if 'Topics' in topic:
            process_topic_group(topic, referenced_files, current_dir)
103
+
104
+
105
def get_all_topic_map_references(base_path='.'):
    """
    Get all .adoc files referenced in all topic maps.

    Args:
        base_path: Repository root; topic maps are read from its
            ``_topic_maps`` subdirectory. The path is matched literally,
            so glob metacharacters in it are escaped.

    Returns a set of all referenced file paths (empty when the
    ``_topic_maps`` directory does not exist).
    """
    topic_maps_dir = os.path.join(base_path, '_topic_maps')
    all_references = set()

    if not os.path.isdir(topic_maps_dir):
        return all_references

    # Escape the directory so a path such as "docs[v2]" is not treated as a
    # glob character class; only the '*.yml' part is a pattern.
    yml_pattern = os.path.join(glob.escape(topic_maps_dir), '*.yml')

    # Sorted so that any parse warnings appear in a stable order.
    for yml_file in sorted(glob.glob(yml_pattern)):
        all_references.update(extract_files_from_topic_map(yml_file))

    return all_references
@@ -0,0 +1,50 @@
1
+ # doc_utils/unused_adoc.py
2
+
3
+ import os
4
+ import re
5
+ from .file_utils import collect_files, write_manifest_and_archive
6
+ from .topic_map_parser import detect_repo_type, get_all_topic_map_references
7
+
8
def find_unused_adoc(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
    """
    Find .adoc files under scan_dirs that nothing references.

    A file counts as referenced when its basename appears either in a topic
    map (only for repositories detected as 'topic_map') or as the target of
    an include:: directive in any .adoc file under the current directory.
    The unused files are passed to write_manifest_and_archive, and its
    result is returned unchanged.
    """
    repo_type = detect_repo_type()
    print(f"Detected repository type: {repo_type}")

    # Candidates: every .adoc file in the directories being checked.
    candidates = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)

    referenced = set()
    if repo_type == 'topic_map':
        # Topic maps give repo-relative paths; matching is done by basename.
        for ref in get_all_topic_map_references():
            referenced.add(os.path.basename(ref))

    # include:: directives are honoured for every repository type.
    include_re = re.compile(r'include::(.+?)\[')
    for source in collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files):
        try:
            with open(source, 'r', encoding='utf-8') as handle:
                text = handle.read()
                referenced.update(
                    os.path.basename(target) for target in include_re.findall(text)
                )
        except Exception as e:
            print(f"Warning: could not read {source}: {e}")

    # dict.fromkeys drops duplicates while keeping first-seen order.
    unused = list(dict.fromkeys(
        path for path in candidates if os.path.basename(path) not in referenced
    ))

    print(f"Found {len(unused)} unused files out of {len(candidates)} total files in scan directories")

    return write_manifest_and_archive(
        unused, archive_dir, 'to-archive', 'to-archive', archive=archive
    )
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rolfedh-doc-utils"
7
- version = "0.1.2"
7
+ version = "0.1.3"
8
8
  description = "CLI tools for AsciiDoc documentation projects"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -12,7 +12,7 @@ license = { file = "LICENSE" }
12
12
  authors = [
13
13
  { name="Rolfe Dlugy-Hegwer" }
14
14
  ]
15
- dependencies = []
15
+ dependencies = ["PyYAML>=6.0"]
16
16
 
17
17
  [project.scripts]
18
18
  check-scannability = "check_scannability:main"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rolfedh-doc-utils
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: CLI tools for AsciiDoc documentation projects
5
5
  Author: Rolfe Dlugy-Hegwer
6
6
  License: MIT License
@@ -28,6 +28,7 @@ License: MIT License
28
28
  Requires-Python: >=3.8
29
29
  Description-Content-Type: text/markdown
30
30
  License-File: LICENSE
31
+ Requires-Dist: PyYAML>=6.0
31
32
  Dynamic: license-file
32
33
 
33
34
  # doc-utils
@@ -50,11 +51,34 @@ A set of Python utilities and CLI tools to help technical writers maintain Ascii
50
51
 
51
52
  ### From PyPI
52
53
 
53
- Install the package from PyPI:
54
+ On modern Linux distributions, you may encounter an "externally-managed-environment" error. Use one of these methods:
54
55
 
56
+ **Option 1: pipx (Recommended for CLI tools)**
57
+ ```sh
58
+ pipx install rolfedh-doc-utils
59
+ ```
60
+
61
+ **Option 2: pip with --user flag**
62
+ ```sh
63
+ pip install --user rolfedh-doc-utils
64
+ ```
65
+
66
+ **Option 3: Traditional pip (may require virtual environment)**
55
67
  ```sh
56
68
  pip install rolfedh-doc-utils
57
- ````
69
+ ```
70
+
71
+ ### Upgrading
72
+
73
+ To upgrade to the latest version:
74
+
75
+ ```sh
76
+ # If installed with pipx:
77
+ pipx upgrade rolfedh-doc-utils
78
+
79
+ # If installed with pip:
80
+ pip install --upgrade rolfedh-doc-utils # or --user flag if needed
81
+ ```
58
82
 
59
83
  ### For Development
60
84
 
@@ -116,6 +140,10 @@ Scans `.adoc` files in the current directory to report:
116
140
 
117
141
  Scans the `./modules` and `./assemblies` directories for `.adoc` files that are not referenced. Optionally archives and deletes them.
118
142
 
143
+ Works with both:
144
+ - **OpenShift-docs style** repositories (uses `_topic_maps/*.yml` files)
145
+ - **Traditional AsciiDoc** repositories (uses `master.adoc` files)
146
+
119
147
  ➡️ See [`archive_unused_files.md`](https://github.com/rolfedh/doc-utils/blob/main/archive_unused_files.md).
120
148
 
121
149
  ---
@@ -211,7 +239,7 @@ If you see an error like `ModuleNotFoundError: No module named 'find_unused_attr
211
239
 
212
240
  1. The package isn't installed. Run:
213
241
  ```sh
214
- pip install rolfedh-doc-utils
242
+ pipx install rolfedh-doc-utils # or pip install --user rolfedh-doc-utils
215
243
  ```
216
244
 
217
245
  2. You're trying to run the script directly without installation. Either:
@@ -8,6 +8,7 @@ pyproject.toml
8
8
  doc_utils/__init__.py
9
9
  doc_utils/file_utils.py
10
10
  doc_utils/scannability.py
11
+ doc_utils/topic_map_parser.py
11
12
  doc_utils/unused_adoc.py
12
13
  doc_utils/unused_attributes.py
13
14
  doc_utils/unused_images.py
@@ -15,6 +16,7 @@ rolfedh_doc_utils.egg-info/PKG-INFO
15
16
  rolfedh_doc_utils.egg-info/SOURCES.txt
16
17
  rolfedh_doc_utils.egg-info/dependency_links.txt
17
18
  rolfedh_doc_utils.egg-info/entry_points.txt
19
+ rolfedh_doc_utils.egg-info/requires.txt
18
20
  rolfedh_doc_utils.egg-info/top_level.txt
19
21
  tests/test_archive_unused_files.py
20
22
  tests/test_archive_unused_images.py
@@ -25,4 +27,5 @@ tests/test_fixture_archive_unused_files.py
25
27
  tests/test_fixture_archive_unused_images.py
26
28
  tests/test_fixture_check_scannability.py
27
29
  tests/test_parse_exclude_list.py
30
+ tests/test_topic_map_parser.py
28
31
  tests/test_unused_attributes.py
@@ -0,0 +1,190 @@
1
+ import pytest
2
+ import os
3
+ import tempfile
4
+ import yaml
5
+ from doc_utils.topic_map_parser import (
6
+ detect_repo_type,
7
+ extract_files_from_topic_map,
8
+ process_topic_group,
9
+ get_all_topic_map_references
10
+ )
11
+
12
+
13
class TestDetectRepoType:
    """Checks detect_repo_type against throwaway directory layouts."""

    def test_detect_topic_map_repo(self):
        """A _topic_maps directory holding a .yml file is reported as 'topic_map'."""
        with tempfile.TemporaryDirectory() as repo_root:
            maps_dir = os.path.join(repo_root, '_topic_maps')
            os.makedirs(maps_dir)

            # Any .yml file inside _topic_maps is enough for detection.
            with open(os.path.join(maps_dir, '_topic_map.yml'), 'w') as handle:
                handle.write("---\nName: Test\nDir: test\n")

            detected = detect_repo_type(repo_root)

        assert detected == 'topic_map'

    def test_detect_master_adoc_repo(self):
        """A repository containing master.adoc is reported as 'master_adoc'."""
        with tempfile.TemporaryDirectory() as repo_root:
            with open(os.path.join(repo_root, 'master.adoc'), 'w') as handle:
                handle.write("= Master Document\n")

            detected = detect_repo_type(repo_root)

        assert detected == 'master_adoc'

    def test_detect_unknown_repo(self):
        """An empty directory matches neither layout and yields 'unknown'."""
        with tempfile.TemporaryDirectory() as repo_root:
            detected = detect_repo_type(repo_root)

        assert detected == 'unknown'
43
+
44
+
45
class TestExtractFilesFromTopicMap:
    """Tests for extract_files_from_topic_map using topic maps written to a temp directory."""

    @staticmethod
    def _extract(yaml_text):
        """Write yaml_text to a temporary .yml file and return the extracted references."""
        with tempfile.TemporaryDirectory() as tmpdir:
            topic_map_path = os.path.join(tmpdir, 'topic_map.yml')
            with open(topic_map_path, 'w', encoding='utf-8') as f:
                f.write(yaml_text)
            return extract_files_from_topic_map(topic_map_path)

    def test_extract_simple_topic_map(self):
        """Test extracting files from a simple topic map."""
        # 'File' must be indented under its '- Name' item to belong to it.
        files = self._extract(
            "---\n"
            "Name: Overview\n"
            "Dir: welcome\n"
            "Topics:\n"
            "- Name: Welcome\n"
            "  File: index\n"
            "- Name: Introduction\n"
            "  File: intro\n"
        )
        assert 'welcome/index.adoc' in files
        assert 'welcome/intro.adoc' in files
        assert len(files) == 2

    def test_extract_nested_topic_map(self):
        """Test extracting files from a topic map with nested topics."""
        files = self._extract(
            "---\n"
            "Name: Architecture\n"
            "Dir: architecture\n"
            "Topics:\n"
            "- Name: Overview\n"
            "  File: index\n"
            "- Name: Components\n"
            "  Dir: components\n"
            "  Topics:\n"
            "  - Name: API Server\n"
            "    File: api-server\n"
            "  - Name: Controller\n"
            "    File: controller\n"
        )
        assert 'architecture/index.adoc' in files
        assert 'architecture/components/api-server.adoc' in files
        assert 'architecture/components/controller.adoc' in files
        assert len(files) == 3

    def test_extract_multiple_documents(self):
        """Test extracting files from a YAML file with multiple documents."""
        files = self._extract(
            "---\n"
            "Name: Overview\n"
            "Dir: welcome\n"
            "Topics:\n"
            "- Name: Welcome\n"
            "  File: index\n"
            "---\n"
            "Name: Installation\n"
            "Dir: install\n"
            "Topics:\n"
            "- Name: Quick Start\n"
            "  File: quickstart\n"
        )
        assert 'welcome/index.adoc' in files
        assert 'install/quickstart.adoc' in files
        assert len(files) == 2
121
+
122
+
123
class TestProcessTopicGroup:
    """Checks process_topic_group on in-memory topic group dictionaries."""

    def test_process_simple_group(self):
        """Files of a flat group are prefixed with the group's Dir."""
        collected = set()
        process_topic_group(
            {
                'Name': 'Test Group',
                'Dir': 'test',
                'Topics': [
                    {'Name': 'Topic 1', 'File': 'topic1'},
                    {'Name': 'Topic 2', 'File': 'topic2'}
                ]
            },
            collected,
        )

        for expected in ('test/topic1.adoc', 'test/topic2.adoc'):
            assert expected in collected

    def test_process_group_with_parent_dir(self):
        """A parent_dir argument is prepended to the group's own Dir."""
        subgroup = {
            'Name': 'Subgroup',
            'Dir': 'sub',
            'Topics': [
                {'Name': 'Topic', 'File': 'topic'}
            ]
        }
        collected = set()

        process_topic_group(subgroup, collected, parent_dir='parent')

        assert 'parent/sub/topic.adoc' in collected
155
+
156
+
157
class TestGetAllTopicMapReferences:
    """Tests for get_all_topic_map_references across several topic map files."""

    def test_get_all_references(self):
        """Test getting all references from all topic maps."""
        # 'File' is indented under its '- Name' item so it belongs to the topic.
        topic_maps = {
            'map1.yml': (
                "---\n"
                "Name: Group1\n"
                "Dir: group1\n"
                "Topics:\n"
                "- Name: Topic1\n"
                "  File: topic1\n"
            ),
            'map2.yml': (
                "---\n"
                "Name: Group2\n"
                "Dir: group2\n"
                "Topics:\n"
                "- Name: Topic2\n"
                "  File: topic2\n"
            ),
        }

        with tempfile.TemporaryDirectory() as tmpdir:
            # Create _topic_maps directory and one file per topic map
            topic_maps_dir = os.path.join(tmpdir, '_topic_maps')
            os.makedirs(topic_maps_dir)
            for file_name, yaml_text in topic_maps.items():
                with open(os.path.join(topic_maps_dir, file_name), 'w', encoding='utf-8') as f:
                    f.write(yaml_text)

            references = get_all_topic_map_references(tmpdir)

        assert 'group1/topic1.adoc' in references
        assert 'group2/topic2.adoc' in references
        assert len(references) == 2
@@ -1,24 +0,0 @@
1
- # doc_utils/unused_adoc.py
2
-
3
- import os
4
- import re
5
- from .file_utils import collect_files, write_manifest_and_archive
6
-
7
def find_unused_adoc(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
    """
    Find .adoc files under scan_dirs that no include:: directive references.

    Include targets are gathered from every .adoc file under the current
    directory and compared by basename. The unused files are passed to
    write_manifest_and_archive, and its result is returned unchanged.
    """
    candidates = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
    include_re = re.compile(r'include::(.+?)\[')
    seen_includes = set()

    for source in collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files):
        try:
            with open(source, 'r', encoding='utf-8') as handle:
                text = handle.read()
                for target in include_re.findall(text):
                    seen_includes.add(os.path.basename(target))
        except Exception as e:
            print(f"Warning: could not read {source}: {e}")

    # dict.fromkeys drops duplicates while keeping first-seen order.
    unused = list(dict.fromkeys(
        path for path in candidates if os.path.basename(path) not in seen_includes
    ))
    return write_manifest_and_archive(
        unused, archive_dir, 'to-archive', 'to-archive', archive=archive
    )