rolfedh-doc-utils 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/PKG-INFO +1 -1
  2. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/archive_unused_files.py +9 -3
  3. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/doc_utils/topic_map_parser.py +10 -2
  4. rolfedh_doc_utils-0.1.5/doc_utils/unused_adoc.py +120 -0
  5. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/doc_utils/unused_images.py +3 -0
  6. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/pyproject.toml +1 -1
  7. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/rolfedh_doc_utils.egg-info/PKG-INFO +1 -1
  8. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/rolfedh_doc_utils.egg-info/SOURCES.txt +3 -0
  9. rolfedh_doc_utils-0.1.5/setup.py +45 -0
  10. rolfedh_doc_utils-0.1.5/tests/test_auto_discovery.py +146 -0
  11. rolfedh_doc_utils-0.1.5/tests/test_symlink_handling.py +91 -0
  12. rolfedh_doc_utils-0.1.3/doc_utils/unused_adoc.py +0 -50
  13. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/LICENSE +0 -0
  14. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/README.md +0 -0
  15. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/archive_unused_images.py +0 -0
  16. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/check_scannability.py +0 -0
  17. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/doc_utils/__init__.py +0 -0
  18. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/doc_utils/file_utils.py +0 -0
  19. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/doc_utils/scannability.py +0 -0
  20. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/doc_utils/unused_attributes.py +0 -0
  21. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/find_unused_attributes.py +0 -0
  22. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/rolfedh_doc_utils.egg-info/dependency_links.txt +0 -0
  23. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/rolfedh_doc_utils.egg-info/entry_points.txt +0 -0
  24. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/rolfedh_doc_utils.egg-info/requires.txt +0 -0
  25. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/rolfedh_doc_utils.egg-info/top_level.txt +0 -0
  26. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/setup.cfg +0 -0
  27. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_archive_unused_files.py +0 -0
  28. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_archive_unused_images.py +0 -0
  29. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_check_scannability.py +0 -0
  30. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_cli_entry_points.py +0 -0
  31. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_file_utils.py +0 -0
  32. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_fixture_archive_unused_files.py +0 -0
  33. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_fixture_archive_unused_images.py +0 -0
  34. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_fixture_check_scannability.py +0 -0
  35. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_parse_exclude_list.py +0 -0
  36. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_topic_map_parser.py +0 -0
  37. {rolfedh_doc_utils-0.1.3 → rolfedh_doc_utils-0.1.5}/tests/test_unused_attributes.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: rolfedh-doc-utils
- Version: 0.1.3
+ Version: 0.1.5
  Summary: CLI tools for AsciiDoc documentation projects
  Author: Rolfe Dlugy-Hegwer
  License: MIT License
@@ -1,7 +1,8 @@
  """
  Archive Unused AsciiDoc Files
 
- Scans './modules' and './assemblies' for AsciiDoc files not referenced by any other AsciiDoc file in the project. Optionally archives and deletes them.
+ Automatically discovers and scans 'modules' and 'assemblies' directories for AsciiDoc files
+ not referenced by any other AsciiDoc file in the project. Optionally archives and deletes them.
 
  For full documentation and usage examples, see archive_unused_files.md in this directory.
  """
@@ -11,14 +12,19 @@ from doc_utils.unused_adoc import find_unused_adoc
  from doc_utils.file_utils import parse_exclude_list_file
 
  def main():
-     parser = argparse.ArgumentParser(description='Archive unused AsciiDoc files.')
+     parser = argparse.ArgumentParser(
+         description='Archive unused AsciiDoc files.',
+         epilog='By default, automatically discovers all modules and assemblies directories in the repository.'
+     )
      parser.add_argument('--archive', action='store_true', help='Move the files to a dated zip in the archive directory.')
+     parser.add_argument('--scan-dir', action='append', default=[], help='Specific directory to scan (can be used multiple times). If not specified, auto-discovers directories.')
      parser.add_argument('--exclude-dir', action='append', default=[], help='Directory to exclude (can be used multiple times).')
      parser.add_argument('--exclude-file', action='append', default=[], help='File to exclude (can be used multiple times).')
      parser.add_argument('--exclude-list', type=str, help='Path to a file containing directories or files to exclude, one per line.')
      args = parser.parse_args()
 
-     scan_dirs = ['./modules', './modules/rn', './assemblies']
+     # Use provided scan directories or None for auto-discovery
+     scan_dirs = args.scan_dir if args.scan_dir else None
      archive_dir = './archive'
 
      exclude_dirs = list(args.exclude_dir)
@@ -22,8 +22,16 @@ def detect_repo_type(base_path='.'):
      if yml_files:
          return 'topic_map'
 
-     # Check for master.adoc files
-     master_files = glob.glob(os.path.join(base_path, '**/master.adoc'), recursive=True)
+     # Check for master.adoc files using os.walk to avoid symlink issues
+     master_files = []
+     for root, dirs, files in os.walk(base_path):
+         # Skip symbolic link directories to prevent infinite recursion
+         dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))]
+
+         # Check for master.adoc in this directory
+         if 'master.adoc' in files:
+             master_files.append(os.path.join(root, 'master.adoc'))
+
      if master_files:
          return 'master_adoc'
 
@@ -0,0 +1,120 @@
+ # doc_utils/unused_adoc.py
+
+ import os
+ import re
+ from .file_utils import collect_files, write_manifest_and_archive
+ from .topic_map_parser import detect_repo_type, get_all_topic_map_references
+
+ def find_scan_directories(base_path='.', exclude_dirs=None):
+     """
+     Automatically find all 'modules' and 'assemblies' directories in the repository.
+
+     Returns a list of paths to scan.
+     """
+     scan_dirs = []
+     exclude_dirs = exclude_dirs or []
+
+     for root, dirs, files in os.walk(base_path):
+         # Skip symbolic links to prevent issues
+         dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))]
+
+         # Skip excluded directories
+         for exclude_dir in exclude_dirs:
+             abs_exclude = os.path.abspath(exclude_dir)
+             if os.path.abspath(root).startswith(abs_exclude):
+                 dirs[:] = []  # Don't descend into excluded directories
+                 break
+
+         # Skip hidden directories and common non-content directories
+         dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', 'build', 'dist', 'target']]
+
+         # Look for modules and assemblies directories
+         for d in dirs:
+             if d in ['modules', 'assemblies']:
+                 dir_path = os.path.join(root, d)
+                 # Check if this directory or any subdirectory contains .adoc files
+                 has_adoc = False
+                 for subroot, subdirs, subfiles in os.walk(dir_path):
+                     # Skip symbolic links
+                     subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
+                     if any(f.endswith('.adoc') for f in subfiles):
+                         has_adoc = True
+                         break
+                 if has_adoc:
+                     scan_dirs.append(dir_path)
+
+     # Also check for modules/rn pattern if modules exists
+     modules_dirs = [d for d in scan_dirs if os.path.basename(d) == 'modules']
+     for modules_dir in modules_dirs:
+         rn_dir = os.path.join(modules_dir, 'rn')
+         if os.path.isdir(rn_dir):
+             # Check if rn directory or subdirectories contain .adoc files
+             has_adoc = False
+             for subroot, subdirs, subfiles in os.walk(rn_dir):
+                 subdirs[:] = [sd for sd in subdirs if not os.path.islink(os.path.join(subroot, sd))]
+                 if any(f.endswith('.adoc') for f in subfiles):
+                     has_adoc = True
+                     break
+             if has_adoc:
+                 scan_dirs.append(rn_dir)
+
+     return scan_dirs
+
+ def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exclude_dirs=None, exclude_files=None):
+     # Print safety warning
+     print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
+
+     # If no scan_dirs provided, auto-discover them
+     if not scan_dirs:
+         scan_dirs = find_scan_directories(exclude_dirs=exclude_dirs)
+         if scan_dirs:
+             print(f"Auto-discovered directories to scan:")
+             for dir_path in sorted(scan_dirs):
+                 print(f" - {dir_path}")
+         else:
+             print("No 'modules' or 'assemblies' directories found containing .adoc files.")
+             print("Please run this tool from your documentation repository root.")
+             return
+
+     # Detect repository type
+     repo_type = detect_repo_type()
+     print(f"Detected repository type: {repo_type}")
+
+     # Collect all .adoc files in scan directories
+     asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
+
+     # Track which files are referenced
+     referenced_files = set()
+
+     if repo_type == 'topic_map':
+         # For OpenShift-docs style repos, get references from topic maps
+         topic_references = get_all_topic_map_references()
+         # Convert to basenames for comparison
+         referenced_files.update(os.path.basename(ref) for ref in topic_references)
+
+     # Always scan for include:: directives in all .adoc files
+     include_pattern = re.compile(r'include::(.+?)\[')
+     adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
+
+     for file_path in adoc_files:
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 content = f.read()
+             includes = include_pattern.findall(content)
+             # Extract just the filename from the include path
+             for include in includes:
+                 # Handle both relative and absolute includes
+                 include_basename = os.path.basename(include)
+                 referenced_files.add(include_basename)
+         except Exception as e:
+             print(f"Warning: could not read {file_path}: {e}")
+
+     # Find unused files by comparing basenames
+     unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
+     unused_files = list(dict.fromkeys(unused_files))  # Remove duplicates
+
+     print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
+
+     return write_manifest_and_archive(
+         unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
+     )
@@ -7,6 +7,9 @@ from .file_utils import collect_files, write_manifest_and_archive
  IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
 
  def find_unused_images(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
+     # Print safety warning
+     print("\n⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n")
+
      image_files = collect_files(scan_dirs, IMAGE_EXTENSIONS, exclude_dirs, exclude_files)
      adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
      referenced_images = set()
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rolfedh-doc-utils"
7
- version = "0.1.3"
7
+ version = "0.1.5"
8
8
  description = "CLI tools for AsciiDoc documentation projects"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: rolfedh-doc-utils
- Version: 0.1.3
+ Version: 0.1.5
  Summary: CLI tools for AsciiDoc documentation projects
  Author: Rolfe Dlugy-Hegwer
  License: MIT License
@@ -5,6 +5,7 @@ archive_unused_images.py
  check_scannability.py
  find_unused_attributes.py
  pyproject.toml
+ setup.py
  doc_utils/__init__.py
  doc_utils/file_utils.py
  doc_utils/scannability.py
@@ -20,6 +21,7 @@ rolfedh_doc_utils.egg-info/requires.txt
  rolfedh_doc_utils.egg-info/top_level.txt
  tests/test_archive_unused_files.py
  tests/test_archive_unused_images.py
+ tests/test_auto_discovery.py
  tests/test_check_scannability.py
  tests/test_cli_entry_points.py
  tests/test_file_utils.py
@@ -27,5 +29,6 @@ tests/test_fixture_archive_unused_files.py
  tests/test_fixture_archive_unused_images.py
  tests/test_fixture_check_scannability.py
  tests/test_parse_exclude_list.py
+ tests/test_symlink_handling.py
  tests/test_topic_map_parser.py
  tests/test_unused_attributes.py
@@ -0,0 +1,45 @@
+ #!/usr/bin/env python3
+ """
+ Setup script for doc-utils package.
+ This file is only needed if we want to customize the installation process.
+ """
+
+ from setuptools import setup
+ from setuptools.command.install import install
+ from setuptools.command.develop import develop
+ from setuptools.command.egg_info import egg_info
+
+ def custom_post_install():
+     """Display safety message after installation."""
+     print("\n" + "="*60)
+     print("✅ doc-utils installed successfully!")
+     print("\n⚠️ IMPORTANT: Safety First")
+     print(" • Work in a git branch (never main/master)")
+     print(" • Run without --archive first to preview")
+     print(" • Review changes with git diff")
+     print("="*60 + "\n")
+
+ class CustomInstallCommand(install):
+     """Customized setuptools install command."""
+     def run(self):
+         install.run(self)
+         custom_post_install()
+
+ class CustomDevelopCommand(develop):
+     """Customized setuptools develop command."""
+     def run(self):
+         develop.run(self)
+         custom_post_install()
+
+ class CustomEggInfoCommand(egg_info):
+     """Customized setuptools egg_info command."""
+     def run(self):
+         egg_info.run(self)
+
+ setup(
+     cmdclass={
+         'install': CustomInstallCommand,
+         'develop': CustomDevelopCommand,
+         'egg_info': CustomEggInfoCommand,
+     },
+ )
@@ -0,0 +1,146 @@
+ """Test automatic directory discovery for archive-unused-files."""
+
+ import os
+ import tempfile
+ import pytest
+ from doc_utils.unused_adoc import find_scan_directories
+
+
+ def test_find_scan_directories_standard_structure(tmp_path):
+     """Test discovery with standard directory structure."""
+
+     # Create standard structure
+     (tmp_path / "modules").mkdir()
+     (tmp_path / "modules" / "test.adoc").write_text("= Test\n")
+
+     (tmp_path / "assemblies").mkdir()
+     (tmp_path / "assemblies" / "assembly.adoc").write_text("= Assembly\n")
+
+     # Find directories
+     dirs = find_scan_directories(str(tmp_path))
+
+     # Should find both directories
+     assert len(dirs) == 2
+     assert any("modules" in d for d in dirs)
+     assert any("assemblies" in d for d in dirs)
+
+
+ def test_find_scan_directories_nested_structure(tmp_path):
+     """Test discovery with nested directory structure."""
+
+     # Create nested structure like red-hat-insights-documentation
+     downstream = tmp_path / "downstream"
+     downstream.mkdir()
+
+     (downstream / "modules").mkdir()
+     (downstream / "modules" / "test.adoc").write_text("= Test\n")
+
+     (downstream / "assemblies").mkdir()
+     (downstream / "assemblies" / "assembly.adoc").write_text("= Assembly\n")
+
+     # Find directories
+     dirs = find_scan_directories(str(tmp_path))
+
+     # Should find both nested directories
+     assert len(dirs) == 2
+     assert any("downstream/modules" in d or "downstream\\modules" in d for d in dirs)
+     assert any("downstream/assemblies" in d or "downstream\\assemblies" in d for d in dirs)
+
+
+ def test_find_scan_directories_with_rn(tmp_path):
+     """Test discovery includes modules/rn directory when it contains .adoc files."""
+
+     # Create modules with rn subdirectory
+     modules = tmp_path / "modules"
+     modules.mkdir()
+     (modules / "test.adoc").write_text("= Test\n")
+
+     rn_dir = modules / "rn"
+     rn_dir.mkdir()
+     (rn_dir / "release-notes.adoc").write_text("= Release Notes\n")
+
+     # Find directories
+     dirs = find_scan_directories(str(tmp_path))
+
+     # Should find modules and modules/rn
+     assert len(dirs) == 2
+     assert any("modules/rn" in d or "modules\\rn" in d for d in dirs)
+
+
+ def test_find_scan_directories_empty_dirs(tmp_path):
+     """Test that empty directories without .adoc files are not included."""
+
+     # Create directories without .adoc files
+     (tmp_path / "modules").mkdir()
+     (tmp_path / "assemblies").mkdir()
+
+     # Find directories
+     dirs = find_scan_directories(str(tmp_path))
+
+     # Should find no directories since they don't contain .adoc files
+     assert len(dirs) == 0
+
+
+ def test_find_scan_directories_with_exclusions(tmp_path):
+     """Test directory discovery with exclusions."""
+
+     # Create multiple module directories
+     (tmp_path / "modules").mkdir()
+     (tmp_path / "modules" / "test.adoc").write_text("= Test\n")
+
+     archived = tmp_path / "archived"
+     archived.mkdir()
+     (archived / "modules").mkdir()
+     (archived / "modules" / "old.adoc").write_text("= Old\n")
+
+     # Find directories excluding archived
+     dirs = find_scan_directories(str(tmp_path), exclude_dirs=[str(archived)])
+
+     # Should only find the non-excluded modules
+     assert len(dirs) == 1
+     assert "archived" not in dirs[0]
+
+
+ def test_find_scan_directories_skips_hidden(tmp_path):
+     """Test that hidden directories are skipped."""
+
+     # Create visible modules
+     (tmp_path / "modules").mkdir()
+     (tmp_path / "modules" / "test.adoc").write_text("= Test\n")
+
+     # Create hidden directory
+     hidden = tmp_path / ".archive"
+     hidden.mkdir()
+     (hidden / "modules").mkdir()
+     (hidden / "modules" / "archived.adoc").write_text("= Archived\n")
+
+     # Find directories
+     dirs = find_scan_directories(str(tmp_path))
+
+     # Should only find visible modules
+     assert len(dirs) == 1
+     assert ".archive" not in dirs[0]
+
+
+ def test_find_scan_directories_multiple_locations(tmp_path):
+     """Test discovery with modules/assemblies in multiple locations."""
+
+     # Create modules in multiple places
+     (tmp_path / "modules").mkdir()
+     (tmp_path / "modules" / "root.adoc").write_text("= Root\n")
+
+     content1 = tmp_path / "content1"
+     content1.mkdir()
+     (content1 / "modules").mkdir()
+     (content1 / "modules" / "content1.adoc").write_text("= Content1\n")
+
+     content2 = tmp_path / "content2"
+     content2.mkdir()
+     (content2 / "assemblies").mkdir()
+     (content2 / "assemblies" / "assembly2.adoc").write_text("= Assembly2\n")
+
+     # Find directories
+     dirs = find_scan_directories(str(tmp_path))
+
+     # Should find all three directories
+     assert len(dirs) == 3
@@ -0,0 +1,91 @@
+ """Test that topic_map_parser handles symbolic links correctly without freezing."""
+
+ import os
+ import tempfile
+ import pytest
+ from doc_utils.topic_map_parser import detect_repo_type
+
+
+ def test_detect_repo_type_with_circular_symlinks(tmp_path):
+     """Test that detect_repo_type doesn't freeze with circular symbolic links."""
+
+     # Create a directory structure with circular symlinks
+     modules_dir = tmp_path / "modules"
+     modules_dir.mkdir()
+
+     # Create a master.adoc file in modules
+     (modules_dir / "master.adoc").write_text("= Test Doc\n")
+
+     # Create a circular symlink: modules/modules -> ../modules
+     circular_link = modules_dir / "modules"
+     try:
+         os.symlink("../../modules", str(circular_link))
+     except OSError:
+         pytest.skip("Cannot create symbolic links on this system")
+
+     # This should not freeze - it should skip the symlink
+     repo_type = detect_repo_type(str(tmp_path))
+     assert repo_type == "master_adoc"
+
+
+ def test_detect_repo_type_with_nested_circular_symlinks(tmp_path):
+     """Test detection with nested directories containing circular symlinks."""
+
+     # Create nested structure
+     (tmp_path / "assemblies").mkdir()
+     modules_dir = tmp_path / "modules"
+     modules_dir.mkdir()
+
+     # Create master.adoc
+     (tmp_path / "master.adoc").write_text("= Main Doc\n")
+
+     # Create archive directory with circular symlinks (similar to real case)
+     archive_dir = tmp_path / ".archive" / "archived-content" / "modules"
+     archive_dir.mkdir(parents=True)
+
+     try:
+         # Create circular symlink in archive
+         os.symlink("../../modules", str(archive_dir / "modules"))
+     except OSError:
+         pytest.skip("Cannot create symbolic links on this system")
+
+     # Should detect master_adoc without freezing
+     repo_type = detect_repo_type(str(tmp_path))
+     assert repo_type == "master_adoc"
+
+
+ def test_detect_repo_type_skips_symlink_directories(tmp_path):
+     """Test that symlinked directories are skipped during traversal."""
+
+     # Create main directories
+     real_dir = tmp_path / "real_modules"
+     real_dir.mkdir()
+     (real_dir / "master.adoc").write_text("= Real Master\n")
+
+     # Create a symlink to real_dir
+     linked_dir = tmp_path / "linked_modules"
+     try:
+         os.symlink(str(real_dir), str(linked_dir))
+     except OSError:
+         pytest.skip("Cannot create symbolic links on this system")
+
+     # The function should find the master.adoc in real_dir but not traverse linked_dir
+     repo_type = detect_repo_type(str(tmp_path))
+     assert repo_type == "master_adoc"
+
+     # Test that symlinked directories are not traversed
+     only_symlink_path = tmp_path / "only_symlink_test"
+     only_symlink_path.mkdir()
+
+     # Create a directory outside that contains master.adoc
+     external_dir = tmp_path / "external"
+     external_dir.mkdir()
+     (external_dir / "master.adoc").write_text("= External Master\n")
+
+     # Create only a symlink to it inside our test directory
+     symlinked_dir = only_symlink_path / "linked_dir"
+     os.symlink(str(external_dir), str(symlinked_dir))
+
+     # Should not find master.adoc since it's only accessible via symlink
+     repo_type = detect_repo_type(str(only_symlink_path))
+     assert repo_type == "unknown"
@@ -1,50 +0,0 @@
- # doc_utils/unused_adoc.py
-
- import os
- import re
- from .file_utils import collect_files, write_manifest_and_archive
- from .topic_map_parser import detect_repo_type, get_all_topic_map_references
-
- def find_unused_adoc(scan_dirs, archive_dir, archive=False, exclude_dirs=None, exclude_files=None):
-     # Detect repository type
-     repo_type = detect_repo_type()
-     print(f"Detected repository type: {repo_type}")
-
-     # Collect all .adoc files in scan directories
-     asciidoc_files = collect_files(scan_dirs, {'.adoc'}, exclude_dirs, exclude_files)
-
-     # Track which files are referenced
-     referenced_files = set()
-
-     if repo_type == 'topic_map':
-         # For OpenShift-docs style repos, get references from topic maps
-         topic_references = get_all_topic_map_references()
-         # Convert to basenames for comparison
-         referenced_files.update(os.path.basename(ref) for ref in topic_references)
-
-     # Always scan for include:: directives in all .adoc files
-     include_pattern = re.compile(r'include::(.+?)\[')
-     adoc_files = collect_files(['.'], {'.adoc'}, exclude_dirs, exclude_files)
-
-     for file_path in adoc_files:
-         try:
-             with open(file_path, 'r', encoding='utf-8') as f:
-                 content = f.read()
-             includes = include_pattern.findall(content)
-             # Extract just the filename from the include path
-             for include in includes:
-                 # Handle both relative and absolute includes
-                 include_basename = os.path.basename(include)
-                 referenced_files.add(include_basename)
-         except Exception as e:
-             print(f"Warning: could not read {file_path}: {e}")
-
-     # Find unused files by comparing basenames
-     unused_files = [f for f in asciidoc_files if os.path.basename(f) not in referenced_files]
-     unused_files = list(dict.fromkeys(unused_files))  # Remove duplicates
-
-     print(f"Found {len(unused_files)} unused files out of {len(asciidoc_files)} total files in scan directories")
-
-     return write_manifest_and_archive(
-         unused_files, archive_dir, 'to-archive', 'to-archive', archive=archive
-     )