reposnap 0.7.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {reposnap-0.7.0 → reposnap-0.8.0}/.coverage +0 -0
  2. {reposnap-0.7.0 → reposnap-0.8.0}/PKG-INFO +52 -2
  3. {reposnap-0.7.0 → reposnap-0.8.0}/README.md +51 -1
  4. {reposnap-0.7.0 → reposnap-0.8.0}/pyproject.toml +1 -1
  5. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/controllers/project_controller.py +66 -2
  6. reposnap-0.8.0/reposnap/core/content_search.py +104 -0
  7. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/interfaces/cli.py +12 -0
  8. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_cli.py +95 -0
  9. reposnap-0.8.0/tests/reposnap/test_contains_filter.py +295 -0
  10. {reposnap-0.7.0 → reposnap-0.8.0}/.github/workflows/python-package.yml +0 -0
  11. {reposnap-0.7.0 → reposnap-0.8.0}/.github/workflows/release.yml +0 -0
  12. {reposnap-0.7.0 → reposnap-0.8.0}/.gitignore +0 -0
  13. {reposnap-0.7.0 → reposnap-0.8.0}/.pre-commit-config.yaml +0 -0
  14. {reposnap-0.7.0 → reposnap-0.8.0}/.python-version +0 -0
  15. {reposnap-0.7.0 → reposnap-0.8.0}/.vscode/launch.json +0 -0
  16. {reposnap-0.7.0 → reposnap-0.8.0}/CONTRIBUTING.md +0 -0
  17. {reposnap-0.7.0 → reposnap-0.8.0}/LICENSE +0 -0
  18. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/__init__.py +0 -0
  19. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/controllers/__init__.py +0 -0
  20. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/core/__init__.py +0 -0
  21. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/core/file_system.py +0 -0
  22. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/core/git_repo.py +0 -0
  23. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/core/markdown_generator.py +0 -0
  24. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/interfaces/__init__.py +0 -0
  25. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/interfaces/gui.py +0 -0
  26. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/models/__init__.py +0 -0
  27. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/models/file_tree.py +0 -0
  28. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/utils/__init__.py +0 -0
  29. {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/utils/path_utils.py +0 -0
  30. {reposnap-0.7.0 → reposnap-0.8.0}/requirements-dev.lock +0 -0
  31. {reposnap-0.7.0 → reposnap-0.8.0}/requirements.lock +0 -0
  32. {reposnap-0.7.0 → reposnap-0.8.0}/tests/__init__.py +0 -0
  33. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/__init__.py +0 -0
  34. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_collected_tree.py +0 -0
  35. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_file_system.py +0 -0
  36. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_file_tree.py +0 -0
  37. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_git_repo.py +0 -0
  38. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_gui.py +0 -0
  39. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_markdown_generator.py +0 -0
  40. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_path_utils.py +0 -0
  41. {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_project_controller.py +0 -0
  42. {reposnap-0.7.0 → reposnap-0.8.0}/tests/resources/another_existing_file.py +0 -0
  43. {reposnap-0.7.0 → reposnap-0.8.0}/tests/resources/existing_file.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: reposnap
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Generate a Markdown file with all contents of your project
5
5
  Author: agoloborodko
6
6
  License-File: LICENSE
@@ -25,6 +25,7 @@ Description-Content-Type: text/markdown
25
25
  - **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
26
26
  - **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
27
27
  - **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
28
+ - **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
28
29
  - **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
29
30
 
30
31
  ## Installation
@@ -50,7 +51,7 @@ pip install -r requirements.lock
50
51
  To use `reposnap` from the command line, run it with the following options:
51
52
 
52
53
  ```bash
53
- reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] paths [paths ...]
54
+ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
54
55
  ```
55
56
 
56
57
  - `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
@@ -61,6 +62,8 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
61
62
  - `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
62
63
  - `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
63
64
  - `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
65
+ - `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
66
+ - `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
64
67
 
65
68
  #### Pattern Matching
66
69
 
@@ -73,6 +76,41 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
73
76
  - `-i "*.py"`: Includes only files ending with `.py`.
74
77
  - `-e "*.test.*"`: Excludes files with `.test.` in their names.
75
78
 
79
+ #### Content Filtering
80
+
81
+ The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
82
+
83
+ - **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
84
+ - **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
85
+ - **Performance**: Large files (>5MB) and binary files are automatically skipped for performance and safety reasons.
86
+
87
+ **Examples**:
88
+
89
+ 1. **Find files containing specific imports**:
90
+ ```bash
91
+ reposnap . -S "import logging"
92
+ ```
93
+
94
+ 2. **Search for multiple patterns (OR logic)**:
95
+ ```bash
96
+ reposnap . -S "TODO" "FIXME" "import requests"
97
+ ```
98
+
99
+ 3. **Case-sensitive content search**:
100
+ ```bash
101
+ reposnap . -S "TODO" --contains-case
102
+ ```
103
+
104
+ 4. **Combine content filtering with other filters**:
105
+ ```bash
106
+ reposnap . -S "class " -i "*.py" --structure-only
107
+ ```
108
+
109
+ 5. **Find files with specific function calls**:
110
+ ```bash
111
+ reposnap . -S "logger.error" "raise Exception"
112
+ ```
113
+
76
114
  #### Only Snapshot Your Current Work
77
115
 
78
116
  The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
@@ -147,6 +185,18 @@ This is particularly useful when you want to:
147
185
  reposnap . -c
148
186
  ```
149
187
 
188
+ 7. **Find and document files containing specific code patterns**:
189
+
190
+ ```bash
191
+ reposnap . -S "import logging" "logger."
192
+ ```
193
+
194
+ 8. **Combine content filtering with file type filtering**:
195
+
196
+ ```bash
197
+ reposnap . -S "class " -i "*.py" --structure-only
198
+ ```
199
+
150
200
  ### Graphical User Interface
151
201
 
152
202
  `reposnap` also provides a GUI for users who prefer an interactive interface.
@@ -12,6 +12,7 @@
12
12
  - **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
13
13
  - **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
14
14
  - **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
15
+ - **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
15
16
  - **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
16
17
 
17
18
  ## Installation
@@ -37,7 +38,7 @@ pip install -r requirements.lock
37
38
  To use `reposnap` from the command line, run it with the following options:
38
39
 
39
40
  ```bash
40
- reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] paths [paths ...]
41
+ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
41
42
  ```
42
43
 
43
44
  - `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
@@ -48,6 +49,8 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
48
49
  - `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
49
50
  - `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
50
51
  - `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
52
+ - `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
53
+ - `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
51
54
 
52
55
  #### Pattern Matching
53
56
 
@@ -60,6 +63,41 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
60
63
  - `-i "*.py"`: Includes only files ending with `.py`.
61
64
  - `-e "*.test.*"`: Excludes files with `.test.` in their names.
62
65
 
66
+ #### Content Filtering
67
+
68
+ The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
69
+
70
+ - **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
71
+ - **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
72
+ - **Performance**: Large files (>5MB) and binary files are automatically skipped for performance and safety reasons.
73
+
74
+ **Examples**:
75
+
76
+ 1. **Find files containing specific imports**:
77
+ ```bash
78
+ reposnap . -S "import logging"
79
+ ```
80
+
81
+ 2. **Search for multiple patterns (OR logic)**:
82
+ ```bash
83
+ reposnap . -S "TODO" "FIXME" "import requests"
84
+ ```
85
+
86
+ 3. **Case-sensitive content search**:
87
+ ```bash
88
+ reposnap . -S "TODO" --contains-case
89
+ ```
90
+
91
+ 4. **Combine content filtering with other filters**:
92
+ ```bash
93
+ reposnap . -S "class " -i "*.py" --structure-only
94
+ ```
95
+
96
+ 5. **Find files with specific function calls**:
97
+ ```bash
98
+ reposnap . -S "logger.error" "raise Exception"
99
+ ```
100
+
63
101
  #### Only Snapshot Your Current Work
64
102
 
65
103
  The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
@@ -134,6 +172,18 @@ This is particularly useful when you want to:
134
172
  reposnap . -c
135
173
  ```
136
174
 
175
+ 7. **Find and document files containing specific code patterns**:
176
+
177
+ ```bash
178
+ reposnap . -S "import logging" "logger."
179
+ ```
180
+
181
+ 8. **Combine content filtering with file type filtering**:
182
+
183
+ ```bash
184
+ reposnap . -S "class " -i "*.py" --structure-only
185
+ ```
186
+
137
187
  ### Graphical User Interface
138
188
 
139
189
  `reposnap` also provides a GUI for users who prefer an interactive interface.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "reposnap"
3
- version = "0.7.0"
3
+ version = "0.8.0"
4
4
  description = "Generate a Markdown file with all contents of your project"
5
5
  authors = [
6
6
  { name = "agoloborodko" }
@@ -19,7 +19,13 @@ class ProjectController:
19
19
  ]
20
20
  self.input_paths = []
21
21
  for p in input_paths:
22
- candidate = (self.root_dir / p).resolve()
22
+ if p.is_absolute():
23
+ # Handle absolute paths - use as-is but verify they're under root_dir
24
+ candidate = p.resolve()
25
+ else:
26
+ # Handle relative paths - join with root_dir
27
+ candidate = (self.root_dir / p).resolve()
28
+
23
29
  if candidate.exists():
24
30
  try:
25
31
  rel = candidate.relative_to(self.root_dir)
@@ -31,7 +37,7 @@ class ProjectController:
31
37
  )
32
38
  else:
33
39
  self.logger.warning(
34
- f"Path {p} does not exist relative to repository root {self.root_dir}."
40
+ f"Path {p} does not exist or is not under repository root {self.root_dir}."
35
41
  )
36
42
  self.output_file: Path = (
37
43
  Path(args.output).resolve()
@@ -48,6 +54,8 @@ class ProjectController:
48
54
  args.exclude if hasattr(args, "exclude") else []
49
55
  )
50
56
  self.changes_only: bool = getattr(args, "changes", False)
57
+ self.contains: List[str] = getattr(args, "contains", [])
58
+ self.contains_case: bool = getattr(args, "contains_case", False)
51
59
  else:
52
60
  self.args = None
53
61
  self.input_paths = []
@@ -56,6 +64,8 @@ class ProjectController:
56
64
  self.include_patterns = []
57
65
  self.exclude_patterns = []
58
66
  self.changes_only = False
67
+ self.contains = []
68
+ self.contains_case = False
59
69
  self.file_tree: Optional[FileTree] = None
60
70
  self.gitignore_patterns: List[str] = []
61
71
  if self.root_dir:
@@ -110,6 +120,58 @@ class ProjectController:
110
120
  files = [f for f in files if not spec_exc.match_file(f.as_posix())]
111
121
  return files
112
122
 
123
+ def _apply_content_filter(self, files: List[Path]) -> List[Path]:
124
+ """
125
+ Filter files based on content substring matching.
126
+
127
+ Args:
128
+ files: List of relative file paths to filter
129
+
130
+ Returns:
131
+ Filtered list of files that contain at least one of the patterns
132
+ specified in self.contains. Returns original list if no patterns
133
+ are specified.
134
+
135
+ Note:
136
+ Uses case-insensitive matching by default unless self.contains_case
137
+ is True. Skips binary files and files larger than 5MB for performance.
138
+ """
139
+ if not self.contains:
140
+ return files
141
+
142
+ from reposnap.core.content_search import filter_files_by_content
143
+
144
+ initial_count = len(files)
145
+ ignore_case = not self.contains_case
146
+
147
+ self.logger.debug(
148
+ f"Applying content filter with patterns: {self.contains}, "
149
+ f"ignore_case: {ignore_case}"
150
+ )
151
+
152
+ # Convert relative paths to absolute for content search
153
+ absolute_paths = [self.root_dir / file_path for file_path in files]
154
+ filtered_absolute = filter_files_by_content(
155
+ absolute_paths, self.contains, ignore_case
156
+ )
157
+
158
+ # Convert back to relative paths
159
+ filtered_files = []
160
+ for abs_path in filtered_absolute:
161
+ try:
162
+ rel_path = abs_path.relative_to(self.root_dir)
163
+ filtered_files.append(rel_path)
164
+ except ValueError:
165
+ continue
166
+
167
+ kept_count = len(filtered_files)
168
+ self.logger.info(
169
+ f"Applied content filter (kept {kept_count} / {initial_count})"
170
+ )
171
+ self.logger.debug(f"Files kept after content filter: {filtered_files}")
172
+
173
+ return filtered_files
174
+
113
175
  def collect_file_tree(self) -> None:
114
176
  if self.changes_only:
115
177
  self.logger.info("Collecting uncommitted files from Git repository.")
@@ -147,6 +209,8 @@ class ProjectController:
147
209
  continue
148
210
  all_files = self._apply_include_exclude(all_files)
149
211
  self.logger.debug(f"All files after applying include/exclude: {all_files}")
212
+ all_files = self._apply_content_filter(all_files)
213
+ self.logger.debug(f"All files after applying content filter: {all_files}")
150
214
  if self.input_paths:
151
215
  trees = []
152
216
  for input_path in self.input_paths:
@@ -0,0 +1,104 @@
1
+ # src/reposnap/core/content_search.py
2
+
3
+ """
4
+ Private content search helpers for substring matching in files.
5
+
6
+ This module provides stateless utility functions for searching file contents.
7
+ It is intended for internal use by the project controller and should not be
8
+ imported directly by external consumers.
9
+ """
10
+
11
+ import logging
12
+ from pathlib import Path
13
+ from typing import List
14
+
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Configuration constants
19
+ MAX_FILE_SIZE = 5 * 1024 * 1024 # 5 MiB
20
+ BINARY_CHECK_SIZE = 1024 # First 1KB to check for binary content
21
+
22
+
23
+ def file_matches(path: Path, patterns: List[str], ignore_case: bool = True) -> bool:
24
+ """
25
+ Check if a file contains any of the given patterns.
26
+
27
+ Args:
28
+ path: Path to the file to search
29
+ patterns: List of substring patterns to search for
30
+ ignore_case: Whether to perform case-insensitive matching (default: True)
31
+
32
+ Returns:
33
+ True if file contains any pattern, False otherwise
34
+
35
+ Note:
36
+ Uses streaming read with utf-8 encoding and error handling for binary files.
37
+ Returns False if file cannot be read as text or if file is too large/binary.
38
+ """
39
+ if not patterns:
40
+ return True
41
+
42
+ if not path.is_file():
43
+ return False
44
+
45
+ # Check file size - skip files larger than MAX_FILE_SIZE
46
+ try:
47
+ file_size = path.stat().st_size
48
+ if file_size > MAX_FILE_SIZE:
49
+ logger.debug(f"Skipping large file {path} ({file_size} bytes)")
50
+ return False
51
+ except OSError as e:
52
+ logger.debug(f"Could not stat file {path}: {e}")
53
+ return False
54
+
55
+ # Check for binary content in first KB
56
+ try:
57
+ with path.open("rb") as f:
58
+ first_chunk = f.read(BINARY_CHECK_SIZE)
59
+ if b"\0" in first_chunk:
60
+ logger.debug(f"Skipping binary file {path}")
61
+ return False
62
+ except Exception as e:
63
+ logger.debug(f"Could not read file {path} for binary check: {e}")
64
+ return False
65
+
66
+ # Pre-compute search patterns (case-normalized if needed)
67
+ search_patterns = [p.lower() if ignore_case else p for p in patterns]
68
+
69
+ try:
70
+ with path.open("r", encoding="utf-8", errors="ignore") as f:
71
+ for line in f:
72
+ search_line = line.lower() if ignore_case else line
73
+
74
+ if any(pattern in search_line for pattern in search_patterns):
75
+ return True
76
+ return False
77
+ except Exception as e:
78
+ logger.debug(f"Could not read file {path} for content search: {e}")
79
+ return False
80
+
81
+
82
+ def filter_files_by_content(
83
+ files: List[Path], patterns: List[str], ignore_case: bool = True
84
+ ) -> List[Path]:
85
+ """
86
+ Filter a list of files to only include those containing the given patterns.
87
+
88
+ Args:
89
+ files: List of file paths to filter
90
+ patterns: List of substring patterns to search for
91
+ ignore_case: Whether to perform case-insensitive matching (default: True)
92
+
93
+ Returns:
94
+ Filtered list of files that contain at least one pattern
95
+ """
96
+ if not patterns:
97
+ return files
98
+
99
+ matched_files = []
100
+ for file_path in files:
101
+ if file_matches(file_path, patterns, ignore_case):
102
+ matched_files.append(file_path)
103
+
104
+ return matched_files
@@ -46,6 +46,18 @@ def main():
46
46
  action="store_true",
47
47
  help="Use only files that are added/modified/untracked/stashed but not yet committed.",
48
48
  )
49
+ parser.add_argument(
50
+ "-S",
51
+ "--contains",
52
+ nargs="+",
53
+ default=[],
54
+ help="Only include files whose contents contain these substrings",
55
+ )
56
+ parser.add_argument(
57
+ "--contains-case",
58
+ action="store_true",
59
+ help="Make --contains case-sensitive",
60
+ )
49
61
 
50
62
  args = parser.parse_args()
51
63
 
@@ -126,3 +126,98 @@ def test_cli_without_changes_flag(mock_controller, temp_dir):
126
126
  args = mock_controller.call_args[0][0]
127
127
  assert args.changes is False
128
128
  mock_controller_instance.run.assert_called_once()
129
+
130
+
131
+ @patch("reposnap.interfaces.cli.ProjectController")
132
+ def test_cli_with_contains_flag_single(mock_controller, temp_dir):
133
+ """Test that the --contains flag with single value is properly parsed."""
134
+ mock_controller_instance = MagicMock()
135
+ mock_controller.return_value = mock_controller_instance
136
+
137
+ with patch("sys.argv", ["cli.py", str(temp_dir), "--contains", "import"]):
138
+ main()
139
+
140
+ # Verify controller was called with args containing contains=["import"]
141
+ mock_controller.assert_called_once()
142
+ args = mock_controller.call_args[0][0]
143
+ assert args.contains == ["import"]
144
+ assert args.contains_case is False # Default
145
+ mock_controller_instance.run.assert_called_once()
146
+
147
+
148
+ @patch("reposnap.interfaces.cli.ProjectController")
149
+ def test_cli_with_contains_flag_multiple(mock_controller, temp_dir):
150
+ """Test that the --contains flag with multiple values is properly parsed."""
151
+ mock_controller_instance = MagicMock()
152
+ mock_controller.return_value = mock_controller_instance
153
+
154
+ with patch(
155
+ "sys.argv", ["cli.py", str(temp_dir), "-S", "import", "logging", "TODO"]
156
+ ):
157
+ main()
158
+
159
+ # Verify controller was called with args containing multiple patterns
160
+ mock_controller.assert_called_once()
161
+ args = mock_controller.call_args[0][0]
162
+ assert args.contains == ["import", "logging", "TODO"]
163
+ assert args.contains_case is False # Default
164
+ mock_controller_instance.run.assert_called_once()
165
+
166
+
167
+ @patch("reposnap.interfaces.cli.ProjectController")
168
+ def test_cli_with_contains_case_flag(mock_controller, temp_dir):
169
+ """Test that the --contains-case flag is properly parsed."""
170
+ mock_controller_instance = MagicMock()
171
+ mock_controller.return_value = mock_controller_instance
172
+
173
+ with patch("sys.argv", ["cli.py", str(temp_dir), "-S", "TODO", "--contains-case"]):
174
+ main()
175
+
176
+ # Verify controller was called with args containing contains_case=True
177
+ mock_controller.assert_called_once()
178
+ args = mock_controller.call_args[0][0]
179
+ assert args.contains == ["TODO"]
180
+ assert args.contains_case is True
181
+ mock_controller_instance.run.assert_called_once()
182
+
183
+
184
+ @patch("reposnap.interfaces.cli.ProjectController")
185
+ def test_cli_contains_defaults(mock_controller, temp_dir):
186
+ """Test that contains flags default correctly when not provided."""
187
+ mock_controller_instance = MagicMock()
188
+ mock_controller.return_value = mock_controller_instance
189
+
190
+ with patch("sys.argv", ["cli.py", str(temp_dir)]):
191
+ main()
192
+
193
+ # Verify controller was called with default contains values
194
+ mock_controller.assert_called_once()
195
+ args = mock_controller.call_args[0][0]
196
+ assert args.contains == [] # Default empty list
197
+ assert args.contains_case is False # Default False
198
+ mock_controller_instance.run.assert_called_once()
199
+
200
+
201
+ @patch("reposnap.interfaces.cli.ProjectController")
202
+ def test_cli_contains_with_binary_files(mock_controller, temp_dir):
203
+ """Test that binary files are properly handled in contains filter."""
204
+ mock_controller_instance = MagicMock()
205
+ mock_controller.return_value = mock_controller_instance
206
+
207
+ # Create a binary file and a text file
208
+ binary_file = os.path.join(temp_dir, "binary.bin")
209
+ with open(binary_file, "wb") as f:
210
+ f.write(b"\x00\x01binary\x00data\xff")
211
+
212
+ text_file = os.path.join(temp_dir, "text.py")
213
+ with open(text_file, "w") as f:
214
+ f.write("import logging\ndef main(): pass\n")
215
+
216
+ with patch("sys.argv", ["cli.py", str(temp_dir), "-S", "import"]):
217
+ main()
218
+
219
+ # Should have been called and run successfully
220
+ mock_controller.assert_called_once()
221
+ args = mock_controller.call_args[0][0]
222
+ assert args.contains == ["import"]
223
+ mock_controller_instance.run.assert_called_once()
@@ -0,0 +1,295 @@
1
+ # tests/reposnap/test_contains_filter.py
2
+
3
+ from pathlib import Path
4
+ from reposnap.core.content_search import file_matches, filter_files_by_content
5
+
6
+
7
+ class TestContentSearch:
8
+ """Test content search functionality."""
9
+
10
+ def test_file_matches_single_pattern(self, tmp_path):
11
+ """Test matching a single pattern in file content."""
12
+ test_file = tmp_path / "test.py"
13
+ test_file.write_text("def main():\n print('needle in haystack')\n")
14
+
15
+ assert file_matches(test_file, ["needle"], ignore_case=True)
16
+ assert not file_matches(test_file, ["nonexistent"], ignore_case=True)
17
+
18
+ def test_file_matches_multiple_patterns_or_logic(self, tmp_path):
19
+ """Test that multiple patterns use OR logic (any match is sufficient)."""
20
+ test_file = tmp_path / "test.py"
21
+ test_file.write_text("import logging\ndef main():\n pass\n")
22
+
23
+ # Should match because file contains "import"
24
+ assert file_matches(test_file, ["import", "nonexistent"], ignore_case=True)
25
+ # Should match because file contains "logging"
26
+ assert file_matches(test_file, ["nonexistent", "logging"], ignore_case=True)
27
+ # Should not match because file contains neither
28
+ assert not file_matches(test_file, ["foo", "bar"], ignore_case=True)
29
+
30
+ def test_file_matches_case_insensitive_default(self, tmp_path):
31
+ """Test case insensitive matching by default."""
32
+ test_file = tmp_path / "test.py"
33
+ test_file.write_text("TODO: Fix this bug\n")
34
+
35
+ assert file_matches(test_file, ["todo"], ignore_case=True)
36
+ assert file_matches(test_file, ["TODO"], ignore_case=True)
37
+ assert file_matches(test_file, ["Todo"], ignore_case=True)
38
+
39
+ def test_file_matches_case_sensitive_flag(self, tmp_path):
40
+ """Test case sensitive matching when flag is set."""
41
+ test_file = tmp_path / "test.py"
42
+ test_file.write_text("TODO: Fix this bug\n")
43
+
44
+ assert file_matches(test_file, ["TODO"], ignore_case=False)
45
+ assert not file_matches(test_file, ["todo"], ignore_case=False)
46
+ assert not file_matches(test_file, ["Todo"], ignore_case=False)
47
+
48
+ def test_file_matches_empty_patterns(self, tmp_path):
49
+ """Test that empty patterns list returns True."""
50
+ test_file = tmp_path / "test.py"
51
+ test_file.write_text("any content\n")
52
+
53
+ assert file_matches(test_file, [], ignore_case=True)
54
+
55
+ def test_file_matches_nonexistent_file(self, tmp_path):
56
+ """Test that non-existent file returns False."""
57
+ nonexistent = tmp_path / "nonexistent.py"
58
+
59
+ assert not file_matches(nonexistent, ["pattern"], ignore_case=True)
60
+
61
+ def test_file_matches_binary_file_handling(self, tmp_path):
62
+ """Test that binary files are handled gracefully."""
63
+ binary_file = tmp_path / "test.bin"
64
+ binary_file.write_bytes(b"\x00\x01\x02\x03\xff\xfe")
65
+
66
+ # Should not crash and should return False for binary content
67
+ assert not file_matches(binary_file, ["pattern"], ignore_case=True)
68
+
69
+ def test_file_matches_large_file_handling(self, tmp_path):
70
+ """Test that large files are skipped for performance."""
71
+ from reposnap.core.content_search import MAX_FILE_SIZE
72
+
73
+ large_file = tmp_path / "large.txt"
74
+ # Create a file slightly larger than MAX_FILE_SIZE
75
+ large_content = "x" * (MAX_FILE_SIZE + 1000)
76
+ large_file.write_text(large_content)
77
+
78
+ # Should skip large files and return False
79
+ assert not file_matches(large_file, ["x"], ignore_case=True)
80
+
81
+ def test_file_matches_binary_detection_in_middle(self, tmp_path):
82
+ """Test binary detection with null bytes after text content."""
83
+ mixed_file = tmp_path / "mixed.txt"
84
+ # Write text content followed by binary content with null bytes
85
+ with mixed_file.open("wb") as f:
86
+ f.write(b"some text content\n")
87
+ f.write(b"\x00binary\x00data\xff")
88
+
89
+ # Should detect binary content and return False
90
+ assert not file_matches(mixed_file, ["text"], ignore_case=True)
91
+
92
+ def test_filter_files_by_content_basic(self, tmp_path):
93
+ """Test basic file filtering by content."""
94
+ file1 = tmp_path / "file1.py"
95
+ file1.write_text("import logging\ndef main(): pass\n")
96
+
97
+ file2 = tmp_path / "file2.py"
98
+ file2.write_text("import os\ndef helper(): pass\n")
99
+
100
+ file3 = tmp_path / "file3.py"
101
+ file3.write_text("print('hello world')\n")
102
+
103
+ files = [file1, file2, file3]
104
+
105
+ # Filter for files containing "import"
106
+ filtered = filter_files_by_content(files, ["import"])
107
+ assert set(filtered) == {file1, file2}
108
+
109
+ # Filter for files containing "logging"
110
+ filtered = filter_files_by_content(files, ["logging"])
111
+ assert filtered == [file1]
112
+
113
+ def test_filter_files_by_content_empty_patterns(self, tmp_path):
114
+ """Test that empty patterns return all files."""
115
+ file1 = tmp_path / "file1.py"
116
+ file1.write_text("content1")
117
+
118
+ file2 = tmp_path / "file2.py"
119
+ file2.write_text("content2")
120
+
121
+ files = [file1, file2]
122
+ filtered = filter_files_by_content(files, [])
123
+ assert filtered == files
124
+
125
+ def test_filter_files_by_content_case_sensitivity(self, tmp_path):
126
+ """Test case sensitivity in file filtering."""
127
+ test_file = tmp_path / "test.py"
128
+ test_file.write_text("TODO: Important task\n")
129
+
130
+ files = [test_file]
131
+
132
+ # Case insensitive (default)
133
+ filtered = filter_files_by_content(files, ["todo"], ignore_case=True)
134
+ assert filtered == [test_file]
135
+
136
+ # Case sensitive
137
+ filtered = filter_files_by_content(files, ["todo"], ignore_case=False)
138
+ assert filtered == []
139
+
140
+ filtered = filter_files_by_content(files, ["TODO"], ignore_case=False)
141
+ assert filtered == [test_file]
142
+
143
+
144
+ class TestProjectControllerIntegration:
145
+ """Test integration of contains filter with ProjectController."""
146
+
147
+ def test_contains_filter_integration(self, tmp_path):
148
+ """Test that ProjectController properly applies content filters."""
149
+ from reposnap.controllers.project_controller import ProjectController
150
+ from unittest.mock import Mock
151
+
152
+ # Create test files
153
+ file1 = tmp_path / "file1.py"
154
+ file1.write_text("import logging\ndef main(): pass\n")
155
+
156
+ file2 = tmp_path / "file2.py"
157
+ file2.write_text("import os\ndef helper(): pass\n")
158
+
159
+ file3 = tmp_path / "file3.txt"
160
+ file3.write_text("This is a text file\n")
161
+
162
+ # Mock args object
163
+ args = Mock()
164
+ args.paths = [str(tmp_path)]
165
+ args.output = "output.md"
166
+ args.structure_only = False
167
+ args.include = []
168
+ args.exclude = []
169
+ args.changes = False
170
+ args.contains = ["import"]
171
+ args.contains_case = False
172
+
173
+ # Create controller and test content filtering
174
+ controller = ProjectController(args)
175
+ controller.set_root_dir(tmp_path)
176
+
177
+ # Test the _apply_content_filter method directly
178
+ files = [Path("file1.py"), Path("file2.py"), Path("file3.txt")]
179
+ filtered = controller._apply_content_filter(files)
180
+
181
+ # Should only keep files with "import"
182
+ assert set(f.name for f in filtered) == {"file1.py", "file2.py"}
183
+
184
+ def test_contains_filter_case_sensitivity_integration(self, tmp_path):
185
+ """Test case sensitivity integration in ProjectController."""
186
+ from reposnap.controllers.project_controller import ProjectController
187
+ from unittest.mock import Mock
188
+
189
+ # Create test file
190
+ test_file = tmp_path / "test.py"
191
+ test_file.write_text("TODO: Fix this\n")
192
+
193
+ # Test case insensitive (default)
194
+ args = Mock()
195
+ args.paths = [str(tmp_path)]
196
+ args.output = "output.md"
197
+ args.structure_only = False
198
+ args.include = []
199
+ args.exclude = []
200
+ args.changes = False
201
+ args.contains = ["todo"]
202
+ args.contains_case = False
203
+
204
+ controller = ProjectController(args)
205
+ controller.set_root_dir(tmp_path)
206
+
207
+ files = [Path("test.py")]
208
+ filtered = controller._apply_content_filter(files)
209
+ assert len(filtered) == 1
210
+
211
+ # Test case sensitive
212
+ args.contains_case = True
213
+ controller = ProjectController(args)
214
+ controller.set_root_dir(tmp_path)
215
+
216
+ filtered = controller._apply_content_filter(files)
217
+ assert len(filtered) == 0 # "todo" != "TODO"
218
+
219
+ def test_no_contains_patterns_returns_all_files(self, tmp_path):
220
+ """Test that empty contains patterns return all files unchanged."""
221
+ from reposnap.controllers.project_controller import ProjectController
222
+ from unittest.mock import Mock
223
+
224
+ # Create test files
225
+ file1 = tmp_path / "file1.py"
226
+ file1.write_text("content1")
227
+
228
+ file2 = tmp_path / "file2.py"
229
+ file2.write_text("content2")
230
+
231
+ # Mock args with empty contains
232
+ args = Mock()
233
+ args.paths = [str(tmp_path)]
234
+ args.output = "output.md"
235
+ args.structure_only = False
236
+ args.include = []
237
+ args.exclude = []
238
+ args.changes = False
239
+ args.contains = []
240
+ args.contains_case = False
241
+
242
+ controller = ProjectController(args)
243
+ controller.set_root_dir(tmp_path)
244
+
245
+ files = [Path("file1.py"), Path("file2.py")]
246
+ filtered = controller._apply_content_filter(files)
247
+
248
+ # Should return all files unchanged
249
+ assert set(filtered) == set(files)
250
+
251
+ def test_absolute_path_handling(self, tmp_path):
252
+ """Test that ProjectController properly handles absolute paths."""
253
+ from reposnap.controllers.project_controller import ProjectController
254
+ from unittest.mock import Mock
255
+
256
+ # Create test file
257
+ test_file = tmp_path / "test.py"
258
+ test_file.write_text("import logging\ndef main(): pass\n")
259
+
260
+ # Mock args with absolute path
261
+ args = Mock()
262
+ args.paths = [str(test_file)] # Absolute path
263
+ args.output = "output.md"
264
+ args.structure_only = False
265
+ args.include = []
266
+ args.exclude = []
267
+ args.changes = False
268
+ args.contains = ["import"]
269
+ args.contains_case = False
270
+
271
+ # Create controller with the temp directory as root from the start
272
+ # This simulates how the controller would actually work
273
+ controller = ProjectController()
274
+ controller.set_root_dir(tmp_path)
275
+
276
+ # Process the args manually after setting root dir
277
+ input_paths = [Path(p) for p in args.paths]
278
+ controller.input_paths = []
279
+ for p in input_paths:
280
+ if p.is_absolute():
281
+ candidate = p.resolve()
282
+ else:
283
+ candidate = (controller.root_dir / p).resolve()
284
+
285
+ if candidate.exists():
286
+ try:
287
+ rel = candidate.relative_to(controller.root_dir)
288
+ if rel != Path("."):
289
+ controller.input_paths.append(rel)
290
+ except ValueError:
291
+ pass # Path not under root
292
+
293
+ # Should have processed the absolute path correctly
294
+ assert len(controller.input_paths) == 1
295
+ assert controller.input_paths[0].name == "test.py"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes