reposnap 0.7.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {reposnap-0.7.0 → reposnap-0.8.0}/.coverage +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/PKG-INFO +52 -2
- {reposnap-0.7.0 → reposnap-0.8.0}/README.md +51 -1
- {reposnap-0.7.0 → reposnap-0.8.0}/pyproject.toml +1 -1
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/controllers/project_controller.py +66 -2
- reposnap-0.8.0/reposnap/core/content_search.py +104 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/interfaces/cli.py +12 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_cli.py +95 -0
- reposnap-0.8.0/tests/reposnap/test_contains_filter.py +295 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/.github/workflows/python-package.yml +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/.github/workflows/release.yml +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/.gitignore +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/.pre-commit-config.yaml +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/.python-version +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/.vscode/launch.json +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/CONTRIBUTING.md +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/LICENSE +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/__init__.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/controllers/__init__.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/core/__init__.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/core/file_system.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/core/git_repo.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/core/markdown_generator.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/interfaces/__init__.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/interfaces/gui.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/models/__init__.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/models/file_tree.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/utils/__init__.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/reposnap/utils/path_utils.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/requirements-dev.lock +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/requirements.lock +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/__init__.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/__init__.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_collected_tree.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_file_system.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_file_tree.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_git_repo.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_gui.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_markdown_generator.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_path_utils.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/reposnap/test_project_controller.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/resources/another_existing_file.py +0 -0
- {reposnap-0.7.0 → reposnap-0.8.0}/tests/resources/existing_file.py +0 -0
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: reposnap
|
3
|
-
Version: 0.7.0
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: Generate a Markdown file with all contents of your project
|
5
5
|
Author: agoloborodko
|
6
6
|
License-File: LICENSE
|
@@ -25,6 +25,7 @@ Description-Content-Type: text/markdown
|
|
25
25
|
- **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
|
26
26
|
- **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
|
27
27
|
- **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
|
28
|
+
- **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
|
28
29
|
- **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
|
29
30
|
|
30
31
|
## Installation
|
@@ -50,7 +51,7 @@ pip install -r requirements.lock
|
|
50
51
|
To use `reposnap` from the command line, run it with the following options:
|
51
52
|
|
52
53
|
```bash
|
53
|
-
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] paths [paths ...]
|
54
|
+
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
|
54
55
|
```
|
55
56
|
|
56
57
|
- `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
|
@@ -61,6 +62,8 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
61
62
|
- `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
|
62
63
|
- `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
|
63
64
|
- `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
|
65
|
+
- `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
|
66
|
+
- `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
|
64
67
|
|
65
68
|
#### Pattern Matching
|
66
69
|
|
@@ -73,6 +76,41 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
73
76
|
- `-i "*.py"`: Includes only files ending with `.py`.
|
74
77
|
- `-e "*.test.*"`: Excludes files with `.test.` in their names.
|
75
78
|
|
79
|
+
#### Content Filtering
|
80
|
+
|
81
|
+
The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
|
82
|
+
|
83
|
+
- **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
|
84
|
+
- **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
|
85
|
+
- **Performance**: Large files (>5MB) and binary files are automatically skipped for performance and safety reasons.
|
86
|
+
|
87
|
+
**Examples**:
|
88
|
+
|
89
|
+
1. **Find files containing specific imports**:
|
90
|
+
```bash
|
91
|
+
reposnap . -S "import logging"
|
92
|
+
```
|
93
|
+
|
94
|
+
2. **Search for multiple patterns (OR logic)**:
|
95
|
+
```bash
|
96
|
+
reposnap . -S "TODO" "FIXME" "import requests"
|
97
|
+
```
|
98
|
+
|
99
|
+
3. **Case-sensitive content search**:
|
100
|
+
```bash
|
101
|
+
reposnap . -S "TODO" --contains-case
|
102
|
+
```
|
103
|
+
|
104
|
+
4. **Combine content filtering with other filters**:
|
105
|
+
```bash
|
106
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
107
|
+
```
|
108
|
+
|
109
|
+
5. **Find files with specific function calls**:
|
110
|
+
```bash
|
111
|
+
reposnap . -S "logger.error" "raise Exception"
|
112
|
+
```
|
113
|
+
|
76
114
|
#### Only Snapshot Your Current Work
|
77
115
|
|
78
116
|
The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
|
@@ -147,6 +185,18 @@ This is particularly useful when you want to:
|
|
147
185
|
reposnap . -c
|
148
186
|
```
|
149
187
|
|
188
|
+
7. **Find and document files containing specific code patterns**:
|
189
|
+
|
190
|
+
```bash
|
191
|
+
reposnap . -S "import logging" "logger."
|
192
|
+
```
|
193
|
+
|
194
|
+
8. **Combine content filtering with file type filtering**:
|
195
|
+
|
196
|
+
```bash
|
197
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
198
|
+
```
|
199
|
+
|
150
200
|
### Graphical User Interface
|
151
201
|
|
152
202
|
`reposnap` also provides a GUI for users who prefer an interactive interface.
|
@@ -12,6 +12,7 @@
|
|
12
12
|
- **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
|
13
13
|
- **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
|
14
14
|
- **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
|
15
|
+
- **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
|
15
16
|
- **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
|
16
17
|
|
17
18
|
## Installation
|
@@ -37,7 +38,7 @@ pip install -r requirements.lock
|
|
37
38
|
To use `reposnap` from the command line, run it with the following options:
|
38
39
|
|
39
40
|
```bash
|
40
|
-
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] paths [paths ...]
|
41
|
+
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
|
41
42
|
```
|
42
43
|
|
43
44
|
- `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
|
@@ -48,6 +49,8 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
48
49
|
- `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
|
49
50
|
- `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
|
50
51
|
- `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
|
52
|
+
- `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
|
53
|
+
- `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
|
51
54
|
|
52
55
|
#### Pattern Matching
|
53
56
|
|
@@ -60,6 +63,41 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
60
63
|
- `-i "*.py"`: Includes only files ending with `.py`.
|
61
64
|
- `-e "*.test.*"`: Excludes files with `.test.` in their names.
|
62
65
|
|
66
|
+
#### Content Filtering
|
67
|
+
|
68
|
+
The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
|
69
|
+
|
70
|
+
- **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
|
71
|
+
- **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
|
72
|
+
- **Performance**: Large files (>5MB) and binary files are automatically skipped for performance and safety reasons.
|
73
|
+
|
74
|
+
**Examples**:
|
75
|
+
|
76
|
+
1. **Find files containing specific imports**:
|
77
|
+
```bash
|
78
|
+
reposnap . -S "import logging"
|
79
|
+
```
|
80
|
+
|
81
|
+
2. **Search for multiple patterns (OR logic)**:
|
82
|
+
```bash
|
83
|
+
reposnap . -S "TODO" "FIXME" "import requests"
|
84
|
+
```
|
85
|
+
|
86
|
+
3. **Case-sensitive content search**:
|
87
|
+
```bash
|
88
|
+
reposnap . -S "TODO" --contains-case
|
89
|
+
```
|
90
|
+
|
91
|
+
4. **Combine content filtering with other filters**:
|
92
|
+
```bash
|
93
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
94
|
+
```
|
95
|
+
|
96
|
+
5. **Find files with specific function calls**:
|
97
|
+
```bash
|
98
|
+
reposnap . -S "logger.error" "raise Exception"
|
99
|
+
```
|
100
|
+
|
63
101
|
#### Only Snapshot Your Current Work
|
64
102
|
|
65
103
|
The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
|
@@ -134,6 +172,18 @@ This is particularly useful when you want to:
|
|
134
172
|
reposnap . -c
|
135
173
|
```
|
136
174
|
|
175
|
+
7. **Find and document files containing specific code patterns**:
|
176
|
+
|
177
|
+
```bash
|
178
|
+
reposnap . -S "import logging" "logger."
|
179
|
+
```
|
180
|
+
|
181
|
+
8. **Combine content filtering with file type filtering**:
|
182
|
+
|
183
|
+
```bash
|
184
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
185
|
+
```
|
186
|
+
|
137
187
|
### Graphical User Interface
|
138
188
|
|
139
189
|
`reposnap` also provides a GUI for users who prefer an interactive interface.
|
@@ -19,7 +19,13 @@ class ProjectController:
|
|
19
19
|
]
|
20
20
|
self.input_paths = []
|
21
21
|
for p in input_paths:
|
22
|
-
candidate = (self.root_dir / p).resolve()
22
|
+
if p.is_absolute():
|
23
|
+
# Handle absolute paths - use as-is but verify they're under root_dir
|
24
|
+
candidate = p.resolve()
|
25
|
+
else:
|
26
|
+
# Handle relative paths - join with root_dir
|
27
|
+
candidate = (self.root_dir / p).resolve()
|
28
|
+
|
23
29
|
if candidate.exists():
|
24
30
|
try:
|
25
31
|
rel = candidate.relative_to(self.root_dir)
|
@@ -31,7 +37,7 @@ class ProjectController:
|
|
31
37
|
)
|
32
38
|
else:
|
33
39
|
self.logger.warning(
|
34
|
-
f"Path {p} does not exist
|
40
|
+
f"Path {p} does not exist or is not under repository root {self.root_dir}."
|
35
41
|
)
|
36
42
|
self.output_file: Path = (
|
37
43
|
Path(args.output).resolve()
|
@@ -48,6 +54,8 @@ class ProjectController:
|
|
48
54
|
args.exclude if hasattr(args, "exclude") else []
|
49
55
|
)
|
50
56
|
self.changes_only: bool = getattr(args, "changes", False)
|
57
|
+
self.contains: List[str] = getattr(args, "contains", [])
|
58
|
+
self.contains_case: bool = getattr(args, "contains_case", False)
|
51
59
|
else:
|
52
60
|
self.args = None
|
53
61
|
self.input_paths = []
|
@@ -56,6 +64,8 @@ class ProjectController:
|
|
56
64
|
self.include_patterns = []
|
57
65
|
self.exclude_patterns = []
|
58
66
|
self.changes_only = False
|
67
|
+
self.contains = []
|
68
|
+
self.contains_case = False
|
59
69
|
self.file_tree: Optional[FileTree] = None
|
60
70
|
self.gitignore_patterns: List[str] = []
|
61
71
|
if self.root_dir:
|
@@ -110,6 +120,58 @@ class ProjectController:
|
|
110
120
|
files = [f for f in files if not spec_exc.match_file(f.as_posix())]
|
111
121
|
return files
|
112
122
|
|
123
|
+
def _apply_content_filter(self, files: List[Path]) -> List[Path]:
|
124
|
+
"""
|
125
|
+
Filter files based on content substring matching.
|
126
|
+
|
127
|
+
Args:
|
128
|
+
files: List of relative file paths to filter
|
129
|
+
|
130
|
+
Returns:
|
131
|
+
Filtered list of files that contain at least one of the patterns
|
132
|
+
specified in self.contains. Returns original list if no patterns
|
133
|
+
are specified.
|
134
|
+
|
135
|
+
Note:
|
136
|
+
Uses case-insensitive matching by default unless self.contains_case
|
137
|
+
is True. Skips binary files and files larger than 5MB for performance.
|
138
|
+
"""
|
139
|
+
if not self.contains:
|
140
|
+
return files
|
141
|
+
|
142
|
+
from reposnap.core.content_search import filter_files_by_content
|
143
|
+
|
144
|
+
initial_count = len(files)
|
145
|
+
ignore_case = not self.contains_case
|
146
|
+
|
147
|
+
self.logger.debug(
|
148
|
+
f"Applying content filter with patterns: {self.contains}, "
|
149
|
+
f"ignore_case: {ignore_case}"
|
150
|
+
)
|
151
|
+
|
152
|
+
# Convert relative paths to absolute for content search
|
153
|
+
absolute_paths = [self.root_dir / file_path for file_path in files]
|
154
|
+
filtered_absolute = filter_files_by_content(
|
155
|
+
absolute_paths, self.contains, ignore_case
|
156
|
+
)
|
157
|
+
|
158
|
+
# Convert back to relative paths
|
159
|
+
filtered_files = []
|
160
|
+
for abs_path in filtered_absolute:
|
161
|
+
try:
|
162
|
+
rel_path = abs_path.relative_to(self.root_dir)
|
163
|
+
filtered_files.append(rel_path)
|
164
|
+
except ValueError:
|
165
|
+
continue
|
166
|
+
|
167
|
+
kept_count = len(filtered_files)
|
168
|
+
self.logger.info(
|
169
|
+
f"Applied content filter (kept {kept_count} / {initial_count})"
|
170
|
+
)
|
171
|
+
self.logger.debug(f"Files kept after content filter: {filtered_files}")
|
172
|
+
|
173
|
+
return filtered_files
|
174
|
+
|
113
175
|
def collect_file_tree(self) -> None:
|
114
176
|
if self.changes_only:
|
115
177
|
self.logger.info("Collecting uncommitted files from Git repository.")
|
@@ -147,6 +209,8 @@ class ProjectController:
|
|
147
209
|
continue
|
148
210
|
all_files = self._apply_include_exclude(all_files)
|
149
211
|
self.logger.debug(f"All files after applying include/exclude: {all_files}")
|
212
|
+
all_files = self._apply_content_filter(all_files)
|
213
|
+
self.logger.debug(f"All files after applying content filter: {all_files}")
|
150
214
|
if self.input_paths:
|
151
215
|
trees = []
|
152
216
|
for input_path in self.input_paths:
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# src/reposnap/core/content_search.py
|
2
|
+
|
3
|
+
"""
|
4
|
+
Private content search helpers for substring matching in files.
|
5
|
+
|
6
|
+
This module provides stateless utility functions for searching file contents.
|
7
|
+
It is intended for internal use by the project controller and should not be
|
8
|
+
imported directly by external consumers.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import logging
|
12
|
+
from pathlib import Path
|
13
|
+
from typing import List
|
14
|
+
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
# Configuration constants
|
19
|
+
MAX_FILE_SIZE = 5 * 1024 * 1024 # 5 MiB
|
20
|
+
BINARY_CHECK_SIZE = 1024 # First 1KB to check for binary content
|
21
|
+
|
22
|
+
|
23
|
+
def file_matches(path: Path, patterns: List[str], ignore_case: bool = True) -> bool:
|
24
|
+
"""
|
25
|
+
Check if a file contains any of the given patterns.
|
26
|
+
|
27
|
+
Args:
|
28
|
+
path: Path to the file to search
|
29
|
+
patterns: List of substring patterns to search for
|
30
|
+
ignore_case: Whether to perform case-insensitive matching (default: True)
|
31
|
+
|
32
|
+
Returns:
|
33
|
+
True if file contains any pattern, False otherwise
|
34
|
+
|
35
|
+
Note:
|
36
|
+
Uses streaming read with utf-8 encoding and error handling for binary files.
|
37
|
+
Returns False if file cannot be read as text or if file is too large/binary.
|
38
|
+
"""
|
39
|
+
if not patterns:
|
40
|
+
return True
|
41
|
+
|
42
|
+
if not path.is_file():
|
43
|
+
return False
|
44
|
+
|
45
|
+
# Check file size - skip files larger than MAX_FILE_SIZE
|
46
|
+
try:
|
47
|
+
file_size = path.stat().st_size
|
48
|
+
if file_size > MAX_FILE_SIZE:
|
49
|
+
logger.debug(f"Skipping large file {path} ({file_size} bytes)")
|
50
|
+
return False
|
51
|
+
except OSError as e:
|
52
|
+
logger.debug(f"Could not stat file {path}: {e}")
|
53
|
+
return False
|
54
|
+
|
55
|
+
# Check for binary content in first KB
|
56
|
+
try:
|
57
|
+
with path.open("rb") as f:
|
58
|
+
first_chunk = f.read(BINARY_CHECK_SIZE)
|
59
|
+
if b"\0" in first_chunk:
|
60
|
+
logger.debug(f"Skipping binary file {path}")
|
61
|
+
return False
|
62
|
+
except Exception as e:
|
63
|
+
logger.debug(f"Could not read file {path} for binary check: {e}")
|
64
|
+
return False
|
65
|
+
|
66
|
+
# Pre-compute search patterns (case-normalized if needed)
|
67
|
+
search_patterns = [p.lower() if ignore_case else p for p in patterns]
|
68
|
+
|
69
|
+
try:
|
70
|
+
with path.open("r", encoding="utf-8", errors="ignore") as f:
|
71
|
+
for line in f:
|
72
|
+
search_line = line.lower() if ignore_case else line
|
73
|
+
|
74
|
+
if any(pattern in search_line for pattern in search_patterns):
|
75
|
+
return True
|
76
|
+
return False
|
77
|
+
except Exception as e:
|
78
|
+
logger.debug(f"Could not read file {path} for content search: {e}")
|
79
|
+
return False
|
80
|
+
|
81
|
+
|
82
|
+
def filter_files_by_content(
|
83
|
+
files: List[Path], patterns: List[str], ignore_case: bool = True
|
84
|
+
) -> List[Path]:
|
85
|
+
"""
|
86
|
+
Filter a list of files to only include those containing the given patterns.
|
87
|
+
|
88
|
+
Args:
|
89
|
+
files: List of file paths to filter
|
90
|
+
patterns: List of substring patterns to search for
|
91
|
+
ignore_case: Whether to perform case-insensitive matching (default: True)
|
92
|
+
|
93
|
+
Returns:
|
94
|
+
Filtered list of files that contain at least one pattern
|
95
|
+
"""
|
96
|
+
if not patterns:
|
97
|
+
return files
|
98
|
+
|
99
|
+
matched_files = []
|
100
|
+
for file_path in files:
|
101
|
+
if file_matches(file_path, patterns, ignore_case):
|
102
|
+
matched_files.append(file_path)
|
103
|
+
|
104
|
+
return matched_files
|
@@ -46,6 +46,18 @@ def main():
|
|
46
46
|
action="store_true",
|
47
47
|
help="Use only files that are added/modified/untracked/stashed but not yet committed.",
|
48
48
|
)
|
49
|
+
parser.add_argument(
|
50
|
+
"-S",
|
51
|
+
"--contains",
|
52
|
+
nargs="+",
|
53
|
+
default=[],
|
54
|
+
help="Only include files whose contents contain these substrings",
|
55
|
+
)
|
56
|
+
parser.add_argument(
|
57
|
+
"--contains-case",
|
58
|
+
action="store_true",
|
59
|
+
help="Make --contains case-sensitive",
|
60
|
+
)
|
49
61
|
|
50
62
|
args = parser.parse_args()
|
51
63
|
|
@@ -126,3 +126,98 @@ def test_cli_without_changes_flag(mock_controller, temp_dir):
|
|
126
126
|
args = mock_controller.call_args[0][0]
|
127
127
|
assert args.changes is False
|
128
128
|
mock_controller_instance.run.assert_called_once()
|
129
|
+
|
130
|
+
|
131
|
+
@patch("reposnap.interfaces.cli.ProjectController")
|
132
|
+
def test_cli_with_contains_flag_single(mock_controller, temp_dir):
|
133
|
+
"""Test that the --contains flag with single value is properly parsed."""
|
134
|
+
mock_controller_instance = MagicMock()
|
135
|
+
mock_controller.return_value = mock_controller_instance
|
136
|
+
|
137
|
+
with patch("sys.argv", ["cli.py", str(temp_dir), "--contains", "import"]):
|
138
|
+
main()
|
139
|
+
|
140
|
+
# Verify controller was called with args containing contains=["import"]
|
141
|
+
mock_controller.assert_called_once()
|
142
|
+
args = mock_controller.call_args[0][0]
|
143
|
+
assert args.contains == ["import"]
|
144
|
+
assert args.contains_case is False # Default
|
145
|
+
mock_controller_instance.run.assert_called_once()
|
146
|
+
|
147
|
+
|
148
|
+
@patch("reposnap.interfaces.cli.ProjectController")
|
149
|
+
def test_cli_with_contains_flag_multiple(mock_controller, temp_dir):
|
150
|
+
"""Test that the --contains flag with multiple values is properly parsed."""
|
151
|
+
mock_controller_instance = MagicMock()
|
152
|
+
mock_controller.return_value = mock_controller_instance
|
153
|
+
|
154
|
+
with patch(
|
155
|
+
"sys.argv", ["cli.py", str(temp_dir), "-S", "import", "logging", "TODO"]
|
156
|
+
):
|
157
|
+
main()
|
158
|
+
|
159
|
+
# Verify controller was called with args containing multiple patterns
|
160
|
+
mock_controller.assert_called_once()
|
161
|
+
args = mock_controller.call_args[0][0]
|
162
|
+
assert args.contains == ["import", "logging", "TODO"]
|
163
|
+
assert args.contains_case is False # Default
|
164
|
+
mock_controller_instance.run.assert_called_once()
|
165
|
+
|
166
|
+
|
167
|
+
@patch("reposnap.interfaces.cli.ProjectController")
|
168
|
+
def test_cli_with_contains_case_flag(mock_controller, temp_dir):
|
169
|
+
"""Test that the --contains-case flag is properly parsed."""
|
170
|
+
mock_controller_instance = MagicMock()
|
171
|
+
mock_controller.return_value = mock_controller_instance
|
172
|
+
|
173
|
+
with patch("sys.argv", ["cli.py", str(temp_dir), "-S", "TODO", "--contains-case"]):
|
174
|
+
main()
|
175
|
+
|
176
|
+
# Verify controller was called with args containing contains_case=True
|
177
|
+
mock_controller.assert_called_once()
|
178
|
+
args = mock_controller.call_args[0][0]
|
179
|
+
assert args.contains == ["TODO"]
|
180
|
+
assert args.contains_case is True
|
181
|
+
mock_controller_instance.run.assert_called_once()
|
182
|
+
|
183
|
+
|
184
|
+
@patch("reposnap.interfaces.cli.ProjectController")
|
185
|
+
def test_cli_contains_defaults(mock_controller, temp_dir):
|
186
|
+
"""Test that contains flags default correctly when not provided."""
|
187
|
+
mock_controller_instance = MagicMock()
|
188
|
+
mock_controller.return_value = mock_controller_instance
|
189
|
+
|
190
|
+
with patch("sys.argv", ["cli.py", str(temp_dir)]):
|
191
|
+
main()
|
192
|
+
|
193
|
+
# Verify controller was called with default contains values
|
194
|
+
mock_controller.assert_called_once()
|
195
|
+
args = mock_controller.call_args[0][0]
|
196
|
+
assert args.contains == [] # Default empty list
|
197
|
+
assert args.contains_case is False # Default False
|
198
|
+
mock_controller_instance.run.assert_called_once()
|
199
|
+
|
200
|
+
|
201
|
+
@patch("reposnap.interfaces.cli.ProjectController")
|
202
|
+
def test_cli_contains_with_binary_files(mock_controller, temp_dir):
|
203
|
+
"""Test that binary files are properly handled in contains filter."""
|
204
|
+
mock_controller_instance = MagicMock()
|
205
|
+
mock_controller.return_value = mock_controller_instance
|
206
|
+
|
207
|
+
# Create a binary file and a text file
|
208
|
+
binary_file = os.path.join(temp_dir, "binary.bin")
|
209
|
+
with open(binary_file, "wb") as f:
|
210
|
+
f.write(b"\x00\x01binary\x00data\xff")
|
211
|
+
|
212
|
+
text_file = os.path.join(temp_dir, "text.py")
|
213
|
+
with open(text_file, "w") as f:
|
214
|
+
f.write("import logging\ndef main(): pass\n")
|
215
|
+
|
216
|
+
with patch("sys.argv", ["cli.py", str(temp_dir), "-S", "import"]):
|
217
|
+
main()
|
218
|
+
|
219
|
+
# Should have been called and run successfully
|
220
|
+
mock_controller.assert_called_once()
|
221
|
+
args = mock_controller.call_args[0][0]
|
222
|
+
assert args.contains == ["import"]
|
223
|
+
mock_controller_instance.run.assert_called_once()
|
@@ -0,0 +1,295 @@
|
|
1
|
+
# tests/reposnap/test_contains_filter.py
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from reposnap.core.content_search import file_matches, filter_files_by_content
|
5
|
+
|
6
|
+
|
7
|
+
class TestContentSearch:
|
8
|
+
"""Test content search functionality."""
|
9
|
+
|
10
|
+
def test_file_matches_single_pattern(self, tmp_path):
|
11
|
+
"""Test matching a single pattern in file content."""
|
12
|
+
test_file = tmp_path / "test.py"
|
13
|
+
test_file.write_text("def main():\n print('needle in haystack')\n")
|
14
|
+
|
15
|
+
assert file_matches(test_file, ["needle"], ignore_case=True)
|
16
|
+
assert not file_matches(test_file, ["nonexistent"], ignore_case=True)
|
17
|
+
|
18
|
+
def test_file_matches_multiple_patterns_or_logic(self, tmp_path):
|
19
|
+
"""Test that multiple patterns use OR logic (any match is sufficient)."""
|
20
|
+
test_file = tmp_path / "test.py"
|
21
|
+
test_file.write_text("import logging\ndef main():\n pass\n")
|
22
|
+
|
23
|
+
# Should match because file contains "import"
|
24
|
+
assert file_matches(test_file, ["import", "nonexistent"], ignore_case=True)
|
25
|
+
# Should match because file contains "logging"
|
26
|
+
assert file_matches(test_file, ["nonexistent", "logging"], ignore_case=True)
|
27
|
+
# Should not match because file contains neither
|
28
|
+
assert not file_matches(test_file, ["foo", "bar"], ignore_case=True)
|
29
|
+
|
30
|
+
def test_file_matches_case_insensitive_default(self, tmp_path):
|
31
|
+
"""Test case insensitive matching by default."""
|
32
|
+
test_file = tmp_path / "test.py"
|
33
|
+
test_file.write_text("TODO: Fix this bug\n")
|
34
|
+
|
35
|
+
assert file_matches(test_file, ["todo"], ignore_case=True)
|
36
|
+
assert file_matches(test_file, ["TODO"], ignore_case=True)
|
37
|
+
assert file_matches(test_file, ["Todo"], ignore_case=True)
|
38
|
+
|
39
|
+
def test_file_matches_case_sensitive_flag(self, tmp_path):
|
40
|
+
"""Test case sensitive matching when flag is set."""
|
41
|
+
test_file = tmp_path / "test.py"
|
42
|
+
test_file.write_text("TODO: Fix this bug\n")
|
43
|
+
|
44
|
+
assert file_matches(test_file, ["TODO"], ignore_case=False)
|
45
|
+
assert not file_matches(test_file, ["todo"], ignore_case=False)
|
46
|
+
assert not file_matches(test_file, ["Todo"], ignore_case=False)
|
47
|
+
|
48
|
+
def test_file_matches_empty_patterns(self, tmp_path):
|
49
|
+
"""Test that empty patterns list returns True."""
|
50
|
+
test_file = tmp_path / "test.py"
|
51
|
+
test_file.write_text("any content\n")
|
52
|
+
|
53
|
+
assert file_matches(test_file, [], ignore_case=True)
|
54
|
+
|
55
|
+
def test_file_matches_nonexistent_file(self, tmp_path):
|
56
|
+
"""Test that non-existent file returns False."""
|
57
|
+
nonexistent = tmp_path / "nonexistent.py"
|
58
|
+
|
59
|
+
assert not file_matches(nonexistent, ["pattern"], ignore_case=True)
|
60
|
+
|
61
|
+
def test_file_matches_binary_file_handling(self, tmp_path):
|
62
|
+
"""Test that binary files are handled gracefully."""
|
63
|
+
binary_file = tmp_path / "test.bin"
|
64
|
+
binary_file.write_bytes(b"\x00\x01\x02\x03\xff\xfe")
|
65
|
+
|
66
|
+
# Should not crash and should return False for binary content
|
67
|
+
assert not file_matches(binary_file, ["pattern"], ignore_case=True)
|
68
|
+
|
69
|
+
def test_file_matches_large_file_handling(self, tmp_path):
|
70
|
+
"""Test that large files are skipped for performance."""
|
71
|
+
from reposnap.core.content_search import MAX_FILE_SIZE
|
72
|
+
|
73
|
+
large_file = tmp_path / "large.txt"
|
74
|
+
# Create a file slightly larger than MAX_FILE_SIZE
|
75
|
+
large_content = "x" * (MAX_FILE_SIZE + 1000)
|
76
|
+
large_file.write_text(large_content)
|
77
|
+
|
78
|
+
# Should skip large files and return False
|
79
|
+
assert not file_matches(large_file, ["x"], ignore_case=True)
|
80
|
+
|
81
|
+
def test_file_matches_binary_detection_in_middle(self, tmp_path):
|
82
|
+
"""Test binary detection with null bytes after text content."""
|
83
|
+
mixed_file = tmp_path / "mixed.txt"
|
84
|
+
# Write text content followed by binary content with null bytes
|
85
|
+
with mixed_file.open("wb") as f:
|
86
|
+
f.write(b"some text content\n")
|
87
|
+
f.write(b"\x00binary\x00data\xff")
|
88
|
+
|
89
|
+
# Should detect binary content and return False
|
90
|
+
assert not file_matches(mixed_file, ["text"], ignore_case=True)
|
91
|
+
|
92
|
+
def test_filter_files_by_content_basic(self, tmp_path):
|
93
|
+
"""Test basic file filtering by content."""
|
94
|
+
file1 = tmp_path / "file1.py"
|
95
|
+
file1.write_text("import logging\ndef main(): pass\n")
|
96
|
+
|
97
|
+
file2 = tmp_path / "file2.py"
|
98
|
+
file2.write_text("import os\ndef helper(): pass\n")
|
99
|
+
|
100
|
+
file3 = tmp_path / "file3.py"
|
101
|
+
file3.write_text("print('hello world')\n")
|
102
|
+
|
103
|
+
files = [file1, file2, file3]
|
104
|
+
|
105
|
+
# Filter for files containing "import"
|
106
|
+
filtered = filter_files_by_content(files, ["import"])
|
107
|
+
assert set(filtered) == {file1, file2}
|
108
|
+
|
109
|
+
# Filter for files containing "logging"
|
110
|
+
filtered = filter_files_by_content(files, ["logging"])
|
111
|
+
assert filtered == [file1]
|
112
|
+
|
113
|
+
def test_filter_files_by_content_empty_patterns(self, tmp_path):
|
114
|
+
"""Test that empty patterns return all files."""
|
115
|
+
file1 = tmp_path / "file1.py"
|
116
|
+
file1.write_text("content1")
|
117
|
+
|
118
|
+
file2 = tmp_path / "file2.py"
|
119
|
+
file2.write_text("content2")
|
120
|
+
|
121
|
+
files = [file1, file2]
|
122
|
+
filtered = filter_files_by_content(files, [])
|
123
|
+
assert filtered == files
|
124
|
+
|
125
|
+
def test_filter_files_by_content_case_sensitivity(self, tmp_path):
    """The ignore_case flag decides whether "todo" matches "TODO"."""
    target = tmp_path / "test.py"
    target.write_text("TODO: Important task\n")
    files = [target]

    # (pattern, ignore_case, expected result), checked in this order.
    scenarios = [
        ("todo", True, [target]),   # case-insensitive: lower-case pattern hits
        ("todo", False, []),        # case-sensitive: lower-case pattern misses
        ("TODO", False, [target]),  # case-sensitive: exact-case pattern hits
    ]
    for pattern, ignore_case, expected in scenarios:
        filtered = filter_files_by_content(
            files, [pattern], ignore_case=ignore_case
        )
        assert filtered == expected
|
142
|
+
|
143
|
+
|
144
|
+
class TestProjectControllerIntegration:
    """Test integration of contains filter with ProjectController."""

    @staticmethod
    def _make_args(paths, contains, contains_case=False):
        """Build the mocked CLI arguments object shared by the tests below.

        A ``Mock`` is used (rather than a plain namespace) so that any
        attribute not set here still resolves instead of raising.
        """
        from unittest.mock import Mock

        args = Mock()
        args.paths = [str(p) for p in paths]
        args.output = "output.md"
        args.structure_only = False
        args.include = []
        args.exclude = []
        args.changes = False
        args.contains = contains
        args.contains_case = contains_case
        return args

    @classmethod
    def _make_controller(cls, root, contains, contains_case=False):
        """Create a ProjectController from mocked args and root it at *root*."""
        from reposnap.controllers.project_controller import ProjectController

        controller = ProjectController(
            cls._make_args([root], contains, contains_case)
        )
        controller.set_root_dir(root)
        return controller

    def test_contains_filter_integration(self, tmp_path):
        """Test that ProjectController properly applies content filters."""
        # Two files contain "import", one does not.
        (tmp_path / "file1.py").write_text("import logging\ndef main(): pass\n")
        (tmp_path / "file2.py").write_text("import os\ndef helper(): pass\n")
        (tmp_path / "file3.txt").write_text("This is a text file\n")

        controller = self._make_controller(tmp_path, ["import"])

        # Exercise _apply_content_filter directly with root-relative paths.
        files = [Path("file1.py"), Path("file2.py"), Path("file3.txt")]
        filtered = controller._apply_content_filter(files)

        # Should only keep files with "import"
        assert {f.name for f in filtered} == {"file1.py", "file2.py"}

    def test_contains_filter_case_sensitivity_integration(self, tmp_path):
        """Test case sensitivity integration in ProjectController."""
        (tmp_path / "test.py").write_text("TODO: Fix this\n")
        files = [Path("test.py")]

        # contains_case=False: lower-case "todo" matches "TODO".
        controller = self._make_controller(tmp_path, ["todo"], contains_case=False)
        filtered = controller._apply_content_filter(files)
        assert len(filtered) == 1

        # contains_case=True: "todo" != "TODO", so nothing is kept.
        controller = self._make_controller(tmp_path, ["todo"], contains_case=True)
        filtered = controller._apply_content_filter(files)
        assert len(filtered) == 0

    def test_no_contains_patterns_returns_all_files(self, tmp_path):
        """Test that empty contains patterns return all files unchanged."""
        (tmp_path / "file1.py").write_text("content1")
        (tmp_path / "file2.py").write_text("content2")

        controller = self._make_controller(tmp_path, [])

        files = [Path("file1.py"), Path("file2.py")]
        filtered = controller._apply_content_filter(files)

        # Should return all files unchanged
        assert set(filtered) == set(files)

    def test_absolute_path_handling(self, tmp_path):
        """Test that an absolute input path is reduced to a root-relative one.

        NOTE(review): the path normalisation below is performed inline by the
        test itself, not by a ProjectController method - confirm it mirrors
        the controller's real argument handling.
        """
        from reposnap.controllers.project_controller import ProjectController

        test_file = tmp_path / "test.py"
        test_file.write_text("import logging\ndef main(): pass\n")

        # Only ``args.paths`` (an absolute path) is consumed below.
        args = self._make_args([test_file], ["import"])

        # Build the controller without args and point it at the temp
        # directory, so that directory is the root the paths are relative to.
        controller = ProjectController()
        controller.set_root_dir(tmp_path)

        # Resolve the root as well: candidates are resolved below, and a
        # temp directory reached through a symlink would otherwise never be
        # a prefix of the resolved candidate.
        root = Path(controller.root_dir).resolve()

        controller.input_paths = []
        for raw in args.paths:
            p = Path(raw)
            candidate = p.resolve() if p.is_absolute() else (root / p).resolve()
            if not candidate.exists():
                continue
            try:
                rel = candidate.relative_to(root)
            except ValueError:
                continue  # Path not under root
            if rel != Path("."):
                controller.input_paths.append(rel)

        # Should have processed the absolute path correctly
        assert len(controller.input_paths) == 1, (
            f"expected one root-relative path, got {controller.input_paths!r}"
        )
        assert controller.input_paths[0].name == "test.py"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|