reposnap 0.6.5__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {reposnap-0.6.5 → reposnap-0.8.0}/.coverage +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/PKG-INFO +96 -2
- {reposnap-0.6.5 → reposnap-0.8.0}/README.md +95 -1
- {reposnap-0.6.5 → reposnap-0.8.0}/pyproject.toml +1 -1
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/controllers/project_controller.py +82 -6
- reposnap-0.8.0/reposnap/core/content_search.py +104 -0
- reposnap-0.8.0/reposnap/core/git_repo.py +100 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/interfaces/cli.py +18 -0
- reposnap-0.8.0/tests/reposnap/test_cli.py +223 -0
- reposnap-0.8.0/tests/reposnap/test_contains_filter.py +295 -0
- reposnap-0.8.0/tests/reposnap/test_git_repo.py +211 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_project_controller.py +79 -0
- reposnap-0.6.5/reposnap/core/git_repo.py +0 -32
- reposnap-0.6.5/tests/reposnap/test_cli.py +0 -80
- reposnap-0.6.5/tests/reposnap/test_git_repo.py +0 -20
- {reposnap-0.6.5 → reposnap-0.8.0}/.github/workflows/python-package.yml +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/.github/workflows/release.yml +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/.gitignore +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/.pre-commit-config.yaml +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/.python-version +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/.vscode/launch.json +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/CONTRIBUTING.md +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/LICENSE +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/__init__.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/controllers/__init__.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/core/__init__.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/core/file_system.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/core/markdown_generator.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/interfaces/__init__.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/interfaces/gui.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/models/__init__.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/models/file_tree.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/utils/__init__.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/utils/path_utils.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/requirements-dev.lock +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/requirements.lock +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/__init__.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/__init__.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_collected_tree.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_file_system.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_file_tree.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_gui.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_markdown_generator.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_path_utils.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/resources/another_existing_file.py +0 -0
- {reposnap-0.6.5 → reposnap-0.8.0}/tests/resources/existing_file.py +0 -0
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: reposnap
|
3
|
-
Version: 0.6.5
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: Generate a Markdown file with all contents of your project
|
5
5
|
Author: agoloborodko
|
6
6
|
License-File: LICENSE
|
@@ -25,6 +25,8 @@ Description-Content-Type: text/markdown
|
|
25
25
|
- **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
|
26
26
|
- **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
|
27
27
|
- **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
|
28
|
+
- **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
|
29
|
+
- **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
|
28
30
|
|
29
31
|
## Installation
|
30
32
|
|
@@ -49,7 +51,7 @@ pip install -r requirements.lock
|
|
49
51
|
To use `reposnap` from the command line, run it with the following options:
|
50
52
|
|
51
53
|
```bash
|
52
|
-
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] paths [paths ...]
|
54
|
+
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
|
53
55
|
```
|
54
56
|
|
55
57
|
- `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
|
@@ -59,6 +61,9 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
59
61
|
- `--debug`: Enable debug-level logging.
|
60
62
|
- `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
|
61
63
|
- `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
|
64
|
+
- `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
|
65
|
+
- `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
|
66
|
+
- `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
|
62
67
|
|
63
68
|
#### Pattern Matching
|
64
69
|
|
@@ -71,6 +76,77 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
71
76
|
- `-i "*.py"`: Includes only files ending with `.py`.
|
72
77
|
- `-e "*.test.*"`: Excludes files with `.test.` in their names.
|
73
78
|
|
79
|
+
#### Content Filtering
|
80
|
+
|
81
|
+
The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
|
82
|
+
|
83
|
+
- **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
|
84
|
+
- **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
|
85
|
+
- **Performance**: Large files (>5MB) and binary files are automatically skipped for performance and safety reasons.
|
86
|
+
|
87
|
+
**Examples**:
|
88
|
+
|
89
|
+
1. **Find files containing specific imports**:
|
90
|
+
```bash
|
91
|
+
reposnap . -S "import logging"
|
92
|
+
```
|
93
|
+
|
94
|
+
2. **Search for multiple patterns (OR logic)**:
|
95
|
+
```bash
|
96
|
+
reposnap . -S "TODO" "FIXME" "import requests"
|
97
|
+
```
|
98
|
+
|
99
|
+
3. **Case-sensitive content search**:
|
100
|
+
```bash
|
101
|
+
reposnap . -S "TODO" --contains-case
|
102
|
+
```
|
103
|
+
|
104
|
+
4. **Combine content filtering with other filters**:
|
105
|
+
```bash
|
106
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
107
|
+
```
|
108
|
+
|
109
|
+
5. **Find files with specific function calls**:
|
110
|
+
```bash
|
111
|
+
reposnap . -S "logger.error" "raise Exception"
|
112
|
+
```
|
113
|
+
|
114
|
+
#### Only Snapshot Your Current Work
|
115
|
+
|
116
|
+
The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
|
117
|
+
|
118
|
+
- **Staged changes**: Files that have been added to the index with `git add`
|
119
|
+
- **Unstaged changes**: Files that have been modified but not yet staged
|
120
|
+
- **Untracked files**: New files that haven't been added to Git yet
|
121
|
+
- **Stashed changes**: Files that are stored in Git stash entries
|
122
|
+
|
123
|
+
This is particularly useful when you want to:
|
124
|
+
- Document only your current work-in-progress
|
125
|
+
- Create a snapshot of changes before committing
|
126
|
+
- Review what files you've been working on
|
127
|
+
|
128
|
+
**Examples**:
|
129
|
+
|
130
|
+
1. **Generate documentation for only your uncommitted changes**:
|
131
|
+
```bash
|
132
|
+
reposnap . -c
|
133
|
+
```
|
134
|
+
|
135
|
+
2. **Combine with structure-only for a quick overview**:
|
136
|
+
```bash
|
137
|
+
reposnap . -c --structure-only
|
138
|
+
```
|
139
|
+
|
140
|
+
3. **Filter uncommitted changes by file type**:
|
141
|
+
```bash
|
142
|
+
reposnap . -c -i "*.py"
|
143
|
+
```
|
144
|
+
|
145
|
+
4. **Exclude test files from uncommitted changes**:
|
146
|
+
```bash
|
147
|
+
reposnap . -c -e "*test*"
|
148
|
+
```
|
149
|
+
|
74
150
|
#### Examples
|
75
151
|
|
76
152
|
1. **Generate a full project structure with file contents**:
|
@@ -103,6 +179,24 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
103
179
|
reposnap my_project/ -e "gui"
|
104
180
|
```
|
105
181
|
|
182
|
+
6. **Document only your current uncommitted work**:
|
183
|
+
|
184
|
+
```bash
|
185
|
+
reposnap . -c
|
186
|
+
```
|
187
|
+
|
188
|
+
7. **Find and document files containing specific code patterns**:
|
189
|
+
|
190
|
+
```bash
|
191
|
+
reposnap . -S "import logging" "logger."
|
192
|
+
```
|
193
|
+
|
194
|
+
8. **Combine content filtering with file type filtering**:
|
195
|
+
|
196
|
+
```bash
|
197
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
198
|
+
```
|
199
|
+
|
106
200
|
### Graphical User Interface
|
107
201
|
|
108
202
|
`reposnap` also provides a GUI for users who prefer an interactive interface.
|
@@ -12,6 +12,8 @@
|
|
12
12
|
- **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
|
13
13
|
- **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
|
14
14
|
- **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
|
15
|
+
- **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
|
16
|
+
- **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
|
15
17
|
|
16
18
|
## Installation
|
17
19
|
|
@@ -36,7 +38,7 @@ pip install -r requirements.lock
|
|
36
38
|
To use `reposnap` from the command line, run it with the following options:
|
37
39
|
|
38
40
|
```bash
|
39
|
-
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] paths [paths ...]
|
41
|
+
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
|
40
42
|
```
|
41
43
|
|
42
44
|
- `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
|
@@ -46,6 +48,9 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
46
48
|
- `--debug`: Enable debug-level logging.
|
47
49
|
- `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
|
48
50
|
- `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
|
51
|
+
- `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
|
52
|
+
- `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
|
53
|
+
- `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
|
49
54
|
|
50
55
|
#### Pattern Matching
|
51
56
|
|
@@ -58,6 +63,77 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
58
63
|
- `-i "*.py"`: Includes only files ending with `.py`.
|
59
64
|
- `-e "*.test.*"`: Excludes files with `.test.` in their names.
|
60
65
|
|
66
|
+
#### Content Filtering
|
67
|
+
|
68
|
+
The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
|
69
|
+
|
70
|
+
- **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
|
71
|
+
- **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
|
72
|
+
- **Performance**: Large files (>5MB) and binary files are automatically skipped for performance and safety reasons.
|
73
|
+
|
74
|
+
**Examples**:
|
75
|
+
|
76
|
+
1. **Find files containing specific imports**:
|
77
|
+
```bash
|
78
|
+
reposnap . -S "import logging"
|
79
|
+
```
|
80
|
+
|
81
|
+
2. **Search for multiple patterns (OR logic)**:
|
82
|
+
```bash
|
83
|
+
reposnap . -S "TODO" "FIXME" "import requests"
|
84
|
+
```
|
85
|
+
|
86
|
+
3. **Case-sensitive content search**:
|
87
|
+
```bash
|
88
|
+
reposnap . -S "TODO" --contains-case
|
89
|
+
```
|
90
|
+
|
91
|
+
4. **Combine content filtering with other filters**:
|
92
|
+
```bash
|
93
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
94
|
+
```
|
95
|
+
|
96
|
+
5. **Find files with specific function calls**:
|
97
|
+
```bash
|
98
|
+
reposnap . -S "logger.error" "raise Exception"
|
99
|
+
```
|
100
|
+
|
101
|
+
#### Only Snapshot Your Current Work
|
102
|
+
|
103
|
+
The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
|
104
|
+
|
105
|
+
- **Staged changes**: Files that have been added to the index with `git add`
|
106
|
+
- **Unstaged changes**: Files that have been modified but not yet staged
|
107
|
+
- **Untracked files**: New files that haven't been added to Git yet
|
108
|
+
- **Stashed changes**: Files that are stored in Git stash entries
|
109
|
+
|
110
|
+
This is particularly useful when you want to:
|
111
|
+
- Document only your current work-in-progress
|
112
|
+
- Create a snapshot of changes before committing
|
113
|
+
- Review what files you've been working on
|
114
|
+
|
115
|
+
**Examples**:
|
116
|
+
|
117
|
+
1. **Generate documentation for only your uncommitted changes**:
|
118
|
+
```bash
|
119
|
+
reposnap . -c
|
120
|
+
```
|
121
|
+
|
122
|
+
2. **Combine with structure-only for a quick overview**:
|
123
|
+
```bash
|
124
|
+
reposnap . -c --structure-only
|
125
|
+
```
|
126
|
+
|
127
|
+
3. **Filter uncommitted changes by file type**:
|
128
|
+
```bash
|
129
|
+
reposnap . -c -i "*.py"
|
130
|
+
```
|
131
|
+
|
132
|
+
4. **Exclude test files from uncommitted changes**:
|
133
|
+
```bash
|
134
|
+
reposnap . -c -e "*test*"
|
135
|
+
```
|
136
|
+
|
61
137
|
#### Examples
|
62
138
|
|
63
139
|
1. **Generate a full project structure with file contents**:
|
@@ -90,6 +166,24 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
90
166
|
reposnap my_project/ -e "gui"
|
91
167
|
```
|
92
168
|
|
169
|
+
6. **Document only your current uncommitted work**:
|
170
|
+
|
171
|
+
```bash
|
172
|
+
reposnap . -c
|
173
|
+
```
|
174
|
+
|
175
|
+
7. **Find and document files containing specific code patterns**:
|
176
|
+
|
177
|
+
```bash
|
178
|
+
reposnap . -S "import logging" "logger."
|
179
|
+
```
|
180
|
+
|
181
|
+
8. **Combine content filtering with file type filtering**:
|
182
|
+
|
183
|
+
```bash
|
184
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
185
|
+
```
|
186
|
+
|
93
187
|
### Graphical User Interface
|
94
188
|
|
95
189
|
`reposnap` also provides a GUI for users who prefer an interactive interface.
|
@@ -15,11 +15,17 @@ class ProjectController:
|
|
15
15
|
self.args = args
|
16
16
|
# Treat positional arguments as literal file/directory names.
|
17
17
|
input_paths = [
|
18
|
-
Path(p) for p in (args.paths if hasattr(args, "paths") else [
|
18
|
+
Path(p) for p in (args.paths if hasattr(args, "paths") else [])
|
19
19
|
]
|
20
20
|
self.input_paths = []
|
21
21
|
for p in input_paths:
|
22
|
-
|
22
|
+
if p.is_absolute():
|
23
|
+
# Handle absolute paths - use as-is but verify they're under root_dir
|
24
|
+
candidate = p.resolve()
|
25
|
+
else:
|
26
|
+
# Handle relative paths - join with root_dir
|
27
|
+
candidate = (self.root_dir / p).resolve()
|
28
|
+
|
23
29
|
if candidate.exists():
|
24
30
|
try:
|
25
31
|
rel = candidate.relative_to(self.root_dir)
|
@@ -31,7 +37,7 @@ class ProjectController:
|
|
31
37
|
)
|
32
38
|
else:
|
33
39
|
self.logger.warning(
|
34
|
-
f"Path {p} does not exist
|
40
|
+
f"Path {p} does not exist or is not under repository root {self.root_dir}."
|
35
41
|
)
|
36
42
|
self.output_file: Path = (
|
37
43
|
Path(args.output).resolve()
|
@@ -47,6 +53,9 @@ class ProjectController:
|
|
47
53
|
self.exclude_patterns: List[str] = (
|
48
54
|
args.exclude if hasattr(args, "exclude") else []
|
49
55
|
)
|
56
|
+
self.changes_only: bool = getattr(args, "changes", False)
|
57
|
+
self.contains: List[str] = getattr(args, "contains", [])
|
58
|
+
self.contains_case: bool = getattr(args, "contains_case", False)
|
50
59
|
else:
|
51
60
|
self.args = None
|
52
61
|
self.input_paths = []
|
@@ -54,6 +63,9 @@ class ProjectController:
|
|
54
63
|
self.structure_only = False
|
55
64
|
self.include_patterns = []
|
56
65
|
self.exclude_patterns = []
|
66
|
+
self.changes_only = False
|
67
|
+
self.contains = []
|
68
|
+
self.contains_case = False
|
57
69
|
self.file_tree: Optional[FileTree] = None
|
58
70
|
self.gitignore_patterns: List[str] = []
|
59
71
|
if self.root_dir:
|
@@ -108,14 +120,76 @@ class ProjectController:
|
|
108
120
|
files = [f for f in files if not spec_exc.match_file(f.as_posix())]
|
109
121
|
return files
|
110
122
|
|
123
|
+
def _apply_content_filter(self, files: List[Path]) -> List[Path]:
    """
    Keep only the files whose text contains one of ``self.contains``.

    Args:
        files: Paths relative to ``self.root_dir``.

    Returns:
        ``files`` itself when no patterns are configured; otherwise a new
        list of relative paths for which the content search reported a match.

    Note:
        Matching ignores case unless ``self.contains_case`` is set. The
        actual search is delegated to
        ``reposnap.core.content_search.filter_files_by_content``.
    """
    if not self.contains:
        return files

    # Imported lazily, exactly where the filter is first needed.
    from reposnap.core.content_search import filter_files_by_content

    total = len(files)
    case_insensitive = not self.contains_case

    self.logger.debug(
        f"Applying content filter with patterns: {self.contains}, "
        f"ignore_case: {case_insensitive}"
    )

    # The search helper receives paths joined onto the root directory.
    candidates = [self.root_dir / relative for relative in files]
    matches = filter_files_by_content(candidates, self.contains, case_insensitive)

    # Map the survivors back to root-relative paths, dropping any that
    # cannot be expressed relative to the root.
    kept: List[Path] = []
    for match in matches:
        try:
            relative = match.relative_to(self.root_dir)
        except ValueError:
            continue
        else:
            kept.append(relative)

    self.logger.info(f"Applied content filter (kept {len(kept)} / {total})")
    self.logger.debug(f"Files kept after content filter: {kept}")

    return kept
|
174
|
+
|
111
175
|
def collect_file_tree(self) -> None:
|
112
|
-
|
176
|
+
if self.changes_only:
|
177
|
+
self.logger.info("Collecting uncommitted files from Git repository.")
|
178
|
+
else:
|
179
|
+
self.logger.info("Collecting files from Git tracked files if available.")
|
113
180
|
try:
|
114
181
|
from reposnap.core.git_repo import GitRepo
|
115
182
|
|
116
183
|
git_repo = GitRepo(self.root_dir)
|
117
|
-
|
118
|
-
|
184
|
+
if self.changes_only:
|
185
|
+
all_files = git_repo.get_uncommitted_files()
|
186
|
+
self.logger.info(
|
187
|
+
"Using only uncommitted files (staged, unstaged, untracked, stashed)."
|
188
|
+
)
|
189
|
+
else:
|
190
|
+
all_files = git_repo.get_git_files()
|
191
|
+
self.logger.info("Using all Git tracked files.")
|
192
|
+
self.logger.debug(f"Git files: {all_files}")
|
119
193
|
except Exception as e:
|
120
194
|
self.logger.warning(f"Error obtaining Git tracked files: {e}.")
|
121
195
|
all_files = []
|
@@ -135,6 +209,8 @@ class ProjectController:
|
|
135
209
|
continue
|
136
210
|
all_files = self._apply_include_exclude(all_files)
|
137
211
|
self.logger.debug(f"All files after applying include/exclude: {all_files}")
|
212
|
+
all_files = self._apply_content_filter(all_files)
|
213
|
+
self.logger.debug(f"All files after applying content filter: {all_files}")
|
138
214
|
if self.input_paths:
|
139
215
|
trees = []
|
140
216
|
for input_path in self.input_paths:
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# src/reposnap/core/content_search.py
|
2
|
+
|
3
|
+
"""
|
4
|
+
Private content search helpers for substring matching in files.
|
5
|
+
|
6
|
+
This module provides stateless utility functions for searching file contents.
|
7
|
+
It is intended for internal use by the project controller and should not be
|
8
|
+
imported directly by external consumers.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import logging
|
12
|
+
from pathlib import Path
|
13
|
+
from typing import List
|
14
|
+
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)

# Configuration constants
MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MiB
BINARY_CHECK_SIZE = 1024  # First 1KB to check for binary content


def file_matches(path: Path, patterns: List[str], ignore_case: bool = True) -> bool:
    """
    Check if a file contains any of the given patterns.

    Args:
        path: Path to the file to search
        patterns: List of substring patterns to search for
        ignore_case: Whether to perform case-insensitive matching (default: True)

    Returns:
        True if ``patterns`` is empty or the file contains at least one
        pattern; False otherwise, including when the path is not a regular
        file, is larger than MAX_FILE_SIZE, has a NUL byte in its first
        BINARY_CHECK_SIZE bytes, or cannot be read.

    Note:
        The file is decoded as UTF-8 with undecodable bytes dropped and is
        scanned line by line, so a pattern that spans a line break never
        matches. Case-insensitive comparison uses ``str.casefold`` so that
        caseless Unicode equivalents (for example "ß" and "SS") match.
    """
    if not patterns:
        return True

    if not path.is_file():
        return False

    # Check file size - skip files larger than MAX_FILE_SIZE
    try:
        file_size = path.stat().st_size
    except OSError as e:
        logger.debug(f"Could not stat file {path}: {e}")
        return False
    if file_size > MAX_FILE_SIZE:
        logger.debug(f"Skipping large file {path} ({file_size} bytes)")
        return False

    # Check for binary content in first KB
    try:
        with path.open("rb") as f:
            first_chunk = f.read(BINARY_CHECK_SIZE)
    except OSError as e:
        logger.debug(f"Could not read file {path} for binary check: {e}")
        return False
    if b"\0" in first_chunk:
        logger.debug(f"Skipping binary file {path}")
        return False

    # Pre-compute search patterns once (case-folded if needed)
    search_patterns = [p.casefold() if ignore_case else p for p in patterns]

    try:
        with path.open("r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                search_line = line.casefold() if ignore_case else line
                if any(pattern in search_line for pattern in search_patterns):
                    return True
    except (OSError, UnicodeError, ValueError) as e:
        # Best effort: an unreadable file is treated as "no match".
        logger.debug(f"Could not read file {path} for content search: {e}")
        return False
    return False
|
80
|
+
|
81
|
+
|
82
|
+
def filter_files_by_content(
    files: List[Path], patterns: List[str], ignore_case: bool = True
) -> List[Path]:
    """
    Filter a list of files to only include those containing the given patterns.

    Args:
        files: List of file paths to filter
        patterns: List of substring patterns to search for
        ignore_case: Whether to perform case-insensitive matching (default: True)

    Returns:
        A new list, in input order, of the files that contain at least one
        pattern. When ``patterns`` is empty every file is kept. The input
        list is never returned itself, so callers may mutate the result.
    """
    if not patterns:
        # Copy so the result never aliases the caller's list.
        return list(files)

    return [
        file_path
        for file_path in files
        if file_matches(file_path, patterns, ignore_case)
    ]
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# src/reposnap/core/git_repo.py
|
2
|
+
|
3
|
+
import logging
|
4
|
+
from pathlib import Path
|
5
|
+
from git import Repo, InvalidGitRepositoryError
|
6
|
+
from typing import List
|
7
|
+
|
8
|
+
|
9
|
+
class GitRepo:
    """
    List files of the Git repository that contains ``repo_path``.

    The repository is located by searching ``repo_path`` and its parent
    directories. Paths returned by the public methods are relative to the
    resolved ``repo_path``; entries that lie outside it are dropped.
    """

    # Upper bound on the number of stash entries inspected by
    # get_uncommitted_files(), to keep large stash stacks cheap.
    MAX_STASHES = 10

    def __init__(self, repo_path: Path):
        self.repo_path: Path = repo_path.resolve()
        self.logger = logging.getLogger(__name__)

    def get_git_files(self) -> List[Path]:
        """
        Return the files reported by ``git ls-files``.

        Returns:
            Paths relative to ``self.repo_path``. An empty list is returned
            when no Git repository is found or the repository is bare.
        """
        try:
            repo: Repo = Repo(self.repo_path, search_parent_directories=True)
            if repo.working_tree_dir is None:
                # A bare repository has no working tree to list files from.
                self.logger.error(
                    f"Git repository at {self.repo_path} has no working tree."
                )
                return []
            repo_root: Path = Path(repo.working_tree_dir).resolve()
            git_files: List[str] = repo.git.ls_files().splitlines()
            self.logger.debug(f"Git files from {repo_root}: {git_files}")
            git_files_relative: List[Path] = []
            for f in git_files:
                absolute_path: Path = (repo_root / f).resolve()
                try:
                    relative_path: Path = absolute_path.relative_to(self.repo_path)
                except ValueError:
                    # Skip files not under root_dir
                    continue
                git_files_relative.append(relative_path)
            return git_files_relative
        except InvalidGitRepositoryError:
            self.logger.error(f"Invalid Git repository at: {self.repo_path}")
            return []

    def get_uncommitted_files(self) -> List[Path]:
        """
        Return every *working-copy* file that differs from HEAD - staged,
        unstaged, untracked, plus everything referenced in `git stash list`
        (at most MAX_STASHES entries are inspected).

        Paths are *relative to* self.repo_path. Only paths that currently
        exist as regular files are returned, sorted and de-duplicated. An
        empty list is returned when no Git repository is found or the
        repository is bare.
        """
        try:
            repo: Repo = Repo(self.repo_path, search_parent_directories=True)
            if repo.working_tree_dir is None:
                # A bare repository has no working tree, hence no changes.
                self.logger.error(
                    f"Git repository at {self.repo_path} has no working tree."
                )
                return []
            repo_root: Path = Path(repo.working_tree_dir).resolve()
            paths: set = set()

            # Staged changes (diff between index and HEAD)
            if repo.head.is_valid():
                for diff in repo.index.diff("HEAD"):
                    paths.add(diff.a_path or diff.b_path)
            else:
                # No commit exists yet, so HEAD cannot be diffed against:
                # everything already in the index counts as staged.
                paths.update(entry_path for entry_path, _stage in repo.index.entries)

            # Unstaged changes (diff between working tree and index)
            for diff in repo.index.diff(None):
                paths.add(diff.a_path or diff.b_path)

            # Untracked files
            paths.update(repo.untracked_files)

            # Stash entries - with performance guard
            try:
                stash_refs = repo.git.stash("list", "--format=%gd").splitlines()
                # Limit stash processing to prevent performance issues
                max_stashes = self.MAX_STASHES
                if len(stash_refs) > max_stashes:
                    self.logger.warning(
                        f"Large stash stack detected ({len(stash_refs)} entries). "
                        f"Processing only the first {max_stashes} stashes."
                    )
                    stash_refs = stash_refs[:max_stashes]

                for ref in stash_refs:
                    if ref.strip():  # Skip empty lines
                        # Files changed between the stash and its first parent.
                        stash_files = repo.git.diff(
                            "--name-only", f"{ref}^1", ref
                        ).splitlines()
                        paths.update(stash_files)
            except Exception as e:
                # Stash inspection is best effort; the other sources still count.
                self.logger.debug(f"Error processing stash entries: {e}")

            # Convert to relative paths and filter existing files
            relative_paths = []
            for path_str in paths:
                if not path_str:  # Skip empty strings
                    continue
                absolute_path = (repo_root / path_str).resolve()
                try:
                    relative_path = absolute_path.relative_to(self.repo_path)
                except ValueError:
                    # Log warning for paths outside repo root
                    self.logger.warning(
                        f"Path {path_str} is outside repository root {self.repo_path}. Skipping."
                    )
                    continue
                if absolute_path.is_file():
                    relative_paths.append(relative_path)

            # Return sorted, deduplicated list for deterministic output
            result = sorted(set(relative_paths))
            self.logger.debug(f"Uncommitted files from {repo_root}: {result}")
            return result

        except InvalidGitRepositoryError:
            self.logger.error(f"Invalid Git repository at: {self.repo_path}")
            return []
|
@@ -40,6 +40,24 @@ def main():
|
|
40
40
|
default=[],
|
41
41
|
help="File/folder patterns to exclude.",
|
42
42
|
)
|
43
|
+
parser.add_argument(
|
44
|
+
"-c",
|
45
|
+
"--changes",
|
46
|
+
action="store_true",
|
47
|
+
help="Use only files that are added/modified/untracked/stashed but not yet committed.",
|
48
|
+
)
|
49
|
+
parser.add_argument(
|
50
|
+
"-S",
|
51
|
+
"--contains",
|
52
|
+
nargs="+",
|
53
|
+
default=[],
|
54
|
+
help="Only include files whose contents contain these substrings",
|
55
|
+
)
|
56
|
+
parser.add_argument(
|
57
|
+
"--contains-case",
|
58
|
+
action="store_true",
|
59
|
+
help="Make --contains case-sensitive",
|
60
|
+
)
|
43
61
|
|
44
62
|
args = parser.parse_args()
|
45
63
|
|