reposnap 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reposnap/controllers/project_controller.py +66 -2
- reposnap/core/content_search.py +104 -0
- reposnap/interfaces/cli.py +12 -0
- {reposnap-0.7.0.dist-info → reposnap-0.8.0.dist-info}/METADATA +52 -2
- {reposnap-0.7.0.dist-info → reposnap-0.8.0.dist-info}/RECORD +8 -7
- {reposnap-0.7.0.dist-info → reposnap-0.8.0.dist-info}/WHEEL +0 -0
- {reposnap-0.7.0.dist-info → reposnap-0.8.0.dist-info}/entry_points.txt +0 -0
- {reposnap-0.7.0.dist-info → reposnap-0.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -19,7 +19,13 @@ class ProjectController:
|
|
19
19
|
]
|
20
20
|
self.input_paths = []
|
21
21
|
for p in input_paths:
|
22
|
-
|
22
|
+
if p.is_absolute():
|
23
|
+
# Handle absolute paths - use as-is but verify they're under root_dir
|
24
|
+
candidate = p.resolve()
|
25
|
+
else:
|
26
|
+
# Handle relative paths - join with root_dir
|
27
|
+
candidate = (self.root_dir / p).resolve()
|
28
|
+
|
23
29
|
if candidate.exists():
|
24
30
|
try:
|
25
31
|
rel = candidate.relative_to(self.root_dir)
|
@@ -31,7 +37,7 @@ class ProjectController:
|
|
31
37
|
)
|
32
38
|
else:
|
33
39
|
self.logger.warning(
|
34
|
-
f"Path {p} does not exist
|
40
|
+
f"Path {p} does not exist or is not under repository root {self.root_dir}."
|
35
41
|
)
|
36
42
|
self.output_file: Path = (
|
37
43
|
Path(args.output).resolve()
|
@@ -48,6 +54,8 @@ class ProjectController:
|
|
48
54
|
args.exclude if hasattr(args, "exclude") else []
|
49
55
|
)
|
50
56
|
self.changes_only: bool = getattr(args, "changes", False)
|
57
|
+
self.contains: List[str] = getattr(args, "contains", [])
|
58
|
+
self.contains_case: bool = getattr(args, "contains_case", False)
|
51
59
|
else:
|
52
60
|
self.args = None
|
53
61
|
self.input_paths = []
|
@@ -56,6 +64,8 @@ class ProjectController:
|
|
56
64
|
self.include_patterns = []
|
57
65
|
self.exclude_patterns = []
|
58
66
|
self.changes_only = False
|
67
|
+
self.contains = []
|
68
|
+
self.contains_case = False
|
59
69
|
self.file_tree: Optional[FileTree] = None
|
60
70
|
self.gitignore_patterns: List[str] = []
|
61
71
|
if self.root_dir:
|
@@ -110,6 +120,58 @@ class ProjectController:
|
|
110
120
|
files = [f for f in files if not spec_exc.match_file(f.as_posix())]
|
111
121
|
return files
|
112
122
|
|
123
|
+
def _apply_content_filter(self, files: List[Path]) -> List[Path]:
|
124
|
+
"""
|
125
|
+
Filter files based on content substring matching.
|
126
|
+
|
127
|
+
Args:
|
128
|
+
files: List of relative file paths to filter
|
129
|
+
|
130
|
+
Returns:
|
131
|
+
Filtered list of files that contain at least one of the patterns
|
132
|
+
specified in self.contains. Returns original list if no patterns
|
133
|
+
are specified.
|
134
|
+
|
135
|
+
Note:
|
136
|
+
Uses case-insensitive matching by default unless self.contains_case
|
137
|
+
is True. Skips binary files and files larger than 5MB for performance.
|
138
|
+
"""
|
139
|
+
if not self.contains:
|
140
|
+
return files
|
141
|
+
|
142
|
+
from reposnap.core.content_search import filter_files_by_content
|
143
|
+
|
144
|
+
initial_count = len(files)
|
145
|
+
ignore_case = not self.contains_case
|
146
|
+
|
147
|
+
self.logger.debug(
|
148
|
+
f"Applying content filter with patterns: {self.contains}, "
|
149
|
+
f"ignore_case: {ignore_case}"
|
150
|
+
)
|
151
|
+
|
152
|
+
# Convert relative paths to absolute for content search
|
153
|
+
absolute_paths = [self.root_dir / file_path for file_path in files]
|
154
|
+
filtered_absolute = filter_files_by_content(
|
155
|
+
absolute_paths, self.contains, ignore_case
|
156
|
+
)
|
157
|
+
|
158
|
+
# Convert back to relative paths
|
159
|
+
filtered_files = []
|
160
|
+
for abs_path in filtered_absolute:
|
161
|
+
try:
|
162
|
+
rel_path = abs_path.relative_to(self.root_dir)
|
163
|
+
filtered_files.append(rel_path)
|
164
|
+
except ValueError:
|
165
|
+
continue
|
166
|
+
|
167
|
+
kept_count = len(filtered_files)
|
168
|
+
self.logger.info(
|
169
|
+
f"Applied content filter (kept {kept_count} / {initial_count})"
|
170
|
+
)
|
171
|
+
self.logger.debug(f"Files kept after content filter: {filtered_files}")
|
172
|
+
|
173
|
+
return filtered_files
|
174
|
+
|
113
175
|
def collect_file_tree(self) -> None:
|
114
176
|
if self.changes_only:
|
115
177
|
self.logger.info("Collecting uncommitted files from Git repository.")
|
@@ -147,6 +209,8 @@ class ProjectController:
|
|
147
209
|
continue
|
148
210
|
all_files = self._apply_include_exclude(all_files)
|
149
211
|
self.logger.debug(f"All files after applying include/exclude: {all_files}")
|
212
|
+
all_files = self._apply_content_filter(all_files)
|
213
|
+
self.logger.debug(f"All files after applying content filter: {all_files}")
|
150
214
|
if self.input_paths:
|
151
215
|
trees = []
|
152
216
|
for input_path in self.input_paths:
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# src/reposnap/core/content_search.py
|
2
|
+
|
3
|
+
"""
|
4
|
+
Private content search helpers for substring matching in files.
|
5
|
+
|
6
|
+
This module provides stateless utility functions for searching file contents.
|
7
|
+
It is intended for internal use by the project controller and should not be
|
8
|
+
imported directly by external consumers.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import logging
|
12
|
+
from pathlib import Path
|
13
|
+
from typing import List
|
14
|
+
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)

# Configuration constants
MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MiB; larger files are never searched
BINARY_CHECK_SIZE = 1024  # Leading bytes inspected for a NUL byte


def file_matches(path: Path, patterns: List[str], ignore_case: bool = True) -> bool:
    """
    Check if a file contains any of the given patterns.

    Args:
        path: Path to the file to search
        patterns: List of substring patterns to search for
        ignore_case: Whether to perform case-insensitive matching (default: True)

    Returns:
        True if ``patterns`` is empty or if any single line of the file
        contains at least one pattern. False if ``path`` is not a regular
        file, is larger than ``MAX_FILE_SIZE``, has a NUL byte within its
        first ``BINARY_CHECK_SIZE`` bytes, cannot be read, or has no
        matching line.

    Note:
        The file is read line by line as UTF-8 with undecodable bytes
        ignored, so a pattern can never match across a line break.
        Case-insensitive matching compares case-folded text, so caseless
        equivalents such as "Straße" and "STRASSE" match each other.
    """
    if not patterns:
        return True

    if not path.is_file():
        return False

    # Check file size - skip files larger than MAX_FILE_SIZE
    try:
        file_size = path.stat().st_size
    except OSError as e:
        logger.debug("Could not stat file %s: %s", path, e)
        return False
    if file_size > MAX_FILE_SIZE:
        logger.debug("Skipping large file %s (%s bytes)", path, file_size)
        return False

    # Check for binary content in the leading bytes of the file
    try:
        with path.open("rb") as f:
            first_chunk = f.read(BINARY_CHECK_SIZE)
    except OSError as e:
        logger.debug("Could not read file %s for binary check: %s", path, e)
        return False
    if b"\0" in first_chunk:
        logger.debug("Skipping binary file %s", path)
        return False

    # Normalize the patterns once. casefold() rather than lower() because
    # it removes every case distinction (e.g. "ß" and "SS" become equal).
    if ignore_case:
        search_patterns = [pattern.casefold() for pattern in patterns]
    else:
        search_patterns = list(patterns)

    try:
        with path.open("r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                search_line = line.casefold() if ignore_case else line
                if any(pattern in search_line for pattern in search_patterns):
                    return True
    except OSError as e:
        logger.debug("Could not read file %s for content search: %s", path, e)
    return False
|
80
|
+
|
81
|
+
|
82
|
+
def filter_files_by_content(
    files: List[Path], patterns: List[str], ignore_case: bool = True
) -> List[Path]:
    """
    Select the files whose contents contain at least one of the patterns.

    Args:
        files: Candidate file paths, checked in the order given
        patterns: Substrings to look for
        ignore_case: Match without regard to case when True (default: True)

    Returns:
        A new list with the matching paths in their original order. When
        ``patterns`` is empty no search is performed and the ``files``
        list itself is returned.
    """
    if not patterns:
        return files

    return [
        candidate
        for candidate in files
        if file_matches(candidate, patterns, ignore_case)
    ]
|
reposnap/interfaces/cli.py
CHANGED
@@ -46,6 +46,18 @@ def main():
|
|
46
46
|
action="store_true",
|
47
47
|
help="Use only files that are added/modified/untracked/stashed but not yet committed.",
|
48
48
|
)
|
49
|
+
parser.add_argument(
|
50
|
+
"-S",
|
51
|
+
"--contains",
|
52
|
+
nargs="+",
|
53
|
+
default=[],
|
54
|
+
help="Only include files whose contents contain these substrings",
|
55
|
+
)
|
56
|
+
parser.add_argument(
|
57
|
+
"--contains-case",
|
58
|
+
action="store_true",
|
59
|
+
help="Make --contains case-sensitive",
|
60
|
+
)
|
49
61
|
|
50
62
|
args = parser.parse_args()
|
51
63
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: reposnap
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: Generate a Markdown file with all contents of your project
|
5
5
|
Author: agoloborodko
|
6
6
|
License-File: LICENSE
|
@@ -25,6 +25,7 @@ Description-Content-Type: text/markdown
|
|
25
25
|
- **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
|
26
26
|
- **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
|
27
27
|
- **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
|
28
|
+
- **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
|
28
29
|
- **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
|
29
30
|
|
30
31
|
## Installation
|
@@ -50,7 +51,7 @@ pip install -r requirements.lock
|
|
50
51
|
To use `reposnap` from the command line, run it with the following options:
|
51
52
|
|
52
53
|
```bash
|
53
|
-
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] paths [paths ...]
|
54
|
+
reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
|
54
55
|
```
|
55
56
|
|
56
57
|
- `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
|
@@ -61,6 +62,8 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
61
62
|
- `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
|
62
63
|
- `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
|
63
64
|
- `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
|
65
|
+
- `-S, --contains`: Only include files whose contents contain at least one of these substrings. Multiple substrings can be specified.
|
66
|
+
- `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
|
64
67
|
|
65
68
|
#### Pattern Matching
|
66
69
|
|
@@ -73,6 +76,41 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
|
|
73
76
|
- `-i "*.py"`: Includes only files ending with `.py`.
|
74
77
|
- `-e "*.test.*"`: Excludes files with `.test.` in their names.
|
75
78
|
|
79
|
+
#### Content Filtering
|
80
|
+
|
81
|
+
The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
|
82
|
+
|
83
|
+
- **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
|
84
|
+
- **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
|
85
|
+
- **Performance**: Large files (over 5 MiB) and binary files (detected by a NUL byte in the first 1 KB) are never searched, so they are always excluded when `--contains` is used.
|
86
|
+
|
87
|
+
**Examples**:
|
88
|
+
|
89
|
+
1. **Find files containing specific imports**:
|
90
|
+
```bash
|
91
|
+
reposnap . -S "import logging"
|
92
|
+
```
|
93
|
+
|
94
|
+
2. **Search for multiple patterns (OR logic)**:
|
95
|
+
```bash
|
96
|
+
reposnap . -S "TODO" "FIXME" "import requests"
|
97
|
+
```
|
98
|
+
|
99
|
+
3. **Case-sensitive content search**:
|
100
|
+
```bash
|
101
|
+
reposnap . -S "TODO" --contains-case
|
102
|
+
```
|
103
|
+
|
104
|
+
4. **Combine content filtering with other filters**:
|
105
|
+
```bash
|
106
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
107
|
+
```
|
108
|
+
|
109
|
+
5. **Find files with specific function calls**:
|
110
|
+
```bash
|
111
|
+
reposnap . -S "logger.error" "raise Exception"
|
112
|
+
```
|
113
|
+
|
76
114
|
#### Only Snapshot Your Current Work
|
77
115
|
|
78
116
|
The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
|
@@ -147,6 +185,18 @@ This is particularly useful when you want to:
|
|
147
185
|
reposnap . -c
|
148
186
|
```
|
149
187
|
|
188
|
+
7. **Find and document files containing specific code patterns**:
|
189
|
+
|
190
|
+
```bash
|
191
|
+
reposnap . -S "import logging" "logger."
|
192
|
+
```
|
193
|
+
|
194
|
+
8. **Combine content filtering with file type filtering**:
|
195
|
+
|
196
|
+
```bash
|
197
|
+
reposnap . -S "class " -i "*.py" --structure-only
|
198
|
+
```
|
199
|
+
|
150
200
|
### Graphical User Interface
|
151
201
|
|
152
202
|
`reposnap` also provides a GUI for users who prefer an interactive interface.
|
@@ -1,19 +1,20 @@
|
|
1
1
|
reposnap/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
reposnap/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
reposnap/controllers/project_controller.py,sha256=
|
3
|
+
reposnap/controllers/project_controller.py,sha256=Kg_Vu7gkWRj5EDdiOwoGlsmaZ1XUsBc_XHIV5yJG4w8,13007
|
4
4
|
reposnap/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
reposnap/core/content_search.py,sha256=kPiW5edDpNWgDwQg2GbkBJNweAyq7DHUZgtvajTZHK8,3262
|
5
6
|
reposnap/core/file_system.py,sha256=82gwvmgrsWf63paMrIz-Z0eqIjbqt9_-vujdXlJJoFE,1074
|
6
7
|
reposnap/core/git_repo.py,sha256=YVIbx-Y_MUbnn5Z4E2XBTJbG7Kawx5aUX2tg6vnocd0,4284
|
7
8
|
reposnap/core/markdown_generator.py,sha256=V6uEbxVSbCbxKN9ysTDKsIDvEGBxFutpOpyaZRXZUGw,3747
|
8
9
|
reposnap/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
-
reposnap/interfaces/cli.py,sha256=
|
10
|
+
reposnap/interfaces/cli.py,sha256=gmO49_DeNiqDsCtKxMEgZ8T-UjxDqwZ9m0LR_JsES5Q,1976
|
10
11
|
reposnap/interfaces/gui.py,sha256=sTuQxjD1nPa9FpgfzOwi6VDO5QMMtDX-5CiEhbJJcs4,5429
|
11
12
|
reposnap/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
13
|
reposnap/models/file_tree.py,sha256=jGo_SizdFcOiDC1OOMz-tiijRN3iSD7ENh6Xw8S6OL0,3362
|
13
14
|
reposnap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
15
|
reposnap/utils/path_utils.py,sha256=UrMe5cjspTf-4gjg2lzv6BgLwZ7S_1lLECQvDMDZO9Y,507
|
15
|
-
reposnap-0.
|
16
|
-
reposnap-0.
|
17
|
-
reposnap-0.
|
18
|
-
reposnap-0.
|
19
|
-
reposnap-0.
|
16
|
+
reposnap-0.8.0.dist-info/METADATA,sha256=dRmQAHOFy1Q41neUvmC8YAwxRbWFgxQ7Z4MQFiKu6mI,8687
|
17
|
+
reposnap-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
reposnap-0.8.0.dist-info/entry_points.txt,sha256=o3GyO7bpR0dujPCjsvvZMPv4pXNJlFwD49_pA1r5FOA,102
|
19
|
+
reposnap-0.8.0.dist-info/licenses/LICENSE,sha256=Aj7WCYBXi98pvi723HPn4GDRyjxToNWb3PC6j1_lnPk,1069
|
20
|
+
reposnap-0.8.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|