reposnap 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,13 @@ class ProjectController:
19
19
  ]
20
20
  self.input_paths = []
21
21
  for p in input_paths:
22
- candidate = (self.root_dir / p).resolve()
22
+ if p.is_absolute():
23
+ # Handle absolute paths - use as-is but verify they're under root_dir
24
+ candidate = p.resolve()
25
+ else:
26
+ # Handle relative paths - join with root_dir
27
+ candidate = (self.root_dir / p).resolve()
28
+
23
29
  if candidate.exists():
24
30
  try:
25
31
  rel = candidate.relative_to(self.root_dir)
@@ -31,7 +37,7 @@ class ProjectController:
31
37
  )
32
38
  else:
33
39
  self.logger.warning(
34
- f"Path {p} does not exist relative to repository root {self.root_dir}."
40
+ f"Path {p} does not exist or is not under repository root {self.root_dir}."
35
41
  )
36
42
  self.output_file: Path = (
37
43
  Path(args.output).resolve()
@@ -48,6 +54,8 @@ class ProjectController:
48
54
  args.exclude if hasattr(args, "exclude") else []
49
55
  )
50
56
  self.changes_only: bool = getattr(args, "changes", False)
57
+ self.contains: List[str] = getattr(args, "contains", [])
58
+ self.contains_case: bool = getattr(args, "contains_case", False)
51
59
  else:
52
60
  self.args = None
53
61
  self.input_paths = []
@@ -56,6 +64,8 @@ class ProjectController:
56
64
  self.include_patterns = []
57
65
  self.exclude_patterns = []
58
66
  self.changes_only = False
67
+ self.contains = []
68
+ self.contains_case = False
59
69
  self.file_tree: Optional[FileTree] = None
60
70
  self.gitignore_patterns: List[str] = []
61
71
  if self.root_dir:
@@ -110,6 +120,58 @@ class ProjectController:
110
120
  files = [f for f in files if not spec_exc.match_file(f.as_posix())]
111
121
  return files
112
122
 
123
+ def _apply_content_filter(self, files: List[Path]) -> List[Path]:
124
+ """
125
+ Filter files based on content substring matching.
126
+
127
+ Args:
128
+ files: List of relative file paths to filter
129
+
130
+ Returns:
131
+ Filtered list of files that contain at least one of the patterns
132
+ specified in self.contains. Returns original list if no patterns
133
+ are specified.
134
+
135
+ Note:
136
+ Uses case-insensitive matching by default unless self.contains_case
137
+ is True. Skips binary files and files larger than 5MB for performance.
138
+ """
139
+ if not self.contains:
140
+ return files
141
+
142
+ from reposnap.core.content_search import filter_files_by_content
143
+
144
+ initial_count = len(files)
145
+ ignore_case = not self.contains_case
146
+
147
+ self.logger.debug(
148
+ f"Applying content filter with patterns: {self.contains}, "
149
+ f"ignore_case: {ignore_case}"
150
+ )
151
+
152
+ # Convert relative paths to absolute for content search
153
+ absolute_paths = [self.root_dir / file_path for file_path in files]
154
+ filtered_absolute = filter_files_by_content(
155
+ absolute_paths, self.contains, ignore_case
156
+ )
157
+
158
+ # Convert back to relative paths
159
+ filtered_files = []
160
+ for abs_path in filtered_absolute:
161
+ try:
162
+ rel_path = abs_path.relative_to(self.root_dir)
163
+ filtered_files.append(rel_path)
164
+ except ValueError:
165
+ continue
166
+
167
+ kept_count = len(filtered_files)
168
+ self.logger.info(
169
+ f"Applied content filter (kept {kept_count} / {initial_count})"
170
+ )
171
+ self.logger.debug(f"Files kept after content filter: {filtered_files}")
172
+
173
+ return filtered_files
174
+
113
175
  def collect_file_tree(self) -> None:
114
176
  if self.changes_only:
115
177
  self.logger.info("Collecting uncommitted files from Git repository.")
@@ -147,6 +209,8 @@ class ProjectController:
147
209
  continue
148
210
  all_files = self._apply_include_exclude(all_files)
149
211
  self.logger.debug(f"All files after applying include/exclude: {all_files}")
212
+ all_files = self._apply_content_filter(all_files)
213
+ self.logger.debug(f"All files after applying content filter: {all_files}")
150
214
  if self.input_paths:
151
215
  trees = []
152
216
  for input_path in self.input_paths:
@@ -0,0 +1,104 @@
1
+ # src/reposnap/core/content_search.py
2
+
3
+ """
4
+ Private content search helpers for substring matching in files.
5
+
6
+ This module provides stateless utility functions for searching file contents.
7
+ It is intended for internal use by the project controller and should not be
8
+ imported directly by external consumers.
9
+ """
10
+
11
+ import logging
12
+ from pathlib import Path
13
+ from typing import List
14
+
15
+
16
logger = logging.getLogger(__name__)

# Configuration constants
MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MiB; larger files are never searched
BINARY_CHECK_SIZE = 1024  # Number of leading bytes inspected for a NUL byte


def file_matches(path: Path, patterns: List[str], ignore_case: bool = True) -> bool:
    """
    Check if a file contains any of the given patterns.

    Args:
        path: Path to the file to search
        patterns: List of substring patterns to search for
        ignore_case: Whether to perform case-insensitive matching (default: True)

    Returns:
        True if any line of the file contains any pattern, or if ``patterns``
        is empty. False if nothing matches, if ``path`` is not a regular
        file, if the file is larger than MAX_FILE_SIZE, if a NUL byte occurs
        within its first BINARY_CHECK_SIZE bytes, or if it cannot be read.

    Note:
        The file is read line by line as UTF-8 with undecodable bytes
        dropped, so a pattern can never match across a line break.
        Case-insensitive matching compares ``str.casefold()`` forms, which
        (unlike ``str.lower()``) treats e.g. "STRASSE" and "straße" as equal.
    """
    if not patterns:
        return True

    if not path.is_file():
        return False

    # Check file size - skip files larger than MAX_FILE_SIZE
    try:
        file_size = path.stat().st_size
    except OSError as e:
        logger.debug(f"Could not stat file {path}: {e}")
        return False
    if file_size > MAX_FILE_SIZE:
        logger.debug(f"Skipping large file {path} ({file_size} bytes)")
        return False

    # Check for binary content: a NUL byte in the leading chunk
    try:
        with path.open("rb") as f:
            first_chunk = f.read(BINARY_CHECK_SIZE)
    except (OSError, ValueError) as e:
        logger.debug(f"Could not read file {path} for binary check: {e}")
        return False
    if b"\0" in first_chunk:
        logger.debug(f"Skipping binary file {path}")
        return False

    # Normalize the patterns once, outside the per-line loop.
    if ignore_case:
        needles = [p.casefold() for p in patterns]
    else:
        needles = list(patterns)

    try:
        with path.open("r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                haystack = line.casefold() if ignore_case else line
                if any(needle in haystack for needle in needles):
                    return True
    except (OSError, ValueError) as e:
        # Best effort: an unreadable file simply does not match.
        logger.debug(f"Could not read file {path} for content search: {e}")
        return False
    return False
80
+
81
+
82
def filter_files_by_content(
    files: List[Path], patterns: List[str], ignore_case: bool = True
) -> List[Path]:
    """
    Filter a list of files to only include those containing the given patterns.

    Args:
        files: List of file paths to filter
        patterns: List of substring patterns to search for
        ignore_case: Whether to perform case-insensitive matching (default: True)

    Returns:
        A new list, in input order, of the files for which ``file_matches``
        reports at least one pattern. When ``patterns`` is empty the input
        list itself is returned unchanged.
    """
    if not patterns:
        return files

    return [
        file_path
        for file_path in files
        if file_matches(file_path, patterns, ignore_case)
    ]
@@ -46,6 +46,18 @@ def main():
46
46
  action="store_true",
47
47
  help="Use only files that are added/modified/untracked/stashed but not yet committed.",
48
48
  )
49
+ parser.add_argument(
50
+ "-S",
51
+ "--contains",
52
+ nargs="+",
53
+ default=[],
54
+ help="Only include files whose contents contain these substrings",
55
+ )
56
+ parser.add_argument(
57
+ "--contains-case",
58
+ action="store_true",
59
+ help="Make --contains case-sensitive",
60
+ )
49
61
 
50
62
  args = parser.parse_args()
51
63
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: reposnap
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Generate a Markdown file with all contents of your project
5
5
  Author: agoloborodko
6
6
  License-File: LICENSE
@@ -25,6 +25,7 @@ Description-Content-Type: text/markdown
25
25
  - **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
26
26
  - **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
27
27
  - **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
28
+ - **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
28
29
  - **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
29
30
 
30
31
  ## Installation
@@ -50,7 +51,7 @@ pip install -r requirements.lock
50
51
  To use `reposnap` from the command line, run it with the following options:
51
52
 
52
53
  ```bash
53
- reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] paths [paths ...]
54
+ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
54
55
  ```
55
56
 
56
57
  - `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
@@ -61,6 +62,8 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
61
62
  - `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
62
63
  - `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
63
64
  - `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
65
+ - `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
66
+ - `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
64
67
 
65
68
  #### Pattern Matching
66
69
 
@@ -73,6 +76,41 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
73
76
  - `-i "*.py"`: Includes only files ending with `.py`.
74
77
  - `-e "*.test.*"`: Excludes files with `.test.` in their names.
75
78
 
79
+ #### Content Filtering
80
+
81
+ The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
82
+
83
+ - **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
84
+ - **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
85
+ - **Performance**: Large files (>5MB) and binary files are automatically skipped for performance and safety reasons.
86
+
87
+ **Examples**:
88
+
89
+ 1. **Find files containing specific imports**:
90
+ ```bash
91
+ reposnap . -S "import logging"
92
+ ```
93
+
94
+ 2. **Search for multiple patterns (OR logic)**:
95
+ ```bash
96
+ reposnap . -S "TODO" "FIXME" "import requests"
97
+ ```
98
+
99
+ 3. **Case-sensitive content search**:
100
+ ```bash
101
+ reposnap . -S "TODO" --contains-case
102
+ ```
103
+
104
+ 4. **Combine content filtering with other filters**:
105
+ ```bash
106
+ reposnap . -S "class " -i "*.py" --structure-only
107
+ ```
108
+
109
+ 5. **Find files with specific function calls**:
110
+ ```bash
111
+ reposnap . -S "logger.error" "raise Exception"
112
+ ```
113
+
76
114
  #### Only Snapshot Your Current Work
77
115
 
78
116
  The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
@@ -147,6 +185,18 @@ This is particularly useful when you want to:
147
185
  reposnap . -c
148
186
  ```
149
187
 
188
+ 7. **Find and document files containing specific code patterns**:
189
+
190
+ ```bash
191
+ reposnap . -S "import logging" "logger."
192
+ ```
193
+
194
+ 8. **Combine content filtering with file type filtering**:
195
+
196
+ ```bash
197
+ reposnap . -S "class " -i "*.py" --structure-only
198
+ ```
199
+
150
200
  ### Graphical User Interface
151
201
 
152
202
  `reposnap` also provides a GUI for users who prefer an interactive interface.
@@ -1,19 +1,20 @@
1
1
  reposnap/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  reposnap/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- reposnap/controllers/project_controller.py,sha256=RROOcb_FiEhFc9oshptPL8moH-5lkUrio7Lp0MqBqp0,10599
3
+ reposnap/controllers/project_controller.py,sha256=Kg_Vu7gkWRj5EDdiOwoGlsmaZ1XUsBc_XHIV5yJG4w8,13007
4
4
  reposnap/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ reposnap/core/content_search.py,sha256=kPiW5edDpNWgDwQg2GbkBJNweAyq7DHUZgtvajTZHK8,3262
5
6
  reposnap/core/file_system.py,sha256=82gwvmgrsWf63paMrIz-Z0eqIjbqt9_-vujdXlJJoFE,1074
6
7
  reposnap/core/git_repo.py,sha256=YVIbx-Y_MUbnn5Z4E2XBTJbG7Kawx5aUX2tg6vnocd0,4284
7
8
  reposnap/core/markdown_generator.py,sha256=V6uEbxVSbCbxKN9ysTDKsIDvEGBxFutpOpyaZRXZUGw,3747
8
9
  reposnap/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- reposnap/interfaces/cli.py,sha256=gL0gauEt_AkuRRr-p5YAeHeUPgvZ59lpZMqsopLjHas,1661
10
+ reposnap/interfaces/cli.py,sha256=gmO49_DeNiqDsCtKxMEgZ8T-UjxDqwZ9m0LR_JsES5Q,1976
10
11
  reposnap/interfaces/gui.py,sha256=sTuQxjD1nPa9FpgfzOwi6VDO5QMMtDX-5CiEhbJJcs4,5429
11
12
  reposnap/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
13
  reposnap/models/file_tree.py,sha256=jGo_SizdFcOiDC1OOMz-tiijRN3iSD7ENh6Xw8S6OL0,3362
13
14
  reposnap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
15
  reposnap/utils/path_utils.py,sha256=UrMe5cjspTf-4gjg2lzv6BgLwZ7S_1lLECQvDMDZO9Y,507
15
- reposnap-0.7.0.dist-info/METADATA,sha256=oJF5qQGPWc6aG2mnupOpiDrbJ1hWbApjDCP0tM-lB2Y,6768
16
- reposnap-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- reposnap-0.7.0.dist-info/entry_points.txt,sha256=o3GyO7bpR0dujPCjsvvZMPv4pXNJlFwD49_pA1r5FOA,102
18
- reposnap-0.7.0.dist-info/licenses/LICENSE,sha256=Aj7WCYBXi98pvi723HPn4GDRyjxToNWb3PC6j1_lnPk,1069
19
- reposnap-0.7.0.dist-info/RECORD,,
16
+ reposnap-0.8.0.dist-info/METADATA,sha256=dRmQAHOFy1Q41neUvmC8YAwxRbWFgxQ7Z4MQFiKu6mI,8687
17
+ reposnap-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ reposnap-0.8.0.dist-info/entry_points.txt,sha256=o3GyO7bpR0dujPCjsvvZMPv4pXNJlFwD49_pA1r5FOA,102
19
+ reposnap-0.8.0.dist-info/licenses/LICENSE,sha256=Aj7WCYBXi98pvi723HPn4GDRyjxToNWb3PC6j1_lnPk,1069
20
+ reposnap-0.8.0.dist-info/RECORD,,