reposnap 0.6.5__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {reposnap-0.6.5 → reposnap-0.8.0}/.coverage +0 -0
  2. {reposnap-0.6.5 → reposnap-0.8.0}/PKG-INFO +96 -2
  3. {reposnap-0.6.5 → reposnap-0.8.0}/README.md +95 -1
  4. {reposnap-0.6.5 → reposnap-0.8.0}/pyproject.toml +1 -1
  5. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/controllers/project_controller.py +82 -6
  6. reposnap-0.8.0/reposnap/core/content_search.py +104 -0
  7. reposnap-0.8.0/reposnap/core/git_repo.py +100 -0
  8. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/interfaces/cli.py +18 -0
  9. reposnap-0.8.0/tests/reposnap/test_cli.py +223 -0
  10. reposnap-0.8.0/tests/reposnap/test_contains_filter.py +295 -0
  11. reposnap-0.8.0/tests/reposnap/test_git_repo.py +211 -0
  12. {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_project_controller.py +79 -0
  13. reposnap-0.6.5/reposnap/core/git_repo.py +0 -32
  14. reposnap-0.6.5/tests/reposnap/test_cli.py +0 -80
  15. reposnap-0.6.5/tests/reposnap/test_git_repo.py +0 -20
  16. {reposnap-0.6.5 → reposnap-0.8.0}/.github/workflows/python-package.yml +0 -0
  17. {reposnap-0.6.5 → reposnap-0.8.0}/.github/workflows/release.yml +0 -0
  18. {reposnap-0.6.5 → reposnap-0.8.0}/.gitignore +0 -0
  19. {reposnap-0.6.5 → reposnap-0.8.0}/.pre-commit-config.yaml +0 -0
  20. {reposnap-0.6.5 → reposnap-0.8.0}/.python-version +0 -0
  21. {reposnap-0.6.5 → reposnap-0.8.0}/.vscode/launch.json +0 -0
  22. {reposnap-0.6.5 → reposnap-0.8.0}/CONTRIBUTING.md +0 -0
  23. {reposnap-0.6.5 → reposnap-0.8.0}/LICENSE +0 -0
  24. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/__init__.py +0 -0
  25. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/controllers/__init__.py +0 -0
  26. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/core/__init__.py +0 -0
  27. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/core/file_system.py +0 -0
  28. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/core/markdown_generator.py +0 -0
  29. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/interfaces/__init__.py +0 -0
  30. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/interfaces/gui.py +0 -0
  31. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/models/__init__.py +0 -0
  32. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/models/file_tree.py +0 -0
  33. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/utils/__init__.py +0 -0
  34. {reposnap-0.6.5 → reposnap-0.8.0}/reposnap/utils/path_utils.py +0 -0
  35. {reposnap-0.6.5 → reposnap-0.8.0}/requirements-dev.lock +0 -0
  36. {reposnap-0.6.5 → reposnap-0.8.0}/requirements.lock +0 -0
  37. {reposnap-0.6.5 → reposnap-0.8.0}/tests/__init__.py +0 -0
  38. {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/__init__.py +0 -0
  39. {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_collected_tree.py +0 -0
  40. {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_file_system.py +0 -0
  41. {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_file_tree.py +0 -0
  42. {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_gui.py +0 -0
  43. {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_markdown_generator.py +0 -0
  44. {reposnap-0.6.5 → reposnap-0.8.0}/tests/reposnap/test_path_utils.py +0 -0
  45. {reposnap-0.6.5 → reposnap-0.8.0}/tests/resources/another_existing_file.py +0 -0
  46. {reposnap-0.6.5 → reposnap-0.8.0}/tests/resources/existing_file.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: reposnap
3
- Version: 0.6.5
3
+ Version: 0.8.0
4
4
  Summary: Generate a Markdown file with all contents of your project
5
5
  Author: agoloborodko
6
6
  License-File: LICENSE
@@ -25,6 +25,8 @@ Description-Content-Type: text/markdown
25
25
  - **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
26
26
  - **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
27
27
  - **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
28
+ - **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
29
+ - **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
28
30
 
29
31
  ## Installation
30
32
 
@@ -49,7 +51,7 @@ pip install -r requirements.lock
49
51
  To use `reposnap` from the command line, run it with the following options:
50
52
 
51
53
  ```bash
52
- reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] paths [paths ...]
54
+ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
53
55
  ```
54
56
 
55
57
  - `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
@@ -59,6 +61,9 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
59
61
  - `--debug`: Enable debug-level logging.
60
62
  - `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
61
63
  - `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
64
+ - `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
65
+ - `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
66
+ - `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
62
67
 
63
68
  #### Pattern Matching
64
69
 
@@ -71,6 +76,77 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
71
76
  - `-i "*.py"`: Includes only files ending with `.py`.
72
77
  - `-e "*.test.*"`: Excludes files with `.test.` in their names.
73
78
 
79
+ #### Content Filtering
80
+
81
+ The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
82
+
83
+ - **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
84
+ - **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
85
+ - **Performance**: Files larger than 5 MiB and binary files (detected by a NUL byte in the first 1 KiB) are automatically skipped for performance and safety reasons.
86
+
87
+ **Examples**:
88
+
89
+ 1. **Find files containing specific imports**:
90
+ ```bash
91
+ reposnap . -S "import logging"
92
+ ```
93
+
94
+ 2. **Search for multiple patterns (OR logic)**:
95
+ ```bash
96
+ reposnap . -S "TODO" "FIXME" "import requests"
97
+ ```
98
+
99
+ 3. **Case-sensitive content search**:
100
+ ```bash
101
+ reposnap . -S "TODO" --contains-case
102
+ ```
103
+
104
+ 4. **Combine content filtering with other filters**:
105
+ ```bash
106
+ reposnap . -S "class " -i "*.py" --structure-only
107
+ ```
108
+
109
+ 5. **Find files with specific function calls**:
110
+ ```bash
111
+ reposnap . -S "logger.error" "raise Exception"
112
+ ```
113
+
114
+ #### Only Snapshot Your Current Work
115
+
116
+ The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
117
+
118
+ - **Staged changes**: Files that have been added to the index with `git add`
119
+ - **Unstaged changes**: Files that have been modified but not yet staged
120
+ - **Untracked files**: New files that haven't been added to Git yet
121
+ - **Stashed changes**: Files that are stored in Git stash entries
122
+
123
+ This is particularly useful when you want to:
124
+ - Document only your current work-in-progress
125
+ - Create a snapshot of changes before committing
126
+ - Review what files you've been working on
127
+
128
+ **Examples**:
129
+
130
+ 1. **Generate documentation for only your uncommitted changes**:
131
+ ```bash
132
+ reposnap . -c
133
+ ```
134
+
135
+ 2. **Combine with structure-only for a quick overview**:
136
+ ```bash
137
+ reposnap . -c --structure-only
138
+ ```
139
+
140
+ 3. **Filter uncommitted changes by file type**:
141
+ ```bash
142
+ reposnap . -c -i "*.py"
143
+ ```
144
+
145
+ 4. **Exclude test files from uncommitted changes**:
146
+ ```bash
147
+ reposnap . -c -e "*test*"
148
+ ```
149
+
74
150
  #### Examples
75
151
 
76
152
  1. **Generate a full project structure with file contents**:
@@ -103,6 +179,24 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
103
179
  reposnap my_project/ -e "gui"
104
180
  ```
105
181
 
182
+ 6. **Document only your current uncommitted work**:
183
+
184
+ ```bash
185
+ reposnap . -c
186
+ ```
187
+
188
+ 7. **Find and document files containing specific code patterns**:
189
+
190
+ ```bash
191
+ reposnap . -S "import logging" "logger."
192
+ ```
193
+
194
+ 8. **Combine content filtering with file type filtering**:
195
+
196
+ ```bash
197
+ reposnap . -S "class " -i "*.py" --structure-only
198
+ ```
199
+
106
200
  ### Graphical User Interface
107
201
 
108
202
  `reposnap` also provides a GUI for users who prefer an interactive interface.
@@ -12,6 +12,8 @@
12
12
  - **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files.
13
13
  - **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories.
14
14
  - **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude.
15
+ - **Content Filtering**: Use `--contains` to filter files based on their content, including only files that contain specific substrings or code patterns.
16
+ - **Changes Only Mode**: Use `-c` or `--changes` to snapshot only uncommitted files (staged, unstaged, untracked, and stashed changes).
15
17
 
16
18
  ## Installation
17
19
 
@@ -36,7 +38,7 @@ pip install -r requirements.lock
36
38
  To use `reposnap` from the command line, run it with the following options:
37
39
 
38
40
  ```bash
39
- reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] paths [paths ...]
41
+ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] [-c] [-S CONTAINS [CONTAINS ...]] [--contains-case] paths [paths ...]
40
42
  ```
41
43
 
42
44
  - `paths`: One or more paths (files or directories) within the repository whose content and structure should be rendered.
@@ -46,6 +48,9 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
46
48
  - `--debug`: Enable debug-level logging.
47
49
  - `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files.
48
50
  - `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files.
51
+ - `-c, --changes`: Use only files that are added/modified/untracked/stashed but not yet committed.
52
+ - `-S, --contains`: Only include files whose contents contain these substrings. Multiple patterns can be specified.
53
+ - `--contains-case`: Make `--contains` case-sensitive (default is case-insensitive).
49
54
 
50
55
  #### Pattern Matching
51
56
 
@@ -58,6 +63,77 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
58
63
  - `-i "*.py"`: Includes only files ending with `.py`.
59
64
  - `-e "*.test.*"`: Excludes files with `.test.` in their names.
60
65
 
66
+ #### Content Filtering
67
+
68
+ The `--contains` (or `-S`) flag allows you to filter files based on their content, including only files that contain specific substrings. This is particularly useful for focusing on files that contain certain code patterns, imports, or keywords.
69
+
70
+ - **Case Sensitivity**: By default, content matching is case-insensitive. Use the `--contains-case` flag to enable case-sensitive matching.
71
+ - **Multiple Patterns**: You can specify multiple patterns, and files containing **any** of the patterns will be included (OR logic).
72
+ - **Performance**: Files larger than 5 MiB and binary files (detected by a NUL byte in the first 1 KiB) are automatically skipped for performance and safety reasons.
73
+
74
+ **Examples**:
75
+
76
+ 1. **Find files containing specific imports**:
77
+ ```bash
78
+ reposnap . -S "import logging"
79
+ ```
80
+
81
+ 2. **Search for multiple patterns (OR logic)**:
82
+ ```bash
83
+ reposnap . -S "TODO" "FIXME" "import requests"
84
+ ```
85
+
86
+ 3. **Case-sensitive content search**:
87
+ ```bash
88
+ reposnap . -S "TODO" --contains-case
89
+ ```
90
+
91
+ 4. **Combine content filtering with other filters**:
92
+ ```bash
93
+ reposnap . -S "class " -i "*.py" --structure-only
94
+ ```
95
+
96
+ 5. **Find files with specific function calls**:
97
+ ```bash
98
+ reposnap . -S "logger.error" "raise Exception"
99
+ ```
100
+
101
+ #### Only Snapshot Your Current Work
102
+
103
+ The `-c` or `--changes` flag allows you to generate documentation for only the files that have been modified but not yet committed. This includes:
104
+
105
+ - **Staged changes**: Files that have been added to the index with `git add`
106
+ - **Unstaged changes**: Files that have been modified but not yet staged
107
+ - **Untracked files**: New files that haven't been added to Git yet
108
+ - **Stashed changes**: Files that are stored in Git stash entries
109
+
110
+ This is particularly useful when you want to:
111
+ - Document only your current work-in-progress
112
+ - Create a snapshot of changes before committing
113
+ - Review what files you've been working on
114
+
115
+ **Examples**:
116
+
117
+ 1. **Generate documentation for only your uncommitted changes**:
118
+ ```bash
119
+ reposnap . -c
120
+ ```
121
+
122
+ 2. **Combine with structure-only for a quick overview**:
123
+ ```bash
124
+ reposnap . -c --structure-only
125
+ ```
126
+
127
+ 3. **Filter uncommitted changes by file type**:
128
+ ```bash
129
+ reposnap . -c -i "*.py"
130
+ ```
131
+
132
+ 4. **Exclude test files from uncommitted changes**:
133
+ ```bash
134
+ reposnap . -c -e "*test*"
135
+ ```
136
+
61
137
  #### Examples
62
138
 
63
139
  1. **Generate a full project structure with file contents**:
@@ -90,6 +166,24 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]
90
166
  reposnap my_project/ -e "gui"
91
167
  ```
92
168
 
169
+ 6. **Document only your current uncommitted work**:
170
+
171
+ ```bash
172
+ reposnap . -c
173
+ ```
174
+
175
+ 7. **Find and document files containing specific code patterns**:
176
+
177
+ ```bash
178
+ reposnap . -S "import logging" "logger."
179
+ ```
180
+
181
+ 8. **Combine content filtering with file type filtering**:
182
+
183
+ ```bash
184
+ reposnap . -S "class " -i "*.py" --structure-only
185
+ ```
186
+
93
187
  ### Graphical User Interface
94
188
 
95
189
  `reposnap` also provides a GUI for users who prefer an interactive interface.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "reposnap"
3
- version = "0.6.5"
3
+ version = "0.8.0"
4
4
  description = "Generate a Markdown file with all contents of your project"
5
5
  authors = [
6
6
  { name = "agoloborodko" }
@@ -15,11 +15,17 @@ class ProjectController:
15
15
  self.args = args
16
16
  # Treat positional arguments as literal file/directory names.
17
17
  input_paths = [
18
- Path(p) for p in (args.paths if hasattr(args, "paths") else [args.path])
18
+ Path(p) for p in (args.paths if hasattr(args, "paths") else [])
19
19
  ]
20
20
  self.input_paths = []
21
21
  for p in input_paths:
22
- candidate = (self.root_dir / p).resolve()
22
+ if p.is_absolute():
23
+ # Handle absolute paths - use as-is but verify they're under root_dir
24
+ candidate = p.resolve()
25
+ else:
26
+ # Handle relative paths - join with root_dir
27
+ candidate = (self.root_dir / p).resolve()
28
+
23
29
  if candidate.exists():
24
30
  try:
25
31
  rel = candidate.relative_to(self.root_dir)
@@ -31,7 +37,7 @@ class ProjectController:
31
37
  )
32
38
  else:
33
39
  self.logger.warning(
34
- f"Path {p} does not exist relative to repository root {self.root_dir}."
40
+ f"Path {p} does not exist or is not under repository root {self.root_dir}."
35
41
  )
36
42
  self.output_file: Path = (
37
43
  Path(args.output).resolve()
@@ -47,6 +53,9 @@ class ProjectController:
47
53
  self.exclude_patterns: List[str] = (
48
54
  args.exclude if hasattr(args, "exclude") else []
49
55
  )
56
+ self.changes_only: bool = getattr(args, "changes", False)
57
+ self.contains: List[str] = getattr(args, "contains", [])
58
+ self.contains_case: bool = getattr(args, "contains_case", False)
50
59
  else:
51
60
  self.args = None
52
61
  self.input_paths = []
@@ -54,6 +63,9 @@ class ProjectController:
54
63
  self.structure_only = False
55
64
  self.include_patterns = []
56
65
  self.exclude_patterns = []
66
+ self.changes_only = False
67
+ self.contains = []
68
+ self.contains_case = False
57
69
  self.file_tree: Optional[FileTree] = None
58
70
  self.gitignore_patterns: List[str] = []
59
71
  if self.root_dir:
@@ -108,14 +120,76 @@ class ProjectController:
108
120
  files = [f for f in files if not spec_exc.match_file(f.as_posix())]
109
121
  return files
110
122
 
123
def _apply_content_filter(self, files: List[Path]) -> List[Path]:
    """
    Narrow ``files`` down to those whose contents match ``self.contains``.

    Args:
        files: Relative file paths (interpreted against ``self.root_dir``).

    Returns:
        The paths, still relative to ``self.root_dir``, that
        ``filter_files_by_content`` reports as containing at least one of
        the substrings in ``self.contains``. When ``self.contains`` is
        empty the input list is handed back untouched.

    Note:
        Matching ignores case unless ``self.contains_case`` is set.
    """
    patterns = self.contains
    if not patterns:
        # No content filter requested: nothing to do.
        return files

    # Imported lazily, only when a content filter is actually requested.
    from reposnap.core.content_search import filter_files_by_content

    case_insensitive = not self.contains_case
    total = len(files)

    self.logger.debug(
        f"Applying content filter with patterns: {patterns}, "
        f"ignore_case: {case_insensitive}"
    )

    # The search helper opens files, so it needs absolute locations.
    candidates = [self.root_dir / relative for relative in files]
    matches = filter_files_by_content(candidates, patterns, case_insensitive)

    # Translate the survivors back to root-relative paths, silently
    # dropping anything that is not located under the root directory.
    kept = []
    for match in matches:
        try:
            kept.append(match.relative_to(self.root_dir))
        except ValueError:
            pass

    self.logger.info(f"Applied content filter (kept {len(kept)} / {total})")
    self.logger.debug(f"Files kept after content filter: {kept}")

    return kept
174
+
111
175
  def collect_file_tree(self) -> None:
112
- self.logger.info("Collecting files from Git tracked files if available.")
176
+ if self.changes_only:
177
+ self.logger.info("Collecting uncommitted files from Git repository.")
178
+ else:
179
+ self.logger.info("Collecting files from Git tracked files if available.")
113
180
  try:
114
181
  from reposnap.core.git_repo import GitRepo
115
182
 
116
183
  git_repo = GitRepo(self.root_dir)
117
- all_files = git_repo.get_git_files()
118
- self.logger.debug(f"Git tracked files: {all_files}")
184
+ if self.changes_only:
185
+ all_files = git_repo.get_uncommitted_files()
186
+ self.logger.info(
187
+ "Using only uncommitted files (staged, unstaged, untracked, stashed)."
188
+ )
189
+ else:
190
+ all_files = git_repo.get_git_files()
191
+ self.logger.info("Using all Git tracked files.")
192
+ self.logger.debug(f"Git files: {all_files}")
119
193
  except Exception as e:
120
194
  self.logger.warning(f"Error obtaining Git tracked files: {e}.")
121
195
  all_files = []
@@ -135,6 +209,8 @@ class ProjectController:
135
209
  continue
136
210
  all_files = self._apply_include_exclude(all_files)
137
211
  self.logger.debug(f"All files after applying include/exclude: {all_files}")
212
+ all_files = self._apply_content_filter(all_files)
213
+ self.logger.debug(f"All files after applying content filter: {all_files}")
138
214
  if self.input_paths:
139
215
  trees = []
140
216
  for input_path in self.input_paths:
@@ -0,0 +1,104 @@
1
+ # reposnap/core/content_search.py
2
+
3
+ """
4
+ Private content search helpers for substring matching in files.
5
+
6
+ This module provides stateless utility functions for searching file contents.
7
+ It is intended for internal use by the project controller and should not be
8
+ imported directly by external consumers.
9
+ """
10
+
11
+ import logging
12
+ from pathlib import Path
13
+ from typing import List
14
+
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Configuration constants
19
+ MAX_FILE_SIZE = 5 * 1024 * 1024 # 5 MiB
20
+ BINARY_CHECK_SIZE = 1024 # First 1KB to check for binary content
21
+
22
+
23
def file_matches(path: Path, patterns: List[str], ignore_case: bool = True) -> bool:
    """
    Report whether the file at ``path`` contains at least one pattern.

    Args:
        path: Location of the file to inspect.
        patterns: Substrings to look for; an empty list matches everything.
        ignore_case: Compare lower-cased text when True (the default).

    Returns:
        True when ``patterns`` is empty or any pattern occurs in some line
        of the file. False when ``path`` is not a regular file, is larger
        than MAX_FILE_SIZE, has a NUL byte within its first
        BINARY_CHECK_SIZE bytes, cannot be read, or contains no pattern.

    Note:
        The file is scanned line by line as UTF-8 with undecodable bytes
        ignored, so a pattern can only match within a single line.
    """
    if not patterns:
        return True
    if not path.is_file():
        return False

    # Size guard: refuse to scan anything above the configured limit.
    try:
        size = path.stat().st_size
        if size > MAX_FILE_SIZE:
            logger.debug(f"Skipping large file {path} ({size} bytes)")
            return False
    except OSError as exc:
        logger.debug(f"Could not stat file {path}: {exc}")
        return False

    # Binary guard: a NUL byte near the start marks the file as binary.
    try:
        with path.open("rb") as raw:
            looks_binary = b"\0" in raw.read(BINARY_CHECK_SIZE)
            if looks_binary:
                logger.debug(f"Skipping binary file {path}")
                return False
    except Exception as exc:
        logger.debug(f"Could not read file {path} for binary check: {exc}")
        return False

    # Normalise the patterns once instead of once per line.
    if ignore_case:
        needles = [pattern.lower() for pattern in patterns]
    else:
        needles = list(patterns)

    try:
        with path.open("r", encoding="utf-8", errors="ignore") as text:
            for raw_line in text:
                haystack = raw_line.lower() if ignore_case else raw_line
                for needle in needles:
                    if needle in haystack:
                        return True
        return False
    except Exception as exc:
        logger.debug(f"Could not read file {path} for content search: {exc}")
        return False
80
+
81
+
82
def filter_files_by_content(
    files: List[Path], patterns: List[str], ignore_case: bool = True
) -> List[Path]:
    """
    Select the files whose contents include at least one of ``patterns``.

    Args:
        files: Candidate file paths.
        patterns: Substrings to look for.
        ignore_case: Compare case-insensitively when True (the default).

    Returns:
        ``files`` itself when ``patterns`` is empty; otherwise a new list,
        in the original order, of the paths accepted by ``file_matches``.
    """
    if not patterns:
        return files

    return [
        candidate
        for candidate in files
        if file_matches(candidate, patterns, ignore_case)
    ]
@@ -0,0 +1,100 @@
1
+ # reposnap/core/git_repo.py
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from git import Repo, InvalidGitRepositoryError
6
+ from typing import List
7
+
8
+
9
class GitRepo:
    """
    Read-only helper for listing files of the Git repository that contains
    ``repo_path``.

    The repository is located by searching ``repo_path`` and its parent
    directories. Every path returned by the public methods is relative to
    ``repo_path``; files of the repository that live outside ``repo_path``
    are left out.
    """

    # Upper bound on the number of stash entries inspected by
    # get_uncommitted_files(), to keep large stash stacks cheap.
    MAX_STASHES = 10

    def __init__(self, repo_path: Path):
        """Remember the resolved ``repo_path`` and set up a module logger."""
        self.repo_path: Path = repo_path.resolve()
        self.logger = logging.getLogger(__name__)

    def get_git_files(self) -> List[Path]:
        """
        Return the files reported by ``git ls-files``.

        Returns:
            Paths relative to ``self.repo_path``. An empty list when
            ``self.repo_path`` is not inside a Git repository.
        """
        try:
            repo: Repo = Repo(self.repo_path, search_parent_directories=True)
            repo_root: Path = Path(repo.working_tree_dir).resolve()
            git_files: List[str] = repo.git.ls_files().splitlines()
            self.logger.debug(f"Git files from {repo_root}: {git_files}")
            git_files_relative: List[Path] = []
            for f in git_files:
                absolute_path: Path = (repo_root / f).resolve()
                try:
                    git_files_relative.append(
                        absolute_path.relative_to(self.repo_path)
                    )
                except ValueError:
                    # Skip files not under root_dir
                    continue
            return git_files_relative
        except InvalidGitRepositoryError:
            self.logger.error(f"Invalid Git repository at: {self.repo_path}")
            return []

    def get_uncommitted_files(self) -> List[Path]:
        """
        Return every *working-copy* file that differs from HEAD - staged,
        unstaged, untracked, plus everything referenced in `git stash list`.

        In a repository without any commit there is no HEAD to compare
        with, so every index entry is treated as a staged change.

        Returns:
            A sorted, de-duplicated list of paths relative to
            ``self.repo_path``, restricted to files that currently exist.
            An empty list when ``self.repo_path`` is not inside a Git
            repository.
        """
        try:
            repo: Repo = Repo(self.repo_path, search_parent_directories=True)
            repo_root: Path = Path(repo.working_tree_dir).resolve()
            paths: set = set()

            # Staged changes (diff between index and HEAD)
            if repo.head.is_valid():
                paths |= self._diff_paths(repo.index.diff("HEAD"))
            else:
                # Unborn HEAD (no commits yet): diffing against "HEAD"
                # would raise, and everything in the index is new anyway.
                paths.update(path for path, _stage in repo.index.entries)

            # Unstaged changes (diff between working tree and index)
            paths |= self._diff_paths(repo.index.diff(None))

            # Untracked files
            paths.update(repo.untracked_files)

            # Stash entries - with performance guard
            paths |= self._stashed_paths(repo)

            # Convert to relative paths and filter existing files
            relative_paths = []
            for path_str in paths:
                if not path_str:  # Skip empty strings
                    continue
                absolute_path = (repo_root / path_str).resolve()
                try:
                    relative_path = absolute_path.relative_to(self.repo_path)
                except ValueError:
                    # Log warning for paths outside repo root
                    self.logger.warning(
                        f"Path {path_str} is outside repository root {self.repo_path}. Skipping."
                    )
                    continue
                if absolute_path.is_file():
                    relative_paths.append(relative_path)

            # Return sorted, deduplicated list for deterministic output
            result = sorted(set(relative_paths))
            self.logger.debug(f"Uncommitted files from {repo_root}: {result}")
            return result

        except InvalidGitRepositoryError:
            self.logger.error(f"Invalid Git repository at: {self.repo_path}")
            return []

    @staticmethod
    def _diff_paths(diffs) -> set:
        """
        Collect the paths named on either side of the given diff entries.

        Both ``a_path`` and ``b_path`` are recorded so that a rename
        contributes its old and its new name; whichever no longer exists
        is discarded later by the caller's ``is_file()`` check. Values
        that are not non-empty strings (such as ``None``) are ignored.
        """
        found = set()
        for diff in diffs:
            for candidate in (diff.a_path, diff.b_path):
                if isinstance(candidate, str) and candidate:
                    found.add(candidate)
        return found

    def _stashed_paths(self, repo) -> set:
        """
        Return the paths touched by the first MAX_STASHES stash entries.

        This is best effort: any failure while talking to Git is logged at
        debug level and the paths gathered so far are returned.
        """
        found: set = set()
        try:
            stash_refs = repo.git.stash("list", "--format=%gd").splitlines()
            # Limit stash processing to prevent performance issues
            max_stashes = self.MAX_STASHES
            if len(stash_refs) > max_stashes:
                self.logger.warning(
                    f"Large stash stack detected ({len(stash_refs)} entries). "
                    f"Processing only the first {max_stashes} stashes."
                )
                stash_refs = stash_refs[:max_stashes]

            for ref in stash_refs:
                if ref.strip():  # Skip empty lines
                    # Files changed between the stash commit and its
                    # first parent.
                    found.update(
                        repo.git.diff("--name-only", f"{ref}^1", ref).splitlines()
                    )
        except Exception as e:
            self.logger.debug(f"Error processing stash entries: {e}")
        return found
@@ -40,6 +40,24 @@ def main():
40
40
  default=[],
41
41
  help="File/folder patterns to exclude.",
42
42
  )
43
+ parser.add_argument(
44
+ "-c",
45
+ "--changes",
46
+ action="store_true",
47
+ help="Use only files that are added/modified/untracked/stashed but not yet committed.",
48
+ )
49
+ parser.add_argument(
50
+ "-S",
51
+ "--contains",
52
+ nargs="+",
53
+ default=[],
54
+ help="Only include files whose contents contain these substrings",
55
+ )
56
+ parser.add_argument(
57
+ "--contains-case",
58
+ action="store_true",
59
+ help="Make --contains case-sensitive",
60
+ )
43
61
 
44
62
  args = parser.parse_args()
45
63