libsightseeing 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ """
2
+ libsightseeing - a shared library for file finding and source resolution.
3
+
4
+ a library for finding files in repositories while respecting .gitignore files
5
+ and supporting include/exclude patterns.
6
+
7
+ functions:
8
+ `find_files` - one-liner api for finding files
9
+
10
+ classes:
11
+ `SourceResolver` - configurable file resolver with gitignore support
12
+
13
+ usage:
14
+ ```python
15
+ from libsightseeing import find_files, SourceResolver
16
+
17
+ # simple usage
18
+ files = find_files(".", include=["*.py"])
19
+
20
+ # advanced usage
21
+ resolver = SourceResolver(
22
+ root=".",
23
+ include=["src/**/*.py"],
24
+ exclude=["tests"],
25
+ respect_gitignore=True,
26
+ )
27
+ files = resolver.resolve()
28
+ ```
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ from pathlib import Path
34
+
35
+ from .core import SourceResolver
36
+
37
+ __version__ = "0.1.0"
38
+ __all__ = ["find_files", "SourceResolver"]
39
+
40
+
41
def find_files(
    root: str | Path = ".",
    *,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    respect_gitignore: bool = True,
) -> tuple[Path, ...]:
    """
    find files below a directory, optionally honouring .gitignore files.

    thin wrapper that builds a `SourceResolver` from the given arguments
    and returns the result of its `resolve()` call.

    arguments:
        `root: str | Path`
            directory to search in (default: current directory)
        `include: list[str] | None`
            glob patterns for files to include; `None` or an empty list is
            passed on as an empty list
        `exclude: list[str] | None`
            glob patterns for files to exclude; when `None`, the argument is
            not passed at all so `SourceResolver` applies its own default
        `respect_gitignore: bool`
            whether to respect .gitignore files (default: True)

    returns: `tuple[Path, ...]`
        whatever `SourceResolver.resolve()` returns for this configuration

    usage:
        ```python
        # find all python files
        files = find_files(".", include=["*.py"])

        # find files excluding tests
        files = find_files("src", exclude=["tests"])

        # include gitignored files
        files = find_files(".", include=["*.py"], respect_gitignore=False)
        ```
    """
    search_root = Path(root)
    include_patterns = include or []

    # leaving `exclude` out (rather than passing None) keeps the resolver's
    # own default exclude patterns in effect
    if exclude is None:
        return SourceResolver(
            root=search_root,
            include=include_patterns,
            respect_gitignore=respect_gitignore,
        ).resolve()

    return SourceResolver(
        root=search_root,
        include=include_patterns,
        exclude=exclude,
        respect_gitignore=respect_gitignore,
    ).resolve()
libsightseeing/core.py ADDED
@@ -0,0 +1,105 @@
1
+ """
2
+ core module for libsightseeing.
3
+
4
+ contains the SourceResolver class for finding files with gitignore support.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ from collections.abc import Generator
12
+ from typing import Final
13
+
14
+ from .gitignore import GitignoreMatcher
15
+ from .patterns import PatternMatcher
16
+
17
# exclude patterns used when the caller supplies none - only .venv as per requirements
DEFAULT_EXCLUDE: Final[list[str]] = [".venv"]


@dataclass
class SourceResolver:
    """
    configurable file resolver with gitignore support.

    collects the files below `root` that pass the include/exclude patterns
    and, when enabled, are not ignored by any .gitignore file.

    attributes:
        `root: Path`
            the root directory to search in (resolved to an absolute path
            after construction)
        `include: list[str]`
            glob patterns for files to include (default: empty list)
        `exclude: list[str]`
            glob patterns for files to exclude (default: a copy of
            `DEFAULT_EXCLUDE`)
        `respect_gitignore: bool`
            whether to respect .gitignore files (default: True)

    usage:
        ```python
        resolver = SourceResolver(
            root=Path("."),
            include=["src/**/*.py"],
            exclude=["tests"],
            respect_gitignore=True,
        )
        files = resolver.resolve()
        ```
    """

    root: Path
    include: list[str] = field(default_factory=list)
    # a fresh list per instance so the module-level default is never mutated
    exclude: list[str] = field(default_factory=lambda: list(DEFAULT_EXCLUDE))
    respect_gitignore: bool = True

    def __post_init__(self) -> None:
        """coerce root to a Path and resolve it to an absolute path."""
        self.root = Path(self.root).resolve()

    def resolve(self) -> tuple[Path, ...]:
        """
        resolve all files matching the configured patterns.

        every file yielded by `_iter_files` is kept when it:
        1. passes the include/exclude `PatternMatcher`
        2. is not reported as ignored by the `GitignoreMatcher`
           (only consulted when respect_gitignore is True)

        returns: `tuple[Path, ...]`
            the kept file paths in sorted order
        """
        ignore_rules: GitignoreMatcher | None = (
            GitignoreMatcher(self.root) if self.respect_gitignore else None
        )
        glob_rules = PatternMatcher(self.include, self.exclude)

        def keep(candidate: Path) -> bool:
            # pattern check first, gitignore check only for survivors
            if not glob_rules.matches(candidate, self.root):
                return False
            return ignore_rules is None or not ignore_rules.is_ignored(candidate)

        return tuple(sorted(filter(keep, self._iter_files())))

    def _iter_files(self) -> Generator[Path, None, None]:
        """
        iterate over all files below the root directory.

        yields nothing when the root does not exist.

        yields: Path
            file paths (not directories)
        """
        if self.root.exists():
            yield from (entry for entry in self.root.rglob("*") if entry.is_file())
@@ -0,0 +1,136 @@
1
+ """
2
+ gitignore handling module for libsightseeing.
3
+
4
+ handles parsing and matching of .gitignore files using pathspec.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from pathspec import GitIgnoreSpec
14
+
15
+
16
class GitignoreMatcher:
    """
    matcher for .gitignore rules.

    collects the rules of every .gitignore file found at or below the root
    directory and answers whether a given path is ignored by them.

    a path counts as ignored when the path itself, or any directory between
    the root and the path, is matched by a .gitignore located in one of its
    ancestor directories. a directory is never tested against the .gitignore
    it contains, since those rules only describe the directory's contents.

    attributes:
        `root: Path`
            the (resolved) root directory to match against
        `specs: list[tuple[Path, GitIgnoreSpec]]`
            list of (directory containing the .gitignore, parsed spec) tuples

    usage:
        ```python
        matcher = GitignoreMatcher(Path("."))
        if matcher.is_ignored(Path("./file.txt")):
            print("file is ignored")
        ```
    """

    root: Path
    specs: "list[tuple[Path, GitIgnoreSpec]]"

    def __init__(self, root: Path) -> None:
        """
        initialise the gitignore matcher.

        arguments:
            `root: Path`
                the root directory to search for .gitignore files
        """
        self.root = root.resolve()
        self.specs = []
        self._collect_gitignore_rules()

    def _collect_gitignore_rules(self) -> None:
        """
        collect all .gitignore rules from root and subdirectories.

        every readable .gitignore file with at least one rule line is parsed
        with pathspec and appended to `self.specs` together with its
        directory. unreadable or undecodable files are skipped.
        """
        # imported lazily so the module can be imported without pathspec
        from pathspec import GitIgnoreSpec

        for gitignore_file in self.root.rglob(".gitignore"):
            if not gitignore_file.is_file():
                continue

            try:
                content = gitignore_file.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # best effort: a broken .gitignore contributes no rules
                continue

            # drop empty lines and comments, keep everything else verbatim
            patterns = [
                line
                for line in content.splitlines()
                if line and not line.startswith("#")
            ]

            if patterns:
                spec = GitIgnoreSpec.from_lines(patterns)
                self.specs.append((gitignore_file.parent, spec))

    def is_ignored(self, file_path: Path) -> bool:
        """
        check if a file is ignored by any .gitignore rule.

        arguments:
            `file_path: Path`
                the file path to check

        returns: `bool`
            True if the file, or a directory between the root and the file,
            is ignored; False otherwise (including paths outside the root)
        """
        resolved_path = file_path.resolve()

        try:
            rel_parts = resolved_path.relative_to(self.root).parts
        except ValueError:
            # every collected .gitignore lives at or below the root, so none
            # of them can apply to a path outside of it
            return False

        # an ignored directory hides everything below it, so test each
        # directory between the root and the file (the root itself excluded)
        current = self.root
        for part in rel_parts[:-1]:
            current = current / part
            if self._is_path_ignored(current, is_dir=True):
                return True

        # finally the file itself
        return self._is_path_ignored(resolved_path)

    def _is_path_ignored(self, path: Path, *, is_dir: bool = False) -> bool:
        """
        check if a single path is matched by any applicable gitignore spec.

        arguments:
            `path: Path`
                the path to check
            `is_dir: bool`
                whether `path` is a directory; directories are matched with a
                trailing slash so directory-only patterns such as `build/`
                apply to them (default: False)

        returns: `bool`
            True if the path is ignored, False otherwise
        """
        for ignore_dir, spec in self.specs:
            # only rules from directories that contain the path apply
            try:
                rel_path = path.relative_to(ignore_dir)
            except ValueError:
                continue

            # the directory holding the .gitignore is not subject to its own
            # rules; matching "." against a pattern like "*" would otherwise
            # mark the whole tree as ignored
            if not rel_path.parts:
                continue

            candidate = rel_path.as_posix()
            if is_dir:
                candidate += "/"

            if spec.match_file(candidate):
                return True

        return False
@@ -0,0 +1,164 @@
1
+ """
2
+ pattern matching module for libsightseeing.
3
+
4
+ handles include/exclude glob pattern matching for files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from fnmatch import fnmatch
10
+ from pathlib import Path
11
+
12
+
13
class PatternMatcher:
    """
    matcher for include/exclude glob patterns.

    decides whether a file path passes a set of include and exclude glob
    patterns. with no include patterns, every non-excluded file passes.

    attributes:
        `include: list[str]`
            glob patterns for files to include
        `exclude: list[str]`
            glob patterns for files to exclude

    usage:
        ```python
        matcher = PatternMatcher(
            include=["*.py", "src/**/*.py"],
            exclude=["tests", ".venv"]
        )
        if matcher.matches(Path("./src/main.py"), Path(".")):
            print("file matches patterns")
        ```
    """

    include: list[str]
    exclude: list[str]

    def __init__(self, include: list[str], exclude: list[str]) -> None:
        """
        initialise the pattern matcher.

        arguments:
            `include: list[str]`
                glob patterns for files to include
            `exclude: list[str]`
                glob patterns for files to exclude
        """
        self.include = include
        self.exclude = exclude

    def matches(self, file_path: Path, root: Path) -> bool:
        """
        check if a file passes the include/exclude patterns.

        exclusion is decided first: a file is rejected when an exclude
        pattern matches the file itself or the root-relative path of one of
        its parent directories. a surviving file is accepted when there are
        no include patterns or at least one of them matches.

        arguments:
            `file_path: Path`
                the file path to check
            `root: Path`
                the root directory for relative path calculation

        returns: `bool`
            True if the file passes, False otherwise
        """
        try:
            relative = file_path.relative_to(root)
        except ValueError:
            # not below root: match against the path as given
            relative = file_path

        # forward slashes everywhere so patterns behave the same on all platforms
        rel_str = str(relative).replace("\\", "/")
        name = file_path.name

        # cumulative parent directories, e.g. "a/b/c.py" -> ["a", "a/b"]
        # (so ".venv" rejects ".venv/script.py")
        segments = rel_str.split("/")
        ancestors = ["/".join(segments[:depth]) for depth in range(1, len(segments))]

        for excluded in self.exclude:
            if self._match_pattern(rel_str, name, excluded):
                return False
            if any(fnmatch(ancestor, excluded) for ancestor in ancestors):
                return False

        if not self.include:
            # nothing to restrict to: everything not excluded is included
            return True

        return any(
            self._match_pattern(rel_str, name, wanted) for wanted in self.include
        )

    def _match_pattern(self, rel_path: str, file_name: str, pattern: str) -> bool:
        """
        match a path against a glob pattern.

        tries, in order: the full relative path, the bare file name (for
        patterns like "*.py"), and finally a `**` expansion for patterns
        that contain both "/" and a standalone "**" segment.

        arguments:
            `rel_path: str`
                relative path from root, using "/" separators
            `file_name: str`
                just the filename
            `pattern: str`
                glob pattern to match against

        returns: `bool`
            True if the path matches the pattern
        """
        if fnmatch(rel_path, pattern) or fnmatch(file_name, pattern):
            return True

        # the remaining logic only concerns "dir/**/rest" style patterns
        if "/" not in pattern or "**" not in pattern:
            return False

        pattern_segments = pattern.split("/")
        if "**" not in pattern_segments:
            # "**" is glued to other characters; no special handling
            return False

        # split around the first "**": "src/**/*.py" -> ("src", "*.py")
        # a leading "**/" is the same case with an empty head
        star_at = pattern_segments.index("**")
        head = "/".join(pattern_segments[:star_at])
        tail = "/".join(pattern_segments[star_at + 1 :])

        if head and not rel_path.startswith(head + "/"):
            return False

        # "**" may swallow any number of directories, so try every
        # trailing portion of the path against the tail pattern
        path_segments = rel_path.split("/")
        return any(
            fnmatch("/".join(path_segments[start:]), tail)
            for start in range(len(path_segments))
        )
File without changes
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: libsightseeing
3
+ Version: 0.1.0
4
+ Summary: a shared library for file finding and source resolution with gitignore support
5
+ Project-URL: homepage, https://github.com/anomalyco/raiseattention
6
+ Project-URL: repository, https://github.com/anomalyco/raiseattention
7
+ Project-URL: documentation, https://github.com/anomalyco/raiseattention#readme
8
+ License: MIT
9
+ Keywords: file-finding,gitignore,source-resolution
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.11
18
+ Requires-Dist: pathspec>=1.0.0
19
+ Provides-Extra: dev
20
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
21
+ Description-Content-Type: text/markdown
22
+
23
+ # libsightseeing
24
+
25
+ a shared library for file finding and source resolution with gitignore support.
26
+
27
+ ## overview
28
+
29
+ libsightseeing provides a simple api for finding files in repositories while
30
+ respecting .gitignore files and supporting include/exclude patterns.
31
+
32
+ ## installation
33
+
34
+ ```bash
35
+ pip install libsightseeing
36
+ ```
37
+
38
+ ## usage
39
+
40
+ ### simple api
41
+
42
+ ```python
43
+ from libsightseeing import find_files
44
+
45
+ # find all python files
46
+ files = find_files(".", include=["*.py"])
47
+
48
+ # find files excluding tests
49
+ files = find_files("src", exclude=["tests"])
50
+
51
+ # include gitignored files
52
+ files = find_files(".", include=["*.py"], respect_gitignore=False)
53
+ ```
54
+
55
+ ### advanced api
56
+
57
+ ```python
58
+ from libsightseeing import SourceResolver
59
+
60
+ resolver = SourceResolver(
61
+ root=".",
62
+ include=["src/**/*.py"],
63
+ exclude=["tests"],
64
+ respect_gitignore=True,
65
+ )
66
+ files = resolver.resolve()
67
+ ```
68
+
69
+ ## features
70
+
71
+ - respects .gitignore files automatically
72
+ - supports glob patterns for include/exclude
73
+ - simple one-liner api
74
+ - configurable resolver for advanced use cases
75
+ - only depends on pathspec
76
+
77
+ ## licence
78
+
79
+ mit
@@ -0,0 +1,8 @@
1
+ libsightseeing/__init__.py,sha256=sDf_gZcojFAxYsqrv-6WRuendWe-OWbtZK37w934yRM,2583
2
+ libsightseeing/core.py,sha256=kakTPH4IVVNwSYHQJzp3o2tHyzAH2ke515oOxjshmTk,3061
3
+ libsightseeing/gitignore.py,sha256=hxf1VN9jo6cndisztKAsu4dLpTVz7vsPu2lPgbmsPOE,3925
4
+ libsightseeing/patterns.py,sha256=thlxiUDORzOUB3aNaESabM3qdcuZwfOgV1PtKTAUTvo,5521
5
+ libsightseeing/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ libsightseeing-0.1.0.dist-info/METADATA,sha256=wnKOdHnLAyTpWciosKieti_HlNEOvM_KPyzkbyehD24,2006
7
+ libsightseeing-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
8
+ libsightseeing-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any