skeletonpy 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ Metadata-Version: 2.4
2
+ Name: skeletonpy
3
+ Version: 1.0.0
4
+ Project-URL: Homepage, https://github.com/Premik/skeletonpy
5
+ Project-URL: Repository, https://github.com/Premik/skeletonpy
6
+ Project-URL: Issues, https://github.com/Premik/skeletonpy/issues
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: jedi
9
+ Requires-Dist: pathspec
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest; extra == "dev"
12
+ Requires-Dist: black; extra == "dev"
13
+ Requires-Dist: isort; extra == "dev"
14
+ Requires-Dist: ruff; extra == "dev"
15
+ Requires-Dist: mypy; extra == "dev"
16
+
17
+ # SkeletonPy
18
+
19
+ SkeletonPy is a Python utility for code analysis and summarization. It parses Python source code to generate a compact overview, which is particularly useful for reducing the context size when working with Large Language Models (LLMs). By providing a summarized version of the code, it helps improve the performance of AI-assisted coding and reduces token usage.
20
+
21
+ ## Motivation
22
+
23
+ SkeletonPy is designed as a fast, pure code-driven alternative to complex local indexers (like those used in Continue or Cursor) for developers who want a lightweight, zero-overhead solution. It serves as a companion for Agentic Frameworks by providing them with a highly accurate map of your Python repository.
24
+
25
+ Why use SkeletonPy over full-context stuffing or maintaining local indexes?
26
+
27
+ * **Zero Overhead Code Mapping:** Code changes frequently during development. Instead of maintaining complex embeddings, local vector databases, or dealing with expensive re-indexing processes, SkeletonPy runs instantly and entirely locally without LLMs.
28
+ * **Focused Context:** Pumping entire repositories into the prompt window often leads to the "lost in the middle" phenomenon, where models overlook pieces of the context. A concise skeleton limits irrelevant information, which helps smaller local models and large models alike focus on what actually matters.
29
+ * **Cost and Speed:** Passing a compact skeleton instead of full source files means significantly fewer input tokens. This directly translates to lower API costs and faster responses.
30
+ * **Perfect for Agentic Workflows:** The generated summary contains original file names and precise line numbers down to class-level resolution.
31
+
32
+ ![side_by_side_example](doc/example-sbs.png)
33
+
34
+ ## Quick Start
35
+
36
+ From your project's root directory, run `skeletonpy` with the path to your source code (`src`):
37
+
38
+ ```bash
39
+ uvx skeletonpy src
40
+ ```
41
+
42
+ This will scan all Python files in the `src` directory and create a `skeleton.txt` file inside it. You can then append the content of this file to your LLM prompt.
43
+
44
+ ## Installation
45
+
46
+ You can install `skeletonpy` from PyPI using your favorite package manager like `pip` or `uv`.
47
+
48
+ ```bash
49
+ pip install skeletonpy
50
+
51
+ uv pip install skeletonpy
52
+ ```
53
+
54
+ Alternatively, you can run it directly without a permanent installation:
55
+
56
+ ```bash
57
+ pipx run skeletonpy -- --help
58
+
59
+ uvx skeletonpy --help
60
+ ```
61
+
62
+ Once installed, you can invoke the script:
63
+ ```bash
64
+ skeletonpy --help
65
+ ```
66
+
67
+ ## Usage
68
+
69
+ Run `skeletonpy` with the path to your source directory/directories. You can use include and exclude patterns to filter the files. The patterns are regular expressions.
70
+
71
+ For example, to process the `src` directory, including all Python files but excluding test files, and save the output to `main_src.txt`:
72
+
73
+ ```bash
74
+ skeletonpy src --exclude "_test\.py" -o main_src.txt
75
+ ```
76
+
77
+ This will generate a `main_src.txt` file. If you provide an absolute or relative path as output, it will be respected.
78
+ See the [examples folder](examples/README.md) for more.
@@ -0,0 +1,62 @@
1
+ # SkeletonPy
2
+
3
+ SkeletonPy is a Python utility for code analysis and summarization. It parses Python source code to generate a compact overview, which is particularly useful for reducing the context size when working with Large Language Models (LLMs). By providing a summarized version of the code, it helps improve the performance of AI-assisted coding and reduces token usage.
4
+
5
+ ## Motivation
6
+
7
+ SkeletonPy is designed as a fast, pure code-driven alternative to complex local indexers (like those used in Continue or Cursor) for developers who want a lightweight, zero-overhead solution. It serves as a companion for Agentic Frameworks by providing them with a highly accurate map of your Python repository.
8
+
9
+ Why use SkeletonPy over full-context stuffing or maintaining local indexes?
10
+
11
+ * **Zero Overhead Code Mapping:** Code changes frequently during development. Instead of maintaining complex embeddings, local vector databases, or dealing with expensive re-indexing processes, SkeletonPy runs instantly and entirely locally without LLMs.
12
+ * **Focused Context:** Pumping entire repositories into the prompt window often leads to the "lost in the middle" phenomenon, where models overlook pieces of the context. A concise skeleton limits irrelevant information, which helps smaller local models and large models alike focus on what actually matters.
13
+ * **Cost and Speed:** Passing a compact skeleton instead of full source files means significantly fewer input tokens. This directly translates to lower API costs and faster responses.
14
+ * **Perfect for Agentic Workflows:** The generated summary contains original file names and precise line numbers down to class-level resolution.
15
+
16
+ ![side_by_side_example](doc/example-sbs.png)
17
+
18
+ ## Quick Start
19
+
20
+ From your project's root directory, run `skeletonpy` with the path to your source code (`src`):
21
+
22
+ ```bash
23
+ uvx skeletonpy src
24
+ ```
25
+
26
+ This will scan all Python files in the `src` directory and create a `skeleton.txt` file inside it. You can then append the content of this file to your LLM prompt.
27
+
28
+ ## Installation
29
+
30
+ You can install `skeletonpy` from PyPI using your favorite package manager like `pip` or `uv`.
31
+
32
+ ```bash
33
+ pip install skeletonpy
34
+
35
+ uv pip install skeletonpy
36
+ ```
37
+
38
+ Alternatively, you can run it directly without a permanent installation:
39
+
40
+ ```bash
41
+ pipx run skeletonpy -- --help
42
+
43
+ uvx skeletonpy --help
44
+ ```
45
+
46
+ Once installed, you can invoke the script:
47
+ ```bash
48
+ skeletonpy --help
49
+ ```
50
+
51
+ ## Usage
52
+
53
+ Run `skeletonpy` with the path to your source directory/directories. You can use include and exclude patterns to filter the files. The patterns are regular expressions.
54
+
55
+ For example, to process the `src` directory, including all Python files but excluding test files, and save the output to `main_src.txt`:
56
+
57
+ ```bash
58
+ skeletonpy src --exclude "_test\.py" -o main_src.txt
59
+ ```
60
+
61
+ This will generate a `main_src.txt` file. If you provide an absolute or relative path as output, it will be respected.
62
+ See the [examples folder](examples/README.md) for more.
@@ -0,0 +1,60 @@
1
+ [project]
2
+ name = "skeletonpy"
3
+ version = "1.0.0"
4
+ readme = "README.md"
5
+
6
+ dependencies = [
7
+ "jedi",
8
+ "pathspec",
9
+ ]
10
+
11
+ [project.urls]
12
+ Homepage = "https://github.com/Premik/skeletonpy"
13
+ Repository = "https://github.com/Premik/skeletonpy"
14
+ Issues = "https://github.com/Premik/skeletonpy/issues"
15
+
16
+
17
+ [project.scripts]
18
+ skeletonpy = "app:main"
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "pytest",
23
+ "black",
24
+ "isort",
25
+ "ruff",
26
+ "mypy",
27
+ ]
28
+
29
+
30
+ [tool.pytest.ini_options]
31
+ pythonpath = ["src", "tests"]
32
+ log_cli = true
33
+ log_cli_level = "DEBUG"
34
+ #log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
35
+ #log_cli_date_format = "%Y-%m-%d %H:%M:%S"
36
+
37
+ [tool.black]
38
+ line-length = 160
39
+ skip-string-normalization = true
40
+
41
+ [tool.isort]
42
+ profile = "black"
43
+ src_paths = ["src", "tests"]
44
+ line_length = 160
45
+
46
+ [tool.ruff]
47
+ line-length = 160
48
+ ignore = ["E741", "E722", "E731"]
49
+
50
+ [tool.mypy]
51
+ strict_optional = false
52
+ disallow_untyped_calls = true
53
+ warn_unused_configs = true
54
+ # disallow_untyped_defs = true
55
+ check_untyped_defs = true
56
+ pretty = true
57
+
58
+ [tool.setuptools]
59
+ package-dir = {"" = "src"}
60
+ py-modules = ["app", "parsing", "overview", "sources"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,122 @@
1
+ import argparse
2
+ import hashlib
3
+ import json
4
+ import os
5
+ import shutil
6
+ from pathlib import Path
7
+
8
+ from parsing import ProjectModel
9
+ from sources import FileMan
10
+
11
+
12
def parse_args() -> argparse.Namespace:
    """Define and evaluate the command-line interface.

    Returns:
        Parsed namespace with ``dirs``, ``include``, ``include_exact``,
        ``exclude`` and ``output`` attributes.
    """
    ap = argparse.ArgumentParser(description='Process project files.')
    ap.add_argument('dirs', nargs='*', default=[os.getcwd()], help='Directories to process (default: current directory)')
    ap.add_argument(
        '-i',
        '--include',
        nargs='+',
        default=[],
        help="Include files if their path contains any of the given strings (substring match).",
    )
    ap.add_argument(
        '-I',
        '--include-exact',
        nargs='+',
        default=[],
        help="Include files if their full path matches one of the given regular expressions. This performs a full regexp match, as opposed to the simple substring match of '--include'.",
    )
    ap.add_argument('-e', '--exclude', nargs='+', default=[], help="Exclude files if their full path matches one of the given regular expressions.")
    ap.add_argument('-o', '--output', help='Output file name or path (default: same as summary file)')
    # Parse and hand back in one step; no intermediate variable needed.
    return ap.parse_args()
34
+
35
+
36
def get_args_hash(args: argparse.Namespace) -> str:
    """Return a stable SHA-256 hex digest of the parsed arguments.

    The namespace is serialized to JSON with sorted keys, so the digest
    is independent of the order in which arguments were defined.
    """
    canonical = json.dumps(vars(args), sort_keys=True)
    return hashlib.sha256(canonical.encode('utf-8')).hexdigest()
43
+
44
+
45
+ def get_output_path(src_path: Path, output_spec: str | None) -> Path:
46
+ """
47
+ Determine the output path based on source path and output specification.
48
+
49
+ Args:
50
+ src_path: The source directory path
51
+ output_spec: Output file specification - can be None, filename, or path
52
+
53
+ Returns:
54
+ Path object for the target output location
55
+ """
56
+ if not output_spec:
57
+ return src_path
58
+
59
+ output_path = Path(output_spec)
60
+
61
+ # If just a filename without path components, put in source dir
62
+ if len(output_path.parts) == 1:
63
+ return src_path / output_path
64
+
65
+ # If relative path, make it relative to source dir
66
+ if not output_path.is_absolute():
67
+ return src_path / output_path
68
+
69
+ # Otherwise use absolute path as-is
70
+ return output_path
71
+
72
+
73
def debug() -> None:
    """Start a debugpy server on port 5678 and block until a client attaches."""
    import debugpy

    # Bind on all interfaces so a debugger in another container/host can connect.
    debugpy.listen(("0.0.0.0", 5678))  # Use any available port, here 5678

    print("Waiting for debugger to attach...")
    # Execution halts here until an IDE connects.
    debugpy.wait_for_client()
    print("Debugger is attached. Starting execution...")
83
+
84
+
85
def main() -> None:
    """CLI entry point: parse arguments, build skeletons, export the summary.

    A scratch directory keyed by a hash of the arguments is (re)created so
    that distinct invocations never reuse stale output, then the summary
    file is copied next to each source path (or to the requested output).
    """
    import tempfile

    args = parse_args()

    args_hash = get_args_hash(args)
    # Use the platform temp dir instead of a hard-coded "/tmp" so this
    # also works on non-POSIX systems.
    trg = Path(tempfile.gettempdir()) / "skels" / args_hash

    # Start from a clean slate for this argument combination.
    if trg.exists() and trg.is_dir():
        shutil.rmtree(trg)
    trg.mkdir(parents=True, exist_ok=True)

    pm = ProjectModel(proj_root_path=trg)
    pm.file_man.src_paths = [Path(d) for d in args.dirs]

    includes = []
    if args.include:
        # Substring matches are translated into regexes by FileMan.
        includes.extend(FileMan.make_substring_match(p) for p in args.include)
    if args.include_exact:
        includes.extend(args.include_exact)

    if not includes:
        includes = ['.*']  # No filters given: match every file.

    pm.file_man.includes = includes
    pm.file_man.excludes = args.exclude

    pm.parse_all()

    for src_path in pm.file_man.src_paths:
        output_path = get_output_path(src_path, args.output)
        # Create parent directories if needed
        output_path.parent.mkdir(parents=True, exist_ok=True)
        print(f"{pm.file_man.summary_file_path}->{output_path}")
        shutil.copy2(pm.file_man.summary_file_path, output_path)
119
+
120
+
121
+ if __name__ == "__main__":
122
+ main()
@@ -0,0 +1,186 @@
1
+ import re
2
+ from dataclasses import dataclass, field
3
+ from functools import cached_property
4
+ from pathlib import Path
5
+ from typing import Iterable, Iterator
6
+
7
+ from sources import SrcFile, SrcFragment
8
+
9
+
10
def trim_string(value: str, max_length: int = 200) -> str:
    """Trim *value* from the middle if it exceeds ``max_length``.

    Strings up to 20% over the limit are returned untouched (trimming
    there would save almost nothing).  Longer strings keep a 70% prefix
    and as much of the tail as fits, joined by a single '…' character.

    Args:
        value: String to shorten.
        max_length: Target maximum length of the result.

    Returns:
        The original string, or a shortened one of exactly
        ``max_length`` characters.
    """
    if len(value) <= max_length * 1.2:
        return value
    prefix_length = int(max_length * 0.7)  # 70% prefix
    # '…' is one character, so reserve exactly one slot for it.  The
    # previous code subtracted 3 (as for an ASCII "..."), which made the
    # result two characters shorter than the allowed budget.
    suffix_length = max_length - prefix_length - 1
    return f"{value[:prefix_length]}…{value[-suffix_length:]}"
17
+
18
+
19
+ def trim_collection(value: dict | list | set, max_items: int = 4) -> str:
20
+ """Trims collections like dicts, lists, or sets to max_items."""
21
+ if len(value) <= max_items:
22
+ return str(value) # Return unmodified if within limit
23
+
24
+ if isinstance(value, dict):
25
+ header = list(value.items())[: max_items // 2]
26
+ footer = list(value.items())[-max_items // 2 :]
27
+ return f"{{{', '.join(map(str, header))}, …, {', '.join(map(str, footer))}}}"
28
+
29
+ if isinstance(value, (list, set)):
30
+ header = list(value)[: max_items // 2]
31
+ footer = list(value)[-max_items // 2 :]
32
+ return f"[{', '.join(map(str, header))}, …, {', '.join(map(str, footer))}]"
33
+
34
+ return str(value)
35
+
36
+
37
@dataclass
class OverviewSection:
    """One output section of the skeleton (a module, class or function).

    Holds a sparse, line-indexed buffer (``out_lines_raw``) that mirrors
    original source line numbers; ``None`` entries mark dropped lines.
    Sections are filtered (comments, docstrings, blanks), rendered, and
    finally written to their own file or appended to the shared summary.
    """

    # Source fragment (line ranges) this section owns; excluded from repr.
    frag: SrcFragment = field(repr=False)
    # Section name, conventionally "<type>-<identifier>", e.g. "class-Foo".
    name: str = "test"
    # Name of the enclosing section ("" for top-level sections).
    parent_name: str = ""

    # Sparse output buffer indexed by source line; None = dropped line.
    out_lines_raw: list[str | None] = field(default_factory=list, repr=False)

    def _ensure_index(self, index: int) -> None:
        """Grow the buffer with '' entries so that *index* is addressable."""
        if index < len(self.out_lines_raw):
            return
        padding = [''] * (index + 1 - len(self.out_lines_raw))
        self.out_lines_raw.extend(padding)

    def out_lines(self) -> Iterator[str]:
        """Yield surviving lines, skipping ``None`` entries and leading blanks."""
        # NOTE(review): name is inverted — True means "still skipping the
        # leading blank run"; behavior is correct.
        started = True
        for l in self.out_lines_raw:
            if l is None:
                continue
            if started:  # Skip leading blank lines
                if l.strip() == "":
                    continue
                started = False
            yield l

    @property
    def content(self) -> str:
        """The rendered section as one newline-joined string."""
        return "\n".join(self.out_lines())

    def remove_whitespaces(self) -> None:
        """Strip ALL whitespace from every surviving line (compacts signatures)."""
        self.out_lines_raw = [re.sub(r'\s+', '', line) if line is not None else None for line in self.out_lines_raw]

    def remove_comments(self) -> None:
        """Drop '#' comments.  NOTE(review): regex-based, so a '#' inside a
        string literal is also stripped — acceptable for skeleton output."""
        self.out_lines_raw = [re.sub(r'#.*', '', line) if line is not None else None for line in self.out_lines_raw]

    def remove_blank_lines(self) -> None:
        """Turn blank lines into ``None`` so they are skipped on output."""
        self.out_lines_raw = [line if line is not None and line.strip() != "" else None for line in self.out_lines_raw]

    def remove_single_line_docstrings(self) -> None:
        """Erase triple-quoted strings that open and close on the same line."""
        pattern = r'(\'\'\'.*?\'\'\')|(\"\"\".*?\"\"\")'
        self.out_lines_raw = [re.sub(pattern, '', line) if line is not None else None for line in self.out_lines_raw]

    def remove_multi_line_docstrings(self, sepa='"""') -> None:
        """Drop multi-line docstrings delimited by *sepa*.

        Toggles an in/out flag on each line containing the separator.
        Assumes at most one separator per line — single-line docstrings
        must have been removed by ``remove_single_line_docstrings`` first.
        """
        in_docstring = False

        for i, line in enumerate(self.out_lines_raw):
            if line is None:
                continue

            # if line.find("'''") > -1 or line.find('"""') > -1:
            if line.find(sepa) > -1:
                in_docstring = not in_docstring
                if not in_docstring:
                    self.out_lines_raw[i] = None  # Closing separator line.
            if in_docstring:
                self.out_lines_raw[i] = None  # Opening/interior lines.

    def type_from_name(self, name: str) -> str:
        """Extract the type prefix from a "<type>-<identifier>" name ('' if none)."""
        if not name:
            return ""
        if '-' in name:
            return name.split('-', 1)[0]
        else:
            return ""

    @property
    def type(self) -> str:
        # Section kind, e.g. "module", "class" or "function".
        return self.type_from_name(self.name)

    @property
    def parent_type(self) -> str:
        # Kind of the enclosing section ('' for top-level sections).
        return self.type_from_name(self.parent_name)

    @property
    def src(self) -> SrcFile:
        # The source file this section was extracted from.
        return self.frag.src

    def __getitem__(self, index: int | slice) -> str | list[str] | None:
        """Read a line (or slice) from the buffer, growing it on demand.

        Unlike a normal list, a negative integer index returns ``None``
        instead of wrapping around.
        """
        if isinstance(index, slice):
            return self.out_lines_raw[index]
        if index < 0:
            return None
        self._ensure_index(index)
        return self.out_lines_raw[index]

    def __setitem__(self, index: int, value: str) -> None:
        """Write a line at *index*, growing the buffer as needed."""
        self._ensure_index(index)
        self.out_lines_raw[index] = value

    def cut_lines(self, start: int, end: int) -> list[str]:
        """Remove lines start..end (inclusive) and return them.

        The removed range is also excluded from this section's fragment.
        Returns [] for an out-of-range start or an empty range.
        """
        if start < 0 or start >= len(self.out_lines_raw):
            return []

        end = min(end, len(self.out_lines_raw))
        if end < start:
            return []

        cut_lines = self.out_lines_raw[start : end + 1]
        self.drop_lines(start, end)
        self.frag.exclude_range_indices(start, end)
        return cut_lines

    def paste_lines(self, lines: Iterable[str], start: int = 0) -> None:
        """Write *lines* consecutively into the buffer starting at *start*."""
        for i, line in enumerate(lines, start=start):
            self[i] = line

    def drop_lines(self, start: int = 0, end: int | None = None, val: str | None = None) -> None:
        """Overwrite lines start..end (inclusive) with *val*.

        The default ``None`` marks them as dropped; call with no args to
        clear the entire buffer.
        """
        if end is None or start >= len(self.out_lines_raw):
            end = len(self.out_lines_raw) - 1
        if start < 0:
            start = 0

        if end < start:
            return
        self.out_lines_raw[start : end + 1] = [val] * (end - start + 1)

    def set_lines(self, lines: list[str], start: int = 0) -> None:
        """Replace the whole buffer content with *lines* placed at *start*."""
        self.drop_lines()
        self.paste_lines(lines, start)

    @property
    def out_file_path(self) -> Path:
        """Target file for this section under the source's skeleton directory."""
        p = self.src.skel_path
        if self.parent_name:
            p = p / self.parent_name
        return p / f"{self.name}.py.txt"

    @cached_property
    def summary_file_path(self) -> Path:
        # Shared summary file managed by the source's FileMan.
        return self.src.file_man.summary_file_path

    @cached_property
    def previous_content(self) -> str:
        """Previously saved content of this section ('' if never saved)."""
        try:
            with open(self.out_file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            return ''

    def append_to_summary(self, text: str) -> None:
        """Append raw *text* to the shared summary file."""
        with open(self.summary_file_path, 'a', encoding='utf-8') as f:
            f.write(text)

    def append_content_to_summary(self) -> None:
        """Append this section's rendered content to the shared summary file."""
        self.append_to_summary(self.content)

    def save(self) -> None:
        """Write the rendered content to ``out_file_path``, creating parents."""
        self.out_file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.out_file_path, 'w', encoding='utf-8') as f:
            f.write(self.content)
@@ -0,0 +1,342 @@
1
+ import re
2
+ import shutil
3
+ from dataclasses import dataclass, field
4
+ from functools import cached_property
5
+ from pathlib import Path
6
+ from typing import Callable, Iterator
7
+
8
+ import jedi
9
+ from jedi.api.classes import Name
10
+
11
+ from overview import OverviewSection, trim_string
12
+ from sources import FileMan, SrcFile, SrcFragment
13
+
14
+
15
@dataclass
class PythonParser:
    """Builds a compact skeleton of one Python source file using jedi.

    Iterates every definition name reported by jedi in source order,
    maintains a stack of ``OverviewSection`` objects (module -> class ->
    function) and dispatches each name to a handler chosen by its type
    and decorator tags.  Popped module/class sections are cleaned up,
    saved, and appended to the shared summary file.
    """

    # The file being parsed.
    src: SrcFile
    # Jedi name currently being processed (None before parsing starts).
    current_name: Name | None = None
    # Number of decorator lines found directly above the current name.
    extra_lines_before_current: int = 0
    # Tags of the current name, e.g. "import", "@property".
    tags: list[str] = field(default_factory=list)
    # Section stack; index 0 is the module section, last is innermost.
    sections: list[OverviewSection] = field(default_factory=list)
    # Matches decorator names like "@dataclass" (class attribute, shared).
    decorator_re = re.compile(r"@(?P<name>\w+)")
    # Dispatch table "parenttype.nametype-tag" -> handler; see __post_init__.
    dispatch_methods: dict[str, Callable] = field(default_factory=dict, repr=False)
    # Limits used when compacting long statements for the overview.
    max_string_length: int = 200
    max_items: int = 4

    def __post_init__(self) -> None:
        # Register handlers; '' is the catch-all (handle_skip).  Keys are
        # matched most-specific-first in dispatch_handler.
        self.dispatch_methods.update(
            {
                'module-import': self.handle_import,
                'class-import': self.handle_import,
                'function-import': self.handle_import,
                'function-@property': self.handle_property,
                'function-@cached_property': self.handle_property,
                'class': self.handle_class,
                'class.statement': self.handle_class_statement,
                'module.statement': self.handle_module_statement,
                'function': self.handle_function,
                '': self.handle_skip,
            }
        )

    @property
    def module_section(self) -> OverviewSection:
        """The bottom-of-stack (module) section, or None before parsing."""
        if not self.sections:
            return None
        return self.sections[0]

    @property
    def current_section(self) -> OverviewSection:
        """The innermost (top-of-stack) section, or None when empty."""
        if not self.sections:
            return None
        return self.sections[-1]

    @property
    def parent_section(self) -> OverviewSection:
        """The section directly below the innermost one, or None."""
        if len(self.sections) < 2:
            return None
        return self.sections[-2]

    def push_new_section(self, name: str) -> OverviewSection:
        """Open a new section and claim the current name's lines for it."""
        frg = SrcFragment(self.src)
        ret = OverviewSection(frg, name)
        self.sections.append(ret)
        self.claim_current_lines()
        return ret

    def push_module(self, name: str) -> OverviewSection:
        """Open the top-level "module-<name>" section."""
        ret = self.push_new_section(f"module-{name}")
        return ret

    def push_class(self, name: str) -> OverviewSection:
        """Open a "class-<name>" section; it is its own parent namespace."""
        ret = self.push_new_section(f"class-{name}")
        ret.parent_name = ret.name
        return ret

    def push_function(self, name: str) -> OverviewSection:
        """Open a "function-<name>" section nested under the current one."""
        # Capture the parent name BEFORE pushing changes the stack top.
        pn = self.current_section.name
        ret = self.push_new_section(f"function-{name}")
        ret.parent_name = pn

        return ret

    def pop(self) -> OverviewSection:
        """Close the innermost section and flush or merge its content.

        Module/class sections are cleaned (comments, docstrings, blanks
        removed), saved to their own file and appended to the summary.
        Function sections are merged back into their parent's buffer.
        """
        cs = self.current_section
        assert cs, "Nothing to pop"
        self.sections.pop()
        parent_cs = self.current_section

        if cs.type in ["module", "class"]:
            # Order matters: single-line docstrings must go before the
            # multi-line pass (which assumes one separator per line).
            cs.remove_comments()
            cs.remove_single_line_docstrings()
            cs.remove_multi_line_docstrings('"""')
            cs.remove_multi_line_docstrings("'''")
            cs.remove_blank_lines()
            cs.save()
            # Summary header: module path plus the section's line range.
            cs.append_to_summary(f"\n\n# {self.src.module_path} {cs.frag.range_str}\n")
            cs.append_content_to_summary()
            return cs

        parent_cs.paste_lines(cs.out_lines(), cs.frag.start)
        return cs

    def claim_current_lines(self) -> None:
        """Copy the current name's source lines into the innermost section.

        The range is registered on the section's fragment and cut out of
        the parent section so no line is owned twice.
        """
        start = self.current_range[0]
        end = self.current_range[1]
        for line_loc, st in enumerate(self.current_lines):
            line = start + line_loc
            self.current_section[line] = st

        self.current_section.frag.include_range_indices(start, end)

        if self.parent_section:
            self.parent_section.cut_lines(start, end)

    def handle_import(self) -> None:
        """Imports carry no skeleton value — drop their lines."""
        cs = self.current_section
        cs.drop_lines(self.current_range[0], self.current_range[1])

    def handle_function(self) -> None:
        """Summarize a function into a single compacted signature line."""
        cn = self.current_name
        self.push_function(cn.name)
        cs = self.current_section

        # NOTE(review): 'sum' shadows the builtin; kept unchanged here.
        try:
            sum = f"{cn.get_type_hint()}"
        except Exception as e:
            # Probably a bug in Jedi; work around by using the basic
            # description without type information.
            print(f"Exception: {str(e)}")
            # traceback.print_exc()
            sum = cn.description

        # Compact the signature: drop 'self', quote characters and all
        # whitespace, and omit a '-> None' return annotation.
        sum = re.sub(r'self\s*,?', '', sum)
        q = "[\"']?"
        sum = re.sub(f'{q}', '', sum)
        sum = re.sub(r'\s+', '', sum)
        sum = re.sub(rf'->\s*{q}None{q}', '', sum)
        tags = ''.join(self.tags)
        if tags:
            tags += ' '
        sum = f"{tags}{sum}"
        cs.set_lines([sum])

    def handle_class_statement(self) -> None:
        """Keep a class-level statement (e.g. a field) as one compacted line."""
        cs = self.current_section
        cn = self.current_name
        code = cn.get_line_code()
        code = re.sub(r'\s+', '', code)
        # Apply trimming to possibly long values in the code
        code = trim_string(code, self.max_string_length)
        cs.paste_lines([code], self.current_range[0])

    def handle_module_statement(self) -> None:
        """Module-level statement: currently kept as claimed (no rewriting)."""
        cs = self.current_section
        assert cs == self.module_section
        cn = self.current_name  # NOTE(review): unused — future handling?

    def handle_property(self) -> None:
        """Summarize a @property/@cached_property as "name->ReturnType"."""
        cn = self.current_name
        self.push_function(cn.name)
        cs = self.current_section
        function_signature = cn.get_line_code()
        # Regular expression to find return type hint
        match = re.search(r'(->\s*[a-zA-Z_][a-zA-Z0-9_]*)', function_signature)
        return_type = ""
        if match:
            return_type = match.group(1)
        sum = f"{cn.name}{return_type}"
        cs.set_lines([sum])
        cs.remove_whitespaces()

    def handle_class(self) -> None:
        """Open a new class section; its members follow as separate names."""
        self.push_class(self.current_name.name)

    def handle_fail(self) -> None:
        """Explicit failure handler (not registered by default)."""
        raise NotImplementedError(f"Handler not implemented for {self.current_name}")

    def handle_skip(self) -> None:
        """Catch-all: intentionally leave the claimed lines untouched."""
        s = self.current_name
        cr = self.current_range

    @cached_property
    def script(self) -> jedi.Script:
        """Jedi script for this source file (built lazily, cached)."""
        return jedi.Script(path=self.src.path)

    @property
    def current_range(self) -> tuple[int, int]:
        """0-based inclusive (start, end) line range of the current name.

        The start is extended upwards over decorator lines counted by
        ``find_tags``.  Without a current name, covers the whole file.
        """
        if not self.current_name:
            return (0, len(self.src.lines) - 1)
        start_line, _ = self.current_name.get_definition_start_position()
        end_line, _ = self.current_name.get_definition_end_position()
        # Convert lines to start from 0
        return (start_line - self.extra_lines_before_current - 1, end_line - 1)

    @property
    def current_range_str(self) -> str:
        """Human-readable 1-based range, e.g. "(5)" or "(5-12)"."""
        start = self.current_range[0]
        end = self.current_range[1]
        if end - start == 0:
            return f"({start+1})"
        return f"({start+1}-{end+1})"

    @property
    def current_lines(self) -> list[str]:
        """The source lines covered by ``current_range`` (inclusive)."""
        start, end = self.current_range
        return self.src.lines[start : end + 1]

    def get_prev_lines(self) -> Iterator[str]:
        """Yield source lines above the current range, walking upwards.

        NOTE(review): stops before index 0, so the file's first line is
        never yielded — confirm whether that is intended.
        """
        start, _ = self.current_range
        for i in range(start - 1, 0, -1):
            yield self.src.lines[i]

    @property
    def current_line(self) -> str:
        """The first source line of the current name's definition."""
        return self.current_name.get_line_code()

    def find_tags(self) -> None:
        """Collect tags ("import", "@decorator"...) for the current name.

        Also counts how many decorator lines sit directly above the
        definition so ``current_range`` can include them.
        """
        self.tags.clear()
        if "import" in self.current_line:
            self.tags.append("import")

        self.extra_lines_before_current = 0
        for pl in self.get_prev_lines():
            # Walk back through the lines to find any decorators.
            decors = PythonParser.decorator_re.findall(pl)
            if not decors:
                break  # No more (or any) are found
            self.tags += [f"@{d}" for d in decors]
            self.extra_lines_before_current += 1

    def pop_if_neded(self, line_end: int) -> None:
        """Pop sections whose fragment ends before *line_end*.

        NOTE(review): method name has a typo ("neded") — kept because
        renaming would break callers.  The module section (index 0) is
        never popped here.
        """
        while len(self.sections) > 1 and line_end > self.current_section.frag.end:
            self.pop()

    def parse_src(self) -> None:
        """Parse the whole file and emit skeleton sections.

        Relies on jedi returning names in source order so that section
        push/pop mirrors the file's nesting.
        """
        self.push_module(self.src.base_name)

        for name in self.script.get_names(all_scopes=True, definitions=True):
            self.current_name = name
            self.find_tags()
            line_code = name.get_line_code()
            # Skip TYPE_CHECKING-only constructs and bare 'pass' bodies.
            if "TYPE_CHECKING" in line_code or line_code.strip() == "pass":
                continue

            self.pop_if_neded(self.current_range[0])
            if not self.sections:
                break

            self.dispatch_handler()
        self.pop_if_neded(len(self.src.lines))  # After file end, to pop all section

    def parse_debug(self) -> None:
        """Produce a debug overview annotating every line with jedi info."""
        self.push_new_section("module-debug")
        # Blank out the whole buffer first; debug_output fills it per line.
        self.module_section.drop_lines(val="")
        for name in self.script.get_names(all_scopes=True):
            self.current_name = name
            self.find_tags()
            self.debug_output()
        self.current_section.save()

    def dispatch_handler(self) -> None:
        """Route the current name to the most specific registered handler.

        Candidate keys combine the enclosing section type, the jedi name
        type and tags (e.g. "class.function-@property"); keys are tried
        longest-first, with '' as the catch-all.

        Raises:
            NotImplementedError: when no candidate key is registered.
        """
        parent_type = self.current_section.type
        if parent_type:
            parent_type += "."

        dispatch_keys = [self.current_name.type, f"{parent_type}{self.current_name.type}", '']

        for tag in self.tags:
            dispatch_keys.append(f"{self.current_name.type}-{tag}")
            dispatch_keys.append(f"{parent_type}{self.current_name.type}-{tag}")

        # Deduplicate while preserving insertion order.
        dispatch_keys = list(dict.fromkeys(dispatch_keys))
        # Try dispatch keys from most specific to least
        dispatch_keys.sort(key=len, reverse=True)

        for key in dispatch_keys:
            if key in self.dispatch_methods:
                self.dispatch_methods[key]()
                return
        raise NotImplementedError(f"Handler not found for:{dispatch_keys}")

    def debug_output(self) -> None:
        """Annotate the module section with per-line jedi name/type info."""
        s = self.current_name
        ls = self.current_lines  # NOTE(review): unused — debugging leftover?

        for line_loc, st in enumerate(self.current_lines):
            line = self.current_range[0] + line_loc
            if line_loc == 0:
                # Create tag string separately to avoid nested f-string issues
                tag_str = ''.join([f"-{t}" for t in self.tags])
                tp = f"{s.name[:10]}:{s.type}{tag_str}"
            else:
                tp = "-" * 40

            formatted_line = f"{st+ '#':<82} {line+1:<3}{tp[:25]:<25}"
            # If another name already annotated this line, append to it.
            prev_line = self.module_section[line]
            if prev_line:
                formatted_line = f"{prev_line} {tp[:25]:<25}"

            self.module_section[line] = formatted_line
301
+
302
+
303
@dataclass
class ProjectModel:
    """Project-level driver: discovers source files and parses each one."""

    # Root directory where skeleton output is generated.
    proj_root_path: Path

    # File discovery/filtering helper, created in __post_init__.
    file_man: FileMan = field(init=False)

    def __post_init__(self) -> None:
        self.file_man = FileMan(proj_root_path=self.proj_root_path)

    def parse_all(self) -> None:
        """Parse every matched source file, producing skeleton and debug output.

        Raises:
            RuntimeError: when no source file matched the configured filters.
        """
        self.file_man.detect_changes()
        vals = self.file_man.src_files.values()
        if not vals:
            raise RuntimeError(f"No src file matched in {self.file_man.src_paths}")
        for src in vals:
            print(src)
            parser = PythonParser(src)
            # Touch the cached property to build the jedi Script eagerly.
            parser.script
            parser.parse_src()

            # A second, fresh parser produces the debug overview of the
            # same file (sections are not reusable across runs).
            parser = PythonParser(src)
            parser.script
            parser.parse_debug()
326
+
327
+
328
+ if __name__ == "__main__":
329
+ root = Path("/home/premik/.conda/envs/unsloth/lib/python3.12/site-packages/transformers/models/gemma3")
330
+
331
+ trg = Path("/tmp/skels")
332
+ if trg.exists() and trg.is_dir():
333
+ shutil.rmtree(trg)
334
+ trg.mkdir(exist_ok=True)
335
+
336
+ sk_path = root / "skels"
337
+ if not sk_path.exists():
338
+ sk_path.symlink_to(trg, target_is_directory=True)
339
+
340
+ pm = ProjectModel(root)
341
+ pm.file_man.includes = ["modeling_gemma3"]
342
+ pm.parse_all()
@@ -0,0 +1,78 @@
1
+ Metadata-Version: 2.4
2
+ Name: skeletonpy
3
+ Version: 1.0.0
4
+ Project-URL: Homepage, https://github.com/Premik/skeletonpy
5
+ Project-URL: Repository, https://github.com/Premik/skeletonpy
6
+ Project-URL: Issues, https://github.com/Premik/skeletonpy/issues
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: jedi
9
+ Requires-Dist: pathspec
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest; extra == "dev"
12
+ Requires-Dist: black; extra == "dev"
13
+ Requires-Dist: isort; extra == "dev"
14
+ Requires-Dist: ruff; extra == "dev"
15
+ Requires-Dist: mypy; extra == "dev"
16
+
17
+ # SkeletonPy
18
+
19
+ SkeletonPy is a Python utility for code analysis and summarization. It parses Python source code to generate a compact overview, which is particularly useful for reducing the context size when working with Large Language Models (LLMs). By providing a summarized version of the code, it helps improve the performance of AI-assisted coding and reduces token usage.
20
+
21
+ ## Motivation
22
+
23
+ SkeletonPy is designed as a fast, pure code-driven alternative to complex local indexers (like those used in Continue or Cursor) for developers who want a lightweight, zero-overhead solution. It serves as a companion for Agentic Frameworks by providing them with a highly accurate map of your Python repository.
24
+
25
+ Why use SkeletonPy over full-context stuffing or maintaining local indexes?
26
+
27
+ * **Zero Overhead Code Mapping:** Code changes frequently during development. Instead of maintaining complex embeddings, local vector databases, or dealing with expensive re-indexing processes, SkeletonPy runs instantly and entirely locally without LLMs.
28
+ * **Focused Context:** Pumping entire repositories into the prompt window often leads to the "lost in the middle" phenomenon, where models overlook pieces of the context. A concise skeleton limits irrelevant information, which helps smaller local models and large models alike focus on what actually matters.
29
+ * **Cost and Speed:** Passing a compact skeleton instead of full source files means significantly fewer input tokens. This directly translates to lower API costs and faster responses.
30
+ * **Perfect for Agentic Workflows:** The generated summary contains original file names and precise line numbers down to class-level resolution.
31
+
32
+ ![side_by_side_example](doc/example-sbs.png)
33
+
34
+ ## Quick Start
35
+
36
+ From your project's root directory, run `skeletonpy` with the path to your source code (`src`):
37
+
38
+ ```bash
39
+ uvx skeletonpy src
40
+ ```
41
+
42
+ This will scan all Python files in the `src` directory and create a `skeleton.txt` file inside it. You can then append the content of this file to your LLM prompt.
43
+
44
+ ## Installation
45
+
46
+ You can install `skeletonpy` from PyPI using your favorite package manager like `pip` or `uv`.
47
+
48
+ ```bash
49
+ pip install skeletonpy
50
+
51
+ uv pip install skeletonpy
52
+ ```
53
+
54
+ Alternatively, you can run it directly without a permanent installation:
55
+
56
+ ```bash
57
+ pipx run skeletonpy -- --help
58
+
59
+ uvx skeletonpy --help
60
+ ```
61
+
62
+ Once installed, you can invoke the script:
63
+ ```bash
64
+ skeletonpy --help
65
+ ```
66
+
67
+ ## Usage
68
+
69
+ Run `skeletonpy` with the path to your source directory/directories. You can use include and exclude patterns to filter the files. The patterns are regular expressions.
70
+
71
+ For example, to process the `src` directory, including all Python files but excluding test files, and save the output to `skeleton.txt`:
72
+
73
+ ```bash
74
+ skeletonpy src --exclude "_test\.py" -o main_src.txt
75
+ ```
76
+
77
+ This will generate a `main_src.txt` file. If you provide an absolute or relative path as output, it will be respected.
78
+ See the [examples folder](examples/README.md) for more.
@@ -0,0 +1,12 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/app.py
4
+ src/overview.py
5
+ src/parsing.py
6
+ src/sources.py
7
+ src/skeletonpy.egg-info/PKG-INFO
8
+ src/skeletonpy.egg-info/SOURCES.txt
9
+ src/skeletonpy.egg-info/dependency_links.txt
10
+ src/skeletonpy.egg-info/entry_points.txt
11
+ src/skeletonpy.egg-info/requires.txt
12
+ src/skeletonpy.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ skeletonpy = app:main
@@ -0,0 +1,9 @@
1
+ jedi
2
+ pathspec
3
+
4
+ [dev]
5
+ pytest
6
+ black
7
+ isort
8
+ ruff
9
+ mypy
@@ -0,0 +1,4 @@
1
+ app
2
+ overview
3
+ parsing
4
+ sources
@@ -0,0 +1,365 @@
1
+ import base64
2
+ import hashlib
3
+ import json
4
+ import os
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from enum import Enum, auto
8
+ from functools import cached_property
9
+ from pathlib import Path
10
+ from typing import Any, Generator, Iterator, Optional
11
+
12
+ from pathspec import PathSpec
13
+
14
+
15
def hash(s: str, digest=9) -> str:
    """Return a short base64-encoded BLAKE2s digest of *s*.

    *digest* is the digest size in bytes; the base64 text is roughly a
    third longer (9 bytes -> 12 characters).
    """
    digest_bytes = hashlib.blake2s(s.encode("utf-8"), digest_size=digest).digest()
    encoded = base64.b64encode(digest_bytes)
    return encoded.decode("utf-8")
18
+
19
+
20
def hash_short(s: str) -> str:
    """Return a very short (3-byte digest, 4-char base64) hash of *s*."""
    return hash(s, digest=3)
22
+
23
+
24
class ContentState(Enum):
    """Lifecycle state of a tracked source file (or fragment) between runs."""

    UNKNOWN = auto()    # not yet compared against the cached snapshot
    NEW = auto()        # file appeared since the last cached run
    CHANGED = auto()    # file exists but its content hash differs from the cache
    UNCHANGED = auto()  # file exists and its content hash matches the cache
    REMOVED = auto()    # file was cached but no longer exists on disk
30
+
31
+
32
@dataclass
class SrcFragment:
    """A subset of lines belonging to a SrcFile, tracked by 0-based indexes.

    ``line_indexes`` are positions into ``src.lines``; the mutating helpers
    keep them sorted and de-duplicated.
    """

    src: 'SrcFile' = field(compare=False, repr=False)
    line_indexes: list[int] = field(default_factory=list)
    state: ContentState = field(default=ContentState.UNKNOWN, repr=False)

    def __getitem__(self, index: int | slice) -> str | list[str]:
        """Return the source line(s) addressed by *index* into this fragment."""
        if isinstance(index, slice):
            return [self.src.lines[i] for i in self.line_indexes[index]]
        return self.src.lines[self.line_indexes[index]]

    def __iter__(self) -> Generator[str, Any, None]:
        """Yield the fragment's source lines in index order."""
        for i in self.line_indexes:
            yield self.src.lines[i]

    @property
    def content(self) -> str:
        """The fragment's lines joined into one newline-separated string."""
        return "\n".join(self)

    def to_json(self) -> dict[int, str]:
        """Map each line index to a short hash of its current content.

        Returns an empty dict when the backing file no longer exists.
        Note: JSON serialization turns the int keys into strings — see
        ``of_json`` for the matching deserialization.
        """
        if self.src.path.exists():
            return {i: hash_short(self.src.lines[i]) for i in self.line_indexes}
        return {}

    def merge_with(self, other: 'SrcFragment') -> None:
        """Absorb *other*'s line indexes (result stays sorted, de-duplicated)."""
        # Fix: annotation was the quoted string 'None'; None needs no forward ref.
        self.line_indexes = sorted(set(self.line_indexes) | set(other.line_indexes))

    def include_range_indices(self, start: int, end: int) -> None:
        """Add the inclusive index range [start, end] to this fragment."""
        self.line_indexes = sorted(set(self.line_indexes) | set(range(start, end + 1)))

    def exclude_range_indices(self, start: int, end: int) -> None:
        """Remove the inclusive index range [start, end] from this fragment."""
        exclude_set = set(range(start, end + 1))
        self.line_indexes = sorted(i for i in self.line_indexes if i not in exclude_set)

    @property
    def start(self) -> int:
        """First (lowest) line index, or -1 when the fragment is empty."""
        return self.line_indexes[0] if self.line_indexes else -1

    @property
    def end(self) -> int:
        """Last (highest) line index, or -1 when the fragment is empty."""
        return self.line_indexes[-1] if self.line_indexes else -1

    @property
    def range_str(self) -> str:
        """Human-readable 1-based line range, e.g. ``(5)`` or ``(5-9)``."""
        if self.end - self.start == 0:
            return f"({self.start+1})"
        return f"({self.start+1}-{self.end+1})"

    @staticmethod
    def of_json(src: 'SrcFile', json_dict: dict[str, str]) -> "SrcFragment":
        """Rebuild a fragment from a ``to_json`` dict after a JSON round-trip.

        Fix: the parameter is annotated ``dict[str, str]`` — JSON object keys
        are always strings, hence the explicit ``int()`` conversion below.
        """
        line_indexes = []
        state = ContentState.UNCHANGED
        for index_str, hashed in json_dict.items():
            line_indexes.append(int(index_str))
            # TODO(review): per-line change detection was disabled; re-enable
            # once line-level CHANGED state is needed:
            # if hash_short(src.lines[int(index_str)]) != hashed:
            #     state = ContentState.CHANGED
        return SrcFragment(src=src, line_indexes=line_indexes, state=state)
99
+
100
+
101
@dataclass
class SrcFile:
    """A single tracked source file plus its cached content hash and fragments."""

    path: Path
    file_man: 'FileMan' = field(compare=False, repr=False)
    state: ContentState = ContentState.UNKNOWN
    fragments: list[SrcFragment] = field(default_factory=list, repr=False)
    # Fix: was annotated plain ``str`` while defaulting to None.
    _hash: Optional[str] = None  # cached content hash; None until computed

    @cached_property
    def content(self) -> str:
        """Full file text, or the empty string when the file does not exist.

        NOTE(review): cached on first access — later on-disk edits are not
        re-read by this instance; ``compare_content`` relies on being called
        before the first access. Confirm if files can change mid-run.
        """
        if self.path.exists():
            return self.path.read_text(encoding="utf8")
        return ""

    @cached_property
    def lines(self) -> list[str]:
        """File content split into lines (without trailing newlines)."""
        return self.content.splitlines()

    @cached_property
    def base_name(self) -> str:
        """File name without its suffix."""
        return self.path.stem

    @cached_property
    def module_path(self) -> str:
        """Path relative to the FileMan source root, as a string."""
        rel_path = self.file_man.safe_relative_path(self.path)
        return str(rel_path)

    @cached_property
    def packages(self) -> list[str]:
        """Package directory names between the source root and this file."""
        rel_path = self.file_man.safe_relative_path(self.path)
        return list(rel_path.parent.parts)

    @cached_property
    def skel_path(self) -> Path:
        """Directory under the skeleton root mirroring this file's package path.

        Creates the directory (and parents) on first access.

        Raises:
            ValueError: when no FileMan reference is attached.
        """
        if not self.file_man:
            raise ValueError("FileMan ref is null")
        ret = self.file_man.skel_path
        for pkg in self.packages:
            ret = ret / pkg
        ret = ret / self.base_name
        ret.mkdir(parents=True, exist_ok=True)
        return ret

    @property
    def hash(self) -> str:
        """Content hash, computed lazily on first access."""
        if self._hash is None:
            self.rehash()
        return self._hash

    def rehash(self) -> None:
        """Recompute the content hash; reset to None when the file is gone."""
        self._hash = hash(self.content) if self.path.exists() else None

    def set_fragments_state(self, state: ContentState) -> None:
        """Set the state on this file and all its fragments"""
        self.state = state
        for fragment in self.fragments:
            fragment.state = state

    def compare_content(self) -> None:
        """Classify this file as REMOVED/NEW/UNCHANGED/CHANGED vs the cached hash."""
        if not self.path.exists():  # Set state as REMOVED if file no longer exists
            self.set_fragments_state(ContentState.REMOVED)
            return

        old_hash = self._hash
        self.rehash()

        if old_hash is None:  # New file
            self.set_fragments_state(ContentState.NEW)
            return
        if old_hash == self._hash:  # Content unchanged
            self.set_fragments_state(ContentState.UNCHANGED)
            return

        # Content changed
        self.set_fragments_state(ContentState.CHANGED)

    def to_json(self) -> dict:
        """Serialize path, hash and fragments for the JSON cache."""
        return {
            "path": str(self.path),
            "hash": self.hash,
            "fragments": [f.to_json() for f in self.fragments],
        }

    @staticmethod
    def of_json(json_dict: dict, file_man: 'FileMan') -> "SrcFile":
        """Rebuild a SrcFile (and its fragments) from a ``to_json`` dict."""
        src_file = SrcFile(path=Path(json_dict["path"]), file_man=file_man, _hash=json_dict["hash"])
        if "fragments" in json_dict:
            src_file.fragments = [SrcFragment.of_json(src_file, f) for f in json_dict["fragments"]]
        return src_file
194
+
195
+
196
@dataclass
class FileMan:
    """Discovers, filters and change-tracks the Python source files of a project.

    Keeps a JSON cache (``skels/cache.json``) of per-file content hashes so
    subsequent runs can classify each file as NEW/CHANGED/UNCHANGED/REMOVED.
    """

    proj_root_path: Path  # must be an existing directory (validated in __post_init__)
    src_paths: list[Path] = field(default_factory=list)  # scan roots; defaults to [proj_root_path]
    src_files: dict[Path, SrcFile] = field(default_factory=dict)  # tracked files keyed by path
    skel_path: Path = field(init=False)  # <proj_root>/skels output directory
    cache_path: Optional[Path] = None  # defaults to <skel_path>/cache.json
    includes: list[str] = field(default_factory=lambda: [".*"])  # fullmatch regex patterns
    excludes: list[str] = field(default_factory=list)  # fullmatch regex patterns

    def __post_init__(self) -> None:
        """Validate the project root and prepare skeleton/cache locations.

        Side effect: creates the ``skels`` output directory on construction.

        Raises:
            ValueError: when ``proj_root_path`` is not an existing directory.
        """
        if not self.proj_root_path.is_dir() or not self.proj_root_path.exists():
            raise ValueError(f"Project root path {self.proj_root_path} must be an existing directory")

        self.skel_path = self.proj_root_path / "skels"
        self.skel_path.mkdir(exist_ok=True)

        if not self.cache_path:
            self.cache_path = Path(self.skel_path, "cache.json")

        if not self.src_paths:
            self.src_paths = [self.proj_root_path]

    @cached_property
    def gitignore(self) -> PathSpec:
        """Combined PathSpec built from every .gitignore under the scan roots."""
        patterns = []
        # Collect patterns from all source paths
        for src_path in self.src_paths:
            gitignore_path = src_path / '.gitignore'
            if gitignore_path.exists():
                with open(gitignore_path) as f:
                    patterns.extend(f.readlines())

        # Also check project root if different from source paths
        if self.proj_root_path not in self.src_paths:
            gitignore_path = self.proj_root_path / '.gitignore'
            if gitignore_path.exists():
                with open(gitignore_path) as f:
                    patterns.extend(f.readlines())

        # Create a single PathSpec from all collected patterns
        return PathSpec.from_lines('gitwildmatch', patterns)

    def clear(self) -> None:
        """Drop all tracked files."""
        self.src_files.clear()

    def load(self) -> None:
        """Replace tracked files with the cached snapshot (no-op when absent)."""
        self.clear()
        if not self.cache_path.exists():
            return
        with open(self.cache_path, encoding='utf-8') as f:
            json_data = json.loads(f.read())
        self.src_files = {Path(key): SrcFile.of_json(value, self) for key, value in json_data.items()}

    def save(self) -> None:
        """Persist the current tracked files to the JSON cache file."""
        json_data = {str(key): value.to_json() for key, value in self.src_files.items()}
        with open(self.cache_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2)

    @cached_property
    def summary_file_path(self) -> Path:
        """Path of the generated summary file inside the skeleton directory."""
        return self.skel_path / "summary.py.txt"

    def detect_changes(self) -> None:
        """Load the cache and classify every source file's ContentState.

        Two passes: files found on disk are compared (or added as NEW);
        cached files never visited by the walk are then marked REMOVED.
        """
        self.load()

        for src_file_path in self.source_files():
            src_file = self.src_files.get(src_file_path, None)
            if not src_file:  # new file
                self.src_files[src_file_path] = SrcFile(src_file_path, self, ContentState.NEW)
                continue
            src_file.compare_content()
        for src_file in self.src_files.values():
            if src_file.state == ContentState.UNKNOWN:
                # Unknown -> has not been visited since load, means the file has been removed
                src_file.state = ContentState.REMOVED

    @cached_property
    def src_root(self) -> Path:
        """Deepest common ancestor of all src_paths (project root as fallback)."""
        if not self.src_paths:
            return self.proj_root_path

        # Convert paths to absolute and get parts
        abs_paths = [p.absolute() for p in self.src_paths]
        path_parts = [p.parts for p in abs_paths]

        common = []
        for parts in zip(*path_parts):  # Find common prefix among all paths
            if len(set(parts)) != 1:
                break
            common.append(parts[0])

        if not common:
            return self.proj_root_path

        return Path(*common)

    @cached_property
    def includes_rx(self) -> list[re.Pattern]:
        """Compiled include patterns (cached after first use)."""
        return [re.compile(p) for p in self.includes]

    @cached_property
    def excludes_rx(self) -> list[re.Pattern]:
        """Compiled exclude patterns (cached after first use)."""
        return [re.compile(p) for p in self.excludes]

    def rx_list_match(self, text: str, rx_list: list[re.Pattern]) -> bool:
        """Return True when *text* fully matches any pattern in *rx_list*."""
        return any(rx.fullmatch(text) for rx in rx_list)

    def includes_match(self, text: str) -> bool:
        """Return True when *text* fully matches one of the include patterns."""
        return self.rx_list_match(text, self.includes_rx)

    def excludes_match(self, text: str) -> bool:
        """Return True when *text* fully matches one of the exclude patterns."""
        return self.rx_list_match(text, self.excludes_rx)

    @staticmethod
    def make_substring_match(pattern: str) -> str:
        """Wrap *pattern* so a fullmatch behaves like a substring search."""
        return f".*{pattern}.*"

    def safe_relative_path(self, path: Path | str) -> Path:
        """
        Safely get a path relative to src_root, handling cases where the path is not within src_root.
        If the path is not within src_root, returns the path as is.
        """
        path = Path(path) if isinstance(path, str) else path
        try:
            return path.relative_to(self.src_root)
        except ValueError:
            # Path is not within src_root, return the path as is
            # This handles relative imports and files outside the source root
            return path

    def fn_match(self, filename: str) -> bool:
        """Decide whether *filename* is a Python source file we should process."""
        # Early returns for non-Python files and gitignore matches
        if not filename.endswith('.py'):
            return False

        # Get relative path safely
        rel_path = self.safe_relative_path(filename)

        # Only check gitignore if the path is actually relative
        # (i.e., it was successfully made relative to src_root)
        if str(rel_path) != filename:
            if self.gitignore.match_file(str(rel_path)):
                return False

        # Check include/exclude patterns
        # Use relative path for matching to allow cleaner patterns
        match_target = str(rel_path)
        if not self.includes_match(match_target):
            return False

        if self.excludes_match(match_target):
            return False

        return True

    def source_files(self) -> Iterator[Path]:
        """Yield every matching .py file under all src_paths (recursive walk)."""
        for src_path in self.src_paths:
            # NOTE(review): loop variable 'dir' shadows the builtin — harmless here.
            for dir, _, filenames in os.walk(src_path):
                dirpath = Path(dir)
                for filename in filenames:
                    filepath = dirpath / filename
                    if self.fn_match(str(filepath)):
                        yield filepath
360
+
361
+
362
if __name__ == "__main__":
    # Ad-hoc manual smoke test with a developer-specific hardcoded path;
    # constructing FileMan also creates the "skels" output directory.
    fm = FileMan(proj_root_path=Path("/wrk/dev/Skeleton"))
    # for src in fm.sources.values():
    #     print(src.lines)