skeletonpy 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skeletonpy-1.0.0/PKG-INFO +78 -0
- skeletonpy-1.0.0/README.md +62 -0
- skeletonpy-1.0.0/pyproject.toml +60 -0
- skeletonpy-1.0.0/setup.cfg +4 -0
- skeletonpy-1.0.0/src/app.py +122 -0
- skeletonpy-1.0.0/src/overview.py +186 -0
- skeletonpy-1.0.0/src/parsing.py +342 -0
- skeletonpy-1.0.0/src/skeletonpy.egg-info/PKG-INFO +78 -0
- skeletonpy-1.0.0/src/skeletonpy.egg-info/SOURCES.txt +12 -0
- skeletonpy-1.0.0/src/skeletonpy.egg-info/dependency_links.txt +1 -0
- skeletonpy-1.0.0/src/skeletonpy.egg-info/entry_points.txt +2 -0
- skeletonpy-1.0.0/src/skeletonpy.egg-info/requires.txt +9 -0
- skeletonpy-1.0.0/src/skeletonpy.egg-info/top_level.txt +4 -0
- skeletonpy-1.0.0/src/sources.py +365 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skeletonpy
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Project-URL: Homepage, https://github.com/Premik/skeletonpy
|
|
5
|
+
Project-URL: Repository, https://github.com/Premik/skeletonpy
|
|
6
|
+
Project-URL: Issues, https://github.com/Premik/skeletonpy/issues
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: jedi
|
|
9
|
+
Requires-Dist: pathspec
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest; extra == "dev"
|
|
12
|
+
Requires-Dist: black; extra == "dev"
|
|
13
|
+
Requires-Dist: isort; extra == "dev"
|
|
14
|
+
Requires-Dist: ruff; extra == "dev"
|
|
15
|
+
Requires-Dist: mypy; extra == "dev"
|
|
16
|
+
|
|
17
|
+
# SkeletonPy
|
|
18
|
+
|
|
19
|
+
SkeletonPy is a Python utility for code analysis and summarization. It parses Python source code to generate a compact overview, which is particularly useful for reducing the context size when working with Large Language Models (LLMs). By providing a summarized version of the code, it helps improve the performance of AI-assisted coding and reduces token usage.
|
|
20
|
+
|
|
21
|
+
## Motivation
|
|
22
|
+
|
|
23
|
+
SkeletonPy is designed as a fast, pure code-driven alternative to complex local indexers (like those used in Continue or Cursor) for developers who want a lightweight, zero-overhead solution. It serves as an companion for Agentic Frameworks (by providing them with a highly accurate map of your Python repository.
|
|
24
|
+
|
|
25
|
+
Why use SkeletonPy over full-context stuffing or maintaining local indexes?
|
|
26
|
+
|
|
27
|
+
* **Zero Overhead Code Mapping:** Code changes frequently during development. Instead of maintaining complex embeddings, local vector databases, or dealing with expensive re-indexing processes, SkeletonPy runs instantly and entirely locally without LLMs.
|
|
28
|
+
* **Focused Context:** Pumping entire repositories into the prompt window often leads to the "lost in the middle" phenomenon, where models overlook pieces of the context. A concise skeleton limits irrelevant information, which helps smaller local models and large models alike focus on what actually matters.
|
|
29
|
+
* **Cost and Speed:** Passing a compact skeleton instead of full source files means significantly fewer input tokens. This directly translates to lower API costs and faster responses.
|
|
30
|
+
* **Perfect for Agentic Workflows:** The generated summary contains original file names and precise line numbers down to class-level resolution.
|
|
31
|
+
|
|
32
|
+

|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
From your project's root directory, run `skeletonpy` with the path to your source code (`src`):
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
uvx skeletonpy src
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
This will scan all Python files in the `src` directory and create a `skeleton.txt` file inside it. You can then append the content of this file to your LLM prompt.
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
You can install `skeletonpy` from PyPI using your favorite package manager like `pip` or `uv`.
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install skeletonpy
|
|
50
|
+
|
|
51
|
+
uv pip install skeletonpy
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Alternatively, you can run it directly without a permanent installation:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pipx run skeletonpy -- --help
|
|
58
|
+
|
|
59
|
+
uvx skeletonpy --help
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Once installed, you can invoke the script:
|
|
63
|
+
```bash
|
|
64
|
+
skeletonpy --help
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Usage
|
|
68
|
+
|
|
69
|
+
Run `skeletonpy` with the path to your source directory/directories. You can use include and exclude patterns to filter the files. The patterns are regular expressions.
|
|
70
|
+
|
|
71
|
+
For example, to process the `src` directory, including all Python files but excluding test files, and save the output to `skeleton.txt`:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
skeletonpy src --exclude "_test\.py" -o main_src.txt
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
This will generate a `main_src.txt` file. If you provide an absolute or relative path as output, it will be respected.
|
|
78
|
+
See the [examples folder](examples/README.md) for more.
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# SkeletonPy
|
|
2
|
+
|
|
3
|
+
SkeletonPy is a Python utility for code analysis and summarization. It parses Python source code to generate a compact overview, which is particularly useful for reducing the context size when working with Large Language Models (LLMs). By providing a summarized version of the code, it helps improve the performance of AI-assisted coding and reduces token usage.
|
|
4
|
+
|
|
5
|
+
## Motivation
|
|
6
|
+
|
|
7
|
+
SkeletonPy is designed as a fast, pure code-driven alternative to complex local indexers (like those used in Continue or Cursor) for developers who want a lightweight, zero-overhead solution. It serves as an companion for Agentic Frameworks (by providing them with a highly accurate map of your Python repository.
|
|
8
|
+
|
|
9
|
+
Why use SkeletonPy over full-context stuffing or maintaining local indexes?
|
|
10
|
+
|
|
11
|
+
* **Zero Overhead Code Mapping:** Code changes frequently during development. Instead of maintaining complex embeddings, local vector databases, or dealing with expensive re-indexing processes, SkeletonPy runs instantly and entirely locally without LLMs.
|
|
12
|
+
* **Focused Context:** Pumping entire repositories into the prompt window often leads to the "lost in the middle" phenomenon, where models overlook pieces of the context. A concise skeleton limits irrelevant information, which helps smaller local models and large models alike focus on what actually matters.
|
|
13
|
+
* **Cost and Speed:** Passing a compact skeleton instead of full source files means significantly fewer input tokens. This directly translates to lower API costs and faster responses.
|
|
14
|
+
* **Perfect for Agentic Workflows:** The generated summary contains original file names and precise line numbers down to class-level resolution.
|
|
15
|
+
|
|
16
|
+

|
|
17
|
+
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
From your project's root directory, run `skeletonpy` with the path to your source code (`src`):
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
uvx skeletonpy src
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
This will scan all Python files in the `src` directory and create a `skeleton.txt` file inside it. You can then append the content of this file to your LLM prompt.
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
You can install `skeletonpy` from PyPI using your favorite package manager like `pip` or `uv`.
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install skeletonpy
|
|
34
|
+
|
|
35
|
+
uv pip install skeletonpy
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Alternatively, you can run it directly without a permanent installation:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pipx run skeletonpy -- --help
|
|
42
|
+
|
|
43
|
+
uvx skeletonpy --help
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Once installed, you can invoke the script:
|
|
47
|
+
```bash
|
|
48
|
+
skeletonpy --help
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Usage
|
|
52
|
+
|
|
53
|
+
Run `skeletonpy` with the path to your source directory/directories. You can use include and exclude patterns to filter the files. The patterns are regular expressions.
|
|
54
|
+
|
|
55
|
+
For example, to process the `src` directory, including all Python files but excluding test files, and save the output to `skeleton.txt`:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
skeletonpy src --exclude "_test\.py" -o main_src.txt
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
This will generate a `main_src.txt` file. If you provide an absolute or relative path as output, it will be respected.
|
|
62
|
+
See the [examples folder](examples/README.md) for more.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "skeletonpy"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
readme = "README.md"
|
|
5
|
+
|
|
6
|
+
dependencies = [
|
|
7
|
+
"jedi",
|
|
8
|
+
"pathspec",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[project.urls]
|
|
12
|
+
Homepage = "https://github.com/Premik/skeletonpy"
|
|
13
|
+
Repository = "https://github.com/Premik/skeletonpy"
|
|
14
|
+
Issues = "https://github.com/Premik/skeletonpy/issues"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
[project.scripts]
|
|
18
|
+
skeletonpy = "app:main"
|
|
19
|
+
|
|
20
|
+
[project.optional-dependencies]
|
|
21
|
+
dev = [
|
|
22
|
+
"pytest",
|
|
23
|
+
"black",
|
|
24
|
+
"isort",
|
|
25
|
+
"ruff",
|
|
26
|
+
"mypy",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
[tool.pytest.ini_options]
|
|
31
|
+
pythonpath = ["src", "tests"]
|
|
32
|
+
log_cli = true
|
|
33
|
+
log_cli_level = "DEBUG"
|
|
34
|
+
#log_cli_format = "%(asctime)s [%(levelname)8xs] %(message)s (%(filename)s:%(lineno)s)"
|
|
35
|
+
#log_cli_date_format = "%Y-%m-%d %H:%M:%S"
|
|
36
|
+
|
|
37
|
+
[tool.black]
|
|
38
|
+
line-length = 160
|
|
39
|
+
skip-string-normalization = true
|
|
40
|
+
|
|
41
|
+
[tool.isort]
|
|
42
|
+
profile = "black"
|
|
43
|
+
src_paths = ["src", "tests"]
|
|
44
|
+
line_length = 160
|
|
45
|
+
|
|
46
|
+
[tool.ruff]
|
|
47
|
+
line-length = 160
|
|
48
|
+
ignore = ["E741", "E722", "E731"]
|
|
49
|
+
|
|
50
|
+
[tool.mypy]
|
|
51
|
+
strict_optional = false
|
|
52
|
+
disallow_untyped_calls = true
|
|
53
|
+
warn_unused_configs = true
|
|
54
|
+
# disallow_untyped_defs = true
|
|
55
|
+
check_untyped_defs = true
|
|
56
|
+
pretty = true
|
|
57
|
+
|
|
58
|
+
[tool.setuptools]
|
|
59
|
+
package-dir = {"" = "src"}
|
|
60
|
+
py-modules = ["app", "parsing", "overview", "sources"]
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from parsing import ProjectModel
|
|
9
|
+
from sources import FileMan
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def parse_args() -> argparse.Namespace:
|
|
13
|
+
parser = argparse.ArgumentParser(description='Process project files.')
|
|
14
|
+
parser.add_argument('dirs', nargs='*', default=[os.getcwd()], help='Directories to process (default: current directory)')
|
|
15
|
+
parser.add_argument(
|
|
16
|
+
'-i',
|
|
17
|
+
'--include',
|
|
18
|
+
nargs='+',
|
|
19
|
+
default=[],
|
|
20
|
+
help="Include files if their path contains any of the given strings (substring match).",
|
|
21
|
+
)
|
|
22
|
+
parser.add_argument(
|
|
23
|
+
'-I',
|
|
24
|
+
'--include-exact',
|
|
25
|
+
nargs='+',
|
|
26
|
+
default=[],
|
|
27
|
+
help="Include files if their full path matches one of the given regular expressions. This performs a full regexp match, as opposed to the simple substring match of '--include'.",
|
|
28
|
+
)
|
|
29
|
+
parser.add_argument('-e', '--exclude', nargs='+', default=[], help="Exclude files if their full path matches one of the given regular expressions.")
|
|
30
|
+
parser.add_argument('-o', '--output', help='Output file name or path (default: same as summary file)')
|
|
31
|
+
|
|
32
|
+
args = parser.parse_args()
|
|
33
|
+
return args
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_args_hash(args: argparse.Namespace) -> str:
|
|
37
|
+
"""Create a hash from the arguments."""
|
|
38
|
+
args_dict = vars(args)
|
|
39
|
+
# Create a sorted string representation of the dictionary
|
|
40
|
+
# to handle cases where arguments are not always in the same order.
|
|
41
|
+
sorted_args_str = json.dumps(args_dict, sort_keys=True)
|
|
42
|
+
return hashlib.sha256(sorted_args_str.encode('utf-8')).hexdigest()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_output_path(src_path: Path, output_spec: str | None) -> Path:
|
|
46
|
+
"""
|
|
47
|
+
Determine the output path based on source path and output specification.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
src_path: The source directory path
|
|
51
|
+
output_spec: Output file specification - can be None, filename, or path
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
Path object for the target output location
|
|
55
|
+
"""
|
|
56
|
+
if not output_spec:
|
|
57
|
+
return src_path
|
|
58
|
+
|
|
59
|
+
output_path = Path(output_spec)
|
|
60
|
+
|
|
61
|
+
# If just a filename without path components, put in source dir
|
|
62
|
+
if len(output_path.parts) == 1:
|
|
63
|
+
return src_path / output_path
|
|
64
|
+
|
|
65
|
+
# If relative path, make it relative to source dir
|
|
66
|
+
if not output_path.is_absolute():
|
|
67
|
+
return src_path / output_path
|
|
68
|
+
|
|
69
|
+
# Otherwise use absolute path as-is
|
|
70
|
+
return output_path
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def debug() -> None:
|
|
74
|
+
import debugpy
|
|
75
|
+
|
|
76
|
+
# Allow other processes to connect to debugpy
|
|
77
|
+
debugpy.listen(("0.0.0.0", 5678)) # Use any available port, here 5678
|
|
78
|
+
|
|
79
|
+
# Pause execution until a debugger is attached
|
|
80
|
+
print("Waiting for debugger to attach...")
|
|
81
|
+
debugpy.wait_for_client()
|
|
82
|
+
print("Debugger is attached. Starting execution...")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def main() -> None:
|
|
86
|
+
args = parse_args()
|
|
87
|
+
|
|
88
|
+
args_hash = get_args_hash(args)
|
|
89
|
+
trg = Path("/tmp/skels") / args_hash
|
|
90
|
+
|
|
91
|
+
if trg.exists() and trg.is_dir():
|
|
92
|
+
shutil.rmtree(trg)
|
|
93
|
+
trg.mkdir(parents=True, exist_ok=True)
|
|
94
|
+
|
|
95
|
+
pm = ProjectModel(proj_root_path=trg)
|
|
96
|
+
pm.file_man.src_paths = [Path(d) for d in args.dirs]
|
|
97
|
+
|
|
98
|
+
includes = []
|
|
99
|
+
if args.include:
|
|
100
|
+
includes.extend(FileMan.make_substring_match(p) for p in args.include)
|
|
101
|
+
if args.include_exact:
|
|
102
|
+
includes.extend(args.include_exact)
|
|
103
|
+
|
|
104
|
+
if not includes:
|
|
105
|
+
includes = ['.*']
|
|
106
|
+
|
|
107
|
+
pm.file_man.includes = includes
|
|
108
|
+
pm.file_man.excludes = args.exclude
|
|
109
|
+
# debug()
|
|
110
|
+
|
|
111
|
+
pm.parse_all()
|
|
112
|
+
|
|
113
|
+
for src_path in pm.file_man.src_paths:
|
|
114
|
+
output_path = get_output_path(src_path, args.output)
|
|
115
|
+
# Create parent directories if needed
|
|
116
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
117
|
+
print(f"{pm.file_man.summary_file_path}->{output_path}")
|
|
118
|
+
shutil.copy2(pm.file_man.summary_file_path, output_path)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
if __name__ == "__main__":
|
|
122
|
+
main()
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Iterable, Iterator
|
|
6
|
+
|
|
7
|
+
from sources import SrcFile, SrcFragment
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def trim_string(value: str, max_length: int = 200) -> str:
|
|
11
|
+
"""Trims a string from the middle if it exceeds the max length."""
|
|
12
|
+
if len(value) <= max_length * 1.2:
|
|
13
|
+
return value
|
|
14
|
+
prefix_length = int(max_length * 0.7) # 70% prefix
|
|
15
|
+
suffix_length = max_length - prefix_length - 3 # Account for '…'
|
|
16
|
+
return f"{value[:prefix_length]}…{value[-suffix_length:]}"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def trim_collection(value: dict | list | set, max_items: int = 4) -> str:
|
|
20
|
+
"""Trims collections like dicts, lists, or sets to max_items."""
|
|
21
|
+
if len(value) <= max_items:
|
|
22
|
+
return str(value) # Return unmodified if within limit
|
|
23
|
+
|
|
24
|
+
if isinstance(value, dict):
|
|
25
|
+
header = list(value.items())[: max_items // 2]
|
|
26
|
+
footer = list(value.items())[-max_items // 2 :]
|
|
27
|
+
return f"{{{', '.join(map(str, header))}, …, {', '.join(map(str, footer))}}}"
|
|
28
|
+
|
|
29
|
+
if isinstance(value, (list, set)):
|
|
30
|
+
header = list(value)[: max_items // 2]
|
|
31
|
+
footer = list(value)[-max_items // 2 :]
|
|
32
|
+
return f"[{', '.join(map(str, header))}, …, {', '.join(map(str, footer))}]"
|
|
33
|
+
|
|
34
|
+
return str(value)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
|
|
38
|
+
class OverviewSection:
|
|
39
|
+
frag: SrcFragment = field(repr=False)
|
|
40
|
+
name: str = "test"
|
|
41
|
+
parent_name: str = ""
|
|
42
|
+
|
|
43
|
+
out_lines_raw: list[str] = field(default_factory=list, repr=False)
|
|
44
|
+
|
|
45
|
+
def _ensure_index(self, index: int) -> None:
|
|
46
|
+
if index < len(self.out_lines_raw):
|
|
47
|
+
return
|
|
48
|
+
padding = [''] * (index + 1 - len(self.out_lines_raw))
|
|
49
|
+
self.out_lines_raw.extend(padding)
|
|
50
|
+
|
|
51
|
+
def out_lines(self) -> Iterator[str]:
|
|
52
|
+
started = True
|
|
53
|
+
for l in self.out_lines_raw:
|
|
54
|
+
if l is None:
|
|
55
|
+
continue
|
|
56
|
+
if started: # Skip leading blank lines
|
|
57
|
+
if l.strip() == "":
|
|
58
|
+
continue
|
|
59
|
+
started = False
|
|
60
|
+
yield l
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def content(self) -> str:
|
|
64
|
+
return "\n".join(self.out_lines())
|
|
65
|
+
|
|
66
|
+
def remove_whitespaces(self) -> None:
|
|
67
|
+
self.out_lines_raw = [re.sub(r'\s+', '', line) if line is not None else None for line in self.out_lines_raw]
|
|
68
|
+
|
|
69
|
+
def remove_blank_lines(self) -> None:
|
|
70
|
+
self.out_lines_raw = [line if line is not None and line.strip() != "" else None for line in self.out_lines_raw]
|
|
71
|
+
|
|
72
|
+
def remove_comments(self) -> None:
|
|
73
|
+
self.out_lines_raw = [re.sub(r'#.*', '', line) if line is not None else None for line in self.out_lines_raw]
|
|
74
|
+
|
|
75
|
+
def remove_single_line_docstrings(self) -> None:
|
|
76
|
+
pattern = r'(\'\'\'.*?\'\'\')|(\"\"\".*?\"\"\")'
|
|
77
|
+
self.out_lines_raw = [re.sub(pattern, '', line) if line is not None else None for line in self.out_lines_raw]
|
|
78
|
+
|
|
79
|
+
def remove_multi_line_docstrings(self, sepa='"""') -> None:
|
|
80
|
+
in_docstring = False
|
|
81
|
+
|
|
82
|
+
for i, line in enumerate(self.out_lines_raw):
|
|
83
|
+
if line is None:
|
|
84
|
+
continue
|
|
85
|
+
|
|
86
|
+
# if line.find("'''") > -1 or line.find('"""') > -1:
|
|
87
|
+
if line.find(sepa) > -1:
|
|
88
|
+
in_docstring = not in_docstring
|
|
89
|
+
if not in_docstring:
|
|
90
|
+
self.out_lines_raw[i] = None
|
|
91
|
+
if in_docstring:
|
|
92
|
+
self.out_lines_raw[i] = None
|
|
93
|
+
|
|
94
|
+
def type_from_name(self, name: str) -> str:
|
|
95
|
+
if not name:
|
|
96
|
+
return ""
|
|
97
|
+
if '-' in name:
|
|
98
|
+
return name.split('-', 1)[0]
|
|
99
|
+
else:
|
|
100
|
+
return ""
|
|
101
|
+
|
|
102
|
+
@property
|
|
103
|
+
def type(self) -> str:
|
|
104
|
+
return self.type_from_name(self.name)
|
|
105
|
+
|
|
106
|
+
@property
|
|
107
|
+
def parent_type(self) -> str:
|
|
108
|
+
return self.type_from_name(self.parent_name)
|
|
109
|
+
|
|
110
|
+
@property
|
|
111
|
+
def src(self) -> SrcFile:
|
|
112
|
+
return self.frag.src
|
|
113
|
+
|
|
114
|
+
def __getitem__(self, index: int | slice) -> str | list[str]:
|
|
115
|
+
if isinstance(index, slice):
|
|
116
|
+
return self.out_lines_raw[index]
|
|
117
|
+
if index < 0:
|
|
118
|
+
return None
|
|
119
|
+
self._ensure_index(index)
|
|
120
|
+
return self.out_lines_raw[index]
|
|
121
|
+
|
|
122
|
+
def __setitem__(self, index: int, value: str) -> None:
|
|
123
|
+
self._ensure_index(index)
|
|
124
|
+
self.out_lines_raw[index] = value
|
|
125
|
+
|
|
126
|
+
def cut_lines(self, start: int, end: int) -> list[str]:
|
|
127
|
+
if start < 0 or start >= len(self.out_lines_raw):
|
|
128
|
+
return []
|
|
129
|
+
|
|
130
|
+
end = min(end, len(self.out_lines_raw))
|
|
131
|
+
if end < start:
|
|
132
|
+
return []
|
|
133
|
+
|
|
134
|
+
cut_lines = self.out_lines_raw[start : end + 1]
|
|
135
|
+
self.drop_lines(start, end)
|
|
136
|
+
self.frag.exclude_range_indices(start, end)
|
|
137
|
+
return cut_lines
|
|
138
|
+
|
|
139
|
+
def paste_lines(self, lines: Iterable[str], start: int = 0) -> None:
|
|
140
|
+
for i, line in enumerate(lines, start=start):
|
|
141
|
+
self[i] = line
|
|
142
|
+
|
|
143
|
+
def drop_lines(self, start: int = 0, end: int | None = None, val: str = None) -> None:
|
|
144
|
+
if end is None or start >= len(self.out_lines_raw):
|
|
145
|
+
end = len(self.out_lines_raw) - 1
|
|
146
|
+
if start < 0:
|
|
147
|
+
start = 0
|
|
148
|
+
|
|
149
|
+
if end < start:
|
|
150
|
+
return
|
|
151
|
+
self.out_lines_raw[start : end + 1] = [val] * (end - start + 1)
|
|
152
|
+
|
|
153
|
+
def set_lines(self, lines: list[str], start: int = 0) -> None:
|
|
154
|
+
self.drop_lines()
|
|
155
|
+
self.paste_lines(lines, start)
|
|
156
|
+
|
|
157
|
+
@property
|
|
158
|
+
def out_file_path(self) -> Path:
|
|
159
|
+
p = self.src.skel_path
|
|
160
|
+
if self.parent_name:
|
|
161
|
+
p = p / self.parent_name
|
|
162
|
+
return p / f"{self.name}.py.txt"
|
|
163
|
+
|
|
164
|
+
@cached_property
|
|
165
|
+
def summary_file_path(self) -> Path:
|
|
166
|
+
return self.src.file_man.summary_file_path
|
|
167
|
+
|
|
168
|
+
@cached_property
|
|
169
|
+
def previous_content(self) -> str:
|
|
170
|
+
try:
|
|
171
|
+
with open(self.out_file_path, 'r', encoding='utf-8') as f:
|
|
172
|
+
return f.read()
|
|
173
|
+
except FileNotFoundError:
|
|
174
|
+
return ''
|
|
175
|
+
|
|
176
|
+
def append_to_summary(self, text: str) -> None:
|
|
177
|
+
with open(self.summary_file_path, 'a', encoding='utf-8') as f:
|
|
178
|
+
f.write(text)
|
|
179
|
+
|
|
180
|
+
def append_content_to_summary(self) -> None:
|
|
181
|
+
self.append_to_summary(self.content)
|
|
182
|
+
|
|
183
|
+
def save(self) -> None:
|
|
184
|
+
self.out_file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
185
|
+
with open(self.out_file_path, 'w', encoding='utf-8') as f:
|
|
186
|
+
f.write(self.content)
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import shutil
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from functools import cached_property
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Callable, Iterator
|
|
7
|
+
|
|
8
|
+
import jedi
|
|
9
|
+
from jedi.api.classes import Name
|
|
10
|
+
|
|
11
|
+
from overview import OverviewSection, trim_string
|
|
12
|
+
from sources import FileMan, SrcFile, SrcFragment
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
|
|
16
|
+
class PythonParser:
|
|
17
|
+
src: SrcFile
|
|
18
|
+
current_name: Name = None
|
|
19
|
+
extra_lines_before_current: int = 0
|
|
20
|
+
tags: list[str] = field(default_factory=list)
|
|
21
|
+
sections: list[OverviewSection] = field(default_factory=list)
|
|
22
|
+
decorator_re = re.compile(r"@(?P<name>\w+)")
|
|
23
|
+
dispatch_methods: dict[str, Callable] = field(default_factory=dict, repr=False)
|
|
24
|
+
max_string_length: int = 200
|
|
25
|
+
max_items: int = 4
|
|
26
|
+
|
|
27
|
+
def __post_init__(self) -> None:
|
|
28
|
+
self.dispatch_methods.update(
|
|
29
|
+
{
|
|
30
|
+
'module-import': self.handle_import,
|
|
31
|
+
'class-import': self.handle_import,
|
|
32
|
+
'function-import': self.handle_import,
|
|
33
|
+
'function-@property': self.handle_property,
|
|
34
|
+
'function-@cached_property': self.handle_property,
|
|
35
|
+
'class': self.handle_class,
|
|
36
|
+
'class.statement': self.handle_class_statement,
|
|
37
|
+
'module.statement': self.handle_module_statement,
|
|
38
|
+
'function': self.handle_function,
|
|
39
|
+
'': self.handle_skip,
|
|
40
|
+
}
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
@property
|
|
44
|
+
def module_section(self) -> OverviewSection:
|
|
45
|
+
if not self.sections:
|
|
46
|
+
return None
|
|
47
|
+
return self.sections[0]
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def current_section(self) -> OverviewSection:
|
|
51
|
+
if not self.sections:
|
|
52
|
+
return None
|
|
53
|
+
return self.sections[-1]
|
|
54
|
+
|
|
55
|
+
@property
|
|
56
|
+
def parent_section(self) -> OverviewSection:
|
|
57
|
+
if len(self.sections) < 2:
|
|
58
|
+
return None
|
|
59
|
+
return self.sections[-2]
|
|
60
|
+
|
|
61
|
+
def push_new_section(self, name: str) -> OverviewSection:
|
|
62
|
+
frg = SrcFragment(self.src)
|
|
63
|
+
ret = OverviewSection(frg, name)
|
|
64
|
+
self.sections.append(ret)
|
|
65
|
+
self.claim_current_lines()
|
|
66
|
+
return ret
|
|
67
|
+
|
|
68
|
+
def push_module(self, name: str) -> OverviewSection:
|
|
69
|
+
ret = self.push_new_section(f"module-{name}")
|
|
70
|
+
return ret
|
|
71
|
+
|
|
72
|
+
def push_class(self, name: str) -> OverviewSection:
|
|
73
|
+
ret = self.push_new_section(f"class-{name}")
|
|
74
|
+
ret.parent_name = ret.name
|
|
75
|
+
return ret
|
|
76
|
+
|
|
77
|
+
def push_function(self, name: str) -> OverviewSection:
|
|
78
|
+
pn = self.current_section.name
|
|
79
|
+
ret = self.push_new_section(f"function-{name}")
|
|
80
|
+
ret.parent_name = pn
|
|
81
|
+
|
|
82
|
+
return ret
|
|
83
|
+
|
|
84
|
+
def pop(self) -> OverviewSection:
|
|
85
|
+
cs = self.current_section
|
|
86
|
+
assert cs, "Nothing to pop"
|
|
87
|
+
self.sections.pop()
|
|
88
|
+
parent_cs = self.current_section
|
|
89
|
+
|
|
90
|
+
if cs.type in ["module", "class"]:
|
|
91
|
+
cs.remove_comments()
|
|
92
|
+
cs.remove_single_line_docstrings()
|
|
93
|
+
cs.remove_multi_line_docstrings('"""')
|
|
94
|
+
cs.remove_multi_line_docstrings("'''")
|
|
95
|
+
cs.remove_blank_lines()
|
|
96
|
+
cs.save()
|
|
97
|
+
cs.append_to_summary(f"\n\n# {self.src.module_path} {cs.frag.range_str}\n")
|
|
98
|
+
cs.append_content_to_summary()
|
|
99
|
+
return cs
|
|
100
|
+
|
|
101
|
+
parent_cs.paste_lines(cs.out_lines(), cs.frag.start)
|
|
102
|
+
return cs
|
|
103
|
+
|
|
104
|
+
def claim_current_lines(self) -> None:
|
|
105
|
+
start = self.current_range[0]
|
|
106
|
+
end = self.current_range[1]
|
|
107
|
+
for line_loc, st in enumerate(self.current_lines):
|
|
108
|
+
line = start + line_loc
|
|
109
|
+
self.current_section[line] = st
|
|
110
|
+
|
|
111
|
+
self.current_section.frag.include_range_indices(start, end)
|
|
112
|
+
|
|
113
|
+
if self.parent_section:
|
|
114
|
+
self.parent_section.cut_lines(start, end)
|
|
115
|
+
|
|
116
|
+
def handle_import(self) -> None:
|
|
117
|
+
cs = self.current_section
|
|
118
|
+
cs.drop_lines(self.current_range[0], self.current_range[1])
|
|
119
|
+
|
|
120
|
+
def handle_function(self) -> None:
|
|
121
|
+
cn = self.current_name
|
|
122
|
+
self.push_function(cn.name)
|
|
123
|
+
cs = self.current_section
|
|
124
|
+
|
|
125
|
+
try:
|
|
126
|
+
sum = f"{cn.get_type_hint()}"
|
|
127
|
+
except Exception as e:
|
|
128
|
+
# Probaby bug in Jedi, workaround by only using basic desc without types
|
|
129
|
+
print(f"Exception: {str(e)}")
|
|
130
|
+
# traceback.print_exc()
|
|
131
|
+
sum = cn.description
|
|
132
|
+
|
|
133
|
+
sum = re.sub(r'self\s*,?', '', sum)
|
|
134
|
+
q = "[\"']?"
|
|
135
|
+
sum = re.sub(f'{q}', '', sum)
|
|
136
|
+
sum = re.sub(r'\s+', '', sum)
|
|
137
|
+
sum = re.sub(rf'->\s*{q}None{q}', '', sum)
|
|
138
|
+
tags = ''.join(self.tags)
|
|
139
|
+
if tags:
|
|
140
|
+
tags += ' '
|
|
141
|
+
sum = f"{tags}{sum}"
|
|
142
|
+
cs.set_lines([sum])
|
|
143
|
+
|
|
144
|
+
def handle_class_statement(self) -> None:
|
|
145
|
+
cs = self.current_section
|
|
146
|
+
cn = self.current_name
|
|
147
|
+
code = cn.get_line_code()
|
|
148
|
+
code = re.sub(r'\s+', '', code)
|
|
149
|
+
# Apply trimming to possibly long values in the code
|
|
150
|
+
code = trim_string(code, self.max_string_length)
|
|
151
|
+
cs.paste_lines([code], self.current_range[0])
|
|
152
|
+
|
|
153
|
+
def handle_module_statement(self) -> None:
|
|
154
|
+
cs = self.current_section
|
|
155
|
+
assert cs == self.module_section
|
|
156
|
+
cn = self.current_name
|
|
157
|
+
|
|
158
|
+
def handle_property(self) -> None:
|
|
159
|
+
cn = self.current_name
|
|
160
|
+
self.push_function(cn.name)
|
|
161
|
+
cs = self.current_section
|
|
162
|
+
function_signature = cn.get_line_code()
|
|
163
|
+
# Regular expression to find return type hint
|
|
164
|
+
match = re.search(r'(->\s*[a-zA-Z_][a-zA-Z0-9_]*)', function_signature)
|
|
165
|
+
return_type = ""
|
|
166
|
+
if match:
|
|
167
|
+
return_type = match.group(1)
|
|
168
|
+
sum = f"{cn.name}{return_type}"
|
|
169
|
+
cs.set_lines([sum])
|
|
170
|
+
cs.remove_whitespaces()
|
|
171
|
+
|
|
172
|
+
def handle_class(self) -> None:
|
|
173
|
+
self.push_class(self.current_name.name)
|
|
174
|
+
|
|
175
|
+
def handle_fail(self) -> None:
|
|
176
|
+
raise NotImplementedError(f"Handler not implemented for {self.current_name}")
|
|
177
|
+
|
|
178
|
+
def handle_skip(self) -> None:
|
|
179
|
+
s = self.current_name
|
|
180
|
+
cr = self.current_range
|
|
181
|
+
|
|
182
|
+
@cached_property
|
|
183
|
+
def script(self) -> jedi.Script:
|
|
184
|
+
return jedi.Script(path=self.src.path)
|
|
185
|
+
|
|
186
|
+
@property
|
|
187
|
+
def current_range(self) -> tuple[int, int]:
|
|
188
|
+
if not self.current_name:
|
|
189
|
+
return (0, len(self.src.lines) - 1)
|
|
190
|
+
start_line, _ = self.current_name.get_definition_start_position()
|
|
191
|
+
end_line, _ = self.current_name.get_definition_end_position()
|
|
192
|
+
# Convert lines to start from 0
|
|
193
|
+
return (start_line - self.extra_lines_before_current - 1, end_line - 1)
|
|
194
|
+
|
|
195
|
+
@property
|
|
196
|
+
def current_range_str(self) -> str:
|
|
197
|
+
start = self.current_range[0]
|
|
198
|
+
end = self.current_range[1]
|
|
199
|
+
if end - start == 0:
|
|
200
|
+
return f"({start+1})"
|
|
201
|
+
return f"({start+1}-{end+1})"
|
|
202
|
+
|
|
203
|
+
@property
|
|
204
|
+
def current_lines(self) -> list[str]:
|
|
205
|
+
start, end = self.current_range
|
|
206
|
+
return self.src.lines[start : end + 1]
|
|
207
|
+
|
|
208
|
+
def get_prev_lines(self) -> Iterator[str]:
|
|
209
|
+
start, _ = self.current_range
|
|
210
|
+
for i in range(start - 1, 0, -1):
|
|
211
|
+
yield self.src.lines[i]
|
|
212
|
+
|
|
213
|
+
@property
|
|
214
|
+
def current_line(self) -> list[str]:
|
|
215
|
+
return self.current_name.get_line_code()
|
|
216
|
+
|
|
217
|
+
def find_tags(self) -> None:
|
|
218
|
+
self.tags.clear()
|
|
219
|
+
if "import" in self.current_line:
|
|
220
|
+
self.tags.append("import")
|
|
221
|
+
|
|
222
|
+
self.extra_lines_before_current = 0
|
|
223
|
+
for pl in self.get_prev_lines():
|
|
224
|
+
# Walk back through the lines to find any decorators.
|
|
225
|
+
decors = PythonParser.decorator_re.findall(pl)
|
|
226
|
+
if not decors:
|
|
227
|
+
break # No more (or any) are found
|
|
228
|
+
self.tags += [f"@{d}" for d in decors]
|
|
229
|
+
self.extra_lines_before_current += 1
|
|
230
|
+
|
|
231
|
+
def pop_if_neded(self, line_end: int) -> None:
|
|
232
|
+
while len(self.sections) > 1 and line_end > self.current_section.frag.end:
|
|
233
|
+
self.pop()
|
|
234
|
+
|
|
235
|
+
def parse_src(self) -> None:
    """Main skeleton pass: walk every jedi definition in the file, keep
    the section stack in sync with each definition's line range, and hand
    each definition to the matching handler via dispatch_handler()."""
    self.push_module(self.src.base_name)

    for name in self.script.get_names(all_scopes=True, definitions=True):
        self.current_name = name
        self.find_tags()
        line_code = name.get_line_code()
        # Skip TYPE_CHECKING-guarded lines and bare `pass` statements.
        if "TYPE_CHECKING" in line_code or line_code.strip() == "pass":
            continue

        # Close any sections that end before this definition starts.
        self.pop_if_neded(self.current_range[0])
        if not self.sections:
            break

        self.dispatch_handler()
    self.pop_if_neded(len(self.src.lines))  # After file end, to pop all section
|
|
251
|
+
|
|
252
|
+
def parse_debug(self) -> None:
    """Debug pass: render every jedi name next to its source line (via
    debug_output()) into a dedicated "module-debug" section and save it."""
    self.push_new_section("module-debug")
    self.module_section.drop_lines(val="")
    for jedi_name in self.script.get_names(all_scopes=True):
        self.current_name = jedi_name
        self.find_tags()
        self.debug_output()
    self.current_section.save()
|
|
260
|
+
|
|
261
|
+
def dispatch_handler(self) -> None:
    """Route the current name to the most specific registered handler.

    Candidate keys combine the name's jedi type, the enclosing section's
    type and any collected tags; after de-duplication the longest
    (most specific) key that has a handler wins. Raises
    NotImplementedError when nothing matches.
    """
    prefix = self.current_section.type
    if prefix:
        prefix += "."

    kind = self.current_name.type
    # Base candidates plus the catch-all '' key, then tag-qualified ones.
    candidates = [kind, f"{prefix}{kind}", '']
    for tag in self.tags:
        candidates.append(f"{kind}-{tag}")
        candidates.append(f"{prefix}{kind}-{tag}")

    # De-duplicate keeping first occurrence; stable sort keeps that order
    # among keys of equal length while trying the longest keys first.
    candidates = list(dict.fromkeys(candidates))
    candidates.sort(key=len, reverse=True)

    for key in candidates:
        if key in self.dispatch_methods:
            self.dispatch_methods[key]()
            return
    raise NotImplementedError(f"Handler not found for:{candidates}")
|
|
281
|
+
|
|
282
|
+
def debug_output(self) -> None:
    """Write an annotated version of the current name's lines into the
    module section: the first line gets a "name:type-tags" column, the
    continuation lines a dashed marker, and lines already annotated by a
    previous name get the new column appended.
    """
    name = self.current_name
    # Hoisted once: previously `ls` was computed here but never used while
    # the loop re-read the `current_lines` property anyway.
    lines = self.current_lines
    start = self.current_range[0]

    for line_loc, st in enumerate(lines):
        line = start + line_loc
        if line_loc == 0:
            # Create tag string separately to avoid nested f-string issues
            tag_str = ''.join([f"-{t}" for t in self.tags])
            tp = f"{name.name[:10]}:{name.type}{tag_str}"
        else:
            tp = "-" * 40

        formatted_line = f"{st+ '#':<82} {line+1:<3}{tp[:25]:<25}"
        prev_line = self.module_section[line]
        if prev_line:
            # Another name already annotated this line: append our column.
            formatted_line = f"{prev_line} {tp[:25]:<25}"

        self.module_section[line] = formatted_line
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
@dataclass
class ProjectModel:
    """Top-level façade: discovers the project's source files and runs
    both parser passes over each of them."""

    proj_root_path: Path  # root directory of the project being parsed

    file_man: FileMan = field(init=False)  # built from proj_root_path

    def __post_init__(self) -> None:
        self.file_man = FileMan(proj_root_path=self.proj_root_path)

    def parse_all(self) -> None:
        """Detect file changes, then run the skeleton and debug passes on
        every matched source file."""
        self.file_man.detect_changes()
        src_files = self.file_man.src_files.values()
        if not src_files:
            raise RuntimeError(f"No src file matched in {self.file_man.src_paths}")
        for src_file in src_files:
            print(src_file)
            # Skeleton pass. The bare `parser.script` access presumably
            # primes a lazy/cached property — TODO confirm in parsing.py.
            parser = PythonParser(src_file)
            parser.script
            parser.parse_src()

            # Fresh parser for the debug pass so no section state carries over.
            parser = PythonParser(src_file)
            parser.script
            parser.parse_debug()
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
if __name__ == "__main__":
    # Developer smoke test: runs the parser over a hard-coded, machine-specific
    # site-packages directory. Not part of the public CLI.
    root = Path("/home/premik/.conda/envs/unsloth/lib/python3.12/site-packages/transformers/models/gemma3")

    # Recreate a clean scratch output directory.
    trg = Path("/tmp/skels")
    if trg.exists() and trg.is_dir():
        shutil.rmtree(trg)
    trg.mkdir(exist_ok=True)

    # Expose the scratch dir inside the target project as "skels" via a symlink.
    sk_path = root / "skels"
    if not sk_path.exists():
        sk_path.symlink_to(trg, target_is_directory=True)

    pm = ProjectModel(root)
    # NOTE(review): FileMan matches includes with fullmatch against the
    # relative path — a bare "modeling_gemma3" may need ".*" wrapping
    # (see FileMan.make_substring_match); confirm this matches anything.
    pm.file_man.includes = ["modeling_gemma3"]
    pm.parse_all()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skeletonpy
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Project-URL: Homepage, https://github.com/Premik/skeletonpy
|
|
5
|
+
Project-URL: Repository, https://github.com/Premik/skeletonpy
|
|
6
|
+
Project-URL: Issues, https://github.com/Premik/skeletonpy/issues
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: jedi
|
|
9
|
+
Requires-Dist: pathspec
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest; extra == "dev"
|
|
12
|
+
Requires-Dist: black; extra == "dev"
|
|
13
|
+
Requires-Dist: isort; extra == "dev"
|
|
14
|
+
Requires-Dist: ruff; extra == "dev"
|
|
15
|
+
Requires-Dist: mypy; extra == "dev"
|
|
16
|
+
|
|
17
|
+
# SkeletonPy
|
|
18
|
+
|
|
19
|
+
SkeletonPy is a Python utility for code analysis and summarization. It parses Python source code to generate a compact overview, which is particularly useful for reducing the context size when working with Large Language Models (LLMs). By providing a summarized version of the code, it helps improve the performance of AI-assisted coding and reduces token usage.
|
|
20
|
+
|
|
21
|
+
## Motivation
|
|
22
|
+
|
|
23
|
+
SkeletonPy is designed as a fast, purely code-driven alternative to complex local indexers (like those used in Continue or Cursor) for developers who want a lightweight, zero-overhead solution. It serves as a companion for agentic frameworks by providing them with a highly accurate map of your Python repository.
|
|
24
|
+
|
|
25
|
+
Why use SkeletonPy over full-context stuffing or maintaining local indexes?
|
|
26
|
+
|
|
27
|
+
* **Zero Overhead Code Mapping:** Code changes frequently during development. Instead of maintaining complex embeddings, local vector databases, or dealing with expensive re-indexing processes, SkeletonPy runs instantly and entirely locally without LLMs.
|
|
28
|
+
* **Focused Context:** Pumping entire repositories into the prompt window often leads to the "lost in the middle" phenomenon, where models overlook pieces of the context. A concise skeleton limits irrelevant information, which helps smaller local models and large models alike focus on what actually matters.
|
|
29
|
+
* **Cost and Speed:** Passing a compact skeleton instead of full source files means significantly fewer input tokens. This directly translates to lower API costs and faster responses.
|
|
30
|
+
* **Perfect for Agentic Workflows:** The generated summary contains original file names and precise line numbers down to class-level resolution.
|
|
31
|
+
|
|
32
|
+

|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
From your project's root directory, run `skeletonpy` with the path to your source code (`src`):
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
uvx skeletonpy src
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
This will scan all Python files in the `src` directory and create a `skeleton.txt` file inside it. You can then append the content of this file to your LLM prompt.
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
You can install `skeletonpy` from PyPI using your favorite package manager like `pip` or `uv`.
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install skeletonpy
|
|
50
|
+
|
|
51
|
+
uv pip install skeletonpy
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Alternatively, you can run it directly without a permanent installation:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pipx run skeletonpy -- --help
|
|
58
|
+
|
|
59
|
+
uvx skeletonpy --help
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Once installed, you can invoke the script:
|
|
63
|
+
```bash
|
|
64
|
+
skeletonpy --help
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Usage
|
|
68
|
+
|
|
69
|
+
Run `skeletonpy` with the path to your source directory/directories. You can use include and exclude patterns to filter the files. The patterns are regular expressions.
|
|
70
|
+
|
|
71
|
+
For example, to process the `src` directory, including all Python files but excluding test files, and save the output to `skeleton.txt`:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
skeletonpy src --exclude "_test\.py" -o main_src.txt
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
This will generate a `main_src.txt` file. If you provide an absolute or relative path as output, it will be respected.
|
|
78
|
+
See the [examples folder](examples/README.md) for more.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/app.py
|
|
4
|
+
src/overview.py
|
|
5
|
+
src/parsing.py
|
|
6
|
+
src/sources.py
|
|
7
|
+
src/skeletonpy.egg-info/PKG-INFO
|
|
8
|
+
src/skeletonpy.egg-info/SOURCES.txt
|
|
9
|
+
src/skeletonpy.egg-info/dependency_links.txt
|
|
10
|
+
src/skeletonpy.egg-info/entry_points.txt
|
|
11
|
+
src/skeletonpy.egg-info/requires.txt
|
|
12
|
+
src/skeletonpy.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from enum import Enum, auto
|
|
8
|
+
from functools import cached_property
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Generator, Iterator, Optional
|
|
11
|
+
|
|
12
|
+
from pathspec import PathSpec
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def hash(s: str, digest: int = 9) -> str:
    """Base64-encoded blake2s digest of *s*, truncated to *digest* bytes.

    NOTE: shadows the builtin ``hash``; kept for compatibility with
    existing callers in this module.
    """
    raw = hashlib.blake2s(s.encode('utf-8'), digest_size=digest).digest()
    return base64.b64encode(raw).decode('utf-8')
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def hash_short(s: str) -> str:
    """Extra-compact 3-byte variant of :func:`hash`, used for per-line hashes."""
    return hash(s, digest=3)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ContentState(Enum):
    """Lifecycle state of a tracked source file (or fragment) relative to
    the cached snapshot (see SrcFile.compare_content)."""

    UNKNOWN = auto()    # not yet compared against the cache
    NEW = auto()        # seen for the first time
    CHANGED = auto()    # content hash differs from the cached one
    UNCHANGED = auto()  # content hash matches the cached one
    REMOVED = auto()    # file no longer exists on disk
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class SrcFragment:
    """A subset of one SrcFile's lines, addressed by 0-based line indexes."""

    src: 'SrcFile' = field(compare=False, repr=False)
    line_indexes: list[int] = field(default_factory=list)
    state: ContentState = field(default=ContentState.UNKNOWN, repr=False)

    def __getitem__(self, index: int | slice) -> str | list[str]:
        """Line(s) of the backing file selected by fragment position."""
        if isinstance(index, slice):
            return [self.src.lines[li] for li in self.line_indexes[index]]
        return self.src.lines[self.line_indexes[index]]

    def __iter__(self) -> Generator[str, Any, None]:
        """Iterate the fragment's lines in index order."""
        return (self.src.lines[li] for li in self.line_indexes)

    @property
    def content(self) -> str:
        """Fragment text, lines joined by newlines."""
        return "\n".join(self)

    def to_json(self) -> dict[int, str]:
        """Map each line index to a short hash of that line's text; empty
        when the backing file no longer exists."""
        if not self.src.path.exists():
            return {}
        return {li: hash_short(self.src.lines[li]) for li in self.line_indexes}

    def merge_with(self, other: 'SrcFragment') -> None:
        """Union this fragment's indexes with *other*'s (sorted, deduped)."""
        self.line_indexes = sorted({*self.line_indexes, *other.line_indexes})

    def include_range_indices(self, start: int, end: int) -> None:
        """Add the inclusive index range [start, end] to the fragment."""
        self.line_indexes = sorted({*self.line_indexes, *range(start, end + 1)})

    def exclude_range_indices(self, start: int, end: int) -> None:
        """Drop every index in the inclusive range [start, end]."""
        banned = set(range(start, end + 1))
        self.line_indexes = sorted(li for li in self.line_indexes if li not in banned)

    @property
    def start(self) -> int:
        """First line index, or -1 for an empty fragment."""
        return self.line_indexes[0] if self.line_indexes else -1

    @property
    def end(self) -> int:
        """Last line index, or -1 for an empty fragment."""
        return self.line_indexes[-1] if self.line_indexes else -1

    @property
    def range_str(self) -> str:
        """1-based display range, e.g. "(5)" or "(5-9)"."""
        if self.start == self.end:
            return f"({self.start+1})"
        return f"({self.start+1}-{self.end+1})"

    @staticmethod
    def of_json(src: 'SrcFile', json_dict: dict[int, str]) -> "SrcFragment":
        """Rebuild a fragment from its to_json() form. JSON object keys
        arrive as strings, so they are converted back to ints."""
        indexes = []
        state = ContentState.UNCHANGED
        for index_str, hashed in json_dict.items():
            indexes.append(int(index_str))
            # Check if the hash corresponds to the source line
            # if hash_short(src.lines[int(index_str)]) != hashed:
            #     state = ContentState.CHANGED
        return SrcFragment(src=src, line_indexes=indexes, state=state)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class SrcFile:
    """One tracked source file: lazily loaded content, a content hash for
    change detection, and the skeleton fragments carved out of it."""

    path: Path  # location of the file on disk
    file_man: 'FileMan' = field(compare=False, repr=False)  # owning manager (paths, filters)
    state: ContentState = ContentState.UNKNOWN  # result of the last change detection
    fragments: list[SrcFragment] = field(default_factory=list, repr=False)  # line subsets of this file
    _hash: Optional[str] = None  # cached content hash; None until computed (annotation fixed from plain `str`)

    @cached_property
    def content(self) -> str:
        """Whole file text; empty string when the file does not exist.

        NOTE(review): cached on first access — a later rehash() re-reads
        this cached value, not the disk; confirm that is intended.
        """
        if self.path.exists():
            return self.path.read_text(encoding="utf8")
        else:
            return ""

    @cached_property
    def lines(self) -> list[str]:
        """Content split into lines (no trailing newlines)."""
        return self.content.splitlines()

    @cached_property
    def base_name(self) -> str:
        """Filename without its extension."""
        return self.path.stem

    @cached_property
    def module_path(self) -> str:
        """Path relative to the manager's src_root, as a string."""
        rel_path = self.file_man.safe_relative_path(self.path)
        return str(rel_path)

    @cached_property
    def packages(self) -> list[str]:
        """Package directory names between src_root and this file."""
        rel_path = self.file_man.safe_relative_path(self.path)
        return list(rel_path.parent.parts)

    @cached_property
    def skel_path(self) -> Path:
        """Directory where this file's skeleton output is written.

        Side effect: the directory is created on first access.
        """
        if not self.file_man:
            raise ValueError("FileMan ref is null")
        ret = self.file_man.skel_path
        for pkg in self.packages:
            ret = ret / pkg
        ret = ret / self.base_name
        ret.mkdir(parents=True, exist_ok=True)
        return ret

    @property
    def hash(self) -> Optional[str]:
        """Content hash, computed lazily; None when the file is missing
        (annotation widened from `str` — rehash() can leave it None)."""
        if self._hash is None:
            self.rehash()
        return self._hash

    def rehash(self) -> None:
        """Recompute the content hash (None when the file is gone)."""
        if self.path.exists():
            self._hash = hash(self.content)
        else:
            self._hash = None

    def set_fragments_state(self, state: ContentState) -> None:
        """Set the state on this file and all its fragments"""
        self.state = state
        for fragment in self.fragments:
            fragment.state = state

    def compare_content(self) -> None:
        """Classify this file as REMOVED/NEW/UNCHANGED/CHANGED by comparing
        the previously cached hash with a freshly computed one."""
        if not self.path.exists():  # Set state as REMOVED if file no longer exists
            self.set_fragments_state(ContentState.REMOVED)
            return

        old_hash = self._hash
        self.rehash()

        if old_hash is None:  # New file
            self.set_fragments_state(ContentState.NEW)
            return
        if old_hash == self._hash:  # Content unchanged
            self.set_fragments_state(ContentState.UNCHANGED)
            return

        # Content changed
        self.set_fragments_state(ContentState.CHANGED)

    def to_json(self) -> dict:
        """JSON-serializable snapshot: path, hash and per-fragment hashes."""
        return {
            "path": str(self.path),
            "hash": self.hash,
            "fragments": [f.to_json() for f in self.fragments],
        }

    @staticmethod
    def of_json(json_dict: dict, file_man: 'FileMan') -> "SrcFile":
        """Rebuild a SrcFile (and its fragments) from its to_json() form."""
        src_file = SrcFile(path=Path(json_dict["path"]), file_man=file_man, _hash=json_dict["hash"])
        if "fragments" in json_dict:
            src_file.fragments = [SrcFragment.of_json(src_file, f) for f in json_dict["fragments"]]
        return src_file
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
@dataclass
class FileMan:
    """Discovers, filters and change-tracks the project's Python sources.

    Holds the include/exclude regex filters, the combined .gitignore spec,
    the skeleton output directory and the JSON cache of per-file hashes.
    """

    proj_root_path: Path  # must be an existing directory (validated in __post_init__)
    src_paths: list[Path] = field(default_factory=list)  # roots to scan; defaults to [proj_root_path]
    src_files: dict[Path, SrcFile] = field(default_factory=dict)  # tracked files keyed by path
    skel_path: Path = field(init=False)  # <proj_root>/skels output directory
    cache_path: Optional[Path] = None  # JSON cache; defaults to skel_path/cache.json
    includes: list[str] = field(default_factory=lambda: [".*"])  # regex allow-list (fullmatch)
    excludes: list[str] = field(default_factory=list)  # regex deny-list (fullmatch)

    def __post_init__(self) -> None:
        """Validate the project root and prepare the skeleton/cache paths.

        Side effect: creates the skels output directory eagerly.
        """
        if not self.proj_root_path.is_dir() or not self.proj_root_path.exists():
            raise ValueError(f"Project root path {self.proj_root_path} must be an existing directory")

        self.skel_path = self.proj_root_path / "skels"
        self.skel_path.mkdir(exist_ok=True)

        if not self.cache_path:
            self.cache_path = Path(self.skel_path, "cache.json")

        if not self.src_paths:
            self.src_paths = [self.proj_root_path]

    @cached_property
    def gitignore(self) -> PathSpec:
        """Combined PathSpec of every .gitignore found in the source roots
        (and in the project root when it is not itself a source root).

        NOTE(review): the two collection blocks below are near-duplicates;
        a small helper could fold them together.
        """
        patterns = []
        # Collect patterns from all source paths
        for src_path in self.src_paths:
            gitignore_path = src_path / '.gitignore'
            if gitignore_path.exists():
                with open(gitignore_path) as f:
                    patterns.extend(f.readlines())

        # Also check project root if different from source paths
        if self.proj_root_path not in self.src_paths:
            gitignore_path = self.proj_root_path / '.gitignore'
            if gitignore_path.exists():
                with open(gitignore_path) as f:
                    patterns.extend(f.readlines())

        # Create a single PathSpec from all collected patterns
        return PathSpec.from_lines('gitwildmatch', patterns)

    def clear(self) -> None:
        """Forget every tracked file."""
        self.src_files.clear()

    def load(self) -> None:
        """Replace the tracked files with the cached snapshot; no-op when
        no cache file exists yet."""
        self.clear()
        if not self.cache_path.exists():
            return
        with open(self.cache_path, encoding='utf-8') as f:
            json_data = json.loads(f.read())
        self.src_files = {Path(key): SrcFile.of_json(value, self) for key, value in json_data.items()}

    def save(self) -> None:
        """Persist the tracked files (hashes + fragments) to the JSON cache."""
        json_data = {str(key): value.to_json() for key, value in self.src_files.items()}
        with open(self.cache_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2)

    @cached_property
    def summary_file_path(self) -> Path:
        # Aggregated skeleton summary lives next to the per-file skeletons.
        return self.skel_path / "summary.py.txt"

    def detect_changes(self) -> None:
        """Load the cache, rescan the disk and mark every tracked file as
        NEW / CHANGED / UNCHANGED / REMOVED."""
        self.load()

        for src_file_path in self.source_files():
            src_file = self.src_files.get(src_file_path, None)
            if not src_file:  # new file
                self.src_files[src_file_path] = SrcFile(src_file_path, self, ContentState.NEW)
                continue
            src_file.compare_content()
        for src_file in self.src_files.values():
            if src_file.state == ContentState.UNKNOWN:
                # Unknown -> has not been visited since load, means the file has been removed
                src_file.state = ContentState.REMOVED

    @cached_property
    def src_root(self) -> Path:
        """Deepest common ancestor of all source paths, used to compute
        relative paths; falls back to the project root."""
        if not self.src_paths:
            return self.proj_root_path

        # Convert paths to absolute and get parts
        abs_paths = [p.absolute() for p in self.src_paths]
        path_parts = [p.parts for p in abs_paths]

        common = []
        for parts in zip(*path_parts):  # Find common prefix among all paths
            if len(set(parts)) != 1:
                break
            common.append(parts[0])

        if not common:
            return self.proj_root_path

        return Path(*common)

    @cached_property
    def includes_rx(self) -> list[re.Pattern]:
        # Compiled once per instance; include patterns come from the caller.
        return [re.compile(p) for p in self.includes]

    @cached_property
    def excludes_rx(self) -> list[re.Pattern]:
        # Compiled once per instance; exclude patterns come from the caller.
        return [re.compile(p) for p in self.excludes]

    def rx_list_match(self, text: str, rx_list: list[re.Pattern]) -> bool:
        """True when any pattern in *rx_list* fully matches *text*."""
        return any(rx.fullmatch(text) for rx in rx_list)

    def includes_match(self, text: str) -> bool:
        """True when *text* is accepted by at least one include pattern."""
        return self.rx_list_match(text, self.includes_rx)

    def excludes_match(self, text: str) -> bool:
        """True when *text* is rejected by at least one exclude pattern."""
        return self.rx_list_match(text, self.excludes_rx)

    @staticmethod
    def make_substring_match(pattern: str) -> str:
        """Wrap a pattern so fullmatch behaves like a substring search."""
        return f".*{pattern}.*"

    def safe_relative_path(self, path: Path | str) -> Path:
        """
        Safely get a path relative to src_root, handling cases where the path is not within src_root.
        If the path is not within src_root, returns the path as is.
        """
        path = Path(path) if isinstance(path, str) else path
        try:
            return path.relative_to(self.src_root)
        except ValueError:
            # Path is not within src_root, return the path as is
            # This handles relative imports and files outside the source root
            return path

    def fn_match(self, filename: str) -> bool:
        """Decide whether *filename* should be processed: a .py file, not
        gitignored, matching the include and not the exclude patterns."""
        # Early returns for non-Python files and gitignore matches
        if not filename.endswith('.py'):
            return False

        # Get relative path safely
        rel_path = self.safe_relative_path(filename)

        # Only check gitignore if the path is actually relative
        # (i.e., it was successfully made relative to src_root)
        if str(rel_path) != filename:
            if self.gitignore.match_file(str(rel_path)):
                return False

        # Check include/exclude patterns
        # Use relative path for matching to allow cleaner patterns
        match_target = str(rel_path)
        if not self.includes_match(match_target):
            return False

        if self.excludes_match(match_target):
            return False

        return True

    def source_files(self) -> Iterator[Path]:
        """Walk every source root and yield the paths accepted by fn_match."""
        for src_path in self.src_paths:
            for dir, _, filenames in os.walk(src_path):
                dirpath = Path(dir)
                for filename in filenames:
                    filepath = dirpath / filename
                    if self.fn_match(str(filepath)):
                        yield filepath
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
if __name__ == "__main__":
    # Developer smoke test with a hard-coded local path; not part of the
    # public API. Constructing FileMan also creates the skels directory.
    fm = FileMan(proj_root_path=Path("/wrk/dev/Skeleton"))
    # for src in fm.sources.values():
    #     print(src.lines)
|