savetoken 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- savetoken-0.1.0/PKG-INFO +164 -0
- savetoken-0.1.0/README.md +140 -0
- savetoken-0.1.0/pyproject.toml +46 -0
- savetoken-0.1.0/savetoken.egg-info/PKG-INFO +164 -0
- savetoken-0.1.0/savetoken.egg-info/SOURCES.txt +9 -0
- savetoken-0.1.0/savetoken.egg-info/dependency_links.txt +1 -0
- savetoken-0.1.0/savetoken.egg-info/entry_points.txt +2 -0
- savetoken-0.1.0/savetoken.egg-info/requires.txt +6 -0
- savetoken-0.1.0/savetoken.egg-info/top_level.txt +1 -0
- savetoken-0.1.0/setup.cfg +4 -0
- savetoken-0.1.0/tests/test_savetoken.py +741 -0
savetoken-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: savetoken
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Convert Repomix/repominify output into compact AI-friendly codebase summaries using free LLMs.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/yourname/savetoken
|
|
7
|
+
Project-URL: Issues, https://github.com/yourname/savetoken/issues
|
|
8
|
+
Keywords: ai,llm,codebase,repomix,prompt,tokens,summarizer
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
16
|
+
Classifier: Topic :: Utilities
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
22
|
+
Requires-Dist: ruff; extra == "dev"
|
|
23
|
+
Requires-Dist: mypy; extra == "dev"
|
|
24
|
+
|
|
25
|
+
# savetoken 🪙
|
|
26
|
+
|
|
27
|
+
> Convert your Repomix codebase dump into a **compact semantic summary** — optimized for AI prompts, with zero raw code.
|
|
28
|
+
|
|
29
|
+
Instead of pasting 50,000 tokens of source code into ChatGPT or Claude, `savetoken` uses a **free LLM** to read the code once and produce a structured `codebrief.md` you reuse forever.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## How it fits in your workflow
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
Your project
|
|
37
|
+
↓
|
|
38
|
+
npx repomix # packs codebase → repomix-output.xml
|
|
39
|
+
↓
|
|
40
|
+
savetoken summarize repomix-output.xml # translates → codebrief.md ← you are here
|
|
41
|
+
↓
|
|
42
|
+
Paste codebrief.md into your AI prompt # ~500 tokens instead of 50k+
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Install
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install savetoken
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Requires Python 3.10+. Zero dependencies beyond stdlib.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Quick start
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# 1. Pack your codebase with Repomix
|
|
61
|
+
npx repomix
|
|
62
|
+
|
|
63
|
+
# 2. Summarize with Gemini Flash (free tier)
|
|
64
|
+
export GEMINI_API_KEY=your_key_here
|
|
65
|
+
savetoken summarize repomix-output.xml
|
|
66
|
+
|
|
67
|
+
# Output: codebrief.md — paste it into any AI prompt!
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Python API
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from savetoken import SaveToken
|
|
74
|
+
|
|
75
|
+
st = SaveToken(provider="gemini", api_key="...")
|
|
76
|
+
summary = st.summarize("repomix-output.xml")
|
|
77
|
+
st.save(summary, "codebrief.md")
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Providers
|
|
83
|
+
|
|
84
|
+
| Provider | Free tier | Set env var |
|
|
85
|
+
|---|---|---|
|
|
86
|
+
| **Gemini Flash 2.0** ⭐ | ✅ Generous | `GEMINI_API_KEY` |
|
|
87
|
+
| **Groq (Llama 3.3 70B)** | ✅ Fast | `GROQ_API_KEY` |
|
|
88
|
+
| **Mistral Small** | ✅ Available | `MISTRAL_API_KEY` |
|
|
89
|
+
| **OpenAI / DeepSeek** | Paid | `OPENAI_API_KEY` |
|
|
90
|
+
|
|
91
|
+
Gemini is recommended — 1M token context window handles large codebases.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## CLI
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
savetoken summarize repomix-output.xml
|
|
99
|
+
savetoken summarize repomix-output.xml --provider groq --output brief.md --lang pt
|
|
100
|
+
savetoken summarize repomix-output.xml --force # ignore cache, regenerate all
|
|
101
|
+
savetoken clear-cache
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
| Flag | Default | Description |
|
|
105
|
+
|---|---|---|
|
|
106
|
+
| `--provider` | `gemini` | LLM provider |
|
|
107
|
+
| `--output` | `codebrief.md` | Output file |
|
|
108
|
+
| `--format` | `markdown` | `markdown` or `json` |
|
|
109
|
+
| `--lang` | `en` | Description language |
|
|
110
|
+
| `--force` | off | Ignore the cache and re-summarize every file |
|
|
111
|
+
| `--cache-dir` | `.savetoken_cache` | Cache location |
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Caching
|
|
116
|
+
|
|
117
|
+
`savetoken` hashes each file's content. On re-runs, **only changed files are re-summarized**. The cache lives in `.savetoken_cache/` — commit it to git to share across your team.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## VS Code Extension
|
|
122
|
+
|
|
123
|
+
Install the extension and use:
|
|
124
|
+
- **Command Palette → SaveToken: Summarize Codebase**
|
|
125
|
+
- Right-click `repomix-output.xml` → **SaveToken: Summarize**
|
|
126
|
+
- Configure provider and API key in Settings → SaveToken
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Example output (`codebrief.md`)
|
|
131
|
+
|
|
132
|
+
```markdown
|
|
133
|
+
# MyShop — savetoken summary
|
|
134
|
+
|
|
135
|
+
E-commerce backend built with FastAPI and PostgreSQL.
|
|
136
|
+
|
|
137
|
+
**Stack:** FastAPI, PostgreSQL, SQLAlchemy, Redis
|
|
138
|
+
**Architecture:** Layered MVC
|
|
139
|
+
**Entry points:** main.py
|
|
140
|
+
|
|
141
|
+
## Directory Map
|
|
142
|
+
- `src/api` — HTTP route handlers (REST)
|
|
143
|
+
- `src/services` — Business logic layer
|
|
144
|
+
- `src/models` — SQLAlchemy ORM entities
|
|
145
|
+
|
|
146
|
+
## Files
|
|
147
|
+
|
|
148
|
+
### `src/services/order.py`
|
|
149
|
+
Manages the full lifecycle of customer orders.
|
|
150
|
+
|
|
151
|
+
**Entities:**
|
|
152
|
+
- Order(id, user_id, total, status: enum[pending, paid, cancelled])
|
|
153
|
+
|
|
154
|
+
**Functions:**
|
|
155
|
+
- create_order(user_id, items) → Order: validates stock and creates order
|
|
156
|
+
- cancel_order(order_id) → bool: cancels if status allows
|
|
157
|
+
- calculate_total(items) → Decimal: applies discounts and rounds
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# savetoken 🪙
|
|
2
|
+
|
|
3
|
+
> Convert your Repomix codebase dump into a **compact semantic summary** — optimized for AI prompts, with zero raw code.
|
|
4
|
+
|
|
5
|
+
Instead of pasting 50,000 tokens of source code into ChatGPT or Claude, `savetoken` uses a **free LLM** to read the code once and produce a structured `codebrief.md` you reuse forever.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## How it fits in your workflow
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
Your project
|
|
13
|
+
↓
|
|
14
|
+
npx repomix # packs codebase → repomix-output.xml
|
|
15
|
+
↓
|
|
16
|
+
savetoken summarize repomix-output.xml # translates → codebrief.md ← you are here
|
|
17
|
+
↓
|
|
18
|
+
Paste codebrief.md into your AI prompt # ~500 tokens instead of 50k+
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install savetoken
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Requires Python 3.10+. Zero dependencies beyond stdlib.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Quick start
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# 1. Pack your codebase with Repomix
|
|
37
|
+
npx repomix
|
|
38
|
+
|
|
39
|
+
# 2. Summarize with Gemini Flash (free tier)
|
|
40
|
+
export GEMINI_API_KEY=your_key_here
|
|
41
|
+
savetoken summarize repomix-output.xml
|
|
42
|
+
|
|
43
|
+
# Output: codebrief.md — paste it into any AI prompt!
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Python API
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from savetoken import SaveToken
|
|
50
|
+
|
|
51
|
+
st = SaveToken(provider="gemini", api_key="...")
|
|
52
|
+
summary = st.summarize("repomix-output.xml")
|
|
53
|
+
st.save(summary, "codebrief.md")
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Providers
|
|
59
|
+
|
|
60
|
+
| Provider | Free tier | Set env var |
|
|
61
|
+
|---|---|---|
|
|
62
|
+
| **Gemini Flash 2.0** ⭐ | ✅ Generous | `GEMINI_API_KEY` |
|
|
63
|
+
| **Groq (Llama 3.3 70B)** | ✅ Fast | `GROQ_API_KEY` |
|
|
64
|
+
| **Mistral Small** | ✅ Available | `MISTRAL_API_KEY` |
|
|
65
|
+
| **OpenAI / DeepSeek** | Paid | `OPENAI_API_KEY` |
|
|
66
|
+
|
|
67
|
+
Gemini is recommended — 1M token context window handles large codebases.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## CLI
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
savetoken summarize repomix-output.xml
|
|
75
|
+
savetoken summarize repomix-output.xml --provider groq --output brief.md --lang pt
|
|
76
|
+
savetoken summarize repomix-output.xml --force # ignore cache, regenerate all
|
|
77
|
+
savetoken clear-cache
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
| Flag | Default | Description |
|
|
81
|
+
|---|---|---|
|
|
82
|
+
| `--provider` | `gemini` | LLM provider |
|
|
83
|
+
| `--output` | `codebrief.md` | Output file |
|
|
84
|
+
| `--format` | `markdown` | `markdown` or `json` |
|
|
85
|
+
| `--lang` | `en` | Description language |
|
|
86
|
+
| `--force` | off | Ignore the cache and re-summarize every file |
|
|
87
|
+
| `--cache-dir` | `.savetoken_cache` | Cache location |
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## Caching
|
|
92
|
+
|
|
93
|
+
`savetoken` hashes each file's content. On re-runs, **only changed files are re-summarized**. The cache lives in `.savetoken_cache/` — commit it to git to share across your team.
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## VS Code Extension
|
|
98
|
+
|
|
99
|
+
Install the extension and use:
|
|
100
|
+
- **Command Palette → SaveToken: Summarize Codebase**
|
|
101
|
+
- Right-click `repomix-output.xml` → **SaveToken: Summarize**
|
|
102
|
+
- Configure provider and API key in Settings → SaveToken
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Example output (`codebrief.md`)
|
|
107
|
+
|
|
108
|
+
```markdown
|
|
109
|
+
# MyShop — savetoken summary
|
|
110
|
+
|
|
111
|
+
E-commerce backend built with FastAPI and PostgreSQL.
|
|
112
|
+
|
|
113
|
+
**Stack:** FastAPI, PostgreSQL, SQLAlchemy, Redis
|
|
114
|
+
**Architecture:** Layered MVC
|
|
115
|
+
**Entry points:** main.py
|
|
116
|
+
|
|
117
|
+
## Directory Map
|
|
118
|
+
- `src/api` — HTTP route handlers (REST)
|
|
119
|
+
- `src/services` — Business logic layer
|
|
120
|
+
- `src/models` — SQLAlchemy ORM entities
|
|
121
|
+
|
|
122
|
+
## Files
|
|
123
|
+
|
|
124
|
+
### `src/services/order.py`
|
|
125
|
+
Manages the full lifecycle of customer orders.
|
|
126
|
+
|
|
127
|
+
**Entities:**
|
|
128
|
+
- Order(id, user_id, total, status: enum[pending, paid, cancelled])
|
|
129
|
+
|
|
130
|
+
**Functions:**
|
|
131
|
+
- create_order(user_id, items) → Order: validates stock and creates order
|
|
132
|
+
- cancel_order(order_id) → bool: cancels if status allows
|
|
133
|
+
- calculate_total(items) → Decimal: applies discounts and rounds
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## License
|
|
139
|
+
|
|
140
|
+
MIT
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "savetoken"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Convert Repomix/repominify output into compact AI-friendly codebase summaries using free LLMs."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
keywords = ["ai", "llm", "codebase", "repomix", "prompt", "tokens", "summarizer"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Topic :: Software Development :: Documentation",
|
|
21
|
+
"Topic :: Utilities",
|
|
22
|
+
]
|
|
23
|
+
# Zero required dependencies — uses only stdlib (urllib, json, hashlib, re, ast)
|
|
24
|
+
dependencies = []
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = ["pytest>=7", "pytest-cov", "ruff", "mypy"]
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
savetoken = "savetoken.cli:main"
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/yourname/savetoken"
|
|
34
|
+
Issues = "https://github.com/yourname/savetoken/issues"
|
|
35
|
+
|
|
36
|
+
[tool.setuptools.packages.find]
|
|
37
|
+
where = ["."]
|
|
38
|
+
include = ["savetoken*"]
|
|
39
|
+
|
|
40
|
+
[tool.ruff]
|
|
41
|
+
line-length = 100
|
|
42
|
+
target-version = "py310"
|
|
43
|
+
|
|
44
|
+
[tool.mypy]
|
|
45
|
+
python_version = "3.10"
|
|
46
|
+
strict = false
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: savetoken
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Convert Repomix/repominify output into compact AI-friendly codebase summaries using free LLMs.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/yourname/savetoken
|
|
7
|
+
Project-URL: Issues, https://github.com/yourname/savetoken/issues
|
|
8
|
+
Keywords: ai,llm,codebase,repomix,prompt,tokens,summarizer
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
16
|
+
Classifier: Topic :: Utilities
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
22
|
+
Requires-Dist: ruff; extra == "dev"
|
|
23
|
+
Requires-Dist: mypy; extra == "dev"
|
|
24
|
+
|
|
25
|
+
# savetoken 🪙
|
|
26
|
+
|
|
27
|
+
> Convert your Repomix codebase dump into a **compact semantic summary** — optimized for AI prompts, with zero raw code.
|
|
28
|
+
|
|
29
|
+
Instead of pasting 50,000 tokens of source code into ChatGPT or Claude, `savetoken` uses a **free LLM** to read the code once and produce a structured `codebrief.md` you reuse forever.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## How it fits in your workflow
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
Your project
|
|
37
|
+
↓
|
|
38
|
+
npx repomix # packs codebase → repomix-output.xml
|
|
39
|
+
↓
|
|
40
|
+
savetoken summarize repomix-output.xml # translates → codebrief.md ← you are here
|
|
41
|
+
↓
|
|
42
|
+
Paste codebrief.md into your AI prompt # ~500 tokens instead of 50k+
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Install
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install savetoken
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Requires Python 3.10+. Zero dependencies beyond stdlib.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Quick start
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# 1. Pack your codebase with Repomix
|
|
61
|
+
npx repomix
|
|
62
|
+
|
|
63
|
+
# 2. Summarize with Gemini Flash (free tier)
|
|
64
|
+
export GEMINI_API_KEY=your_key_here
|
|
65
|
+
savetoken summarize repomix-output.xml
|
|
66
|
+
|
|
67
|
+
# Output: codebrief.md — paste it into any AI prompt!
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Python API
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from savetoken import SaveToken
|
|
74
|
+
|
|
75
|
+
st = SaveToken(provider="gemini", api_key="...")
|
|
76
|
+
summary = st.summarize("repomix-output.xml")
|
|
77
|
+
st.save(summary, "codebrief.md")
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Providers
|
|
83
|
+
|
|
84
|
+
| Provider | Free tier | Set env var |
|
|
85
|
+
|---|---|---|
|
|
86
|
+
| **Gemini Flash 2.0** ⭐ | ✅ Generous | `GEMINI_API_KEY` |
|
|
87
|
+
| **Groq (Llama 3.3 70B)** | ✅ Fast | `GROQ_API_KEY` |
|
|
88
|
+
| **Mistral Small** | ✅ Available | `MISTRAL_API_KEY` |
|
|
89
|
+
| **OpenAI / DeepSeek** | Paid | `OPENAI_API_KEY` |
|
|
90
|
+
|
|
91
|
+
Gemini is recommended — 1M token context window handles large codebases.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## CLI
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
savetoken summarize repomix-output.xml
|
|
99
|
+
savetoken summarize repomix-output.xml --provider groq --output brief.md --lang pt
|
|
100
|
+
savetoken summarize repomix-output.xml --force # ignore cache, regenerate all
|
|
101
|
+
savetoken clear-cache
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
| Flag | Default | Description |
|
|
105
|
+
|---|---|---|
|
|
106
|
+
| `--provider` | `gemini` | LLM provider |
|
|
107
|
+
| `--output` | `codebrief.md` | Output file |
|
|
108
|
+
| `--format` | `markdown` | `markdown` or `json` |
|
|
109
|
+
| `--lang` | `en` | Description language |
|
|
110
|
+
| `--force` | off | Ignore the cache and re-summarize every file |
|
|
111
|
+
| `--cache-dir` | `.savetoken_cache` | Cache location |
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Caching
|
|
116
|
+
|
|
117
|
+
`savetoken` hashes each file's content. On re-runs, **only changed files are re-summarized**. The cache lives in `.savetoken_cache/` — commit it to git to share across your team.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## VS Code Extension
|
|
122
|
+
|
|
123
|
+
Install the extension and use:
|
|
124
|
+
- **Command Palette → SaveToken: Summarize Codebase**
|
|
125
|
+
- Right-click `repomix-output.xml` → **SaveToken: Summarize**
|
|
126
|
+
- Configure provider and API key in Settings → SaveToken
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Example output (`codebrief.md`)
|
|
131
|
+
|
|
132
|
+
```markdown
|
|
133
|
+
# MyShop — savetoken summary
|
|
134
|
+
|
|
135
|
+
E-commerce backend built with FastAPI and PostgreSQL.
|
|
136
|
+
|
|
137
|
+
**Stack:** FastAPI, PostgreSQL, SQLAlchemy, Redis
|
|
138
|
+
**Architecture:** Layered MVC
|
|
139
|
+
**Entry points:** main.py
|
|
140
|
+
|
|
141
|
+
## Directory Map
|
|
142
|
+
- `src/api` — HTTP route handlers (REST)
|
|
143
|
+
- `src/services` — Business logic layer
|
|
144
|
+
- `src/models` — SQLAlchemy ORM entities
|
|
145
|
+
|
|
146
|
+
## Files
|
|
147
|
+
|
|
148
|
+
### `src/services/order.py`
|
|
149
|
+
Manages the full lifecycle of customer orders.
|
|
150
|
+
|
|
151
|
+
**Entities:**
|
|
152
|
+
- Order(id, user_id, total, status: enum[pending, paid, cancelled])
|
|
153
|
+
|
|
154
|
+
**Functions:**
|
|
155
|
+
- create_order(user_id, items) → Order: validates stock and creates order
|
|
156
|
+
- cancel_order(order_id) → bool: cancels if status allows
|
|
157
|
+
- calculate_total(items) → Decimal: applies discounts and rounds
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
savetoken.egg-info/PKG-INFO
|
|
4
|
+
savetoken.egg-info/SOURCES.txt
|
|
5
|
+
savetoken.egg-info/dependency_links.txt
|
|
6
|
+
savetoken.egg-info/entry_points.txt
|
|
7
|
+
savetoken.egg-info/requires.txt
|
|
8
|
+
savetoken.egg-info/top_level.txt
|
|
9
|
+
tests/test_savetoken.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,741 @@
|
|
|
1
|
+
"""Tests for savetoken 0.1.0."""
|
|
2
|
+
import json, pytest
|
|
3
|
+
from savetoken.core import _parse_repomix, _hash, _format_sigs, _compact_sig
|
|
4
|
+
from savetoken.output import render_markdown, render_json, _e
|
|
5
|
+
from savetoken.models import *
|
|
6
|
+
from savetoken.exceptions import ParseError, InputError
|
|
7
|
+
from savetoken.providers.base import _parse_json, _safe_truncate, _ensure_list
|
|
8
|
+
|
|
9
|
+
REPOMIX_XML = """
|
|
10
|
+
<file path="src/main.py">
|
|
11
|
+
def main():
|
|
12
|
+
print("hello")
|
|
13
|
+
</file>
|
|
14
|
+
<file path="src/orders.py">
|
|
15
|
+
def create_order(user_id, items):
|
|
16
|
+
return reserve(items)
|
|
17
|
+
def reserve(items):
|
|
18
|
+
return items
|
|
19
|
+
</file>
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
23
|
+
# Parser
|
|
24
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
25
|
+
def test_parse_xml():
    """The XML fixture contains two <file> entries, so two chunks are expected."""
    parsed = _parse_repomix(REPOMIX_XML)
    assert len(parsed) == 2
|
|
26
|
+
def test_parse_plain():
    """Plain-text input with two '=====' headers is expected to yield two chunks."""
    plain_dump = "===== a.py =====\ncode\n===== b.py =====\nmore\n"
    parsed = _parse_repomix(plain_dump)
    assert len(parsed) == 2
|
|
27
|
+
def test_parse_empty():
    """Whitespace-only input is expected to parse to an empty list."""
    parsed = _parse_repomix(" ")
    assert parsed == []
|
|
28
|
+
def test_parse_no_empty():
    """Of one empty and one non-empty <file> element, only one chunk is expected."""
    xml = '<file path="e.py"></file><file path="r.py">x</file>'
    parsed = _parse_repomix(xml)
    assert len(parsed) == 1
|
|
29
|
+
def test_hash_stable():
    """Hashing the same text twice matches; hashing different text does not."""
    first = _hash("x")
    second = _hash("x")
    assert first == second
    assert second != _hash("y")
|
|
30
|
+
|
|
31
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
32
|
+
# Classifier
|
|
33
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
34
|
+
from savetoken.classifier import classify, extract_critical_comments
|
|
35
|
+
def test_cls_entry():
    """main.py with empty content is expected to classify as STRATEGIC."""
    role = classify("main.py", "")
    assert role == FileRole.STRATEGIC
|
|
36
|
+
def test_cls_schema():
    """A file under src/schemas/ is expected to classify as STRATEGIC."""
    role = classify("src/schemas/order.py", "")
    assert role == FileRole.STRATEGIC
|
|
37
|
+
def test_cls_test():
    """A test module under tests/ is expected to classify as STRATEGIC."""
    role = classify("tests/test_orders.py", "")
    assert role == FileRole.STRATEGIC
|
|
38
|
+
def test_cls_config():
    """settings.py with empty content is expected to classify as STRATEGIC."""
    role = classify("settings.py", "")
    assert role == FileRole.STRATEGIC
|
|
39
|
+
def test_cls_hack():
    """Content carrying a HACK comment is expected to classify as STRATEGIC."""
    body = "# HACK: do not remove"
    role = classify("util.py", body)
    assert role == FileRole.STRATEGIC
|
|
40
|
+
def test_cls_pydantic():
    """A class deriving from BaseModel is expected to classify as STRATEGIC."""
    body = "class OrderIn(BaseModel):\n id: int"
    role = classify("dto.py", body)
    assert role == FileRole.STRATEGIC
|
|
41
|
+
def test_cls_proto():
    """A .proto file is expected to classify as STRATEGIC."""
    body = "syntax='proto3'"
    role = classify("order.proto", body)
    assert role == FileRole.STRATEGIC
|
|
42
|
+
def test_cls_adr():
    """A markdown file under docs/adr/ is expected to classify as STRATEGIC."""
    role = classify("docs/adr/001.md", "# ADR")
    assert role == FileRole.STRATEGIC
|
|
43
|
+
def test_cls_exception():
    """A file under src/exceptions/ is expected to classify as STRATEGIC."""
    role = classify("src/exceptions/base.py", "")
    assert role == FileRole.STRATEGIC
|
|
44
|
+
def test_cls_skip_pyc():
    """A compiled .pyc file is expected to classify as SKIP."""
    role = classify("a.cpython-312.pyc", "")
    assert role == FileRole.SKIP
|
|
45
|
+
def test_cls_skip_mig():
    """A file under migrations/ is expected to classify as SKIP."""
    role = classify("migrations/0001_init.py", "")
    assert role == FileRole.SKIP
|
|
46
|
+
def test_cls_skip_lock():
    """package-lock.json is expected to classify as SKIP."""
    role = classify("package-lock.json", "{}")
    assert role == FileRole.SKIP
|
|
47
|
+
def test_cls_regular():
    """A service module with 30 repeated function stubs is expected to be REGULAR."""
    body = "def charge():\n pass\n" * 30
    role = classify("src/services/billing.py", body)
    assert role == FileRole.REGULAR
|
|
48
|
+
def test_cls_small_file():
    """A two-line source file is expected to classify as STRATEGIC."""
    body = "x=1\ny=2"
    role = classify("src/tiny.py", body)
    assert role == FileRole.STRATEGIC
|
|
49
|
+
def test_extract_crit():
    """Three marker comments, one a repeat, are expected to yield two results."""
    source = "x=1\n# HACK: bug\ny=2\n# FIXME: broken\n# HACK: bug"
    found = extract_critical_comments(source)
    assert len(found) == 2
|
|
52
|
+
|
|
53
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
54
|
+
# AST types
|
|
55
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
56
|
+
from savetoken.ast_types import extract_signatures
|
|
57
|
+
|
|
58
|
+
def test_ast_basic_sig():
    """Name, return type and first parameter annotation are read from a plain def."""
    source = "def foo(x: int, y: str) -> bool:\n return True\n"
    by_path = extract_signatures([{"path": "a.py", "content": source}])
    assert "a.py" in by_path
    first_fn = by_path["a.py"].functions[0]
    assert first_fn.name == "foo"
    assert first_fn.return_type == "bool"
    assert first_fn.params[0].annotation == "int"
|
|
66
|
+
|
|
67
|
+
def test_ast_async():
    """An `async def` is expected to be flagged via `is_async`."""
    source = "async def bar() -> None:\n pass\n"
    by_path = extract_signatures([{"path": "a.py", "content": source}])
    first_fn = by_path["a.py"].functions[0]
    assert first_fn.is_async
|
|
71
|
+
|
|
72
|
+
def test_ast_raises():
    """An exception raised in the body is expected to appear in `raises`."""
    source = "def f():\n raise ValueError('x')\n"
    by_path = extract_signatures([{"path": "a.py", "content": source}])
    raised = by_path["a.py"].functions[0].raises
    assert "ValueError" in raised
|
|
76
|
+
|
|
77
|
+
def test_ast_module_doc():
    """The module docstring text is expected to be exposed as `module_doc`."""
    source = '"""Order service module."""\ndef f(): pass\n'
    by_path = extract_signatures([{"path": "a.py", "content": source}])
    doc = by_path["a.py"].module_doc
    assert "Order service" in doc
|
|
81
|
+
|
|
82
|
+
def test_ast_class_methods():
    """Methods are expected to be grouped under their class name."""
    source = "class Svc:\n def run(self, x: int): pass\n"
    by_path = extract_signatures([{"path": "a.py", "content": source}])
    classes = by_path["a.py"].classes
    assert "Svc" in classes
    assert by_path["a.py"].classes["Svc"][0].name == "run"
|
|
87
|
+
|
|
88
|
+
def test_ast_non_python():
    """A CSS file is expected to produce no signatures at all."""
    css_chunk = {"path": "style.css", "content": "body{}"}
    by_path = extract_signatures([css_chunk])
    assert by_path == {}
|
|
91
|
+
|
|
92
|
+
def test_ast_compact():
    """compact() output is expected to mention async, types and raised errors."""
    from savetoken.ast_types import FunctionSignature, Param

    params = [Param("uid", "UUID"), Param("req", "CreateOrderRequest")]
    sig = FunctionSignature("create_order", params, "OrderResponse", ["PaymentError"], True)
    rendered = sig.compact()
    for expected in ("async", "UUID", "OrderResponse", "PaymentError"):
        assert expected in rendered
|
|
100
|
+
|
|
101
|
+
def test_format_sigs_roundtrip():
    """Signatures converted with asdict() are expected to format with name and types."""
    source = "def foo(x: int) -> bool:\n pass\n"
    by_path = extract_signatures([{"path": "a.py", "content": source}])
    from dataclasses import asdict

    as_dict = asdict(by_path["a.py"])
    formatted = _format_sigs(as_dict)
    assert all(token in formatted for token in ("foo", "int", "bool"))
|
|
108
|
+
|
|
109
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
110
|
+
# Analyzer (prod/test split)
|
|
111
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
112
|
+
from savetoken.analyzer import build_graph
|
|
113
|
+
def test_graph_cross_file():
    """A call from a.py into a function defined in b.py is expected as a call edge."""
    caller = {"path": "a.py", "content": "def foo():\n bar()\n"}
    callee = {"path": "b.py", "content": "def bar():\n pass\n"}
    graph = build_graph([caller, callee])
    assert any(
        edge.caller_file == "a.py" and edge.callee_fn == "bar"
        for edge in graph.calls
    )
|
|
117
|
+
|
|
118
|
+
def test_graph_test_separation():
    """Calls made from a test file are expected in `test_calls`, not in `calls`."""
    test_chunk = {"path": "test_x.py", "content": "def test_foo():\n create()\n"}
    prod_chunk = {"path": "service.py", "content": "def create():\n pass\n"}
    graph = build_graph([test_chunk, prod_chunk])
    assert not any(edge.caller_file == "test_x.py" for edge in graph.calls)
    assert any(edge.caller_file == "test_x.py" for edge in graph.test_calls)
|
|
123
|
+
|
|
124
|
+
def test_graph_inheritance():
    """A subclass defined in another file is expected as an inheritance edge."""
    base_chunk = {"path": "base.py", "content": "class Animal:\n pass\n"}
    child_chunk = {"path": "dog.py", "content": "class Dog(Animal):\n pass\n"}
    graph = build_graph([base_chunk, child_chunk])
    assert any(edge.child_class == "Dog" for edge in graph.inheritance)
|
|
128
|
+
|
|
129
|
+
def test_graph_no_self_calls():
    """No call edge may have the same file as both caller and callee."""
    recursive = {"path": "a.py", "content": "def foo():\n foo()\n"}
    graph = build_graph([recursive])
    assert not any(edge.caller_file == edge.callee_file for edge in graph.calls)
|
|
132
|
+
|
|
133
|
+
def test_graph_syntax_error():
    """Unparseable source is expected to give a graph with no call edges."""
    broken = {"path": "bad.py", "content": "def (x:"}
    graph = build_graph([broken])
    assert graph.calls == []
|
|
136
|
+
|
|
137
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
138
|
+
# Ranker
|
|
139
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
140
|
+
from savetoken.ranker import rank_functions
|
|
141
|
+
def test_ranker_basic():
    """A function called from two files is expected among the ranked results."""
    edges = [
        CallEdge("a.py", "foo", "b.py", "bar"),
        CallEdge("c.py", "baz", "b.py", "bar"),
    ]
    chunks = [{"path": "b.py", "content": "def bar():\n return 1\n"}]
    ranked = rank_functions(edges, chunks, top_n=5)
    assert any(entry.name == "bar" for entry in ranked)
|
|
146
|
+
|
|
147
|
+
def test_ranker_empty():
    """With no edges and no chunks the ranking is expected to be empty."""
    ranked = rank_functions([], [], top_n=5)
    assert ranked == []
|
|
148
|
+
|
|
149
|
+
def test_ranker_budget():
    """A very long function is expected to be dropped under a 5-token budget."""
    edges = [CallEdge("a.py", "foo", "b.py", "bar")]
    long_body = "def bar():\n " + "x=1\n" * 1000
    chunks = [{"path": "b.py", "content": long_body}]
    ranked = rank_functions(edges, chunks, top_n=5, token_budget=5)
    assert ranked == []
|
|
153
|
+
|
|
154
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
155
|
+
# Env extractor
|
|
156
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
157
|
+
from savetoken.env_extractor import extract_env_vars
|
|
158
|
+
def test_env_getenv():
    """os.getenv with a fallback is expected to record both name and default."""
    source = "import os\nX=os.getenv('DB_URL','sqlite')\n"
    found = extract_env_vars([{"path": "a.py", "content": source}])
    assert any(var.name == "DB_URL" and var.default == "sqlite" for var in found)
|
|
161
|
+
|
|
162
|
+
def test_env_environ():
    """An os.environ[...] subscript is expected to be reported as required."""
    source = "import os\nX=os.environ['SECRET']\n"
    found = extract_env_vars([{"path": "a.py", "content": source}])
    assert any(var.name == "SECRET" and var.required for var in found)
|
|
165
|
+
|
|
166
|
+
def test_env_dotenv():
    """Keys listed in a .env.example file are expected among the extracted names."""
    dotenv = {"path": ".env.example", "content": "DB=postgres\nDEBUG=false\n"}
    names = {var.name for var in extract_env_vars([dotenv])}
    assert names >= {"DB", "DEBUG"}
|
|
169
|
+
|
|
170
|
+
def test_env_dedup():
    """A variable read in two files is expected once, listing both files."""
    chunks = [
        {"path": "a.py", "content": "import os\nX=os.getenv('V')\n"},
        {"path": "b.py", "content": "import os\nY=os.getenv('V','d')\n"},
    ]
    hits = [var for var in extract_env_vars(chunks) if var.name == "V"]
    assert len(hits) == 1
    assert len(hits[0].files) == 2
|
|
175
|
+
|
|
176
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
177
|
+
# SQL schema extractor
|
|
178
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
179
|
+
from savetoken.schema_sql import extract_sql_schema
|
|
180
|
+
def test_sql_raw():
    """A CREATE TABLE statement in a .sql file is expected to yield that table."""
    ddl = "CREATE TABLE orders (id INT, total FLOAT);\n"
    found = extract_sql_schema([{"path": "s.sql", "content": ddl}])
    assert any(table.name == "orders" for table in found)
|
|
183
|
+
|
|
184
|
+
def test_sql_fk():
    """A FOREIGN KEY clause is expected to surface the referenced table name."""
    ddl = "CREATE TABLE orders (id INT, user_id INT, FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE);"
    found = extract_sql_schema([{"path": "s.sql", "content": ddl}])
    orders = next(table for table in found if table.name == "orders")
    assert any("users" in fk for fk in orders.foreign_keys)
|
|
189
|
+
|
|
190
|
+
def test_sql_django():
    """A Django models.Model subclass is expected as a lower-cased table name."""
    source = "from django.db import models\nclass Order(models.Model):\n total = models.DecimalField(max_length=10)\n"
    found = extract_sql_schema([{"path": "models.py", "content": source}])
    assert any(table.name == "order" for table in found)
|
|
194
|
+
|
|
195
|
+
def test_sql_sqla():
    """A SQLAlchemy model is expected to be reported under its __tablename__."""
    source = "from sqlalchemy import Column,Integer\nclass Product(Base):\n __tablename__='products'\n id=Column(Integer,nullable=False)\n"
    found = extract_sql_schema([{"path": "m.py", "content": source}])
    assert any(table.name == "products" for table in found)
|
|
199
|
+
|
|
200
|
+
def test_sql_empty():
    """Source without any schema definition is expected to give no tables."""
    plain = {"path": "a.py", "content": "x=1"}
    found = extract_sql_schema([plain])
    assert found == []
|
|
202
|
+
|
|
203
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
204
|
+
# Critical context
|
|
205
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
206
|
+
from savetoken.critical_context import extract_critical_blocks
|
|
207
|
+
def test_crit_window():
    """One HACK marker on line index 15 gives one block whose context includes "line5"."""
    source_lines = [
        "# HACK: workaround" if idx == 15 else f"line{idx}" for idx in range(30)
    ]
    found = extract_critical_blocks("f.py", "\n".join(source_lines))
    assert len(found) == 1
    only = found[0]
    assert "HACK" in only.marker
    assert "line5" in only.context
|
|
212
|
+
|
|
213
|
+
def test_crit_no_overlap():
    """Two HACK markers on adjacent lines result in a single block."""
    source_lines = [f"l{idx}" for idx in range(30)]
    source_lines[5] = "# HACK: a"
    source_lines[6] = "# HACK: b"
    merged = extract_critical_blocks("f.py", "\n".join(source_lines))
    assert len(merged) == 1
|
|
216
|
+
|
|
217
|
+
def test_crit_empty():
    """Content without any marker comment yields no critical blocks."""
    found = extract_critical_blocks("f.py", "x=1")
    assert found == []
|
|
219
|
+
|
|
220
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
221
|
+
# Budget
|
|
222
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
223
|
+
from savetoken.budget import allocate_budget, compute_total_budget, estimate_tokens, trim_to_budget
|
|
224
|
+
|
|
225
|
+
def test_budget_all_fit():
    """Both chunks are included and nothing is excluded for this allocation."""
    chunks = [
        {"path": "a.py", "content": "x=1"},
        {"path": "b.py", "content": "y=2"},
    ]
    total_tokens = sum(estimate_tokens(chunk["content"]) for chunk in chunks)
    allocation = allocate_budget(chunks, total_tokens, 2.0)
    assert set(allocation.included) == {"a.py", "b.py"}
    assert allocation.excluded == []
|
|
229
|
+
|
|
230
|
+
def test_budget_drop_tests_first():
    """With a tight ratio the large test file is excluded while schemas.py is kept."""
    schema_src = "class A: pass"
    test_src = "x=1\n" * 5000
    chunks = [
        {"path": "tests/test_x.py", "content": test_src},
        {"path": "src/schemas.py", "content": schema_src},
    ]
    total = estimate_tokens(test_src) + estimate_tokens(schema_src)
    # budget just enough for schemas but not tests
    allocation = allocate_budget(chunks, total, 0.15)
    assert "src/schemas.py" in allocation.included
    assert "tests/test_x.py" in allocation.excluded
|
|
239
|
+
|
|
240
|
+
def test_budget_no_truncate():
    """settings.py ends up in the included list for this allocation."""
    # Even if over budget, included files are whole (not truncated)
    settings_chunk = {"path": "settings.py", "content": "X=1\nY=2\nZ=3\n"}
    chunks = [settings_chunk]
    raw_tokens = estimate_tokens(chunks[0]["content"]) * 10
    allocation = allocate_budget(chunks, raw_tokens, 2.0)
    assert "settings.py" in allocation.included
|
|
245
|
+
|
|
246
|
+
def test_compute_total_budget():
    """The budget is ratio * tokens, but never below the 2_000 floor."""
    scaled = compute_total_budget(10_000, 0.5)
    assert scaled == 5_000
    floored = compute_total_budget(100, 0.5)
    assert floored == 2_000  # minimum floor
|
|
249
|
+
|
|
250
|
+
def test_trim_to_budget():
    """Over-budget text keeps its first line and gains a "truncated" notice."""
    oversized = "line1\nline2\n" + "x" * 1000
    trimmed = trim_to_budget(oversized, 5)
    assert "line1" in trimmed
    assert "truncated" in trimmed
|
|
253
|
+
|
|
254
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
255
|
+
# Deduplicator
|
|
256
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
257
|
+
from savetoken.deduplicator import deduplicate
|
|
258
|
+
|
|
259
|
+
def _make_dup_summary() -> CodebaseSummary:
    """Build a summary fixture mixing a verbatim strategic file with a regular file.

    Graph edges, hot functions, SQL tables and critical blocks each reference
    both ``src/schemas.py`` (carries ``verbatim`` text) and ``src/orders.py``.
    """
    schemas_file = FileSummary(
        "src/schemas.py", "Schemas", FileRole.STRATEGIC.value, verbatim="class A: pass"
    )
    orders_file = FileSummary(
        "src/orders.py", "Orders", FileRole.REGULAR.value,
        functions=["create_order()->Order"],
    )
    graph = CodebaseGraph(
        calls=[CallEdge("src/orders.py", "create", "src/schemas.py", "A")],
        inheritance=[
            InheritanceEdge("src/schemas.py", "A", "", "BaseModel"),  # external
            InheritanceEdge("src/orders.py", "OrderPaid", "src/orders.py", "Order"),  # internal
        ],
        test_calls=[CallEdge("tests/test_x.py", "test_a", "src/orders.py", "create")],
    )
    hot = [
        HotFunction("src/schemas.py", "A", 0.9, "def A(): pass", 3),  # verbatim file
        HotFunction("src/orders.py", "create", 0.5, "def create(): pass", 1),
    ]
    tables = [
        SqlTable("schemas_a", "", "src/schemas.py"),  # verbatim source
        SqlTable("orders", "", "src/orders.py"),  # non-verbatim
    ]
    blocks = [
        CriticalBlock("src/schemas.py", 1, "HACK", "ctx"),  # in verbatim
        CriticalBlock("src/orders.py", 5, "FIXME", "ctx"),  # not verbatim
    ]
    return CodebaseSummary(
        files=[schemas_file, orders_file],
        overview=ProjectOverview("P", "Desc"),
        graph=graph,
        hot_functions=hot,
        sql_tables=tables,
        critical_blocks=blocks,
    )
|
|
288
|
+
|
|
289
|
+
def test_dedup_removes_sql_from_verbatim():
    """No table sourced from src/schemas.py remains; the "orders" table does."""
    deduped = deduplicate(_make_dup_summary())
    sources = [table.source for table in deduped.sql_tables]
    assert "src/schemas.py" not in sources
    names = [table.name for table in deduped.sql_tables]
    assert "orders" in names
|
|
293
|
+
|
|
294
|
+
def test_dedup_removes_external_inheritance():
    """No inheritance edge with parent class BaseModel is left after dedup."""
    deduped = deduplicate(_make_dup_summary())
    parents = [edge.parent_class for edge in deduped.graph.inheritance]
    assert "BaseModel" not in parents
|
|
297
|
+
|
|
298
|
+
def test_dedup_keeps_internal_inheritance():
    """The OrderPaid inheritance edge is still present after dedup."""
    deduped = deduplicate(_make_dup_summary())
    children = [edge.child_class for edge in deduped.graph.inheritance]
    assert "OrderPaid" in children
|
|
301
|
+
|
|
302
|
+
def test_dedup_removes_hot_from_verbatim():
    """Hot functions from src/schemas.py are dropped; "create" is kept."""
    deduped = deduplicate(_make_dup_summary())
    hot_files = [hot.file for hot in deduped.hot_functions]
    assert "src/schemas.py" not in hot_files
    hot_names = [hot.name for hot in deduped.hot_functions]
    assert "create" in hot_names
|
|
306
|
+
|
|
307
|
+
def test_dedup_removes_crit_from_verbatim():
    """Critical blocks from src/schemas.py are dropped; src/orders.py blocks stay."""
    deduped = deduplicate(_make_dup_summary())
    block_files = [block.file for block in deduped.critical_blocks]
    assert "src/schemas.py" not in block_files
    assert "src/orders.py" in block_files
|
|
311
|
+
|
|
312
|
+
def test_dedup_file_summaries_regular_only():
    """Every entry in _dedup_file_summaries carries the REGULAR role."""
    deduped = deduplicate(_make_dup_summary())
    regular = FileRole.REGULAR.value
    for summary in deduped._dedup_file_summaries:
        assert summary.role == regular
|
|
315
|
+
|
|
316
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
317
|
+
# Provider base
|
|
318
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
319
|
+
def test_parse_json_clean():
    """A bare JSON object string is parsed into the matching dict."""
    parsed = _parse_json('{"a":1}')
    assert parsed == {"a": 1}
|
|
320
|
+
def test_parse_json_fences():
    """JSON wrapped in a ```json fenced block parses to the same dict."""
    fenced = "```json\n{\"a\":1}\n```"
    parsed = _parse_json(fenced)
    assert parsed == {"a": 1}
|
|
321
|
+
def test_parse_json_raises():
    """Non-JSON input raises ParseError."""
    with pytest.raises(ParseError):
        _parse_json("nope", "test")
|
|
323
|
+
def test_safe_truncate():
    """Short text passes through unchanged; long text gets a "truncated" marker."""
    untouched = _safe_truncate("short", 100)
    assert untouched == "short"
    long_text = "a\nb\n" + "x" * 500
    clipped = _safe_truncate(long_text, 10)
    assert "truncated" in clipped
|
|
326
|
+
def test_ensure_list():
    """Lists pass through, None becomes [], and a scalar is wrapped in a list."""
    from_list = _ensure_list(["a"])
    assert from_list == ["a"]
    from_none = _ensure_list(None)
    assert from_none == []
    from_scalar = _ensure_list("x")
    assert from_scalar == ["x"]
|
|
330
|
+
|
|
331
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
332
|
+
# Output renderer
|
|
333
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
334
|
+
|
|
335
|
+
def _full_summary() -> CodebaseSummary:
    """Build a CodebaseSummary fixture with every section populated.

    ``_dedup_file_summaries`` is set by hand to the first (regular) file.
    """
    orders_file = FileSummary(
        "src/orders.py", "Manages orders", FileRole.REGULAR.value,
        functions=["create_order(uid,items)->Order"],
        entities=["Order(id,total,status)"],
        critical_comments=["# HACK: ORM bug"],
        module_doc="Order lifecycle management.",
    )
    schemas_file = FileSummary(
        "src/schemas.py", "Schemas", FileRole.STRATEGIC.value,
        verbatim="class OrderIn(BaseModel):\n id: int",
        module_doc="Pydantic schemas.",
    )
    overview = ProjectOverview(
        "Shop", "E-commerce", ["FastAPI", "PostgreSQL"],
        "Layered MVC", ["main.py"], {"src": "code"},
        {"fastapi": "0.100.0", "sqlalchemy": "1.4.0"},
    )
    graph = CodebaseGraph(
        calls=[CallEdge("src/orders.py", "create_order", "src/inv.py", "reserve")],
        inheritance=[InheritanceEdge("src/orders.py", "PaidOrder", "src/orders.py", "Order")],
        test_calls=[CallEdge("tests/t.py", "test_create", "src/orders.py", "create_order")],
    )
    post_order_flow = Flow(
        "POST order", "main.py", "post_order",
        [FlowStep("main.py", "post_order"), FlowStep("src/orders.py", "create_order")],
        "Creates an order.",
    )
    hot_create = HotFunction(
        "src/orders.py", "create_order", 0.9,
        "def create_order(uid,items):\n return items", 2,
    )
    orders_table = SqlTable(
        "orders", ["id INT", "total FLOAT"], "src/models.py",
        foreign_keys=["user_id → users.id ON DELETE CASCADE"],
        indexes=["(user_id, created_at)"],
    )
    summary = CodebaseSummary(
        files=[orders_file, schemas_file],
        overview=overview,
        graph=graph,
        flows=[post_order_flow],
        hot_functions=[hot_create],
        env_vars=[EnvVar("DATABASE_URL", "postgresql://localhost", ["src/db.py"], True)],
        sql_tables=[orders_table],
        critical_blocks=[
            CriticalBlock("src/orders.py", 42, "HACK", "line41\n>>> # HACK\nline43")
        ],
    )
    summary._dedup_file_summaries = [summary.files[0]]
    return summary
|
|
368
|
+
|
|
369
|
+
def test_md_has_toc():
    """The rendered markdown contains a "## Contents" heading."""
    rendered = render_markdown(_full_summary())
    assert "## Contents" in rendered
|
|
372
|
+
|
|
373
|
+
def test_md_overview():
    """The project name and a stack entry both appear in the markdown."""
    rendered = render_markdown(_full_summary())
    assert "Shop" in rendered
    assert "FastAPI" in rendered
|
|
376
|
+
|
|
377
|
+
def test_md_dep_versions():
    """Dependency versions are rendered in name==version form."""
    rendered = render_markdown(_full_summary())
    assert "fastapi==0.100.0" in rendered
|
|
380
|
+
|
|
381
|
+
def test_md_env_table():
    """The environment variable name appears in the markdown."""
    rendered = render_markdown(_full_summary())
    assert "DATABASE_URL" in rendered
|
|
384
|
+
|
|
385
|
+
def test_md_sql_fk():
    """The schema section, the foreign key target and the index all appear."""
    rendered = render_markdown(_full_summary())
    for expected in ("Database Schema", "users.id", "(user_id, created_at)"):
        assert expected in rendered
|
|
390
|
+
|
|
391
|
+
def test_md_flows_compact():
    """The flows section title and the entry function name are rendered."""
    rendered = render_markdown(_full_summary())
    for expected in ("Execution Flows", "post_order"):
        assert expected in rendered
|
|
395
|
+
|
|
396
|
+
def test_md_call_graph_compressed():
    """The call graph section title and the callee name are rendered."""
    rendered = render_markdown(_full_summary())
    for expected in ("Call Graph", "reserve"):
        assert expected in rendered
|
|
400
|
+
|
|
401
|
+
def test_md_test_coverage_line():
    """Either the test function name or a "Tests cover" line is rendered."""
    rendered = render_markdown(_full_summary())
    assert any(text in rendered for text in ("test_create", "Tests cover"))
|
|
404
|
+
|
|
405
|
+
def test_md_hot_functions():
    """The markdown contains a "Hot Functions" section."""
    rendered = render_markdown(_full_summary())
    assert "Hot Functions" in rendered
|
|
408
|
+
|
|
409
|
+
def test_md_verbatim_files():
    """The verbatim section, the verbatim class name and its module doc are rendered."""
    rendered = render_markdown(_full_summary())
    assert "Verbatim Files" in rendered
    assert "OrderIn" in rendered
    # module_doc of the strategic file
    assert "Pydantic schemas." in rendered
|
|
414
|
+
|
|
415
|
+
def test_md_critical_blocks():
    """The markdown contains a "Critical Blocks" section."""
    rendered = render_markdown(_full_summary())
    assert "Critical Blocks" in rendered
|
|
418
|
+
|
|
419
|
+
def test_md_file_summaries_module_doc():
    """The regular file's module_doc text is rendered."""
    rendered = render_markdown(_full_summary())
    assert "Order lifecycle management." in rendered
|
|
422
|
+
|
|
423
|
+
def test_md_source_tags():
    """At least one of the known source tags appears in the markdown."""
    rendered = render_markdown(_full_summary())
    tags = ("[ast+verbatim]", "[verbatim]", "[llm]")
    assert any(tag in rendered for tag in tags)
|
|
426
|
+
|
|
427
|
+
def test_json_valid():
    """render_json emits parseable JSON with all top-level sections present."""
    payload = json.loads(render_json(_full_summary()))
    required = (
        "overview", "files", "graph", "hot_functions",
        "env_vars", "sql_tables", "critical_blocks",
    )
    for key in required:
        assert key in payload
    # stripped from JSON
    assert "_dedup_file_summaries" not in payload
|
|
432
|
+
|
|
433
|
+
def test_escape():
    """Backticks become apostrophes and pipes become hyphens."""
    assert _e("a`b") == "a'b"
    assert _e("a|b") == "a-b"
|
|
434
|
+
|
|
435
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
436
|
+
# Cache
|
|
437
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
438
|
+
from savetoken.cache import CacheManager
|
|
439
|
+
def test_cache_roundtrip(tmp_path):
    """A value stored with set() is returned unchanged by get()."""
    manager = CacheManager(tmp_path / "c")
    manager.set("k", {"x": 1})
    assert manager.get("k") == {"x": 1}
|
|
441
|
+
def test_cache_miss(tmp_path):
    """get() returns None for a key that was never stored."""
    manager = CacheManager(tmp_path / "c")
    assert manager.get("x") is None
|
|
442
|
+
def test_cache_corrupt(tmp_path):
    """get() returns None when the entry file on disk holds invalid JSON."""
    cache_dir = tmp_path / "c"
    manager = CacheManager(cache_dir)
    # Written after the manager is constructed, as in the original sequence.
    (cache_dir / "bad.json").write_text("bad")
    assert manager.get("bad") is None
|
|
444
|
+
def test_cache_clear(tmp_path):
    """After clear(), a previously stored key is gone."""
    manager = CacheManager(tmp_path / "c")
    manager.set("k", {})
    manager.clear()
    assert manager.get("k") is None
|
|
446
|
+
def test_cache_stats(tmp_path):
    """stats() reports two entries after two distinct keys are stored."""
    manager = CacheManager(tmp_path / "c")
    for key in ("a", "b"):
        manager.set(key, {})
    assert manager.stats()["entries"] == 2
|
|
448
|
+
|
|
449
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
450
|
+
# Core integration with mock provider
|
|
451
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
452
|
+
class MockProvider:
    """Stand-in provider that returns canned results without any external call."""

    name = "mock"

    def summarize_file(self, path, content, language="en", signatures=""):
        """Return a FileSummary whose purpose is ``Mock:<path>``."""
        purpose = f"Mock:{path}"
        return FileSummary(path=path, purpose=purpose)

    def summarize_project(self, files, language="en"):
        """Return a fixed ProjectOverview named "MockProject"."""
        overview = ProjectOverview(
            name="MockProject",
            description="Mocked.",
            entry_points=["main.py"],
        )
        return overview

    def describe_flow(self, flow, language="en"):
        """Return ``Flow:<flow.name>`` as the flow description."""
        description = f"Flow:{flow.name}"
        return description
|
|
460
|
+
|
|
461
|
+
def _st(tmp_path):
    """Build a SaveToken wired to MockProvider and a cache under ``tmp_path``.

    The instance is created with ``__new__`` so ``__init__`` is not run; the
    attributes are then assigned by hand.
    """
    from savetoken.core import SaveToken
    from savetoken.cache import CacheManager

    instance = SaveToken.__new__(SaveToken)
    instance.provider = MockProvider()
    instance.cache = CacheManager(tmp_path / "cache")
    instance.language = "en"
    instance.flow_depth = 3
    instance.target_ratio = 0.5
    return instance
|
|
471
|
+
|
|
472
|
+
def test_core_xml(tmp_path):
    """Summarizing the sample XML file yields two files and the mock overview."""
    xml_path = tmp_path / "r.xml"
    xml_path.write_text(REPOMIX_XML)
    summary = _st(tmp_path).summarize(xml_path)
    assert len(summary.files) == 2
    assert summary.overview.name == "MockProject"
|
|
476
|
+
|
|
477
|
+
def test_core_text(tmp_path):
    """Summarizing the sample XML passed as text yields two files."""
    summary = _st(tmp_path).summarize_text(REPOMIX_XML)
    assert len(summary.files) == 2
|
|
480
|
+
|
|
481
|
+
def test_core_strategic_verbatim(tmp_path):
    """settings.py is reported as a strategic file carrying verbatim content.

    Twenty filler files are appended to the input (pad to ensure budget).
    """
    # The filler text is built outside the f-string: a backslash inside an
    # f-string replacement field is a SyntaxError before Python 3.12, and this
    # package declares support for Python 3.10+.
    filler_body = "x=1\n" * 100
    padding = "".join(
        f'<file path="s{i}.py">{filler_body}</file>' for i in range(20)
    )
    xml = '<file path="settings.py">DEBUG=True\nSECRET=x\n</file>' + padding
    s = _st(tmp_path).summarize_text(xml)
    # settings.py → strategic, included verbatim
    strat = [f for f in s.files if f.role == FileRole.STRATEGIC.value and f.verbatim]
    assert any(f.path == "settings.py" for f in strat)
|
|
488
|
+
|
|
489
|
+
def test_core_skips_lockfile(tmp_path):
    """package-lock.json does not appear among the summarized files."""
    xml = '<file path="package-lock.json">{}</file><file path="app.py">x=1</file>'
    summary = _st(tmp_path).summarize_text(xml)
    paths = [entry.path for entry in summary.files]
    assert "package-lock.json" not in paths
|
|
493
|
+
|
|
494
|
+
def test_core_env_vars(tmp_path):
    """The summary exposes env_vars as a list."""
    xml = '<file path="a.py">import os\nX=os.getenv("MY_VAR","d")\n</file>'
    summary = _st(tmp_path).summarize_text(xml)
    env_vars = summary.env_vars
    assert isinstance(env_vars, list)
|
|
498
|
+
|
|
499
|
+
def test_core_sql_tables(tmp_path):
    """The summary exposes sql_tables as a list."""
    xml = '<file path="s.sql">CREATE TABLE items (id INT);</file>'
    summary = _st(tmp_path).summarize_text(xml)
    tables = summary.sql_tables
    assert isinstance(tables, list)
|
|
503
|
+
|
|
504
|
+
def test_core_graph(tmp_path):
    """The summary's graph attribute is a CodebaseGraph."""
    xml = (
        '<file path="a.py">def foo():\n bar()\n</file>'
        '<file path="b.py">def bar():\n pass\n</file>'
    )
    summary = _st(tmp_path).summarize_text(xml)
    assert isinstance(summary.graph, CodebaseGraph)
|
|
508
|
+
|
|
509
|
+
def test_core_dedup_applied(tmp_path):
    """After dedup, _dedup_file_summaries should only contain REGULAR files."""
    summary = _st(tmp_path).summarize_text(REPOMIX_XML)
    regular = FileRole.REGULAR.value
    assert all(entry.role == regular for entry in summary._dedup_file_summaries)
|
|
514
|
+
|
|
515
|
+
def test_core_cache(tmp_path):
    """A second summarize() of the same file succeeds with a provider that always fails."""
    st = _st(tmp_path)
    xml_path = tmp_path / "r.xml"
    xml_path.write_text(REPOMIX_XML)
    st.summarize(xml_path)

    def _cache_miss(self, *a, **k):
        raise AssertionError("cache miss")

    class Fail:
        name = "fail"
        summarize_file = _cache_miss
        summarize_project = _cache_miss
        describe_flow = _cache_miss

    st.provider = Fail()
    st.summarize(xml_path)  # must not raise
|
|
525
|
+
|
|
526
|
+
def test_core_missing(tmp_path):
    """summarize() raises InputError for a path that does not exist."""
    missing = tmp_path / "nope.xml"
    with pytest.raises(InputError):
        _st(tmp_path).summarize(missing)
|
|
528
|
+
|
|
529
|
+
def test_core_empty(tmp_path):
    """summarize() raises InputError for a file holding only whitespace."""
    blank = tmp_path / "e.xml"
    blank.write_text(" ")
    with pytest.raises(InputError):
        _st(tmp_path).summarize(blank)
|
|
532
|
+
|
|
533
|
+
def test_core_save_md(tmp_path):
    """save() writes a file whose text contains the project name."""
    summary = _st(tmp_path).summarize_text(REPOMIX_XML)
    target = tmp_path / "b.md"
    written = _st(tmp_path).save(summary, target)
    assert "MockProject" in written.read_text()
|
|
537
|
+
|
|
538
|
+
def test_core_save_json(tmp_path):
    """save(fmt="json") writes parseable JSON with the expected top-level keys."""
    summary = _st(tmp_path).summarize_text(REPOMIX_XML)
    written = _st(tmp_path).save(summary, tmp_path / "b.json", fmt="json")
    payload = json.loads(written.read_text())
    required = ("overview", "files", "graph", "hot_functions", "env_vars", "sql_tables")
    for key in required:
        assert key in payload
    assert "_dedup_file_summaries" not in payload
|
|
544
|
+
|
|
545
|
+
def test_core_progress(tmp_path):
    """The on_progress callback is invoked at least twice."""
    seen = []

    def record(c, t, p):
        seen.append(c)

    _st(tmp_path).summarize_text(REPOMIX_XML, on_progress=record)
    assert len(seen) >= 2
|
|
549
|
+
|
|
550
|
+
def test_flows(tmp_path):
    """The first traced flow contains a step for post_order."""
    from savetoken.flows import trace_flows

    edges = [
        CallEdge("main.py", "post_order", "orders.py", "create_order"),
        CallEdge("orders.py", "create_order", "inv.py", "reserve"),
    ]
    graph = CodebaseGraph(calls=edges)
    traced = trace_flows(graph, entry_file_hints=["main.py"])
    first = traced[0]
    assert any(step.fn == "post_order" for step in first.steps)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
561
|
+
# Docstring extractor
|
|
562
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
563
|
+
from savetoken.docstring_extractor import (
|
|
564
|
+
extract_file_docs, render_file_doc, FileDoc, FnDoc, ClassDoc, ConstantDoc
|
|
565
|
+
)
|
|
566
|
+
|
|
567
|
+
_RICH_PY = '''\
|
|
568
|
+
"""Order service — manages the full lifecycle of customer orders."""
|
|
569
|
+
from decimal import Decimal
|
|
570
|
+
|
|
571
|
+
MAX_ITEMS = 50
|
|
572
|
+
SUPPORTED_CURRENCIES = ("BRL", "USD", "EUR")
|
|
573
|
+
|
|
574
|
+
class OrderService:
|
|
575
|
+
"""Orchestrates order creation, payment, and cancellation."""
|
|
576
|
+
|
|
577
|
+
def create_order(self, user_id: str, items: list) -> dict:
|
|
578
|
+
"""Create a new order, reserve inventory, and charge payment."""
|
|
579
|
+
pass
|
|
580
|
+
|
|
581
|
+
def cancel_order(self, order_id: str) -> bool:
|
|
582
|
+
"""Cancel an order if status allows. Returns True on success."""
|
|
583
|
+
raise ValueError("not cancellable")
|
|
584
|
+
|
|
585
|
+
def _internal(self):
|
|
586
|
+
"""Private helper — should not appear in output."""
|
|
587
|
+
pass
|
|
588
|
+
|
|
589
|
+
def calculate_discount(total: Decimal, code: str) -> Decimal:
|
|
590
|
+
"""Apply coupon code discount to total. Returns discounted amount."""
|
|
591
|
+
pass
|
|
592
|
+
'''
|
|
593
|
+
|
|
594
|
+
def test_docext_module_doc():
    """The module docstring is captured in module_doc."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    file_doc = extracted["a.py"]
    assert "Order service" in file_doc.module_doc
|
|
597
|
+
|
|
598
|
+
def test_docext_constants():
    """Both module-level constants are listed by name."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    constant_names = [const.name for const in extracted["a.py"].constants]
    for expected in ("MAX_ITEMS", "SUPPORTED_CURRENCIES"):
        assert expected in constant_names
|
|
603
|
+
|
|
604
|
+
def test_docext_class_doc():
    """The first extracted class is OrderService with its docstring."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    first_class = extracted["a.py"].classes[0]
    assert first_class.name == "OrderService"
    assert "Orchestrates" in first_class.doc
|
|
609
|
+
|
|
610
|
+
def test_docext_method_docs():
    """Both public methods of the first class are extracted."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    first_class = extracted["a.py"].classes[0]
    names = [method.name for method in first_class.methods]
    for expected in ("create_order", "cancel_order"):
        assert expected in names
|
|
616
|
+
|
|
617
|
+
def test_docext_method_raises():
    """cancel_order records ValueError in its raises information."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    first_class = extracted["a.py"].classes[0]
    cancel = next(
        method for method in first_class.methods if method.name == "cancel_order"
    )
    assert "ValueError" in cancel.raises
|
|
622
|
+
|
|
623
|
+
def test_docext_function_doc():
    """The top-level function is extracted together with its docstring."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    by_name = {}
    for fn in extracted["a.py"].functions:
        by_name[fn.name] = fn
    assert "calculate_discount" in by_name
    assert "coupon" in by_name["calculate_discount"].doc
|
|
628
|
+
|
|
629
|
+
def test_docext_private_excluded_from_render():
    """The underscore-prefixed method name is absent from the rendered doc."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    text = render_file_doc(extracted["a.py"])
    assert "_internal" not in text
|
|
633
|
+
|
|
634
|
+
def test_docext_signature_types():
    """create_order's signature text includes its annotated types."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    first_class = extracted["a.py"].classes[0]
    create = next(
        method for method in first_class.methods if method.name == "create_order"
    )
    for type_name in ("str", "list"):
        assert type_name in create.signature
|
|
640
|
+
|
|
641
|
+
def test_docext_render_has_all_sections():
    """The rendered doc mentions the module doc, a constant, the class, a method and the function."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    text = render_file_doc(extracted["a.py"])
    expected_fragments = (
        "Order service",       # module doc
        "MAX_ITEMS",           # constant
        "OrderService",        # class
        "create_order",        # method
        "calculate_discount",  # function
    )
    for fragment in expected_fragments:
        assert fragment in text
|
|
649
|
+
|
|
650
|
+
def test_docext_non_python_skipped():
    """A .css chunk produces no extracted docs."""
    css_chunk = {"path": "style.css", "content": "body{}"}
    extracted = extract_file_docs([css_chunk])
    assert extracted == {}
|
|
653
|
+
|
|
654
|
+
def test_docext_syntax_error_skipped():
    """A .py chunk that does not parse produces no extracted docs."""
    broken_chunk = {"path": "bad.py", "content": "def (x:"}
    extracted = extract_file_docs([broken_chunk])
    assert extracted == {}
|
|
657
|
+
|
|
658
|
+
def test_docext_empty_file():
    """An empty .py chunk still yields an entry, with empty module_doc and constants."""
    extracted = extract_file_docs([{"path": "empty.py", "content": ""}])
    file_doc = extracted.get("empty.py")
    assert file_doc is not None
    assert file_doc.module_doc == ""
    assert file_doc.constants == []
|
|
664
|
+
|
|
665
|
+
def test_docext_token_estimate():
    """token_estimate() is positive for the sample module."""
    extracted = extract_file_docs([{"path": "a.py", "content": _RICH_PY}])
    estimate = extracted["a.py"].token_estimate()
    assert estimate > 0
|
|
669
|
+
|
|
670
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
671
|
+
# Core integration — AST role
|
|
672
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
673
|
+
|
|
674
|
+
def test_core_regular_python_gets_ast_role(tmp_path):
    """Regular Python files should get FileRole.AST, not REGULAR."""
    # A file big enough to be REGULAR (>25 lines, has external imports)
    body = "def fn():\n pass\n" * 40
    big_py = "import requests\n" + body
    xml = f'<file path="service.py">{big_py}</file>'
    summary = _st(tmp_path).summarize_text(xml)
    matches = (entry for entry in summary.files if entry.path == "service.py")
    service = next(matches, None)
    assert service is not None
    assert service.role == FileRole.AST.value
|
|
683
|
+
|
|
684
|
+
def test_core_ast_file_has_ast_doc(tmp_path):
    """AST files must have ast_doc populated."""
    header = '"""My service module."""\nimport requests\n'
    body = "def fn():\n '''Does nothing.'''\n pass\n" * 40
    big_py = header + body
    xml = f'<file path="service.py">{big_py}</file>'
    summary = _st(tmp_path).summarize_text(xml)
    service = next(entry for entry in summary.files if entry.path == "service.py")
    # non-empty
    assert service.ast_doc
|
|
691
|
+
|
|
692
|
+
def test_core_ast_file_no_llm_called(tmp_path):
    """AST files must NOT trigger LLM calls."""
    big_py = "import requests\n" + "def fn(): pass\n" * 40
    xml = f'<file path="service.py">{big_py}</file>'

    # One entry is appended per summarize_file invocation.
    file_calls = []

    class TrackingProvider:
        name = "tracking"

        def summarize_file(self, *a, **kw):
            file_calls.append(1)
            return FileSummary(path=a[0], purpose="mock")

        def summarize_project(self, *a, **kw):
            return ProjectOverview(name="P", description="D")

        def describe_flow(self, *a, **kw):
            return ""

    st = _st(tmp_path)
    st.provider = TrackingProvider()
    st.summarize_text(xml)
    # summarize_file should NOT have been called for the .py file
    assert len(file_calls) == 0
|
|
712
|
+
|
|
713
|
+
def test_core_ast_section_in_markdown(tmp_path):
    """Rendered output must contain the File Docs section and the module docstring text."""
    header = '"""My module."""\nimport requests\n'
    big_py = header + "def fn(): pass\n" * 40
    xml = f'<file path="service.py">{big_py}</file>'
    summary = _st(tmp_path).summarize_text(xml)
    written = _st(tmp_path).save(summary, tmp_path / "brief.md")
    rendered = written.read_text()
    for expected in ("File Docs", "My module"):
        assert expected in rendered
|
|
722
|
+
|
|
723
|
+
def test_core_non_python_regular_still_uses_llm(tmp_path):
    """Non-Python regular files should still use LLM summary."""
    # One entry is appended per summarize_file invocation.
    file_calls = []

    class TrackingProvider:
        name = "tracking"

        def summarize_file(self, *a, **kw):
            file_calls.append(1)
            return FileSummary(path=a[0], purpose="mock")

        def summarize_project(self, *a, **kw):
            return ProjectOverview(name="P", description="D")

        def describe_flow(self, *a, **kw):
            return ""

    # A non-Python file (YAML config large enough to not be strategic)
    big_yaml = "key: value\n" * 200
    xml = f'<file path="config.yml">{big_yaml}</file>'
    st = _st(tmp_path)
    st.provider = TrackingProvider()
    st.summarize_text(xml)
    assert len(file_calls) >= 1
|