skillcost 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
skillcost/__init__.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
import urllib.request
|
|
8
|
+
from dataclasses import asdict, dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
import tiktoken
|
|
12
|
+
import yaml
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Token counting
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
# Module-level cache for the tiktoken encoder; stays None until _get_enc() fills it.
_ENC: tiktoken.Encoding | None = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _get_enc() -> tiktoken.Encoding:
    """Return the shared ``cl100k_base`` encoder, building it on first use.

    The encoder is stored in the module-level ``_ENC`` so later calls reuse
    the same object instead of constructing it again.
    """
    global _ENC
    encoder = _ENC
    if encoder is None:
        encoder = _ENC = tiktoken.get_encoding("cl100k_base")
    return encoder
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def count_tokens(text: str) -> int:
    """Return the number of tokens *text* encodes to with the shared encoder.

    Special-token markers that appear literally in the text (for example
    ``<|endoftext|>``) are counted as ordinary text rather than rejected.

    Args:
        text: The text to measure.

    Returns:
        The length of the encoded token sequence.
    """
    # tiktoken's default (disallowed_special="all") raises ValueError as soon
    # as the input contains a special-token string, which would abort the whole
    # report for any document that merely mentions one.
    return len(_get_enc().encode(text, disallowed_special=()))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Data models
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class TargetData:
    """Everything extracted from the target file before costs are computed."""

    # Full UTF-8 text of the target file.
    text: str
    # Token count of the front-matter name + description (0 when not applicable).
    resident_cost: int
    # (resolved path, 1-based line number) for every @{...} include directive.
    ref_links: list[tuple[Path, int]]
    # (resolved path, 1-based line number) for every non-HTTP Markdown link.
    local_links: list[tuple[Path, int]]
    # Targets of Markdown links that start with http:// or https://.
    http_links: list[str]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class CostReport:
    """Final token-cost figures plus the per-file / per-URL breakdown."""

    # Tokens counted from the target's front-matter name and description.
    resident: int
    # Target tokens + resident + tokens of the files named by @{...} directives.
    baseline: int
    # Baseline + tokens of every additionally crawled file + fetched URLs.
    maximum: int
    files: dict[str, int]  # str(Path) -> token count, all crawled files (excl. target)
    urls: dict[str, int]  # url -> token count
    # Number of @{...} directives found in the target file.
    ref_link_count: int
    # Number of local Markdown links found in the target file.
    local_link_count: int
    # Number of http(s) Markdown links found in the target file.
    http_link_count: int
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Parsing helpers
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
# Include directive of the form @{path}; group 1 is the text between the braces.
_REF_LINK_RE = re.compile(r"@\{([^}]+)\}")
|
|
63
|
+
# Inline Markdown link [text](target); group 1 is everything up to the first ")".
# The pattern does not look at a preceding "!", so image syntax ![alt](src) matches too.
_MD_LINK_RE = re.compile(r"\[[^\]]*\]\(([^)]+)\)")
|
|
64
|
+
# Matches targets that begin with http:// or https:// (lower-case only).
_HTTP_RE = re.compile(r"^https?://")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _line_of(text: str, pos: int) -> int:
|
|
68
|
+
return text.count("\n", 0, pos) + 1
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# A link destination followed by whitespace and the start of a quoted or
# parenthesised title, e.g. ``guide.md "The guide"``; group 1 is the destination.
_LINK_TITLE_RE = re.compile(r"""^(\S+)\s+["'(]""")
# A URI scheme of two or more characters (``mailto:``, ``ftp:``, ``tel:`` ...).
# Single-letter prefixes are excluded so Windows drive paths such as ``C:\docs``
# are still treated as files.
_URI_SCHEME_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+.-]+:")


def _link_destination(raw: str) -> str:
    """Return the bare destination of a Markdown link, without title or ``<>``."""
    dest = raw.strip()
    if dest.startswith("<"):
        close = dest.find(">")
        if close != -1:
            return dest[1:close]
    with_title = _LINK_TITLE_RE.match(dest)
    return with_title.group(1) if with_title else dest


def _parse_md_links(
    text: str, base_dir: Path
) -> tuple[list[tuple[Path, int]], list[tuple[Path, int]], list[str]]:
    """Extract include directives and Markdown links from *text*.

    Args:
        text: Markdown source to scan.
        base_dir: Directory that relative link targets are resolved against.

    Returns:
        A ``(ref_links, local_links, http_links)`` tuple. ``ref_links`` and
        ``local_links`` hold ``(resolved path, 1-based line number)`` pairs for
        ``@{...}`` directives and local Markdown links respectively;
        ``http_links`` holds the http(s) URLs as written, minus any link title.
    """
    ref_links: list[tuple[Path, int]] = [
        ((base_dir / m.group(1)).resolve(), _line_of(text, m.start()))
        for m in _REF_LINK_RE.finditer(text)
    ]
    local_links: list[tuple[Path, int]] = []
    http_links: list[str] = []

    for m in _MD_LINK_RE.finditer(text):
        # Drop an optional title / angle brackets so `[t](a.md "Title")` points
        # at a.md rather than at a file literally named `a.md "Title"`.
        target = _link_destination(m.group(1))
        if _HTTP_RE.match(target):
            http_links.append(target)
            continue
        if _URI_SCHEME_RE.match(target):
            # mailto:, tel:, ftp: ... are neither local files nor fetched URLs.
            continue
        # A fragment addresses a heading inside the file, not a different file.
        target = target.split("#")[0]
        if not target:
            continue  # pure in-page anchor such as [top](#top)
        local_links.append(((base_dir / target).resolve(), _line_of(text, m.start())))

    return ref_links, local_links, http_links
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _parse_frontmatter(text: str) -> dict | None:
|
|
97
|
+
if not text.startswith("---"):
|
|
98
|
+
return None
|
|
99
|
+
end = text.find("\n---", 3)
|
|
100
|
+
if end == -1:
|
|
101
|
+
return None
|
|
102
|
+
return yaml.safe_load(text[3:end].strip())
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
# Target loading
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def load_target(path: Path) -> TargetData:
    """Read the target file and collect its resident cost and outgoing links.

    Args:
        path: File to analyse. Front matter is only inspected when the file is
            named ``SKILL.md``; links are only parsed when the suffix is ``.md``.

    Returns:
        A ``TargetData`` holding the file text, the resident token cost and
        the three link lists.

    Raises:
        SystemExit: With status 1 when the file cannot be read or is not
            valid UTF-8 (an ``ERROR:`` line is written to stderr first).
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError derives from ValueError, not OSError, so it must
        # be named explicitly or a non-UTF-8 target ends in a raw traceback.
        print(f"ERROR: cannot read {path}: {e}", file=sys.stderr)
        sys.exit(1)

    resident_cost = 0
    ref_links: list[tuple[Path, int]] = []
    local_links: list[tuple[Path, int]] = []
    http_links: list[str] = []

    if path.name == "SKILL.md":
        # Best effort: a broken front matter block downgrades to a warning and
        # a resident cost of 0 instead of aborting the report.
        try:
            fm = _parse_frontmatter(text)
            if isinstance(fm, dict):
                # Skip missing/null values; str(None) would be counted as "None".
                resident_cost = sum(
                    count_tokens(str(value))
                    for value in (fm.get("name"), fm.get("description"))
                    if value is not None
                )
            elif fm is not None:
                print(
                    f"WARNING: frontmatter in {path} is not a mapping; ignoring it",
                    file=sys.stderr,
                )
        except Exception as e:
            print(
                f"WARNING: failed to parse frontmatter in {path}: {e}", file=sys.stderr
            )

    if path.suffix == ".md":
        ref_links, local_links, http_links = _parse_md_links(text, path.parent)

    return TargetData(
        text=text,
        resident_cost=resident_cost,
        ref_links=ref_links,
        local_links=local_links,
        http_links=http_links,
    )
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
# Cost accumulation
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _crawl_files(
|
|
152
|
+
initial_queue: list[tuple[Path, int, Path]], visited: dict[Path, int]
|
|
153
|
+
) -> int:
|
|
154
|
+
queue = list(initial_queue)
|
|
155
|
+
total = 0
|
|
156
|
+
while queue:
|
|
157
|
+
path, line_no, source = queue.pop(0)
|
|
158
|
+
rp = path.resolve()
|
|
159
|
+
if rp in visited:
|
|
160
|
+
continue
|
|
161
|
+
if not rp.is_file():
|
|
162
|
+
print(
|
|
163
|
+
f"WARNING: {rp} is not a file or does not exist"
|
|
164
|
+
f" (referenced from {source}:{line_no})",
|
|
165
|
+
file=sys.stderr,
|
|
166
|
+
)
|
|
167
|
+
visited[rp] = 0
|
|
168
|
+
continue
|
|
169
|
+
try:
|
|
170
|
+
text = rp.read_text(encoding="utf-8")
|
|
171
|
+
except OSError as e:
|
|
172
|
+
print(
|
|
173
|
+
f"WARNING: cannot read {rp}: {e} (referenced from {source}:{line_no})",
|
|
174
|
+
file=sys.stderr,
|
|
175
|
+
)
|
|
176
|
+
visited[rp] = 0
|
|
177
|
+
continue
|
|
178
|
+
tokens = count_tokens(text)
|
|
179
|
+
visited[rp] = tokens
|
|
180
|
+
total += tokens
|
|
181
|
+
if rp.suffix == ".md":
|
|
182
|
+
ref_links, local_links, _ = _parse_md_links(text, rp.parent)
|
|
183
|
+
for p, ln in ref_links + local_links:
|
|
184
|
+
if p.resolve() not in visited:
|
|
185
|
+
queue.append((p, ln, rp))
|
|
186
|
+
return total
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _fetch_url_costs(http_links: list[str], url_costs: dict[str, int]) -> int:
|
|
190
|
+
total = 0
|
|
191
|
+
for url in http_links:
|
|
192
|
+
if url in url_costs:
|
|
193
|
+
continue
|
|
194
|
+
try:
|
|
195
|
+
with urllib.request.urlopen(url, timeout=10) as resp:
|
|
196
|
+
body = resp.read().decode("utf-8", errors="replace")
|
|
197
|
+
tokens = count_tokens(body)
|
|
198
|
+
url_costs[url] = tokens
|
|
199
|
+
total += tokens
|
|
200
|
+
except Exception as e:
|
|
201
|
+
print(f"WARNING: failed to fetch {url}: {e}", file=sys.stderr)
|
|
202
|
+
url_costs[url] = 0
|
|
203
|
+
return total
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def compute_costs(target_path: Path, target_data: TargetData) -> CostReport:
    """Compute the resident, baseline and maximum token costs for a target.

    Args:
        target_path: Path of the analysed file; used to avoid counting the
            target twice and as the "referenced from" source in warnings.
        target_data: Text, resident cost and links produced for the target.

    Returns:
        A ``CostReport`` where ``baseline`` is target + resident + the files
        named by ``@{...}`` directives, and ``maximum`` adds every further
        crawled local file and every fetched URL.
    """
    target_resolved = target_path.resolve()
    target_tokens = count_tokens(target_data.text)

    # Baseline: target + resident + ref_links (flat, no recursion)
    ref_token_map: dict[Path, int] = {}
    for p, _ln in target_data.ref_links:
        rp = p.resolve()
        if rp in ref_token_map or rp == target_resolved:
            continue
        try:
            ref_token_map[rp] = count_tokens(p.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError) as e:
            # Decoding failures are ValueErrors, not OSErrors; without naming
            # them a non-UTF-8 include would crash the whole report.
            print(f"WARNING: cannot read ref link {p}: {e}", file=sys.stderr)
            ref_token_map[rp] = 0

    baseline = target_tokens + target_data.resident_cost + sum(ref_token_map.values())

    # Pre-populate visited with target + ref_links so they aren't double-counted
    visited: dict[Path, int] = {target_resolved: target_tokens, **ref_token_map}

    # Crawl all local files recursively (ref_links already visited → not re-added)
    seen_paths: set[Path] = set()
    initial_queue: list[tuple[Path, int, Path]] = []
    for p, ln in target_data.ref_links + target_data.local_links:
        rp = p.resolve()
        if rp not in seen_paths:
            seen_paths.add(rp)
            initial_queue.append((rp, ln, target_path))
    file_crawl_total = _crawl_files(initial_queue, visited)

    # Files breakdown: everything visited except the target itself
    files: dict[str, int] = {
        str(p): t for p, t in visited.items() if p != target_resolved
    }

    url_costs: dict[str, int] = {}
    url_total = _fetch_url_costs(target_data.http_links, url_costs)

    maximum = baseline + file_crawl_total + url_total

    return CostReport(
        resident=target_data.resident_cost,
        baseline=baseline,
        maximum=maximum,
        files=files,
        urls=url_costs,
        ref_link_count=len(target_data.ref_links),
        local_link_count=len(target_data.local_links),
        http_link_count=len(target_data.http_links),
    )
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# ---------------------------------------------------------------------------
|
|
260
|
+
# Output formatting
|
|
261
|
+
# ---------------------------------------------------------------------------
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def format_report(report: CostReport, fmt: str) -> str:
    """Render *report* as JSON, YAML or a human-readable text summary.

    ``fmt`` values ``"json"`` and ``"yaml"`` serialise the dataclass as a
    mapping; any other value produces the plain-text layout. The Files and
    URLs sections are omitted from the text layout when they are empty.
    """
    if fmt in ("json", "yaml"):
        payload = asdict(report)
        if fmt == "json":
            return json.dumps(payload, indent=2)
        return yaml.dump(payload, sort_keys=False)

    # Human-readable text
    out: list[str] = [
        "Token Cost Report",
        "=================",
        f" Resident : {report.resident:,}",
        f" Baseline : {report.baseline:,}",
        f" Maximum : {report.maximum:,}",
    ]

    if report.files:
        out.append(f"\nFiles ({len(report.files)})")
        out.extend(
            f" {path_str:<60} {tokens:>8,}"
            for path_str, tokens in report.files.items()
        )

    if report.urls:
        out.append(f"\nURLs ({len(report.urls)})")
        out.extend(f" {url:<60} {tokens:>8,}" for url, tokens in report.urls.items())

    out += [
        "\nLinks",
        f" @-directives : {report.ref_link_count}",
        f" Local links : {report.local_link_count}",
        f" HTTP links : {report.http_link_count}",
    ]
    return "\n".join(out)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# ---------------------------------------------------------------------------
|
|
297
|
+
# CLI entry point
|
|
298
|
+
# ---------------------------------------------------------------------------
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def main() -> None:
    """Command-line entry point: analyse one file and print its cost report.

    Takes a positional ``target`` path and the mutually exclusive ``--json``
    and ``--yaml`` switches; without either switch the text layout is printed.
    """
    cli = argparse.ArgumentParser(
        description="Estimate the token cost of a text file and its linked resources."
    )
    cli.add_argument("target", help="Path to a plain-text file to analyse")
    output_mode = cli.add_mutually_exclusive_group()
    output_mode.add_argument("--json", action="store_true", help="Output as JSON")
    output_mode.add_argument("--yaml", action="store_true", help="Output as YAML")
    opts = cli.parse_args()

    resolved_target = Path(opts.target).resolve()
    loaded = load_target(resolved_target)
    report = compute_costs(resolved_target, loaded)

    if opts.json:
        fmt = "json"
    elif opts.yaml:
        fmt = "yaml"
    else:
        fmt = "text"
    print(format_report(report, fmt))


if __name__ == "__main__":
    main()
|
skillcost/__main__.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skillcost
|
|
3
|
+
Version: 0.0.3
|
|
4
|
+
Summary: Estimates the token cost of agent skills and their linked resources
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
7
|
+
Requires-Dist: tiktoken>=0.12.0
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# skillcost
|
|
11
|
+
|
|
12
|
+
Estimates the token cost of agent skills and their linked resources.
|
|
13
|
+
|
|
14
|
+
Given a target file (typically a `SKILL.md`), `skillcost` counts tokens and crawls all linked files
|
|
15
|
+
and URLs to produce three cost figures:
|
|
16
|
+
|
|
17
|
+
| Metric | Description |
|
|
18
|
+
| ------------ | ------------------------------------------------------------------------------- |
|
|
19
|
+
| **Resident** | Tokens loaded on every prompt, just because the skill is installed. |
|
|
20
|
+
| **Baseline** | Tokens loaded when the skill is invoked — the skill file plus any `@`-includes. |
|
|
21
|
+
| **Maximum** | Tokens loaded if the agent follows every link in the skill file. |
|
|
22
|
+
|
|
23
|
+
This also works on `CLAUDE.md` or any other plain-text UTF-8 file.
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
Requires Python 3.12+ and [uv](https://github.com/astral-sh/uv).
|
|
28
|
+
|
|
29
|
+
```sh
|
|
30
|
+
uv sync
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
```sh
|
|
36
|
+
uv run python -m main <path/to/file>
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Output defaults to human-readable text. Pass `--json` or `--yaml` for machine-readable output.
|
|
40
|
+
|
|
41
|
+
```sh
|
|
42
|
+
uv run python -m main SKILL.md
|
|
43
|
+
uv run python -m main SKILL.md --json
|
|
44
|
+
uv run python -m main SKILL.md --yaml
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Example output
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
Token Cost Report
|
|
51
|
+
=================
|
|
52
|
+
Resident : 97
|
|
53
|
+
Baseline : 1,940
|
|
54
|
+
Maximum : 18,390
|
|
55
|
+
|
|
56
|
+
Files (24)
|
|
57
|
+
/path/to/reference/conventions.md 926
|
|
58
|
+
/path/to/reference/utilities.md 1,751
|
|
59
|
+
...
|
|
60
|
+
|
|
61
|
+
Links
|
|
62
|
+
@-directives : 0
|
|
63
|
+
Local links : 17
|
|
64
|
+
HTTP links : 0
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## How links are counted
|
|
68
|
+
|
|
69
|
+
`skillcost` recognizes two link syntaxes in Markdown files:
|
|
70
|
+
|
|
71
|
+
- **`@{relative/path}`** — an include directive; the file is always fetched when the skill is
|
|
72
|
+
invoked, so it counts toward the **baseline** cost.
|
|
73
|
+
- **`[text](relative/path)`** — a standard Markdown link; followed recursively and counted toward
|
|
74
|
+
the **maximum** cost only.
|
|
75
|
+
|
|
76
|
+
HTTP(S) links (`http://...` or `https://...`) found in the target file are fetched and counted toward the maximum cost only; HTTP links inside crawled files are not fetched.
|
|
77
|
+
|
|
78
|
+
## Contributing
|
|
79
|
+
|
|
80
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
skillcost/__init__.py,sha256=r4moz-gPU2Z2AeEz9qeM6dtjuD0ntAqRFKm9_lq6z8M,10160
|
|
2
|
+
skillcost/__main__.py,sha256=B2dbxzFEPolhdyA6HsXPdJm61iIfMje39O-2nAW1dcU,35
|
|
3
|
+
skillcost-0.0.3.dist-info/METADATA,sha256=3QclmxCm4d-CSFVmww0gF5BE4HuYznH0RCrGdcxCALA,2285
|
|
4
|
+
skillcost-0.0.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
5
|
+
skillcost-0.0.3.dist-info/entry_points.txt,sha256=2dLjNS5yR0In9TcvC_91vT8oLLEso2AVGNC6Bq1zUOs,45
|
|
6
|
+
skillcost-0.0.3.dist-info/RECORD,,
|