skillcost 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
skillcost/__init__.py ADDED
@@ -0,0 +1,320 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import re
6
+ import sys
7
+ import urllib.request
8
+ from dataclasses import asdict, dataclass
9
+ from pathlib import Path
10
+
11
+ import tiktoken
12
+ import yaml
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Token counting
16
+ # ---------------------------------------------------------------------------
17
+
18
# Process-wide tokenizer cache; stays None until the first token count is requested.
_ENC: tiktoken.Encoding | None = None


def _get_enc() -> tiktoken.Encoding:
    """Return the shared ``cl100k_base`` encoding, creating it on first use.

    The encoding object is stored in the module-level ``_ENC`` so that it is
    only obtained from tiktoken once per process.
    """
    global _ENC
    if _ENC is not None:
        return _ENC
    _ENC = tiktoken.get_encoding("cl100k_base")
    return _ENC
26
+
27
+
28
def count_tokens(text: str) -> int:
    """Return the number of ``cl100k_base`` tokens in *text*.

    Special-token markers that appear literally in the input (for example
    ``<|endoftext|>``) are encoded as ordinary text. With tiktoken's default
    settings ``encode`` raises ``ValueError`` on such input, which would abort
    the whole run for any file that merely mentions one of those markers.
    """
    # disallowed_special=() turns off the "special token found in text" check.
    return len(_get_enc().encode(text, disallowed_special=()))
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Data models
34
+ # ---------------------------------------------------------------------------
35
+
36
+
37
@dataclass
class TargetData:
    """Contents of the analysed target file together with the links found in it."""

    # Full text of the target file.
    text: str
    # Token count reported as the "resident" cost; 0 when the loader did not compute one.
    resident_cost: int
    # Resolved (path, 1-based line number) pairs for ``@{...}`` include directives.
    ref_links: list[tuple[Path, int]]
    # Resolved (path, 1-based line number) pairs for Markdown links to local files.
    local_links: list[tuple[Path, int]]
    # Targets of Markdown links that start with ``http://`` or ``https://``.
    http_links: list[str]
44
+
45
+
46
@dataclass
class CostReport:
    """Token-cost summary for a single target file and the resources it links to."""

    # Resident cost taken over from the loaded target.
    resident: int
    # Target tokens + resident cost + tokens of directly @-included files.
    baseline: int
    # Baseline + tokens of all crawled local files + tokens of fetched URLs.
    maximum: int
    files: dict[str, int]  # str(Path) -> token count, all crawled files (excl. target)
    urls: dict[str, int]  # url -> token count
    # Number of links of each kind found in the target file itself.
    ref_link_count: int
    local_link_count: int
    http_link_count: int
56
+
57
+
58
+ # ---------------------------------------------------------------------------
59
+ # Parsing helpers
60
+ # ---------------------------------------------------------------------------
61
+
62
# ``@{relative/path}`` include directive; group 1 is the path.
_REF_LINK_RE = re.compile(r"@\{([^}]+)\}")
# Inline Markdown link ``[text](target)``; group 1 is the raw target.
_MD_LINK_RE = re.compile(r"\[[^\]]*\]\(([^)]+)\)")
_HTTP_RE = re.compile(r"^https?://")
# Optional quoted link title after the destination: ``[text](dest "Title")``.
_LINK_TITLE_RE = re.compile(r"""\s+(?:"[^"]*"|'[^']*')\s*$""")
# URI scheme prefix such as ``mailto:`` or ``ftp:``. At least two characters are
# required so that a Windows drive letter ("C:") is not mistaken for a scheme.
_URI_SCHEME_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+.-]+:")


def _line_of(text: str, pos: int) -> int:
    """Return the 1-based line number of character offset *pos* in *text*."""
    return text.count("\n", 0, pos) + 1


def _link_destination(raw: str) -> str:
    """Return the destination part of a raw Markdown link target.

    Surrounding whitespace, a trailing quoted title and an enclosing
    ``<...>`` pair are removed; everything else is returned unchanged.
    """
    dest = _LINK_TITLE_RE.sub("", raw.strip())
    if len(dest) >= 2 and dest.startswith("<") and dest.endswith(">"):
        dest = dest[1:-1]
    return dest


def _parse_md_links(
    text: str, base_dir: Path
) -> tuple[list[tuple[Path, int]], list[tuple[Path, int]], list[str]]:
    """Extract include directives and Markdown links from *text*.

    Args:
        text: Markdown source to scan.
        base_dir: Directory against which relative link paths are resolved.

    Returns:
        A ``(ref_links, local_links, http_links)`` tuple. ``ref_links`` and
        ``local_links`` hold ``(resolved path, 1-based line number)`` pairs for
        ``@{...}`` directives and local Markdown links respectively;
        ``http_links`` holds the ``http://`` / ``https://`` link targets.
    """
    ref_links: list[tuple[Path, int]] = [
        ((base_dir / m.group(1)).resolve(), _line_of(text, m.start()))
        for m in _REF_LINK_RE.finditer(text)
    ]
    local_links: list[tuple[Path, int]] = []
    http_links: list[str] = []

    for m in _MD_LINK_RE.finditer(text):
        # Drop an optional title first so it cannot end up in a path or URL.
        target = _link_destination(m.group(1))
        if _HTTP_RE.match(target):
            http_links.append(target)
            continue
        if _URI_SCHEME_RE.match(target):
            # mailto:, ftp:, tel:, ... are neither fetched nor local files.
            continue
        # A fragment only selects a position inside the file; strip it.
        target = target.split("#")[0]
        if not target:
            # Pure in-page anchor such as "(#section)".
            continue
        local_links.append(
            ((base_dir / target).resolve(), _line_of(text, m.start()))
        )

    return ref_links, local_links, http_links
94
+
95
+
96
def _parse_frontmatter(text: str) -> dict | None:
    """Parse the YAML frontmatter at the start of *text*.

    The frontmatter is the region between a leading ``---`` and the next line
    that starts with ``---``.

    Returns:
        The parsed mapping, or ``None`` when *text* has no leading ``---``,
        the closing fence is missing, or the YAML document is not a mapping.

    Raises:
        yaml.YAMLError: If the frontmatter is not valid YAML.
    """
    if not text.startswith("---"):
        return None
    end = text.find("\n---", 3)
    if end == -1:
        return None
    loaded = yaml.safe_load(text[3:end].strip())
    # safe_load returns whatever the document denotes (str, list, None, ...);
    # only a mapping satisfies the declared return type.
    return loaded if isinstance(loaded, dict) else None
103
+
104
+
105
+ # ---------------------------------------------------------------------------
106
+ # Target loading
107
+ # ---------------------------------------------------------------------------
108
+
109
+
110
def load_target(path: Path) -> TargetData:
    """Read the target file and collect its resident cost and links.

    For a file named ``SKILL.md`` the resident cost is the token count of the
    frontmatter ``name`` plus ``description``; for any other file it is 0.
    Links are only extracted from files with a ``.md`` suffix.

    Args:
        path: Path of the file to analyse.

    Returns:
        A ``TargetData`` with the file text, resident cost and parsed links.

    Raises:
        SystemExit: With status 1 when the file cannot be read or is not
            valid UTF-8 (an error message is written to stderr first).
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # listed explicitly or a binary target ends in a raw traceback.
        print(f"ERROR: cannot read {path}: {e}", file=sys.stderr)
        sys.exit(1)

    resident_cost = 0
    ref_links: list[tuple[Path, int]] = []
    local_links: list[tuple[Path, int]] = []
    http_links: list[str] = []

    if path.name == "SKILL.md":
        # Best effort: a broken frontmatter only costs the resident figure.
        try:
            fm = _parse_frontmatter(text)
            if isinstance(fm, dict):
                for key in ("name", "description"):
                    value = fm.get(key)
                    # A key present with a null value must not be counted as
                    # the literal text "None".
                    resident_cost += count_tokens("" if value is None else str(value))
            elif fm:
                print(
                    f"WARNING: failed to parse frontmatter in {path}: "
                    f"expected a mapping, got {type(fm).__name__}",
                    file=sys.stderr,
                )
        except Exception as e:
            print(
                f"WARNING: failed to parse frontmatter in {path}: {e}", file=sys.stderr
            )

    if path.suffix == ".md":
        ref_links, local_links, http_links = _parse_md_links(text, path.parent)

    return TargetData(
        text=text,
        resident_cost=resident_cost,
        ref_links=ref_links,
        local_links=local_links,
        http_links=http_links,
    )
144
+
145
+
146
+ # ---------------------------------------------------------------------------
147
+ # Cost accumulation
148
+ # ---------------------------------------------------------------------------
149
+
150
+
151
def _crawl_files(
    initial_queue: list[tuple[Path, int, Path]], visited: dict[Path, int]
) -> int:
    """Breadth-first crawl of local files, summing the tokens of new ones.

    Args:
        initial_queue: ``(path, line number, referencing file)`` triples to
            start from. The line number and referencing file are only used in
            warning messages.
        visited: Resolved path -> token count for files already accounted
            for. Updated in place; missing or unreadable files are recorded
            with a count of 0 so they are reported only once.

    Returns:
        Total tokens of the files newly read by this call (files already in
        *visited* contribute nothing).
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    queue = deque(initial_queue)  # popleft() is O(1); list.pop(0) is O(n)
    total = 0
    while queue:
        path, line_no, source = queue.popleft()
        rp = path.resolve()
        if rp in visited:
            continue
        if not rp.is_file():
            print(
                f"WARNING: {rp} is not a file or does not exist"
                f" (referenced from {source}:{line_no})",
                file=sys.stderr,
            )
            visited[rp] = 0
            continue
        try:
            text = rp.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            # Linked binaries (e.g. images) are not valid UTF-8; treat them
            # like any other unreadable file instead of aborting the crawl.
            print(
                f"WARNING: cannot read {rp}: {e} (referenced from {source}:{line_no})",
                file=sys.stderr,
            )
            visited[rp] = 0
            continue
        tokens = count_tokens(text)
        visited[rp] = tokens
        total += tokens
        if rp.suffix == ".md":
            # HTTP links inside crawled files are deliberately not collected.
            ref_links, local_links, _ = _parse_md_links(text, rp.parent)
            for p, ln in ref_links + local_links:
                if p.resolve() not in visited:
                    queue.append((p, ln, rp))
    return total
187
+
188
+
189
def _fetch_url_costs(http_links: list[str], url_costs: dict[str, int]) -> int:
    """Download each not-yet-seen URL and add up the tokens of the responses.

    Args:
        http_links: URLs to fetch, in order; repeats are fetched only once.
        url_costs: URL -> token count. Updated in place; a URL whose fetch or
            token count fails is recorded with 0 after a warning on stderr.

    Returns:
        Total tokens of the URLs fetched successfully by this call.
    """
    fetched_tokens = 0
    for link in http_links:
        if link not in url_costs:
            try:
                with urllib.request.urlopen(link, timeout=10) as response:
                    payload = response.read()
                # Undecodable bytes become U+FFFD instead of raising.
                n_tokens = count_tokens(payload.decode("utf-8", errors="replace"))
            except Exception as e:
                print(f"WARNING: failed to fetch {link}: {e}", file=sys.stderr)
                url_costs[link] = 0
            else:
                url_costs[link] = n_tokens
                fetched_tokens += n_tokens
    return fetched_tokens
204
+
205
+
206
def compute_costs(target_path: Path, target_data: TargetData) -> CostReport:
    """Compute the resident, baseline and maximum token costs of a target.

    Args:
        target_path: Path of the analysed file; used to avoid counting the
            target again when it links to itself and as the source shown in
            crawl warnings.
        target_data: Text, resident cost and links of the target.

    Returns:
        A ``CostReport`` where ``baseline`` is target + resident + directly
        ``@``-included files, and ``maximum`` adds every crawled local file
        and every fetched URL on top of that.
    """
    target_resolved = target_path.resolve()
    target_tokens = count_tokens(target_data.text)

    # Baseline: target + resident + ref_links (flat, no recursion)
    ref_token_map: dict[Path, int] = {}
    for p, _ln in target_data.ref_links:
        rp = p.resolve()
        if rp in ref_token_map or rp == target_resolved:
            continue
        try:
            ref_token_map[rp] = count_tokens(p.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError) as e:
            # A non-UTF-8 include raises UnicodeDecodeError (a ValueError, not
            # an OSError); report it like any other unreadable include.
            print(f"WARNING: cannot read ref link {p}: {e}", file=sys.stderr)
            ref_token_map[rp] = 0

    baseline = target_tokens + target_data.resident_cost + sum(ref_token_map.values())

    # Pre-populate visited with target + ref_links so they aren't double-counted
    visited: dict[Path, int] = {target_resolved: target_tokens, **ref_token_map}

    # Crawl all local files recursively (ref_links already visited → not re-added)
    # NOTE(review): because @-included files are marked visited up front, links
    # inside them are never followed — confirm that this is intended.
    seen_paths: set[Path] = set()
    initial_queue: list[tuple[Path, int, Path]] = []
    for p, ln in target_data.ref_links + target_data.local_links:
        rp = p.resolve()
        if rp not in seen_paths:
            seen_paths.add(rp)
            initial_queue.append((rp, ln, target_path))
    file_crawl_total = _crawl_files(initial_queue, visited)

    # Files breakdown: everything visited except the target itself
    files: dict[str, int] = {
        str(p): t for p, t in visited.items() if p != target_resolved
    }

    # Only HTTP links found in the target itself are fetched.
    url_costs: dict[str, int] = {}
    url_total = _fetch_url_costs(target_data.http_links, url_costs)

    maximum = baseline + file_crawl_total + url_total

    return CostReport(
        resident=target_data.resident_cost,
        baseline=baseline,
        maximum=maximum,
        files=files,
        urls=url_costs,
        ref_link_count=len(target_data.ref_links),
        local_link_count=len(target_data.local_links),
        http_link_count=len(target_data.http_links),
    )
257
+
258
+
259
+ # ---------------------------------------------------------------------------
260
+ # Output formatting
261
+ # ---------------------------------------------------------------------------
262
+
263
+
264
def format_report(report: CostReport, fmt: str) -> str:
    """Render *report* as JSON, YAML or human-readable text.

    Args:
        report: The cost report to render.
        fmt: ``"json"`` or ``"yaml"`` for machine-readable output; any other
            value selects the plain-text layout.

    Returns:
        The rendered report as a single string.
    """
    if fmt in ("json", "yaml"):
        payload = asdict(report)
        if fmt == "json":
            return json.dumps(payload, indent=2)
        return yaml.dump(payload, sort_keys=False)

    # Human-readable text: header with the three headline figures first.
    out: list[str] = [
        "Token Cost Report",
        "=================",
        f" Resident : {report.resident:,}",
        f" Baseline : {report.baseline:,}",
        f" Maximum : {report.maximum:,}",
    ]

    # Per-file and per-URL sections are omitted entirely when empty.
    if report.files:
        out.append(f"\nFiles ({len(report.files)})")
        out.extend(
            f" {path_str:<60} {tokens:>8,}"
            for path_str, tokens in report.files.items()
        )

    if report.urls:
        out.append(f"\nURLs ({len(report.urls)})")
        out.extend(f" {url:<60} {tokens:>8,}" for url, tokens in report.urls.items())

    out += [
        "\nLinks",
        f" @-directives : {report.ref_link_count}",
        f" Local links : {report.local_link_count}",
        f" HTTP links : {report.http_link_count}",
    ]

    return "\n".join(out)
294
+
295
+
296
+ # ---------------------------------------------------------------------------
297
+ # CLI entry point
298
+ # ---------------------------------------------------------------------------
299
+
300
+
301
def main() -> None:
    """Command-line entry point: analyse one file and print its cost report.

    Takes the target path as a positional argument and the mutually exclusive
    ``--json`` / ``--yaml`` flags; without either flag the report is printed
    as human-readable text.
    """
    parser = argparse.ArgumentParser(
        description="Estimate the token cost of a text file and its linked resources."
    )
    parser.add_argument("target", help="Path to a plain-text file to analyse")
    output_flags = parser.add_mutually_exclusive_group()
    output_flags.add_argument("--json", action="store_true", help="Output as JSON")
    output_flags.add_argument("--yaml", action="store_true", help="Output as YAML")
    args = parser.parse_args()

    if args.json:
        fmt = "json"
    elif args.yaml:
        fmt = "yaml"
    else:
        fmt = "text"

    target_path = Path(args.target).resolve()
    report = compute_costs(target_path, load_target(target_path))
    print(format_report(report, fmt))


if __name__ == "__main__":
    main()
skillcost/__main__.py ADDED
@@ -0,0 +1,3 @@
1
"""Run the command-line interface when the package is executed with ``python -m skillcost``."""
import skillcost

skillcost.main()
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: skillcost
3
+ Version: 0.0.3
4
+ Summary: Estimates the token cost of agent skills and their linked resources
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: pyyaml>=6.0.3
7
+ Requires-Dist: tiktoken>=0.12.0
8
+ Description-Content-Type: text/markdown
9
+
10
+ # skillcost
11
+
12
+ Estimates the token cost of agent skills and their linked resources.
13
+
14
+ Given a target file (typically a `SKILL.md`), `skillcost` counts tokens and crawls all linked files
15
+ and URLs to produce three cost figures:
16
+
17
+ | Metric | Description |
18
+ | ------------ | ------------------------------------------------------------------------------- |
19
+ | **Resident** | Tokens loaded on every prompt, just because the skill is installed. |
20
+ | **Baseline** | Tokens loaded when the skill is invoked — the skill file plus any `@`-includes. |
21
+ | **Maximum** | Tokens loaded if the agent follows every link in the skill file. |
22
+
23
+ This also works on `CLAUDE.md` or any other plain-text UTF-8 file.
24
+
25
+ ## Installation
26
+
27
+ Requires Python 3.12+ and [uv](https://github.com/astral-sh/uv).
28
+
29
+ ```sh
30
+ uv sync
31
+ ```
32
+
33
+ ## Usage
34
+
35
+ ```sh
36
+ uv run python -m skillcost <path/to/file>
37
+ ```
38
+
39
+ Output defaults to human-readable text. Pass `--json` or `--yaml` for machine-readable output.
40
+
41
+ ```sh
42
+ uv run python -m skillcost SKILL.md
43
+ uv run python -m skillcost SKILL.md --json
44
+ uv run python -m skillcost SKILL.md --yaml
45
+ ```
46
+
47
+ ### Example output
48
+
49
+ ```
50
+ Token Cost Report
51
+ =================
52
+ Resident : 97
53
+ Baseline : 1,940
54
+ Maximum : 18,390
55
+
56
+ Files (24)
57
+ /path/to/reference/conventions.md 926
58
+ /path/to/reference/utilities.md 1,751
59
+ ...
60
+
61
+ Links
62
+ @-directives : 0
63
+ Local links : 17
64
+ HTTP links : 0
65
+ ```
66
+
67
+ ## How links are counted
68
+
69
+ `skillcost` recognizes two link syntaxes in Markdown files:
70
+
71
+ - **`@{relative/path}`** — an include directive; the file is always fetched when the skill is
72
+ invoked, so it counts toward the **baseline** cost.
73
+ - **`[text](relative/path)`** — a standard Markdown link; followed recursively and counted toward
74
+ the **maximum** cost only.
75
+
76
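+ HTTP links (`http://...` or `https://...`) are fetched and counted toward the maximum cost only. Only HTTP links that appear in the target file itself are fetched; those found inside crawled files are ignored.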
77
+
78
+ ## Contributing
79
+
80
+ See [CONTRIBUTING.md](CONTRIBUTING.md).
@@ -0,0 +1,6 @@
1
+ skillcost/__init__.py,sha256=r4moz-gPU2Z2AeEz9qeM6dtjuD0ntAqRFKm9_lq6z8M,10160
2
+ skillcost/__main__.py,sha256=B2dbxzFEPolhdyA6HsXPdJm61iIfMje39O-2nAW1dcU,35
3
+ skillcost-0.0.3.dist-info/METADATA,sha256=3QclmxCm4d-CSFVmww0gF5BE4HuYznH0RCrGdcxCALA,2285
4
+ skillcost-0.0.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
5
+ skillcost-0.0.3.dist-info/entry_points.txt,sha256=2dLjNS5yR0In9TcvC_91vT8oLLEso2AVGNC6Bq1zUOs,45
6
+ skillcost-0.0.3.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ skillcost = skillcost:main