claude-skill-forge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_skill_forge-0.1.0.dist-info/METADATA +178 -0
- claude_skill_forge-0.1.0.dist-info/RECORD +25 -0
- claude_skill_forge-0.1.0.dist-info/WHEEL +4 -0
- claude_skill_forge-0.1.0.dist-info/entry_points.txt +2 -0
- claude_skill_forge-0.1.0.dist-info/licenses/LICENSE +21 -0
- skill_forge/__init__.py +56 -0
- skill_forge/analyzers/__init__.py +66 -0
- skill_forge/analyzers/base.py +203 -0
- skill_forge/analyzers/cli_help.py +99 -0
- skill_forge/analyzers/docs.py +35 -0
- skill_forge/analyzers/generic.py +47 -0
- skill_forge/analyzers/node.py +73 -0
- skill_forge/analyzers/python.py +354 -0
- skill_forge/cli.py +166 -0
- skill_forge/config.py +32 -0
- skill_forge/describe.py +101 -0
- skill_forge/errors.py +27 -0
- skill_forge/frontmatter.py +77 -0
- skill_forge/generate.py +84 -0
- skill_forge/llm.py +121 -0
- skill_forge/models.py +56 -0
- skill_forge/skill.py +39 -0
- skill_forge/slug.py +41 -0
- skill_forge/templates.py +151 -0
- skill_forge/validate.py +147 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: claude-skill-forge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Turn a codebase, package, or doc into a valid Claude SKILL.md — offline, deterministic, and valid by construction. Optional Claude refinement. Zero runtime dependencies.
|
|
5
|
+
Project-URL: Homepage, https://github.com/shaxzodbek-uzb/skill-forge
|
|
6
|
+
Project-URL: Repository, https://github.com/shaxzodbek-uzb/skill-forge
|
|
7
|
+
Project-URL: Issues, https://github.com/shaxzodbek-uzb/skill-forge/issues
|
|
8
|
+
Author-email: Shaxzodbek Sobirov <shaxzodbek@blaze.uz>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent,anthropic,claude,cli,codegen,linter,mcp,scaffold,skill-md,skills
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
22
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
23
|
+
Classifier: Topic :: Utilities
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Provides-Extra: anthropic
|
|
26
|
+
Requires-Dist: anthropic>=0.40; extra == 'anthropic'
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# skill-forge
|
|
33
|
+
|
|
34
|
+
**Point it at your code. Get a valid Claude skill. No API key required.**
|
|
35
|
+
|
|
36
|
+
`skill-forge` turns a codebase, package, or doc into a well-formed Claude
|
|
37
|
+
[`SKILL.md`](https://docs.claude.com/en/docs/agents-and-tools/agent-skills) — and the
|
|
38
|
+
skill it writes is **valid by construction**.
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install claude-skill-forge # the CLI it installs is `skill-forge`
|
|
42
|
+
skill-forge forge ./my-tool # writes .claude/skills/my-tool/SKILL.md
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
[CI status](https://github.com/shaxzodbek-uzb/skill-forge/actions/workflows/ci.yml)
|
|
46
|
+
[PyPI package](https://pypi.org/project/claude-skill-forge/)
|
|
47
|
+

|
|
48
|
+

|
|
49
|
+

|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Why
|
|
54
|
+
|
|
55
|
+
Writing a good skill is fiddly: the frontmatter has to be exactly right, the `name` has to
|
|
56
|
+
match its directory, and the `description` — the one field an agent actually reads to
|
|
57
|
+
decide *whether to load the skill* — has to say **when** to trigger, inside a tight
|
|
58
|
+
character budget. Get any of it wrong and the skill is silently undiscoverable.
|
|
59
|
+
|
|
60
|
+
Most "ask an LLM to write my SKILL.md" approaches are non-reproducible, need an API key,
|
|
61
|
+
and still emit invalid frontmatter. `skill-forge` is different on two axes:
|
|
62
|
+
|
|
63
|
+
1. **Offline & deterministic by default.** It reads your source with static analysis — no
|
|
64
|
+
code execution, no network, no key — and emits the skill. Same input → same output.
|
|
65
|
+
The optional `--llm` flag only *refines* the prose; it never owns the structure.
|
|
66
|
+
2. **Valid by construction.** Every generated skill passes the built-in linter (the same
|
|
67
|
+
rules a skill must satisfy to be discoverable). `skill-forge` refuses to write a skill
|
|
68
|
+
that doesn't lint clean, so you never ship a broken one.
|
|
69
|
+
|
|
70
|
+
It's the *forge* half of a pair: **forge generates, `skillcheck` checks.** The linter is
|
|
71
|
+
bundled here too (`skill-forge lint`) so the tool stands alone.
|
|
72
|
+
|
|
73
|
+
## Install
|
|
74
|
+
|
|
75
|
+
The package is published on PyPI as **`claude-skill-forge`**; it installs a CLI named
|
|
76
|
+
`skill-forge` (and the import package is `skill_forge`).
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install claude-skill-forge # core: zero runtime dependencies
|
|
80
|
+
pip install 'claude-skill-forge[anthropic]' # adds the optional --llm refiner
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Quickstart (30 seconds)
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
# From a Python/Node project, a package, or a single doc:
|
|
87
|
+
skill-forge forge ./my-tool # -> .claude/skills/my-tool/SKILL.md
|
|
88
|
+
skill-forge forge ./README.md # generate from docs
|
|
89
|
+
skill-forge forge ./pkg --name my-skill # override the skill name
|
|
90
|
+
skill-forge forge ./my-tool --stdout # preview, don't write
|
|
91
|
+
skill-forge forge ./my-tool --llm # sharpen the prose with Claude
|
|
92
|
+
|
|
93
|
+
# Validate any skill / folder of skills (the bundled checker):
|
|
94
|
+
skill-forge lint .claude/skills
|
|
95
|
+
|
|
96
|
+
# CI drift guard — fail if the skill no longer matches the code:
|
|
97
|
+
skill-forge check ./my-tool --name my-tool
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### What it extracts
|
|
101
|
+
|
|
102
|
+
| Source | What it reads |
|
|
103
|
+
| --- | --- |
|
|
104
|
+
| **Python** | `pyproject.toml` / `setup.cfg` (name, description, keywords, `[project.scripts]`), `__all__` and public defs/classes via `ast`, and argparse / click / typer subcommands — **never importing or running your code** |
|
|
105
|
+
| **Node** | `package.json` (name, description, keywords, `bin`), TS/JS detection, README |
|
|
106
|
+
| **Docs** | A markdown/rst/txt file: H1 title, first paragraph, section headings, fenced code blocks |
|
|
107
|
+
| **Any directory** | README + a language histogram of the file tree |
|
|
108
|
+
| **A CLI tool** | `skill-forge forge --from-cli "mytool"` captures and parses `mytool --help` |
|
|
109
|
+
|
|
110
|
+
The result is a complete `SKILL.md`: trigger-oriented `description`, a `## When to use`
|
|
111
|
+
section, an overview, and `## Commands` / `## API` / `## Usage` sections built from what was
|
|
112
|
+
found.
|
|
113
|
+
|
|
114
|
+
## Use it from Python
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from skill_forge import forge, render_skill, write_skill
|
|
118
|
+
|
|
119
|
+
draft = forge("./my-tool") # a validated SkillDraft
|
|
120
|
+
print(render_skill(draft)) # the SKILL.md text
|
|
121
|
+
write_skill(draft, ".claude/skills")
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## The `--llm` refiner (optional)
|
|
125
|
+
|
|
126
|
+
`--llm` sends the *extracted signals* (not your source) to Claude to sharpen the
|
|
127
|
+
description's trigger phrasing and tighten the body. It is fail-safe: if the model is
|
|
128
|
+
unavailable it tells you how to fix it, and if its output is anything but a valid
|
|
129
|
+
improvement, `skill-forge` keeps the deterministic draft. You never get a worse skill than
|
|
130
|
+
the offline path. Set `ANTHROPIC_API_KEY` and install the extra to use it.
|
|
131
|
+
|
|
132
|
+
## CI: catch stale skills
|
|
133
|
+
|
|
134
|
+
`skill-forge check` regenerates the skill in memory and diffs it against the one on disk,
|
|
135
|
+
exiting non-zero if they differ — so a skill that drifted from the code it describes fails
|
|
136
|
+
the build:
|
|
137
|
+
|
|
138
|
+
```yaml
|
|
139
|
+
- run: pip install claude-skill-forge
|
|
140
|
+
- run: skill-forge check ./my-tool --name my-tool
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## What this is **not**
|
|
144
|
+
|
|
145
|
+
- **Not a replacement for judgment.** Generated skills are a strong *first draft*. The body
|
|
146
|
+
is assembled from your structure, not written from deep understanding — read it, trim it,
|
|
147
|
+
and add the hard-won "do this, not that" guidance only you know.
|
|
148
|
+
- **Not a runtime.** It writes `SKILL.md` files; it does not execute skills.
|
|
149
|
+
- **Not magic prose.** The offline path is deterministic and a little formulaic by design.
|
|
150
|
+
Reach for `--llm` when you want the description polished.
|
|
151
|
+
- **It does not run your code.** The only time it executes anything is the explicit
|
|
152
|
+
`--from-cli` flag, which runs `<cmd> --help` with no shell and a timeout.
|
|
153
|
+
|
|
154
|
+
## Configuration
|
|
155
|
+
|
|
156
|
+
Environment overrides (all optional):
|
|
157
|
+
|
|
158
|
+
| Variable | Default | Purpose |
|
|
159
|
+
| --- | --- | --- |
|
|
160
|
+
| `SKILL_FORGE_OUTDIR` | `.claude/skills` | Default output directory |
|
|
161
|
+
| `SKILL_FORGE_VERSION` | `0.1.0` | Version stamped on generated skills |
|
|
162
|
+
| `SKILL_FORGE_MODEL_ID` | `claude-haiku-4-5` | Model used by `--llm` |
|
|
163
|
+
| `ANTHROPIC_API_KEY` | — | Required for `--llm` |
|
|
164
|
+
|
|
165
|
+
## Development
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
pip install -e ".[dev]"
|
|
169
|
+
ruff check .
|
|
170
|
+
pytest -q
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
The design is pinned in [`SPEC.md`](SPEC.md) — the single source of truth for the public
|
|
174
|
+
API and behavior. See [`CONTRIBUTING.md`](CONTRIBUTING.md) before opening a PR.
|
|
175
|
+
|
|
176
|
+
## License
|
|
177
|
+
|
|
178
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
skill_forge/__init__.py,sha256=7-E4iGGc7xl6g9j3_m5iju1l5FeUEeSpDkezAK39AdA,1401
|
|
2
|
+
skill_forge/cli.py,sha256=GzgSBneJ8GC_PW4WkgSEoVkfuU7on6zre9EDedV9nL0,6174
|
|
3
|
+
skill_forge/config.py,sha256=qMMiFX9Abj2plW_JwH7FtxbEtig5L8JGqn6My2Qlp3M,1067
|
|
4
|
+
skill_forge/describe.py,sha256=zyUqZJb7aVWTWQLsa8J5bvlm77_PYDvdi29efbpEauM,3518
|
|
5
|
+
skill_forge/errors.py,sha256=qobC-rCE8q7tVTgDxnVxdTQfIygazVkyvGGGT_nSMuE,809
|
|
6
|
+
skill_forge/frontmatter.py,sha256=YFjSBpZh1nnmIhlYxXIfDDxEIp5EOf_krXyrBeb9A_8,2722
|
|
7
|
+
skill_forge/generate.py,sha256=QttjLf_M7vgbcfFhnM6vQSuRjaBsUmPLDa_VeoSM9y4,2735
|
|
8
|
+
skill_forge/llm.py,sha256=fgx0H8OTuVM0_tgRK-bx9gSmvq-cRj06O28foXrTb14,4163
|
|
9
|
+
skill_forge/models.py,sha256=VqVFo5IIn_BELp1X5wxACL-OvAdeai2HpdFgzirjewE,1661
|
|
10
|
+
skill_forge/skill.py,sha256=FNmYUsGgr0pJWEsivXVmxBsXEhZANgUO_TK05y6ya_c,1488
|
|
11
|
+
skill_forge/slug.py,sha256=imTrlcTtIFLRp9_mZ0oiEA1LbTkJpuI7wZGQs1JWCAk,1541
|
|
12
|
+
skill_forge/templates.py,sha256=RIszOb1vMxQoYmvUIV9w0Rcy27NETDnazz8bjNKNsOQ,5228
|
|
13
|
+
skill_forge/validate.py,sha256=jT2R9TPmLH-uTR2dsrYxN5xTWHlgwKA3hxA3fIlgvc0,4911
|
|
14
|
+
skill_forge/analyzers/__init__.py,sha256=muIf5Ndzh_k32WB0xlODihZhcf0INMiqRPB2A44cw0k,2158
|
|
15
|
+
skill_forge/analyzers/base.py,sha256=vu780bnqFUdBGHNi-WDbRh8nE7tI4ohrQZP69pN3wj8,6542
|
|
16
|
+
skill_forge/analyzers/cli_help.py,sha256=29waB_5VHelSNrI9v6L13h1xE6bmNf-kOVsgNrXtsto,3602
|
|
17
|
+
skill_forge/analyzers/docs.py,sha256=YPFRTPgLc7l8JuAjNUuOS3WjTvQGlGFFIxkU7DNWsMw,1130
|
|
18
|
+
skill_forge/analyzers/generic.py,sha256=Qmz6XN9dvgGBxI-HkQ2LoJ1VicPskshzbBbG4EehY-c,1208
|
|
19
|
+
skill_forge/analyzers/node.py,sha256=wBYOh4pvgJr_YrRG4nYyAJ5Ni9X5uJqCaRBSV86ovGs,2296
|
|
20
|
+
skill_forge/analyzers/python.py,sha256=S2XMZ3Rym5rnLT6Dl037sKbjQojPxmQbLjyLguKcWAg,11877
|
|
21
|
+
claude_skill_forge-0.1.0.dist-info/METADATA,sha256=u6_K-BEDxJgWPegoxT42tmQV82QToK8MxYMnn5CG9j0,7771
|
|
22
|
+
claude_skill_forge-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
23
|
+
claude_skill_forge-0.1.0.dist-info/entry_points.txt,sha256=NqV9C-Qsp50k254PqKe5qyI-LgPBsoTXlD0pLUMIjZQ,53
|
|
24
|
+
claude_skill_forge-0.1.0.dist-info/licenses/LICENSE,sha256=xuVZOvGxq8qpZlpQ_VjFMSwc44vsEidNj65gBKDY7Fs,1083
|
|
25
|
+
claude_skill_forge-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shaxzodbek Sobirov / Blaze
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
skill_forge/__init__.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""skill-forge — turn a codebase, package, or doc into a valid Claude SKILL.md.
|
|
2
|
+
|
|
3
|
+
The deterministic path (analyze → draft → validate) is offline and dependency-free; the
|
|
4
|
+
optional ``--llm`` refinement is the only thing that needs the ``anthropic`` extra.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from .analyzers import analyze, detect
|
|
10
|
+
from .config import Settings
|
|
11
|
+
from .describe import build_description
|
|
12
|
+
from .errors import (
|
|
13
|
+
AnalyzerError,
|
|
14
|
+
InvalidSkill,
|
|
15
|
+
LLMUnavailable,
|
|
16
|
+
SkillForgeError,
|
|
17
|
+
SourceNotFound,
|
|
18
|
+
)
|
|
19
|
+
from .generate import draft_from_signals, forge
|
|
20
|
+
from .llm import refine_with_llm
|
|
21
|
+
from .models import Command, SkillDraft, SourceSignals
|
|
22
|
+
from .skill import render_skill, write_skill
|
|
23
|
+
from .slug import is_kebab_case, slugify
|
|
24
|
+
from .templates import build_body
|
|
25
|
+
from .validate import LintResult, Problem, lint_path, lint_skill_file, lint_text
|
|
26
|
+
|
|
27
|
+
__version__ = "0.1.0"
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"forge",
|
|
31
|
+
"draft_from_signals",
|
|
32
|
+
"SkillDraft",
|
|
33
|
+
"SourceSignals",
|
|
34
|
+
"Command",
|
|
35
|
+
"Settings",
|
|
36
|
+
"render_skill",
|
|
37
|
+
"write_skill",
|
|
38
|
+
"analyze",
|
|
39
|
+
"detect",
|
|
40
|
+
"build_description",
|
|
41
|
+
"build_body",
|
|
42
|
+
"lint_text",
|
|
43
|
+
"lint_skill_file",
|
|
44
|
+
"lint_path",
|
|
45
|
+
"LintResult",
|
|
46
|
+
"Problem",
|
|
47
|
+
"slugify",
|
|
48
|
+
"is_kebab_case",
|
|
49
|
+
"refine_with_llm",
|
|
50
|
+
"SkillForgeError",
|
|
51
|
+
"SourceNotFound",
|
|
52
|
+
"AnalyzerError",
|
|
53
|
+
"InvalidSkill",
|
|
54
|
+
"LLMUnavailable",
|
|
55
|
+
"__version__",
|
|
56
|
+
]
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Source detection and analyzer dispatch.
|
|
2
|
+
|
|
3
|
+
``detect()`` picks an analyzer kind from a path; ``analyze()`` runs it. The CLI-help
|
|
4
|
+
capture lives in :mod:`cli_help` and is intentionally *not* wired into detection — it only
|
|
5
|
+
runs on the explicit ``--from-cli`` flag.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from ..errors import AnalyzerError, SourceNotFound
|
|
13
|
+
from ..models import SourceSignals
|
|
14
|
+
from ..slug import slugify
|
|
15
|
+
from . import docs, generic, node, python
|
|
16
|
+
|
|
17
|
+
_DOC_SUFFIXES = {".md", ".markdown", ".rst", ".txt"}
|
|
18
|
+
|
|
19
|
+
_ANALYZERS = {
|
|
20
|
+
"python": python.analyze,
|
|
21
|
+
"node": node.analyze,
|
|
22
|
+
"docs": docs.analyze,
|
|
23
|
+
"generic": generic.analyze,
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def detect(source: str | Path) -> str:
    """Classify ``source`` as ``python``, ``node``, ``docs`` or ``generic``.

    A single file is ``python`` when its suffix is ``.py`` and ``docs`` otherwise.
    Anything else is probed, in order, for a ``package.json`` (``node``), a Python
    packaging file (``python``), or ``.py`` files at the top level or one directory
    down (``python``); when nothing matches the result is ``generic``.

    Raises:
        SourceNotFound: if ``source`` does not exist.
    """
    root = Path(source)
    if not root.exists():
        raise SourceNotFound(f"source does not exist: {root}")

    if root.is_file():
        # Every non-``.py`` file goes to the docs analyzer, whatever its suffix.
        return "python" if root.suffix == ".py" else "docs"

    if (root / "package.json").is_file():
        return "node"

    packaging_files = ("pyproject.toml", "setup.py", "setup.cfg")
    has_packaging = any((root / marker).is_file() for marker in packaging_files)
    if has_packaging or any(root.glob("*.py")) or any(root.glob("*/*.py")):
        return "python"
    return "generic"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def analyze(source: str | Path, *, kind: str | None = None) -> SourceSignals:
    """Run the analyzer for ``source`` and return the signals it extracted.

    ``kind`` forces a specific analyzer; when it is omitted (or empty) the kind is
    chosen by :func:`detect`.

    Raises:
        SourceNotFound: if ``source`` does not exist.
        AnalyzerError: if the kind has no registered analyzer, the analyzer produced
            a blank name, or ``slugify`` rejects the name with ``ValueError``.
    """
    target = Path(source)
    if not target.exists():
        raise SourceNotFound(f"source does not exist: {target}")

    analyzer_kind = kind if kind else detect(target)
    if analyzer_kind not in _ANALYZERS:
        raise AnalyzerError(f"unknown analyzer kind: {analyzer_kind!r}")

    signals = _ANALYZERS[analyzer_kind](target)
    found_name = signals.name
    if not (found_name and found_name.strip()):
        raise AnalyzerError(f"could not determine a name from {target}")

    # The slug itself is discarded; this only checks that one can be derived.
    try:
        slugify(found_name)
    except ValueError as exc:
        message = (
            f"could not derive a usable skill name from {target} (name {found_name!r}); "
            "pass --name to set one explicitly"
        )
        raise AnalyzerError(message) from exc
    return signals
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Shared, dependency-free helpers for the analyzers.
|
|
2
|
+
|
|
3
|
+
Everything here is pure text processing over files already on disk — no network, no code
|
|
4
|
+
execution.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
README_NAMES = [
|
|
13
|
+
"README.md",
|
|
14
|
+
"README.markdown",
|
|
15
|
+
"README.rst",
|
|
16
|
+
"README.txt",
|
|
17
|
+
"README",
|
|
18
|
+
"readme.md",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
EXT_LANGUAGE = {
|
|
22
|
+
".py": "Python",
|
|
23
|
+
".js": "JavaScript",
|
|
24
|
+
".mjs": "JavaScript",
|
|
25
|
+
".cjs": "JavaScript",
|
|
26
|
+
".ts": "TypeScript",
|
|
27
|
+
".tsx": "TypeScript",
|
|
28
|
+
".jsx": "JavaScript",
|
|
29
|
+
".go": "Go",
|
|
30
|
+
".rs": "Rust",
|
|
31
|
+
".rb": "Ruby",
|
|
32
|
+
".php": "PHP",
|
|
33
|
+
".java": "Java",
|
|
34
|
+
".kt": "Kotlin",
|
|
35
|
+
".sh": "Shell",
|
|
36
|
+
".c": "C",
|
|
37
|
+
".cpp": "C++",
|
|
38
|
+
".cs": "C#",
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
_STOPWORDS = {
|
|
42
|
+
"the", "a", "an", "and", "or", "for", "with", "to", "of", "in", "on", "is", "are",
|
|
43
|
+
"your", "you", "this", "that", "it", "as", "by", "be", "can", "from", "into", "via",
|
|
44
|
+
"using", "use", "used", "uses", "any", "all", "not", "no", "yes", "tool", "library",
|
|
45
|
+
"package", "project", "simple", "easy", "fast", "small", "based",
|
|
46
|
+
# Low-signal filler that tends to leak from prose summaries / headings.
|
|
47
|
+
"single", "source", "truth", "turns", "point", "required", "valid", "canonical",
|
|
48
|
+
"well", "formed", "make", "makes", "made", "just", "also", "new", "one", "first",
|
|
49
|
+
"more", "most", "very", "really", "over", "get", "gets", "why", "out", "only",
|
|
50
|
+
"such", "when", "what", "how", "we", "us", "its", "their",
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
# Headings that are structural/boilerplate, not real topics worth surfacing as triggers.
|
|
54
|
+
STOP_HEADINGS = {
|
|
55
|
+
"install", "installation", "license", "licence", "contributing", "contribution",
|
|
56
|
+
"development", "configuration", "config", "why", "table of contents", "contents",
|
|
57
|
+
"changelog", "acknowledgements", "acknowledgments", "badges", "usage", "notes",
|
|
58
|
+
"getting started", "quickstart", "quick start", "overview", "requirements",
|
|
59
|
+
"features", "faq", "support", "credits", "authors", "roadmap", "todo", "examples",
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
# Inline markup delimiters to drop. Backticks and asterisks are removed anywhere, but an
# underscore run is removed only when it is not flanked by letters/digits on both sides,
# so snake_case identifiers such as ``skill_forge`` survive while ``_emphasis_`` and
# ``__strong__`` markers are still stripped.
_MD_MARKUP_RE = re.compile(r"[`*]+|(?<![A-Za-z0-9])_+|_+(?![A-Za-z0-9])")


def strip_markdown(text: str) -> str:
    """Return ``text`` with inline markdown markers removed and whitespace collapsed.

    Backticks, asterisks and emphasis underscores are dropped, a run of ``#`` at the
    very start of the string (a heading prefix) is removed, and every whitespace run
    becomes a single space. A falsy ``text`` (``None`` or ``""``) yields ``""``.
    """
    cleaned = _MD_MARKUP_RE.sub("", text or "")
    # Only a leading hash run is a heading prefix; hashes elsewhere are content.
    cleaned = re.sub(r"^#+\s*", "", cleaned)
    return re.sub(r"\s+", " ", cleaned).strip()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def clean_section_titles(titles: list[str]) -> list[str]:
    """Return the section titles worth listing, cleaned and de-duplicated.

    Each title is passed through :func:`strip_markdown`. Titles that end up empty,
    that appear in ``STOP_HEADINGS`` (compared lowercased), or that repeat an earlier
    title case-insensitively are dropped. First-seen order and casing are kept.
    """
    # Insertion-ordered dict: lowercased key for de-duplication -> cleaned title.
    kept: dict[str, str] = {}
    for raw_title in titles:
        plain = strip_markdown(raw_title)
        folded = plain.lower()
        if plain and folded not in STOP_HEADINGS and folded not in kept:
            kept[folded] = plain
    return list(kept.values())
|
|
84
|
+
|
|
85
|
+
# ATX heading: 1-6 hashes, whitespace, the heading text, then an optional closing run of
# hashes. The closing run must be separated from the text by whitespace, so a heading
# such as ``# C#`` keeps its trailing ``#`` instead of being truncated to ``C``.
_HEADING_RE = re.compile(r"^(#{1,6})\s+(.*?)(?:\s+#+)?\s*$")
# Opening or closing line of a fenced code block (backtick or tilde fence).
_FENCE_RE = re.compile(r"^\s*(```|~~~)")
# Keyword candidate: a letter followed by at least one letter, digit, ``_``, ``+`` or ``-``.
_WORD_RE = re.compile(r"[A-Za-z][A-Za-z0-9_+-]{1,}")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def read_text(path: str | Path) -> str:
    """Return the contents of ``path`` decoded as UTF-8.

    Bytes that cannot be decoded are silently dropped. Any ``OSError`` raised while
    opening or reading yields an empty string instead of propagating.
    """
    try:
        with Path(path).open(encoding="utf-8", errors="ignore") as handle:
            return handle.read()
    except OSError:
        return ""
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def find_readme(directory: str | Path) -> Path | None:
    """Return the path of the first ``README_NAMES`` entry that is a file in ``directory``.

    Names are tried in the order they appear in ``README_NAMES``; ``None`` is returned
    when none of them exists as a regular file.
    """
    base = Path(directory)
    candidates = (base / readme_name for readme_name in README_NAMES)
    return next((found for found in candidates if found.is_file()), None)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _is_noise(line: str) -> bool:
    """Tell whether ``line`` is unsuitable as summary prose.

    Noise is a line that starts with an image (``![``), a linked badge (``[![``), an
    HTML tag (``<``), a table row (``|``) or a block quote (``>``), or that consists
    solely of three or more ``-``, ``=``, ``*`` or ``_`` characters.
    """
    if line.startswith(("![", "[![", "<", "|", ">")):
        return True
    return re.fullmatch(r"[-=*_]{3,}", line) is not None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def parse_markdown(text: str) -> dict:
    """Scan markdown-ish ``text`` line by line and collect its landmarks.

    Returns a dict with four keys:

    * ``title`` — text of the first level-1 ``#`` heading, or ``""``.
    * ``summary`` — the first paragraph of non-noise prose found outside fenced
      blocks, with its lines joined by single spaces, or ``""``.
    * ``headings`` — ``(level, text)`` for every ``#`` heading outside fenced blocks.
    * ``code_blocks`` — the body of every fenced block; a final block that is never
      closed is included when it has at least one line.
    """
    doc_title = ""
    first_paragraph = ""
    found_headings: list[tuple[int, str]] = []
    fenced_bodies: list[str] = []
    pending: list[str] = []      # lines of the paragraph currently being collected
    fence_lines: list[str] = []  # lines of the fenced block currently open
    inside_fence = False

    def end_paragraph() -> None:
        """Promote the pending paragraph to the summary if none was captured yet."""
        nonlocal first_paragraph
        if pending and not first_paragraph:
            first_paragraph = " ".join(pending).strip()
        pending.clear()

    for raw in text.splitlines():
        if _FENCE_RE.match(raw):
            # A fence line toggles code mode; closing one finishes the current block.
            if inside_fence:
                fenced_bodies.append("\n".join(fence_lines))
                fence_lines = []
            inside_fence = not inside_fence
            continue
        if inside_fence:
            fence_lines.append(raw)
            continue

        heading_match = _HEADING_RE.match(raw)
        if heading_match:
            depth = len(heading_match.group(1))
            heading_text = heading_match.group(2).strip()
            if depth == 1 and not doc_title:
                doc_title = heading_text
            found_headings.append((depth, heading_text))
            end_paragraph()
            continue

        content = raw.strip()
        if not content:
            # A blank line terminates the paragraph.
            end_paragraph()
        elif not first_paragraph and not _is_noise(content):
            pending.append(content)

    if fence_lines:
        fenced_bodies.append("\n".join(fence_lines))
    end_paragraph()
    return {
        "title": doc_title,
        "summary": first_paragraph,
        "headings": found_headings,
        "code_blocks": fenced_bodies,
    }
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def tokenize_keywords(*texts: str, limit: int = 12) -> list[str]:
    """Pull distinct lowercase keyword tokens from free text, dropping stopwords.

    Tokens are the ``_WORD_RE`` matches of each text, lowercased. Tokens shorter than
    three characters, tokens in ``_STOPWORDS`` and repeats are skipped. Order of first
    appearance is kept and scanning stops once ``limit`` tokens are collected.

    Args:
        *texts: Strings to scan; falsy entries (``None``, ``""``) are treated as empty.
        limit: Maximum number of tokens to return. A value of zero or less returns
            an empty list.
    """
    # Without this guard the first accepted token would be appended before the limit
    # is ever compared, so ``limit=0`` would still return one keyword.
    if limit <= 0:
        return []

    out: list[str] = []
    seen: set[str] = set()
    for text in texts:
        for match in _WORD_RE.findall(text or ""):
            token = match.lower()
            if len(token) < 3 or token in _STOPWORDS or token in seen:
                continue
            seen.add(token)
            out.append(token)
            if len(out) >= limit:
                return out
    return out
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def guess_language(directory: str | Path) -> str | None:
    """Guess the dominant language of a directory from its file extensions.

    Every regular file under ``directory`` is mapped to a language through
    ``EXT_LANGUAGE`` by its suffix. Files located below a vendored or build directory
    (``.git``, ``node_modules``, ``.venv``, ``venv``, ``dist``, ``build``) are ignored.

    Returns:
        The language with the most files, or ``None`` when no file has a known
        suffix. Ties go to the alphabetically first language so the result does not
        depend on the order in which the filesystem lists files.
    """
    skip = {".git", "node_modules", ".venv", "venv", "dist", "build"}
    root = Path(directory)
    counts: dict[str, int] = {}
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        # Only look at components *below* ``root``: checking the full path would
        # discard every file when the project itself lives under a directory named
        # e.g. ``build`` or ``venv``.
        if any(part in skip for part in path.relative_to(root).parts):
            continue
        lang = EXT_LANGUAGE.get(path.suffix)
        if lang:
            counts[lang] = counts.get(lang, 0) + 1
    if not counts:
        return None
    # Highest count first, then name, for a deterministic winner on ties.
    return min(counts, key=lambda name: (-counts[name], name))
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Build signals by capturing a CLI tool's ``--help`` output.
|
|
2
|
+
|
|
3
|
+
This is the *one* place skill-forge runs an external process, and it only happens when the
|
|
4
|
+
user explicitly passes ``--from-cli "<command>"``. The command is split with :func:`shlex.split`
|
|
5
|
+
(no shell), ``--help`` is appended, and the call is timeout-guarded. We never auto-detect
|
|
6
|
+
or auto-run anything.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
import shlex
|
|
13
|
+
import subprocess
|
|
14
|
+
|
|
15
|
+
from ..errors import AnalyzerError
|
|
16
|
+
from ..models import Command, SourceSignals
|
|
17
|
+
from .base import tokenize_keywords
|
|
18
|
+
|
|
19
|
+
_USAGE_RE = re.compile(r"^\s*usage:", re.IGNORECASE)
|
|
20
|
+
_SECTION_RE = re.compile(r"^\s*(commands|subcommands|available commands)\b", re.IGNORECASE)
|
|
21
|
+
_ITEM_RE = re.compile(r"^\s{1,6}([a-z][\w-]*)\s{2,}(.+?)\s*$")
|
|
22
|
+
_CHOICES_RE = re.compile(r"\{([a-z][\w,-]+)\}")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def capture_cli_help(command: str, *, timeout: float = 10.0) -> SourceSignals:
    """Run ``<command> --help`` (no shell) and parse the output into signals.

    ``command`` is split with :func:`shlex.split`, ``--help`` is appended, and the
    process is run with a timeout. Stdout and stderr are both captured and parsed
    together, and the exit status is not checked.

    Args:
        command: Program name, optionally followed by a subcommand. Its last word
            must not be a flag.
        timeout: Seconds to wait for the process before giving up.

    Returns:
        ``SourceSignals`` of kind ``"cli"``. The name is the last word of a
        multi-word command, otherwise the program's basename. The raw help text is
        stored in ``usage`` only when no subcommands were recognised.

    Raises:
        AnalyzerError: if the command cannot be parsed, is empty, ends with a flag,
            cannot be started, times out, or prints nothing.
    """
    try:
        parts = shlex.split(command)
    except ValueError as exc:
        # shlex raises ValueError on e.g. an unbalanced quote; report it like every
        # other bad --from-cli value instead of leaking a raw traceback.
        raise AnalyzerError(
            f"could not parse --from-cli command {command!r}: {exc}"
        ) from exc
    if not parts:
        raise AnalyzerError("empty --from-cli command")
    if parts[-1].startswith("-"):
        raise AnalyzerError(
            "--from-cli expects a program name, optionally a subcommand — not a flag: "
            f"got {command!r}. Try `--from-cli \"{parts[0]}\"`."
        )
    try:
        proc = subprocess.run(  # noqa: S603 - args are a parsed list, shell=False
            [*parts, "--help"],
            capture_output=True,
            text=True,
            # Help text in an unexpected encoding must not abort the capture.
            errors="replace",
            timeout=timeout,
            check=False,
        )
    except FileNotFoundError as exc:
        raise AnalyzerError(f"command not found: {parts[0]}") from exc
    except subprocess.TimeoutExpired as exc:
        raise AnalyzerError(f"`{parts[0]} --help` timed out after {timeout}s") from exc
    except OSError as exc:
        # Covers the remaining launch failures, e.g. PermissionError for a
        # non-executable file.
        raise AnalyzerError(f"could not run `{parts[0]} --help`: {exc}") from exc

    text = (proc.stdout or "") + ("\n" + proc.stderr if proc.stderr else "")
    if not text.strip():
        raise AnalyzerError(f"`{parts[0]} --help` produced no output")

    name = parts[-1] if len(parts) > 1 else parts[0].rsplit("/", 1)[-1]
    summary, commands, notes = _parse_help(text)
    keywords = tokenize_keywords(name, summary, " ".join(c.name for c in commands))

    return SourceSignals(
        name=name,
        summary=summary,
        kind="cli",
        commands=commands,
        keywords=keywords,
        notes=notes,
        usage=[text.strip()] if not commands else [],
        source=f"`{command} --help`",
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _parse_help(text: str) -> tuple[str, list[Command], list[str]]:
    """Extract ``(summary, commands, notes)`` from captured ``--help`` output.

    * Commands come from ``{a,b,c}`` choice groups in the usage text and from
      ``name  description`` items listed under a commands section. The first
      definition of a name wins.
    * The summary is the first non-blank line that is not part of the usage text or a
      commands section and does not start with ``-``.
    * ``notes`` is currently always empty.
    """
    summary = ""
    commands: dict[str, Command] = {}
    notes: list[str] = []
    in_usage = False
    in_commands = False

    def add_choices(usage_line: str) -> None:
        """Register every name found in ``{a,b,c}`` groups on ``usage_line``."""
        for group in _CHOICES_RE.findall(usage_line):
            for choice in group.split(","):
                choice = choice.strip()
                if choice:
                    commands.setdefault(choice, Command(name=choice))

    for line in text.splitlines():
        if _USAGE_RE.match(line):
            in_usage, in_commands = True, False
            add_choices(line)
            continue
        if _SECTION_RE.match(line):
            in_usage, in_commands = False, True
            continue
        if in_usage:
            # Long usage strings wrap onto indented continuation lines. They belong
            # to the usage text, so they may carry choices but must never be taken
            # for the summary.
            if line.strip() and line[0].isspace():
                add_choices(line)
                continue
            in_usage = False
        if in_commands:
            item = _ITEM_RE.match(line)
            if item:
                name, help_text = item.group(1), item.group(2).strip()
                commands.setdefault(name, Command(name=name, help=help_text))
            elif line.strip() == "":
                # A blank line ends the commands section.
                in_commands = False
            continue
        stripped = line.strip()
        if not summary and stripped and not stripped.startswith("-"):
            summary = stripped

    return summary, list(commands.values()), notes
|