repocrunch 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repocrunch-0.1.0/.env.example +1 -0
- repocrunch-0.1.0/.github/workflows/publish.yml +24 -0
- repocrunch-0.1.0/.gitignore +16 -0
- repocrunch-0.1.0/LICENSE +21 -0
- repocrunch-0.1.0/PKG-INFO +218 -0
- repocrunch-0.1.0/README.md +180 -0
- repocrunch-0.1.0/pyproject.toml +54 -0
- repocrunch-0.1.0/smithery.yaml +5 -0
- repocrunch-0.1.0/src/repocrunch/__init__.py +27 -0
- repocrunch-0.1.0/src/repocrunch/analyzer.py +91 -0
- repocrunch-0.1.0/src/repocrunch/api.py +44 -0
- repocrunch-0.1.0/src/repocrunch/cli.py +83 -0
- repocrunch-0.1.0/src/repocrunch/client.py +154 -0
- repocrunch-0.1.0/src/repocrunch/detection.py +117 -0
- repocrunch-0.1.0/src/repocrunch/extractors/__init__.py +15 -0
- repocrunch-0.1.0/src/repocrunch/extractors/architecture.py +113 -0
- repocrunch-0.1.0/src/repocrunch/extractors/health.py +107 -0
- repocrunch-0.1.0/src/repocrunch/extractors/metadata.py +49 -0
- repocrunch-0.1.0/src/repocrunch/extractors/security.py +58 -0
- repocrunch-0.1.0/src/repocrunch/extractors/tech_stack.py +218 -0
- repocrunch-0.1.0/src/repocrunch/mcp_server.py +28 -0
- repocrunch-0.1.0/src/repocrunch/models.py +82 -0
- repocrunch-0.1.0/src/repocrunch/parsers/__init__.py +23 -0
- repocrunch-0.1.0/src/repocrunch/parsers/build_gradle.py +64 -0
- repocrunch-0.1.0/src/repocrunch/parsers/cargo_toml.py +21 -0
- repocrunch-0.1.0/src/repocrunch/parsers/cmakelists.py +26 -0
- repocrunch-0.1.0/src/repocrunch/parsers/gemfile.py +64 -0
- repocrunch-0.1.0/src/repocrunch/parsers/go_mod.py +30 -0
- repocrunch-0.1.0/src/repocrunch/parsers/package_json.py +28 -0
- repocrunch-0.1.0/src/repocrunch/parsers/pom_xml.py +59 -0
- repocrunch-0.1.0/src/repocrunch/parsers/pyproject_toml.py +71 -0
- repocrunch-0.1.0/src/repocrunch/parsers/requirements_txt.py +18 -0
- repocrunch-0.1.0/tests/__init__.py +0 -0
- repocrunch-0.1.0/tests/conftest.py +218 -0
- repocrunch-0.1.0/tests/fixtures/repo_data.json +22 -0
- repocrunch-0.1.0/tests/fixtures/tree_data.json +21 -0
- repocrunch-0.1.0/tests/test_analyzer.py +119 -0
- repocrunch-0.1.0/tests/test_api.py +52 -0
- repocrunch-0.1.0/tests/test_cli.py +67 -0
- repocrunch-0.1.0/tests/test_client.py +104 -0
- repocrunch-0.1.0/tests/test_extractors/__init__.py +0 -0
- repocrunch-0.1.0/tests/test_extractors/test_architecture.py +57 -0
- repocrunch-0.1.0/tests/test_extractors/test_health.py +75 -0
- repocrunch-0.1.0/tests/test_models.py +55 -0
- repocrunch-0.1.0/tests/test_parsers/__init__.py +0 -0
- repocrunch-0.1.0/tests/test_parsers/test_build_gradle.py +42 -0
- repocrunch-0.1.0/tests/test_parsers/test_cargo_toml.py +37 -0
- repocrunch-0.1.0/tests/test_parsers/test_cmakelists.py +37 -0
- repocrunch-0.1.0/tests/test_parsers/test_gemfile.py +47 -0
- repocrunch-0.1.0/tests/test_parsers/test_go_mod.py +26 -0
- repocrunch-0.1.0/tests/test_parsers/test_package_json.py +36 -0
- repocrunch-0.1.0/tests/test_parsers/test_pom_xml.py +51 -0
- repocrunch-0.1.0/tests/test_parsers/test_pyproject_toml.py +63 -0
- repocrunch-0.1.0/tests/test_parsers/test_requirements_txt.py +28 -0
- repocrunch-0.1.0/uv.lock +1855 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
GITHUB_TOKEN=ghp_...
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment: pypi
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Install uv
|
|
18
|
+
uses: astral-sh/setup-uv@v5
|
|
19
|
+
|
|
20
|
+
- name: Build
|
|
21
|
+
run: uv build
|
|
22
|
+
|
|
23
|
+
- name: Publish to PyPI
|
|
24
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
repocrunch-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kim
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: repocrunch
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Analyze GitHub repos into structured JSON. No AI, fully deterministic.
|
|
5
|
+
Project-URL: Homepage, https://github.com/repocrunch/repocrunch
|
|
6
|
+
Project-URL: Repository, https://github.com/repocrunch/repocrunch
|
|
7
|
+
Author: Kim
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: analysis,devtools,github,repository
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Requires-Dist: httpx>=0.27
|
|
19
|
+
Requires-Dist: pydantic>=2.0
|
|
20
|
+
Provides-Extra: all
|
|
21
|
+
Requires-Dist: fastapi>=0.115; extra == 'all'
|
|
22
|
+
Requires-Dist: fastmcp>=0.1; extra == 'all'
|
|
23
|
+
Requires-Dist: typer>=0.12; extra == 'all'
|
|
24
|
+
Requires-Dist: uvicorn>=0.30; extra == 'all'
|
|
25
|
+
Provides-Extra: api
|
|
26
|
+
Requires-Dist: fastapi>=0.115; extra == 'api'
|
|
27
|
+
Requires-Dist: uvicorn>=0.30; extra == 'api'
|
|
28
|
+
Provides-Extra: cli
|
|
29
|
+
Requires-Dist: typer>=0.12; extra == 'cli'
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest-httpx>=0.30; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
35
|
+
Provides-Extra: mcp
|
|
36
|
+
Requires-Dist: fastmcp>=0.1; extra == 'mcp'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# RepoCrunch
|
|
40
|
+
|
|
41
|
+
Analyze any public GitHub repository into structured JSON. No AI, no LLMs — fully deterministic.
|
|
42
|
+
|
|
43
|
+
Give it a repo, get back tech stack, dependencies, architecture, health metrics, and security signals in clean, consistent JSON. Use it as a Python library, CLI tool, REST API, or MCP server.
|
|
44
|
+
|
|
45
|
+
## Quick Start
|
|
46
|
+
|
|
47
|
+
Requires **Python 3.11+** and [uv](https://github.com/astral-sh/uv).
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Install
|
|
51
|
+
git clone https://github.com/kimwwk/repocrunch.git
|
|
52
|
+
cd repocrunch
|
|
53
|
+
uv venv && uv pip install -e ".[all]"
|
|
54
|
+
|
|
55
|
+
# Analyze a repo
|
|
56
|
+
repocrunch analyze astral-sh/uv --pretty
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Or install just what you need:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uv pip install -e "." # Library only (httpx + pydantic)
|
|
63
|
+
uv pip install -e ".[cli]" # + CLI
|
|
64
|
+
uv pip install -e ".[api]" # + REST API
|
|
65
|
+
uv pip install -e ".[mcp]" # + MCP server
|
|
66
|
+
uv pip install -e ".[all]" # Everything
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Set a GitHub Token (optional)
|
|
70
|
+
|
|
71
|
+
Without a token you get 60 API calls/hour. With one, 5,000/hour.
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
export GITHUB_TOKEN=ghp_...
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Usage
|
|
78
|
+
|
|
79
|
+
### CLI
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
repocrunch analyze fastapi/fastapi --pretty # Full analysis, pretty JSON
|
|
83
|
+
repocrunch analyze facebook/react -f tech_stack # Single field
|
|
84
|
+
repocrunch analyze https://github.com/gin-gonic/gin # Full URL works too
|
|
85
|
+
repocrunch serve # Start REST API on :8000
|
|
86
|
+
repocrunch mcp # Start MCP server (STDIO)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Python Library
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from repocrunch import analyze, analyze_sync
|
|
93
|
+
|
|
94
|
+
# Async
|
|
95
|
+
result = await analyze("fastapi/fastapi")
|
|
96
|
+
|
|
97
|
+
# Sync
|
|
98
|
+
result = analyze_sync("pallets/flask")
|
|
99
|
+
|
|
100
|
+
print(result.summary.stars)
|
|
101
|
+
print(result.tech_stack.framework)
|
|
102
|
+
print(result.model_dump_json(indent=2))
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### REST API
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
repocrunch serve
|
|
109
|
+
|
|
110
|
+
# Then:
|
|
111
|
+
curl "http://localhost:8000/analyze?repo=fastapi/fastapi" | python -m json.tool
|
|
112
|
+
curl "http://localhost:8000/health"
|
|
113
|
+
curl "http://localhost:8000/docs" # OpenAPI docs
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### MCP Server (for Claude, Cursor, etc.)
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
repocrunch mcp # Starts STDIO transport
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Sample Output
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
$ repocrunch analyze pallets/flask --pretty
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"schema_version": "1",
|
|
131
|
+
"repo": "pallets/flask",
|
|
132
|
+
"url": "https://github.com/pallets/flask",
|
|
133
|
+
"analyzed_at": "2026-02-08T19:07:31Z",
|
|
134
|
+
"summary": {
|
|
135
|
+
"stars": 71143,
|
|
136
|
+
"forks": 16697,
|
|
137
|
+
"watchers": 2092,
|
|
138
|
+
"last_commit": "2026-02-06T21:23:01Z",
|
|
139
|
+
"age_days": 5787,
|
|
140
|
+
"license": "BSD-3-Clause",
|
|
141
|
+
"primary_language": "Python",
|
|
142
|
+
"languages": { "Python": 99.9, "HTML": 0.1 }
|
|
143
|
+
},
|
|
144
|
+
"tech_stack": {
|
|
145
|
+
"runtime": "Python",
|
|
146
|
+
"framework": null,
|
|
147
|
+
"package_manager": "pip",
|
|
148
|
+
"dependencies": { "direct": 6, "dev": 0 },
|
|
149
|
+
"key_deps": ["blinker", "click", "itsdangerous", "jinja2", "markupsafe", "werkzeug"]
|
|
150
|
+
},
|
|
151
|
+
"architecture": {
|
|
152
|
+
"monorepo": false,
|
|
153
|
+
"docker": false,
|
|
154
|
+
"ci_cd": ["GitHub Actions"],
|
|
155
|
+
"test_framework": "pytest",
|
|
156
|
+
"has_tests": true
|
|
157
|
+
},
|
|
158
|
+
"health": {
|
|
159
|
+
"open_issues": 2,
|
|
160
|
+
"open_prs": 0,
|
|
161
|
+
"contributors": 862,
|
|
162
|
+
"commit_frequency": "daily",
|
|
163
|
+
"maintenance_status": "actively_maintained"
|
|
164
|
+
},
|
|
165
|
+
"security": {
|
|
166
|
+
"has_env_file": false,
|
|
167
|
+
"dependabot_enabled": false,
|
|
168
|
+
"branch_protection": false,
|
|
169
|
+
"security_policy": false
|
|
170
|
+
},
|
|
171
|
+
"warnings": [
|
|
172
|
+
"Branch protection status unknown (requires admin access or authenticated request)"
|
|
173
|
+
]
|
|
174
|
+
}
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Supported Ecosystems
|
|
178
|
+
|
|
179
|
+
| Language | Manifest Files | Package Manager Detection |
|
|
180
|
+
|----------|---------------|--------------------------|
|
|
181
|
+
| JavaScript / TypeScript | `package.json` | npm, yarn, pnpm, bun (from lockfiles) |
|
|
182
|
+
| Python | `pyproject.toml`, `requirements.txt` | pip, poetry, uv, pdm, pipenv |
|
|
183
|
+
| Rust | `Cargo.toml` | cargo |
|
|
184
|
+
| Go | `go.mod` | go |
|
|
185
|
+
| Java / Kotlin | `pom.xml`, `build.gradle`, `build.gradle.kts` | maven, gradle |
|
|
186
|
+
| Ruby | `Gemfile` | bundler |
|
|
187
|
+
| C / C++ | `CMakeLists.txt` | cmake |
|
|
188
|
+
|
|
189
|
+
Framework detection covers 40+ frameworks across all supported ecosystems (FastAPI, Django, React, Next.js, Spring Boot, Rails, Gin, Actix, and many more).
|
|
190
|
+
|
|
191
|
+
## What It Detects
|
|
192
|
+
|
|
193
|
+
| Category | Signals |
|
|
194
|
+
|----------|---------|
|
|
195
|
+
| **Summary** | Stars, forks, watchers, age, license, languages |
|
|
196
|
+
| **Tech Stack** | Runtime, framework, package manager, direct/dev dependency count, key deps |
|
|
197
|
+
| **Architecture** | Monorepo, Docker, CI/CD platform, test framework |
|
|
198
|
+
| **Health** | Commit frequency (daily/weekly/monthly/sporadic/inactive), maintenance status, contributors, open issues |
|
|
199
|
+
| **Security** | `.env` file committed, Dependabot enabled, branch protection, SECURITY.md present |
|
|
200
|
+
|
|
201
|
+
## Roadmap
|
|
202
|
+
|
|
203
|
+
Not yet implemented, but planned:
|
|
204
|
+
|
|
205
|
+
- **Secrets regex scanning** — detect leaked API keys, tokens, passwords in the file tree
|
|
206
|
+
- **Architecture type classification** — library vs. application vs. framework
|
|
207
|
+
- **API rate limiting** — per-key throttling for the REST API
|
|
208
|
+
- **Private repo support** — authenticated analysis of private repositories
|
|
209
|
+
- **npm/npx package** — `npx repocrunch analyze owner/repo`
|
|
210
|
+
- **Vulnerability scanning** — known CVE detection in dependencies
|
|
211
|
+
- **Comparison mode** — side-by-side analysis of multiple repos
|
|
212
|
+
- **Historical tracking** — track how a repo's health changes over time
|
|
213
|
+
- **PyPI / npm publishing** — `pip install repocrunch` / `npm install repocrunch`
|
|
214
|
+
- **Platform deployments** — Apify Store, Smithery, mcp.so, RapidAPI
|
|
215
|
+
|
|
216
|
+
## License
|
|
217
|
+
|
|
218
|
+
MIT
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# RepoCrunch
|
|
2
|
+
|
|
3
|
+
Analyze any public GitHub repository into structured JSON. No AI, no LLMs — fully deterministic.
|
|
4
|
+
|
|
5
|
+
Give it a repo, get back tech stack, dependencies, architecture, health metrics, and security signals in clean, consistent JSON. Use it as a Python library, CLI tool, REST API, or MCP server.
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
Requires **Python 3.11+** and [uv](https://github.com/astral-sh/uv).
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install
|
|
13
|
+
git clone https://github.com/kimwwk/repocrunch.git
|
|
14
|
+
cd repocrunch
|
|
15
|
+
uv venv && uv pip install -e ".[all]"
|
|
16
|
+
|
|
17
|
+
# Analyze a repo
|
|
18
|
+
repocrunch analyze astral-sh/uv --pretty
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Or install just what you need:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
uv pip install -e "." # Library only (httpx + pydantic)
|
|
25
|
+
uv pip install -e ".[cli]" # + CLI
|
|
26
|
+
uv pip install -e ".[api]" # + REST API
|
|
27
|
+
uv pip install -e ".[mcp]" # + MCP server
|
|
28
|
+
uv pip install -e ".[all]" # Everything
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Set a GitHub Token (optional)
|
|
32
|
+
|
|
33
|
+
Without a token you get 60 API calls/hour. With one, 5,000/hour.
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
export GITHUB_TOKEN=ghp_...
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
### CLI
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
repocrunch analyze fastapi/fastapi --pretty # Full analysis, pretty JSON
|
|
45
|
+
repocrunch analyze facebook/react -f tech_stack # Single field
|
|
46
|
+
repocrunch analyze https://github.com/gin-gonic/gin # Full URL works too
|
|
47
|
+
repocrunch serve # Start REST API on :8000
|
|
48
|
+
repocrunch mcp # Start MCP server (STDIO)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Python Library
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from repocrunch import analyze, analyze_sync
|
|
55
|
+
|
|
56
|
+
# Async
|
|
57
|
+
result = await analyze("fastapi/fastapi")
|
|
58
|
+
|
|
59
|
+
# Sync
|
|
60
|
+
result = analyze_sync("pallets/flask")
|
|
61
|
+
|
|
62
|
+
print(result.summary.stars)
|
|
63
|
+
print(result.tech_stack.framework)
|
|
64
|
+
print(result.model_dump_json(indent=2))
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### REST API
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
repocrunch serve
|
|
71
|
+
|
|
72
|
+
# Then:
|
|
73
|
+
curl "http://localhost:8000/analyze?repo=fastapi/fastapi" | python -m json.tool
|
|
74
|
+
curl "http://localhost:8000/health"
|
|
75
|
+
curl "http://localhost:8000/docs" # OpenAPI docs
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### MCP Server (for Claude, Cursor, etc.)
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
repocrunch mcp # Starts STDIO transport
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Sample Output
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
$ repocrunch analyze pallets/flask --pretty
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
```json
|
|
91
|
+
{
|
|
92
|
+
"schema_version": "1",
|
|
93
|
+
"repo": "pallets/flask",
|
|
94
|
+
"url": "https://github.com/pallets/flask",
|
|
95
|
+
"analyzed_at": "2026-02-08T19:07:31Z",
|
|
96
|
+
"summary": {
|
|
97
|
+
"stars": 71143,
|
|
98
|
+
"forks": 16697,
|
|
99
|
+
"watchers": 2092,
|
|
100
|
+
"last_commit": "2026-02-06T21:23:01Z",
|
|
101
|
+
"age_days": 5787,
|
|
102
|
+
"license": "BSD-3-Clause",
|
|
103
|
+
"primary_language": "Python",
|
|
104
|
+
"languages": { "Python": 99.9, "HTML": 0.1 }
|
|
105
|
+
},
|
|
106
|
+
"tech_stack": {
|
|
107
|
+
"runtime": "Python",
|
|
108
|
+
"framework": null,
|
|
109
|
+
"package_manager": "pip",
|
|
110
|
+
"dependencies": { "direct": 6, "dev": 0 },
|
|
111
|
+
"key_deps": ["blinker", "click", "itsdangerous", "jinja2", "markupsafe", "werkzeug"]
|
|
112
|
+
},
|
|
113
|
+
"architecture": {
|
|
114
|
+
"monorepo": false,
|
|
115
|
+
"docker": false,
|
|
116
|
+
"ci_cd": ["GitHub Actions"],
|
|
117
|
+
"test_framework": "pytest",
|
|
118
|
+
"has_tests": true
|
|
119
|
+
},
|
|
120
|
+
"health": {
|
|
121
|
+
"open_issues": 2,
|
|
122
|
+
"open_prs": 0,
|
|
123
|
+
"contributors": 862,
|
|
124
|
+
"commit_frequency": "daily",
|
|
125
|
+
"maintenance_status": "actively_maintained"
|
|
126
|
+
},
|
|
127
|
+
"security": {
|
|
128
|
+
"has_env_file": false,
|
|
129
|
+
"dependabot_enabled": false,
|
|
130
|
+
"branch_protection": false,
|
|
131
|
+
"security_policy": false
|
|
132
|
+
},
|
|
133
|
+
"warnings": [
|
|
134
|
+
"Branch protection status unknown (requires admin access or authenticated request)"
|
|
135
|
+
]
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Supported Ecosystems
|
|
140
|
+
|
|
141
|
+
| Language | Manifest Files | Package Manager Detection |
|
|
142
|
+
|----------|---------------|--------------------------|
|
|
143
|
+
| JavaScript / TypeScript | `package.json` | npm, yarn, pnpm, bun (from lockfiles) |
|
|
144
|
+
| Python | `pyproject.toml`, `requirements.txt` | pip, poetry, uv, pdm, pipenv |
|
|
145
|
+
| Rust | `Cargo.toml` | cargo |
|
|
146
|
+
| Go | `go.mod` | go |
|
|
147
|
+
| Java / Kotlin | `pom.xml`, `build.gradle`, `build.gradle.kts` | maven, gradle |
|
|
148
|
+
| Ruby | `Gemfile` | bundler |
|
|
149
|
+
| C / C++ | `CMakeLists.txt` | cmake |
|
|
150
|
+
|
|
151
|
+
Framework detection covers 40+ frameworks across all supported ecosystems (FastAPI, Django, React, Next.js, Spring Boot, Rails, Gin, Actix, and many more).
|
|
152
|
+
|
|
153
|
+
## What It Detects
|
|
154
|
+
|
|
155
|
+
| Category | Signals |
|
|
156
|
+
|----------|---------|
|
|
157
|
+
| **Summary** | Stars, forks, watchers, age, license, languages |
|
|
158
|
+
| **Tech Stack** | Runtime, framework, package manager, direct/dev dependency count, key deps |
|
|
159
|
+
| **Architecture** | Monorepo, Docker, CI/CD platform, test framework |
|
|
160
|
+
| **Health** | Commit frequency (daily/weekly/monthly/sporadic/inactive), maintenance status, contributors, open issues |
|
|
161
|
+
| **Security** | `.env` file committed, Dependabot enabled, branch protection, SECURITY.md present |
|
|
162
|
+
|
|
163
|
+
## Roadmap
|
|
164
|
+
|
|
165
|
+
Not yet implemented, but planned:
|
|
166
|
+
|
|
167
|
+
- **Secrets regex scanning** — detect leaked API keys, tokens, passwords in the file tree
|
|
168
|
+
- **Architecture type classification** — library vs. application vs. framework
|
|
169
|
+
- **API rate limiting** — per-key throttling for the REST API
|
|
170
|
+
- **Private repo support** — authenticated analysis of private repositories
|
|
171
|
+
- **npm/npx package** — `npx repocrunch analyze owner/repo`
|
|
172
|
+
- **Vulnerability scanning** — known CVE detection in dependencies
|
|
173
|
+
- **Comparison mode** — side-by-side analysis of multiple repos
|
|
174
|
+
- **Historical tracking** — track how a repo's health changes over time
|
|
175
|
+
- **PyPI / npm publishing** — `pip install repocrunch` / `npm install repocrunch`
|
|
176
|
+
- **Platform deployments** — Apify Store, Smithery, mcp.so, RapidAPI
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "repocrunch"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Analyze GitHub repos into structured JSON. No AI, fully deterministic."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [{ name = "Kim" }]
|
|
13
|
+
keywords = ["github", "repository", "analysis", "devtools"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"httpx>=0.27",
|
|
24
|
+
"pydantic>=2.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
cli = ["typer>=0.12"]
|
|
29
|
+
api = ["fastapi>=0.115", "uvicorn>=0.30"]
|
|
30
|
+
mcp = ["fastmcp>=0.1"]
|
|
31
|
+
all = ["repocrunch[cli,api,mcp]"]
|
|
32
|
+
dev = [
|
|
33
|
+
"pytest>=8.0",
|
|
34
|
+
"pytest-httpx>=0.30",
|
|
35
|
+
"pytest-asyncio>=0.24",
|
|
36
|
+
"pytest-cov>=5.0",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.scripts]
|
|
40
|
+
repocrunch = "repocrunch.cli:app"
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
Homepage = "https://github.com/repocrunch/repocrunch"
|
|
44
|
+
Repository = "https://github.com/repocrunch/repocrunch"
|
|
45
|
+
|
|
46
|
+
[tool.hatch.build.targets.wheel]
|
|
47
|
+
packages = ["src/repocrunch"]
|
|
48
|
+
|
|
49
|
+
[tool.pytest.ini_options]
|
|
50
|
+
asyncio_mode = "auto"
|
|
51
|
+
testpaths = ["tests"]
|
|
52
|
+
|
|
53
|
+
[tool.ruff]
|
|
54
|
+
target-version = "py311"
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""RepoCrunch — Analyze GitHub repos into structured JSON."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
|
|
7
|
+
from repocrunch.analyzer import analyze_repo
|
|
8
|
+
from repocrunch.models import SCHEMA_VERSION, RepoAnalysis
|
|
9
|
+
|
|
10
|
+
# Package version; matches the `version` field declared in pyproject.toml.
__version__ = "0.1.0"

# Public names exposed by `from repocrunch import *`.
__all__ = [
    "analyze",
    "analyze_sync",
    "RepoAnalysis",
    "SCHEMA_VERSION",
    "__version__",
]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def analyze(
    repo: str,
    token: str | None = None,
) -> RepoAnalysis:
    """Analyze a GitHub repo asynchronously.

    Thin async entry point that forwards both arguments to ``analyze_repo``.

    Args:
        repo: Repository reference, passed through unchanged.
        token: Optional GitHub token, passed through as the ``token`` keyword.

    Returns:
        The ``RepoAnalysis`` produced by ``analyze_repo``.
    """
    analysis = await analyze_repo(repo, token=token)
    return analysis
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def analyze_sync(
    repo: str,
    token: str | None = None,
) -> RepoAnalysis:
    """Analyze a GitHub repo synchronously.

    Runs ``analyze_repo`` to completion on a fresh event loop via
    ``asyncio.run``.

    Args:
        repo: Repository reference, passed through unchanged.
        token: Optional GitHub token, passed through as the ``token`` keyword.

    Returns:
        The ``RepoAnalysis`` produced by ``analyze_repo``.

    Raises:
        RuntimeError: If called from a thread that already has a running
            event loop (for example inside a coroutine or a notebook cell).
            Use ``await analyze(...)`` there instead.
    """
    # asyncio.run() refuses to nest inside a running loop. Detect that case
    # up front so the coroutine is never created (and never left un-awaited)
    # and the caller gets an actionable message.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    if loop_is_running:
        raise RuntimeError(
            "analyze_sync() cannot be called from a running event loop; "
            "use 'await analyze(...)' instead."
        )

    return asyncio.run(analyze_repo(repo, token=token))
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Orchestrator: parse input → gather data → run extractors → assemble result."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import re
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
|
|
9
|
+
from repocrunch.client import GitHubClient
|
|
10
|
+
from repocrunch.extractors.architecture import extract_architecture
|
|
11
|
+
from repocrunch.extractors.health import extract_health
|
|
12
|
+
from repocrunch.extractors.metadata import extract_metadata
|
|
13
|
+
from repocrunch.extractors.security import extract_security
|
|
14
|
+
from repocrunch.extractors.tech_stack import extract_tech_stack
|
|
15
|
+
from repocrunch.models import RepoAnalysis
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse_repo_input(raw: str) -> tuple[str, str]:
    """Parse 'owner/repo' or a GitHub URL into (owner, repo).

    Surrounding whitespace and trailing slashes are ignored. Accepted forms:

    * ``owner/repo`` shorthand
    * ``http(s)://github.com/owner/repo``, optionally with a ``www.`` host
      prefix, extra path segments (``/tree/main``), a query string or a
      fragment

    A trailing ``.git`` on the repository name is dropped in both forms.

    Args:
        raw: User-supplied repository reference.

    Returns:
        The ``(owner, repo)`` pair.

    Raises:
        ValueError: If ``raw`` matches neither form, or if either component
            is ``.`` or ``..``.
    """
    raw = raw.strip().rstrip("/")
    name = r"[A-Za-z0-9_.-]+"

    # Full URL. Owner and repo are restricted to the same character set as
    # the shorthand so query strings and fragments never leak into the
    # result; anything after the repo segment is ignored.
    match = re.match(
        rf"https?://(?:www\.)?github\.com/({name})/({name})(?:[/?#].*)?$",
        raw,
    )
    if match is None:
        # owner/repo shorthand
        match = re.fullmatch(rf"({name})/({name})", raw)

    if match is not None:
        owner = match.group(1)
        repo = match.group(2).removesuffix(".git")
        # "." and ".." are path navigation rather than names; letting them
        # through would change the API path they are later interpolated into.
        if repo and not {owner, repo} & {".", ".."}:
            return owner, repo

    raise ValueError(f"Cannot parse repo input: {raw!r}. Use 'owner/repo' or a GitHub URL.")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
async def analyze_repo(
    repo_input: str,
    token: str | None = None,
    client: GitHubClient | None = None,
) -> RepoAnalysis:
    """Analyze a GitHub repo and return structured results.

    Args:
        repo_input: ``owner/repo`` shorthand or a GitHub URL.
        token: GitHub token used when this function creates its own client.
        client: Existing client to reuse. When given it is left open for the
            caller; when omitted a client is created here and closed before
            returning.

    Returns:
        A ``RepoAnalysis`` assembled from the metadata, tech stack,
        architecture, health and security extractors, plus any warnings
        collected along the way.

    Raises:
        ValueError: If ``repo_input`` cannot be parsed, or the repository
            request returns no data.
    """
    owner, repo = parse_repo_input(repo_input)
    warnings: list[str] = []

    owns_client = client is None
    if owns_client:
        client = GitHubClient(token=token)

    try:
        # Phase 1: parallel fetch of repo metadata, languages, and file tree
        repo_data, languages, tree_data = await asyncio.gather(
            client.get(f"/repos/{owner}/{repo}"),
            client.get(f"/repos/{owner}/{repo}/languages"),
            client.get(f"/repos/{owner}/{repo}/git/trees/HEAD", params={"recursive": "1"}),
        )

        if repo_data is None:
            raise ValueError(f"Repository not found: {owner}/{repo}")

        tree_data = tree_data or {"tree": []}
        languages = languages or {}
        primary_language = repo_data.get("language")

        # GitHub caps recursive tree listings and flags the response when it
        # does. Every file-based signal below would then be computed from a
        # partial tree, so surface that instead of reporting it as complete.
        if tree_data.get("truncated"):
            warnings.append(
                "File tree was truncated by GitHub; file-based detection may be incomplete"
            )

        # Phase 2: parallel extraction (async extractors run concurrently)
        summary = extract_metadata(repo_data, languages)

        tech_stack, health, security = await asyncio.gather(
            extract_tech_stack(client, owner, repo, tree_data, primary_language),
            extract_health(client, owner, repo, repo_data),
            extract_security(client, owner, repo, tree_data, repo_data, warnings),
        )

        # Architecture is sync — run after tech_stack so we have deps for test detection
        architecture = extract_architecture(tree_data, tech_stack.key_deps)

        # Collect client warnings
        warnings.extend(client.warnings)

        return RepoAnalysis(
            repo=f"{owner}/{repo}",
            url=f"https://github.com/{owner}/{repo}",
            analyzed_at=datetime.now(timezone.utc),
            summary=summary,
            tech_stack=tech_stack,
            architecture=architecture,
            health=health,
            security=security,
            warnings=warnings,
        )
    finally:
        # Only close a client this call created; a caller-supplied one stays open.
        if owns_client:
            await client.close()
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""FastAPI REST API for RepoCrunch."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from fastapi import FastAPI, HTTPException, Query
|
|
6
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
7
|
+
|
|
8
|
+
from repocrunch import __version__
|
|
9
|
+
from repocrunch.analyzer import analyze_repo
|
|
10
|
+
from repocrunch.client import RateLimitError
|
|
11
|
+
|
|
12
|
+
# Application instance; title, version and description are passed to FastAPI.
app = FastAPI(
    title="RepoCrunch",
    version=__version__,
    description="Analyze GitHub repos into structured JSON.",
)

# CORS: any origin and any request header are allowed, but only GET requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET"],
    allow_headers=["*"],
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@app.get("/analyze")
async def analyze(
    repo: str = Query(description="GitHub repo as 'owner/repo' or URL"),
    github_token: str | None = Query(None, description="GitHub token for higher rate limits"),
):
    """Analyze a repository and return the result as a JSON-compatible dict.

    Args:
        repo: ``owner/repo`` shorthand or a GitHub URL.
        github_token: Optional GitHub token forwarded to ``analyze_repo``.

    Raises:
        HTTPException: 400 when ``analyze_repo`` raises ``ValueError``, 429 on
            ``RateLimitError``, and 500 for any other exception.
    """
    # NOTE(review): the token is accepted as a query parameter, and URLs are
    # commonly recorded in access logs and proxies. Consider accepting it via
    # a request header as well; left as-is here to keep the endpoint contract.
    try:
        result = await analyze_repo(repo, token=github_token)
        return result.model_dump(mode="json")
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RateLimitError as e:
        raise HTTPException(status_code=429, detail="GitHub API rate limit exhausted") from e
    except Exception as e:
        # NOTE(review): the raw exception text is returned to the client;
        # confirm that is acceptable for a publicly exposed deployment.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@app.get("/health")
async def health():
    """Report a fixed ``ok`` status together with the package version."""
    payload = {"status": "ok", "version": __version__}
    return payload
|