repocrunch 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. repocrunch-0.1.0/.env.example +1 -0
  2. repocrunch-0.1.0/.github/workflows/publish.yml +24 -0
  3. repocrunch-0.1.0/.gitignore +16 -0
  4. repocrunch-0.1.0/LICENSE +21 -0
  5. repocrunch-0.1.0/PKG-INFO +218 -0
  6. repocrunch-0.1.0/README.md +180 -0
  7. repocrunch-0.1.0/pyproject.toml +54 -0
  8. repocrunch-0.1.0/smithery.yaml +5 -0
  9. repocrunch-0.1.0/src/repocrunch/__init__.py +27 -0
  10. repocrunch-0.1.0/src/repocrunch/analyzer.py +91 -0
  11. repocrunch-0.1.0/src/repocrunch/api.py +44 -0
  12. repocrunch-0.1.0/src/repocrunch/cli.py +83 -0
  13. repocrunch-0.1.0/src/repocrunch/client.py +154 -0
  14. repocrunch-0.1.0/src/repocrunch/detection.py +117 -0
  15. repocrunch-0.1.0/src/repocrunch/extractors/__init__.py +15 -0
  16. repocrunch-0.1.0/src/repocrunch/extractors/architecture.py +113 -0
  17. repocrunch-0.1.0/src/repocrunch/extractors/health.py +107 -0
  18. repocrunch-0.1.0/src/repocrunch/extractors/metadata.py +49 -0
  19. repocrunch-0.1.0/src/repocrunch/extractors/security.py +58 -0
  20. repocrunch-0.1.0/src/repocrunch/extractors/tech_stack.py +218 -0
  21. repocrunch-0.1.0/src/repocrunch/mcp_server.py +28 -0
  22. repocrunch-0.1.0/src/repocrunch/models.py +82 -0
  23. repocrunch-0.1.0/src/repocrunch/parsers/__init__.py +23 -0
  24. repocrunch-0.1.0/src/repocrunch/parsers/build_gradle.py +64 -0
  25. repocrunch-0.1.0/src/repocrunch/parsers/cargo_toml.py +21 -0
  26. repocrunch-0.1.0/src/repocrunch/parsers/cmakelists.py +26 -0
  27. repocrunch-0.1.0/src/repocrunch/parsers/gemfile.py +64 -0
  28. repocrunch-0.1.0/src/repocrunch/parsers/go_mod.py +30 -0
  29. repocrunch-0.1.0/src/repocrunch/parsers/package_json.py +28 -0
  30. repocrunch-0.1.0/src/repocrunch/parsers/pom_xml.py +59 -0
  31. repocrunch-0.1.0/src/repocrunch/parsers/pyproject_toml.py +71 -0
  32. repocrunch-0.1.0/src/repocrunch/parsers/requirements_txt.py +18 -0
  33. repocrunch-0.1.0/tests/__init__.py +0 -0
  34. repocrunch-0.1.0/tests/conftest.py +218 -0
  35. repocrunch-0.1.0/tests/fixtures/repo_data.json +22 -0
  36. repocrunch-0.1.0/tests/fixtures/tree_data.json +21 -0
  37. repocrunch-0.1.0/tests/test_analyzer.py +119 -0
  38. repocrunch-0.1.0/tests/test_api.py +52 -0
  39. repocrunch-0.1.0/tests/test_cli.py +67 -0
  40. repocrunch-0.1.0/tests/test_client.py +104 -0
  41. repocrunch-0.1.0/tests/test_extractors/__init__.py +0 -0
  42. repocrunch-0.1.0/tests/test_extractors/test_architecture.py +57 -0
  43. repocrunch-0.1.0/tests/test_extractors/test_health.py +75 -0
  44. repocrunch-0.1.0/tests/test_models.py +55 -0
  45. repocrunch-0.1.0/tests/test_parsers/__init__.py +0 -0
  46. repocrunch-0.1.0/tests/test_parsers/test_build_gradle.py +42 -0
  47. repocrunch-0.1.0/tests/test_parsers/test_cargo_toml.py +37 -0
  48. repocrunch-0.1.0/tests/test_parsers/test_cmakelists.py +37 -0
  49. repocrunch-0.1.0/tests/test_parsers/test_gemfile.py +47 -0
  50. repocrunch-0.1.0/tests/test_parsers/test_go_mod.py +26 -0
  51. repocrunch-0.1.0/tests/test_parsers/test_package_json.py +36 -0
  52. repocrunch-0.1.0/tests/test_parsers/test_pom_xml.py +51 -0
  53. repocrunch-0.1.0/tests/test_parsers/test_pyproject_toml.py +63 -0
  54. repocrunch-0.1.0/tests/test_parsers/test_requirements_txt.py +28 -0
  55. repocrunch-0.1.0/uv.lock +1855 -0
@@ -0,0 +1 @@
1
+ GITHUB_TOKEN=ghp_...
@@ -0,0 +1,24 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment: pypi
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Install uv
18
+ uses: astral-sh/setup-uv@v5
19
+
20
+ - name: Build
21
+ run: uv build
22
+
23
+ - name: Publish to PyPI
24
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,16 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ .env
12
+ .pytest_cache/
13
+ .coverage
14
+ htmlcov/
15
+ .mypy_cache/
16
+ .ruff_cache/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kim
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,218 @@
1
+ Metadata-Version: 2.4
2
+ Name: repocrunch
3
+ Version: 0.1.0
4
+ Summary: Analyze GitHub repos into structured JSON. No AI, fully deterministic.
5
+ Project-URL: Homepage, https://github.com/repocrunch/repocrunch
6
+ Project-URL: Repository, https://github.com/repocrunch/repocrunch
7
+ Author: Kim
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: analysis,devtools,github,repository
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.11
18
+ Requires-Dist: httpx>=0.27
19
+ Requires-Dist: pydantic>=2.0
20
+ Provides-Extra: all
21
+ Requires-Dist: fastapi>=0.115; extra == 'all'
22
+ Requires-Dist: fastmcp>=0.1; extra == 'all'
23
+ Requires-Dist: typer>=0.12; extra == 'all'
24
+ Requires-Dist: uvicorn>=0.30; extra == 'all'
25
+ Provides-Extra: api
26
+ Requires-Dist: fastapi>=0.115; extra == 'api'
27
+ Requires-Dist: uvicorn>=0.30; extra == 'api'
28
+ Provides-Extra: cli
29
+ Requires-Dist: typer>=0.12; extra == 'cli'
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
32
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
33
+ Requires-Dist: pytest-httpx>=0.30; extra == 'dev'
34
+ Requires-Dist: pytest>=8.0; extra == 'dev'
35
+ Provides-Extra: mcp
36
+ Requires-Dist: fastmcp>=0.1; extra == 'mcp'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # RepoCrunch
40
+
41
+ Analyze any public GitHub repository into structured JSON. No AI, no LLMs — fully deterministic.
42
+
43
+ Give it a repo, get back tech stack, dependencies, architecture, health metrics, and security signals in clean, consistent JSON. Use it as a Python library, CLI tool, REST API, or MCP server.
44
+
45
+ ## Quick Start
46
+
47
+ Requires **Python 3.11+** and [uv](https://github.com/astral-sh/uv).
48
+
49
+ ```bash
50
+ # Install
51
+ git clone https://github.com/kimwwk/repocrunch.git
52
+ cd repocrunch
53
+ uv venv && uv pip install -e ".[all]"
54
+
55
+ # Analyze a repo
56
+ repocrunch analyze astral-sh/uv --pretty
57
+ ```
58
+
59
+ Or install just what you need:
60
+
61
+ ```bash
62
+ uv pip install -e "." # Library only (httpx + pydantic)
63
+ uv pip install -e ".[cli]" # + CLI
64
+ uv pip install -e ".[api]" # + REST API
65
+ uv pip install -e ".[mcp]" # + MCP server
66
+ uv pip install -e ".[all]" # Everything
67
+ ```
68
+
69
+ ### Set a GitHub Token (optional)
70
+
71
+ Without a token you get 60 API calls/hour. With one, 5,000/hour.
72
+
73
+ ```bash
74
+ export GITHUB_TOKEN=ghp_...
75
+ ```
76
+
77
+ ## Usage
78
+
79
+ ### CLI
80
+
81
+ ```bash
82
+ repocrunch analyze fastapi/fastapi --pretty # Full analysis, pretty JSON
83
+ repocrunch analyze facebook/react -f tech_stack # Single field
84
+ repocrunch analyze https://github.com/gin-gonic/gin # Full URL works too
85
+ repocrunch serve # Start REST API on :8000
86
+ repocrunch mcp # Start MCP server (STDIO)
87
+ ```
88
+
89
+ ### Python Library
90
+
91
+ ```python
92
+ from repocrunch import analyze, analyze_sync
93
+
94
+ # Async
95
+ result = await analyze("fastapi/fastapi")
96
+
97
+ # Sync
98
+ result = analyze_sync("pallets/flask")
99
+
100
+ print(result.summary.stars)
101
+ print(result.tech_stack.framework)
102
+ print(result.model_dump_json(indent=2))
103
+ ```
104
+
105
+ ### REST API
106
+
107
+ ```bash
108
+ repocrunch serve
109
+
110
+ # Then:
111
+ curl "http://localhost:8000/analyze?repo=fastapi/fastapi" | python -m json.tool
112
+ curl "http://localhost:8000/health"
113
+ curl "http://localhost:8000/docs" # OpenAPI docs
114
+ ```
115
+
116
+ ### MCP Server (for Claude, Cursor, etc.)
117
+
118
+ ```bash
119
+ repocrunch mcp # Starts STDIO transport
120
+ ```
121
+
122
+ ## Sample Output
123
+
124
+ ```bash
125
+ $ repocrunch analyze pallets/flask --pretty
126
+ ```
127
+
128
+ ```json
129
+ {
130
+ "schema_version": "1",
131
+ "repo": "pallets/flask",
132
+ "url": "https://github.com/pallets/flask",
133
+ "analyzed_at": "2026-02-08T19:07:31Z",
134
+ "summary": {
135
+ "stars": 71143,
136
+ "forks": 16697,
137
+ "watchers": 2092,
138
+ "last_commit": "2026-02-06T21:23:01Z",
139
+ "age_days": 5787,
140
+ "license": "BSD-3-Clause",
141
+ "primary_language": "Python",
142
+ "languages": { "Python": 99.9, "HTML": 0.1 }
143
+ },
144
+ "tech_stack": {
145
+ "runtime": "Python",
146
+ "framework": null,
147
+ "package_manager": "pip",
148
+ "dependencies": { "direct": 6, "dev": 0 },
149
+ "key_deps": ["blinker", "click", "itsdangerous", "jinja2", "markupsafe", "werkzeug"]
150
+ },
151
+ "architecture": {
152
+ "monorepo": false,
153
+ "docker": false,
154
+ "ci_cd": ["GitHub Actions"],
155
+ "test_framework": "pytest",
156
+ "has_tests": true
157
+ },
158
+ "health": {
159
+ "open_issues": 2,
160
+ "open_prs": 0,
161
+ "contributors": 862,
162
+ "commit_frequency": "daily",
163
+ "maintenance_status": "actively_maintained"
164
+ },
165
+ "security": {
166
+ "has_env_file": false,
167
+ "dependabot_enabled": false,
168
+ "branch_protection": false,
169
+ "security_policy": false
170
+ },
171
+ "warnings": [
172
+ "Branch protection status unknown (requires admin access or authenticated request)"
173
+ ]
174
+ }
175
+ ```
176
+
177
+ ## Supported Ecosystems
178
+
179
+ | Language | Manifest Files | Package Manager Detection |
180
+ |----------|---------------|--------------------------|
181
+ | JavaScript / TypeScript | `package.json` | npm, yarn, pnpm, bun (from lockfiles) |
182
+ | Python | `pyproject.toml`, `requirements.txt` | pip, poetry, uv, pdm, pipenv |
183
+ | Rust | `Cargo.toml` | cargo |
184
+ | Go | `go.mod` | go |
185
+ | Java / Kotlin | `pom.xml`, `build.gradle`, `build.gradle.kts` | maven, gradle |
186
+ | Ruby | `Gemfile` | bundler |
187
+ | C / C++ | `CMakeLists.txt` | cmake |
188
+
189
+ Framework detection covers 40+ frameworks across all supported ecosystems (FastAPI, Django, React, Next.js, Spring Boot, Rails, Gin, Actix, and many more).
190
+
191
+ ## What It Detects
192
+
193
+ | Category | Signals |
194
+ |----------|---------|
195
+ | **Summary** | Stars, forks, watchers, age, license, languages |
196
+ | **Tech Stack** | Runtime, framework, package manager, direct/dev dependency count, key deps |
197
+ | **Architecture** | Monorepo, Docker, CI/CD platform, test framework |
198
+ | **Health** | Commit frequency (daily/weekly/monthly/sporadic/inactive), maintenance status, contributors, open issues |
199
+ | **Security** | `.env` file committed, Dependabot enabled, branch protection, SECURITY.md present |
200
+
201
+ ## Roadmap
202
+
203
+ Not yet implemented, but planned:
204
+
205
+ - **Secrets regex scanning** — detect leaked API keys, tokens, passwords in the file tree
206
+ - **Architecture type classification** — library vs. application vs. framework
207
+ - **API rate limiting** — per-key throttling for the REST API
208
+ - **Private repo support** — authenticated analysis of private repositories
209
+ - **npm/npx package** — `npx repocrunch analyze owner/repo`
210
+ - **Vulnerability scanning** — known CVE detection in dependencies
211
+ - **Comparison mode** — side-by-side analysis of multiple repos
212
+ - **Historical tracking** — track how a repo's health changes over time
213
+ - **PyPI / npm publishing** — `pip install repocrunch` / `npm install repocrunch`
214
+ - **Platform deployments** — Apify Store, Smithery, mcp.so, RapidAPI
215
+
216
+ ## License
217
+
218
+ MIT
@@ -0,0 +1,180 @@
1
+ # RepoCrunch
2
+
3
+ Analyze any public GitHub repository into structured JSON. No AI, no LLMs — fully deterministic.
4
+
5
+ Give it a repo, get back tech stack, dependencies, architecture, health metrics, and security signals in clean, consistent JSON. Use it as a Python library, CLI tool, REST API, or MCP server.
6
+
7
+ ## Quick Start
8
+
9
+ Requires **Python 3.11+** and [uv](https://github.com/astral-sh/uv).
10
+
11
+ ```bash
12
+ # Install
13
+ git clone https://github.com/kimwwk/repocrunch.git
14
+ cd repocrunch
15
+ uv venv && uv pip install -e ".[all]"
16
+
17
+ # Analyze a repo
18
+ repocrunch analyze astral-sh/uv --pretty
19
+ ```
20
+
21
+ Or install just what you need:
22
+
23
+ ```bash
24
+ uv pip install -e "." # Library only (httpx + pydantic)
25
+ uv pip install -e ".[cli]" # + CLI
26
+ uv pip install -e ".[api]" # + REST API
27
+ uv pip install -e ".[mcp]" # + MCP server
28
+ uv pip install -e ".[all]" # Everything
29
+ ```
30
+
31
+ ### Set a GitHub Token (optional)
32
+
33
+ Without a token you get 60 API calls/hour. With one, 5,000/hour.
34
+
35
+ ```bash
36
+ export GITHUB_TOKEN=ghp_...
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ ### CLI
42
+
43
+ ```bash
44
+ repocrunch analyze fastapi/fastapi --pretty # Full analysis, pretty JSON
45
+ repocrunch analyze facebook/react -f tech_stack # Single field
46
+ repocrunch analyze https://github.com/gin-gonic/gin # Full URL works too
47
+ repocrunch serve # Start REST API on :8000
48
+ repocrunch mcp # Start MCP server (STDIO)
49
+ ```
50
+
51
+ ### Python Library
52
+
53
+ ```python
54
+ from repocrunch import analyze, analyze_sync
55
+
56
+ # Async
57
+ result = await analyze("fastapi/fastapi")
58
+
59
+ # Sync
60
+ result = analyze_sync("pallets/flask")
61
+
62
+ print(result.summary.stars)
63
+ print(result.tech_stack.framework)
64
+ print(result.model_dump_json(indent=2))
65
+ ```
66
+
67
+ ### REST API
68
+
69
+ ```bash
70
+ repocrunch serve
71
+
72
+ # Then:
73
+ curl "http://localhost:8000/analyze?repo=fastapi/fastapi" | python -m json.tool
74
+ curl "http://localhost:8000/health"
75
+ curl "http://localhost:8000/docs" # OpenAPI docs
76
+ ```
77
+
78
+ ### MCP Server (for Claude, Cursor, etc.)
79
+
80
+ ```bash
81
+ repocrunch mcp # Starts STDIO transport
82
+ ```
83
+
84
+ ## Sample Output
85
+
86
+ ```bash
87
+ $ repocrunch analyze pallets/flask --pretty
88
+ ```
89
+
90
+ ```json
91
+ {
92
+ "schema_version": "1",
93
+ "repo": "pallets/flask",
94
+ "url": "https://github.com/pallets/flask",
95
+ "analyzed_at": "2026-02-08T19:07:31Z",
96
+ "summary": {
97
+ "stars": 71143,
98
+ "forks": 16697,
99
+ "watchers": 2092,
100
+ "last_commit": "2026-02-06T21:23:01Z",
101
+ "age_days": 5787,
102
+ "license": "BSD-3-Clause",
103
+ "primary_language": "Python",
104
+ "languages": { "Python": 99.9, "HTML": 0.1 }
105
+ },
106
+ "tech_stack": {
107
+ "runtime": "Python",
108
+ "framework": null,
109
+ "package_manager": "pip",
110
+ "dependencies": { "direct": 6, "dev": 0 },
111
+ "key_deps": ["blinker", "click", "itsdangerous", "jinja2", "markupsafe", "werkzeug"]
112
+ },
113
+ "architecture": {
114
+ "monorepo": false,
115
+ "docker": false,
116
+ "ci_cd": ["GitHub Actions"],
117
+ "test_framework": "pytest",
118
+ "has_tests": true
119
+ },
120
+ "health": {
121
+ "open_issues": 2,
122
+ "open_prs": 0,
123
+ "contributors": 862,
124
+ "commit_frequency": "daily",
125
+ "maintenance_status": "actively_maintained"
126
+ },
127
+ "security": {
128
+ "has_env_file": false,
129
+ "dependabot_enabled": false,
130
+ "branch_protection": false,
131
+ "security_policy": false
132
+ },
133
+ "warnings": [
134
+ "Branch protection status unknown (requires admin access or authenticated request)"
135
+ ]
136
+ }
137
+ ```
138
+
139
+ ## Supported Ecosystems
140
+
141
+ | Language | Manifest Files | Package Manager Detection |
142
+ |----------|---------------|--------------------------|
143
+ | JavaScript / TypeScript | `package.json` | npm, yarn, pnpm, bun (from lockfiles) |
144
+ | Python | `pyproject.toml`, `requirements.txt` | pip, poetry, uv, pdm, pipenv |
145
+ | Rust | `Cargo.toml` | cargo |
146
+ | Go | `go.mod` | go |
147
+ | Java / Kotlin | `pom.xml`, `build.gradle`, `build.gradle.kts` | maven, gradle |
148
+ | Ruby | `Gemfile` | bundler |
149
+ | C / C++ | `CMakeLists.txt` | cmake |
150
+
151
+ Framework detection covers 40+ frameworks across all supported ecosystems (FastAPI, Django, React, Next.js, Spring Boot, Rails, Gin, Actix, and many more).
152
+
153
+ ## What It Detects
154
+
155
+ | Category | Signals |
156
+ |----------|---------|
157
+ | **Summary** | Stars, forks, watchers, age, license, languages |
158
+ | **Tech Stack** | Runtime, framework, package manager, direct/dev dependency count, key deps |
159
+ | **Architecture** | Monorepo, Docker, CI/CD platform, test framework, test presence |
160
+ | **Health** | Commit frequency (daily/weekly/monthly/sporadic/inactive), maintenance status, contributors, open issues, open PRs |
161
+ | **Security** | `.env` file committed, Dependabot enabled, branch protection, SECURITY.md present |
162
+
163
+ ## Roadmap
164
+
165
+ Not yet implemented, but planned:
166
+
167
+ - **Secrets regex scanning** — detect leaked API keys, tokens, passwords in the file tree
168
+ - **Architecture type classification** — library vs. application vs. framework
169
+ - **API rate limiting** — per-key throttling for the REST API
170
+ - **Private repo support** — authenticated analysis of private repositories
171
+ - **npm/npx package** — `npx repocrunch analyze owner/repo`
172
+ - **Vulnerability scanning** — known CVE detection in dependencies
173
+ - **Comparison mode** — side-by-side analysis of multiple repos
174
+ - **Historical tracking** — track how a repo's health changes over time
175
+ - **PyPI / npm publishing** — `pip install repocrunch` / `npm install repocrunch`
176
+ - **Platform deployments** — Apify Store, Smithery, mcp.so, RapidAPI
177
+
178
+ ## License
179
+
180
+ MIT
@@ -0,0 +1,54 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "repocrunch"
7
+ version = "0.1.0"
8
+ description = "Analyze GitHub repos into structured JSON. No AI, fully deterministic."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.11"
12
+ authors = [{ name = "Kim" }]
13
+ keywords = ["github", "repository", "analysis", "devtools"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ ]
22
+ dependencies = [
23
+ "httpx>=0.27",
24
+ "pydantic>=2.0",
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ cli = ["typer>=0.12"]
29
+ api = ["fastapi>=0.115", "uvicorn>=0.30"]
30
+ mcp = ["fastmcp>=0.1"]
31
+ all = ["repocrunch[cli,api,mcp]"]
32
+ dev = [
33
+ "pytest>=8.0",
34
+ "pytest-httpx>=0.30",
35
+ "pytest-asyncio>=0.24",
36
+ "pytest-cov>=5.0",
37
+ ]
38
+
39
+ [project.scripts]
40
+ repocrunch = "repocrunch.cli:app"
41
+
42
+ [project.urls]
43
+ Homepage = "https://github.com/repocrunch/repocrunch"
44
+ Repository = "https://github.com/repocrunch/repocrunch"
45
+
46
+ [tool.hatch.build.targets.wheel]
47
+ packages = ["src/repocrunch"]
48
+
49
+ [tool.pytest.ini_options]
50
+ asyncio_mode = "auto"
51
+ testpaths = ["tests"]
52
+
53
+ [tool.ruff]
54
+ target-version = "py311"
@@ -0,0 +1,5 @@
1
+ name: repocrunch
2
+ description: Analyze GitHub repos into structured JSON
3
+ version: 0.1.0
4
+ transport: stdio
5
+ command: repocrunch mcp
@@ -0,0 +1,27 @@
1
+ """RepoCrunch — Analyze GitHub repos into structured JSON."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+
7
+ from repocrunch.analyzer import analyze_repo
8
+ from repocrunch.models import SCHEMA_VERSION, RepoAnalysis
9
+
10
+ __version__ = "0.1.0"
11
+ __all__ = ["analyze", "analyze_sync", "RepoAnalysis", "SCHEMA_VERSION", "__version__"]
12
+
13
+
14
async def analyze(repo: str, token: str | None = None) -> RepoAnalysis:
    """Run the analysis pipeline for a repository without blocking.

    Args:
        repo: Repository reference, either ``owner/repo`` or a GitHub URL.
        token: Optional GitHub token, forwarded unchanged to ``analyze_repo``.

    Returns:
        The ``RepoAnalysis`` produced by ``analyze_repo``.
    """
    analysis = await analyze_repo(repo, token=token)
    return analysis
20
+
21
+
22
def analyze_sync(repo: str, token: str | None = None) -> RepoAnalysis:
    """Blocking counterpart of ``analyze``.

    Drives ``analyze_repo`` to completion with ``asyncio.run``, so it is meant
    for plain synchronous callers; ``asyncio.run`` refuses to start when an
    event loop is already running in the current thread.

    Args:
        repo: Repository reference, either ``owner/repo`` or a GitHub URL.
        token: Optional GitHub token, forwarded unchanged to ``analyze_repo``.

    Returns:
        The ``RepoAnalysis`` produced by ``analyze_repo``.
    """
    pending = analyze_repo(repo, token=token)
    return asyncio.run(pending)
@@ -0,0 +1,91 @@
1
+ """Orchestrator: parse input → gather data → run extractors → assemble result."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import re
7
+ from datetime import datetime, timezone
8
+
9
+ from repocrunch.client import GitHubClient
10
+ from repocrunch.extractors.architecture import extract_architecture
11
+ from repocrunch.extractors.health import extract_health
12
+ from repocrunch.extractors.metadata import extract_metadata
13
+ from repocrunch.extractors.security import extract_security
14
+ from repocrunch.extractors.tech_stack import extract_tech_stack
15
+ from repocrunch.models import RepoAnalysis
16
+
17
+
18
def parse_repo_input(raw: str) -> tuple[str, str]:
    """Parse 'owner/repo' or a GitHub URL into (owner, repo).

    Accepted forms (surrounding whitespace and trailing slashes are ignored):

    * ``owner/repo`` and ``owner/repo.git``
    * ``http(s)://github.com/owner/repo`` with an optional ``www.`` prefix,
      an optional ``.git`` suffix, and an optional ``?query`` / ``#fragment``

    Args:
        raw: User-supplied repository reference.

    Returns:
        A ``(owner, repo)`` tuple with any ``.git`` suffix removed.

    Raises:
        ValueError: If ``raw`` matches none of the accepted forms.
    """
    # One character set for both forms, so nothing that could alter an API
    # path ("?", "#", spaces, ...) can leak into owner or repo via a URL.
    name = r"[A-Za-z0-9_.-]+"
    cleaned = raw.strip().rstrip("/")

    candidate = cleaned
    if re.match(r"https?://", candidate, re.IGNORECASE):
        # URLs copied from a browser often end in "?tab=readme" or "#readme";
        # neither is part of the repository identity.
        candidate = re.split(r"[?#]", candidate, maxsplit=1)[0].rstrip("/")

    # The repo group is lazy so that a trailing ".git" is consumed by the
    # optional suffix instead of becoming part of the repository name.
    patterns = (
        rf"https?://(?:www\.)?github\.com/({name})/({name}?)(?:\.git)?",
        rf"({name})/({name}?)(?:\.git)?",
    )
    for pattern in patterns:
        found = re.fullmatch(pattern, candidate, re.IGNORECASE)
        if found:
            return found.group(1), found.group(2)

    raise ValueError(f"Cannot parse repo input: {cleaned!r}. Use 'owner/repo' or a GitHub URL.")
33
+
34
+
35
async def analyze_repo(
    repo_input: str,
    token: str | None = None,
    client: GitHubClient | None = None,
) -> RepoAnalysis:
    """Analyze a GitHub repo and return structured results.

    Args:
        repo_input: ``owner/repo`` shorthand or a GitHub URL.
        token: Optional GitHub token; only used when no ``client`` is passed.
        client: Optional pre-built client. When omitted, a ``GitHubClient`` is
            created here and closed before returning; a caller-supplied client
            is left open.

    Returns:
        A ``RepoAnalysis`` assembled from the metadata, tech stack,
        architecture, health and security extractors, plus any warnings.

    Raises:
        ValueError: If ``repo_input`` cannot be parsed, or the repository
            lookup yields no data.
    """
    owner, repo = parse_repo_input(repo_input)
    warnings: list[str] = []

    owns_client = client is None
    if owns_client:
        client = GitHubClient(token=token)

    try:
        # Phase 1: parallel fetch of repo metadata, languages, and file tree
        repo_data, languages, tree_data = await asyncio.gather(
            client.get(f"/repos/{owner}/{repo}"),
            client.get(f"/repos/{owner}/{repo}/languages"),
            client.get(f"/repos/{owner}/{repo}/git/trees/HEAD", params={"recursive": "1"}),
        )

        if repo_data is None:
            raise ValueError(f"Repository not found: {owner}/{repo}")

        # Missing languages or tree are not fatal; fall back to empty values.
        tree_data = tree_data or {"tree": []}
        languages = languages or {}
        primary_language = repo_data.get("language")

        # GitHub caps recursive tree listings and marks the response with
        # "truncated": true. Everything derived from the file list may then
        # be incomplete, so tell the caller instead of failing silently.
        if tree_data.get("truncated"):
            warnings.append(
                "File tree was truncated by GitHub; file-based detection may be incomplete"
            )

        # Phase 2: parallel extraction (async extractors run concurrently)
        summary = extract_metadata(repo_data, languages)

        tech_stack, health, security = await asyncio.gather(
            extract_tech_stack(client, owner, repo, tree_data, primary_language),
            extract_health(client, owner, repo, repo_data),
            extract_security(client, owner, repo, tree_data, repo_data, warnings),
        )

        # Architecture is sync — run after tech_stack so we have deps for test detection
        architecture = extract_architecture(tree_data, tech_stack.key_deps)

        # Collect client warnings
        warnings.extend(client.warnings)

        return RepoAnalysis(
            repo=f"{owner}/{repo}",
            url=f"https://github.com/{owner}/{repo}",
            analyzed_at=datetime.now(timezone.utc),
            summary=summary,
            tech_stack=tech_stack,
            architecture=architecture,
            health=health,
            security=security,
            warnings=warnings,
        )
    finally:
        # Only close a client created here; a caller-supplied one stays open.
        if owns_client:
            await client.close()
@@ -0,0 +1,44 @@
1
+ """FastAPI REST API for RepoCrunch."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import FastAPI, HTTPException, Query
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+
8
+ from repocrunch import __version__
9
+ from repocrunch.analyzer import analyze_repo
10
+ from repocrunch.client import RateLimitError
11
+
12
# ASGI application; its reported version is the package's ``__version__``.
app = FastAPI(
    title="RepoCrunch",
    description="Analyze GitHub repos into structured JSON.",
    version=__version__,
)

# CORS: any origin and any request header are allowed, but only GET requests.
app.add_middleware(
    CORSMiddleware,
    allow_methods=["GET"],
    allow_headers=["*"],
    allow_origins=["*"],
)
24
+
25
+
26
@app.get("/analyze")
async def analyze(
    repo: str = Query(description="GitHub repo as 'owner/repo' or URL"),
    github_token: str | None = Query(None, description="GitHub token for higher rate limits"),
):
    """Analyze a GitHub repository and return the result as JSON."""
    # Status mapping: ValueError -> 400, RateLimitError -> 429, anything else -> 500.
    # Each HTTPException is chained to its cause so the original traceback
    # stays available in server logs.
    # NOTE(review): the token arrives as a query parameter, so it can end up
    # in access logs and browser history — consider also accepting a header.
    try:
        result = await analyze_repo(repo, token=github_token)
        return result.model_dump(mode="json")
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RateLimitError as e:
        raise HTTPException(status_code=429, detail="GitHub API rate limit exhausted") from e
    except Exception as e:
        # NOTE(review): str(e) is returned to the client and may expose
        # internal details — confirm this is intended.
        raise HTTPException(status_code=500, detail=str(e)) from e
40
+
41
+
42
@app.get("/health")
async def health():
    # Liveness probe: a fixed "ok" status plus the running package version.
    payload = {"status": "ok", "version": __version__}
    return payload