repocrunch 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
repocrunch/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ """RepoCrunch — Analyze GitHub repos into structured JSON."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+
7
+ from repocrunch.analyzer import analyze_repo
8
+ from repocrunch.models import SCHEMA_VERSION, RepoAnalysis
9
+
10
+ __version__ = "0.1.0"
11
+ __all__ = ["analyze", "analyze_sync", "RepoAnalysis", "SCHEMA_VERSION", "__version__"]
12
+
13
+
14
async def analyze(
    repo: str,
    token: str | None = None,
) -> RepoAnalysis:
    """Analyze a GitHub repo asynchronously.

    Thin public wrapper around :func:`repocrunch.analyzer.analyze_repo`.

    Args:
        repo: Repo given as ``owner/repo`` or a GitHub URL.
        token: Optional GitHub token for higher API rate limits.

    Returns:
        The structured :class:`RepoAnalysis` result.
    """
    return await analyze_repo(repo, token=token)
20
+
21
+
22
def analyze_sync(
    repo: str,
    token: str | None = None,
) -> RepoAnalysis:
    """Analyze a GitHub repo synchronously.

    Runs the async pipeline via ``asyncio.run``, so it must NOT be called
    from inside an already-running event loop (``asyncio.run`` raises
    ``RuntimeError`` there) — use :func:`analyze` in async code instead.

    Args:
        repo: Repo given as ``owner/repo`` or a GitHub URL.
        token: Optional GitHub token for higher API rate limits.
    """
    return asyncio.run(analyze_repo(repo, token=token))
repocrunch/analyzer.py ADDED
@@ -0,0 +1,91 @@
1
+ """Orchestrator: parse input → gather data → run extractors → assemble result."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import re
7
+ from datetime import datetime, timezone
8
+
9
+ from repocrunch.client import GitHubClient
10
+ from repocrunch.extractors.architecture import extract_architecture
11
+ from repocrunch.extractors.health import extract_health
12
+ from repocrunch.extractors.metadata import extract_metadata
13
+ from repocrunch.extractors.security import extract_security
14
+ from repocrunch.extractors.tech_stack import extract_tech_stack
15
+ from repocrunch.models import RepoAnalysis
16
+
17
+
18
def parse_repo_input(raw: str) -> tuple[str, str]:
    """Parse 'owner/repo', a GitHub URL, or an SSH remote into (owner, repo).

    Accepted forms (a trailing slash and/or ``.git`` suffix are tolerated):
      * ``owner/repo``
      * ``https://github.com/owner/repo`` (``http://`` and ``www.`` too)
      * ``git@github.com:owner/repo.git``

    Raises:
        ValueError: if the input matches none of the accepted forms.
    """
    raw = raw.strip().rstrip("/")

    # Full HTTPS/HTTP URL (optionally with a www. prefix and .git suffix)
    match = re.match(r"https?://(?:www\.)?github\.com/([^/]+)/([^/]+?)(?:\.git)?$", raw)
    if match:
        return match.group(1), match.group(2)

    # SSH remote, e.g. git@github.com:owner/repo.git
    match = re.match(r"git@github\.com:([^/]+)/([^/]+?)(?:\.git)?$", raw)
    if match:
        return match.group(1), match.group(2)

    # owner/repo shorthand
    match = re.match(r"^([A-Za-z0-9_.-]+)/([A-Za-z0-9_.-]+)$", raw)
    if match:
        return match.group(1), match.group(2)

    raise ValueError(f"Cannot parse repo input: {raw!r}. Use 'owner/repo' or a GitHub URL.")
33
+
34
+
35
async def analyze_repo(
    repo_input: str,
    token: str | None = None,
    client: GitHubClient | None = None,
) -> RepoAnalysis:
    """Analyze a GitHub repo and return structured results.

    Args:
        repo_input: Repo as ``owner/repo`` or a GitHub URL (see
            :func:`parse_repo_input`).
        token: GitHub token; used only when no *client* is supplied.
        client: Optional pre-configured client. When provided, the caller
            keeps ownership and it is NOT closed here.

    Returns:
        A fully-populated :class:`RepoAnalysis`.

    Raises:
        ValueError: if the input cannot be parsed or the repo is not found.
    """
    owner, repo = parse_repo_input(repo_input)
    warnings: list[str] = []

    # Only close the client on exit if this call created it.
    owns_client = client is None
    if owns_client:
        client = GitHubClient(token=token)

    try:
        # Phase 1: parallel fetch of repo metadata, languages, and file tree
        repo_data, languages, tree_data = await asyncio.gather(
            client.get(f"/repos/{owner}/{repo}"),
            client.get(f"/repos/{owner}/{repo}/languages"),
            client.get(f"/repos/{owner}/{repo}/git/trees/HEAD", params={"recursive": "1"}),
        )

        # client.get returns None on 404, so a missing repo surfaces here.
        if repo_data is None:
            raise ValueError(f"Repository not found: {owner}/{repo}")

        # Languages/tree may be missing independently (e.g. an empty repo):
        # degrade to empty defaults instead of failing the whole analysis.
        tree_data = tree_data or {"tree": []}
        languages = languages or {}
        primary_language = repo_data.get("language")

        # Phase 2: parallel extraction (async extractors run concurrently)
        summary = extract_metadata(repo_data, languages)

        tech_stack, health, security = await asyncio.gather(
            extract_tech_stack(client, owner, repo, tree_data, primary_language),
            extract_health(client, owner, repo, repo_data),
            extract_security(client, owner, repo, tree_data, repo_data, warnings),
        )

        # Architecture is sync — run after tech_stack so we have deps for test detection
        architecture = extract_architecture(tree_data, tech_stack.key_deps)

        # Collect client warnings (e.g. low rate limit) into the result
        warnings.extend(client.warnings)

        return RepoAnalysis(
            repo=f"{owner}/{repo}",
            url=f"https://github.com/{owner}/{repo}",
            analyzed_at=datetime.now(timezone.utc),
            summary=summary,
            tech_stack=tech_stack,
            architecture=architecture,
            health=health,
            security=security,
            warnings=warnings,
        )
    finally:
        if owns_client:
            await client.close()
repocrunch/api.py ADDED
@@ -0,0 +1,44 @@
1
+ """FastAPI REST API for RepoCrunch."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import FastAPI, HTTPException, Query
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+
8
+ from repocrunch import __version__
9
+ from repocrunch.analyzer import analyze_repo
10
+ from repocrunch.client import RateLimitError
11
+
12
# Public HTTP surface: GET /analyze and GET /health (defined below).
app = FastAPI(
    title="RepoCrunch",
    version=__version__,
    description="Analyze GitHub repos into structured JSON.",
)

# NOTE(review): allow_origins=["*"] permits any origin. Acceptable for a
# public, read-only, token-optional API, but tighten this if credentialed
# requests are ever allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET"],
    allow_headers=["*"],
)
24
+
25
+
26
+ @app.get("/analyze")
27
+ async def analyze(
28
+ repo: str = Query(description="GitHub repo as 'owner/repo' or URL"),
29
+ github_token: str | None = Query(None, description="GitHub token for higher rate limits"),
30
+ ):
31
+ try:
32
+ result = await analyze_repo(repo, token=github_token)
33
+ return result.model_dump(mode="json")
34
+ except ValueError as e:
35
+ raise HTTPException(status_code=400, detail=str(e))
36
+ except RateLimitError:
37
+ raise HTTPException(status_code=429, detail="GitHub API rate limit exhausted")
38
+ except Exception as e:
39
+ raise HTTPException(status_code=500, detail=str(e))
40
+
41
+
42
+ @app.get("/health")
43
+ async def health():
44
+ return {"status": "ok", "version": __version__}
repocrunch/cli.py ADDED
@@ -0,0 +1,83 @@
1
+ """Typer CLI for RepoCrunch."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+ import typer
8
+
9
+ from repocrunch import __version__, analyze_sync
10
+
11
# Root Typer application; prints help when invoked with no arguments.
app = typer.Typer(
    name="repocrunch",
    help="Analyze GitHub repos into structured JSON.",
    no_args_is_help=True,
)
16
+
17
+
18
@app.command()
def analyze(
    repo: str = typer.Argument(help="GitHub repo as 'owner/repo' or URL"),
    pretty: bool = typer.Option(False, "--pretty", "-p", help="Pretty-print JSON output"),
    field: str | None = typer.Option(None, "--field", "-f", help="Extract a single top-level field"),
    token: str | None = typer.Option(None, "--token", "-t", help="GitHub token (or set GITHUB_TOKEN)"),
) -> None:
    """Analyze a GitHub repository.

    Prints the analysis as JSON on stdout; errors go to stderr with exit
    code 1. ``--field`` selects a single top-level key of the result.
    """
    try:
        result = analyze_sync(repo, token=token)
    except Exception as e:
        # The former `except ValueError` branch was byte-identical to this
        # generic one, so a single handler suffices; chain with `from e`
        # to keep the original traceback attached.
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from e

    data = result.model_dump(mode="json")

    if field:
        if field not in data:
            typer.echo(f"Unknown field: {field}. Available: {', '.join(data.keys())}", err=True)
            raise typer.Exit(1)
        data = data[field]

    # default=str covers non-JSON-native values (e.g. datetimes).
    indent = 2 if pretty else None
    typer.echo(json.dumps(data, indent=indent, default=str))
45
+
46
+
47
@app.command()
def serve(
    host: str = typer.Option("0.0.0.0", help="Host to bind to"),
    port: int = typer.Option(8000, help="Port to bind to"),
) -> None:
    """Start the REST API server.

    uvicorn/fastapi are optional extras, so the imports live inside the
    command and a missing install becomes a friendly error instead of a
    traceback. Note the default host 0.0.0.0 binds all interfaces.
    """
    try:
        import uvicorn

        from repocrunch.api import app as fastapi_app
    except ImportError:
        typer.echo("Install API extras: pip install repocrunch[api]", err=True)
        raise typer.Exit(1)

    uvicorn.run(fastapi_app, host=host, port=port)
62
+
63
+
64
@app.command()
def mcp() -> None:
    """Start the MCP server (STDIO transport).

    MCP support is an optional extra; the import is deferred so a missing
    install yields a friendly message rather than an ImportError traceback.
    """
    try:
        from repocrunch.mcp_server import mcp as mcp_app
    except ImportError:
        typer.echo("Install MCP extras: pip install repocrunch[mcp]", err=True)
        raise typer.Exit(1)

    mcp_app.run()
74
+
75
+
76
@app.command()
def version() -> None:
    """Print version information (name and package version) to stdout."""
    typer.echo(f"repocrunch {__version__}")
80
+
81
+
82
+ if __name__ == "__main__":
83
+ app()
repocrunch/client.py ADDED
@@ -0,0 +1,154 @@
1
+ """GitHub API client with auth, rate limiting, ETag caching, and retries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import logging
7
+ import os
8
+ from collections import OrderedDict
9
+ from typing import Any
10
+
11
+ import httpx
12
+
13
logger = logging.getLogger(__name__)

# Base URL for all GitHub REST API requests.
GITHUB_API = "https://api.github.com"
# Maximum entries kept in the per-client ETag cache (LRU eviction beyond this).
CACHE_MAX = 200
17
+
18
+
19
+ class RateLimitError(Exception):
20
+ def __init__(self, reset_at: int | None = None):
21
+ self.reset_at = reset_at
22
+ super().__init__("GitHub API rate limit exhausted")
23
+
24
+
25
class GitHubClient:
    """Async GitHub REST API client.

    Features: bearer-token auth (explicit or via the GITHUB_TOKEN env var),
    rate-limit tracking with low-quota warnings, a bounded LRU ETag cache
    for conditional requests, and simple retry on transport errors.
    """

    def __init__(
        self,
        token: str | None = None,
        client: httpx.AsyncClient | None = None,
    ):
        # Fall back to the GITHUB_TOKEN env var when no token is given.
        self.token = token or os.environ.get("GITHUB_TOKEN")
        # An externally supplied httpx client is never closed by close().
        self._external_client = client is not None
        self._client = client or self._make_client()
        # cache key → (etag, parsed JSON); OrderedDict gives LRU ordering.
        self._etag_cache: OrderedDict[str, tuple[str, Any]] = OrderedDict()
        # Last-seen values of the X-RateLimit-* response headers.
        self.rate_remaining: int | None = None
        self.rate_limit: int | None = None
        # Human-readable warnings (e.g. low quota) collected for callers.
        self.warnings: list[str] = []

    def _make_client(self) -> httpx.AsyncClient:
        """Build the default httpx client with GitHub API headers set."""
        headers = {
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"
        return httpx.AsyncClient(
            base_url=GITHUB_API,
            headers=headers,
            timeout=30.0,
        )

    def _update_rate_info(self, response: httpx.Response) -> None:
        """Record rate-limit headers; warn when quota is nearly exhausted."""
        remaining = response.headers.get("X-RateLimit-Remaining")
        if remaining is not None:
            self.rate_remaining = int(remaining)
        limit = response.headers.get("X-RateLimit-Limit")
        if limit is not None:
            self.rate_limit = int(limit)
        # NOTE(review): a warning is appended on every low-quota response,
        # so `warnings` may accumulate near-duplicates.
        if self.rate_remaining is not None and self.rate_remaining < 5:
            self.warnings.append(
                f"GitHub API rate limit low: {self.rate_remaining}/{self.rate_limit} remaining"
            )

    def _cache_set(self, url: str, etag: str, data: Any) -> None:
        """Insert into the ETag cache, evicting the least-recently-used entry."""
        if len(self._etag_cache) >= CACHE_MAX:
            self._etag_cache.popitem(last=False)
        self._etag_cache[url] = (etag, data)

    async def get(self, path: str, params: dict | None = None) -> Any:
        """GET a GitHub API endpoint. Returns parsed JSON or None on 404.

        Also returns None on 401 and on 403 permission denials; raises
        RateLimitError when the quota is exhausted. Transport errors are
        retried up to 2 times. A cached ETag is sent as If-None-Match and
        the cached body is served back on a 304 response.
        """
        # Fail fast if we already know the quota is gone.
        if self.rate_remaining is not None and self.rate_remaining <= 0:
            raise RateLimitError()

        url = path
        headers: dict[str, str] = {}

        # Cache key includes params so distinct query strings don't collide.
        cache_key = f"{path}?{params}" if params else path
        if cache_key in self._etag_cache:
            # Only the etag is needed here; the cached body is re-read on 304
            # (cached_data is intentionally unused at this point).
            etag, cached_data = self._etag_cache[cache_key]
            headers["If-None-Match"] = etag

        retries = 2
        for attempt in range(retries + 1):
            try:
                response = await self._client.get(url, params=params, headers=headers)
                break
            except httpx.TransportError:
                if attempt == retries:
                    raise
                continue

        self._update_rate_info(response)

        if response.status_code == 304:
            # Not modified: refresh the LRU position and serve the cached body.
            self._etag_cache.move_to_end(cache_key)
            return self._etag_cache[cache_key][1]

        if response.status_code in (401, 404):
            return None

        if response.status_code == 403:
            if self.rate_remaining is not None and self.rate_remaining <= 0:
                reset = response.headers.get("X-RateLimit-Reset")
                raise RateLimitError(int(reset) if reset else None)
            # Permission denied (e.g. branch protection without admin access)
            return None

        response.raise_for_status()

        data = response.json()
        etag = response.headers.get("ETag")
        if etag:
            self._cache_set(cache_key, etag, data)

        return data

    async def get_file_content(self, owner: str, repo: str, path: str) -> str | None:
        """Get decoded file content from a repo. Returns None if not found.

        Only base64-encoded file responses are decoded; directory listings
        or other encodings also yield None.
        """
        data = await self.get(f"/repos/{owner}/{repo}/contents/{path}")
        if data is None:
            return None
        if isinstance(data, dict) and data.get("encoding") == "base64":
            return base64.b64decode(data["content"]).decode("utf-8", errors="replace")
        return None

    async def get_contributor_count(self, owner: str, repo: str) -> int:
        """Get total contributor count using the Link header pagination trick.

        With per_page=1, the page number in the rel="last" Link equals the
        total contributor count. Returns 0 on any non-200 response.
        """
        response = await self._client.get(
            f"/repos/{owner}/{repo}/contributors",
            params={"per_page": 1, "anon": "true"},
        )
        self._update_rate_info(response)
        if response.status_code != 200:
            return 0

        link = response.headers.get("Link", "")
        if 'rel="last"' in link:
            for part in link.split(","):
                if 'rel="last"' in part:
                    url_part = part.split(";")[0].strip().strip("<>")
                    if "page=" in url_part:
                        page = url_part.split("page=")[-1].split("&")[0]
                        return int(page)
        # No (usable) Link header: a single page — count the returned items.
        return len(response.json()) if isinstance(response.json(), list) else 0

    async def close(self) -> None:
        """Close the underlying httpx client if this instance created it."""
        if not self._external_client:
            await self._client.aclose()

    async def __aenter__(self) -> GitHubClient:
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()
@@ -0,0 +1,117 @@
1
+ """Framework and test framework detection maps."""
2
+
3
+ from __future__ import annotations
4
+
5
# dependency name → framework label
# Keys are dependency identifiers as they appear in each ecosystem's
# manifest (PyPI/npm names, Go module paths, Maven group:artifact pairs).
FRAMEWORK_MAP: dict[str, str] = {
    # Python
    "fastapi": "FastAPI",
    "django": "Django",
    "flask": "Flask",
    "starlette": "Starlette",
    "tornado": "Tornado",
    "sanic": "Sanic",
    "litestar": "Litestar",
    "aiohttp": "aiohttp",
    "bottle": "Bottle",
    "falcon": "Falcon",
    "quart": "Quart",
    "streamlit": "Streamlit",
    "gradio": "Gradio",
    # Node.js / TypeScript
    "next": "Next.js",
    "react": "React",
    "vue": "Vue.js",
    "angular": "Angular",
    "@angular/core": "Angular",
    "svelte": "Svelte",
    "express": "Express",
    "nestjs": "NestJS",
    "@nestjs/core": "NestJS",
    "nuxt": "Nuxt",
    "remix": "Remix",
    "@remix-run/react": "Remix",
    "gatsby": "Gatsby",
    "astro": "Astro",
    "hono": "Hono",
    "fastify": "Fastify",
    "koa": "Koa",
    "solid-js": "SolidJS",
    "preact": "Preact",
    # Rust
    "actix-web": "Actix Web",
    "axum": "Axum",
    "rocket": "Rocket",
    "warp": "Warp",
    "tide": "Tide",
    "leptos": "Leptos",
    "yew": "Yew",
    "tauri": "Tauri",
    # Go (module paths)
    "github.com/gin-gonic/gin": "Gin",
    "github.com/gofiber/fiber": "Fiber",
    "github.com/labstack/echo": "Echo",
    "github.com/gorilla/mux": "Gorilla Mux",
    "github.com/go-chi/chi": "Chi",
    "github.com/beego/beego": "Beego",
    # Java / Kotlin
    "org.springframework.boot:spring-boot-starter-web": "Spring Boot",
    "io.quarkus:quarkus-core": "Quarkus",
    "io.micronaut:micronaut-core": "Micronaut",
    "io.vertx:vertx-core": "Vert.x",
    "com.typesafe.play:play_2.13": "Play Framework",
    "com.typesafe.play:play_3": "Play Framework",
    "io.ktor:ktor-server-core": "Ktor",
    # Ruby
    "rails": "Rails",
    "sinatra": "Sinatra",
    "hanami": "Hanami",
    # C / C++
    "Boost": "Boost",
    "Qt5": "Qt",
    "Qt6": "Qt",
    "OpenCV": "OpenCV",
    "SFML": "SFML",
}

# dependency name → test framework label
TEST_FRAMEWORK_MAP: dict[str, str] = {
    # Python
    "pytest": "pytest",
    "unittest": "unittest",
    "nose": "nose",
    "nose2": "nose2",
    # Node.js
    "jest": "Jest",
    "mocha": "Mocha",
    "vitest": "Vitest",
    "@playwright/test": "Playwright",
    "cypress": "Cypress",
    "ava": "AVA",
    "tap": "tap",
    # Rust (built-in, detected from tree)
    # Go (built-in testing package)
    # Java / Kotlin
    "junit": "JUnit",
    "org.junit.jupiter:junit-jupiter": "JUnit 5",
    "org.junit.jupiter:junit-jupiter-api": "JUnit 5",
    "junit:junit": "JUnit 4",
    "org.mockito:mockito-core": "Mockito",
    "org.testng:testng": "TestNG",
    # Ruby
    "rspec": "RSpec",
    "rspec-rails": "RSpec",
    "minitest": "Minitest",
}

# Files in tree that indicate test framework
# Keys are matched as path suffixes, so extension variants
# (e.g. jest.config.ts / jest.config.js) are both covered.
TEST_FILE_PATTERNS: dict[str, str] = {
    "jest.config": "Jest",
    "vitest.config": "Vitest",
    "cypress.config": "Cypress",
    "playwright.config": "Playwright",
    ".mocharc": "Mocha",
    "pytest.ini": "pytest",
    "setup.cfg": "pytest",  # often contains [tool:pytest]
    "conftest.py": "pytest",
}
@@ -0,0 +1,15 @@
1
+ """Extractors that transform raw GitHub API data into structured models."""
2
+
3
+ from repocrunch.extractors.architecture import extract_architecture
4
+ from repocrunch.extractors.health import extract_health
5
+ from repocrunch.extractors.metadata import extract_metadata
6
+ from repocrunch.extractors.security import extract_security
7
+ from repocrunch.extractors.tech_stack import extract_tech_stack
8
+
9
+ __all__ = [
10
+ "extract_metadata",
11
+ "extract_tech_stack",
12
+ "extract_architecture",
13
+ "extract_health",
14
+ "extract_security",
15
+ ]
@@ -0,0 +1,113 @@
1
+ """Extract architecture signals from the file tree."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from repocrunch.detection import TEST_FILE_PATTERNS, TEST_FRAMEWORK_MAP
8
+ from repocrunch.models import Architecture
9
+
10
+
11
+ def _get_tree_paths(tree_data: dict[str, Any]) -> set[str]:
12
+ return {item["path"] for item in tree_data.get("tree", []) if item.get("type") == "blob"}
13
+
14
+
15
+ def _detect_monorepo(paths: set[str], tree_data: dict[str, Any]) -> bool:
16
+ dirs = {item["path"] for item in tree_data.get("tree", []) if item.get("type") == "tree"}
17
+
18
+ # Workspace indicators
19
+ if "lerna.json" in paths or "pnpm-workspace.yaml" in paths:
20
+ return True
21
+
22
+ # Multiple package.json at different levels
23
+ pkg_jsons = [p for p in paths if p.endswith("package.json") and "/" in p]
24
+ if len(pkg_jsons) >= 2:
25
+ return True
26
+
27
+ # packages/ or apps/ directories
28
+ if any(d.startswith("packages/") for d in dirs) or any(d.startswith("apps/") for d in dirs):
29
+ return True
30
+
31
+ return False
32
+
33
+
34
+ def _detect_docker(paths: set[str]) -> bool:
35
+ return any(
36
+ p == "Dockerfile" or p == "docker-compose.yml" or p == "docker-compose.yaml"
37
+ or p.endswith("/Dockerfile") or p == "compose.yml" or p == "compose.yaml"
38
+ for p in paths
39
+ )
40
+
41
+
42
+ def _detect_ci_cd(paths: set[str]) -> list[str]:
43
+ ci: list[str] = []
44
+ if any(p.startswith(".github/workflows/") for p in paths):
45
+ ci.append("GitHub Actions")
46
+ if ".gitlab-ci.yml" in paths:
47
+ ci.append("GitLab CI")
48
+ if "Jenkinsfile" in paths:
49
+ ci.append("Jenkins")
50
+ if ".circleci/config.yml" in paths or ".circleci/config.yaml" in paths:
51
+ ci.append("CircleCI")
52
+ if ".travis.yml" in paths:
53
+ ci.append("Travis CI")
54
+ if any(p.startswith("azure-pipelines") for p in paths):
55
+ ci.append("Azure Pipelines")
56
+ if "bitbucket-pipelines.yml" in paths:
57
+ ci.append("Bitbucket Pipelines")
58
+ return ci
59
+
60
+
61
def _detect_test_framework(paths: set[str], deps: list[str] | None = None) -> tuple[str | None, bool]:
    """Detect test framework and whether tests exist. Returns (framework, has_tests).

    Args:
        paths: Blob (file) paths from the repo tree.
        deps: Dependency names, checked against TEST_FRAMEWORK_MAP first —
            an explicit test dependency is the strongest signal.
    """
    framework = None

    # Check deps first
    if deps:
        for dep in deps:
            dep_lower = dep.lower()
            if dep_lower in TEST_FRAMEWORK_MAP:
                framework = TEST_FRAMEWORK_MAP[dep_lower]
                break

    # Check config files in tree
    if not framework:
        for filename, fw in TEST_FILE_PATTERNS.items():
            if any(p.endswith(filename) or p == filename for p in paths):
                framework = fw
                break

    # Check for test directories and conventional test file names
    has_tests = any(
        p.startswith("tests/") or p.startswith("test/") or p.startswith("__tests__/")
        or "/tests/" in p or "/test/" in p or "/__tests__/" in p
        or p.endswith("_test.py") or p.endswith("_test.go") or p.endswith("_test.rs")
        or p.endswith(".test.js") or p.endswith(".test.ts") or p.endswith(".test.tsx")
        or p.endswith(".spec.js") or p.endswith(".spec.ts") or p.endswith(".spec.tsx")
        for p in paths
    )

    # Rust/Go have built-in test frameworks, detected from file conventions.
    if has_tests and not framework:
        if any(p.endswith("_test.go") for p in paths):
            framework = "go test"
        elif any(
            p.endswith("_test.rs")
            # BUG FIX: the previous check used p.endswith("/tests/"), which
            # can never match — `paths` contains only blob (file) paths, and
            # file paths never end with "/". Cargo integration tests are .rs
            # files under a tests/ directory, so look for those instead.
            or (p.endswith(".rs") and (p.startswith("tests/") or "/tests/" in p))
            for p in paths
        ):
            framework = "cargo test"

    return framework, has_tests
98
+
99
+
100
def extract_architecture(
    tree_data: dict[str, Any],
    deps: list[str] | None = None,
) -> Architecture:
    """Derive architecture signals (monorepo, Docker, CI/CD, tests) from the tree.

    Args:
        tree_data: Git tree API response ({"tree": [...]}) for the repo.
        deps: Dependency names used to strengthen test-framework detection.

    Returns:
        An Architecture model populated from the tree heuristics above.
    """
    paths = _get_tree_paths(tree_data)
    test_framework, has_tests = _detect_test_framework(paths, deps)

    return Architecture(
        monorepo=_detect_monorepo(paths, tree_data),
        docker=_detect_docker(paths),
        ci_cd=_detect_ci_cd(paths),
        test_framework=test_framework,
        has_tests=has_tests,
    )