code-explore 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,144 @@
1
+ """Calculate code quality metrics for a project."""
2
+
3
+ from pathlib import Path
4
+
5
+ from code_explore.models import QualityMetrics
6
+
7
# Directory names whose contents are excluded from the metrics walk.
SKIP_DIRS: set[str] = {
    # version-control metadata
    ".git", ".svn", ".hg",
    # third-party dependency trees
    "node_modules", "vendor", "third_party",
    # Python caches and virtual environments
    "__pycache__", ".mypy_cache", ".pytest_cache", ".tox", ".nox",
    ".venv", "venv", "env", ".env",
    # build output
    "dist", "build", "out", "target", "bin", "obj",
    ".next", ".nuxt", ".output",
    # editor and tooling state
    ".gradle", ".idea", ".vscode", ".vs", ".cache", "coverage", ".terraform",
}

# File suffixes (lower-case, with the leading dot) that are left out of the
# file and line counts.
BINARY_EXTENSIONS: set[str] = {
    # images
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".webp",
    # audio / video
    ".mp3", ".mp4", ".wav", ".avi", ".mov",
    # archives
    ".zip", ".tar", ".gz", ".bz2", ".xz", ".rar", ".7z",
    # native binaries and object files
    ".exe", ".dll", ".so", ".dylib", ".o", ".obj",
    # JVM artefacts
    ".class", ".jar", ".war",
    # Python artefacts
    ".pyc", ".pyo", ".whl",
    # office documents
    ".pdf", ".doc", ".docx", ".xls", ".xlsx",
    # fonts
    ".ttf", ".otf", ".woff", ".woff2",
    # database files
    ".db", ".sqlite", ".sqlite3",
    # lock files
    ".lock",
}

# Path-component names that mark a project as having tests.
TEST_INDICATORS: set[str] = {
    "test",
    "tests",
    "spec",
    "specs",
    "__tests__",
    "test_",
    "_test",
    "testing",
    "e2e",
    "integration_tests",
    "unit_tests",
}

# Paths, relative to the project root, whose existence signals CI configuration.
CI_FILES: set[str] = {
    ".github/workflows",
    ".gitlab-ci.yml", ".gitlab-ci.yaml",
    "Jenkinsfile",
    ".circleci",
    ".travis.yml", ".travis.yaml",
    "azure-pipelines.yml",
    "bitbucket-pipelines.yml",
    ".buildkite",
    "Taskfile.yml",
    ".drone.yml",
}
47
+
48
+
49
def _should_skip_dir(name: str) -> bool:
    """Return True when *name* is listed in SKIP_DIRS or begins with a dot."""
    if name in SKIP_DIRS:
        return True
    return name.startswith(".")
51
+
52
+
53
+ def _count_lines(path: Path) -> int:
54
+ try:
55
+ return sum(1 for _ in path.open("r", encoding="utf-8", errors="replace"))
56
+ except (OSError, ValueError):
57
+ return 0
58
+
59
+
60
+ def _file_size(path: Path) -> int:
61
+ try:
62
+ return path.stat().st_size
63
+ except OSError:
64
+ return 0
65
+
66
+
67
# Prefixes/suffixes that mark a path component as test-related.
_TEST_NAME_AFFIXES: tuple[str, ...] = ("test_", "_test", "spec_", "_spec")


def _looks_like_test_part(part: str) -> bool:
    """Return True when a single path component looks test-related.

    The component is lower-cased and its dots are replaced by underscores.
    It is checked both as-is and with its last extension removed, so that
    ``foo_test.go`` or ``foo.spec.ts`` match the ``_test`` / ``_spec``
    suffixes (with the extension attached they would end in ``_go`` / ``_ts``).
    """
    lowered = part.lower()
    candidates = [lowered.replace(".", "_")]
    stem, dot, _ext = lowered.rpartition(".")
    if dot and stem:
        candidates.append(stem.replace(".", "_"))
    return any(
        candidate in TEST_INDICATORS
        or candidate.startswith(_TEST_NAME_AFFIXES)
        or candidate.endswith(_TEST_NAME_AFFIXES)
        for candidate in candidates
    )


def calculate_metrics(project_path: str | Path) -> QualityMetrics:
    """Collect size, line-count and project-hygiene metrics for a directory.

    Args:
        project_path: Root directory of the project to inspect.

    Returns:
        A ``QualityMetrics`` built from the walk. When *project_path* is not
        a directory, ``QualityMetrics()`` is returned with no arguments.

    Notes:
        * README / LICENSE / .gitignore / docs are looked for only among the
          direct children of the root.
        * ``has_ci`` is True when any path in ``CI_FILES`` exists under the root.
        * Files are counted recursively, skipping files below a directory for
          which ``_should_skip_dir`` is true and files whose suffix is in
          ``BINARY_EXTENSIONS``.
    """
    root = Path(project_path)
    if not root.is_dir():
        return QualityMetrics()

    has_docs = False
    has_readme = False
    has_license = False
    has_gitignore = False

    for item in root.iterdir():
        name_lower = item.name.lower()
        if name_lower.startswith("readme"):
            has_readme = True
        if name_lower.startswith(("license", "licence")):
            has_license = True
        if name_lower == ".gitignore":
            has_gitignore = True
        if name_lower in ("docs", "doc", "documentation") and item.is_dir():
            has_docs = True

    has_ci = any((root / indicator).exists() for indicator in CI_FILES)

    total_files = 0
    total_lines = 0
    total_size = 0
    max_size = 0
    has_tests = False

    for item in root.rglob("*"):
        if not item.is_file():
            continue

        rel_parts = item.relative_to(root).parts
        # Only the parent directories are checked, so dot-files themselves
        # (e.g. a root-level .gitignore) are still counted.
        if any(_should_skip_dir(p) for p in rel_parts[:-1]):
            continue

        if item.suffix.lower() in BINARY_EXTENSIONS:
            continue

        # Once a test indicator has been seen there is nothing more to learn.
        if not has_tests:
            has_tests = any(_looks_like_test_part(p) for p in rel_parts)

        size = _file_size(item)
        total_files += 1
        total_lines += _count_lines(item)
        total_size += size
        max_size = max(max_size, size)

    avg_size = round(total_size / total_files, 1) if total_files else 0.0

    return QualityMetrics(
        total_files=total_files,
        total_lines=total_lines,
        avg_file_size=avg_size,
        max_file_size=max_size,
        has_tests=has_tests,
        has_ci=has_ci,
        has_docs=has_docs,
        has_readme=has_readme,
        has_license=has_license,
        has_gitignore=has_gitignore,
    )
@@ -0,0 +1,371 @@
1
+ """Detect architectural patterns and concepts from project structure."""
2
+
3
+ import json
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from code_explore.models import PatternInfo
8
+
9
# Directory names that are ignored while collecting project files.
SKIP_DIRS: set[str] = {
    # version-control metadata
    ".git", ".svn", ".hg",
    # third-party dependency trees
    "node_modules", "vendor",
    # Python caches and virtual environments
    "__pycache__", ".mypy_cache", ".pytest_cache", ".tox",
    ".venv", "venv", "env",
    # build output
    "dist", "build", "out", "target", ".next", ".nuxt",
    # tooling state
    ".cache", "coverage",
}
14
+
15
+
16
+ class _PatternRule:
17
+ __slots__ = ("name", "category", "file_patterns", "dir_patterns",
18
+ "content_patterns", "content_files")
19
+
20
+ def __init__(
21
+ self,
22
+ name: str,
23
+ category: str,
24
+ file_patterns: list[str] | None = None,
25
+ dir_patterns: list[str] | None = None,
26
+ content_patterns: list[str] | None = None,
27
+ content_files: list[str] | None = None,
28
+ ):
29
+ self.name = name
30
+ self.category = category
31
+ self.file_patterns = [p.lower() for p in (file_patterns or [])]
32
+ self.dir_patterns = [p.lower() for p in (dir_patterns or [])]
33
+ self.content_patterns = content_patterns or []
34
+ self.content_files = [f.lower() for f in (content_files or [])]
35
+
36
+
37
# Declarative detection table, grouped by category.
#
# Each rule can contribute up to three kinds of evidence:
#   * file_patterns    - compared against lower-cased file names;
#   * dir_patterns     - compared against lower-cased directory names or
#                        "/"-joined relative directory paths;
#   * content_patterns - regular expressions, searched only in files whose
#                        names match one of content_files.
# _PatternRule lower-cases file_patterns, dir_patterns and content_files when
# a rule is constructed, so mixed-case entries below (e.g. "Dockerfile",
# "app/Http") are matched in lower-case form. content_patterns are kept as
# written.
PATTERN_RULES: list[_PatternRule] = [
    # APIs
    _PatternRule("REST API", "API",
                 file_patterns=["routes", "router", "endpoint", "controller", "api"],
                 dir_patterns=["routes", "controllers", "endpoints", "api"],
                 content_patterns=[r"@(app|router)\.(get|post|put|delete|patch)", r"express\.Router",
                                   r"@GetMapping|@PostMapping|@RestController",
                                   r"@api_view|@action"],
                 content_files=["*.py", "*.js", "*.ts", "*.java"]),
    _PatternRule("GraphQL", "API",
                 file_patterns=["schema.graphql", "resolvers", ".graphql", "typedefs"],
                 dir_patterns=["graphql", "resolvers"],
                 content_patterns=[r"type Query", r"gql`", r"@Query\(", r"graphql"],
                 content_files=["*.graphql", "*.gql", "*.py", "*.js", "*.ts"]),
    _PatternRule("gRPC", "API",
                 file_patterns=[".proto"],
                 dir_patterns=["proto", "protos", "grpc"],
                 content_patterns=[r"service\s+\w+\s*\{", r"rpc\s+\w+"],
                 content_files=["*.proto"]),
    _PatternRule("WebSocket", "API",
                 content_patterns=[r"websocket|ws://|wss://|socket\.io|WebSocket",
                                   r"@websocket|channels"],
                 content_files=["*.py", "*.js", "*.ts", "*.java"]),

    # Auth
    _PatternRule("OAuth", "Auth",
                 content_patterns=[r"oauth|OAuth|passport\.authenticate",
                                   r"oauth2|authorization_code|client_credentials"],
                 content_files=["*.py", "*.js", "*.ts", "*.java", "*.json"]),
    _PatternRule("JWT", "Auth",
                 content_patterns=[r"jsonwebtoken|jwt\.|PyJWT|jose|JWT",
                                   r"access_token|refresh_token|Bearer"],
                 content_files=["*.py", "*.js", "*.ts", "*.java", "*.json"]),
    _PatternRule("Auth0", "Auth",
                 content_patterns=[r"auth0|@auth0"],
                 content_files=["*.py", "*.js", "*.ts", "*.json", "*.env*"]),
    _PatternRule("Firebase Auth", "Auth",
                 content_patterns=[r"firebase.*auth|firebaseAuth|firebase\.auth"],
                 content_files=["*.py", "*.js", "*.ts", "*.json"]),

    # Databases
    _PatternRule("PostgreSQL", "Database",
                 content_patterns=[r"postgres|postgresql|pg\.|psycopg|asyncpg"],
                 content_files=["*.py", "*.js", "*.ts", "*.java", "*.json", "*.yml", "*.yaml",
                                "*.toml", "*.env*", "docker-compose*"]),
    _PatternRule("MongoDB", "Database",
                 content_patterns=[r"mongodb|mongoose|mongo\.|pymongo|MongoClient"],
                 content_files=["*.py", "*.js", "*.ts", "*.java", "*.json", "*.yml"]),
    _PatternRule("Redis", "Database",
                 content_patterns=[r"redis|ioredis|aioredis|Redis\("],
                 content_files=["*.py", "*.js", "*.ts", "*.java", "*.json", "*.yml"]),
    _PatternRule("SQLite", "Database",
                 file_patterns=[".sqlite", ".sqlite3", ".db"],
                 content_patterns=[r"sqlite3|sqlite|better-sqlite"],
                 content_files=["*.py", "*.js", "*.ts"]),
    _PatternRule("Prisma", "Database",
                 file_patterns=["schema.prisma", "prisma"],
                 dir_patterns=["prisma"],
                 content_patterns=[r"@prisma/client|prisma generate"],
                 content_files=["*.ts", "*.js", "*.json"]),
    _PatternRule("TypeORM", "Database",
                 content_patterns=[r"typeorm|@Entity|@Column|createConnection"],
                 content_files=["*.ts", "*.js"]),

    # Frameworks
    _PatternRule("React", "Framework",
                 content_patterns=[r"from ['\"]react['\"]|import React|useState|useEffect|jsx"],
                 content_files=["*.js", "*.jsx", "*.ts", "*.tsx"]),
    _PatternRule("Vue", "Framework",
                 file_patterns=[".vue"],
                 content_patterns=[r"createApp|Vue\.component|defineComponent"],
                 content_files=["*.vue", "*.js", "*.ts"]),
    _PatternRule("Angular", "Framework",
                 file_patterns=["angular.json", ".angular"],
                 dir_patterns=["angular"],
                 content_patterns=[r"@angular/core|@Component|@NgModule"],
                 content_files=["*.ts"]),
    _PatternRule("Next.js", "Framework",
                 file_patterns=["next.config.js", "next.config.mjs", "next.config.ts"],
                 content_patterns=[r"from ['\"]next|next/link|next/router|getServerSideProps|getStaticProps"],
                 content_files=["*.js", "*.jsx", "*.ts", "*.tsx"]),
    _PatternRule("FastAPI", "Framework",
                 content_patterns=[r"from fastapi|FastAPI\(|@app\.(get|post|put|delete)"],
                 content_files=["*.py"]),
    _PatternRule("Django", "Framework",
                 file_patterns=["manage.py", "wsgi.py", "asgi.py"],
                 dir_patterns=["templates", "migrations"],
                 content_patterns=[r"from django|django\.conf|INSTALLED_APPS"],
                 content_files=["*.py"]),
    _PatternRule("Express", "Framework",
                 content_patterns=[r"express\(\)|require\(['\"]express['\"]\)|from ['\"]express['\"]"],
                 content_files=["*.js", "*.ts"]),
    _PatternRule("Spring", "Framework",
                 file_patterns=["pom.xml", "build.gradle"],
                 content_patterns=[r"@SpringBootApplication|spring-boot|org\.springframework"],
                 content_files=["*.java", "*.kt", "*.xml", "*.gradle"]),
    _PatternRule("Flask", "Framework",
                 content_patterns=[r"from flask|Flask\(__name__\)"],
                 content_files=["*.py"]),
    _PatternRule("Svelte", "Framework",
                 file_patterns=["svelte.config.js", "svelte.config.ts"],
                 content_patterns=[r"from ['\"]svelte|import.*svelte"],
                 content_files=["*.svelte", "*.js", "*.ts"]),
    _PatternRule("NestJS", "Framework",
                 file_patterns=["nest-cli.json"],
                 content_patterns=[r"@nestjs/|@Module|@Controller|@Injectable"],
                 content_files=["*.ts"]),
    _PatternRule("Ruby on Rails", "Framework",
                 file_patterns=["Gemfile", "Rakefile"],
                 # Multi-segment entries are compared against "/"-joined
                 # directory paths relative to the project root.
                 dir_patterns=["app/models", "app/controllers", "app/views", "db/migrate"],
                 content_patterns=[r"rails|ActiveRecord|ApplicationController"],
                 content_files=["*.rb"]),
    _PatternRule("Laravel", "Framework",
                 file_patterns=["artisan", "composer.json"],
                 dir_patterns=["app/Http", "resources/views"],
                 content_patterns=[r"laravel|Illuminate|Artisan"],
                 content_files=["*.php", "*.json"]),

    # Cloud
    _PatternRule("AWS", "Cloud",
                 file_patterns=["serverless.yml", "sam-template.yaml", "cdk.json",
                                "cloudformation.yml", "cloudformation.yaml"],
                 content_patterns=[r"aws-sdk|boto3|amazonaws\.com|AWS::|aws-cdk"],
                 content_files=["*.py", "*.js", "*.ts", "*.java", "*.yml", "*.yaml", "*.json"]),
    _PatternRule("GCP", "Cloud",
                 file_patterns=["app.yaml"],
                 content_patterns=[r"google-cloud|googleapis|gcloud|@google-cloud"],
                 content_files=["*.py", "*.js", "*.ts", "*.java", "*.yml", "*.json"]),
    _PatternRule("Azure", "Cloud",
                 content_patterns=[r"azure|@azure|microsoft\.azure|WindowsAzure"],
                 content_files=["*.py", "*.js", "*.ts", "*.java", "*.json", "*.yml"]),
    # Structural evidence only: this rule defines no content patterns.
    _PatternRule("Docker", "Cloud",
                 file_patterns=["Dockerfile", "docker-compose.yml", "docker-compose.yaml",
                                ".dockerignore"],
                 dir_patterns=["docker"]),
    _PatternRule("Kubernetes", "Cloud",
                 file_patterns=["k8s", "kubernetes"],
                 dir_patterns=["k8s", "kubernetes", "helm", "charts"],
                 content_patterns=[r"apiVersion.*kind|kubectl|helm"],
                 content_files=["*.yml", "*.yaml"]),

    # Concepts
    _PatternRule("Web Scraping", "Concept",
                 content_patterns=[r"beautifulsoup|scrapy|puppeteer|playwright|cheerio|selenium|crawl"],
                 content_files=["*.py", "*.js", "*.ts"]),
    _PatternRule("ML/AI", "Concept",
                 content_patterns=[r"tensorflow|pytorch|torch|sklearn|scikit-learn|keras|transformers|openai|langchain|huggingface"],
                 content_files=["*.py", "*.js", "*.ts", "*.ipynb"]),
    _PatternRule("CLI Tool", "Concept",
                 content_patterns=[r"argparse|click|typer|commander|yargs|clap::"],
                 content_files=["*.py", "*.js", "*.ts", "*.rs"]),
    _PatternRule("Browser Extension", "Concept",
                 dir_patterns=["extension", "addon"],
                 content_patterns=[r'"browser_action"', r'"content_scripts"',
                                   r"chrome\.runtime", r"browser\.tabs",
                                   r"chrome\.tabs", r"browser\.runtime"],
                 content_files=["manifest.json"]),
    _PatternRule("Mobile App", "Concept",
                 # "*.ext" entries are treated as suffix matches on file names.
                 file_patterns=["Podfile", "pubspec.yaml", "AndroidManifest.xml",
                                "*.swift", "*.kt", "*.dart"],
                 dir_patterns=["ios", "android", "maui", "flutter"],
                 content_patterns=[r"react-native", r"from ['\"]expo['\"]",
                                   r"import\s+Flutter", r"SwiftUI",
                                   r"UIApplicationDelegate"],
                 content_files=["app.json", "*.js", "*.ts", "*.dart", "*.swift", "*.kt"]),
    # Structural evidence only: this rule defines no content patterns.
    _PatternRule("Testing", "Concept",
                 file_patterns=["jest.config.js", "jest.config.ts", "jest.config.mjs",
                                "pytest.ini", "pyproject.toml", "setup.cfg",
                                "vitest.config.ts", "vitest.config.js",
                                ".rspec", "karma.conf.js", "cypress.config.ts",
                                "cypress.config.js"],
                 dir_patterns=["e2e", "cypress", "__tests__"]),
    _PatternRule("Microservices", "Concept",
                 dir_patterns=["microservices"],
                 content_patterns=[r"service-discovery|consul|eureka|api-gateway|service-mesh|istio"],
                 content_files=["*.yml", "*.yaml", "*.json", "docker-compose*"]),
]
214
+
215
# Rule names that are reported as frameworks in addition to being patterns.
FRAMEWORK_NAMES: set[str] = {
    "Angular",
    "Django",
    "Express",
    "FastAPI",
    "Flask",
    "Laravel",
    "NestJS",
    "Next.js",
    "React",
    "Ruby on Rails",
    "Spring",
    "Svelte",
    "Vue",
}
219
+
220
+
221
def _should_skip_dir(name: str) -> bool:
    """Tell whether a directory name is hidden (dot-prefixed) or in SKIP_DIRS."""
    is_hidden = name.startswith(".")
    return is_hidden or name in SKIP_DIRS
223
+
224
+
225
def _collect_project_files(root: Path) -> tuple[list[Path], set[str], set[str]]:
    """Walk *root* and gather the files and names used for pattern matching.

    Args:
        root: Project root directory.

    Returns:
        A ``(files, all_filenames, all_dirnames)`` tuple:

        * ``files`` - paths of every file not located below a skipped directory;
        * ``all_filenames`` - lower-cased names of those files;
        * ``all_dirnames`` - lower-cased directory names plus lower-cased,
          "/"-joined directory paths relative to *root*.

    A directory is skipped when ``_should_skip_dir`` is true for its own name
    or for any of its ancestors below *root*.
    """
    files: list[Path] = []
    all_filenames: set[str] = set()
    all_dirnames: set[str] = set()

    for item in root.rglob("*"):
        parts = item.relative_to(root).parts

        # Classify each entry once instead of re-checking it for every path
        # component inside the skip filters.
        if item.is_file():
            parents = parts[:-1]
            if any(_should_skip_dir(p) for p in parents):
                continue
            files.append(item)
            all_filenames.add(item.name.lower())
            lowered = [p.lower() for p in parents]
            all_dirnames.update(lowered)
            if lowered:
                all_dirnames.add("/".join(lowered))
        elif item.is_dir():
            if any(_should_skip_dir(p) for p in parts):
                continue
            lowered = [p.lower() for p in parts]
            all_dirnames.add(lowered[-1])
            # Record the relative path as well, so multi-segment patterns such
            # as "app/models" match even when the directory holds only
            # sub-directories and no files of its own.
            all_dirnames.add("/".join(lowered))

    return files, all_filenames, all_dirnames
250
+
251
+
252
+ def _matches_glob(filename: str, pattern: str) -> bool:
253
+ if pattern.startswith("*."):
254
+ return filename.endswith(pattern[1:])
255
+ return filename == pattern.lower()
256
+
257
+
258
def _search_content(
    files: list[Path],
    rule: _PatternRule,
    root: Path,
    *,
    max_files: int = 200,
    max_chars: int = 50_000,
) -> list[str]:
    """Return the relative paths of files whose content matches *rule*.

    Only files whose lower-cased name matches one of ``rule.content_files``
    are inspected. A file counts as a hit when any of ``rule.content_patterns``
    (compiled case-insensitively) is found in its leading characters.

    Args:
        files: Candidate files, as absolute or root-relative paths under *root*.
        rule: Rule supplying ``content_patterns`` and ``content_files``.
        root: Directory the returned paths are made relative to.
        max_files: Maximum number of name-matching files to inspect.
        max_chars: Maximum number of characters read from each file.

    Returns:
        One ``str`` path (relative to *root*) per matching file, in the order
        the files were given. Empty when the rule has no content patterns or
        no content file globs. Files that cannot be read are skipped.
    """
    if not rule.content_patterns or not rule.content_files:
        return []

    compiled = [re.compile(p, re.IGNORECASE) for p in rule.content_patterns]
    evidence: list[str] = []
    inspected = 0

    for f in files:
        if inspected >= max_files:
            break
        name = f.name.lower()
        if not any(_matches_glob(name, cf) for cf in rule.content_files):
            continue
        # Unreadable files still count against the limit.
        inspected += 1

        try:
            # Read only the prefix that is searched, rather than loading the
            # whole file and slicing it afterwards.
            with f.open("r", encoding="utf-8", errors="replace") as handle:
                content = handle.read(max_chars)
        except OSError:
            continue

        if any(pattern.search(content) for pattern in compiled):
            evidence.append(str(f.relative_to(root)))

    return evidence
283
+
284
+
285
def _filenames_matching(pattern: str, all_filenames: set[str]) -> list[str]:
    """Return the lower-cased file names matched by one file pattern, sorted.

    ``*.ext`` and ``.ext`` patterns are suffix matches; anything else must
    equal a file name exactly (no substring matching).
    """
    if pattern.startswith("*."):
        suffix = pattern[1:]  # e.g. "*.swift" -> ".swift"
    elif pattern.startswith("."):
        suffix = pattern
    else:
        name = pattern.lower()
        return [name] if name in all_filenames else []
    # Sorted so that the evidence kept after truncation does not depend on
    # set iteration order.
    return sorted(f for f in all_filenames if f.endswith(suffix))


def detect_patterns(project_path: str | Path) -> tuple[list[PatternInfo], list[str]]:
    """Detect architectural patterns and frameworks in a project directory.

    Every rule in ``PATTERN_RULES`` is evaluated against the project's file
    names, directory names and file contents.

    Args:
        project_path: Root directory of the project.

    Returns:
        ``(detected, frameworks)`` where ``detected`` is a list of
        ``PatternInfo`` sorted by descending confidence and ``frameworks``
        lists the names of detected rules that are in ``FRAMEWORK_NAMES`` and
        reached a confidence of at least 0.3. Both are empty when
        *project_path* is not a directory.
    """
    root = Path(project_path)
    if not root.is_dir():
        return [], []

    files, all_filenames, all_dirnames = _collect_project_files(root)

    detected: list[PatternInfo] = []
    frameworks: list[str] = []

    for rule in PATTERN_RULES:
        evidence: list[str] = []
        file_hits = 0
        dir_hits = 0

        for fp in rule.file_patterns:
            matches = _filenames_matching(fp, all_filenames)
            if matches:
                evidence.extend(matches[:3])
                file_hits += 1

        for dp in rule.dir_patterns:
            if dp in all_dirnames:
                evidence.append(f"{dp}/")
                dir_hits += 1

        content_evidence = _search_content(files, rule, root)
        if content_evidence:
            evidence.extend(content_evidence[:5])

        if not evidence:
            continue

        # Evidence types: file matches, directory matches, content matches.
        evidence_types = sum([
            file_hits > 0,
            dir_hits > 0,
            len(content_evidence) > 0,
        ])

        has_structural_rules = bool(rule.file_patterns or rule.dir_patterns)

        if has_structural_rules and evidence_types < 2:
            # A single evidence type is too weak for rules that define
            # structural patterns - skip to avoid false positives.
            continue

        if not has_structural_rules and len(content_evidence) < 2:
            # Content-only rules need at least two matching files.
            continue

        # Confidence accumulates per evidence type, each with its own cap.
        score = 0.0
        if file_hits:
            score += min(0.35, file_hits * 0.15)
        if dir_hits:
            score += min(0.25, dir_hits * 0.15)
        if content_evidence:
            score += 0.2 + min(0.2, len(content_evidence) * 0.04)

        confidence = min(1.0, score)
        # dict.fromkeys removes duplicates while keeping first-seen order.
        unique_evidence = list(dict.fromkeys(evidence))

        detected.append(PatternInfo(
            name=rule.name,
            category=rule.category,
            confidence=round(confidence, 2),
            evidence=unique_evidence[:10],
        ))

        if rule.name in FRAMEWORK_NAMES and confidence >= 0.3:
            frameworks.append(rule.name)

    detected.sort(key=lambda p: p.confidence, reverse=True)
    return detected, frameworks
@@ -0,0 +1 @@
1
+ """FastAPI REST API for Code Explore."""