code-explore 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_explore/__init__.py +3 -0
- code_explore/analyzer/__init__.py +13 -0
- code_explore/analyzer/dependencies.py +328 -0
- code_explore/analyzer/language.py +240 -0
- code_explore/analyzer/metrics.py +144 -0
- code_explore/analyzer/patterns.py +371 -0
- code_explore/api/__init__.py +1 -0
- code_explore/api/main.py +197 -0
- code_explore/cli/__init__.py +1 -0
- code_explore/cli/main.py +557 -0
- code_explore/database.py +207 -0
- code_explore/indexer/__init__.py +1 -0
- code_explore/indexer/embeddings.py +181 -0
- code_explore/models.py +106 -0
- code_explore/scanner/__init__.py +1 -0
- code_explore/scanner/git_info.py +94 -0
- code_explore/scanner/local.py +70 -0
- code_explore/scanner/readme.py +70 -0
- code_explore/search/__init__.py +1 -0
- code_explore/search/fulltext.py +137 -0
- code_explore/search/hybrid.py +92 -0
- code_explore/search/semantic.py +76 -0
- code_explore/summarizer/__init__.py +1 -0
- code_explore/summarizer/ollama.py +130 -0
- code_explore-0.1.0.dist-info/METADATA +67 -0
- code_explore-0.1.0.dist-info/RECORD +28 -0
- code_explore-0.1.0.dist-info/WHEEL +4 -0
- code_explore-0.1.0.dist-info/entry_points.txt +3 -0
code_explore/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Code analysis - languages, metrics, dependencies, patterns."""
|
|
2
|
+
|
|
3
|
+
from code_explore.analyzer.dependencies import detect_dependencies
|
|
4
|
+
from code_explore.analyzer.language import detect_languages
|
|
5
|
+
from code_explore.analyzer.metrics import calculate_metrics
|
|
6
|
+
from code_explore.analyzer.patterns import detect_patterns
|
|
7
|
+
|
|
8
|
+
# Public names of this package; each one is imported above from its submodule.
__all__ = [
    "detect_dependencies", "detect_languages",
    "calculate_metrics", "detect_patterns",
]
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""Parse dependency files to extract project dependencies."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from code_explore.models import DependencyInfo
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _read_file(path: Path) -> str:
|
|
11
|
+
try:
|
|
12
|
+
return path.read_text(encoding="utf-8", errors="replace")
|
|
13
|
+
except OSError:
|
|
14
|
+
return ""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _parse_package_json(path: Path) -> list[DependencyInfo]:
    """Extract dependencies from an npm ``package.json`` file.

    Entries under ``dependencies`` are reported with ``dev=False`` and
    entries under ``devDependencies`` with ``dev=True``; the version is the
    raw value stored in the manifest.

    Returns an empty list when the file is unreadable or empty, is not valid
    JSON, or does not contain a JSON object at the top level.
    """
    content = _read_file(path)
    if not content:
        return []
    try:
        data = json.loads(content)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return []
    # Valid JSON is not necessarily an object ("[]", "null", "3"); calling
    # .get() on those would raise instead of yielding "no dependencies".
    if not isinstance(data, dict):
        return []

    deps: list[DependencyInfo] = []
    for key, is_dev in (("dependencies", False), ("devDependencies", True)):
        section = data.get(key)
        if not isinstance(section, dict):
            # Missing, null or malformed section: nothing to report.
            continue
        for name, version in section.items():
            deps.append(DependencyInfo(
                name=name, version=version, dev=is_dev, source="package.json",
            ))
    return deps
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _parse_requirements_txt(path: Path) -> list[DependencyInfo]:
    """Parse a pip requirements file into dependency records.

    Blank lines, comment lines and option lines (anything starting with
    ``-``, such as ``-r other.txt`` or ``--index-url ...``) are skipped. For
    every remaining requirement the project name is kept and the version
    specifier (for example ``>=1.0,<2``) is stored without extras,
    environment markers, per-requirement options or trailing comments.
    ``name @ url`` direct references are kept with ``version=None``.
    Requirements given only as a URL or local path carry no project name and
    are skipped instead of being reported under a bogus name.

    Files whose stem ends in ``-dev`` or ``_dev`` are reported with
    ``dev=True``; ``source`` is always the file name.
    """
    content = _read_file(path)
    if not content:
        return []

    is_dev = path.stem.lower().endswith(("-dev", "_dev"))
    requirement = re.compile(
        r"^([A-Za-z0-9_][A-Za-z0-9._-]*)"  # project name
        r"\s*(?:\[[^\]]*\])?"              # optional extras, e.g. [security]
        r"\s*(.*)$"                        # specifier / marker / URL remainder
    )

    deps: list[DependencyInfo] = []
    for raw in content.splitlines():
        # A "#" starts a comment at line start or after whitespace; a "#"
        # glued to other text (URL fragments like "#egg=") is left alone.
        line = re.sub(r"(^|\s)#.*$", "", raw).strip()
        if not line or line.startswith("-"):
            continue
        match = requirement.match(line)
        if not match:
            continue
        name, rest = match.groups()

        rest = rest.split(";", 1)[0]                    # environment marker
        rest = re.split(r"\s--", rest, maxsplit=1)[0]   # --hash=... options
        rest = rest.rstrip("\\").strip()                # line continuation

        if rest.startswith("@"):
            version = None  # direct reference: "name @ https://..."
        elif not rest or rest[0] in "<>=!~":
            version = rest or None
        else:
            # e.g. "git+https://..." or "https://...": the leading token is a
            # URL scheme, not a project name.
            continue
        deps.append(DependencyInfo(
            name=name, version=version, dev=is_dev, source=path.name,
        ))
    return deps
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# One quoted PEP 508 requirement string: opening quote, project name,
# optional extras, then everything up to the matching closing quote.
_PYPROJECT_REQUIREMENT = re.compile(
    r"""(["'])([A-Za-z0-9_][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*((?:(?!\1).)*)\1"""
)
# A Poetry table entry: ``name = "spec"`` or ``name = { ... }``.
_PYPROJECT_POETRY_ENTRY = re.compile(
    r"""([A-Za-z0-9_][A-Za-z0-9._-]*)\s*=\s*(["'{])(.*)"""
)
# The ``version = "..."`` key inside a Poetry inline table.
_PYPROJECT_POETRY_VERSION = re.compile(
    r"""(?<![\w-])version\s*=\s*(["'])(.*?)\1"""
)
# ``[tool.poetry.group.<name>.dependencies]`` for any group name.
_PYPROJECT_POETRY_GROUP = re.compile(
    r"\[tool\.poetry\.group\.[^.\]]+\.dependencies\]"
)
# Any quoted string; removed before looking for the array's closing bracket.
_PYPROJECT_QUOTED = re.compile(r"""(["']).*?\1""")


def _pyproject_strip_comment(line: str) -> str:
    """Return *line* without a trailing ``#`` comment, respecting quotes."""
    quote = ""
    for pos, char in enumerate(line):
        if quote:
            if char == quote:
                quote = ""
        elif char in "\"'":
            quote = char
        elif char == "#":
            return line[:pos]
    return line


def _pyproject_requirements(text: str) -> list[DependencyInfo]:
    """Turn every quoted requirement string found in *text* into a record."""
    found: list[DependencyInfo] = []
    for match in _PYPROJECT_REQUIREMENT.finditer(text):
        name = match.group(2)
        rest = match.group(3).split(";", 1)[0].strip()  # drop the marker
        if rest and rest[0] not in "<>=!~@":
            continue  # not "name", "name <spec>" or "name @ url"
        version = None if rest.startswith("@") else (rest or None)
        found.append(DependencyInfo(
            name=name, version=version, dev=False, source="pyproject.toml",
        ))
    return found


def _parse_pyproject_toml(path: Path) -> list[DependencyInfo]:
    """Extract dependencies from a ``pyproject.toml`` without a TOML parser.

    Two layouts are recognised, line by line:

    * the ``dependencies = [...]`` array of the ``[project]`` table, written
      on one line or spread over several; entries may carry extras
      (``"uvicorn[standard]>=0.20"``) and environment markers, which are not
      part of the reported version;
    * Poetry tables: ``[tool.poetry.dependencies]`` (``dev=False``) and
      ``[tool.poetry.dev-dependencies]`` or any
      ``[tool.poetry.group.<name>.dependencies]`` (``dev=True``). Values may
      be a plain string or an inline table, from which the ``version`` key
      is taken (``None`` when absent). The ``python`` entry is skipped.

    ``dependencies`` arrays outside ``[project]`` are ignored. Returns an
    empty list when the file is unreadable or empty.
    """
    content = _read_file(path)
    if not content:
        return []

    deps: list[DependencyInfo] = []
    section = ""      # most recent table header, spaces removed
    in_array = False  # inside a multi-line [project] dependencies array

    for line in content.splitlines():
        code = _pyproject_strip_comment(line).strip()
        if not code:
            continue

        if in_array:
            deps.extend(_pyproject_requirements(code))
            # Brackets inside quotes are extras, not the end of the array.
            in_array = "]" not in _PYPROJECT_QUOTED.sub("", code)
            continue

        if code.startswith("["):
            section = code.replace(" ", "")
            continue

        if section == "[project]":
            opening = re.match(r"dependencies\s*=\s*\[(.*)", code)
            if opening:
                body = opening.group(1)
                deps.extend(_pyproject_requirements(body))
                in_array = "]" not in _PYPROJECT_QUOTED.sub("", body)
            continue

        if section == "[tool.poetry.dependencies]":
            is_dev = False
        elif (section == "[tool.poetry.dev-dependencies]"
                or _PYPROJECT_POETRY_GROUP.fullmatch(section)):
            is_dev = True
        else:
            continue

        entry = _PYPROJECT_POETRY_ENTRY.match(code)
        if not entry:
            continue
        name, opener, rest = entry.groups()
        if name == "python":
            continue  # interpreter constraint, not a package
        if opener == "{":
            version_match = _PYPROJECT_POETRY_VERSION.search(rest)
            version = version_match.group(2) if version_match else None
        else:
            version = rest.split(opener, 1)[0].strip()
        deps.append(DependencyInfo(
            name=name,
            version=version or None,
            dev=is_dev,
            source="pyproject.toml",
        ))

    return deps
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _setup_py_list_strings(text: str, start: int) -> list[str]:
    """Return the string literals between *start* and the closing ``]``.

    *start* is the index just after a list's opening bracket. Brackets that
    appear inside string literals (extras such as ``pkg[extra]``) and any
    text inside ``#`` comments are ignored. A list that is never closed
    yields an empty result. Escaped quotes are not interpreted.
    """
    strings: list[str] = []
    quote = ""
    begin = 0
    pos = start
    while pos < len(text):
        char = text[pos]
        if quote:
            if char == quote:
                strings.append(text[begin:pos])
                quote = ""
        elif char in "\"'":
            quote, begin = char, pos + 1
        elif char == "#":
            newline = text.find("\n", pos)
            if newline == -1:
                break
            pos = newline
        elif char == "]":
            return strings
        pos += 1
    return []


def _parse_setup_py(path: Path) -> list[DependencyInfo]:
    """Extract ``install_requires`` entries from a ``setup.py`` by text scan.

    Every ``install_requires = [...]`` list literal in the file is inspected
    and each string inside it is read as a requirement: the project name is
    kept, extras and environment markers are discarded, and the version
    specifier (if any) becomes the version. ``name @ url`` references are
    kept with ``version=None``. The file is never executed, so requirements
    computed at runtime are not seen. All records have ``dev=False``.

    Returns an empty list when the file is unreadable or empty.
    """
    content = _read_file(path)
    if not content:
        return []

    requirement = re.compile(
        r"([A-Za-z0-9_][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*(.*)", re.DOTALL,
    )
    deps: list[DependencyInfo] = []
    for opening in re.finditer(r"install_requires\s*=\s*\[", content):
        # Scan for the real end of the list; a regex stopping at the first
        # "]" would be cut short by extras such as "requests[security]".
        for literal in _setup_py_list_strings(content, opening.end()):
            parts = requirement.fullmatch(literal.strip())
            if not parts:
                continue
            rest = parts.group(2).split(";", 1)[0].strip()  # drop the marker
            if rest and rest[0] not in "<>=!~@":
                continue  # not a recognisable requirement string
            version = None if rest.startswith("@") else (rest or None)
            deps.append(DependencyInfo(
                name=parts.group(1),
                version=version,
                dev=False,
                source="setup.py",
            ))
    return deps
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _parse_cargo_toml(path: Path) -> list[DependencyInfo]:
    """Extract dependencies from a ``Cargo.toml`` manifest, line by line.

    Entries are read from ``[dependencies]`` (``dev=False``) and from
    ``[dev-dependencies]`` / ``[build-dependencies]`` (``dev=True``),
    including their target-specific forms such as
    ``[target.'cfg(unix)'.dependencies]``. Two value shapes are understood:

    * ``name = "1.2"`` - the string is the version;
    * ``name = { version = "1.2", ... }`` - the ``version`` key is used.
      Inline tables without one (path, git or workspace dependencies) are
      still reported, with ``version=None``.

    Returns an empty list when the file is unreadable or empty.
    """
    content = _read_file(path)
    if not content:
        return []

    header = re.compile(r"\[(?:target\..+\.)?((?:dev-|build-)?dependencies)\]")
    entry = re.compile(r"([A-Za-z0-9_][A-Za-z0-9_-]*)\s*=\s*(.*)")
    plain_version = re.compile(r'"([^"]*)"')
    # The look-behind keeps keys that merely end in "version" from matching.
    table_version = re.compile(r'(?<![\w-])version\s*=\s*"([^"]*)"')

    deps: list[DependencyInfo] = []
    in_deps = False
    in_dev_deps = False

    for line in content.splitlines():
        stripped = line.strip()
        if stripped.startswith("["):
            kind = header.fullmatch(stripped)
            in_deps = bool(kind) and kind.group(1) == "dependencies"
            in_dev_deps = bool(kind) and kind.group(1) != "dependencies"
            continue
        if not (in_deps or in_dev_deps):
            continue

        found = entry.match(stripped)
        if not found:
            continue
        name, value = found.groups()
        if value.startswith('"'):
            quoted = plain_version.match(value)
            if not quoted:
                continue  # unterminated string
            version = quoted.group(1)
        elif value.startswith("{"):
            keyed = table_version.search(value)
            version = keyed.group(1) if keyed else None
        else:
            continue  # e.g. arrays or booleans: not a dependency spec

        deps.append(DependencyInfo(
            name=name,
            version=version,
            dev=in_dev_deps,
            source="Cargo.toml",
        ))

    return deps
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _parse_go_mod(path: Path) -> list[DependencyInfo]:
    """Collect the modules named by ``require`` directives in a ``go.mod``.

    Both the single-line form (``require module version``) and the block
    form (``require ( ... )``) are read. Within a block, lines whose first
    token starts with ``//`` are skipped. Every record has ``dev=False``.
    Returns an empty list when the file is unreadable or empty.
    """
    text = _read_file(path)
    if not text:
        return []

    def record(fields: list[str]) -> DependencyInfo:
        # First token is the module path, second the version; anything after
        # that (e.g. a trailing comment) is not used.
        return DependencyInfo(
            name=fields[0], version=fields[1], dev=False, source="go.mod",
        )

    found: list[DependencyInfo] = []
    inside_block = False
    for raw in text.splitlines():
        entry = raw.strip()
        if entry.startswith("require ("):
            inside_block = True
        elif entry == ")":
            inside_block = False
        elif entry.startswith("require "):
            fields = entry[len("require "):].strip().split()
            if len(fields) >= 2:
                found.append(record(fields))
        elif inside_block:
            fields = entry.split()
            if len(fields) >= 2 and not fields[0].startswith("//"):
                found.append(record(fields))
    return found
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _parse_pom_xml(path: Path) -> list[DependencyInfo]:
    """Extract ``<dependency>`` entries from a Maven ``pom.xml`` via regex.

    A dependency is recognised when ``<groupId>`` and ``<artifactId>``
    directly follow the opening tag, in that order; ``<version>`` is
    optional. The reported name is ``groupId:artifactId``. A dependency
    whose ``<scope>`` is ``test`` or ``provided`` is reported with
    ``dev=True``.

    Returns an empty list when the file is unreadable or empty.
    """
    content = _read_file(path)
    if not content:
        return []

    dependency = re.compile(
        r"<dependency>\s*"
        r"<groupId>([^<]+)</groupId>\s*"
        r"<artifactId>([^<]+)</artifactId>\s*"
        r"(?:<version>([^<]+)</version>)?"
    )
    scope = re.compile(r"<scope>([^<]+)</scope>")

    deps: list[DependencyInfo] = []
    for match in dependency.finditer(content):
        group_id = match.group(1).strip()
        artifact_id = match.group(2).strip()
        version = match.group(3).strip() if match.group(3) else None

        # Look for <scope> only inside this element. A fixed-size window can
        # reach into the next <dependency> and borrow its scope.
        closing = content.find("</dependency>", match.end())
        end = closing if closing != -1 else match.start() + 500
        scope_match = scope.search(content, match.start(), end)
        is_dev = (
            scope_match is not None
            and scope_match.group(1).strip() in ("test", "provided")
        )

        deps.append(DependencyInfo(
            name=f"{group_id}:{artifact_id}",
            version=version,
            dev=is_dev,
            source="pom.xml",
        ))

    return deps
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _parse_gemfile(path: Path) -> list[DependencyInfo]:
    """Extract ``gem`` declarations from a Bundler ``Gemfile``.

    The gem name and, when present, the first version constraint are
    reported. A gem is marked ``dev=True`` when it is declared

    * inside a ``group`` block whose first group is ``:development`` or
      ``:test`` (at any nesting depth), or
    * with an inline ``group:`` / ``groups:`` option whose first group is
      ``:development`` or ``:test``.

    Open blocks are tracked on a stack so that the ``end`` of a nested block
    (``platforms ... do``, ``source ... do``, ``if`` ...) does not close the
    surrounding group. Comment lines are ignored.

    Returns an empty list when the file is unreadable or empty.
    """
    content = _read_file(path)
    if not content:
        return []

    gem_line = re.compile(
        r"gem\s+['\"]([^'\"]+)['\"](?:\s*,\s*['\"]([^'\"]*)['\"])?"
    )
    inline_dev = re.compile(
        r"\bgroups?\s*(?::|=>)\s*\[?\s*:(?:development|test)\b"
    )
    opens_block = re.compile(
        r"\bdo(?:\s*\|[^|]*\|)?$"
        r"|^(?:if|unless|case|begin|while|until|for|def|class|module)\b"
    )

    deps: list[DependencyInfo] = []
    open_blocks: list[bool] = []  # one flag per open block: is it a dev group?

    for line in content.splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        code = re.sub(r"\s#.*$", "", stripped)  # drop a trailing comment

        if code == "end":
            if open_blocks:
                open_blocks.pop()
            continue
        if opens_block.search(code):
            open_blocks.append(
                code.startswith(("group :development", "group :test"))
            )
            continue

        match = gem_line.match(code)
        if match:
            is_dev = any(open_blocks) or bool(inline_dev.search(code))
            deps.append(DependencyInfo(
                name=match.group(1),
                version=match.group(2) or None,
                dev=is_dev,
                source="Gemfile",
            ))

    return deps
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _parse_composer_json(path: Path) -> list[DependencyInfo]:
    """Extract dependencies from a PHP ``composer.json`` file.

    Entries under ``require`` are reported with ``dev=False`` and entries
    under ``require-dev`` with ``dev=True``. Platform requirements - the
    ``php`` runtime itself and ``ext-*`` extensions - are not packages and
    are skipped in both sections.

    Returns an empty list when the file is unreadable or empty, is not valid
    JSON, or does not contain a JSON object at the top level.
    """
    content = _read_file(path)
    if not content:
        return []
    try:
        data = json.loads(content)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return []
    # Valid JSON is not necessarily an object; .get() would raise on a list.
    if not isinstance(data, dict):
        return []

    deps: list[DependencyInfo] = []
    for key, is_dev in (("require", False), ("require-dev", True)):
        section = data.get(key)
        if not isinstance(section, dict):
            continue
        for name, version in section.items():
            if name == "php" or name.startswith("ext-"):
                continue
            deps.append(DependencyInfo(
                name=name, version=version, dev=is_dev, source="composer.json",
            ))
    return deps
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Manifest file name -> parser for that format. detect_dependencies() walks
# this mapping in insertion order, so the order here is the order in which
# dependencies appear in its result.
_PARSERS: dict[str, callable] = {
    # JavaScript
    "package.json": _parse_package_json,
    # Python
    "requirements.txt": _parse_requirements_txt,
    "requirements-dev.txt": _parse_requirements_txt,
    "requirements_dev.txt": _parse_requirements_txt,
    "pyproject.toml": _parse_pyproject_toml,
    "setup.py": _parse_setup_py,
    # Rust
    "Cargo.toml": _parse_cargo_toml,
    # Go
    "go.mod": _parse_go_mod,
    # Java (Maven)
    "pom.xml": _parse_pom_xml,
    # Ruby
    "Gemfile": _parse_gemfile,
    # PHP
    "composer.json": _parse_composer_json,
}
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def detect_dependencies(project_path: str | Path) -> list[DependencyInfo]:
    """Collect dependencies from every known manifest in a project root.

    Only files located directly in *project_path* are considered; each file
    name registered in ``_PARSERS`` that exists there is handed to its
    parser and the results are concatenated in registration order.

    Returns an empty list when *project_path* is not a directory.
    """
    base = Path(project_path)
    if not base.is_dir():
        return []

    collected: list[DependencyInfo] = []
    for manifest_name, parse in _PARSERS.items():
        candidate = base / manifest_name
        if not candidate.is_file():
            continue
        collected += parse(candidate)
    return collected
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""Detect programming languages in a project directory."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from code_explore.models import LanguageInfo
|
|
6
|
+
|
|
7
|
+
# (language, extensions) pairs. The order of the pairs and of the extensions
# inside each pair determines the key order of EXTENSION_MAP.
_EXTENSIONS_BY_LANGUAGE: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("Python", (".py", ".pyw", ".pyi", ".pyx")),
    ("JavaScript", (".js", ".mjs", ".cjs", ".jsx")),
    ("TypeScript", (".ts", ".tsx", ".mts", ".cts")),
    ("Java", (".java",)),
    ("Kotlin", (".kt", ".kts")),
    ("Scala", (".scala",)),
    ("Groovy", (".groovy", ".gradle")),
    ("C", (".c", ".h")),
    ("C++", (".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx")),
    ("C#", (".cs",)),
    ("F#", (".fs", ".fsx")),
    ("Visual Basic", (".vb",)),
    ("Go", (".go",)),
    ("Rust", (".rs",)),
    ("Ruby", (".rb", ".erb", ".rake")),
    ("PHP", (".php",)),
    ("Swift", (".swift",)),
    ("Objective-C", (".m", ".mm")),
    ("R", (".r", ".R", ".rmd")),
    ("Julia", (".jl",)),
    ("Lua", (".lua",)),
    ("Perl", (".pl", ".pm")),
    ("Elixir", (".ex", ".exs")),
    ("Erlang", (".erl", ".hrl")),
    ("Haskell", (".hs", ".lhs")),
    ("OCaml", (".ml", ".mli")),
    ("Clojure", (".clj", ".cljs", ".cljc")),
    ("Dart", (".dart",)),
    ("Zig", (".zig",)),
    ("Nim", (".nim",)),
    ("V", (".v",)),
    ("D", (".d",)),
    ("Pascal", (".pas", ".pp")),
    ("Fortran", (".f90", ".f95", ".f03", ".f", ".for")),
    ("Assembly", (".asm", ".s")),
    ("Shell", (".sh", ".bash", ".zsh", ".fish")),
    ("PowerShell", (".ps1", ".psm1")),
    ("Batch", (".bat", ".cmd")),
    ("HTML", (".html", ".htm")),
    ("CSS", (".css",)),
    ("SCSS", (".scss",)),
    ("Sass", (".sass",)),
    ("Less", (".less",)),
    ("Stylus", (".styl",)),
    ("Vue", (".vue",)),
    ("Svelte", (".svelte",)),
    ("SQL", (".sql",)),
    ("GraphQL", (".graphql", ".gql")),
    ("Protocol Buffers", (".proto",)),
    ("YAML", (".yaml", ".yml")),
    ("JSON", (".json",)),
    ("XML", (".xml",)),
    ("TOML", (".toml",)),
    ("INI", (".ini", ".cfg")),
    ("Markdown", (".md",)),
    ("reStructuredText", (".rst",)),
    ("LaTeX", (".tex",)),
    ("Terraform", (".tf",)),
    ("HCL", (".hcl",)),
    ("Solidity", (".sol",)),
    ("Vyper", (".vy",)),
    ("WebAssembly", (".wasm", ".wat")),
    ("Dockerfile", (".dockerfile",)),
    ("CMake", (".cmake",)),
    ("Makefile", (".makefile", ".mk")),
)

# File extension (with leading dot) -> language name.
EXTENSION_MAP: dict[str, str] = {
    extension: language
    for language, extensions in _EXTENSIONS_BY_LANGUAGE
    for extension in extensions
}
|
|
121
|
+
|
|
122
|
+
SKIP_DIRS: set[str] = {
|
|
123
|
+
"node_modules", ".git", ".svn", ".hg", "__pycache__", ".mypy_cache",
|
|
124
|
+
".pytest_cache", ".tox", ".nox", ".venv", "venv", "env", ".env",
|
|
125
|
+
"dist", "build", "out", "target", ".next", ".nuxt", ".output",
|
|
126
|
+
"vendor", "third_party", "3rdparty", ".gradle", ".idea", ".vscode",
|
|
127
|
+
".vs", "bin", "obj", ".cache", ".parcel-cache", "coverage",
|
|
128
|
+
".nyc_output", ".terraform", ".eggs", "*.egg-info",
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
GENERATED_PATTERNS: set[str] = {
|
|
132
|
+
".min.js", ".min.css", ".bundle.js", ".chunk.js",
|
|
133
|
+
".Designer.cs", ".generated.cs", ".g.cs", ".g.i.cs",
|
|
134
|
+
".pb.go", ".pb.cc", ".pb.h", "_pb2.py", "_pb2_grpc.py",
|
|
135
|
+
".d.ts",
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
BINARY_EXTENSIONS: set[str] = {
|
|
139
|
+
".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp",
|
|
140
|
+
".mp3", ".mp4", ".wav", ".avi", ".mov", ".mkv", ".flac",
|
|
141
|
+
".zip", ".tar", ".gz", ".bz2", ".xz", ".rar", ".7z",
|
|
142
|
+
".exe", ".dll", ".so", ".dylib", ".a", ".o", ".obj",
|
|
143
|
+
".class", ".jar", ".war", ".ear",
|
|
144
|
+
".pyc", ".pyo", ".whl",
|
|
145
|
+
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
|
|
146
|
+
".ttf", ".otf", ".woff", ".woff2", ".eot",
|
|
147
|
+
".db", ".sqlite", ".sqlite3",
|
|
148
|
+
".lock",
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _is_generated(filename: str) -> bool:
|
|
153
|
+
lower = filename.lower()
|
|
154
|
+
return any(lower.endswith(pat) for pat in GENERATED_PATTERNS)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _is_binary(ext: str) -> bool:
|
|
158
|
+
return ext.lower() in BINARY_EXTENSIONS
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _count_lines(path: Path) -> int:
|
|
162
|
+
try:
|
|
163
|
+
return sum(1 for _ in path.open("r", encoding="utf-8", errors="replace"))
|
|
164
|
+
except (OSError, ValueError):
|
|
165
|
+
return 0
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _should_skip_dir(name: str) -> bool:
|
|
169
|
+
return name in SKIP_DIRS or name.startswith(".")
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _get_language(path: Path) -> str | None:
|
|
173
|
+
name_lower = path.name.lower()
|
|
174
|
+
if name_lower == "dockerfile" or name_lower.startswith("dockerfile."):
|
|
175
|
+
return "Dockerfile"
|
|
176
|
+
if name_lower == "makefile" or name_lower == "gnumakefile":
|
|
177
|
+
return "Makefile"
|
|
178
|
+
if name_lower == "cmakelists.txt":
|
|
179
|
+
return "CMake"
|
|
180
|
+
if name_lower == "jenkinsfile":
|
|
181
|
+
return "Groovy"
|
|
182
|
+
if name_lower == "vagrantfile":
|
|
183
|
+
return "Ruby"
|
|
184
|
+
if name_lower == "rakefile":
|
|
185
|
+
return "Ruby"
|
|
186
|
+
if name_lower == "gemfile":
|
|
187
|
+
return "Ruby"
|
|
188
|
+
|
|
189
|
+
ext = path.suffix
|
|
190
|
+
if not ext:
|
|
191
|
+
return None
|
|
192
|
+
return EXTENSION_MAP.get(ext) or EXTENSION_MAP.get(ext.lower())
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def detect_languages(project_path: str | Path) -> tuple[list[LanguageInfo], str | None]:
    """Summarise which languages a project contains.

    Every file below *project_path* is visited. Files inside a skipped
    directory, files with a binary extension, generated files and files
    whose language is not recognised are left out. The remaining files are
    grouped by language, counting files and lines.

    Returns a pair ``(languages, primary)``: *languages* is sorted by line
    count, largest first, each entry carrying its share of all counted
    lines as a percentage rounded to one decimal; *primary* is the name of
    the first entry, or None when nothing was counted. If *project_path* is
    not a directory the result is ``([], None)``.
    """
    base = Path(project_path)
    if not base.is_dir():
        return [], None

    totals: dict[str, dict[str, int]] = {}
    for entry in base.rglob("*"):
        if not entry.is_file():
            continue

        # Only the directories leading to the file are checked, not its name.
        parent_dirs = entry.relative_to(base).parts[:-1]
        in_skipped_dir = any(_should_skip_dir(part) for part in parent_dirs)
        if in_skipped_dir or _is_binary(entry.suffix) or _is_generated(entry.name):
            continue

        language = _get_language(entry)
        if language is None:
            continue

        line_count = _count_lines(entry)
        bucket = totals.setdefault(language, {"files": 0, "lines": 0})
        bucket["files"] += 1
        bucket["lines"] += line_count

    grand_total = sum(bucket["lines"] for bucket in totals.values())

    def share(line_count: int):
        # Guard against dividing by zero when every counted file is empty.
        if grand_total > 0:
            return round(line_count / grand_total * 100, 1)
        return round(0, 1)

    ranked = sorted(
        (
            LanguageInfo(
                name=language,
                files=bucket["files"],
                lines=bucket["lines"],
                percentage=share(bucket["lines"]),
            )
            for language, bucket in totals.items()
        ),
        key=lambda info: info.lines,
        reverse=True,
    )

    primary = ranked[0].name if ranked else None
    return ranked, primary
|