code-explore 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
"""Code Explore - Personal developer knowledge base.

Package marker exposing the distribution version string.
"""

__version__ = "0.1.0"
@@ -0,0 +1,13 @@
"""Code analysis - languages, metrics, dependencies, patterns."""

# Re-export the analyzer entry points so callers can use
# ``from code_explore.analyzer import detect_languages`` etc.
from code_explore.analyzer.dependencies import detect_dependencies
from code_explore.analyzer.language import detect_languages
from code_explore.analyzer.metrics import calculate_metrics
from code_explore.analyzer.patterns import detect_patterns

# Public API of the analyzer subpackage.
__all__ = [
    "detect_dependencies",
    "detect_languages",
    "calculate_metrics",
    "detect_patterns",
]
@@ -0,0 +1,328 @@
1
+ """Parse dependency files to extract project dependencies."""
2
+
3
import json
import re
from collections.abc import Callable
from pathlib import Path

from code_explore.models import DependencyInfo
8
+
9
+
10
+ def _read_file(path: Path) -> str:
11
+ try:
12
+ return path.read_text(encoding="utf-8", errors="replace")
13
+ except OSError:
14
+ return ""
15
+
16
+
17
def _parse_package_json(path: Path) -> list[DependencyInfo]:
    """Extract runtime and dev dependencies from a package.json manifest."""
    raw = _read_file(path)
    if not raw:
        return []
    try:
        manifest = json.loads(raw)
    except (json.JSONDecodeError, ValueError):
        return []

    found: list[DependencyInfo] = []
    # Runtime dependencies first, then dev dependencies, preserving order.
    for section, is_dev in (("dependencies", False), ("devDependencies", True)):
        for pkg, ver in (manifest.get(section) or {}).items():
            found.append(DependencyInfo(name=pkg, version=ver, dev=is_dev, source="package.json"))
    return found
32
+
33
+
34
def _parse_requirements_txt(path: Path) -> list[DependencyInfo]:
    """Parse a pip requirements file into dependency records."""
    text = _read_file(path)
    if not text:
        return []

    found: list[DependencyInfo] = []
    for raw_line in text.splitlines():
        entry = raw_line.strip()
        # Skip blanks, comments, and pip options such as "-r"/"--hash".
        if not entry or entry[0] in "#-":
            continue
        m = re.match(r"^([A-Za-z0-9_][A-Za-z0-9._-]*)\s*(?:([><=!~]+.*))?", entry)
        if m is None:
            continue
        spec = m.group(2)
        found.append(DependencyInfo(
            name=m.group(1),
            version=(spec.strip() or None) if spec else None,
            dev=False,
            source=path.name,
        ))
    return found
54
+
55
+
56
def _parse_pyproject_toml(path: Path) -> list[DependencyInfo]:
    """Best-effort, line-based parse of pyproject.toml dependencies.

    Handles both PEP 621 ``[project]`` string-list dependencies and
    Poetry-style ``name = "spec"`` tables without a real TOML parser.
    """
    content = _read_file(path)
    if not content:
        return []

    deps: list[DependencyInfo] = []

    # Line-oriented state machine:
    #   in_deps     - inside a runtime-dependency region (PEP 621 array or
    #                 [tool.poetry.dependencies])
    #   in_dev_deps - inside a Poetry dev-dependency table
    in_deps = False
    in_dev_deps = False
    for line in content.splitlines():
        stripped = line.strip()

        if stripped.startswith("["):
            in_deps = stripped in (
                "[project]",
                "[tool.poetry.dependencies]",
            )
            in_dev_deps = stripped in (
                "[tool.poetry.dev-dependencies]",
                "[tool.poetry.group.dev.dependencies]",
            )
            # [project] itself holds no deps; wait for "dependencies = [".
            if stripped == "[project]":
                in_deps = False
            continue

        # Opening of the PEP 621 dependency array.
        if stripped == "dependencies = [":
            in_deps = True
            continue

        # A closing bracket ends the PEP 621 dependency array.
        if in_deps and stripped.startswith("]"):
            in_deps = False
            continue

        if in_deps:
            # PEP 621 entry: "name>=1.0",
            match = re.match(r'"([A-Za-z0-9_][A-Za-z0-9._-]*)\s*(?:([><=!~]+[^"]*))?"', stripped)
            if match:
                deps.append(DependencyInfo(
                    name=match.group(1),
                    version=match.group(2) or None,
                    dev=False,
                    source="pyproject.toml",
                ))
                continue

            # Poetry runtime entry: name = "spec" or name = { version = ... }
            toml_match = re.match(
                r'([A-Za-z0-9_][A-Za-z0-9._-]*)\s*=\s*["\{]([^"}\n]*)', stripped,
            )
            if toml_match:
                name = toml_match.group(1)
                # The interpreter constraint is not a package dependency.
                if name == "python":
                    continue
                version_str = toml_match.group(2).strip().rstrip('"')
                # NOTE: in_deps and in_dev_deps are mutually exclusive here,
                # so dev is effectively False in this branch.
                deps.append(DependencyInfo(
                    name=name,
                    version=version_str or None,
                    dev=in_dev_deps,
                    source="pyproject.toml",
                ))

        # Poetry dev entries use the same line shape but are flagged dev=True.
        if in_dev_deps:
            toml_match = re.match(
                r'([A-Za-z0-9_][A-Za-z0-9._-]*)\s*=\s*["\{]([^"}\n]*)', stripped,
            )
            if toml_match:
                name = toml_match.group(1)
                if name == "python":
                    continue
                version_str = toml_match.group(2).strip().rstrip('"')
                deps.append(DependencyInfo(
                    name=name,
                    version=version_str or None,
                    dev=True,
                    source="pyproject.toml",
                ))

    return deps
132
+
133
+
134
def _parse_setup_py(path: Path) -> list[DependencyInfo]:
    """Pull install_requires entries out of a setup.py via regex scanning."""
    source = _read_file(path)
    if not source:
        return []

    found: list[DependencyInfo] = []
    # Scan every install_requires=[ ... ] block for quoted requirement specs.
    for block_match in re.finditer(r"install_requires\s*=\s*\[(.*?)]", source, re.DOTALL):
        for spec in re.findall(
            r"['\"]([A-Za-z0-9_][A-Za-z0-9._-]*(?:\s*[><=!~]+[^'\"]*)?)['\"]",
            block_match.group(1),
        ):
            split = re.match(r"([A-Za-z0-9_][A-Za-z0-9._-]*)\s*(.*)", spec)
            if split is None:
                continue
            found.append(DependencyInfo(
                name=split.group(1),
                version=split.group(2).strip() or None,
                dev=False,
                source="setup.py",
            ))
    return found
152
+
153
+
154
def _parse_cargo_toml(path: Path) -> list[DependencyInfo]:
    """Parse [dependencies] / [dev-dependencies] sections of Cargo.toml."""
    text = _read_file(path)
    if not text:
        return []

    found: list[DependencyInfo] = []
    section_is_deps = False
    section_is_dev = False

    for raw in text.splitlines():
        entry = raw.strip()
        if entry.startswith("["):
            section_is_deps = entry == "[dependencies]"
            section_is_dev = entry in ("[dev-dependencies]", "[build-dependencies]")
            continue
        if not (section_is_deps or section_is_dev):
            continue

        # Simple form: name = "1.0"
        simple = re.match(r'([A-Za-z0-9_][A-Za-z0-9_-]*)\s*=\s*"([^"]*)"', entry)
        if simple:
            found.append(DependencyInfo(
                name=simple.group(1),
                version=simple.group(2),
                dev=section_is_dev,
                source="Cargo.toml",
            ))
            continue

        # Table form: name = { version = "1.0", features = [...] }
        table = re.match(r'([A-Za-z0-9_][A-Za-z0-9_-]*)\s*=\s*\{.*?version\s*=\s*"([^"]*)"', entry)
        if table:
            found.append(DependencyInfo(
                name=table.group(1),
                version=table.group(2),
                dev=section_is_dev,
                source="Cargo.toml",
            ))

    return found
190
+
191
+
192
def _parse_go_mod(path: Path) -> list[DependencyInfo]:
    """Read module requirements from a go.mod file."""
    text = _read_file(path)
    if not text:
        return []

    found: list[DependencyInfo] = []
    inside_block = False

    for raw in text.splitlines():
        entry = raw.strip()
        if entry.startswith("require ("):
            inside_block = True
        elif entry == ")":
            inside_block = False
        elif entry.startswith("require "):
            # Single-line form: require module v1.2.3
            fields = entry[len("require "):].strip().split()
            if len(fields) >= 2:
                found.append(DependencyInfo(
                    name=fields[0], version=fields[1], dev=False, source="go.mod",
                ))
        elif inside_block:
            fields = entry.split()
            # Ignore commented-out entries inside the require block.
            if len(fields) >= 2 and not fields[0].startswith("//"):
                found.append(DependencyInfo(
                    name=fields[0], version=fields[1], dev=False, source="go.mod",
                ))

    return found
223
+
224
+
225
def _parse_pom_xml(path: Path) -> list[DependencyInfo]:
    """Extract Maven dependencies from a pom.xml via regex scanning.

    A dependency whose ``<scope>`` is ``test`` or ``provided`` is flagged as
    a dev dependency. Assumes the conventional groupId/artifactId/version
    element order inside each ``<dependency>``.
    """
    content = _read_file(path)
    if not content:
        return []

    deps: list[DependencyInfo] = []
    for match in re.finditer(
        r"<dependency>\s*"
        r"<groupId>([^<]+)</groupId>\s*"
        r"<artifactId>([^<]+)</artifactId>\s*"
        r"(?:<version>([^<]+)</version>)?",
        content,
    ):
        group_id = match.group(1).strip()
        artifact_id = match.group(2).strip()
        version = match.group(3).strip() if match.group(3) else None
        # Bound the <scope> search at this element's </dependency> so a scope
        # belonging to the NEXT dependency is never picked up (the original
        # fixed 500-char window could run into the following element).
        end = content.find("</dependency>", match.start())
        window = (
            content[match.start():end]
            if end != -1
            else content[match.start():match.start() + 500]
        )
        scope_match = re.search(r"<scope>([^<]+)</scope>", window)
        is_dev = scope_match is not None and scope_match.group(1).strip() in ("test", "provided")
        deps.append(DependencyInfo(
            name=f"{group_id}:{artifact_id}",
            version=version,
            dev=is_dev,
            source="pom.xml",
        ))

    return deps
251
+
252
+
253
def _parse_gemfile(path: Path) -> list[DependencyInfo]:
    """Parse `gem` declarations from a Gemfile, tracking dev/test groups."""
    text = _read_file(path)
    if not text:
        return []

    found: list[DependencyInfo] = []
    dev_group = False

    for raw in text.splitlines():
        entry = raw.strip()
        # Gems declared inside development/test groups count as dev deps.
        if entry.startswith(("group :development", "group :test")):
            dev_group = True
            continue
        if dev_group and entry == "end":
            dev_group = False
            continue

        gem = re.match(r"gem\s+['\"]([^'\"]+)['\"](?:\s*,\s*['\"]([^'\"]*)['\"])?", entry)
        if gem:
            found.append(DependencyInfo(
                name=gem.group(1),
                version=gem.group(2) or None,
                dev=dev_group,
                source="Gemfile",
            ))

    return found
280
+
281
+
282
def _parse_composer_json(path: Path) -> list[DependencyInfo]:
    """Extract require / require-dev packages from composer.json."""
    raw = _read_file(path)
    if not raw:
        return []
    try:
        manifest = json.loads(raw)
    except (json.JSONDecodeError, ValueError):
        return []

    found: list[DependencyInfo] = []
    for pkg, ver in (manifest.get("require") or {}).items():
        # The PHP runtime itself and its extensions are not real packages.
        if pkg == "php" or pkg.startswith("ext-"):
            continue
        found.append(DependencyInfo(name=pkg, version=ver, dev=False, source="composer.json"))
    for pkg, ver in (manifest.get("require-dev") or {}).items():
        found.append(DependencyInfo(name=pkg, version=ver, dev=True, source="composer.json"))
    return found
299
+
300
+
301
# Maps a manifest filename to the parser that understands it. Each parser
# takes the manifest's Path and returns the dependencies it declares.
# (The original annotation used the builtin ``callable``, which is a
# function, not a type; ``Callable[...]`` is the correct annotation.)
_PARSERS: dict[str, Callable[[Path], list[DependencyInfo]]] = {
    "package.json": _parse_package_json,
    "requirements.txt": _parse_requirements_txt,
    "requirements-dev.txt": _parse_requirements_txt,
    "requirements_dev.txt": _parse_requirements_txt,
    "pyproject.toml": _parse_pyproject_toml,
    "setup.py": _parse_setup_py,
    "Cargo.toml": _parse_cargo_toml,
    "go.mod": _parse_go_mod,
    "pom.xml": _parse_pom_xml,
    "Gemfile": _parse_gemfile,
    "composer.json": _parse_composer_json,
}
314
+
315
+
316
+ def detect_dependencies(project_path: str | Path) -> list[DependencyInfo]:
317
+ root = Path(project_path)
318
+ if not root.is_dir():
319
+ return []
320
+
321
+ all_deps: list[DependencyInfo] = []
322
+
323
+ for filename, parser in _PARSERS.items():
324
+ dep_file = root / filename
325
+ if dep_file.is_file():
326
+ all_deps.extend(parser(dep_file))
327
+
328
+ return all_deps
@@ -0,0 +1,240 @@
1
+ """Detect programming languages in a project directory."""
2
+
3
+ from pathlib import Path
4
+
5
+ from code_explore.models import LanguageInfo
6
+
7
# Maps a file extension to its language display name. Lookup happens in
# _get_language, which tries the exact suffix first and then its lowercase
# form (so the case-sensitive ".R" entry still wins for R files).
EXTENSION_MAP: dict[str, str] = {
    # Python
    ".py": "Python",
    ".pyw": "Python",
    ".pyi": "Python",
    ".pyx": "Python",
    # JavaScript / TypeScript
    ".js": "JavaScript",
    ".mjs": "JavaScript",
    ".cjs": "JavaScript",
    ".jsx": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".mts": "TypeScript",
    ".cts": "TypeScript",
    # JVM languages
    ".java": "Java",
    ".kt": "Kotlin",
    ".kts": "Kotlin",
    ".scala": "Scala",
    ".groovy": "Groovy",
    ".gradle": "Groovy",
    # C / C++
    ".c": "C",
    ".h": "C",
    ".cpp": "C++",
    ".cc": "C++",
    ".cxx": "C++",
    ".hpp": "C++",
    ".hh": "C++",
    ".hxx": "C++",
    # .NET
    ".cs": "C#",
    ".fs": "F#",
    ".fsx": "F#",
    ".vb": "Visual Basic",
    # Systems languages
    ".go": "Go",
    ".rs": "Rust",
    # Ruby / PHP / Apple
    ".rb": "Ruby",
    ".erb": "Ruby",
    ".rake": "Ruby",
    ".php": "PHP",
    ".swift": "Swift",
    ".m": "Objective-C",
    ".mm": "Objective-C",
    # Scientific / scripting
    ".r": "R",
    ".R": "R",
    ".rmd": "R",
    ".jl": "Julia",
    ".lua": "Lua",
    ".pl": "Perl",
    ".pm": "Perl",
    # Functional / BEAM
    ".ex": "Elixir",
    ".exs": "Elixir",
    ".erl": "Erlang",
    ".hrl": "Erlang",
    ".hs": "Haskell",
    ".lhs": "Haskell",
    ".ml": "OCaml",
    ".mli": "OCaml",
    ".clj": "Clojure",
    ".cljs": "Clojure",
    ".cljc": "Clojure",
    ".dart": "Dart",
    # Newer systems languages
    ".zig": "Zig",
    ".nim": "Nim",
    ".v": "V",
    ".d": "D",
    ".pas": "Pascal",
    ".pp": "Pascal",
    # Fortran / assembly
    ".f90": "Fortran",
    ".f95": "Fortran",
    ".f03": "Fortran",
    ".f": "Fortran",
    ".for": "Fortran",
    ".asm": "Assembly",
    ".s": "Assembly",
    # Shells / batch
    ".sh": "Shell",
    ".bash": "Shell",
    ".zsh": "Shell",
    ".fish": "Shell",
    ".ps1": "PowerShell",
    ".psm1": "PowerShell",
    ".bat": "Batch",
    ".cmd": "Batch",
    # Web markup / styling
    ".html": "HTML",
    ".htm": "HTML",
    ".css": "CSS",
    ".scss": "SCSS",
    ".sass": "Sass",
    ".less": "Less",
    ".styl": "Stylus",
    ".vue": "Vue",
    ".svelte": "Svelte",
    # Data / query / schema
    ".sql": "SQL",
    ".graphql": "GraphQL",
    ".gql": "GraphQL",
    ".proto": "Protocol Buffers",
    ".yaml": "YAML",
    ".yml": "YAML",
    ".json": "JSON",
    ".xml": "XML",
    ".toml": "TOML",
    ".ini": "INI",
    ".cfg": "INI",
    # Documentation
    ".md": "Markdown",
    ".rst": "reStructuredText",
    ".tex": "LaTeX",
    # Infrastructure / blockchain / build
    ".tf": "Terraform",
    ".hcl": "HCL",
    ".sol": "Solidity",
    ".vy": "Vyper",
    ".wasm": "WebAssembly",
    ".wat": "WebAssembly",
    ".dockerfile": "Dockerfile",
    ".cmake": "CMake",
    ".makefile": "Makefile",
    ".mk": "Makefile",
}
121
+
122
# Directory names that are never scanned: VCS internals, virtualenvs,
# package caches, build output, vendored code, and editor/tool state.
# NOTE(review): "*.egg-info" looks like a glob pattern; exact set membership
# (as used in _should_skip_dir) will not expand it — confirm how this entry
# is intended to be matched.
SKIP_DIRS: set[str] = {
    "node_modules", ".git", ".svn", ".hg", "__pycache__", ".mypy_cache",
    ".pytest_cache", ".tox", ".nox", ".venv", "venv", "env", ".env",
    "dist", "build", "out", "target", ".next", ".nuxt", ".output",
    "vendor", "third_party", "3rdparty", ".gradle", ".idea", ".vscode",
    ".vs", "bin", "obj", ".cache", ".parcel-cache", "coverage",
    ".nyc_output", ".terraform", ".eggs", "*.egg-info",
}
130
+
131
# Filename suffixes of machine-generated files (minified bundles, protobuf
# output, C# designer/codegen files, TypeScript declarations), tested via
# str.endswith in _is_generated.
# NOTE(review): _is_generated lowercases the filename before comparing, so
# mixed-case entries such as ".Designer.cs" cannot match as written —
# confirm the intended casing.
GENERATED_PATTERNS: set[str] = {
    ".min.js", ".min.css", ".bundle.js", ".chunk.js",
    ".Designer.cs", ".generated.cs", ".g.cs", ".g.i.cs",
    ".pb.go", ".pb.cc", ".pb.h", "_pb2.py", "_pb2_grpc.py",
    ".d.ts",
}
137
+
138
# Extensions treated as binary / non-source; matching files are skipped
# before any language detection or line counting.
BINARY_EXTENSIONS: set[str] = {
    # Images and media (".svg" is text XML but deliberately excluded here)
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp",
    ".mp3", ".mp4", ".wav", ".avi", ".mov", ".mkv", ".flac",
    # Archives
    ".zip", ".tar", ".gz", ".bz2", ".xz", ".rar", ".7z",
    # Compiled artifacts
    ".exe", ".dll", ".so", ".dylib", ".a", ".o", ".obj",
    ".class", ".jar", ".war", ".ear",
    ".pyc", ".pyo", ".whl",
    # Documents
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
    # Fonts
    ".ttf", ".otf", ".woff", ".woff2", ".eot",
    # Databases and lockfiles
    ".db", ".sqlite", ".sqlite3",
    ".lock",
}
150
+
151
+
152
def _is_generated(filename: str) -> bool:
    """Return True when *filename* ends with a known generated-file suffix.

    Both sides are lowercased before comparison. The original lowercased
    only the filename, so mixed-case patterns such as ".Designer.cs" in
    GENERATED_PATTERNS could never match.
    """
    lower = filename.lower()
    return any(lower.endswith(pattern.lower()) for pattern in GENERATED_PATTERNS)
155
+
156
+
157
def _is_binary(ext: str) -> bool:
    """Return True when the extension (case-insensitive) denotes a binary file."""
    normalized = ext.lower()
    return normalized in BINARY_EXTENSIONS
159
+
160
+
161
+ def _count_lines(path: Path) -> int:
162
+ try:
163
+ return sum(1 for _ in path.open("r", encoding="utf-8", errors="replace"))
164
+ except (OSError, ValueError):
165
+ return 0
166
+
167
+
168
def _should_skip_dir(name: str) -> bool:
    """Return True for directory names that should not be scanned.

    Skips known build/vendor/cache directories, every dot-directory, and
    "<pkg>.egg-info" directories — SKIP_DIRS lists the glob "*.egg-info",
    which exact set membership alone could never match.
    """
    return name in SKIP_DIRS or name.startswith(".") or name.endswith(".egg-info")
170
+
171
+
172
+ def _get_language(path: Path) -> str | None:
173
+ name_lower = path.name.lower()
174
+ if name_lower == "dockerfile" or name_lower.startswith("dockerfile."):
175
+ return "Dockerfile"
176
+ if name_lower == "makefile" or name_lower == "gnumakefile":
177
+ return "Makefile"
178
+ if name_lower == "cmakelists.txt":
179
+ return "CMake"
180
+ if name_lower == "jenkinsfile":
181
+ return "Groovy"
182
+ if name_lower == "vagrantfile":
183
+ return "Ruby"
184
+ if name_lower == "rakefile":
185
+ return "Ruby"
186
+ if name_lower == "gemfile":
187
+ return "Ruby"
188
+
189
+ ext = path.suffix
190
+ if not ext:
191
+ return None
192
+ return EXTENSION_MAP.get(ext) or EXTENSION_MAP.get(ext.lower())
193
+
194
+
195
def detect_languages(project_path: str | Path) -> tuple[list[LanguageInfo], str | None]:
    """Scan a project tree and report per-language file/line statistics.

    Returns ``(stats sorted by line count descending, primary language
    name)``, or ``([], None)`` when *project_path* is not a directory.
    """
    root = Path(project_path)
    if not root.is_dir():
        return [], None

    files_per_lang: dict[str, int] = {}
    lines_per_lang: dict[str, int] = {}

    for candidate in root.rglob("*"):
        if not candidate.is_file():
            continue

        # Skip anything located under an ignored directory; only ancestor
        # directories (not the filename itself) are tested.
        ancestors = candidate.relative_to(root).parts[:-1]
        if any(_should_skip_dir(part) for part in ancestors):
            continue

        if _is_binary(candidate.suffix) or _is_generated(candidate.name):
            continue

        lang = _get_language(candidate)
        if lang is None:
            continue

        files_per_lang[lang] = files_per_lang.get(lang, 0) + 1
        lines_per_lang[lang] = lines_per_lang.get(lang, 0) + _count_lines(candidate)

    total = sum(lines_per_lang.values())

    results = sorted(
        (
            LanguageInfo(
                name=lang,
                files=files_per_lang[lang],
                lines=lines_per_lang[lang],
                percentage=round((lines_per_lang[lang] / total * 100) if total > 0 else 0, 1),
            )
            for lang in lines_per_lang
        ),
        key=lambda info: info.lines,
        reverse=True,
    )

    primary = results[0].name if results else None
    return results, primary