skill-seekers 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,572 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unified Codebase Analyzer
|
|
3
|
+
|
|
4
|
+
Key Insight: C3.x is an ANALYSIS DEPTH, not a source type.
|
|
5
|
+
|
|
6
|
+
This analyzer works with ANY codebase source:
|
|
7
|
+
- GitHub URLs (uses three-stream fetcher)
|
|
8
|
+
- Local paths (analyzes directly)
|
|
9
|
+
|
|
10
|
+
Analysis modes:
|
|
11
|
+
- basic (1-2 min): File structure, imports, entry points
|
|
12
|
+
- c3x (20-60 min): Full C3.x suite + GitHub insights
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from skill_seekers.cli.github_fetcher import GitHubThreeStreamFetcher
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class AnalysisResult:
|
|
24
|
+
"""Unified analysis result from any codebase source."""
|
|
25
|
+
|
|
26
|
+
code_analysis: dict
|
|
27
|
+
github_docs: dict | None = None
|
|
28
|
+
github_insights: dict | None = None
|
|
29
|
+
source_type: str = "local" # 'local' or 'github'
|
|
30
|
+
analysis_depth: str = "basic" # 'basic' or 'c3x'
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class UnifiedCodebaseAnalyzer:
    """
    Analyze ANY codebase — a local directory or a GitHub repository.

    The central idea: C3.x is an analysis DEPTH, not a source type.
    The same analyzer accepts either source kind and runs one of two
    depth modes:

    - ``basic`` (fast): file inventory, structure, imports, entry
      points, statistics.
    - ``c3x`` (deep): everything in basic plus the full C3.x suite
      (patterns, test examples, guides, configs, architecture), and
      GitHub docs/insights when the source is a GitHub URL.

    Usage:
        analyzer = UnifiedCodebaseAnalyzer()

        # GitHub source, deep analysis
        result = analyzer.analyze(
            source="https://github.com/facebook/react",
            depth="c3x",
            fetch_github_metadata=True,
        )

        # Local source, deep analysis
        result = analyzer.analyze(source="/path/to/project", depth="c3x")

        # Local source, quick pass
        result = analyzer.analyze(source="/path/to/project", depth="basic")
    """
|
|
61
|
+
|
|
62
|
+
def __init__(self, github_token: str | None = None):
|
|
63
|
+
"""
|
|
64
|
+
Initialize analyzer.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
github_token: Optional GitHub API token for higher rate limits
|
|
68
|
+
"""
|
|
69
|
+
self.github_token = github_token or os.getenv("GITHUB_TOKEN")
|
|
70
|
+
|
|
71
|
+
def analyze(
    self,
    source: str,
    depth: str = "c3x",
    fetch_github_metadata: bool = True,
    output_dir: Path | None = None,
    interactive: bool = True,
) -> AnalysisResult:
    """
    Analyze a codebase from any source at the requested depth.

    Args:
        source: GitHub URL or local path.
        depth: 'basic' or 'c3x'.
        fetch_github_metadata: Fetch GitHub insights (GitHub sources only).
        output_dir: Directory for temporary files (GitHub clones).
        interactive: Show interactive prompts (False for CI/CD and tests).

    Returns:
        AnalysisResult with all available streams.
    """
    print(f"🔍 Analyzing codebase: {source}")
    print(f"📊 Analysis depth: {depth}")

    # Dispatch on source kind; each branch announces what it detected.
    if not self.is_github_url(source):
        print("📁 Source type: Local directory")
        return self._analyze_local(source, depth)

    print("📦 Source type: GitHub repository")
    return self._analyze_github(
        source, depth, fetch_github_metadata, output_dir, interactive
    )
|
|
104
|
+
|
|
105
|
+
def _analyze_github(
    self,
    repo_url: str,
    depth: str,
    fetch_metadata: bool,
    output_dir: Path | None,
    interactive: bool = True,
) -> AnalysisResult:
    """
    Clone/fetch a GitHub repository via the three-stream fetcher, then
    analyze its code stream at the requested depth.

    Args:
        repo_url: GitHub repository URL.
        depth: 'basic' or 'c3x'.
        fetch_metadata: Attach docs/insights streams to the result.
        output_dir: Destination for the clone/temporary files.
        interactive: Show interactive prompts (False for CI/CD and tests).

    Returns:
        AnalysisResult with code analysis, plus docs/insights when requested.
    """
    streams = GitHubThreeStreamFetcher(
        repo_url, self.github_token, interactive=interactive
    ).fetch(output_dir)

    # Pick the analysis routine by depth; reject anything unrecognized.
    dispatch = {"basic": self.basic_analysis, "c3x": self.c3x_analysis}
    if depth not in dispatch:
        raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
    code_analysis = dispatch[depth](streams.code_stream.directory)

    result = AnalysisResult(
        code_analysis=code_analysis, source_type="github", analysis_depth=depth
    )

    # Attach the two GitHub-only streams when the caller asked for them.
    if fetch_metadata:
        docs = streams.docs_stream
        result.github_docs = {
            "readme": docs.readme,
            "contributing": docs.contributing,
            "docs_files": docs.docs_files,
        }
        insights = streams.insights_stream
        result.github_insights = {
            "metadata": insights.metadata,
            "common_problems": insights.common_problems,
            "known_solutions": insights.known_solutions,
            "top_labels": insights.top_labels,
        }

    return result
|
|
159
|
+
|
|
160
|
+
def _analyze_local(self, directory: str, depth: str) -> AnalysisResult:
    """
    Analyze a local directory at the requested depth.

    Args:
        directory: Path to the local directory.
        depth: 'basic' or 'c3x'.

    Returns:
        AnalysisResult carrying only the code-analysis stream.

    Raises:
        FileNotFoundError: The path does not exist.
        NotADirectoryError: The path exists but is not a directory.
        ValueError: Unrecognized depth value.
    """
    root = Path(directory)

    # Validate the source before doing any work.
    if not root.exists():
        raise FileNotFoundError(f"Directory not found: {directory}")
    if not root.is_dir():
        raise NotADirectoryError(f"Not a directory: {directory}")

    dispatch = {"basic": self.basic_analysis, "c3x": self.c3x_analysis}
    if depth not in dispatch:
        raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")

    return AnalysisResult(
        code_analysis=dispatch[depth](root),
        source_type="local",
        analysis_depth=depth,
    )
|
|
190
|
+
|
|
191
|
+
def basic_analysis(self, directory: Path) -> dict:
|
|
192
|
+
"""
|
|
193
|
+
Fast, shallow analysis (1-2 min).
|
|
194
|
+
|
|
195
|
+
Returns:
|
|
196
|
+
- File structure
|
|
197
|
+
- Imports
|
|
198
|
+
- Entry points
|
|
199
|
+
- Basic statistics
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
directory: Path to analyze
|
|
203
|
+
|
|
204
|
+
Returns:
|
|
205
|
+
Dict with basic analysis
|
|
206
|
+
"""
|
|
207
|
+
print("📊 Running basic analysis (1-2 min)...")
|
|
208
|
+
|
|
209
|
+
analysis = {
|
|
210
|
+
"directory": str(directory),
|
|
211
|
+
"analysis_type": "basic",
|
|
212
|
+
"files": self.list_files(directory),
|
|
213
|
+
"structure": self.get_directory_structure(directory),
|
|
214
|
+
"imports": self.extract_imports(directory),
|
|
215
|
+
"entry_points": self.find_entry_points(directory),
|
|
216
|
+
"statistics": self.compute_statistics(directory),
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
print(f"✅ Basic analysis complete: {len(analysis['files'])} files analyzed")
|
|
220
|
+
return analysis
|
|
221
|
+
|
|
222
|
+
def c3x_analysis(self, directory: Path) -> dict:
|
|
223
|
+
"""
|
|
224
|
+
Deep C3.x analysis (20-60 min).
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
- Everything from basic
|
|
228
|
+
- C3.1: Design patterns
|
|
229
|
+
- C3.2: Test examples
|
|
230
|
+
- C3.3: How-to guides
|
|
231
|
+
- C3.4: Config patterns
|
|
232
|
+
- C3.7: Architecture
|
|
233
|
+
|
|
234
|
+
Args:
|
|
235
|
+
directory: Path to analyze
|
|
236
|
+
|
|
237
|
+
Returns:
|
|
238
|
+
Dict with full C3.x analysis
|
|
239
|
+
"""
|
|
240
|
+
print("📊 Running C3.x analysis (20-60 min)...")
|
|
241
|
+
|
|
242
|
+
# Start with basic analysis
|
|
243
|
+
basic = self.basic_analysis(directory)
|
|
244
|
+
|
|
245
|
+
# Run full C3.x analysis using existing codebase_scraper
|
|
246
|
+
print("🔍 Running C3.x components (patterns, examples, guides, configs, architecture)...")
|
|
247
|
+
|
|
248
|
+
try:
|
|
249
|
+
# Import codebase analyzer
|
|
250
|
+
import tempfile
|
|
251
|
+
|
|
252
|
+
from .codebase_scraper import analyze_codebase
|
|
253
|
+
|
|
254
|
+
# Create temporary output directory for C3.x analysis
|
|
255
|
+
temp_output = Path(tempfile.mkdtemp(prefix="c3x_analysis_"))
|
|
256
|
+
|
|
257
|
+
# Run full C3.x analysis
|
|
258
|
+
analyze_codebase(
|
|
259
|
+
directory=directory,
|
|
260
|
+
output_dir=temp_output,
|
|
261
|
+
depth="deep",
|
|
262
|
+
languages=None, # All languages
|
|
263
|
+
file_patterns=None, # All files
|
|
264
|
+
build_api_reference=True,
|
|
265
|
+
build_dependency_graph=True,
|
|
266
|
+
detect_patterns=True,
|
|
267
|
+
extract_test_examples=True,
|
|
268
|
+
build_how_to_guides=True,
|
|
269
|
+
extract_config_patterns=True,
|
|
270
|
+
enhance_with_ai=False, # Disable AI for speed
|
|
271
|
+
ai_mode="none",
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
# Load C3.x results from output files
|
|
275
|
+
c3x_data = self._load_c3x_results(temp_output)
|
|
276
|
+
|
|
277
|
+
# Merge with basic analysis
|
|
278
|
+
c3x = {**basic, "analysis_type": "c3x", **c3x_data}
|
|
279
|
+
|
|
280
|
+
print("✅ C3.x analysis complete!")
|
|
281
|
+
print(f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected")
|
|
282
|
+
print(f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted")
|
|
283
|
+
print(f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated")
|
|
284
|
+
print(f" - {len(c3x_data.get('c3_4_configs', []))} config files analyzed")
|
|
285
|
+
print(f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found")
|
|
286
|
+
|
|
287
|
+
return c3x
|
|
288
|
+
|
|
289
|
+
except Exception as e:
|
|
290
|
+
print(f"⚠️ C3.x analysis failed: {e}")
|
|
291
|
+
print(" Falling back to basic analysis with placeholders")
|
|
292
|
+
|
|
293
|
+
# Fall back to placeholders
|
|
294
|
+
c3x = {
|
|
295
|
+
**basic,
|
|
296
|
+
"analysis_type": "c3x",
|
|
297
|
+
"c3_1_patterns": [],
|
|
298
|
+
"c3_2_examples": [],
|
|
299
|
+
"c3_2_examples_count": 0,
|
|
300
|
+
"c3_3_guides": [],
|
|
301
|
+
"c3_4_configs": [],
|
|
302
|
+
"c3_7_architecture": [],
|
|
303
|
+
"error": str(e),
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
return c3x
|
|
307
|
+
|
|
308
|
+
def _load_c3x_results(self, output_dir: Path) -> dict:
|
|
309
|
+
"""
|
|
310
|
+
Load C3.x analysis results from output directory.
|
|
311
|
+
|
|
312
|
+
Args:
|
|
313
|
+
output_dir: Directory containing C3.x analysis output
|
|
314
|
+
|
|
315
|
+
Returns:
|
|
316
|
+
Dict with C3.x data (c3_1_patterns, c3_2_examples, etc.)
|
|
317
|
+
"""
|
|
318
|
+
import json
|
|
319
|
+
|
|
320
|
+
c3x_data = {}
|
|
321
|
+
|
|
322
|
+
# C3.1: Design Patterns
|
|
323
|
+
patterns_file = output_dir / "patterns" / "design_patterns.json"
|
|
324
|
+
if patterns_file.exists():
|
|
325
|
+
with open(patterns_file) as f:
|
|
326
|
+
patterns_data = json.load(f)
|
|
327
|
+
c3x_data["c3_1_patterns"] = patterns_data.get("patterns", [])
|
|
328
|
+
else:
|
|
329
|
+
c3x_data["c3_1_patterns"] = []
|
|
330
|
+
|
|
331
|
+
# C3.2: Test Examples
|
|
332
|
+
examples_file = output_dir / "test_examples" / "test_examples.json"
|
|
333
|
+
if examples_file.exists():
|
|
334
|
+
with open(examples_file) as f:
|
|
335
|
+
examples_data = json.load(f)
|
|
336
|
+
c3x_data["c3_2_examples"] = examples_data.get("examples", [])
|
|
337
|
+
c3x_data["c3_2_examples_count"] = examples_data.get("total_examples", 0)
|
|
338
|
+
else:
|
|
339
|
+
c3x_data["c3_2_examples"] = []
|
|
340
|
+
c3x_data["c3_2_examples_count"] = 0
|
|
341
|
+
|
|
342
|
+
# C3.3: How-to Guides
|
|
343
|
+
guides_file = output_dir / "tutorials" / "guide_collection.json"
|
|
344
|
+
if guides_file.exists():
|
|
345
|
+
with open(guides_file) as f:
|
|
346
|
+
guides_data = json.load(f)
|
|
347
|
+
c3x_data["c3_3_guides"] = guides_data.get("guides", [])
|
|
348
|
+
else:
|
|
349
|
+
c3x_data["c3_3_guides"] = []
|
|
350
|
+
|
|
351
|
+
# C3.4: Config Patterns
|
|
352
|
+
config_file = output_dir / "config_patterns" / "config_patterns.json"
|
|
353
|
+
if config_file.exists():
|
|
354
|
+
with open(config_file) as f:
|
|
355
|
+
config_data = json.load(f)
|
|
356
|
+
c3x_data["c3_4_configs"] = config_data.get("config_files", [])
|
|
357
|
+
else:
|
|
358
|
+
c3x_data["c3_4_configs"] = []
|
|
359
|
+
|
|
360
|
+
# C3.7: Architecture
|
|
361
|
+
arch_file = output_dir / "architecture" / "architectural_patterns.json"
|
|
362
|
+
if arch_file.exists():
|
|
363
|
+
with open(arch_file) as f:
|
|
364
|
+
arch_data = json.load(f)
|
|
365
|
+
c3x_data["c3_7_architecture"] = arch_data.get("patterns", [])
|
|
366
|
+
else:
|
|
367
|
+
c3x_data["c3_7_architecture"] = []
|
|
368
|
+
|
|
369
|
+
# Add dependency graph data
|
|
370
|
+
dep_file = output_dir / "dependencies" / "dependency_graph.json"
|
|
371
|
+
if dep_file.exists():
|
|
372
|
+
with open(dep_file) as f:
|
|
373
|
+
dep_data = json.load(f)
|
|
374
|
+
c3x_data["dependency_graph"] = dep_data
|
|
375
|
+
|
|
376
|
+
# Add API reference data
|
|
377
|
+
api_file = output_dir / "code_analysis.json"
|
|
378
|
+
if api_file.exists():
|
|
379
|
+
with open(api_file) as f:
|
|
380
|
+
api_data = json.load(f)
|
|
381
|
+
c3x_data["api_reference"] = api_data
|
|
382
|
+
|
|
383
|
+
return c3x_data
|
|
384
|
+
|
|
385
|
+
def is_github_url(self, source: str) -> bool:
    """
    Check whether *source* refers to a GitHub repository.

    Recognizes http(s) URLs whose host is github.com (or a subdomain),
    scp/ssh-style remotes (git@github.com:owner/repo), and bare
    scheme-less forms like "github.com/owner/repo".

    Bug fix: the previous substring test (`"github.com" in source`)
    misclassified local paths that merely contain "github.com"
    (e.g. "/tmp/github.com/clone") as GitHub URLs.

    Args:
        source: Source string (URL or path)

    Returns:
        True if GitHub URL, False otherwise
    """
    from urllib.parse import urlparse

    # Git remote forms that urlparse does not handle as http(s).
    if source.startswith(("git@github.com:", "ssh://git@github.com/")):
        return True
    # Bare host form without a scheme, e.g. "github.com/owner/repo".
    if source.startswith(("github.com/", "www.github.com/")):
        return True

    parsed = urlparse(source)
    if parsed.scheme in ("http", "https"):
        host = parsed.netloc.lower()
        return host == "github.com" or host.endswith(".github.com")
    return False
|
|
396
|
+
|
|
397
|
+
def list_files(self, directory: Path) -> list[dict]:
|
|
398
|
+
"""
|
|
399
|
+
List all files in directory with metadata.
|
|
400
|
+
|
|
401
|
+
Args:
|
|
402
|
+
directory: Directory to scan
|
|
403
|
+
|
|
404
|
+
Returns:
|
|
405
|
+
List of file info dicts
|
|
406
|
+
"""
|
|
407
|
+
files = []
|
|
408
|
+
for file_path in directory.rglob("*"):
|
|
409
|
+
if file_path.is_file():
|
|
410
|
+
try:
|
|
411
|
+
files.append(
|
|
412
|
+
{
|
|
413
|
+
"path": str(file_path.relative_to(directory)),
|
|
414
|
+
"size": file_path.stat().st_size,
|
|
415
|
+
"extension": file_path.suffix,
|
|
416
|
+
}
|
|
417
|
+
)
|
|
418
|
+
except Exception:
|
|
419
|
+
# Skip files we can't access
|
|
420
|
+
continue
|
|
421
|
+
return files
|
|
422
|
+
|
|
423
|
+
def get_directory_structure(self, directory: Path) -> dict:
|
|
424
|
+
"""
|
|
425
|
+
Get directory structure tree.
|
|
426
|
+
|
|
427
|
+
Args:
|
|
428
|
+
directory: Directory to analyze
|
|
429
|
+
|
|
430
|
+
Returns:
|
|
431
|
+
Dict representing directory structure
|
|
432
|
+
"""
|
|
433
|
+
structure = {"name": directory.name, "type": "directory", "children": []}
|
|
434
|
+
|
|
435
|
+
try:
|
|
436
|
+
for item in sorted(directory.iterdir()):
|
|
437
|
+
if item.name.startswith("."):
|
|
438
|
+
continue # Skip hidden files
|
|
439
|
+
|
|
440
|
+
if item.is_dir():
|
|
441
|
+
# Only include immediate subdirectories
|
|
442
|
+
structure["children"].append({"name": item.name, "type": "directory"})
|
|
443
|
+
elif item.is_file():
|
|
444
|
+
structure["children"].append(
|
|
445
|
+
{"name": item.name, "type": "file", "extension": item.suffix}
|
|
446
|
+
)
|
|
447
|
+
except Exception:
|
|
448
|
+
pass
|
|
449
|
+
|
|
450
|
+
return structure
|
|
451
|
+
|
|
452
|
+
def extract_imports(self, directory: Path) -> dict[str, list[str]]:
|
|
453
|
+
"""
|
|
454
|
+
Extract import statements from code files.
|
|
455
|
+
|
|
456
|
+
Args:
|
|
457
|
+
directory: Directory to scan
|
|
458
|
+
|
|
459
|
+
Returns:
|
|
460
|
+
Dict mapping file extensions to import lists
|
|
461
|
+
"""
|
|
462
|
+
imports = {".py": [], ".js": [], ".ts": []}
|
|
463
|
+
|
|
464
|
+
# Sample up to 10 files per extension
|
|
465
|
+
for ext in imports:
|
|
466
|
+
files = list(directory.rglob(f"*{ext}"))[:10]
|
|
467
|
+
for file_path in files:
|
|
468
|
+
try:
|
|
469
|
+
content = file_path.read_text(encoding="utf-8")
|
|
470
|
+
if ext == ".py":
|
|
471
|
+
# Extract Python imports
|
|
472
|
+
for line in content.split("\n")[:50]: # Check first 50 lines
|
|
473
|
+
if line.strip().startswith(("import ", "from ")):
|
|
474
|
+
imports[ext].append(line.strip())
|
|
475
|
+
elif ext in [".js", ".ts"]:
|
|
476
|
+
# Extract JS/TS imports
|
|
477
|
+
for line in content.split("\n")[:50]:
|
|
478
|
+
if line.strip().startswith(("import ", "require(")):
|
|
479
|
+
imports[ext].append(line.strip())
|
|
480
|
+
except Exception:
|
|
481
|
+
continue
|
|
482
|
+
|
|
483
|
+
# Remove empty lists
|
|
484
|
+
return {k: v for k, v in imports.items() if v}
|
|
485
|
+
|
|
486
|
+
def find_entry_points(self, directory: Path) -> list[str]:
|
|
487
|
+
"""
|
|
488
|
+
Find potential entry points (main files, setup files, etc.).
|
|
489
|
+
|
|
490
|
+
Args:
|
|
491
|
+
directory: Directory to scan
|
|
492
|
+
|
|
493
|
+
Returns:
|
|
494
|
+
List of entry point file paths
|
|
495
|
+
"""
|
|
496
|
+
entry_points = []
|
|
497
|
+
|
|
498
|
+
# Common entry point patterns
|
|
499
|
+
entry_patterns = [
|
|
500
|
+
"main.py",
|
|
501
|
+
"__main__.py",
|
|
502
|
+
"app.py",
|
|
503
|
+
"server.py",
|
|
504
|
+
"index.js",
|
|
505
|
+
"index.ts",
|
|
506
|
+
"main.js",
|
|
507
|
+
"main.ts",
|
|
508
|
+
"setup.py",
|
|
509
|
+
"pyproject.toml",
|
|
510
|
+
"package.json",
|
|
511
|
+
"Makefile",
|
|
512
|
+
"docker-compose.yml",
|
|
513
|
+
"Dockerfile",
|
|
514
|
+
]
|
|
515
|
+
|
|
516
|
+
for pattern in entry_patterns:
|
|
517
|
+
matches = list(directory.rglob(pattern))
|
|
518
|
+
for match in matches:
|
|
519
|
+
try:
|
|
520
|
+
entry_points.append(str(match.relative_to(directory)))
|
|
521
|
+
except Exception:
|
|
522
|
+
continue
|
|
523
|
+
|
|
524
|
+
return entry_points
|
|
525
|
+
|
|
526
|
+
def compute_statistics(self, directory: Path) -> dict:
|
|
527
|
+
"""
|
|
528
|
+
Compute basic statistics about the codebase.
|
|
529
|
+
|
|
530
|
+
Args:
|
|
531
|
+
directory: Directory to analyze
|
|
532
|
+
|
|
533
|
+
Returns:
|
|
534
|
+
Dict with statistics
|
|
535
|
+
"""
|
|
536
|
+
stats = {
|
|
537
|
+
"total_files": 0,
|
|
538
|
+
"total_size_bytes": 0,
|
|
539
|
+
"file_types": {},
|
|
540
|
+
"languages": {},
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
for file_path in directory.rglob("*"):
|
|
544
|
+
if not file_path.is_file():
|
|
545
|
+
continue
|
|
546
|
+
|
|
547
|
+
try:
|
|
548
|
+
stats["total_files"] += 1
|
|
549
|
+
stats["total_size_bytes"] += file_path.stat().st_size
|
|
550
|
+
|
|
551
|
+
ext = file_path.suffix
|
|
552
|
+
if ext:
|
|
553
|
+
stats["file_types"][ext] = stats["file_types"].get(ext, 0) + 1
|
|
554
|
+
|
|
555
|
+
# Map extensions to languages
|
|
556
|
+
language_map = {
|
|
557
|
+
".py": "Python",
|
|
558
|
+
".js": "JavaScript",
|
|
559
|
+
".ts": "TypeScript",
|
|
560
|
+
".go": "Go",
|
|
561
|
+
".rs": "Rust",
|
|
562
|
+
".java": "Java",
|
|
563
|
+
".rb": "Ruby",
|
|
564
|
+
".php": "PHP",
|
|
565
|
+
}
|
|
566
|
+
if ext in language_map:
|
|
567
|
+
lang = language_map[ext]
|
|
568
|
+
stats["languages"][lang] = stats["languages"].get(lang, 0) + 1
|
|
569
|
+
except Exception:
|
|
570
|
+
continue
|
|
571
|
+
|
|
572
|
+
return stats
|