skill-seekers 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1225 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Codebase Scraper CLI Tool
|
|
4
|
+
|
|
5
|
+
Standalone tool for analyzing local codebases without GitHub API.
|
|
6
|
+
Extracts code signatures, comments, and optionally generates API documentation.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
codebase-scraper --directory /path/to/repo --output output/codebase/
|
|
10
|
+
codebase-scraper --directory . --depth deep --languages Python,JavaScript
|
|
11
|
+
codebase-scraper --directory /path/to/repo --build-api-reference
|
|
12
|
+
|
|
13
|
+
Features:
|
|
14
|
+
- File tree walking with .gitignore support
|
|
15
|
+
- Multi-language code analysis (9 languages: Python, JavaScript/TypeScript, C/C++, C#, Go, Rust, Java, Ruby, PHP)
|
|
16
|
+
- API reference generation
|
|
17
|
+
- Comment extraction
|
|
18
|
+
- Dependency graph analysis
|
|
19
|
+
- Configurable depth levels
|
|
20
|
+
|
|
21
|
+
Credits:
|
|
22
|
+
- Language parsing patterns inspired by official language specifications
|
|
23
|
+
- NetworkX for dependency graph analysis: https://networkx.org/
|
|
24
|
+
- pathspec for .gitignore support: https://pypi.org/project/pathspec/
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import json
|
|
29
|
+
import logging
|
|
30
|
+
import os
|
|
31
|
+
import sys
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import Any
|
|
34
|
+
|
|
35
|
+
# Add parent directory to path for imports
|
|
36
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
37
|
+
|
|
38
|
+
from skill_seekers.cli.api_reference_builder import APIReferenceBuilder
|
|
39
|
+
from skill_seekers.cli.code_analyzer import CodeAnalyzer
|
|
40
|
+
from skill_seekers.cli.config_extractor import ConfigExtractor
|
|
41
|
+
from skill_seekers.cli.dependency_analyzer import DependencyAnalyzer
|
|
42
|
+
|
|
43
|
+
# Try to import pathspec for .gitignore support
|
|
44
|
+
try:
|
|
45
|
+
import pathspec
|
|
46
|
+
|
|
47
|
+
PATHSPEC_AVAILABLE = True
|
|
48
|
+
except ImportError:
|
|
49
|
+
PATHSPEC_AVAILABLE = False
|
|
50
|
+
|
|
51
|
+
# Configure logging
|
|
52
|
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
53
|
+
logger = logging.getLogger(__name__)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Language extension mapping
|
|
57
|
+
# Languages recognized by the scraper, keyed by language name for easy review;
# inverted below into the suffix -> language lookup the rest of the module uses.
_EXTENSIONS_BY_LANGUAGE = {
    "Python": (".py",),
    "JavaScript": (".js", ".jsx"),
    "TypeScript": (".ts", ".tsx"),
    "C++": (".cpp", ".cc", ".cxx", ".h", ".hpp", ".hxx"),
    "C": (".c",),
    "C#": (".cs",),
    "Go": (".go",),
    "Rust": (".rs",),
    "Java": (".java",),
    "Ruby": (".rb",),
    "PHP": (".php",),
}

# Lowercase file suffix -> language name (e.g. ".py" -> "Python").
LANGUAGE_EXTENSIONS = {
    extension: language
    for language, extensions in _EXTENSIONS_BY_LANGUAGE.items()
    for extension in extensions
}
|
|
77
|
+
|
|
78
|
+
# Default directories to exclude
|
|
79
|
+
# Directory names skipped during tree walks: dependency/venv dirs, VCS
# metadata, build artifacts, tool caches, and editor/IDE state.
# NOTE(review): "*.egg-info" is written like a glob pattern, but
# should_exclude_dir tests exact membership, so this entry only matches a
# directory literally named "*.egg-info" unless glob matching is applied.
DEFAULT_EXCLUDED_DIRS = set(
    (
        # Dependency / environment directories
        "node_modules",
        "venv",
        "__pypackages__",
        # Version control metadata
        ".git",
        ".svn",
        ".hg",
        # Build artifacts
        "build",
        "dist",
        "target",
        ".eggs",
        "*.egg-info",
        # Tool caches and coverage output
        "__pycache__",
        ".pytest_cache",
        ".tox",
        ".mypy_cache",
        "htmlcov",
        "coverage",
        ".coverage",
        # Editor / IDE state
        ".idea",
        ".vscode",
        ".vs",
    )
)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def detect_language(file_path: Path) -> str:
    """Map *file_path*'s extension to a language name.

    Args:
        file_path: Source file whose suffix is inspected (case-insensitive).

    Returns:
        The language name from LANGUAGE_EXTENSIONS, or "Unknown" when the
        suffix is not recognized.
    """
    suffix = file_path.suffix.lower()
    return LANGUAGE_EXTENSIONS.get(suffix, "Unknown")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def load_gitignore(directory: Path) -> "pathspec.PathSpec | None":
    """
    Load .gitignore file and create pathspec matcher.

    The return annotation is quoted deliberately: ``pathspec`` is an optional
    dependency imported in a try/except at module level, and an unquoted
    annotation is evaluated at definition time, which would raise NameError
    and break module import whenever pathspec is not installed.

    Args:
        directory: Root directory to search for .gitignore

    Returns:
        PathSpec object if pathspec is installed and a readable .gitignore
        exists, None otherwise (missing dependency, missing file, or a
        read/parse failure — each case is logged, never raised)
    """
    if not PATHSPEC_AVAILABLE:
        logger.warning("pathspec not installed - .gitignore support disabled")
        logger.warning("Install with: pip install pathspec")
        return None

    gitignore_path = directory / ".gitignore"
    if not gitignore_path.exists():
        logger.debug(f"No .gitignore found in {directory}")
        return None

    try:
        with open(gitignore_path, encoding="utf-8") as f:
            # "gitwildmatch" gives git-compatible wildcard semantics.
            spec = pathspec.PathSpec.from_lines("gitwildmatch", f)
        logger.info(f"Loaded .gitignore from {gitignore_path}")
        return spec
    except Exception as e:
        # Best-effort: a malformed .gitignore should not abort the scan.
        logger.warning(f"Failed to load .gitignore: {e}")
        return None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def should_exclude_dir(dir_name: str, excluded_dirs: set) -> bool:
    """
    Check if directory should be excluded from analysis.

    A directory is excluded when its name is an exact member of
    ``excluded_dirs`` or matches one of the glob-style entries.  The glob
    fallback fixes entries such as "*.egg-info" in DEFAULT_EXCLUDED_DIRS,
    which could never match a real ``foo.egg-info`` directory under the
    previous exact-membership-only check.

    Args:
        dir_name: Directory name (a single path component, not a full path)
        excluded_dirs: Set of directory names and/or glob patterns to exclude

    Returns:
        True if directory should be excluded
    """
    import fnmatch

    # Fast path: exact name match (covers every non-wildcard entry).
    if dir_name in excluded_dirs:
        return True
    # Slow path: glob matching, only for entries that contain glob metachars.
    return any(
        fnmatch.fnmatch(dir_name, pattern)
        for pattern in excluded_dirs
        if "*" in pattern or "?" in pattern or "[" in pattern
    )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def walk_directory(
    root: Path,
    patterns: list[str] | None = None,
    gitignore_spec: "pathspec.PathSpec | None" = None,
    excluded_dirs: set | None = None,
) -> list[Path]:
    """
    Walk directory tree and collect source files.

    The ``gitignore_spec`` annotation is quoted deliberately: ``pathspec`` is
    an optional dependency imported in a try/except at module level, and an
    unquoted annotation is evaluated at definition time, which would raise
    NameError and break module import whenever pathspec is not installed.

    Args:
        root: Root directory to walk
        patterns: Optional file patterns to include (e.g., ['*.py', '*.js'])
        gitignore_spec: Optional PathSpec object for .gitignore rules
        excluded_dirs: Set of directory names to exclude (defaults to
            DEFAULT_EXCLUDED_DIRS)

    Returns:
        Sorted list of source file paths whose extension appears in
        LANGUAGE_EXTENSIONS
    """
    if excluded_dirs is None:
        excluded_dirs = DEFAULT_EXCLUDED_DIRS

    files = []
    root = Path(root).resolve()

    for dirpath, dirnames, filenames in os.walk(root):
        current_dir = Path(dirpath)

        # Filter out excluded directories (in-place modification so os.walk
        # never descends into them)
        dirnames[:] = [d for d in dirnames if not should_exclude_dir(d, excluded_dirs)]

        for filename in filenames:
            file_path = current_dir / filename

            # Check .gitignore rules
            if gitignore_spec:
                try:
                    rel_path = file_path.relative_to(root)
                    if gitignore_spec.match_file(str(rel_path)):
                        logger.debug(f"Skipping (gitignore): {rel_path}")
                        continue
                except ValueError:
                    # File is outside root, skip it
                    continue

            # Check file extension
            if file_path.suffix.lower() not in LANGUAGE_EXTENSIONS:
                continue

            # Check file patterns if provided
            if patterns and not any(file_path.match(pattern) for pattern in patterns):
                continue

            files.append(file_path)

    return sorted(files)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def analyze_codebase(
    directory: Path,
    output_dir: Path,
    depth: str = "deep",
    languages: list[str] | None = None,
    file_patterns: list[str] | None = None,
    build_api_reference: bool = True,
    extract_comments: bool = True,
    build_dependency_graph: bool = True,
    detect_patterns: bool = True,
    extract_test_examples: bool = True,
    build_how_to_guides: bool = True,
    extract_config_patterns: bool = True,
    enhance_with_ai: bool = True,
    ai_mode: str = "auto",
) -> dict[str, Any]:
    """
    Analyze local codebase and extract code knowledge.

    Orchestrates the full pipeline: file discovery, per-file signature
    analysis, then a series of optional stages (API reference, dependency
    graph, design patterns, test examples, how-to guides, config patterns,
    architectural patterns), each writing its artifacts under *output_dir*.
    Per-file and per-stage failures are logged as warnings, not raised.

    Args:
        directory: Directory to analyze
        output_dir: Output directory for results
        depth: Analysis depth (surface, deep, full)
        languages: Optional list of languages to analyze
        file_patterns: Optional file patterns to include
        build_api_reference: Generate API reference markdown
        extract_comments: Extract inline comments
            (NOTE(review): currently unused in this function body — presumably
            honored by CodeAnalyzer elsewhere; confirm)
        build_dependency_graph: Generate dependency graph and detect circular dependencies
        detect_patterns: Detect design patterns (Singleton, Factory, Observer, etc.)
        extract_test_examples: Extract usage examples from test files
        build_how_to_guides: Build how-to guides from workflow examples (C3.3)
        extract_config_patterns: Extract configuration patterns from config files (C3.4)
        enhance_with_ai: Enhance patterns and examples with AI analysis (C3.6)
        ai_mode: AI enhancement mode for how-to guides (auto, api, local, none)

    Returns:
        Analysis results dictionary with a "files" list of per-file analyses
        (each entry carries "file", "language", plus the analyzer's output)
    """
    # Resolve directory to absolute path to avoid relative_to() errors
    directory = Path(directory).resolve()

    logger.info(f"Analyzing codebase: {directory}")
    logger.info(f"Depth: {depth}")

    # Create output directory
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load .gitignore (returns None when pathspec is unavailable or no file)
    gitignore_spec = load_gitignore(directory)

    # Walk directory tree
    logger.info("Scanning directory tree...")
    files = walk_directory(directory, patterns=file_patterns, gitignore_spec=gitignore_spec)

    logger.info(f"Found {len(files)} source files")

    # Filter by language if specified
    if languages:
        language_set = set(languages)
        files = [f for f in files if detect_language(f) in language_set]
        logger.info(f"Filtered to {len(files)} files for languages: {', '.join(languages)}")

    # Initialize code analyzer
    analyzer = CodeAnalyzer(depth=depth)

    # Analyze each file
    results: dict[str, Any] = {"files": []}
    analyzed_count = 0

    for file_path in files:
        try:
            # errors="ignore" so a bad byte sequence cannot abort the scan
            content = file_path.read_text(encoding="utf-8", errors="ignore")
            language = detect_language(file_path)

            if language == "Unknown":
                continue

            # Analyze file
            analysis = analyzer.analyze_file(str(file_path), content, language)

            # Only include files with actual analysis results
            if analysis and (analysis.get("classes") or analysis.get("functions")):
                results["files"].append(
                    {
                        "file": str(file_path.relative_to(directory)),
                        "language": language,
                        **analysis,
                    }
                )
                analyzed_count += 1

                if analyzed_count % 10 == 0:
                    logger.info(f"Analyzed {analyzed_count}/{len(files)} files...")

        except Exception as e:
            # Best-effort: one unreadable/unparsable file must not stop the run
            logger.warning(f"Error analyzing {file_path}: {e}")
            continue

    logger.info(f"✅ Successfully analyzed {analyzed_count} files")

    # Save results
    output_json = output_dir / "code_analysis.json"
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    logger.info(f"📁 Saved analysis to: {output_json}")

    # Build API reference if requested
    if build_api_reference and results["files"]:
        logger.info("Building API reference documentation...")
        builder = APIReferenceBuilder(results)
        api_output_dir = output_dir / "api_reference"
        generated_files = builder.build_reference(api_output_dir)
        logger.info(f"✅ Generated {len(generated_files)} API reference files")
        logger.info(f"📁 API reference: {api_output_dir}")

    # Build dependency graph if requested (C2.6)
    if build_dependency_graph:
        logger.info("Building dependency graph...")
        dep_analyzer = DependencyAnalyzer()

        # Analyze dependencies for all files
        for file_path in files:
            try:
                content = file_path.read_text(encoding="utf-8", errors="ignore")
                language = detect_language(file_path)

                if language != "Unknown":
                    # Use relative path from directory for better graph readability
                    rel_path = str(file_path.relative_to(directory))
                    dep_analyzer.analyze_file(rel_path, content, language)
            except Exception as e:
                logger.warning(f"Error analyzing dependencies for {file_path}: {e}")
                continue

        # Build the graph
        # NOTE(review): the return value is unused below; build_graph()
        # presumably populates internal state used by detect_cycles() and
        # the export_* methods — confirm against DependencyAnalyzer.
        graph = dep_analyzer.build_graph()

        # Detect circular dependencies
        cycles = dep_analyzer.detect_cycles()
        if cycles:
            logger.warning(f"⚠️ Found {len(cycles)} circular dependencies:")
            for i, cycle in enumerate(cycles[:5], 1):  # Show first 5
                cycle_str = " → ".join(cycle) + f" → {cycle[0]}"
                logger.warning(f" {i}. {cycle_str}")
            if len(cycles) > 5:
                logger.warning(f" ... and {len(cycles) - 5} more")
        else:
            logger.info("✅ No circular dependencies found")

        # Save dependency graph data
        dep_output_dir = output_dir / "dependencies"
        dep_output_dir.mkdir(parents=True, exist_ok=True)

        # Export as JSON
        dep_json = dep_output_dir / "dependency_graph.json"
        with open(dep_json, "w", encoding="utf-8") as f:
            json.dump(dep_analyzer.export_json(), f, indent=2)
        logger.info(f"📁 Saved dependency graph: {dep_json}")

        # Export as Mermaid diagram
        mermaid_file = dep_output_dir / "dependency_graph.mmd"
        mermaid_file.write_text(dep_analyzer.export_mermaid())
        logger.info(f"📁 Saved Mermaid diagram: {mermaid_file}")

        # Save statistics
        stats = dep_analyzer.get_statistics()
        stats_file = dep_output_dir / "statistics.json"
        with open(stats_file, "w", encoding="utf-8") as f:
            json.dump(stats, f, indent=2)
        logger.info(
            f"📊 Statistics: {stats['total_files']} files, "
            f"{stats['total_dependencies']} dependencies, "
            f"{stats['circular_dependencies']} cycles"
        )

        # Try to export as DOT (requires pydot)
        try:
            dot_file = dep_output_dir / "dependency_graph.dot"
            dep_analyzer.export_dot(str(dot_file))
        except Exception:
            pass  # pydot not installed, skip DOT export

    # Detect design patterns if requested (C3.1)
    if detect_patterns:
        logger.info("Detecting design patterns...")
        # Imported lazily so the dependency is only paid when this stage runs
        from skill_seekers.cli.pattern_recognizer import PatternRecognizer

        pattern_recognizer = PatternRecognizer(depth=depth, enhance_with_ai=enhance_with_ai)
        pattern_results = []

        for file_path in files:
            try:
                content = file_path.read_text(encoding="utf-8", errors="ignore")
                language = detect_language(file_path)

                if language != "Unknown":
                    report = pattern_recognizer.analyze_file(str(file_path), content, language)

                    if report.patterns:
                        pattern_results.append(report.to_dict())
            except Exception as e:
                logger.warning(f"Pattern detection failed for {file_path}: {e}")
                continue

        # Save pattern results
        if pattern_results:
            pattern_output = output_dir / "patterns"
            pattern_output.mkdir(parents=True, exist_ok=True)

            pattern_json = pattern_output / "detected_patterns.json"
            with open(pattern_json, "w", encoding="utf-8") as f:
                json.dump(pattern_results, f, indent=2)

            total_patterns = sum(len(r["patterns"]) for r in pattern_results)
            logger.info(f"✅ Detected {total_patterns} patterns in {len(pattern_results)} files")
            logger.info(f"📁 Saved to: {pattern_json}")
        else:
            logger.info("No design patterns detected")

    # Extract test examples if requested (C3.2)
    if extract_test_examples:
        logger.info("Extracting usage examples from test files...")
        from skill_seekers.cli.test_example_extractor import TestExampleExtractor

        # Create extractor
        test_extractor = TestExampleExtractor(
            min_confidence=0.5,
            max_per_file=10,
            languages=languages,
            enhance_with_ai=enhance_with_ai,
        )

        # Extract examples from directory
        try:
            example_report = test_extractor.extract_from_directory(directory, recursive=True)

            if example_report.total_examples > 0:
                # Save results
                examples_output = output_dir / "test_examples"
                examples_output.mkdir(parents=True, exist_ok=True)

                # Save as JSON
                examples_json = examples_output / "test_examples.json"
                with open(examples_json, "w", encoding="utf-8") as f:
                    json.dump(example_report.to_dict(), f, indent=2)

                # Save as Markdown
                examples_md = examples_output / "test_examples.md"
                examples_md.write_text(example_report.to_markdown(), encoding="utf-8")

                logger.info(
                    f"✅ Extracted {example_report.total_examples} test examples "
                    f"({example_report.high_value_count} high-value)"
                )
                logger.info(f"📁 Saved to: {examples_output}")
            else:
                logger.info("No test examples extracted")

        except Exception as e:
            logger.warning(f"Test example extraction failed: {e}")
            # Sentinel consumed by the how-to-guide stage below
            example_report = None

    # Build how-to guides from workflow examples (C3.3)
    # Guides are derived from test examples, so this stage also requires
    # extract_test_examples to be enabled.
    if build_how_to_guides and extract_test_examples:
        logger.info("Building how-to guides from workflow examples...")
        try:
            from skill_seekers.cli.how_to_guide_builder import HowToGuideBuilder

            # Create guide builder
            guide_builder = HowToGuideBuilder(enhance_with_ai=enhance_with_ai)

            # Build guides from workflow examples
            tutorials_dir = output_dir / "tutorials"

            # Get workflow examples from the example_report if available
            # (locals() check guards against the extraction stage having
            # failed before example_report was ever bound)
            if (
                "example_report" in locals()
                and example_report
                and example_report.total_examples > 0
            ):
                # Convert example_report to list of dicts for processing
                examples_list = example_report.to_dict().get("examples", [])

                guide_collection = guide_builder.build_guides_from_examples(
                    examples_list,
                    grouping_strategy="ai-tutorial-group",
                    output_dir=tutorials_dir,
                    enhance_with_ai=enhance_with_ai,
                    ai_mode=ai_mode,
                )

                if guide_collection and guide_collection.total_guides > 0:
                    # Save collection summary
                    collection_json = tutorials_dir / "guide_collection.json"
                    with open(collection_json, "w", encoding="utf-8") as f:
                        json.dump(guide_collection.to_dict(), f, indent=2)

                    logger.info(f"✅ Built {guide_collection.total_guides} how-to guides")
                    logger.info(f"📁 Saved to: {tutorials_dir}")
                else:
                    logger.info("No how-to guides generated (insufficient workflow examples)")
            else:
                logger.info("No workflow examples available for guide generation")

        except Exception as e:
            logger.warning(f"How-to guide building failed: {e}")

    # Extract configuration patterns (C3.4)
    if extract_config_patterns:
        logger.info("Extracting configuration patterns...")
        try:
            config_extractor = ConfigExtractor()

            # Extract config patterns from directory
            extraction_result = config_extractor.extract_from_directory(directory)

            if extraction_result.config_files:
                # Convert to dict for enhancement
                result_dict = config_extractor.to_dict(extraction_result)

                # AI Enhancement (if enabled)
                if enhance_with_ai and ai_mode != "none":
                    try:
                        from skill_seekers.cli.config_enhancer import ConfigEnhancer

                        logger.info(f"🤖 Enhancing config analysis with AI (mode: {ai_mode})...")
                        enhancer = ConfigEnhancer(mode=ai_mode)
                        result_dict = enhancer.enhance_config_result(result_dict)
                        logger.info("✅ AI enhancement complete")
                    except Exception as e:
                        # Enhancement is optional; fall back to the raw result
                        logger.warning(f"⚠️ Config AI enhancement failed: {e}")

                # Save results
                config_output = output_dir / "config_patterns"
                config_output.mkdir(parents=True, exist_ok=True)

                # Save as JSON
                config_json = config_output / "config_patterns.json"
                with open(config_json, "w", encoding="utf-8") as f:
                    json.dump(result_dict, f, indent=2)

                # Save as Markdown (basic - AI enhancements in JSON only for now)
                config_md = config_output / "config_patterns.md"
                config_md.write_text(extraction_result.to_markdown(), encoding="utf-8")

                # Count total settings across all files
                total_settings = sum(len(cf.settings) for cf in extraction_result.config_files)
                total_patterns = sum(len(cf.patterns) for cf in extraction_result.config_files)

                logger.info(
                    f"✅ Extracted {len(extraction_result.config_files)} config files "
                    f"with {total_settings} settings and {total_patterns} detected patterns"
                )

                if "ai_enhancements" in result_dict:
                    insights = result_dict["ai_enhancements"].get("overall_insights", {})
                    if insights.get("security_issues_found"):
                        logger.info(
                            f"🔐 Security issues found: {insights['security_issues_found']}"
                        )

                logger.info(f"📁 Saved to: {config_output}")
            else:
                logger.info("No configuration files found")

        except Exception as e:
            logger.warning(f"Config pattern extraction failed: {e}")

    # Detect architectural patterns (C3.7)
    # Always run this - it provides high-level overview
    logger.info("Analyzing architectural patterns...")
    from skill_seekers.cli.architectural_pattern_detector import ArchitecturalPatternDetector

    arch_detector = ArchitecturalPatternDetector(enhance_with_ai=enhance_with_ai)
    arch_report = arch_detector.analyze(directory, results["files"])

    if arch_report.patterns:
        arch_output = output_dir / "architecture"
        arch_output.mkdir(parents=True, exist_ok=True)

        # Save as JSON
        arch_json = arch_output / "architectural_patterns.json"
        with open(arch_json, "w", encoding="utf-8") as f:
            json.dump(arch_report.to_dict(), f, indent=2)

        logger.info(f"🏗️ Detected {len(arch_report.patterns)} architectural patterns")
        for pattern in arch_report.patterns:
            logger.info(f" - {pattern.pattern_name} (confidence: {pattern.confidence:.2f})")
        logger.info(f"📁 Saved to: {arch_json}")
    else:
        logger.info("No clear architectural patterns detected")

    # Generate SKILL.md and references/ directory
    logger.info("Generating SKILL.md and references...")
    _generate_skill_md(
        output_dir=output_dir,
        directory=directory,
        results=results,
        depth=depth,
        build_api_reference=build_api_reference,
        build_dependency_graph=build_dependency_graph,
        detect_patterns=detect_patterns,
        extract_test_examples=extract_test_examples,
        extract_config_patterns=extract_config_patterns,
    )

    return results
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def _generate_skill_md(
    output_dir: Path,
    directory: Path,
    results: dict[str, Any],
    depth: str,
    build_api_reference: bool,
    build_dependency_graph: bool,
    detect_patterns: bool,
    extract_test_examples: bool,
    extract_config_patterns: bool,
) -> None:
    """
    Generate rich SKILL.md from codebase analysis results.

    Creates a 300+ line skill file with:
    - Front matter (name, description)
    - Repository info (path, languages, file count)
    - When to Use section
    - Quick Reference (patterns, languages, stats)
    - Code Examples (from test files)
    - API Reference (from code analysis)
    - Architecture Overview
    - Configuration Patterns
    - Available References

    Args:
        output_dir: Directory holding the analysis outputs; SKILL.md is
            written here and references/ is generated alongside it.
        directory: The analyzed repository root; its basename becomes the
            skill name.
        results: Analysis results; only ``results["files"]`` (a list of
            per-file dicts) is read here.
        depth: Analysis depth label, echoed verbatim into the document.
        build_api_reference / build_dependency_graph / detect_patterns /
        extract_test_examples / extract_config_patterns: Feature flags that
            control which optional sections/reference links are emitted.

    Side effects: writes ``output_dir/SKILL.md``, logs progress, and calls
    _generate_references() to copy analysis outputs under references/.
    """
    repo_name = directory.name

    # Generate skill name (lowercase, hyphens only, max 64 chars)
    skill_name = repo_name.lower().replace("_", "-").replace(" ", "-")[:64]

    # Generate description
    description = f"Local codebase analysis for {repo_name}"

    # Count files by language
    language_stats = _get_language_stats(results.get("files", []))
    total_files = len(results.get("files", []))

    # Start building content
    skill_content = f"""---
name: {skill_name}
description: {description}
---

# {repo_name} Codebase

## Description

Local codebase analysis and documentation generated from code analysis.

**Path:** `{directory}`
**Files Analyzed:** {total_files}
**Languages:** {", ".join(language_stats.keys())}
**Analysis Depth:** {depth}

## When to Use This Skill

Use this skill when you need to:
- Understand the codebase architecture and design patterns
- Find implementation examples and usage patterns
- Review API documentation extracted from code
- Check configuration patterns and best practices
- Explore test examples and real-world usage
- Navigate the codebase structure efficiently

## ⚡ Quick Reference

### Codebase Statistics

"""

    # Language breakdown, most common language first
    skill_content += "**Languages:**\n"
    for lang, count in sorted(language_stats.items(), key=lambda x: x[1], reverse=True):
        percentage = (count / total_files * 100) if total_files > 0 else 0
        skill_content += f"- **{lang}**: {count} files ({percentage:.1f}%)\n"
    skill_content += "\n"

    # Analysis features performed (architectural analysis always runs)
    skill_content += "**Analysis Performed:**\n"
    if build_api_reference:
        skill_content += "- ✅ API Reference (C2.5)\n"
    if build_dependency_graph:
        skill_content += "- ✅ Dependency Graph (C2.6)\n"
    if detect_patterns:
        skill_content += "- ✅ Design Patterns (C3.1)\n"
    if extract_test_examples:
        skill_content += "- ✅ Test Examples (C3.2)\n"
    if extract_config_patterns:
        skill_content += "- ✅ Configuration Patterns (C3.4)\n"
    skill_content += "- ✅ Architectural Analysis (C3.7)\n\n"

    # Add design patterns if available (each _format_* helper returns ""
    # when its underlying analysis output is missing or empty)
    if detect_patterns:
        patterns_content = _format_patterns_section(output_dir)
        if patterns_content:
            skill_content += patterns_content

    # Add code examples if available
    if extract_test_examples:
        examples_content = _format_examples_section(output_dir)
        if examples_content:
            skill_content += examples_content

    # Add API reference if available
    if build_api_reference:
        api_content = _format_api_section(output_dir)
        if api_content:
            skill_content += api_content

    # Add architecture if available (unconditional — see note above)
    arch_content = _format_architecture_section(output_dir)
    if arch_content:
        skill_content += arch_content

    # Add configuration patterns if available
    if extract_config_patterns:
        config_content = _format_config_section(output_dir)
        if config_content:
            skill_content += config_content

    # Available references — only link directories that actually exist
    skill_content += "## 📚 Available References\n\n"
    skill_content += "This skill includes detailed reference documentation:\n\n"

    refs_added = False
    if build_api_reference and (output_dir / "api_reference").exists():
        skill_content += (
            "- **API Reference**: `references/api_reference/` - Complete API documentation\n"
        )
        refs_added = True
    if build_dependency_graph and (output_dir / "dependencies").exists():
        skill_content += (
            "- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
        )
        refs_added = True
    if detect_patterns and (output_dir / "patterns").exists():
        skill_content += "- **Patterns**: `references/patterns/` - Detected design patterns\n"
        refs_added = True
    if extract_test_examples and (output_dir / "test_examples").exists():
        skill_content += "- **Examples**: `references/test_examples/` - Usage examples from tests\n"
        refs_added = True
    if extract_config_patterns and (output_dir / "config_patterns").exists():
        skill_content += (
            "- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
        )
        refs_added = True
    if (output_dir / "architecture").exists():
        skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n"
        refs_added = True

    if not refs_added:
        skill_content += "No additional references generated (analysis features disabled).\n"

    skill_content += "\n"

    # Footer
    skill_content += "---\n\n"
    skill_content += "**Generated by Skill Seeker** | Codebase Analyzer with C3.x Analysis\n"

    # Write SKILL.md
    skill_path = output_dir / "SKILL.md"
    skill_path.write_text(skill_content, encoding="utf-8")

    line_count = len(skill_content.split("\n"))
    logger.info(f"✅ Generated SKILL.md: {skill_path} ({line_count} lines)")

    # Generate references/ directory structure
    _generate_references(output_dir)
|
799
|
+
|
|
800
|
+
def _get_language_stats(files: list[dict]) -> dict[str, int]:
|
|
801
|
+
"""Count files by language from analysis results."""
|
|
802
|
+
stats = {}
|
|
803
|
+
for file_data in files:
|
|
804
|
+
# files is a list of dicts with 'language' key
|
|
805
|
+
lang = file_data.get("language", "Unknown")
|
|
806
|
+
if lang != "Unknown":
|
|
807
|
+
stats[lang] = stats.get(lang, 0) + 1
|
|
808
|
+
return stats
|
|
809
|
+
|
|
810
|
+
|
|
811
|
+
def _format_patterns_section(output_dir: Path) -> str:
|
|
812
|
+
"""Format design patterns section from patterns/detected_patterns.json."""
|
|
813
|
+
patterns_file = output_dir / "patterns" / "detected_patterns.json"
|
|
814
|
+
if not patterns_file.exists():
|
|
815
|
+
return ""
|
|
816
|
+
|
|
817
|
+
try:
|
|
818
|
+
with open(patterns_file, encoding="utf-8") as f:
|
|
819
|
+
patterns_data = json.load(f)
|
|
820
|
+
except Exception:
|
|
821
|
+
return ""
|
|
822
|
+
|
|
823
|
+
if not patterns_data:
|
|
824
|
+
return ""
|
|
825
|
+
|
|
826
|
+
# Count patterns by type (deduplicate by class, keep highest confidence)
|
|
827
|
+
pattern_counts = {}
|
|
828
|
+
by_class = {}
|
|
829
|
+
|
|
830
|
+
for pattern_file in patterns_data:
|
|
831
|
+
for pattern in pattern_file.get("patterns", []):
|
|
832
|
+
ptype = pattern.get("pattern_type", "Unknown")
|
|
833
|
+
cls = pattern.get("class_name", "")
|
|
834
|
+
confidence = pattern.get("confidence", 0)
|
|
835
|
+
|
|
836
|
+
# Skip low confidence
|
|
837
|
+
if confidence < 0.7:
|
|
838
|
+
continue
|
|
839
|
+
|
|
840
|
+
# Deduplicate by class
|
|
841
|
+
key = f"{cls}:{ptype}"
|
|
842
|
+
if key not in by_class or by_class[key]["confidence"] < confidence:
|
|
843
|
+
by_class[key] = pattern
|
|
844
|
+
|
|
845
|
+
# Count by type
|
|
846
|
+
pattern_counts[ptype] = pattern_counts.get(ptype, 0) + 1
|
|
847
|
+
|
|
848
|
+
if not pattern_counts:
|
|
849
|
+
return ""
|
|
850
|
+
|
|
851
|
+
content = "### 🎨 Design Patterns Detected\n\n"
|
|
852
|
+
content += "*From C3.1 codebase analysis (confidence > 0.7)*\n\n"
|
|
853
|
+
|
|
854
|
+
# Top 5 pattern types
|
|
855
|
+
for ptype, count in sorted(pattern_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
|
|
856
|
+
content += f"- **{ptype}**: {count} instances\n"
|
|
857
|
+
|
|
858
|
+
content += f"\n*Total: {len(by_class)} high-confidence patterns*\n\n"
|
|
859
|
+
content += "*See `references/patterns/` for complete pattern analysis*\n\n"
|
|
860
|
+
return content
|
|
861
|
+
|
|
862
|
+
|
|
863
|
+
def _format_examples_section(output_dir: Path) -> str:
|
|
864
|
+
"""Format code examples section from test_examples/test_examples.json."""
|
|
865
|
+
examples_file = output_dir / "test_examples" / "test_examples.json"
|
|
866
|
+
if not examples_file.exists():
|
|
867
|
+
return ""
|
|
868
|
+
|
|
869
|
+
try:
|
|
870
|
+
with open(examples_file, encoding="utf-8") as f:
|
|
871
|
+
examples_data = json.load(f)
|
|
872
|
+
except Exception:
|
|
873
|
+
return ""
|
|
874
|
+
|
|
875
|
+
examples = examples_data.get("examples", [])
|
|
876
|
+
if not examples:
|
|
877
|
+
return ""
|
|
878
|
+
|
|
879
|
+
# Filter high-value examples (complexity > 0.7)
|
|
880
|
+
high_value = [ex for ex in examples if ex.get("complexity_score", 0) > 0.7]
|
|
881
|
+
|
|
882
|
+
if not high_value:
|
|
883
|
+
# If no high complexity, take any examples
|
|
884
|
+
high_value = examples[:10]
|
|
885
|
+
|
|
886
|
+
if not high_value:
|
|
887
|
+
return ""
|
|
888
|
+
|
|
889
|
+
content = "## 📝 Code Examples\n\n"
|
|
890
|
+
content += "*High-quality examples extracted from test files (C3.2)*\n\n"
|
|
891
|
+
|
|
892
|
+
# Top 10 examples
|
|
893
|
+
for ex in sorted(high_value, key=lambda x: x.get("complexity_score", 0), reverse=True)[:10]:
|
|
894
|
+
desc = ex.get("description", "Example")
|
|
895
|
+
lang = ex.get("language", "python").lower()
|
|
896
|
+
code = ex.get("code", "")
|
|
897
|
+
complexity = ex.get("complexity_score", 0)
|
|
898
|
+
|
|
899
|
+
content += f"**{desc}** (complexity: {complexity:.2f})\n\n"
|
|
900
|
+
content += f"```{lang}\n{code}\n```\n\n"
|
|
901
|
+
|
|
902
|
+
content += "*See `references/test_examples/` for all extracted examples*\n\n"
|
|
903
|
+
return content
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
def _format_api_section(output_dir: Path) -> str:
|
|
907
|
+
"""Format API reference section."""
|
|
908
|
+
api_dir = output_dir / "api_reference"
|
|
909
|
+
if not api_dir.exists():
|
|
910
|
+
return ""
|
|
911
|
+
|
|
912
|
+
api_md = api_dir / "api_reference.md"
|
|
913
|
+
if not api_md.exists():
|
|
914
|
+
return ""
|
|
915
|
+
|
|
916
|
+
try:
|
|
917
|
+
api_content = api_md.read_text(encoding="utf-8")
|
|
918
|
+
except Exception:
|
|
919
|
+
return ""
|
|
920
|
+
|
|
921
|
+
# Extract first section (up to 500 chars)
|
|
922
|
+
preview = api_content[:500]
|
|
923
|
+
if len(api_content) > 500:
|
|
924
|
+
preview += "..."
|
|
925
|
+
|
|
926
|
+
content = "## 🔧 API Reference\n\n"
|
|
927
|
+
content += "*Extracted from codebase analysis (C2.5)*\n\n"
|
|
928
|
+
content += preview + "\n\n"
|
|
929
|
+
content += "*See `references/api_reference/` for complete API documentation*\n\n"
|
|
930
|
+
return content
|
|
931
|
+
|
|
932
|
+
|
|
933
|
+
def _format_architecture_section(output_dir: Path) -> str:
|
|
934
|
+
"""Format architecture section from architecture/architectural_patterns.json."""
|
|
935
|
+
arch_file = output_dir / "architecture" / "architectural_patterns.json"
|
|
936
|
+
if not arch_file.exists():
|
|
937
|
+
return ""
|
|
938
|
+
|
|
939
|
+
try:
|
|
940
|
+
with open(arch_file, encoding="utf-8") as f:
|
|
941
|
+
arch_data = json.load(f)
|
|
942
|
+
except Exception:
|
|
943
|
+
return ""
|
|
944
|
+
|
|
945
|
+
patterns = arch_data.get("patterns", [])
|
|
946
|
+
if not patterns:
|
|
947
|
+
return ""
|
|
948
|
+
|
|
949
|
+
content = "## 🏗️ Architecture Overview\n\n"
|
|
950
|
+
content += "*From C3.7 architectural analysis*\n\n"
|
|
951
|
+
|
|
952
|
+
content += "**Detected Architectural Patterns:**\n\n"
|
|
953
|
+
for pattern in patterns[:5]:
|
|
954
|
+
name = pattern.get("pattern_name", "Unknown")
|
|
955
|
+
confidence = pattern.get("confidence", 0)
|
|
956
|
+
indicators = pattern.get("indicators", [])
|
|
957
|
+
|
|
958
|
+
content += f"- **{name}** (confidence: {confidence:.2f})\n"
|
|
959
|
+
if indicators:
|
|
960
|
+
content += f" - Indicators: {', '.join(indicators[:3])}\n"
|
|
961
|
+
|
|
962
|
+
content += f"\n*Total: {len(patterns)} architectural patterns detected*\n\n"
|
|
963
|
+
content += "*See `references/architecture/` for complete architectural analysis*\n\n"
|
|
964
|
+
return content
|
|
965
|
+
|
|
966
|
+
|
|
967
|
+
def _format_config_section(output_dir: Path) -> str:
|
|
968
|
+
"""Format configuration patterns section."""
|
|
969
|
+
config_file = output_dir / "config_patterns" / "config_patterns.json"
|
|
970
|
+
if not config_file.exists():
|
|
971
|
+
return ""
|
|
972
|
+
|
|
973
|
+
try:
|
|
974
|
+
with open(config_file, encoding="utf-8") as f:
|
|
975
|
+
config_data = json.load(f)
|
|
976
|
+
except Exception:
|
|
977
|
+
return ""
|
|
978
|
+
|
|
979
|
+
config_files = config_data.get("config_files", [])
|
|
980
|
+
if not config_files:
|
|
981
|
+
return ""
|
|
982
|
+
|
|
983
|
+
total_settings = sum(len(cf.get("settings", [])) for cf in config_files)
|
|
984
|
+
total_patterns = sum(len(cf.get("patterns", [])) for cf in config_files)
|
|
985
|
+
|
|
986
|
+
content = "## ⚙️ Configuration Patterns\n\n"
|
|
987
|
+
content += "*From C3.4 configuration analysis*\n\n"
|
|
988
|
+
content += f"**Configuration Files Analyzed:** {len(config_files)}\n"
|
|
989
|
+
content += f"**Total Settings:** {total_settings}\n"
|
|
990
|
+
content += f"**Patterns Detected:** {total_patterns}\n\n"
|
|
991
|
+
|
|
992
|
+
# List config file types found
|
|
993
|
+
file_types = {}
|
|
994
|
+
for cf in config_files:
|
|
995
|
+
ctype = cf.get("config_type", "unknown")
|
|
996
|
+
file_types[ctype] = file_types.get(ctype, 0) + 1
|
|
997
|
+
|
|
998
|
+
if file_types:
|
|
999
|
+
content += "**Configuration Types:**\n"
|
|
1000
|
+
for ctype, count in sorted(file_types.items(), key=lambda x: x[1], reverse=True):
|
|
1001
|
+
content += f"- {ctype}: {count} files\n"
|
|
1002
|
+
content += "\n"
|
|
1003
|
+
|
|
1004
|
+
content += "*See `references/config_patterns/` for detailed configuration analysis*\n\n"
|
|
1005
|
+
return content
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
def _generate_references(output_dir: Path) -> None:
    """
    Generate the references/ directory from the analysis output directories.

    Each known analysis output directory under *output_dir* is copied
    (not symlinked, for portability) into ``output_dir/references/``;
    any stale copy at the target is replaced.

    Args:
        output_dir: Root directory holding the analysis outputs.

    Fix: the duplicated in-loop ``import shutil`` statements are hoisted to
    a single function-level import.
    """
    import shutil  # single local import, hoisted out of the copy loop

    references_dir = output_dir / "references"
    references_dir.mkdir(exist_ok=True)

    # Map analysis directories to reference names
    mappings = {
        "api_reference": "api_reference",
        "dependencies": "dependencies",
        "patterns": "patterns",
        "test_examples": "test_examples",
        "tutorials": "tutorials",
        "config_patterns": "config_patterns",
        "architecture": "architecture",
    }

    for source, target in mappings.items():
        source_dir = output_dir / source
        target_dir = references_dir / target

        # is_dir() is False for missing paths, so it covers the old
        # exists()-and-is_dir() check on its own.
        if source_dir.is_dir():
            # Copy directory to references/ (not symlink, for portability),
            # replacing any previous copy so it mirrors the latest analysis.
            if target_dir.exists():
                shutil.rmtree(target_dir)
            shutil.copytree(source_dir, target_dir)
            logger.debug(f"Copied {source} → references/{target}")

    logger.info(f"✅ Generated references directory: {references_dir}")
|
|
1045
|
+
|
|
1046
|
+
|
|
1047
|
+
def main() -> int:
    """Command-line interface for codebase analysis.

    Parses CLI arguments, validates the target directory, runs
    analyze_codebase() with the selected feature flags (all features are
    enabled by default; --skip-* flags disable them), and prints a summary.

    Returns:
        int: process exit code — 0 on success, 1 on invalid input or
        analysis failure, 130 on keyboard interrupt.
    """
    parser = argparse.ArgumentParser(
        description="Analyze local codebases and extract code knowledge",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Analyze current directory
  codebase-scraper --directory . --output output/codebase/

  # Deep analysis with API reference and dependency graph
  codebase-scraper --directory /path/to/repo --depth deep --build-api-reference --build-dependency-graph

  # Analyze only Python and JavaScript
  codebase-scraper --directory . --languages Python,JavaScript

  # Use file patterns
  codebase-scraper --directory . --file-patterns "*.py,src/**/*.js"

  # Full analysis with all features (default)
  codebase-scraper --directory . --depth deep

  # Surface analysis (fast, skip all analysis features)
  codebase-scraper --directory . --depth surface --skip-api-reference --skip-dependency-graph --skip-patterns --skip-test-examples

  # Skip specific features
  codebase-scraper --directory . --skip-patterns --skip-test-examples
""",
    )

    parser.add_argument("--directory", required=True, help="Directory to analyze")
    parser.add_argument(
        "--output", default="output/codebase/", help="Output directory (default: output/codebase/)"
    )
    parser.add_argument(
        "--depth",
        choices=["surface", "deep", "full"],
        default="deep",
        help="Analysis depth (default: deep)",
    )
    parser.add_argument(
        "--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)"
    )
    parser.add_argument(
        "--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)"
    )
    parser.add_argument(
        "--skip-api-reference",
        action="store_true",
        default=False,
        help="Skip API reference markdown documentation generation (default: enabled)",
    )
    parser.add_argument(
        "--skip-dependency-graph",
        action="store_true",
        default=False,
        help="Skip dependency graph and circular dependency detection (default: enabled)",
    )
    parser.add_argument(
        "--skip-patterns",
        action="store_true",
        default=False,
        help="Skip design pattern detection (Singleton, Factory, Observer, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--skip-test-examples",
        action="store_true",
        default=False,
        help="Skip test example extraction (instantiation, method calls, configs, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--skip-how-to-guides",
        action="store_true",
        default=False,
        help="Skip how-to guide generation from workflow examples (default: enabled)",
    )
    parser.add_argument(
        "--skip-config-patterns",
        action="store_true",
        default=False,
        help="Skip configuration pattern extraction from config files (JSON, YAML, TOML, ENV, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--ai-mode",
        choices=["auto", "api", "local", "none"],
        default="auto",
        help="AI enhancement mode for how-to guides: auto (detect best), api (Claude API), local (Claude Code CLI), none (disable) (default: auto)",
    )
    parser.add_argument("--no-comments", action="store_true", help="Skip comment extraction")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    # Check for deprecated flags. This scans sys.argv BEFORE parse_args();
    # the old flags are not registered with the parser, so after the warning
    # argparse will still reject them as unrecognized arguments.
    deprecated_flags = {
        "--build-api-reference": "--skip-api-reference",
        "--build-dependency-graph": "--skip-dependency-graph",
        "--detect-patterns": "--skip-patterns",
        "--extract-test-examples": "--skip-test-examples",
        "--build-how-to-guides": "--skip-how-to-guides",
        "--extract-config-patterns": "--skip-config-patterns",
    }

    for old_flag, new_flag in deprecated_flags.items():
        if old_flag in sys.argv:
            logger.warning(
                f"⚠️ DEPRECATED: {old_flag} is deprecated. "
                f"All features are now enabled by default. "
                f"Use {new_flag} to disable this feature."
            )

    args = parser.parse_args()

    # Set logging level
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Validate directory
    directory = Path(args.directory)
    if not directory.exists():
        logger.error(f"Directory not found: {directory}")
        return 1

    if not directory.is_dir():
        logger.error(f"Not a directory: {directory}")
        return 1

    # Parse languages (comma-separated list, whitespace tolerated)
    languages = None
    if args.languages:
        languages = [lang.strip() for lang in args.languages.split(",")]

    # Parse file patterns
    file_patterns = None
    if args.file_patterns:
        file_patterns = [p.strip() for p in args.file_patterns.split(",")]

    # Analyze codebase. The --skip-* flags are inverted into positive
    # feature toggles expected by analyze_codebase().
    try:
        results = analyze_codebase(
            directory=directory,
            output_dir=Path(args.output),
            depth=args.depth,
            languages=languages,
            file_patterns=file_patterns,
            build_api_reference=not args.skip_api_reference,
            extract_comments=not args.no_comments,
            build_dependency_graph=not args.skip_dependency_graph,
            detect_patterns=not args.skip_patterns,
            extract_test_examples=not args.skip_test_examples,
            build_how_to_guides=not args.skip_how_to_guides,
            extract_config_patterns=not args.skip_config_patterns,
            enhance_with_ai=True,  # Auto-disables if no API key present
            ai_mode=args.ai_mode,  # NEW: AI enhancement mode for how-to guides
        )

        # Print summary
        print(f"\n{'=' * 60}")
        print("CODEBASE ANALYSIS COMPLETE")
        print(f"{'=' * 60}")
        print(f"Files analyzed: {len(results['files'])}")
        print(f"Output directory: {args.output}")
        if not args.skip_api_reference:
            print(f"API reference: {Path(args.output) / 'api_reference'}")
        print(f"{'=' * 60}\n")

        return 0

    except KeyboardInterrupt:
        logger.error("\nAnalysis interrupted by user")
        return 130
    except Exception as e:
        logger.error(f"Analysis failed: {e}")
        import traceback

        traceback.print_exc()
        return 1
|
|
1222
|
+
|
|
1223
|
+
|
|
1224
|
+
# Script entry point: propagate main()'s return code as the process exit
# status so shells and CI can detect failures.
if __name__ == "__main__":
    sys.exit(main())
|