skill_seekers-2.7.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
skill_seekers/cli/github_scraper.py
@@ -0,0 +1,1466 @@
#!/usr/bin/env python3
"""
GitHub Repository to Claude Skill Converter (Tasks C1.1-C1.12)

Converts GitHub repositories into Claude AI skills by extracting:
- README and documentation
- Code structure and signatures
- GitHub Issues, Changelog, and Releases
- Usage examples from tests

Usage:
    skill-seekers github --repo facebook/react
    skill-seekers github --config configs/react_github.json
    skill-seekers github --repo owner/repo --token $GITHUB_TOKEN
"""

import argparse
import json
import logging
import os
import re
import sys
from pathlib import Path
from typing import Any, Optional

try:
    from github import Github, GithubException, Repository
    from github.GithubException import RateLimitExceededException
except ImportError:
    print("Error: PyGithub not installed. Run: pip install PyGithub")
    sys.exit(1)

# Try to import pathspec for .gitignore support
try:
    import pathspec

    PATHSPEC_AVAILABLE = True
except ImportError:
    PATHSPEC_AVAILABLE = False

# Configure logging FIRST (before using logger)
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Import code analyzer for deep code analysis
try:
    from .code_analyzer import CodeAnalyzer

    CODE_ANALYZER_AVAILABLE = True
except ImportError:
    CODE_ANALYZER_AVAILABLE = False
    logger.warning("Code analyzer not available - deep analysis disabled")

# Directories to exclude from local repository analysis
EXCLUDED_DIRS = {
    "venv",
    "env",
    ".venv",
    ".env",  # Virtual environments
    "node_modules",
    "__pycache__",
    ".pytest_cache",  # Dependencies and caches
    ".git",
    ".svn",
    ".hg",  # Version control
    "build",
    "dist",
    "*.egg-info",  # Build artifacts
    "htmlcov",
    ".coverage",  # Coverage reports
    ".tox",
    ".nox",  # Testing environments
    ".mypy_cache",
    ".ruff_cache",  # Linter caches
}


def extract_description_from_readme(readme_content: str, repo_name: str) -> str:
    """
    Extract a meaningful description from README content for skill description.

    Parses README to find the first meaningful paragraph that describes
    what the project does, suitable for "Use when..." format.

    Args:
        readme_content: README.md content
        repo_name: Repository name (e.g., 'facebook/react')

    Returns:
        Description string, or improved fallback if extraction fails
    """
    if not readme_content:
        return f"Use when working with {repo_name.split('/')[-1]}"

    try:
        lines = readme_content.split("\n")

        # Skip badges, images, title - find first meaningful text paragraph
        meaningful_paragraph = None
        in_code_block = False

        for _i, line in enumerate(lines):
            stripped = line.strip()

            # Track code blocks
            if stripped.startswith("```"):
                in_code_block = not in_code_block
                continue

            # Skip if in code block
            if in_code_block:
                continue

            # Skip empty lines, badges, images, HTML
            if not stripped or stripped.startswith(("#", "!", "<", "[")):
                continue

            # NOTE: lines 116-119 of the file are collapsed in the published diff;
            # only the trailing "< 100:" of this condition survives, so the guard
            # below is reconstructed as a guess (skip short list/navigation lines).
            if stripped.startswith(("-", "*", "|")) and len(stripped) < 100:
                continue

            # Found a meaningful paragraph - take up to 200 chars
            if len(stripped) > 20:  # Meaningful length
                meaningful_paragraph = stripped
                break

        if meaningful_paragraph:
            # Clean up and extract purpose
            # Remove markdown formatting
            clean = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", meaningful_paragraph)  # Links
            clean = re.sub(r"[*_`]", "", clean)  # Bold, italic, code
            clean = re.sub(r"<[^>]+>", "", clean)  # HTML tags

            # Truncate if too long (keep first sentence or ~150 chars)
            if ". " in clean:
                first_sentence = clean.split(". ")[0] + "."
                if len(first_sentence) < 200:
                    clean = first_sentence

            if len(clean) > 150:
                clean = clean[:147] + "..."

            # Format as "Use when..." description
            # If it already starts with action words, use as-is
            action_words = ["build", "create", "develop", "work", "use", "implement", "manage"]
            if any(clean.lower().startswith(word) for word in action_words):
                return f"Use when {clean.lower()}"
            else:
                return f"Use when working with {clean.lower()}"

    except Exception as e:
        logger.debug(f"Could not extract description from README: {e}")

    # Improved fallback
    project_name = repo_name.split("/")[-1]
    return f"Use when working with {project_name}"
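
To make the extraction flow above concrete, here is a minimal sketch of calling the helper directly. The README text and repository name are invented for illustration, and the first printed result assumes the reconstructed guard on lines 116-119 behaves roughly as shown above:

```python
# Sketch only - inputs are invented; outputs follow the logic above.
readme = (
    "# React\n"
    "[![CI](https://img.shields.io/badge/build-passing-green)](https://example.com)\n"
    "\n"
    "React is a JavaScript library for building user interfaces from small, "
    "isolated pieces of code called components.\n"
)
print(extract_description_from_readme(readme, "facebook/react"))
# -> "Use when working with react is a javascript library for building user ..."
print(extract_description_from_readme("", "facebook/react"))
# -> "Use when working with react"  (empty README falls back to the repo basename)
```

The heading and badge lines are skipped (they start with `#` and `[`), so the first prose paragraph becomes the description.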


class GitHubScraper:
    """
    GitHub Repository Scraper (C1.1-C1.9)

    Extracts repository information for skill generation:
    - Repository structure
    - README files
    - Code comments and docstrings
    - Programming language detection
    - Function/class signatures
    - Test examples
    - GitHub Issues
    - CHANGELOG
    - Releases
    """

    def __init__(self, config: dict[str, Any], local_repo_path: str | None = None):
        """Initialize GitHub scraper with configuration."""
        self.config = config
        self.repo_name = config["repo"]
        self.name = config.get("name", self.repo_name.split("/")[-1])
        # Set initial description (will be improved after README extraction if not in config)
        self.description = config.get(
            "description", f"Use when working with {self.repo_name.split('/')[-1]}"
        )

        # Local repository path (optional - enables unlimited analysis)
        self.local_repo_path = local_repo_path or config.get("local_repo_path")
        if self.local_repo_path:
            self.local_repo_path = os.path.expanduser(self.local_repo_path)
            logger.info(f"Local repository mode enabled: {self.local_repo_path}")

        # Configure directory exclusions (smart defaults + optional customization)
        self.excluded_dirs = set(EXCLUDED_DIRS)  # Start with smart defaults

        # Option 1: Replace mode - Use only specified exclusions
        if "exclude_dirs" in config:
            self.excluded_dirs = set(config["exclude_dirs"])
            logger.warning(
                f"Using custom directory exclusions ({len(self.excluded_dirs)} dirs) - defaults overridden"
            )
            logger.debug(f"Custom exclusions: {sorted(self.excluded_dirs)}")

        # Option 2: Extend mode - Add to default exclusions
        elif "exclude_dirs_additional" in config:
            additional = set(config["exclude_dirs_additional"])
            self.excluded_dirs = self.excluded_dirs.union(additional)
            logger.info(
                f"Added {len(additional)} custom directory exclusions (total: {len(self.excluded_dirs)})"
            )
            logger.debug(f"Additional exclusions: {sorted(additional)}")

        # Load .gitignore for additional exclusions (C2.1)
        self.gitignore_spec = None
        if self.local_repo_path:
            self.gitignore_spec = self._load_gitignore()

        # GitHub client setup (C1.1)
        token = self._get_token()
        self.github = Github(token) if token else Github()
        self.repo: Repository.Repository | None = None

        # Options
        self.include_issues = config.get("include_issues", True)
        self.max_issues = config.get("max_issues", 100)
        self.include_changelog = config.get("include_changelog", True)
        self.include_releases = config.get("include_releases", True)
        self.include_code = config.get("include_code", False)
        self.code_analysis_depth = config.get(
            "code_analysis_depth", "surface"
        )  # 'surface', 'deep', 'full'
        self.file_patterns = config.get("file_patterns", [])

        # Initialize code analyzer if deep analysis requested
        self.code_analyzer = None
        if self.code_analysis_depth != "surface" and CODE_ANALYZER_AVAILABLE:
            self.code_analyzer = CodeAnalyzer(depth=self.code_analysis_depth)
            logger.info(f"Code analysis depth: {self.code_analysis_depth}")

        # Output paths
        self.skill_dir = f"output/{self.name}"
        self.data_file = f"output/{self.name}_github_data.json"

        # Extracted data storage
        self.extracted_data = {
            "repo_info": {},
            "readme": "",
            "file_tree": [],
            "languages": {},
            "signatures": [],
            "test_examples": [],
            "issues": [],
            "changelog": "",
            "releases": [],
        }
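
Since the constructor only reads plain dictionary keys, a config exercising the options above might look like this sketch (all values are illustrative, not taken from the package's shipped configs):

```python
# Illustrative config - each key is one the constructor above actually reads.
config = {
    "repo": "facebook/react",                 # required: owner/repo
    "name": "react",                          # optional, defaults to the repo basename
    "local_repo_path": "~/src/react",         # optional: switches to local filesystem mode
    "exclude_dirs_additional": ["fixtures"],  # extend (rather than replace) EXCLUDED_DIRS
    "include_issues": True,
    "max_issues": 50,
    "include_code": True,
    "code_analysis_depth": "deep",            # 'surface', 'deep', or 'full'
    "file_patterns": ["packages/react/*.js"],
}
scraper = GitHubScraper(config)
```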

    def _get_token(self) -> str | None:
        """
        Get GitHub token from env var or config (both options supported).
        Priority: GITHUB_TOKEN env var > config file > None
        """
        # Try environment variable first (recommended)
        token = os.getenv("GITHUB_TOKEN")
        if token:
            logger.info("Using GitHub token from GITHUB_TOKEN environment variable")
            return token

        # Fall back to config file
        token = self.config.get("github_token")
        if token:
            logger.warning("Using GitHub token from config file (less secure)")
            return token

        logger.warning(
            "No GitHub token provided - using unauthenticated access (lower rate limits)"
        )
        return None

    def scrape(self) -> dict[str, Any]:
        """
        Main scraping entry point.
        Executes all C1 tasks in sequence.
        """
        try:
            logger.info(f"Starting GitHub scrape for: {self.repo_name}")

            # C1.1: Fetch repository
            self._fetch_repository()

            # C1.2: Extract README
            self._extract_readme()

            # C1.3-C1.6: Extract code structure
            self._extract_code_structure()

            # C1.7: Extract Issues
            if self.include_issues:
                self._extract_issues()

            # C1.8: Extract CHANGELOG
            if self.include_changelog:
                self._extract_changelog()

            # C1.9: Extract Releases
            if self.include_releases:
                self._extract_releases()

            # Save extracted data
            self._save_data()

            logger.info(f"✅ Scraping complete! Data saved to: {self.data_file}")
            return self.extracted_data

        except RateLimitExceededException:
            logger.error("GitHub API rate limit exceeded. Please wait or use authentication token.")
            raise
        except GithubException as e:
            logger.error(f"GitHub API error: {e}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error during scraping: {e}")
            raise
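
A minimal end-to-end driver for the class, assuming network access and, ideally, a `GITHUB_TOKEN` in the environment (the repository name is just an example):

```python
# Sketch: scrape a repository and inspect the collected data.
scraper = GitHubScraper({"repo": "facebook/react"})
data = scraper.scrape()  # also writes output/react_github_data.json as a side effect

print(data["repo_info"]["stars"])
print(len(data["issues"]), "issues,", len(data["releases"]), "releases")
```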

    def _fetch_repository(self):
        """C1.1: Fetch repository structure using GitHub API."""
        logger.info(f"Fetching repository: {self.repo_name}")

        try:
            self.repo = self.github.get_repo(self.repo_name)

            # Extract basic repo info
            self.extracted_data["repo_info"] = {
                "name": self.repo.name,
                "full_name": self.repo.full_name,
                "description": self.repo.description,
                "url": self.repo.html_url,
                "homepage": self.repo.homepage,
                "stars": self.repo.stargazers_count,
                "forks": self.repo.forks_count,
                "open_issues": self.repo.open_issues_count,
                "default_branch": self.repo.default_branch,
                "created_at": self.repo.created_at.isoformat() if self.repo.created_at else None,
                "updated_at": self.repo.updated_at.isoformat() if self.repo.updated_at else None,
                "language": self.repo.language,
                "license": self.repo.license.name if self.repo.license else None,
                "topics": self.repo.get_topics(),
            }

            logger.info(
                f"Repository fetched: {self.repo.full_name} ({self.repo.stargazers_count} stars)"
            )

        except GithubException as e:
            if e.status == 404:
                raise ValueError(f"Repository not found: {self.repo_name}") from e
            raise

    def _get_file_content(self, file_path: str) -> str | None:
        """
        Safely get file content, handling symlinks and encoding issues.

        Args:
            file_path: Path to file in repository

        Returns:
            File content as string, or None if file not found/error
        """
        try:
            content = self.repo.get_contents(file_path)
            if not content:
                return None

            # Handle symlinks - follow the target to get actual file
            if hasattr(content, "type") and content.type == "symlink":
                target = getattr(content, "target", None)
                if target:
                    target = target.strip()
                    logger.debug(f"File {file_path} is a symlink to {target}, following...")
                    try:
                        content = self.repo.get_contents(target)
                    except GithubException as e:
                        logger.warning(f"Failed to follow symlink {file_path} -> {target}: {e}")
                        return None
                else:
                    logger.warning(f"Symlink {file_path} has no target")
                    return None

            # Handle large files (encoding="none") - download via URL
            # GitHub API doesn't base64-encode files >1MB
            if hasattr(content, "encoding") and content.encoding in [None, "none"]:
                download_url = getattr(content, "download_url", None)
                file_size = getattr(content, "size", 0)

                if download_url:
                    logger.info(
                        f"File {file_path} is large ({file_size:,} bytes), downloading via URL..."
                    )
                    try:
                        import requests

                        response = requests.get(download_url, timeout=30)
                        response.raise_for_status()
                        return response.text
                    except Exception as e:
                        logger.warning(f"Failed to download {file_path} from {download_url}: {e}")
                        return None
                else:
                    logger.warning(
                        f"File {file_path} has no download URL (encoding={content.encoding})"
                    )
                    return None

            # Handle regular files - decode content
            try:
                if isinstance(content.decoded_content, bytes):
                    return content.decoded_content.decode("utf-8")
                else:
                    return str(content.decoded_content)
            except (UnicodeDecodeError, AttributeError, LookupError, AssertionError) as e:
                logger.warning(f"Encoding issue with {file_path}: {e}")
                # Try alternative encoding
                try:
                    if isinstance(content.decoded_content, bytes):
                        return content.decoded_content.decode("latin-1")
                except Exception:
                    return None
                return None

        except GithubException:
            return None
        except Exception as e:
            logger.warning(f"Error reading {file_path}: {e}")
            return None

    def _extract_readme(self):
        """C1.2: Extract README.md files."""
        logger.info("Extracting README...")

        # Try common README locations
        readme_files = [
            "README.md",
            "README.rst",
            "README.txt",
            "README",
            "docs/README.md",
            ".github/README.md",
        ]

        for readme_path in readme_files:
            readme_content = self._get_file_content(readme_path)
            if readme_content:
                self.extracted_data["readme"] = readme_content
                logger.info(f"README found: {readme_path}")

                # Update description if not explicitly set in config
                if "description" not in self.config:
                    smart_description = extract_description_from_readme(
                        self.extracted_data["readme"], self.repo_name
                    )
                    self.description = smart_description
                    logger.debug(f"Generated description: {self.description}")

                return

        logger.warning("No README found in repository")

    def _extract_code_structure(self):
        """
        C1.3-C1.6: Extract code structure, languages, signatures, and test examples.
        Surface layer only - no full implementation code.
        """
        logger.info("Extracting code structure...")

        # C1.4: Get language breakdown
        self._extract_languages()

        # Get file tree
        self._extract_file_tree()

        # Extract signatures and test examples
        if self.include_code:
            self._extract_signatures_and_tests()

    def _extract_languages(self):
        """C1.4: Detect programming languages in repository."""
        logger.info("Detecting programming languages...")

        try:
            languages = self.repo.get_languages()
            total_bytes = sum(languages.values())

            self.extracted_data["languages"] = {
                lang: {
                    "bytes": bytes_count,
                    "percentage": round((bytes_count / total_bytes) * 100, 2)
                    if total_bytes > 0
                    else 0,
                }
                for lang, bytes_count in languages.items()
            }

            logger.info(f"Languages detected: {', '.join(languages.keys())}")

        except GithubException as e:
            logger.warning(f"Could not fetch languages: {e}")
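
PyGithub's `get_languages()` returns a plain bytes-per-language mapping; the comprehension above reshapes it into the stored structure. Reproduced standalone with made-up numbers:

```python
languages = {"JavaScript": 900_000, "C++": 100_000}  # shape of repo.get_languages()
total_bytes = sum(languages.values())
breakdown = {
    lang: {
        "bytes": n,
        "percentage": round((n / total_bytes) * 100, 2) if total_bytes > 0 else 0,
    }
    for lang, n in languages.items()
}
print(breakdown)
# {'JavaScript': {'bytes': 900000, 'percentage': 90.0},
#  'C++': {'bytes': 100000, 'percentage': 10.0}}
```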

    def should_exclude_dir(self, dir_name: str, dir_path: str | None = None) -> bool:
        """
        Check if directory should be excluded from analysis.

        Args:
            dir_name: Directory name (e.g., "Examples & Extras")
            dir_path: Full relative path (e.g., "TextMesh Pro/Examples & Extras")

        Returns:
            True if directory should be excluded
        """
        # Check directory name
        if dir_name in self.excluded_dirs or dir_name.startswith("."):
            return True

        # Check full path if provided (for nested exclusions like "TextMesh Pro/Examples & Extras")
        if dir_path:
            for excluded in self.excluded_dirs:
                # Match if path contains the exclusion pattern
                if excluded in dir_path or dir_path.startswith(excluded):
                    return True

        # Check .gitignore rules if available (C2.1)
        if self.gitignore_spec and dir_path:
            # For directories, we need to check both with and without trailing slash
            # as .gitignore patterns can match either way
            dir_path_with_slash = dir_path if dir_path.endswith("/") else dir_path + "/"
            if self.gitignore_spec.match_file(dir_path) or self.gitignore_spec.match_file(
                dir_path_with_slash
            ):
                logger.debug(f"Directory excluded by .gitignore: {dir_path}")
                return True

        return False
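
Note that the path check is a substring match, so an exclusion such as "dist" also knocks out anything nested beneath (or merely containing) it. A quick behavior sketch with the default exclusion set and no .gitignore loaded (repo name illustrative):

```python
scraper = GitHubScraper({"repo": "owner/repo"})
print(scraper.should_exclude_dir("node_modules"))     # True  - in EXCLUDED_DIRS
print(scraper.should_exclude_dir(".cache"))           # True  - hidden (leading dot)
print(scraper.should_exclude_dir("src", "src"))       # False
print(scraper.should_exclude_dir("api", "dist/api"))  # True  - "dist" occurs in the path
```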

    def _load_gitignore(self) -> Optional["pathspec.PathSpec"]:
        """
        Load .gitignore file and create pathspec matcher (C2.1).

        Returns:
            PathSpec object if .gitignore found, None otherwise
        """
        if not PATHSPEC_AVAILABLE:
            logger.warning("pathspec not installed - .gitignore support disabled")
            logger.warning("Install with: pip install pathspec")
            return None

        if not self.local_repo_path:
            return None

        gitignore_path = Path(self.local_repo_path) / ".gitignore"
        if not gitignore_path.exists():
            logger.debug(f"No .gitignore found in {self.local_repo_path}")
            return None

        try:
            with open(gitignore_path, encoding="utf-8") as f:
                spec = pathspec.PathSpec.from_lines("gitwildmatch", f)
                logger.info(f"Loaded .gitignore from {gitignore_path}")
                return spec
        except Exception as e:
            logger.warning(f"Failed to load .gitignore: {e}")
            return None
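
For reference, pathspec's `gitwildmatch` dialect behaves like git itself; the trailing-slash handling in `should_exclude_dir` exists because a directory pattern such as `build/` only matches paths explicitly marked as directories. A small sketch (patterns illustrative):

```python
import pathspec

spec = pathspec.PathSpec.from_lines("gitwildmatch", ["build/", "*.log"])
print(spec.match_file("build"))          # False - bare name is not marked as a directory
print(spec.match_file("build/"))         # True  - trailing slash marks it as one
print(spec.match_file("app/debug.log"))  # True
```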

    def _extract_file_tree(self):
        """Extract repository file tree structure (dual-mode: GitHub API or local filesystem)."""
        logger.info("Building file tree...")

        if self.local_repo_path:
            # Local filesystem mode - unlimited files
            self._extract_file_tree_local()
        else:
            # GitHub API mode - limited by API rate limits
            self._extract_file_tree_github()

    def _extract_file_tree_local(self):
        """Extract file tree from local filesystem (unlimited files)."""
        if not os.path.exists(self.local_repo_path):
            logger.error(f"Local repository path not found: {self.local_repo_path}")
            return

        # Log exclusions for debugging
        logger.info(
            f"Directory exclusions ({len(self.excluded_dirs)} total): {sorted(list(self.excluded_dirs)[:10])}"
        )

        file_tree = []
        excluded_count = 0
        for root, dirs, files in os.walk(self.local_repo_path):
            # Calculate relative path from repo root first (needed for exclusion checks)
            rel_root = os.path.relpath(root, self.local_repo_path)
            if rel_root == ".":
                rel_root = ""

            # Exclude directories in-place to prevent os.walk from descending into them
            # Pass both dir name and full path for path-based exclusions
            filtered_dirs = []
            for d in dirs:
                dir_path = os.path.join(rel_root, d) if rel_root else d
                if self.should_exclude_dir(d, dir_path):
                    excluded_count += 1
                    logger.debug(f"Excluding directory: {dir_path}")
                else:
                    filtered_dirs.append(d)
            dirs[:] = filtered_dirs

            # Add directories
            for dir_name in dirs:
                dir_path = os.path.join(rel_root, dir_name) if rel_root else dir_name
                file_tree.append({"path": dir_path, "type": "dir", "size": None})

            # Add files
            for file_name in files:
                file_path = os.path.join(rel_root, file_name) if rel_root else file_name
                full_path = os.path.join(root, file_name)
                try:
                    file_size = os.path.getsize(full_path)
                except OSError:
                    file_size = None

                file_tree.append({"path": file_path, "type": "file", "size": file_size})

        self.extracted_data["file_tree"] = file_tree
        logger.info(
            f"File tree built (local mode): {len(file_tree)} items ({excluded_count} directories excluded)"
        )
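
The slice assignment `dirs[:] = filtered_dirs` is the load-bearing detail here: `os.walk` keeps re-reading the same list object to decide where to descend, so pruning must mutate it in place. A standalone sketch (directory names illustrative):

```python
import os

SKIP = {".git", "node_modules"}
for root, dirs, files in os.walk("."):
    dirs[:] = [d for d in dirs if d not in SKIP]  # in-place: actually prunes the walk
    # dirs = [...] would merely rebind the local name and prune nothing
    print(root, len(files))
```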

    def _extract_file_tree_github(self):
        """Extract file tree from GitHub API (rate-limited)."""
        try:
            contents = self.repo.get_contents("")
            file_tree = []

            while contents:
                file_content = contents.pop(0)

                file_info = {
                    "path": file_content.path,
                    "type": file_content.type,
                    "size": file_content.size if file_content.type == "file" else None,
                }
                file_tree.append(file_info)

                if file_content.type == "dir":
                    contents.extend(self.repo.get_contents(file_content.path))

            self.extracted_data["file_tree"] = file_tree
            logger.info(f"File tree built (GitHub API mode): {len(file_tree)} items")

        except GithubException as e:
            logger.warning(f"Could not build file tree: {e}")

    def _extract_signatures_and_tests(self):
        """
        C1.3, C1.5, C1.6: Extract signatures, docstrings, and test examples.

        Extraction depth depends on code_analysis_depth setting:
        - surface: File tree only (minimal)
        - deep: Parse files for signatures, parameters, types
        - full: Complete AST analysis (future enhancement)
        """
        if self.code_analysis_depth == "surface":
            logger.info("Code extraction: Surface level (file tree only)")
            return

        if not self.code_analyzer:
            logger.warning("Code analyzer not available - skipping deep analysis")
            return

        logger.info(f"Extracting code signatures ({self.code_analysis_depth} analysis)...")

        # Get primary language for the repository
        languages = self.extracted_data.get("languages", {})
        if not languages:
            logger.warning("No languages detected - skipping code analysis")
            return

        # Determine primary language
        primary_language = max(languages.items(), key=lambda x: x[1]["bytes"])[0]
        logger.info(f"Primary language: {primary_language}")

        # Determine file extensions to analyze
        extension_map = {
            "Python": [".py"],
            "JavaScript": [".js", ".jsx"],
            "TypeScript": [".ts", ".tsx"],
            "C": [".c", ".h"],
            "C++": [".cpp", ".hpp", ".cc", ".hh", ".cxx"],
        }

        extensions = extension_map.get(primary_language, [])
        if not extensions:
            logger.warning(f"No file extensions mapped for {primary_language}")
            return

        # Analyze files matching patterns and extensions
        analyzed_files = []
        file_tree = self.extracted_data.get("file_tree", [])

        for file_info in file_tree:
            file_path = file_info["path"]

            # Check if file matches extension
            if not any(file_path.endswith(ext) for ext in extensions):
                continue

            # Check if file matches patterns (if specified)
            if self.file_patterns:
                import fnmatch

                if not any(fnmatch.fnmatch(file_path, pattern) for pattern in self.file_patterns):
                    continue

            # Analyze this file
            try:
                # Read file content based on mode
                if self.local_repo_path:
                    # Local mode - read from filesystem
                    full_path = os.path.join(self.local_repo_path, file_path)
                    with open(full_path, encoding="utf-8") as f:
                        content = f.read()
                else:
                    # GitHub API mode - fetch from API
                    file_content = self.repo.get_contents(file_path)
                    content = file_content.decoded_content.decode("utf-8")

                analysis_result = self.code_analyzer.analyze_file(
                    file_path, content, primary_language
                )

                if analysis_result and (
                    analysis_result.get("classes") or analysis_result.get("functions")
                ):
                    analyzed_files.append(
                        {"file": file_path, "language": primary_language, **analysis_result}
                    )

                    logger.debug(
                        f"Analyzed {file_path}: "
                        f"{len(analysis_result.get('classes', []))} classes, "
                        f"{len(analysis_result.get('functions', []))} functions"
                    )

            except Exception as e:
                logger.debug(f"Could not analyze {file_path}: {e}")
                continue

            # Limit number of files analyzed to avoid rate limits (GitHub API mode only)
            if not self.local_repo_path and len(analyzed_files) >= 50:
                logger.info("Reached analysis limit (50 files, GitHub API mode)")
                break

        self.extracted_data["code_analysis"] = {
            "depth": self.code_analysis_depth,
            "language": primary_language,
            "files_analyzed": len(analyzed_files),
            "files": analyzed_files,
        }

        # Calculate totals
        total_classes = sum(len(f.get("classes", [])) for f in analyzed_files)
        total_functions = sum(len(f.get("functions", [])) for f in analyzed_files)

        logger.info(
            f"Code analysis complete: {len(analyzed_files)} files, {total_classes} classes, {total_functions} functions"
        )
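
What lands in `extracted_data["code_analysis"]` is therefore a summary plus per-file results; the inner "classes"/"functions" entries come from `CodeAnalyzer.analyze_file`, whose exact fields live in `code_analyzer.py`. An illustrative value (inner entries hypothetical):

```python
# Illustrative shape only - inner entries depend on CodeAnalyzer.analyze_file.
code_analysis = {
    "depth": "deep",
    "language": "Python",
    "files_analyzed": 1,
    "files": [
        {
            "file": "src/app.py",              # hypothetical path
            "language": "Python",
            "classes": [{"name": "App"}],      # hypothetical entry
            "functions": [{"name": "run"}],    # hypothetical entry
        },
    ],
}
```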

    def _extract_issues(self):
        """C1.7: Extract GitHub Issues (open/closed, labels, milestones)."""
        logger.info(f"Extracting GitHub Issues (max {self.max_issues})...")

        try:
            # Fetch recent issues (open + closed)
            issues = self.repo.get_issues(state="all", sort="updated", direction="desc")

            issue_list = []
            for issue in issues[: self.max_issues]:
                # Skip pull requests (they appear in issues)
                if issue.pull_request:
                    continue

                issue_data = {
                    "number": issue.number,
                    "title": issue.title,
                    "state": issue.state,
                    "labels": [label.name for label in issue.labels],
                    "milestone": issue.milestone.title if issue.milestone else None,
                    "created_at": issue.created_at.isoformat() if issue.created_at else None,
                    "updated_at": issue.updated_at.isoformat() if issue.updated_at else None,
                    "closed_at": issue.closed_at.isoformat() if issue.closed_at else None,
                    "url": issue.html_url,
                    "body": issue.body[:500] if issue.body else None,  # First 500 chars
                }
                issue_list.append(issue_data)

            self.extracted_data["issues"] = issue_list
            logger.info(f"Extracted {len(issue_list)} issues")

        except GithubException as e:
            logger.warning(f"Could not fetch issues: {e}")

    def _extract_changelog(self):
        """C1.8: Extract CHANGELOG.md and release notes."""
        logger.info("Extracting CHANGELOG...")

        # Try common changelog locations
        changelog_files = [
            "CHANGELOG.md",
            "CHANGES.md",
            "HISTORY.md",
            "CHANGELOG.rst",
            "CHANGELOG.txt",
            "CHANGELOG",
            "docs/CHANGELOG.md",
            ".github/CHANGELOG.md",
        ]

        for changelog_path in changelog_files:
            changelog_content = self._get_file_content(changelog_path)
            if changelog_content:
                self.extracted_data["changelog"] = changelog_content
                logger.info(f"CHANGELOG found: {changelog_path}")
                return

        logger.warning("No CHANGELOG found in repository")

    def _extract_releases(self):
        """C1.9: Extract GitHub Releases with version history."""
        logger.info("Extracting GitHub Releases...")

        try:
            releases = self.repo.get_releases()

            release_list = []
            for release in releases:
                release_data = {
                    "tag_name": release.tag_name,
                    "name": release.title,
                    "body": release.body,
                    "draft": release.draft,
                    "prerelease": release.prerelease,
                    "created_at": release.created_at.isoformat() if release.created_at else None,
                    "published_at": release.published_at.isoformat()
                    if release.published_at
                    else None,
                    "url": release.html_url,
                    "tarball_url": release.tarball_url,
                    "zipball_url": release.zipball_url,
                }
                release_list.append(release_data)

            self.extracted_data["releases"] = release_list
            logger.info(f"Extracted {len(release_list)} releases")

        except GithubException as e:
            logger.warning(f"Could not fetch releases: {e}")

    def _save_data(self):
        """Save extracted data to JSON file."""
        os.makedirs("output", exist_ok=True)

        with open(self.data_file, "w", encoding="utf-8") as f:
            json.dump(self.extracted_data, f, indent=2, ensure_ascii=False)

        logger.info(f"Data saved to: {self.data_file}")
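
Downstream consumers (including `GitHubToSkillConverter` below) read this file back, so its top-level keys are exactly the `extracted_data` keys initialized in `__init__`, plus `code_analysis` when deep analysis ran. A quick check, assuming a prior run produced the file:

```python
import json

with open("output/react_github_data.json", encoding="utf-8") as f:  # path per the f-string above
    data = json.load(f)

print(sorted(data))
# ['changelog', 'file_tree', 'issues', 'languages', 'readme', 'releases',
#  'repo_info', 'signatures', 'test_examples']   (+ 'code_analysis' when produced)
```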


class GitHubToSkillConverter:
    """
    Convert extracted GitHub data to Claude skill format (C1.10).
    """

    def __init__(self, config: dict[str, Any]):
        """Initialize converter with configuration."""
        self.config = config
        self.name = config.get("name", config["repo"].split("/")[-1])

        # Paths
        self.data_file = f"output/{self.name}_github_data.json"
        self.skill_dir = f"output/{self.name}"

        # Load extracted data
        self.data = self._load_data()

        # Set description (smart extraction from README if available)
        if "description" in config:
            self.description = config["description"]
        else:
            # Try to extract from README in loaded data
            readme_content = self.data.get("readme", "")
            repo_name = config["repo"]
            if readme_content:
                self.description = extract_description_from_readme(readme_content, repo_name)
            else:
                self.description = f"Use when working with {repo_name.split('/')[-1]}"

    def _load_data(self) -> dict[str, Any]:
        """Load extracted GitHub data from JSON."""
        if not os.path.exists(self.data_file):
            raise FileNotFoundError(f"Data file not found: {self.data_file}")

        with open(self.data_file, encoding="utf-8") as f:
            return json.load(f)

    def build_skill(self):
        """Build complete skill structure."""
        logger.info(f"Building skill for: {self.name}")

        # Create directories
        os.makedirs(self.skill_dir, exist_ok=True)
        os.makedirs(f"{self.skill_dir}/references", exist_ok=True)
        os.makedirs(f"{self.skill_dir}/scripts", exist_ok=True)
        os.makedirs(f"{self.skill_dir}/assets", exist_ok=True)

        # Generate SKILL.md
        self._generate_skill_md()

        # Generate reference files
        self._generate_references()

        logger.info(f"✅ Skill built successfully: {self.skill_dir}/")

    def _generate_skill_md(self):
        """Generate main SKILL.md file (rich version with C3.x data if available)."""
        repo_info = self.data.get("repo_info", {})
        c3_data = self.data.get("c3_analysis", {})
        has_c3_data = bool(c3_data)

        # Generate skill name (lowercase, hyphens only, max 64 chars)
        skill_name = self.name.lower().replace("_", "-").replace(" ", "-")[:64]

        # Truncate description to 1024 chars if needed
        desc = self.description[:1024] if len(self.description) > 1024 else self.description

        # Build skill content
        skill_content = f"""---
name: {skill_name}
description: {desc}
---

# {repo_info.get("name", self.name)}

{self.description}

## Description

{repo_info.get("description", "GitHub repository skill")}

**Repository:** [{repo_info.get("full_name", "N/A")}]({repo_info.get("url", "#")})
**Language:** {repo_info.get("language", "N/A")}
**Stars:** {repo_info.get("stars", 0):,}
**License:** {repo_info.get("license", "N/A")}

## When to Use This Skill

Use this skill when you need to:
- Understand how to use {repo_info.get("name", self.name)}
- Look up API documentation and implementation details
- Find real-world usage examples from the codebase
- Review design patterns and architecture
- Check for known issues or recent changes
- Explore release history and changelogs
"""

        # Add Quick Reference section (enhanced with C3.x if available)
        skill_content += "\n## ⚡ Quick Reference\n\n"

        # Repository info
        skill_content += "### Repository Info\n"
        skill_content += f"- **Homepage:** {repo_info.get('homepage', 'N/A')}\n"
        skill_content += f"- **Topics:** {', '.join(repo_info.get('topics', []))}\n"
        skill_content += f"- **Open Issues:** {repo_info.get('open_issues', 0)}\n"
        skill_content += f"- **Last Updated:** {repo_info.get('updated_at', 'N/A')[:10]}\n\n"

        # Languages
        skill_content += "### Languages\n"
        skill_content += self._format_languages() + "\n\n"

        # Add C3.x pattern summary if available
        if has_c3_data and c3_data.get("patterns"):
            skill_content += self._format_pattern_summary(c3_data)

        # Add code examples if available (C3.2 test examples)
        if has_c3_data and c3_data.get("test_examples"):
            skill_content += self._format_code_examples(c3_data)

        # Add API Reference if available (C2.5)
        if has_c3_data and c3_data.get("api_reference"):
            skill_content += self._format_api_reference(c3_data)

        # Add Architecture Overview if available (C3.7)
        if has_c3_data and c3_data.get("architecture"):
            skill_content += self._format_architecture(c3_data)

        # Add Known Issues section
        skill_content += self._format_known_issues()

        # Add Recent Releases
        skill_content += "### Recent Releases\n"
        skill_content += self._format_recent_releases() + "\n\n"

        # Available References
        skill_content += "## 📚 Available References\n\n"
        skill_content += "- `references/README.md` - Complete README documentation\n"
        skill_content += "- `references/CHANGELOG.md` - Version history and changes\n"
        skill_content += "- `references/issues.md` - Recent GitHub issues\n"
        skill_content += "- `references/releases.md` - Release notes\n"
        skill_content += "- `references/file_structure.md` - Repository structure\n"

        if has_c3_data:
            skill_content += "\n### Codebase Analysis References\n\n"
            if c3_data.get("patterns"):
                skill_content += (
                    "- `references/codebase_analysis/patterns/` - Design patterns detected\n"
                )
            if c3_data.get("test_examples"):
                skill_content += (
                    "- `references/codebase_analysis/examples/` - Test examples extracted\n"
                )
            if c3_data.get("config_patterns"):
                skill_content += (
                    "- `references/codebase_analysis/configuration/` - Configuration analysis\n"
                )
            if c3_data.get("architecture"):
                skill_content += (
                    "- `references/codebase_analysis/ARCHITECTURE.md` - Architecture overview\n"
                )

        # Usage
        skill_content += "\n## 💻 Usage\n\n"
        skill_content += "See README.md for complete usage instructions and examples.\n\n"

        # Footer
        skill_content += "---\n\n"
        if has_c3_data:
            skill_content += "**Generated by Skill Seeker** | GitHub Repository Scraper with C3.x Codebase Analysis\n"
        else:
            skill_content += "**Generated by Skill Seeker** | GitHub Repository Scraper\n"

        # Write to file
        skill_path = f"{self.skill_dir}/SKILL.md"
        with open(skill_path, "w", encoding="utf-8") as f:
            f.write(skill_content)

        line_count = len(skill_content.split("\n"))
        logger.info(f"Generated: {skill_path} ({line_count} lines)")
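
With illustrative values, the file this writes begins roughly as follows (the YAML frontmatter is the part constrained above to a 64-char name and 1024-char description):

```python
# Roughly the first lines of a generated SKILL.md (values illustrative):
#
# ---
# name: react
# description: Use when working with react ...
# ---
#
# # react
```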
|
|
1051
|
+
|
|
1052
|
+
def _format_languages(self) -> str:
|
|
1053
|
+
"""Format language breakdown."""
|
|
1054
|
+
languages = self.data.get("languages", {})
|
|
1055
|
+
if not languages:
|
|
1056
|
+
return "No language data available"
|
|
1057
|
+
|
|
1058
|
+
lines = []
|
|
1059
|
+
for lang, info in sorted(languages.items(), key=lambda x: x[1]["bytes"], reverse=True):
|
|
1060
|
+
lines.append(f"- **{lang}:** {info['percentage']:.1f}%")
|
|
1061
|
+
|
|
1062
|
+
return "\n".join(lines)
|
|
1063
|
+
|
|
1064
|
+
def _format_recent_releases(self) -> str:
|
|
1065
|
+
"""Format recent releases (top 3)."""
|
|
1066
|
+
releases = self.data.get("releases", [])
|
|
1067
|
+
if not releases:
|
|
1068
|
+
return "No releases available"
|
|
1069
|
+
|
|
1070
|
+
lines = []
|
|
1071
|
+
for release in releases[:3]:
|
|
1072
|
+
lines.append(
|
|
1073
|
+
f"- **{release['tag_name']}** ({release['published_at'][:10]}): {release['name']}"
|
|
1074
|
+
)
|
|
1075
|
+
|
|
1076
|
+
return "\n".join(lines)
|
|
1077
|
+
|
|
1078
|
+
def _format_pattern_summary(self, c3_data: dict[str, Any]) -> str:
|
|
1079
|
+
"""Format design patterns summary (C3.1)."""
|
|
1080
|
+
patterns_data = c3_data.get("patterns", [])
|
|
1081
|
+
if not patterns_data:
|
|
1082
|
+
return ""
|
|
1083
|
+
|
|
1084
|
+
# Count patterns by type (deduplicate by class, keep highest confidence)
|
|
1085
|
+
pattern_counts = {}
|
|
1086
|
+
by_class = {}
|
|
1087
|
+
|
|
1088
|
+
for pattern_file in patterns_data:
|
|
1089
|
+
for pattern in pattern_file.get("patterns", []):
|
|
1090
|
+
ptype = pattern.get("pattern_type", "Unknown")
|
|
1091
|
+
cls = pattern.get("class_name", "")
|
|
1092
|
+
confidence = pattern.get("confidence", 0)
|
|
1093
|
+
|
|
1094
|
+
# Skip low confidence
|
|
1095
|
+
if confidence < 0.7:
|
|
1096
|
+
continue
|
|
1097
|
+
|
|
1098
|
+
# Deduplicate by class
|
|
1099
|
+
key = f"{cls}:{ptype}"
|
|
1100
|
+
if key not in by_class or by_class[key]["confidence"] < confidence:
|
|
1101
|
+
by_class[key] = pattern
|
|
1102
|
+
|
|
1103
|
+
# Count by type
|
|
1104
|
+
pattern_counts[ptype] = pattern_counts.get(ptype, 0) + 1
|
|
1105
|
+
|
|
1106
|
+
if not pattern_counts:
|
|
1107
|
+
return ""
|
|
1108
|
+
|
|
1109
|
+
content = "### Design Patterns Detected\n\n"
|
|
1110
|
+
content += "*From C3.1 codebase analysis (confidence > 0.7)*\n\n"
|
|
1111
|
+
|
|
1112
|
+
# Top 5 pattern types
|
|
1113
|
+
for ptype, count in sorted(pattern_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
|
|
1114
|
+
content += f"- **{ptype}**: {count} instances\n"
|
|
1115
|
+
|
|
1116
|
+
content += f"\n*Total: {len(by_class)} high-confidence patterns*\n\n"
|
|
1117
|
+
return content
|
|
1118
|
+
|
|
1119
|
+
def _format_code_examples(self, c3_data: dict[str, Any]) -> str:
|
|
1120
|
+
"""Format code examples (C3.2)."""
|
|
1121
|
+
examples_data = c3_data.get("test_examples", {})
|
|
1122
|
+
examples = examples_data.get("examples", [])
|
|
1123
|
+
|
|
1124
|
+
if not examples:
|
|
1125
|
+
return ""
|
|
1126
|
+
|
|
1127
|
+
# Filter high-value examples (complexity > 0.7)
|
|
1128
|
+
high_value = [ex for ex in examples if ex.get("complexity_score", 0) > 0.7]
|
|
1129
|
+
|
|
1130
|
+
if not high_value:
|
|
1131
|
+
return ""
|
|
1132
|
+
|
|
1133
|
+
content = "## š Code Examples\n\n"
|
|
1134
|
+
content += "*High-quality examples from codebase (C3.2)*\n\n"
|
|
1135
|
+
|
|
1136
|
+
# Top 10 examples
|
|
1137
|
+
for ex in sorted(high_value, key=lambda x: x.get("complexity_score", 0), reverse=True)[:10]:
|
|
1138
|
+
desc = ex.get("description", "Example")
|
|
1139
|
+
lang = ex.get("language", "python")
|
|
1140
|
+
code = ex.get("code", "")
|
|
1141
|
+
complexity = ex.get("complexity_score", 0)
|
|
1142
|
+
|
|
1143
|
+
content += f"**{desc}** (complexity: {complexity:.2f})\n\n"
|
|
1144
|
+
content += f"```{lang}\n{code}\n```\n\n"
|
|
1145
|
+
|
|
1146
|
+
return content
|
|
1147
|
+
|
|
1148
|
+
def _format_api_reference(self, c3_data: dict[str, Any]) -> str:
|
|
1149
|
+
"""Format API reference (C2.5)."""
|
|
1150
|
+
api_ref = c3_data.get("api_reference", {})
|
|
1151
|
+
|
|
1152
|
+
if not api_ref:
|
|
1153
|
+
return ""
|
|
1154
|
+
|
|
1155
|
+
content = "## š§ API Reference\n\n"
|
|
1156
|
+
content += "*Extracted from codebase analysis (C2.5)*\n\n"
|
|
1157
|
+
|
|
1158
|
+
# Top 5 modules
|
|
1159
|
+
for module_name, module_md in list(api_ref.items())[:5]:
|
|
1160
|
+
content += f"### {module_name}\n\n"
|
|
1161
|
+
# First 500 chars of module documentation
|
|
1162
|
+
content += module_md[:500]
|
|
1163
|
+
if len(module_md) > 500:
|
|
1164
|
+
content += "...\n\n"
|
|
1165
|
+
else:
|
|
1166
|
+
content += "\n\n"
|
|
1167
|
+
|
|
1168
|
+
content += "*See `references/codebase_analysis/api_reference/` for complete API docs*\n\n"
|
|
1169
|
+
return content
|
|
1170
|
+
|
|
1171
|
+
def _format_architecture(self, c3_data: dict[str, Any]) -> str:
|
|
1172
|
+
"""Format architecture overview (C3.7)."""
|
|
1173
|
+
arch_data = c3_data.get("architecture", {})
|
|
1174
|
+
|
|
1175
|
+
if not arch_data:
|
|
1176
|
+
return ""
|
|
1177
|
+
|
|
1178
|
+
content = "## šļø Architecture Overview\n\n"
|
|
1179
|
+
content += "*From C3.7 codebase analysis*\n\n"
|
|
1180
|
+
|
|
1181
|
+
# Architecture patterns
|
|
1182
|
+
patterns = arch_data.get("patterns", [])
|
|
1183
|
+
if patterns:
|
|
1184
|
+
content += "**Architectural Patterns:**\n"
|
|
1185
|
+
for pattern in patterns[:5]:
|
|
1186
|
+
content += (
|
|
1187
|
+
f"- {pattern.get('name', 'Unknown')}: {pattern.get('description', 'N/A')}\n"
|
|
1188
|
+
)
|
|
1189
|
+
content += "\n"
|
|
1190
|
+
|
|
1191
|
+
# Dependencies (C2.6)
|
|
1192
|
+
dep_data = c3_data.get("dependency_graph", {})
|
|
1193
|
+
if dep_data:
|
|
1194
|
+
total_deps = dep_data.get("total_dependencies", 0)
|
|
1195
|
+
circular = len(dep_data.get("circular_dependencies", []))
|
|
1196
|
+
if total_deps > 0:
|
|
1197
|
+
content += f"**Dependencies:** {total_deps} total"
|
|
1198
|
+
if circular > 0:
|
|
1199
|
+
content += f" (ā ļø {circular} circular dependencies detected)"
|
|
1200
|
+
content += "\n\n"
|
|
1201
|
+
|
|
1202
|
+
content += "*See `references/codebase_analysis/ARCHITECTURE.md` for complete overview*\n\n"
|
|
1203
|
+
return content
|
|
1204
|
+
|
|
1205
|
+
def _format_known_issues(self) -> str:
|
|
1206
|
+
"""Format known issues from GitHub."""
|
|
1207
|
+
issues = self.data.get("issues", [])
|
|
1208
|
+
|
|
1209
|
+
if not issues:
|
|
1210
|
+
return ""
|
|
1211
|
+
|
|
1212
|
+
content = "## ā ļø Known Issues\n\n"
|
|
1213
|
+
content += "*Recent issues from GitHub*\n\n"
|
|
1214
|
+
|
|
1215
|
+
# Top 5 issues
|
|
1216
|
+
for issue in issues[:5]:
|
|
1217
|
+
title = issue.get("title", "Untitled")
|
|
1218
|
+
number = issue.get("number", 0)
|
|
1219
|
+
labels = ", ".join(issue.get("labels", []))
|
|
1220
|
+
content += f"- **#{number}**: {title}"
|
|
1221
|
+
if labels:
|
|
1222
|
+
content += f" [`{labels}`]"
|
|
1223
|
+
content += "\n"
|
|
1224
|
+
|
|
1225
|
+
content += "\n*See `references/issues.md` for complete list*\n\n"
|
|
1226
|
+
return content
|
|
1227
|
+
|
|
1228
|
+
def _generate_references(self):
|
|
1229
|
+
"""Generate all reference files."""
|
|
1230
|
+
# README
|
|
1231
|
+
if self.data.get("readme"):
|
|
1232
|
+
readme_path = f"{self.skill_dir}/references/README.md"
|
|
1233
|
+
with open(readme_path, "w", encoding="utf-8") as f:
|
|
1234
|
+
f.write(self.data["readme"])
|
|
1235
|
+
logger.info(f"Generated: {readme_path}")
|
|
1236
|
+
|
|
1237
|
+
# CHANGELOG
|
|
1238
|
+
if self.data.get("changelog"):
|
|
1239
|
+
changelog_path = f"{self.skill_dir}/references/CHANGELOG.md"
|
|
1240
|
+
with open(changelog_path, "w", encoding="utf-8") as f:
|
|
1241
|
+
f.write(self.data["changelog"])
|
|
1242
|
+
logger.info(f"Generated: {changelog_path}")
|
|
1243
|
+
|
|
1244
|
+
# Issues
|
|
1245
|
+
if self.data.get("issues"):
|
|
1246
|
+
self._generate_issues_reference()
|
|
1247
|
+
|
|
1248
|
+
# Releases
|
|
1249
|
+
if self.data.get("releases"):
|
|
1250
|
+
self._generate_releases_reference()
|
|
1251
|
+
|
|
1252
|
+
# File structure
|
|
1253
|
+
if self.data.get("file_tree"):
|
|
1254
|
+
self._generate_file_structure_reference()
|
|
1255
|
+
|
|
1256
|
+
def _generate_issues_reference(self):
|
|
1257
|
+
"""Generate issues.md reference file."""
|
|
1258
|
+
issues = self.data["issues"]
|
|
1259
|
+
|
|
1260
|
+
content = f"# GitHub Issues\n\nRecent issues from the repository ({len(issues)} total).\n\n"
|
|
1261
|
+
|
|
1262
|
+
# Group by state
|
|
1263
|
+
open_issues = [i for i in issues if i["state"] == "open"]
|
|
1264
|
+
closed_issues = [i for i in issues if i["state"] == "closed"]
|
|
1265
|
+
|
|
1266
|
+
content += f"## Open Issues ({len(open_issues)})\n\n"
|
|
1267
|
+
for issue in open_issues[:20]:
|
|
1268
|
+
labels = ", ".join(issue["labels"]) if issue["labels"] else "No labels"
|
|
1269
|
+
content += f"### #{issue['number']}: {issue['title']}\n"
|
|
1270
|
+
content += f"**Labels:** {labels} | **Created:** {issue['created_at'][:10]}\n"
|
|
1271
|
+
content += f"[View on GitHub]({issue['url']})\n\n"
|
|
1272
|
+
|
|
1273
|
+
content += f"\n## Recently Closed Issues ({len(closed_issues)})\n\n"
|
|
1274
|
+
for issue in closed_issues[:10]:
|
|
1275
|
+
labels = ", ".join(issue["labels"]) if issue["labels"] else "No labels"
|
|
1276
|
+
content += f"### #{issue['number']}: {issue['title']}\n"
|
|
1277
|
+
content += f"**Labels:** {labels} | **Closed:** {issue['closed_at'][:10]}\n"
|
|
1278
|
+
content += f"[View on GitHub]({issue['url']})\n\n"
|
|
1279
|
+
|
|
1280
|
+
issues_path = f"{self.skill_dir}/references/issues.md"
|
|
1281
|
+
with open(issues_path, "w", encoding="utf-8") as f:
|
|
1282
|
+
f.write(content)
|
|
1283
|
+
logger.info(f"Generated: {issues_path}")
|
|
1284
|
+
|

    def _generate_releases_reference(self):
        """Generate releases.md reference file."""
        releases = self.data["releases"]

        content = (
            f"# Releases\n\nVersion history for this repository ({len(releases)} releases).\n\n"
        )

        for release in releases:
            content += f"## {release['tag_name']}: {release['name']}\n"
            content += f"**Published:** {release['published_at'][:10]}\n"
            if release["prerelease"]:
                content += "**Pre-release**\n"
            content += f"\n{release['body']}\n\n"
            content += f"[View on GitHub]({release['url']})\n\n---\n\n"

        releases_path = f"{self.skill_dir}/references/releases.md"
        with open(releases_path, "w", encoding="utf-8") as f:
            f.write(content)
        logger.info(f"Generated: {releases_path}")
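
    # Sketch of one release entry as consumed above (keys inferred from the
    # subscripts; values invented purely for illustration):
    #   {
    #       "tag_name": "v1.2.0", "name": "Spring release",
    #       "published_at": "2024-03-01T10:00:00Z", "prerelease": False,
    #       "body": "Release notes...",
    #       "url": "https://github.com/owner/repo/releases/tag/v1.2.0",
    #   }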

    def _generate_file_structure_reference(self):
        """Generate file_structure.md reference file."""
        file_tree = self.data["file_tree"]

        content = "# Repository File Structure\n\n"
        content += f"Total items: {len(file_tree)}\n\n"
        content += "```\n"

        # Build tree structure
        for item in file_tree:
            indent = " " * item["path"].count("/")
            icon = "📁" if item["type"] == "dir" else "📄"
            content += f"{indent}{icon} {os.path.basename(item['path'])}\n"

        content += "```\n"

        structure_path = f"{self.skill_dir}/references/file_structure.md"
        with open(structure_path, "w", encoding="utf-8") as f:
            f.write(content)
        logger.info(f"Generated: {structure_path}")
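
    # Example render (hypothetical file_tree): entries
    #   [{"path": "src", "type": "dir"}, {"path": "src/main.py", "type": "file"}]
    # produce, inside the fenced block:
    #   📁 src
    #    📄 main.py
    # (one space of indent per "/" in the path, per the indent expression above).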


def main():
    """C1.10: CLI tool entry point."""
    parser = argparse.ArgumentParser(
        description="GitHub Repository to Claude Skill Converter",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  skill-seekers github --repo facebook/react
  skill-seekers github --config configs/react_github.json
  skill-seekers github --repo owner/repo --token $GITHUB_TOKEN
        """,
    )

    parser.add_argument("--repo", help="GitHub repository (owner/repo)")
    parser.add_argument("--config", help="Path to config JSON file")
    parser.add_argument("--token", help="GitHub personal access token")
    parser.add_argument("--name", help="Skill name (default: repo name)")
    parser.add_argument("--description", help="Skill description")
    parser.add_argument("--no-issues", action="store_true", help="Skip GitHub issues")
    parser.add_argument("--no-changelog", action="store_true", help="Skip CHANGELOG")
    parser.add_argument("--no-releases", action="store_true", help="Skip releases")
    parser.add_argument("--max-issues", type=int, default=100, help="Max issues to fetch")
    parser.add_argument("--scrape-only", action="store_true", help="Only scrape, don't build skill")
    parser.add_argument(
        "--enhance",
        action="store_true",
        help="Enhance SKILL.md using Claude API after building (requires API key)",
    )
    parser.add_argument(
        "--enhance-local",
        action="store_true",
        help="Enhance SKILL.md using Claude Code (no API key needed)",
    )
    parser.add_argument(
        "--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
    )
    parser.add_argument(
        "--non-interactive",
        action="store_true",
        help="Non-interactive mode for CI/CD (fail fast on rate limits)",
    )
    parser.add_argument("--profile", type=str, help="GitHub profile name to use from config")
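
    # Illustrative invocations (beyond the epilog examples): the flags compose,
    # e.g. scrape without building, or build and enhance in one pass:
    #   skill-seekers github --repo owner/repo --scrape-only
    #   skill-seekers github --repo owner/repo --enhance-local --max-issues 50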

    args = parser.parse_args()

    # Build config from args or file
    if args.config:
        with open(args.config, encoding="utf-8") as f:
            config = json.load(f)
        # Override with CLI args if provided
        if args.non_interactive:
            config["interactive"] = False
        if args.profile:
            config["github_profile"] = args.profile
    elif args.repo:
        config = {
            "repo": args.repo,
            "name": args.name or args.repo.split("/")[-1],
            "description": args.description or f"Use when working with {args.repo.split('/')[-1]}",
            "github_token": args.token,
            "include_issues": not args.no_issues,
            "include_changelog": not args.no_changelog,
            "include_releases": not args.no_releases,
            "max_issues": args.max_issues,
            "interactive": not args.non_interactive,
            "github_profile": args.profile,
        }
    else:
        parser.error("Either --repo or --config is required")
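
    # Sketch of a --config file matching the keys read above (field list
    # inferred from the dict built in the elif branch; the real schema may
    # accept more):
    #   {
    #     "repo": "owner/repo",
    #     "name": "repo",
    #     "include_issues": true,
    #     "max_issues": 50,
    #     "interactive": false
    #   }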

    try:
        # Phase 1: Scrape GitHub repository
        scraper = GitHubScraper(config)
        scraper.scrape()

        if args.scrape_only:
            logger.info("Scrape complete (--scrape-only mode)")
            return

        # Phase 2: Build skill
        converter = GitHubToSkillConverter(config)
        converter.build_skill()

        skill_name = config.get("name", config["repo"].split("/")[-1])
        skill_dir = f"output/{skill_name}"

        # Phase 3: Optional enhancement
        if args.enhance or args.enhance_local:
            logger.info("\n🚀 Enhancing SKILL.md with Claude...")

            if args.enhance_local:
                # Local enhancement using Claude Code
                from pathlib import Path

                from skill_seekers.cli.enhance_skill_local import LocalSkillEnhancer

                enhancer = LocalSkillEnhancer(Path(skill_dir))
                enhancer.run(headless=True)
                logger.info("✅ Local enhancement complete!")

            elif args.enhance:
                # API-based enhancement
                import os

                api_key = args.api_key or os.environ.get("ANTHROPIC_API_KEY")
                if not api_key:
                    logger.error(
                        "❌ ANTHROPIC_API_KEY not set. Use --api-key or set environment variable."
                    )
                    logger.info("💡 Tip: Use --enhance-local instead (no API key needed)")
                else:
                    # Import and run API enhancement
                    try:
                        from skill_seekers.cli.enhance_skill import enhance_skill_md

                        enhance_skill_md(skill_dir, api_key)
                        logger.info("✅ API enhancement complete!")
                    except ImportError:
                        logger.error(
                            "❌ API enhancement not available. Install: pip install anthropic"
                        )
                        logger.info("💡 Tip: Use --enhance-local instead (no API key needed)")

        logger.info(f"\n✅ Success! Skill created at: {skill_dir}/")

        if not (args.enhance or args.enhance_local):
            logger.info("\n💡 Optional: Enhance SKILL.md with Claude:")
            logger.info(f"   Local (recommended): skill-seekers enhance {skill_dir}/")
            logger.info("   or re-run with: --enhance-local")

        logger.info(f"\nNext step: skill-seekers package {skill_dir}/")

    except Exception as e:
        logger.error(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
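
# End-to-end sketch (console commands assumed from the epilog examples and the
# "Next step" hint logged above):
#   skill-seekers github --repo owner/repo
#   skill-seekers package output/repo/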