skill-seekers 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2027 @@
1
+ Metadata-Version: 2.4
2
+ Name: skill-seekers
3
+ Version: 2.7.3
4
+ Summary: Convert documentation websites, GitHub repositories, and PDFs into Claude AI skills. International support with Chinese (简体中文) documentation.
5
+ Author: Yusuf Karaaslan
6
+ License: MIT
7
+ Project-URL: Homepage, https://skillseekersweb.com/
8
+ Project-URL: Website, https://skillseekersweb.com/
9
+ Project-URL: Repository, https://github.com/yusufkaraaslan/Skill_Seekers
10
+ Project-URL: Bug Tracker, https://github.com/yusufkaraaslan/Skill_Seekers/issues
11
+ Project-URL: Documentation, https://skillseekersweb.com/
12
+ Project-URL: Config Browser, https://skillseekersweb.com/
13
+ Project-URL: 中文文档 (Chinese), https://github.com/yusufkaraaslan/Skill_Seekers/blob/main/README.zh-CN.md
14
+ Keywords: claude,ai,documentation,scraping,skills,llm,mcp,automation,i18n,chinese,international
15
+ Classifier: Development Status :: 4 - Beta
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: 3.13
24
+ Classifier: Topic :: Software Development :: Documentation
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
27
+ Classifier: Natural Language :: English
28
+ Classifier: Natural Language :: Chinese (Simplified)
29
+ Requires-Python: >=3.10
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE
32
+ Requires-Dist: requests>=2.32.5
33
+ Requires-Dist: beautifulsoup4>=4.14.2
34
+ Requires-Dist: PyGithub>=2.5.0
35
+ Requires-Dist: GitPython>=3.1.40
36
+ Requires-Dist: httpx>=0.28.1
37
+ Requires-Dist: anthropic>=0.76.0
38
+ Requires-Dist: PyMuPDF>=1.24.14
39
+ Requires-Dist: Pillow>=11.0.0
40
+ Requires-Dist: pytesseract>=0.3.13
41
+ Requires-Dist: pydantic>=2.12.3
42
+ Requires-Dist: pydantic-settings>=2.11.0
43
+ Requires-Dist: python-dotenv>=1.1.1
44
+ Requires-Dist: jsonschema>=4.25.1
45
+ Requires-Dist: click>=8.3.0
46
+ Requires-Dist: Pygments>=2.19.2
47
+ Requires-Dist: pathspec>=0.12.1
48
+ Requires-Dist: networkx>=3.0
49
+ Provides-Extra: mcp
50
+ Requires-Dist: mcp<2,>=1.25; extra == "mcp"
51
+ Requires-Dist: httpx>=0.28.1; extra == "mcp"
52
+ Requires-Dist: httpx-sse>=0.4.3; extra == "mcp"
53
+ Requires-Dist: uvicorn>=0.38.0; extra == "mcp"
54
+ Requires-Dist: starlette>=0.48.0; extra == "mcp"
55
+ Requires-Dist: sse-starlette>=3.0.2; extra == "mcp"
56
+ Provides-Extra: gemini
57
+ Requires-Dist: google-generativeai>=0.8.0; extra == "gemini"
58
+ Provides-Extra: openai
59
+ Requires-Dist: openai>=1.0.0; extra == "openai"
60
+ Provides-Extra: all-llms
61
+ Requires-Dist: google-generativeai>=0.8.0; extra == "all-llms"
62
+ Requires-Dist: openai>=1.0.0; extra == "all-llms"
63
+ Provides-Extra: all
64
+ Requires-Dist: mcp<2,>=1.25; extra == "all"
65
+ Requires-Dist: httpx>=0.28.1; extra == "all"
66
+ Requires-Dist: httpx-sse>=0.4.3; extra == "all"
67
+ Requires-Dist: uvicorn>=0.38.0; extra == "all"
68
+ Requires-Dist: starlette>=0.48.0; extra == "all"
69
+ Requires-Dist: sse-starlette>=3.0.2; extra == "all"
70
+ Requires-Dist: google-generativeai>=0.8.0; extra == "all"
71
+ Requires-Dist: openai>=1.0.0; extra == "all"
72
+ Dynamic: license-file
73
+
74
+ [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/yusufkaraaslan-skill-seekers-badge.png)](https://mseep.ai/app/yusufkaraaslan-skill-seekers)
75
+
76
+ # Skill Seeker
77
+
78
+ English | [简体中文](README.zh-CN.md)
79
+
80
+ [![Version](https://img.shields.io/badge/version-2.7.3-blue.svg)](https://github.com/yusufkaraaslan/Skill_Seekers/releases/tag/v2.7.3)
81
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
82
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
83
+ [![MCP Integration](https://img.shields.io/badge/MCP-Integrated-blue.svg)](https://modelcontextprotocol.io)
84
+ [![Tested](https://img.shields.io/badge/Tests-1200+%20Passing-brightgreen.svg)](tests/)
85
+ [![Project Board](https://img.shields.io/badge/Project-Board-purple.svg)](https://github.com/users/yusufkaraaslan/projects/2)
86
+ [![PyPI version](https://badge.fury.io/py/skill-seekers.svg)](https://pypi.org/project/skill-seekers/)
87
+ [![PyPI - Downloads](https://img.shields.io/pypi/dm/skill-seekers.svg)](https://pypi.org/project/skill-seekers/)
88
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/skill-seekers.svg)](https://pypi.org/project/skill-seekers/)
89
+ [![Website](https://img.shields.io/badge/Website-skillseekersweb.com-blue.svg)](https://skillseekersweb.com/)
90
+
91
+ **Automatically convert documentation websites, GitHub repositories, and PDFs into Claude AI skills in minutes.**
92
+
93
+ > 🌐 **[Visit SkillSeekersWeb.com](https://skillseekersweb.com/)** - Browse 24+ preset configs, share your configs, and access complete documentation!
94
+
95
+ > 📋 **[View Development Roadmap & Tasks](https://github.com/users/yusufkaraaslan/projects/2)** - 134 tasks across 10 categories, pick any to contribute!
96
+
97
+ ## What is Skill Seeker?
98
+
99
+ Skill Seeker is an automated tool that transforms documentation websites, GitHub repositories, and PDF files into production-ready [Claude AI skills](https://www.anthropic.com/news/skills). Instead of manually reading and summarizing documentation, Skill Seeker:
100
+
101
+ 1. **Scrapes** multiple sources (docs, GitHub repos, PDFs) automatically
102
+ 2. **Analyzes** code repositories with deep AST parsing
103
+ 3. **Detects** conflicts between documentation and code implementation
104
+ 4. **Organizes** content into categorized reference files
105
+ 5. **Enhances** with AI to extract best examples and key concepts
106
+ 6. **Packages** everything into an uploadable `.zip` file for Claude
107
+
108
+ **Result:** Get comprehensive Claude skills for any framework, API, or tool in 20-40 minutes instead of hours of manual work.
109
+
110
+ ## Why Use This?
111
+
112
+ - 🎯 **For Developers**: Create skills from documentation + GitHub repos with conflict detection
113
+ - 🎮 **For Game Devs**: Generate skills for game engines (Godot docs + GitHub, Unity, etc.)
114
+ - 🔧 **For Teams**: Combine internal docs + code repositories into single source of truth
115
+ - 📚 **For Learners**: Build comprehensive skills from docs, code examples, and PDFs
116
+ - 🔍 **For Open Source**: Analyze repos to find documentation gaps and outdated examples
117
+
118
+ ## Key Features
119
+
120
+ ### 🌐 Documentation Scraping
121
+ - ✅ **llms.txt Support** - Automatically detects and uses LLM-ready documentation files (10x faster)
122
+ - ✅ **Universal Scraper** - Works with ANY documentation website
123
+ - ✅ **Smart Categorization** - Automatically organizes content by topic
124
+ - ✅ **Code Language Detection** - Recognizes Python, JavaScript, C++, GDScript, etc.
125
+ - ✅ **8 Ready-to-Use Presets** - Godot, React, Vue, Django, FastAPI, and more
126
+
127
+ ### 📄 PDF Support (**v1.2.0**)
128
+ - ✅ **Basic PDF Extraction** - Extract text, code, and images from PDF files
129
+ - ✅ **OCR for Scanned PDFs** - Extract text from scanned documents
130
+ - ✅ **Password-Protected PDFs** - Handle encrypted PDFs
131
+ - ✅ **Table Extraction** - Extract complex tables from PDFs
132
+ - ✅ **Parallel Processing** - 3x faster for large PDFs
133
+ - ✅ **Intelligent Caching** - 50% faster on re-runs
134
+
135
+ ### 🐙 GitHub Repository Scraping (**v2.0.0**)
136
+ - ✅ **Deep Code Analysis** - AST parsing for Python, JavaScript, TypeScript, Java, C++, Go
137
+ - ✅ **API Extraction** - Functions, classes, methods with parameters and types
138
+ - ✅ **Repository Metadata** - README, file tree, language breakdown, stars/forks
139
+ - ✅ **GitHub Issues & PRs** - Fetch open/closed issues with labels and milestones
140
+ - ✅ **CHANGELOG & Releases** - Automatically extract version history
141
+ - ✅ **Conflict Detection** - Compare documented APIs vs actual code implementation
142
+ - ✅ **MCP Integration** - Natural language: "Scrape GitHub repo facebook/react"
143
+
144
+ ### 🔄 Unified Multi-Source Scraping (**NEW - v2.0.0**)
145
+ - ✅ **Combine Multiple Sources** - Mix documentation + GitHub + PDF in one skill
146
+ - ✅ **Conflict Detection** - Automatically finds discrepancies between docs and code
147
+ - ✅ **Intelligent Merging** - Rule-based or AI-powered conflict resolution
148
+ - ✅ **Transparent Reporting** - Side-by-side comparison with ⚠️ warnings
149
+ - ✅ **Documentation Gap Analysis** - Identifies outdated docs and undocumented features
150
+ - ✅ **Single Source of Truth** - One skill showing both intent (docs) and reality (code)
151
+ - ✅ **Backward Compatible** - Legacy single-source configs still work
152
+
153
+ ### 🤖 Multi-LLM Platform Support (**NEW - v2.5.0**)
154
+ - ✅ **4 LLM Platforms** - Claude AI, Google Gemini, OpenAI ChatGPT, Generic Markdown
155
+ - ✅ **Universal Scraping** - Same documentation works for all platforms
156
+ - ✅ **Platform-Specific Packaging** - Optimized formats for each LLM
157
+ - ✅ **One-Command Export** - `--target` flag selects platform
158
+ - ✅ **Optional Dependencies** - Install only what you need
159
+ - ✅ **100% Backward Compatible** - Existing Claude workflows unchanged
160
+
161
+ | Platform | Format | Upload | Enhancement | API Key |
162
+ |----------|--------|--------|-------------|---------|
163
+ | **Claude AI** | ZIP + YAML | ✅ Auto | ✅ Yes | ANTHROPIC_API_KEY |
164
+ | **Google Gemini** | tar.gz | ✅ Auto | ✅ Yes | GOOGLE_API_KEY |
165
+ | **OpenAI ChatGPT** | ZIP + Vector Store | ✅ Auto | ✅ Yes | OPENAI_API_KEY |
166
+ | **Generic Markdown** | ZIP | ❌ Manual | ❌ No | None |
167
+
168
+ ```bash
169
+ # Claude (default - no changes needed!)
170
+ skill-seekers package output/react/
171
+ skill-seekers upload react.zip
172
+
173
+ # Google Gemini
174
+ pip install skill-seekers[gemini]
175
+ skill-seekers package output/react/ --target gemini
176
+ skill-seekers upload react-gemini.tar.gz --target gemini
177
+
178
+ # OpenAI ChatGPT
179
+ pip install skill-seekers[openai]
180
+ skill-seekers package output/react/ --target openai
181
+ skill-seekers upload react-openai.zip --target openai
182
+
183
+ # Generic Markdown (universal export)
184
+ skill-seekers package output/react/ --target markdown
185
+ # Use the markdown files directly in any LLM
186
+ ```
187
+
188
+ **Installation:**
189
+ ```bash
190
+ # Install with Gemini support
191
+ pip install skill-seekers[gemini]
192
+
193
+ # Install with OpenAI support
194
+ pip install skill-seekers[openai]
195
+
196
+ # Install with all LLM platforms
197
+ pip install skill-seekers[all-llms]
198
+ ```
199
+
200
+ ### 🌊 Three-Stream GitHub Architecture (**NEW - v2.6.0**)
201
+ - ✅ **Triple-Stream Analysis** - Split GitHub repos into Code, Docs, and Insights streams
202
+ - ✅ **Unified Codebase Analyzer** - Works with GitHub URLs AND local paths
203
+ - ✅ **C3.x as Analysis Depth** - Choose 'basic' (1-2 min) or 'c3x' (20-60 min) analysis
204
+ - ✅ **Enhanced Router Generation** - GitHub metadata, README quick start, common issues
205
+ - ✅ **Issue Integration** - Top problems and solutions from GitHub issues
206
+ - ✅ **Smart Routing Keywords** - GitHub labels weighted 2x for better topic detection
207
+ - ✅ **81 Tests Passing** - Comprehensive E2E validation (0.44 seconds)
208
+
209
+ **Three Streams Explained:**
210
+ - **Stream 1: Code** - Deep C3.x analysis (patterns, examples, guides, configs, architecture)
211
+ - **Stream 2: Docs** - Repository documentation (README, CONTRIBUTING, docs/*.md)
212
+ - **Stream 3: Insights** - Community knowledge (issues, labels, stars, forks)
213
+
214
+ ```python
215
+ from skill_seekers.cli.unified_codebase_analyzer import UnifiedCodebaseAnalyzer
216
+
217
+ # Analyze GitHub repo with all three streams
218
+ analyzer = UnifiedCodebaseAnalyzer()
219
+ result = analyzer.analyze(
220
+ source="https://github.com/facebook/react",
221
+ depth="c3x", # or "basic" for fast analysis
222
+ fetch_github_metadata=True
223
+ )
224
+
225
+ # Access code stream (C3.x analysis)
226
+ print(f"Design patterns: {len(result.code_analysis['c3_1_patterns'])}")
227
+ print(f"Test examples: {result.code_analysis['c3_2_examples_count']}")
228
+
229
+ # Access docs stream (repository docs)
230
+ print(f"README: {result.github_docs['readme'][:100]}")
231
+
232
+ # Access insights stream (GitHub metadata)
233
+ print(f"Stars: {result.github_insights['metadata']['stars']}")
234
+ print(f"Common issues: {len(result.github_insights['common_problems'])}")
235
+ ```
236
+
237
+ **See complete documentation**: [Three-Stream Implementation Summary](docs/IMPLEMENTATION_SUMMARY_THREE_STREAM.md)
238
+
239
+ ### 🔐 Smart Rate Limit Management & Configuration (**NEW - v2.7.0**)
240
+ - ✅ **Multi-Token Configuration System** - Manage multiple GitHub accounts (personal, work, OSS)
241
+ - Secure config storage at `~/.config/skill-seekers/config.json` (600 permissions)
242
+ - Per-profile rate limit strategies: `prompt`, `wait`, `switch`, `fail`
243
+ - Configurable timeout per profile (default: 30 min, prevents indefinite waits)
244
+ - Smart fallback chain: CLI arg → Env var → Config file → Prompt
245
+ - API key management for Claude, Gemini, OpenAI
246
+ - ✅ **Interactive Configuration Wizard** - Beautiful terminal UI for easy setup
247
+ - Browser integration for token creation (auto-opens GitHub, etc.)
248
+ - Token validation and connection testing
249
+ - Visual status display with color coding
250
+ - ✅ **Intelligent Rate Limit Handler** - No more indefinite waits!
251
+ - Upfront warning about rate limits (60/hour vs 5000/hour)
252
+ - Real-time detection from GitHub API responses
253
+ - Live countdown timers with progress
254
+ - Automatic profile switching when rate limited
255
+ - Four strategies: prompt (ask), wait (countdown), switch (try another), fail (abort)
256
+ - ✅ **Resume Capability** - Continue interrupted jobs
257
+ - Auto-save progress at configurable intervals (default: 60 sec)
258
+ - List all resumable jobs with progress details
259
+ - Auto-cleanup of old jobs (default: 7 days)
260
+ - ✅ **CI/CD Support** - Non-interactive mode for automation
261
+ - `--non-interactive` flag fails fast without prompts
262
+ - `--profile` flag to select specific GitHub account
263
+ - Clear error messages for pipeline logs
264
+ - Exit codes for automation integration
265
+
266
+ **Quick Setup:**
267
+ ```bash
268
+ # One-time configuration (5 minutes)
269
+ skill-seekers config --github
270
+
271
+ # Add multiple GitHub profiles
272
+ skill-seekers config
273
+ # → Select "1. GitHub Token Setup"
274
+ # → Add profiles for personal, work, OSS accounts
275
+
276
+ # Use specific profile for private repos
277
+ skill-seekers github --repo mycompany/private-repo --profile work
278
+
279
+ # CI/CD mode (fail fast, no prompts)
280
+ skill-seekers github --repo owner/repo --non-interactive
281
+
282
+ # View current configuration
283
+ skill-seekers config --show
284
+
285
+ # Test connections
286
+ skill-seekers config --test
287
+
288
+ # Resume interrupted job
289
+ skill-seekers resume --list
290
+ skill-seekers resume github_react_20260117_143022
291
+ ```
292
+
293
+ **Rate Limit Strategies Explained:**
294
+ - **prompt** (default) - Ask what to do when rate limited (wait, switch, setup token, cancel)
295
+ - **wait** - Automatically wait with countdown timer (respects timeout)
296
+ - **switch** - Automatically try next available profile (for multi-account setups)
297
+ - **fail** - Fail immediately with clear error (perfect for CI/CD)
298
+
299
+ **See complete documentation**: [Configuration Guide](docs/guides/CONFIGURATION.md) (coming soon)
300
+
301
+ ### 🎯 Bootstrap Skill - Self-Hosting (**NEW - v2.7.0**)
302
+
303
+ Generate skill-seekers as a Claude Code skill to use within Claude:
304
+
305
+ ```bash
306
+ # Generate the skill
307
+ ./scripts/bootstrap_skill.sh
308
+
309
+ # Install to Claude Code
310
+ cp -r output/skill-seekers ~/.claude/skills/
311
+
312
+ # Verify
313
+ ls ~/.claude/skills/skill-seekers/SKILL.md
314
+ ```
315
+
316
+ **What you get:**
317
+ - ✅ **Complete skill documentation** - All CLI commands and usage patterns
318
+ - ✅ **CLI command reference** - Every tool and its options documented
319
+ - ✅ **Quick start examples** - Common workflows and best practices
320
+ - ✅ **Auto-generated API docs** - Code analysis, patterns, and examples
321
+ - ✅ **Robust validation** - YAML frontmatter and required fields checked
322
+ - ✅ **One-command bootstrap** - Combines manual header with auto-generated analysis
323
+
324
+ **How it works:**
325
+ 1. Runs codebase analysis on skill-seekers itself (dogfooding!)
326
+ 2. Combines handcrafted header (prerequisites, commands) with auto-generated content
327
+ 3. Validates SKILL.md structure (frontmatter, required fields)
328
+ 4. Outputs ready-to-use skill directory
329
+
330
+ **Result:** Use skill-seekers to create skills, from within Claude Code!
331
+
332
+ ### 🔐 Private Config Repositories (**NEW - v2.2.0**)
333
+ - ✅ **Git-Based Config Sources** - Fetch configs from private/team git repositories
334
+ - ✅ **Multi-Source Management** - Register unlimited GitHub, GitLab, Bitbucket repos
335
+ - ✅ **Team Collaboration** - Share custom configs across 3-5 person teams
336
+ - ✅ **Enterprise Support** - Scale to 500+ developers with priority-based resolution
337
+ - ✅ **Secure Authentication** - Environment variable tokens (GITHUB_TOKEN, GITLAB_TOKEN)
338
+ - ✅ **Intelligent Caching** - Clone once, pull updates automatically
339
+ - ✅ **Offline Mode** - Work with cached configs when offline
340
+ - ✅ **Backward Compatible** - Existing API-based configs still work
341
+
342
+ ### 🤖 Codebase Analysis & AI Enhancement (**C3.x - NEW!**)
343
+
344
+ **C3.4: Configuration Pattern Extraction with AI Enhancement**
345
+ - ✅ **9 Config Formats** - JSON, YAML, TOML, ENV, INI, Python, JavaScript, Dockerfile, Docker Compose
346
+ - ✅ **7 Pattern Types** - Database, API, logging, cache, email, auth, server configurations
347
+ - ✅ **AI Enhancement (NEW!)** - Optional dual-mode AI analysis (API + LOCAL, like C3.3)
348
+ - Explains what each config does
349
+ - Suggests best practices and improvements
350
+ - **Security analysis** - Finds hardcoded secrets, exposed credentials
351
+ - Migration suggestions - Consolidation opportunities
352
+ - Context-aware documentation
353
+ - ✅ **Auto-Documentation** - Generates JSON + Markdown documentation of all configs
354
+ - ✅ **Type Inference** - Automatically detects setting types and environment variables
355
+ - ✅ **MCP Integration** - `extract_config_patterns` tool with enhancement support
356
+
357
+ **C3.3: AI-Enhanced How-To Guides**
358
+ - ✅ **Comprehensive AI Enhancement** - Transforms basic guides (⭐⭐) into professional tutorials (⭐⭐⭐⭐⭐)
359
+ - ✅ **5 Automatic Improvements** - Step descriptions, troubleshooting, prerequisites, next steps, use cases
360
+ - ✅ **Dual-Mode Support** - API mode (Claude API) or LOCAL mode (Claude Code CLI)
361
+ - ✅ **No API Costs with LOCAL Mode** - FREE enhancement using your Claude Code Max plan
362
+ - ✅ **Quality Transformation** - 75-line templates → 500+ line comprehensive guides
363
+
364
+ **What Gets Enhanced:**
365
+ - 🔍 **Step Descriptions** - Natural language explanations (not just syntax!)
366
+ - 🔧 **Troubleshooting** - Diagnostic flows + solutions for common errors
367
+ - 📋 **Prerequisites** - Why needed + setup instructions
368
+ - 🔗 **Next Steps** - Related guides, variations, learning paths
369
+ - 💡 **Use Cases** - Real-world scenarios showing when to use guide
370
+
371
+ **Usage:**
372
+ ```bash
373
+ # AUTO mode (default) - automatically detects best option
374
+ skill-seekers-codebase tests/ --build-how-to-guides --ai-mode auto
375
+
376
+ # API mode - fast, efficient (requires ANTHROPIC_API_KEY)
377
+ skill-seekers-codebase tests/ --build-how-to-guides --ai-mode api
378
+
379
+ # LOCAL mode - FREE using Claude Code Max (no API key needed)
380
+ skill-seekers-codebase tests/ --build-how-to-guides --ai-mode local
381
+
382
+ # Disable enhancement - basic guides only
383
+ skill-seekers-codebase tests/ --build-how-to-guides --ai-mode none
384
+ ```
385
+
386
+ **Full Documentation:** [docs/HOW_TO_GUIDES.md](docs/HOW_TO_GUIDES.md#ai-enhancement-new)
387
+
388
+ ### ⚡ Performance & Scale
389
+ - ✅ **Async Mode** - 2-3x faster scraping with async/await (use `--async` flag)
390
+ - ✅ **Large Documentation Support** - Handle 10K-40K+ page docs with intelligent splitting
391
+ - ✅ **Router/Hub Skills** - Intelligent routing to specialized sub-skills
392
+ - ✅ **Parallel Scraping** - Process multiple skills simultaneously
393
+ - ✅ **Checkpoint/Resume** - Never lose progress on long scrapes
394
+ - ✅ **Caching System** - Scrape once, rebuild instantly
395
+
396
+ ### ✅ Quality Assurance
397
+ - ✅ **Fully Tested** - 1200+ tests with comprehensive coverage
398
+
399
+ ---
400
+
401
+ ## 📦 Now Available on PyPI!
402
+
403
+ **Skill Seekers is now published on the Python Package Index!** Install with a single command:
404
+
405
+ ```bash
406
+ pip install skill-seekers
407
+ ```
408
+
409
+ ### Installation Options
410
+
411
+ Choose your installation profile based on which features you need:
412
+
413
+ ```bash
414
+ # 1ī¸âƒŖ CLI Only (Skill Generation)
415
+ pip install skill-seekers
416
+
417
+ # Features:
418
+ # â€ĸ Scrape documentation websites
419
+ # â€ĸ Analyze GitHub repositories
420
+ # â€ĸ Extract from PDFs
421
+ # â€ĸ Package skills for all platforms
422
+
423
+ # 2ī¸âƒŖ MCP Integration (Claude Code, Cursor, Windsurf)
424
+ pip install skill-seekers[mcp]
425
+
426
+ # Features:
427
+ # â€ĸ Everything from CLI Only
428
+ # â€ĸ MCP server for Claude Code
429
+ # â€ĸ One-command skill installation
430
+ # â€ĸ HTTP/stdio transport modes
431
+
432
+ # 3ī¸âƒŖ Multi-LLM Support (Gemini, OpenAI)
433
+ pip install skill-seekers[all-llms]
434
+
435
+ # Features:
436
+ # â€ĸ Everything from CLI Only
437
+ # â€ĸ Google Gemini support
438
+ # â€ĸ OpenAI ChatGPT support
439
+ # â€ĸ Enhanced AI features
440
+
441
+ # 4ī¸âƒŖ Everything
442
+ pip install skill-seekers[all]
443
+
444
+ # Features:
445
+ # â€ĸ All features enabled
446
+ # â€ĸ Maximum flexibility
447
+ ```
448
+
449
+ **Need help choosing?** Run the setup wizard:
450
+ ```bash
451
+ skill-seekers-setup
452
+ ```
453
+
454
+ The wizard shows all options with detailed feature lists and guides you through configuration.
455
+
456
+ Get started in seconds. No cloning, no setup - just install and run. See the installation options above.
457
+
458
+ ---
459
+
460
+ ## Quick Start
461
+
462
+ ### Option 1: Install from PyPI (Recommended)
463
+
464
+ ```bash
465
+ # Install from PyPI (easiest method!)
466
+ pip install skill-seekers
467
+
468
+ # Use the unified CLI
469
+ skill-seekers scrape --config configs/react.json
470
+ skill-seekers github --repo facebook/react
471
+ skill-seekers enhance output/react/
472
+ skill-seekers package output/react/
473
+ ```
474
+
475
+ **Time:** ~25 minutes | **Quality:** Production-ready | **Cost:** Free
476
+
477
+ 📖 **New to Skill Seekers?** Check out our [Quick Start Guide](QUICKSTART.md) or [Bulletproof Guide](BULLETPROOF_QUICKSTART.md)
478
+
479
+ ### Option 2: Install via uv (Modern Python Tool)
480
+
481
+ ```bash
482
+ # Install with uv (fast, modern alternative)
483
+ uv tool install skill-seekers
484
+
485
+ # Or run directly without installing
486
+ uv tool run --from skill-seekers skill-seekers scrape --config https://raw.githubusercontent.com/yusufkaraaslan/Skill_Seekers/main/configs/react.json
487
+
488
+ # Unified CLI - simple commands
489
+ skill-seekers scrape --config configs/react.json
490
+ skill-seekers github --repo facebook/react
491
+ skill-seekers package output/react/
492
+ ```
493
+
494
+ **Time:** ~25 minutes | **Quality:** Production-ready | **Cost:** Free
495
+
496
+ ### Option 3: Development Install (From Source)
497
+
498
+ ```bash
499
+ # Clone and install in editable mode
500
+ git clone https://github.com/yusufkaraaslan/Skill_Seekers.git
501
+ cd Skill_Seekers
502
+ pip install -e .
503
+
504
+ # Use the unified CLI
505
+ skill-seekers scrape --config configs/react.json
506
+ ```
507
+
508
+ ### Option 4: Use from Claude Code & 4 Other AI Agents (MCP Integration)
509
+
510
+ ```bash
511
+ # One-time setup (5 minutes) - Auto-configures 5 AI agents!
512
+ ./setup_mcp.sh
513
+
514
+ # Then in Claude Code, Cursor, Windsurf, VS Code + Cline, or IntelliJ IDEA, just ask:
515
+ "Generate a React skill from https://react.dev/"
516
+ "Scrape PDF at docs/manual.pdf and create skill"
517
+ ```
518
+
519
+ **Time:** Automated | **Quality:** Production-ready | **Cost:** Free
520
+
521
+ **NEW in v2.4.0:** MCP server now supports 5 AI coding agents with automatic configuration!
522
+
523
+ ### Option 5: Legacy CLI (Backwards Compatible)
524
+
525
+ ```bash
526
+ # Install dependencies
527
+ pip3 install requests beautifulsoup4
528
+
529
+ # Run scripts directly (old method)
530
+ python3 src/skill_seekers/cli/doc_scraper.py --config configs/react.json
531
+
532
+ # Upload output/react.zip to Claude - Done!
533
+ ```
534
+
535
+ **Time:** ~25 minutes | **Quality:** Production-ready | **Cost:** Free
536
+
537
+ ---
538
+
539
+ ## 🚀 **NEW!** One-Command Install Workflow (v2.1.1)
540
+
541
+ **The fastest way to go from config to uploaded skill - complete automation:**
542
+
543
+ ```bash
544
+ # Install React skill from official configs (auto-uploads to Claude)
545
+ skill-seekers install --config react
546
+
547
+ # Install from local config file
548
+ skill-seekers install --config configs/custom.json
549
+
550
+ # Install without uploading (package only)
551
+ skill-seekers install --config django --no-upload
552
+
553
+ # Unlimited scraping (no page limits)
554
+ skill-seekers install --config godot --unlimited
555
+
556
+ # Preview workflow without executing
557
+ skill-seekers install --config react --dry-run
558
+ ```
559
+
560
+ **Time:** 20-45 minutes total | **Quality:** Production-ready (9/10) | **Cost:** Free
561
+
562
+ ### What it does automatically:
563
+
564
+ 1. ✅ **Fetches config** from API (if config name provided)
565
+ 2. ✅ **Scrapes documentation** (respects rate limits, handles pagination)
566
+ 3. ✅ **AI Enhancement (MANDATORY)** - 30-60 sec, quality boost from 3/10 → 9/10
567
+ 4. ✅ **Packages skill** to .zip file
568
+ 5. ✅ **Uploads to Claude** (if ANTHROPIC_API_KEY set)
569
+
570
+ ### Why use this?
571
+
572
+ - **Zero friction** - One command instead of 5 separate steps
573
+ - **Quality guaranteed** - Enhancement is mandatory, ensures professional output
574
+ - **Complete automation** - From config name to uploaded skill in Claude
575
+ - **Time savings** - Fully automated end-to-end workflow
576
+
577
+ ### Phases executed:
578
+
579
+ ```
580
+ 📥 PHASE 1: Fetch Config (if config name provided)
581
+ 📖 PHASE 2: Scrape Documentation
582
+ ✨ PHASE 3: AI Enhancement (MANDATORY - no skip option)
583
+ 📦 PHASE 4: Package Skill
584
+ ☁️ PHASE 5: Upload to Claude (optional, requires API key)
585
+ ```
586
+
587
+ **Requirements:**
588
+ - ANTHROPIC_API_KEY environment variable (for auto-upload)
589
+ - Claude Code Max plan (for local AI enhancement)
590
+
591
+ **Example:**
592
+ ```bash
593
+ # Set API key once
594
+ export ANTHROPIC_API_KEY=sk-ant-your-key-here
595
+
596
+ # Run one command - sit back and relax!
597
+ skill-seekers install --config react
598
+
599
+ # Result: React skill uploaded to Claude in 20-45 minutes
600
+ ```
601
+
602
+ ---
603
+
604
+ ## 📊 Feature Matrix
605
+
606
+ Skill Seekers supports **4 platforms** and **5 skill modes** with full feature parity.
607
+
608
+ **Platforms:** Claude AI, Google Gemini, OpenAI ChatGPT, Generic Markdown
609
+ **Skill Modes:** Documentation, GitHub, PDF, Unified Multi-Source, Local Repository
610
+
611
+ See [Complete Feature Matrix](docs/FEATURE_MATRIX.md) for detailed platform and feature support.
612
+
613
+ ### Quick Platform Comparison
614
+
615
+ | Feature | Claude | Gemini | OpenAI | Markdown |
616
+ |---------|--------|--------|--------|----------|
617
+ | Format | ZIP + YAML | tar.gz | ZIP + Vector | ZIP |
618
+ | Upload | ✅ API | ✅ API | ✅ API | ❌ Manual |
619
+ | Enhancement | ✅ Sonnet 4 | ✅ 2.0 Flash | ✅ GPT-4o | ❌ None |
620
+ | All Skill Modes | ✅ | ✅ | ✅ | ✅ |
621
+
622
+ **Examples:**
623
+ ```bash
624
+ # Package for all platforms (same skill)
625
+ skill-seekers package output/react/ --target claude
626
+ skill-seekers package output/react/ --target gemini
627
+ skill-seekers package output/react/ --target openai
628
+ skill-seekers package output/react/ --target markdown
629
+
630
+ # Install for specific platform
631
+ skill-seekers install --config django --target gemini
632
+ skill-seekers install --config fastapi --target openai
633
+ ```
634
+
635
+ ---
636
+
637
+ ## Usage Examples
638
+
639
+ ### Documentation Scraping
640
+
641
+ ```bash
642
+ # Scrape documentation website
643
+ skill-seekers scrape --config configs/react.json
644
+
645
+ # Quick scrape without config
646
+ skill-seekers scrape --url https://react.dev --name react
647
+
648
+ # With async mode (3x faster)
649
+ skill-seekers scrape --config configs/godot.json --async --workers 8
650
+ ```
651
+
652
+ ### PDF Extraction
653
+
654
+ ```bash
655
+ # Basic PDF extraction
656
+ skill-seekers pdf --pdf docs/manual.pdf --name myskill
657
+
658
+ # Advanced features: extract tables, fast parallel processing, use 8 CPU cores
659
+ skill-seekers pdf --pdf docs/manual.pdf --name myskill \
660
+ --extract-tables \
661
+ --parallel \
662
+ --workers 8
663
+
664
+ # Scanned PDFs (requires: pip install pytesseract Pillow)
665
+ skill-seekers pdf --pdf docs/scanned.pdf --name myskill --ocr
666
+
667
+ # Password-protected PDFs
668
+ skill-seekers pdf --pdf docs/encrypted.pdf --name myskill --password mypassword
669
+ ```
670
+
671
+ **Time:** ~5-15 minutes (or 2-5 minutes with parallel) | **Quality:** Production-ready | **Cost:** Free
672
+
673
+ ### GitHub Repository Scraping
674
+
675
+ ```bash
676
+ # Basic repository scraping
677
+ skill-seekers github --repo facebook/react
678
+
679
+ # Using a config file
680
+ skill-seekers github --config configs/react_github.json
681
+
682
+ # With authentication (higher rate limits)
683
+ export GITHUB_TOKEN=ghp_your_token_here
684
+ skill-seekers github --repo facebook/react
685
+
686
+ # Customize what to include: GitHub Issues (limited to 100), CHANGELOG.md, and GitHub Releases
687
+ skill-seekers github --repo django/django \
688
+ --include-issues \
689
+ --max-issues 100 \
690
+ --include-changelog \
691
+ --include-releases
692
+ ```
693
+
694
+ **Time:** ~5-10 minutes | **Quality:** Production-ready | **Cost:** Free
695
+
696
+ ### Unified Multi-Source Scraping (**NEW - v2.0.0**)
697
+
698
+ **The Problem:** Documentation and code often drift apart. Docs might be outdated, missing features that exist in code, or documenting features that were removed.
699
+
700
+ **The Solution:** Combine documentation + GitHub + PDF into one unified skill that shows BOTH what's documented AND what actually exists, with clear warnings about discrepancies.
701
+
702
+ ```bash
703
+ # Use existing unified configs
704
+ skill-seekers unified --config configs/react_unified.json
705
+ skill-seekers unified --config configs/django_unified.json
706
+
707
+ # Or create unified config (mix documentation + GitHub)
708
+ cat > configs/myframework_unified.json << 'EOF'
709
+ {
710
+ "name": "myframework",
711
+ "description": "Complete framework knowledge from docs + code",
712
+ "merge_mode": "rule-based",
713
+ "sources": [
714
+ {
715
+ "type": "documentation",
716
+ "base_url": "https://docs.myframework.com/",
717
+ "extract_api": true,
718
+ "max_pages": 200
719
+ },
720
+ {
721
+ "type": "github",
722
+ "repo": "owner/myframework",
723
+ "include_code": true,
724
+ "code_analysis_depth": "surface"
725
+ }
726
+ ]
727
+ }
728
+ EOF
729
+
730
+ # Run unified scraper
731
+ skill-seekers unified --config configs/myframework_unified.json
732
+
733
+ # Package and upload
734
+ skill-seekers package output/myframework/
735
+ # Upload output/myframework.zip to Claude - Done!
736
+ ```
737
+
738
+ **Time:** ~30-45 minutes | **Quality:** Production-ready with conflict detection | **Cost:** Free
739
+
740
+ **What Makes It Special:**
741
+
742
+ ✅ **Conflict Detection** - Automatically finds 4 types of discrepancies:
743
+ - 🔴 **Missing in code** (high): Documented but not implemented
744
+ - 🟡 **Missing in docs** (medium): Implemented but not documented
745
+ - ⚠️ **Signature mismatch**: Different parameters/types
746
+ - ℹ️ **Description mismatch**: Different explanations
747
+
748
+ ✅ **Transparent Reporting** - Shows both versions side-by-side:
749
+ ````markdown
750
+ #### `move_local_x(delta: float)`
751
+
752
+ ⚠️ **Conflict**: Documentation signature differs from implementation
753
+
754
+ **Documentation says:**
755
+ ```
756
+ def move_local_x(delta: float)
757
+ ```
758
+
759
+ **Code implementation:**
760
+ ```python
761
+ def move_local_x(delta: float, snap: bool = False) -> None
762
+ ```
763
+ ````
764
+
765
+ ✅ **Advantages:**
766
+ - **Identifies documentation gaps** - Find outdated or missing docs automatically
767
+ - **Catches code changes** - Know when APIs change without docs being updated
768
+ - **Single source of truth** - One skill showing intent (docs) AND reality (code)
769
+ - **Actionable insights** - Get suggestions for fixing each conflict
770
+ - **Development aid** - See what's actually in the codebase vs what's documented
771
+
772
+ **Example Unified Configs:**
773
+ - `configs/react_unified.json` - React docs + GitHub repo
774
+ - `configs/django_unified.json` - Django docs + GitHub repo
775
+ - `configs/fastapi_unified.json` - FastAPI docs + GitHub repo
776
+
777
+ **Full Guide:** See [docs/UNIFIED_SCRAPING.md](docs/UNIFIED_SCRAPING.md) for complete documentation.
778
+
779
+ ### Private Config Repositories (**NEW - v2.2.0**)
780
+
781
+ **The Problem:** Teams need to share custom configs for internal documentation, but don't want to publish them publicly.
782
+
783
+ **The Solution:** Register private git repositories as config sources. Fetch configs from team repos just like the public API, with full authentication support.
784
+
785
+ ```bash
786
+ # Setup: Set your GitHub token (one-time)
787
+ export GITHUB_TOKEN=ghp_your_token_here
788
+
789
+ # Option 1: Using MCP tools (recommended)
790
+ # Register your team's private repo
791
+ add_config_source(
792
+ name="team",
793
+ git_url="https://github.com/mycompany/skill-configs.git",
794
+ token_env="GITHUB_TOKEN"
795
+ )
796
+
797
+ # Fetch config from team repo
798
+ fetch_config(source="team", config_name="internal-api")
799
+
800
+ # List all registered sources
801
+ list_config_sources()
802
+
803
+ # Remove source when no longer needed
804
+ remove_config_source(name="team")
805
+ ```
806
+
807
+ **Direct Git URL mode** (no registration):
808
+ ```bash
809
+ # Fetch directly from git URL
810
+ fetch_config(
811
+ git_url="https://github.com/mycompany/configs.git",
812
+ config_name="react-custom",
813
+ token="ghp_your_token_here"
814
+ )
815
+ ```
816
+
817
+ **Supported Platforms:**
818
+ - GitHub (token env: `GITHUB_TOKEN`)
819
+ - GitLab (token env: `GITLAB_TOKEN`)
820
+ - Gitea (token env: `GITEA_TOKEN`)
821
+ - Bitbucket (token env: `BITBUCKET_TOKEN`)
822
+ - Any git server (token env: `GIT_TOKEN`)
823
+
824
+ **Use Cases:**
825
+
826
+ 📋 **Small Teams (3-5 people)**
827
+ ```bash
828
+ # Team lead creates repo
829
+ gh repo create myteam/skill-configs --private
830
+
831
+ # Add configs to repo
832
+ cd myteam-skill-configs
833
+ cp ../Skill_Seekers/configs/react.json ./react-custom.json
834
+ # Edit selectors, categories for your internal docs...
835
+ git add . && git commit -m "Add custom React config" && git push
836
+
837
+ # Team members register (one-time)
838
+ add_config_source(name="team", git_url="https://github.com/myteam/skill-configs.git")
839
+
840
+ # Everyone can now fetch
841
+ fetch_config(source="team", config_name="react-custom")
842
+ ```
843
+
844
+ đŸĸ **Enterprise (500+ developers)**
845
+ ```bash
846
+ # IT pre-configures sources for everyone
847
+ add_config_source(name="platform", git_url="gitlab.company.com/platform/configs", priority=1)
848
+ add_config_source(name="mobile", git_url="gitlab.company.com/mobile/configs", priority=2)
849
+ add_config_source(name="official", git_url="api.skillseekersweb.com", priority=3)
850
+
851
+ # Developers use transparently
852
+ fetch_config(config_name="internal-platform") # Finds in platform source
853
+ fetch_config(config_name="react") # Falls back to official API
854
+ ```
855
+
856
+ **Storage Locations:**
857
+ - Registry: `~/.skill-seekers/sources.json`
858
+ - Cache: `$SKILL_SEEKERS_CACHE_DIR` (default: `~/.skill-seekers/cache/`)
859
+
860
+ **Features:**
861
+ - ✅ **Shallow clone** - 10-50x faster, minimal disk space
862
+ - ✅ **Auto-pull** - Fetches latest changes automatically
863
+ - ✅ **Offline mode** - Works with cached repos when offline
864
+ - ✅ **Priority resolution** - Multiple sources with conflict resolution
865
+ - ✅ **Secure** - Tokens via environment variables only
866
+
867
+ **Example Team Repository:**
868
+
869
+ Try the included example:
870
+ ```bash
871
+ # Test with file:// URL (no auth needed)
872
+ cd /path/to/Skill_Seekers
873
+
874
+ # Run the E2E test
875
+ python3 configs/example-team/test_e2e.py
876
+
877
+ # Or test manually
878
+ add_config_source(
879
+ name="example",
880
+ git_url="file://$(pwd)/configs/example-team",
881
+ branch="master"
882
+ )
883
+
884
+ fetch_config(source="example", config_name="react-custom")
885
+ ```
886
+
887
+ **Full Guide:** See [docs/GIT_CONFIG_SOURCES.md](docs/GIT_CONFIG_SOURCES.md) for complete documentation.
888
+
889
+ ## How It Works
890
+
891
+ ```mermaid
892
+ graph LR
893
+ A[Documentation Website] --> B[Skill Seeker]
894
+ B --> C[Scraper]
895
+ B --> D[AI Enhancement]
896
+ B --> E[Packager]
897
+ C --> F[Organized References]
898
+ D --> F
899
+ F --> E
900
+ E --> G[Claude Skill .zip]
901
+ G --> H[Upload to Claude AI]
902
+ ```
903
+
904
+ 0. **Detect llms.txt**: Checks for llms-full.txt, llms.txt, llms-small.txt first
905
+ 1. **Scrape**: Extracts all pages from documentation
906
+ 2. **Categorize**: Organizes content into topics (API, guides, tutorials, etc.)
907
+ 3. **Enhance**: AI analyzes docs and creates comprehensive SKILL.md with examples
908
+ 4. **Package**: Bundles everything into a Claude-ready `.zip` file
909
+
910
+ ## 📋 Prerequisites
911
+
912
+ **Before you start, make sure you have:**
913
+
914
+ 1. **Python 3.10 or higher** - [Download](https://www.python.org/downloads/) | Check: `python3 --version`
915
+ 2. **Git** - [Download](https://git-scm.com/) | Check: `git --version`
916
+ 3. **15-30 minutes** for first-time setup
917
+
918
+ **First time user?** → **[Start Here: Bulletproof Quick Start Guide](BULLETPROOF_QUICKSTART.md)** 🎯
919
+
920
+ This guide walks you through EVERYTHING step-by-step (Python install, git clone, first skill creation).
921
+
922
+ ---
923
+
924
+ ## 🚀 Quick Start
925
+
926
+ ### Method 1: MCP Server for 5 AI Agents (Easiest - **NEW v2.4.0!**)
927
+
928
+ Use Skill Seekers directly from **Claude Code, Cursor, Windsurf, VS Code + Cline, or IntelliJ IDEA** with natural language!
929
+
930
+ ```bash
931
+ # Clone repository
932
+ git clone https://github.com/yusufkaraaslan/Skill_Seekers.git
933
+ cd Skill_Seekers
934
+
935
+ # One-time setup (5 minutes) - Auto-configures ALL 5 agents!
936
+ ./setup_mcp.sh
937
+
938
+ # Restart your AI agent, then just ask:
939
+ ```
940
+
941
+ **In Claude Code, Cursor, Windsurf, VS Code + Cline, or IntelliJ IDEA:**
942
+ ```
943
+ List all available configs
944
+ Generate config for Tailwind at https://tailwindcss.com/docs
945
+ Scrape docs using configs/react.json
946
+ Package skill at output/react/
947
+ ```
948
+
949
+ **Benefits:**
950
+ - ✅ No manual CLI commands
951
+ - ✅ Natural language interface
952
+ - ✅ Integrated with your workflow
953
+ - ✅ **18 tools** available instantly (up from 9!)
954
+ - ✅ **5 AI agents supported** - auto-configured with one command
955
+ - ✅ **Tested and working** in production
956
+
957
+ **NEW in v2.4.0:**
958
+ - ✅ **Upgraded to MCP SDK v1.25.0** - Latest features and performance
959
+ - ✅ **FastMCP Framework** - Modern, maintainable MCP implementation
960
+ - ✅ **HTTP + stdio transport** - Works with more AI agents
961
+ - ✅ **18 tools** (up from 9) - More capabilities
962
+ - ✅ **Multi-agent auto-configuration** - Setup all agents with one command
963
+
964
+ **Full guides:**
965
+ - 📘 [MCP Setup Guide](docs/MCP_SETUP.md) - Complete installation instructions
966
+ - 🧪 [MCP Testing Guide](docs/TEST_MCP_IN_CLAUDE_CODE.md) - Test all 18 tools
967
+ - 📦 [Large Documentation Guide](docs/LARGE_DOCUMENTATION.md) - Handle 10K-40K+ pages
968
+ - 📤 [Upload Guide](docs/UPLOAD_GUIDE.md) - How to upload skills to Claude
969
+
970
+ ### Method 2: CLI (Traditional)
971
+
972
+ #### One-Time Setup: Create Virtual Environment
973
+
974
+ ```bash
975
+ # Clone repository
976
+ git clone https://github.com/yusufkaraaslan/Skill_Seekers.git
977
+ cd Skill_Seekers
978
+
979
+ # Create virtual environment
980
+ python3 -m venv venv
981
+
982
+ # Activate virtual environment
983
+ source venv/bin/activate # macOS/Linux
984
+ # OR on Windows: venv\Scripts\activate
985
+
986
+ # Install dependencies
987
+ pip install requests beautifulsoup4 pytest
988
+
989
+ # Save dependencies
990
+ pip freeze > requirements.txt
991
+
992
+ # Optional: Install anthropic for API-based enhancement (not needed for LOCAL enhancement)
993
+ # pip install anthropic
994
+ ```
995
+
996
+ **Always activate the virtual environment before using Skill Seekers:**
997
+ ```bash
998
+ source venv/bin/activate # Run this each time you start a new terminal session
999
+ ```
1000
+
1001
+ #### Easiest: Use a Preset
1002
+
1003
+ ```bash
1004
+ # Make sure venv is activated (you should see (venv) in your prompt)
1005
+ source venv/bin/activate
1006
+
1007
+ # Optional: Estimate pages first (fast, 1-2 minutes)
1008
+ skill-seekers estimate configs/godot.json
1009
+
1010
+ # Use Godot preset
1011
+ skill-seekers scrape --config configs/godot.json
1012
+
1013
+ # Use React preset
1014
+ skill-seekers scrape --config configs/react.json
1015
+
1016
+ # See all presets
1017
+ ls configs/
1018
+ ```
1019
+
1020
+ ### Interactive Mode
1021
+
1022
+ ```bash
1023
+ skill-seekers scrape --interactive
1024
+ ```
1025
+
1026
+ ### Quick Mode
1027
+
1028
+ ```bash
1029
+ skill-seekers scrape \
1030
+ --name react \
1031
+ --url https://react.dev/ \
1032
+ --description "React framework for UIs"
1033
+ ```
1034
+
1035
+ ## 📤 Uploading Skills to Claude
1036
+
1037
+ Once your skill is packaged, you need to upload it to Claude:
1038
+
1039
+ ### Option 1: Automatic Upload (API-based)
1040
+
1041
+ ```bash
1042
+ # Set your API key (one-time)
1043
+ export ANTHROPIC_API_KEY=sk-ant-...
1044
+
1045
+ # Package and upload automatically
1046
+ skill-seekers package output/react/ --upload
1047
+
1048
+ # OR upload existing .zip
1049
+ skill-seekers upload output/react.zip
1050
+ ```
1051
+
1052
+ **Benefits:**
1053
+ - ✅ Fully automatic
1054
+ - ✅ No manual steps
1055
+ - ✅ Works from command line
1056
+
1057
+ **Requirements:**
1058
+ - Anthropic API key (get from https://console.anthropic.com/)
1059
+
1060
+ ### Option 2: Manual Upload (No API Key)
1061
+
1062
+ ```bash
1063
+ # Package skill
1064
+ skill-seekers package output/react/
1065
+
1066
+ # This will:
1067
+ # 1. Create output/react.zip
1068
+ # 2. Open the output/ folder automatically
1069
+ # 3. Show upload instructions
1070
+
1071
+ # Then manually upload:
1072
+ # - Go to https://claude.ai/skills
1073
+ # - Click "Upload Skill"
1074
+ # - Select output/react.zip
1075
+ # - Done!
1076
+ ```
1077
+
1078
+ **Benefits:**
1079
+ - ✅ No API key needed
1080
+ - ✅ Works for everyone
1081
+ - ✅ Folder opens automatically
1082
+
1083
+ ### Option 3: Claude Code (MCP) - Smart & Automatic
1084
+
1085
+ ```
1086
+ In Claude Code, just ask:
1087
+ "Package and upload the React skill"
1088
+
1089
+ # With API key set:
1090
+ # - Packages the skill
1091
+ # - Uploads to Claude automatically
1092
+ # - Done! ✅
1093
+
1094
+ # Without API key:
1095
+ # - Packages the skill
1096
+ # - Shows where to find the .zip
1097
+ # - Provides manual upload instructions
1098
+ ```
1099
+
1100
+ **Benefits:**
1101
+ - ✅ Natural language
1102
+ - ✅ Smart auto-detection (uploads if API key available)
1103
+ - ✅ Works with or without API key
1104
+ - ✅ No errors or failures
1105
+
1106
+ ---
1107
+
1108
+ ## 🤖 Installing to AI Agents
1109
+
1110
+ Skill Seekers can automatically install skills to 10+ AI coding agents.
1111
+
1112
+ ### Quick Start
1113
+
1114
+ ```bash
1115
+ # Install to specific agent
1116
+ skill-seekers install-agent output/react/ --agent cursor
1117
+
1118
+ # Install to all agents at once
1119
+ skill-seekers install-agent output/react/ --agent all
1120
+
1121
+ # Overwrite existing installation
1122
+ skill-seekers install-agent output/react/ --agent claude --force
1123
+
1124
+ # Preview without installing
1125
+ skill-seekers install-agent output/react/ --agent cursor --dry-run
1126
+ ```
1127
+
1128
+ ### Supported Agents
1129
+
1130
+ | Agent | Path | Type |
1131
+ |-------|------|------|
1132
+ | **Claude Code** | `~/.claude/skills/` | Global |
1133
+ | **Cursor** | `.cursor/skills/` | Project |
1134
+ | **VS Code / Copilot** | `.github/skills/` | Project |
1135
+ | **Amp** | `~/.amp/skills/` | Global |
1136
+ | **Goose** | `~/.config/goose/skills/` | Global |
1137
+ | **OpenCode** | `~/.opencode/skills/` | Global |
1138
+ | **Letta** | `~/.letta/skills/` | Global |
1139
+ | **Aide** | `~/.aide/skills/` | Global |
1140
+ | **Windsurf** | `~/.windsurf/skills/` | Global |
1141
+ | **Neovate Code** | `~/.neovate/skills/` | Global |
1142
+
1143
+ **Global paths** install to user's home directory (~/).
1144
+ **Project paths** install to current project's root directory.
1145
+
1146
+ ### Complete Workflow
1147
+
1148
+ ```bash
1149
+ # 1. Scrape documentation
1150
+ skill-seekers scrape --config configs/react.json --enhance-local
1151
+
1152
+ # 2. Package skill
1153
+ skill-seekers package output/react/
1154
+
1155
+ # 3. Install to your agent
1156
+ skill-seekers install-agent output/react/ --agent cursor
1157
+
1158
+ # 4. Restart Cursor to load the skill
1159
+ ```
1160
+
1161
+ ---
1162
+
1163
+ ## 🤖 Multi-Agent MCP Support (NEW in v2.4.0)
1164
+
1165
+ **Skill Seekers MCP server now works with 5 leading AI coding agents!**
1166
+
1167
+ ### Supported AI Agents
1168
+
1169
+ | Agent | Transport | Setup Difficulty | Auto-Configured |
1170
+ |-------|-----------|------------------|-----------------|
1171
+ | **Claude Code** | stdio | Easy | ✅ Yes |
1172
+ | **VS Code + Cline** | stdio | Easy | ✅ Yes |
1173
+ | **Cursor** | HTTP | Medium | ✅ Yes |
1174
+ | **Windsurf** | HTTP | Medium | ✅ Yes |
1175
+ | **IntelliJ IDEA** | HTTP | Medium | ✅ Yes |
1176
+
1177
+ ### Quick Setup - All Agents at Once
1178
+
1179
+ ```bash
1180
+ # Clone repository
1181
+ git clone https://github.com/yusufkaraaslan/Skill_Seekers.git
1182
+ cd Skill_Seekers
1183
+
1184
+ # Run one command - auto-configures ALL 5 agents!
1185
+ ./setup_mcp.sh
1186
+
1187
+ # Restart your AI agent and start using natural language:
1188
+ "List all available configs"
1189
+ "Generate a React skill from https://react.dev/"
1190
+ "Package the skill at output/react/"
1191
+ ```
1192
+
1193
+ **What `setup_mcp.sh` does:**
1194
+ 1. ✅ Installs MCP server dependencies
1195
+ 2. ✅ Configures Claude Code (stdio transport)
1196
+ 3. ✅ Configures VS Code + Cline (stdio transport)
1197
+ 4. ✅ Configures Cursor (HTTP transport)
1198
+ 5. ✅ Configures Windsurf (HTTP transport)
1199
+ 6. ✅ Configures IntelliJ IDEA (HTTP transport)
1200
+ 7. ✅ Shows next steps for each agent
1201
+
1202
+ **Time:** 5 minutes | **Result:** All agents configured and ready to use
1203
+
1204
+ ### Transport Modes
1205
+
1206
+ Skill Seekers MCP server supports 2 transport modes:
1207
+
1208
+ #### stdio Transport (Claude Code, VS Code + Cline)
1209
+
1210
+ **How it works:** Agent launches MCP server as subprocess and communicates via stdin/stdout
1211
+
1212
+ **Benefits:**
1213
+ - ✅ More secure (no network ports)
1214
+ - ✅ Automatic lifecycle management
1215
+ - ✅ Simpler configuration
1216
+ - ✅ Better for single-user development
1217
+
1218
+ **Configuration example (Claude Code):**
1219
+ ```json
1220
+ {
1221
+ "mcpServers": {
1222
+ "skill-seeker": {
1223
+ "command": "python3",
1224
+ "args": ["-m", "skill_seekers.mcp.server_fastmcp"],
1225
+ "cwd": "/path/to/Skill_Seekers"
1226
+ }
1227
+ }
1228
+ }
1229
+ ```
1230
+
1231
+ #### HTTP Transport (Cursor, Windsurf, IntelliJ IDEA)
1232
+
1233
+ **How it works:** MCP server runs as HTTP service, agents connect as clients
1234
+
1235
+ **Benefits:**
1236
+ - ✅ Multi-agent support (one server, multiple clients)
1237
+ - ✅ Server can run independently
1238
+ - ✅ Better for team collaboration
1239
+ - ✅ Easier debugging and monitoring
1240
+
1241
+ **Configuration example (Cursor):**
1242
+ ```json
1243
+ {
1244
+ "mcpServers": {
1245
+ "skill-seeker": {
1246
+ "url": "http://localhost:8765/sse"
1247
+ }
1248
+ }
1249
+ }
1250
+ ```
1251
+
1252
+ **Starting HTTP server:**
1253
+ ```bash
1254
+ # Start server manually (runs in background)
1255
+ cd /path/to/Skill_Seekers
1256
+ python3 -m skill_seekers.mcp.server_fastmcp --transport http --port 8765
1257
+
1258
+ # Or use auto-start script
1259
+ ./scripts/start_mcp_server.sh
1260
+ ```
1261
+
1262
+ ### Agent-Specific Instructions
1263
+
1264
+ #### Claude Code (stdio)
1265
+
1266
+ ```bash
1267
+ # Already configured by setup_mcp.sh!
1268
+ # Just restart Claude Code
1269
+
1270
+ # Config location: ~/.claude/claude_code_config.json
1271
+ ```
1272
+
1273
+ **Usage:**
1274
+ ```
1275
+ In Claude Code:
1276
+ "List all available configs"
1277
+ "Scrape React docs at https://react.dev/"
1278
+ ```
1279
+
1280
+ #### VS Code + Cline Extension (stdio)
1281
+
1282
+ ```bash
1283
+ # Already configured by setup_mcp.sh!
1284
+ # Just restart VS Code
1285
+
1286
+ # Config location: ~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json
1287
+ ```
1288
+
1289
+ **Usage:**
1290
+ ```
1291
+ In Cline:
1292
+ "Generate config for Tailwind"
1293
+ "Package skill at output/tailwind/"
1294
+ ```
1295
+
1296
+ #### Cursor (HTTP)
1297
+
1298
+ ```bash
1299
+ # 1. HTTP settings were already configured by setup_mcp.sh
1300
+ # Config location: ~/.cursor/mcp_settings.json
1301
+
1302
+ # 2. Start HTTP server (one-time per session)
1303
+ ./scripts/start_mcp_server.sh
1304
+
1305
+ # 3. Restart Cursor
1306
+ ```
1307
+
1308
+ **Usage:**
1309
+ ```
1310
+ In Cursor:
1311
+ "Show me all skill-seeker configs"
1312
+ "Create Django skill from docs"
1313
+ ```
1314
+
1315
+ #### Windsurf (HTTP)
1316
+
1317
+ ```bash
1318
+ # 1. HTTP settings were already configured by setup_mcp.sh
1319
+ # Config location: ~/.windsurf/mcp_settings.json
1320
+
1321
+ # 2. Start HTTP server (one-time per session)
1322
+ ./scripts/start_mcp_server.sh
1323
+
1324
+ # 3. Restart Windsurf
1325
+ ```
1326
+
1327
+ **Usage:**
1328
+ ```
1329
+ In Windsurf:
1330
+ "Estimate pages for Godot config"
1331
+ "Build unified skill for FastAPI"
1332
+ ```
1333
+
1334
+ #### IntelliJ IDEA (HTTP)
1335
+
1336
+ ```bash
1337
+ # 1. HTTP settings were already configured by setup_mcp.sh
1338
+ # Config location: ~/.intellij/mcp_settings.json
1339
+
1340
+ # 2. Start HTTP server (one-time per session)
1341
+ ./scripts/start_mcp_server.sh
1342
+
1343
+ # 3. Restart IntelliJ IDEA
1344
+ ```
1345
+
1346
+ **Usage:**
1347
+ ```
1348
+ In IntelliJ IDEA:
1349
+ "Validate my config file"
1350
+ "Split large Godot config"
1351
+ ```
1352
+
1353
+ ### Available MCP Tools (18 Total)
1354
+
1355
+ All agents have access to these 18 tools:
1356
+
1357
+ **Core Tools (9):**
1358
+ 1. `list_configs` - List all available preset configurations
1359
+ 2. `generate_config` - Generate new config for any docs site
1360
+ 3. `validate_config` - Validate config structure
1361
+ 4. `estimate_pages` - Estimate page count before scraping
1362
+ 5. `scrape_docs` - Scrape and build skill
1363
+ 6. `package_skill` - Package skill into .zip
1364
+ 7. `upload_skill` - Upload .zip to Claude
1365
+ 8. `split_config` - Split large documentation configs
1366
+ 9. `generate_router` - Generate router/hub skills
1367
+
1368
+ **Extended Tools (9 - NEW!):**
1369
+ 10. `scrape_github` - Scrape GitHub repositories
1370
+ 11. `scrape_pdf` - Extract content from PDFs
1371
+ 12. `unified_scrape` - Combine multiple sources
1372
+ 13. `merge_sources` - Merge documentation + code
1373
+ 14. `detect_conflicts` - Find doc/code discrepancies
1374
+ 15. `add_config_source` - Register private git repos
1375
+ 16. `fetch_config` - Fetch configs from git
1376
+ 17. `list_config_sources` - List registered sources
+ 18. `remove_config_source` - Remove a registered source
1377
+
1378
+ ### What's New in v2.4.0
1379
+
1380
+ **MCP Infrastructure:**
1381
+ - ✅ **Upgraded to MCP SDK v1.25.0** - Latest stable version
1382
+ - ✅ **FastMCP Framework** - Modern, maintainable implementation
1383
+ - ✅ **Dual Transport** - stdio + HTTP support
1384
+ - ✅ **18 Tools** - Up from 9 (exactly 2x!)
1385
+ - ✅ **Auto-Configuration** - One script configures all agents
1386
+
1387
+ **Agent Support:**
1388
+ - ✅ **5 Agents Supported** - Claude Code, VS Code + Cline, Cursor, Windsurf, IntelliJ IDEA
1389
+ - ✅ **Automatic Setup** - `./setup_mcp.sh` configures everything
1390
+ - ✅ **Transport Detection** - Auto-selects stdio vs HTTP per agent
1391
+ - ✅ **Config Management** - Handles all agent-specific config formats
1392
+
1393
+ **Developer Experience:**
1394
+ - ✅ **One Setup Command** - Works for all agents
1395
+ - ✅ **Natural Language** - Use plain English in any agent
1396
+ - ✅ **No CLI Required** - All features via MCP tools
1397
+ - ✅ **Full Testing** - All 18 tools tested and working
1398
+
1399
+ ### Troubleshooting Multi-Agent Setup
1400
+
1401
+ **HTTP server not starting?**
1402
+ ```bash
1403
+ # Check if port 8765 is in use
1404
+ lsof -i :8765
1405
+
1406
+ # Use different port
1407
+ python3 -m skill_seekers.mcp.server_fastmcp --transport http --port 9000
1408
+
1409
+ # Update agent config with new port
1410
+ ```
1411
+
1412
+ **Agent not finding MCP server?**
1413
+ ```bash
1414
+ # Verify config file exists
1415
+ cat ~/.claude/claude_code_config.json
1416
+ cat ~/.cursor/mcp_settings.json
1417
+
1418
+ # Re-run setup
1419
+ ./setup_mcp.sh
1420
+
1421
+ # Check server logs
1422
+ tail -f logs/mcp_server.log
1423
+ ```
1424
+
1425
+ **Tools not appearing in agent?**
1426
+ ```bash
1427
+ # Restart agent completely (quit and relaunch)
1428
+ # For HTTP transport, ensure server is running:
1429
+ ps aux | grep "skill_seekers.mcp.server_fastmcp"
1430
+
1431
+ # Test server directly
1432
+ curl http://localhost:8765/health
1433
+ ```
1434
+
1435
+ ### Complete Multi-Agent Workflow
1436
+
1437
+ ```bash
1438
+ # 1. One-time setup (5 minutes)
1439
+ git clone https://github.com/yusufkaraaslan/Skill_Seekers.git
1440
+ cd Skill_Seekers
1441
+ ./setup_mcp.sh
1442
+
1443
+ # 2. For HTTP agents (Cursor/Windsurf/IntelliJ), start server
1444
+ ./scripts/start_mcp_server.sh
1445
+
1446
+ # 3. Restart your AI agent
1447
+
1448
+ # 4. Use natural language in ANY agent:
1449
+ "List all available configs"
1450
+ "Generate React skill from https://react.dev/"
1451
+ "Estimate pages for Godot config"
1452
+ "Package and upload skill at output/react/"
1453
+
1454
+ # 5. Result: Skills created without touching CLI!
1455
+ ```
1456
+
1457
+ **Full Guide:** See [docs/MCP_SETUP.md](docs/MCP_SETUP.md) for detailed multi-agent setup instructions.
1458
+
1459
+ ---
1460
+
1461
+ ## 📁 Simple Structure
1462
+
1463
+ ```
1464
+ doc-to-skill/
1465
+ ├── cli/
1466
+ │ ├── doc_scraper.py # Main scraping tool
1467
+ │ ├── package_skill.py # Package to .zip
1468
+ │ ├── upload_skill.py # Auto-upload (API)
1469
+ │ └── enhance_skill.py # AI enhancement
1470
+ ├── mcp/ # MCP server for 5 AI agents
1471
+ │ └── server.py # 18 MCP tools (v2.7.0)
1472
+ ├── configs/ # Preset configurations
1473
+ │ ├── godot.json # Godot Engine
1474
+ │ ├── react.json # React
1475
+ │ ├── vue.json # Vue.js
1476
+ │ ├── django.json # Django
1477
+ │ └── fastapi.json # FastAPI
1478
+ └── output/ # All output (auto-created)
1479
+ ├── godot_data/ # Scraped data
1480
+ ├── godot/ # Built skill
1481
+ └── godot.zip # Packaged skill
1482
+ ```
1483
+
1484
+ ## ✨ Features
1485
+
1486
+ ### 1. Fast Page Estimation (NEW!)
1487
+
1488
+ ```bash
1489
+ skill-seekers estimate configs/react.json
1490
+
1491
+ # Output:
1492
+ 📊 ESTIMATION RESULTS
1493
+ ✅ Pages Discovered: 180
1494
+ 📈 Estimated Total: 230
1495
+ âąī¸ Time Elapsed: 1.2 minutes
1496
+ 💡 Recommended max_pages: 280
1497
+ ```
1498
+
1499
+ **Benefits:**
1500
+ - Know page count BEFORE scraping (saves time)
1501
+ - Validates URL patterns work correctly
1502
+ - Estimates total scraping time
1503
+ - Recommends optimal `max_pages` setting
1504
+ - Fast (1-2 minutes vs 20-40 minutes full scrape)
1505
+
1506
+ ### 2. Auto-Detect Existing Data
1507
+
1508
+ ```bash
1509
+ skill-seekers scrape --config configs/godot.json
1510
+
1511
+ # If data exists:
1512
+ ✓ Found existing data: 245 pages
1513
+ Use existing data? (y/n): y
1514
+ â­ī¸ Skipping scrape, using existing data
1515
+ ```
1516
+
1517
+ ### 3. Knowledge Generation
1518
+
1519
+ **Automatic pattern extraction:**
1520
+ - Extracts common code patterns from docs
1521
+ - Detects programming language
1522
+ - Creates quick reference with real examples
1523
+ - Smarter categorization with scoring
1524
+
1525
+ **Enhanced SKILL.md:**
1526
+ - Real code examples from documentation
1527
+ - Language-annotated code blocks
1528
+ - Common patterns section
1529
+ - Quick reference from actual usage examples
1530
+
1531
+ ### 4. Smart Categorization
1532
+
1533
+ Automatically infers categories from:
1534
+ - URL structure
1535
+ - Page titles
1536
+ - Content keywords
1537
+ - With scoring for better accuracy
1538
+
1539
+ ### 5. Code Language Detection
1540
+
1541
+ ```python
1542
+ # Automatically detects:
1543
+ - Python (def, import, from)
1544
+ - JavaScript (const, let, =>)
1545
+ - GDScript (func, var, extends)
1546
+ - C++ (#include, int main)
1547
+ - And more...
1548
+ ```
1549
+
1550
+ ### 5. Skip Scraping
1551
+
1552
+ ```bash
1553
+ # Scrape once
1554
+ skill-seekers scrape --config configs/react.json
1555
+
1556
+ # Later, just rebuild (instant)
1557
+ skill-seekers scrape --config configs/react.json --skip-scrape
1558
+ ```
1559
+
1560
+ ### 6. Async Mode for Faster Scraping (2-3x Speed!)
1561
+
1562
+ ```bash
1563
+ # Enable async mode with 8 workers (recommended for large docs)
1564
+ skill-seekers scrape --config configs/react.json --async --workers 8
1565
+
1566
+ # Medium docs (~100-500 pages)
1567
+ skill-seekers scrape --config configs/mydocs.json --async --workers 4
1568
+
1569
+ # Large docs (2000+ pages) with no rate limiting
1570
+ skill-seekers scrape --config configs/largedocs.json --async --workers 8 --no-rate-limit
1571
+ ```
1572
+
1573
+ **Performance Comparison:**
1574
+ - **Sync mode (threads):** ~18 pages/sec, 120 MB memory
1575
+ - **Async mode:** ~55 pages/sec, 40 MB memory
1576
+ - **Result:** 3x faster, 66% less memory!
1577
+
1578
+ **When to use:**
1579
+ - ✅ Large documentation (500+ pages)
1580
+ - ✅ Network latency is high
1581
+ - ✅ Memory is constrained
1582
+ - ❌ Small docs (< 100 pages) - overhead not worth it
1583
+
1584
+ **See full guide:** [ASYNC_SUPPORT.md](ASYNC_SUPPORT.md)
1585
+
1586
+ ### 7. AI-Powered SKILL.md Enhancement
1587
+
1588
+ ```bash
1589
+ # Option 1: During scraping (API-based, requires API key)
1590
+ pip3 install anthropic
1591
+ export ANTHROPIC_API_KEY=sk-ant-...
1592
+ skill-seekers scrape --config configs/react.json --enhance
1593
+
1594
+ # Option 2: During scraping (LOCAL, no API key - uses Claude Code Max)
1595
+ skill-seekers scrape --config configs/react.json --enhance-local
1596
+
1597
+ # Option 3: After scraping (API-based, standalone)
1598
+ skill-seekers enhance output/react/
1599
+
1600
+ # Option 4: After scraping (LOCAL, no API key, standalone)
1601
+ skill-seekers enhance output/react/
1602
+ ```
1603
+
1604
+ **What it does:**
1605
+ - Reads your reference documentation
1606
+ - Uses Claude to generate an excellent SKILL.md
1607
+ - Extracts best code examples (5-10 practical examples)
1608
+ - Creates comprehensive quick reference
1609
+ - Adds domain-specific key concepts
1610
+ - Provides navigation guidance for different skill levels
1611
+ - Automatically backs up original
1612
+ - **Quality:** Transforms 75-line templates into 500+ line comprehensive guides
1613
+
1614
+ **LOCAL Enhancement (Recommended):**
1615
+ - Uses your Claude Code Max plan (no API costs)
1616
+ - Opens new terminal with Claude Code
1617
+ - Analyzes reference files automatically
1618
+ - Takes 30-60 seconds
1619
+ - Quality: 9/10 (comparable to API version)
1620
+
1621
+ ### 7. Large Documentation Support (10K-40K+ Pages)
1622
+
1623
+ **For massive documentation sites like Godot (40K pages), AWS, or Microsoft Docs:**
1624
+
1625
+ ```bash
1626
+ # 1. Estimate first (discover page count)
1627
+ skill-seekers estimate configs/godot.json
1628
+
1629
+ # 2. Auto-split into focused sub-skills
1630
+ python3 -m skill_seekers.cli.split_config configs/godot.json --strategy router
1631
+
1632
+ # Creates:
1633
+ # - godot-scripting.json (5K pages)
1634
+ # - godot-2d.json (8K pages)
1635
+ # - godot-3d.json (10K pages)
1636
+ # - godot-physics.json (6K pages)
1637
+ # - godot-shaders.json (11K pages)
1638
+
1639
+ # 3. Scrape all in parallel (4-8 hours instead of 20-40!)
1640
+ for config in configs/godot-*.json; do
1641
+ skill-seekers scrape --config $config &
1642
+ done
1643
+ wait
1644
+
1645
+ # 4. Generate intelligent router/hub skill
1646
+ python3 -m skill_seekers.cli.generate_router configs/godot-*.json
1647
+
1648
+ # 5. Package all skills
1649
+ python3 -m skill_seekers.cli.package_multi output/godot*/
1650
+
1651
+ # 6. Upload all .zip files to Claude
1652
+ # Users just ask questions naturally!
1653
+ # Router automatically directs to the right sub-skill!
1654
+ ```
1655
+
1656
+ **Split Strategies:**
1657
+ - **auto** - Intelligently detects best strategy based on page count
1658
+ - **category** - Split by documentation categories (scripting, 2d, 3d, etc.)
1659
+ - **router** - Create hub skill + specialized sub-skills (RECOMMENDED)
1660
+ - **size** - Split every N pages (for docs without clear categories)
1661
+
1662
+ **Benefits:**
1663
+ - ✅ Faster scraping (parallel execution)
1664
+ - ✅ More focused skills (better Claude performance)
1665
+ - ✅ Easier maintenance (update one topic at a time)
1666
+ - ✅ Natural user experience (router handles routing)
1667
+ - ✅ Avoids context window limits
1668
+
1669
+ **Configuration:**
1670
+ ```json
1671
+ {
1672
+ "name": "godot",
1673
+ "max_pages": 40000,
1674
+ "split_strategy": "router",
1675
+ "split_config": {
1676
+ "target_pages_per_skill": 5000,
1677
+ "create_router": true,
1678
+ "split_by_categories": ["scripting", "2d", "3d", "physics"]
1679
+ }
1680
+ }
1681
+ ```
1682
+
1683
+ **Full Guide:** [Large Documentation Guide](docs/LARGE_DOCUMENTATION.md)
1684
+
1685
+ ### 8. Checkpoint/Resume for Long Scrapes
1686
+
1687
+ **Never lose progress on long-running scrapes:**
1688
+
1689
+ ```bash
1690
+ # Enable in config ("interval" = save a checkpoint every N pages, here 1000)
1691
+ {
1692
+ "checkpoint": {
1693
+ "enabled": true,
1694
+ "interval": 1000
1695
+ }
1696
+ }
1697
+
1698
+ # If scrape is interrupted (Ctrl+C or crash)
1699
+ skill-seekers scrape --config configs/godot.json --resume
1700
+
1701
+ # Resume from last checkpoint
1702
+ ✅ Resuming from checkpoint (12,450 pages scraped)
1703
+ â­ī¸ Skipping 12,450 already-scraped pages
1704
+ 🔄 Continuing from where we left off...
1705
+
1706
+ # Start fresh (clear checkpoint)
1707
+ skill-seekers scrape --config configs/godot.json --fresh
1708
+ ```
1709
+
1710
+ **Benefits:**
1711
+ - ✅ Auto-saves every 1000 pages (configurable)
1712
+ - ✅ Saves on interruption (Ctrl+C)
1713
+ - ✅ Resume with `--resume` flag
1714
+ - ✅ Never lose hours of scraping progress
1715
+
1716
+ ## 🎯 Complete Workflows
1717
+
1718
+ ### First Time (With Scraping + Enhancement)
1719
+
1720
+ ```bash
1721
+ # 1. Scrape + Build + AI Enhancement (LOCAL, no API key)
1722
+ skill-seekers scrape --config configs/godot.json --enhance-local
1723
+
1724
+ # 2. Wait for new terminal to close (enhancement completes)
1725
+ # Check the enhanced SKILL.md:
1726
+ cat output/godot/SKILL.md
1727
+
1728
+ # 3. Package
1729
+ skill-seekers package output/godot/
1730
+
1731
+ # 4. Done! You have godot.zip with excellent SKILL.md
1732
+ ```
1733
+
1734
+ **Time:** 20-40 minutes (scraping) + 60 seconds (enhancement) = ~21-41 minutes
1735
+
1736
+ ### Using Existing Data (Fast!)
1737
+
1738
+ ```bash
1739
+ # 1. Use cached data + Local Enhancement
1740
+ skill-seekers scrape --config configs/godot.json --skip-scrape
1741
+ skill-seekers enhance output/godot/
1742
+
1743
+ # 2. Package
1744
+ skill-seekers package output/godot/
1745
+
1746
+ # 3. Done!
1747
+ ```
1748
+
1749
+ **Time:** 1-3 minutes (build) + 60 seconds (enhancement) = ~2-4 minutes total
1750
+
1751
+ ### Without Enhancement (Basic)
1752
+
1753
+ ```bash
1754
+ # 1. Scrape + Build (no enhancement)
1755
+ skill-seekers scrape --config configs/godot.json
1756
+
1757
+ # 2. Package
1758
+ skill-seekers package output/godot/
1759
+
1760
+ # 3. Done! (SKILL.md will be basic template)
1761
+ ```
1762
+
1763
+ **Time:** 20-40 minutes
1764
+ **Note:** SKILL.md will be generic - enhancement strongly recommended!
1765
+
1766
+ ## 📋 Available Presets
1767
+
1768
+ | Config | Framework | Description |
1769
+ |--------|-----------|-------------|
1770
+ | `godot.json` | Godot Engine | Game development |
1771
+ | `react.json` | React | UI framework |
1772
+ | `vue.json` | Vue.js | Progressive framework |
1773
+ | `django.json` | Django | Python web framework |
1774
+ | `fastapi.json` | FastAPI | Modern Python API |
1775
+ | `ansible-core.json` | Ansible Core 2.19 | Automation & configuration |
1776
+
1777
+ ### Using Presets
1778
+
1779
+ ```bash
1780
+ # Godot
1781
+ skill-seekers scrape --config configs/godot.json
1782
+
1783
+ # React
1784
+ skill-seekers scrape --config configs/react.json
1785
+
1786
+ # Vue
1787
+ skill-seekers scrape --config configs/vue.json
1788
+
1789
+ # Django
1790
+ skill-seekers scrape --config configs/django.json
1791
+
1792
+ # FastAPI
1793
+ skill-seekers scrape --config configs/fastapi.json
1794
+
1795
+ # Ansible
1796
+ skill-seekers scrape --config configs/ansible-core.json
1797
+ ```
1798
+
1799
+ ## 🎨 Creating Your Own Config
1800
+
1801
+ ### Option 1: Interactive
1802
+
1803
+ ```bash
1804
+ skill-seekers scrape --interactive
1805
+ # Follow prompts, it will create the config for you
1806
+ ```
1807
+
1808
+ ### Option 2: Copy and Edit
1809
+
1810
+ ```bash
1811
+ # Copy a preset
1812
+ cp configs/react.json configs/myframework.json
1813
+
1814
+ # Edit it
1815
+ nano configs/myframework.json
1816
+
1817
+ # Use it
1818
+ skill-seekers scrape --config configs/myframework.json
1819
+ ```
1820
+
1821
+ ### Config Structure
1822
+
1823
+ ```json
1824
+ {
1825
+ "name": "myframework",
1826
+ "description": "When to use this skill",
1827
+ "base_url": "https://docs.myframework.com/",
1828
+ "selectors": {
1829
+ "main_content": "article",
1830
+ "title": "h1",
1831
+ "code_blocks": "pre code"
1832
+ },
1833
+ "url_patterns": {
1834
+ "include": ["/docs", "/guide"],
1835
+ "exclude": ["/blog", "/about"]
1836
+ },
1837
+ "categories": {
1838
+ "getting_started": ["intro", "quickstart"],
1839
+ "api": ["api", "reference"]
1840
+ },
1841
+ "rate_limit": 0.5,
1842
+ "max_pages": 500
1843
+ }
1844
+ ```
1845
+
1846
+ ## 📊 What Gets Created
1847
+
1848
+ ```
1849
+ output/
1850
+ ├── godot_data/ # Scraped raw data
1851
+ │ ├── pages/ # JSON files (one per page)
1852
+ │ └── summary.json # Overview
1853
+ │
1854
+ └── godot/ # The skill
1855
+ ├── SKILL.md # Enhanced with real examples
1856
+ ├── references/ # Categorized docs
1857
+ │ ├── index.md
1858
+ │ ├── getting_started.md
1859
+ │ ├── scripting.md
1860
+ │ └── ...
1861
+ ├── scripts/ # Empty (add your own)
1862
+ └── assets/ # Empty (add your own)
1863
+ ```
1864
+
1865
+ ## 🎯 Command Line Options
1866
+
1867
+ ```bash
1868
+ # Interactive mode
1869
+ skill-seekers scrape --interactive
1870
+
1871
+ # Use config file
1872
+ skill-seekers scrape --config configs/godot.json
1873
+
1874
+ # Quick mode
1875
+ skill-seekers scrape --name react --url https://react.dev/
1876
+
1877
+ # Skip scraping (use existing data)
1878
+ skill-seekers scrape --config configs/godot.json --skip-scrape
1879
+
1880
+ # With description
1881
+ skill-seekers scrape \
1882
+ --name react \
1883
+ --url https://react.dev/ \
1884
+ --description "React framework for building UIs"
1885
+ ```
1886
+
1887
+ ## 💡 Tips
1888
+
1889
+ ### 1. Test Small First
1890
+
1891
+ Edit `max_pages` in config to test (for example, with just 20 pages):
1892
+ ```json
1893
+ {
1894
+ "max_pages": 20
1895
+ }
1896
+ ```
1897
+
1898
+ ### 2. Reuse Scraped Data
1899
+
1900
+ ```bash
1901
+ # Scrape once
1902
+ skill-seekers scrape --config configs/react.json
1903
+
1904
+ # Rebuild multiple times (instant)
1905
+ skill-seekers scrape --config configs/react.json --skip-scrape
1906
+ skill-seekers scrape --config configs/react.json --skip-scrape
1907
+ ```
1908
+
1909
+ ### 3. Finding Selectors
1910
+
1911
+ ```python
1912
+ # Test in Python
1913
+ from bs4 import BeautifulSoup
1914
+ import requests
1915
+
1916
+ url = "https://docs.example.com/page"
1917
+ soup = BeautifulSoup(requests.get(url).content, 'html.parser')
1918
+
1919
+ # Try different selectors
1920
+ print(soup.select_one('article'))
1921
+ print(soup.select_one('main'))
1922
+ print(soup.select_one('div[role="main"]'))
1923
+ ```
1924
+
1925
+ ### 4. Check Output Quality
1926
+
1927
+ ```bash
1928
+ # After building, check:
1929
+ cat output/godot/SKILL.md # Should have real examples
1930
+ cat output/godot/references/index.md # Categories
1931
+ ```
1932
+
1933
+ ## 🐛 Troubleshooting
1934
+
1935
+ ### No Content Extracted?
1936
+ - Check your `main_content` selector
1937
+ - Try: `article`, `main`, `div[role="main"]`
1938
+
1939
+ ### Data Exists But Won't Use It?
1940
+ ```bash
1941
+ # Force re-scrape
1942
+ rm -rf output/myframework_data/
1943
+ skill-seekers scrape --config configs/myframework.json
1944
+ ```
1945
+
1946
+ ### Categories Not Good?
1947
+ Edit the config `categories` section with better keywords.
1948
+
1949
+ ### Want to Update Docs?
1950
+ ```bash
1951
+ # Delete old data
1952
+ rm -rf output/godot_data/
1953
+
1954
+ # Re-scrape
1955
+ skill-seekers scrape --config configs/godot.json
1956
+ ```
1957
+
1958
+ ## 📈 Performance
1959
+
1960
+ | Task | Time | Notes |
1961
+ |------|------|-------|
1962
+ | Scraping (sync) | 15-45 min | First time only, thread-based |
1963
+ | Scraping (async) | 5-15 min | 2-3x faster with --async flag |
1964
+ | Building | 1-3 min | Fast! |
1965
+ | Re-building | <1 min | With --skip-scrape |
1966
+ | Packaging | 5-10 sec | Final zip |
1967
+
1968
+ ## ✅ Summary
1969
+
1970
+ **One tool does everything:**
1971
+ 1. ✅ Scrapes documentation
1972
+ 2. ✅ Auto-detects existing data
1973
+ 3. ✅ Generates better knowledge
1974
+ 4. ✅ Creates enhanced skills
1975
+ 5. ✅ Works with presets or custom configs
1976
+ 6. ✅ Supports skip-scraping for fast iteration
1977
+
1978
+ **Simple structure:**
1979
+ - `doc_scraper.py` - The tool
1980
+ - `configs/` - Presets
1981
+ - `output/` - Everything else
1982
+
1983
+ **Better output:**
1984
+ - Real code examples with language detection
1985
+ - Common patterns extracted from docs
1986
+ - Smart categorization
1987
+ - Enhanced SKILL.md with actual examples
1988
+
1989
+ ## 📚 Documentation
1990
+
1991
+ ### Getting Started
1992
+ - **[BULLETPROOF_QUICKSTART.md](BULLETPROOF_QUICKSTART.md)** - 🎯 **START HERE** if you're new!
1993
+ - **[QUICKSTART.md](QUICKSTART.md)** - Quick start for experienced users
1994
+ - **[TROUBLESHOOTING.md](TROUBLESHOOTING.md)** - Common issues and solutions
1995
+
1996
+ ### Guides
1997
+ - **[docs/LARGE_DOCUMENTATION.md](docs/LARGE_DOCUMENTATION.md)** - Handle 10K-40K+ page docs
1998
+ - **[ASYNC_SUPPORT.md](ASYNC_SUPPORT.md)** - Async mode guide (2-3x faster scraping)
1999
+ - **[docs/ENHANCEMENT.md](docs/ENHANCEMENT.md)** - AI enhancement guide
2000
+ - **[docs/TERMINAL_SELECTION.md](docs/TERMINAL_SELECTION.md)** - Configure terminal app for local enhancement
2001
+ - **[docs/UPLOAD_GUIDE.md](docs/UPLOAD_GUIDE.md)** - How to upload skills to Claude
2002
+ - **[docs/MCP_SETUP.md](docs/MCP_SETUP.md)** - MCP integration setup
2003
+
2004
+ ### Technical
2005
+ - **[docs/CLAUDE.md](docs/CLAUDE.md)** - Technical architecture
2006
+ - **[STRUCTURE.md](STRUCTURE.md)** - Repository structure
2007
+
2008
+ ## 🎮 Ready?
2009
+
2010
+ ```bash
2011
+ # Try Godot
2012
+ skill-seekers scrape --config configs/godot.json
2013
+
2014
+ # Try React
2015
+ skill-seekers scrape --config configs/react.json
2016
+
2017
+ # Or go interactive
2018
+ skill-seekers scrape --interactive
2019
+ ```
2020
+
2021
+ ## 📝 License
2022
+
2023
+ MIT License - see [LICENSE](LICENSE) file for details
2024
+
2025
+ ---
2026
+
2027
+ Happy skill building! 🚀