skill_seekers-2.7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2302 @@
+ #!/usr/bin/env python3
+ """
+ Skill Seeker MCP Server
+ Model Context Protocol server for generating Claude AI skills from documentation
+ """
+
+ import asyncio
+ import json
+ import os
+ import re
+ import subprocess
+ import sys
+ import time
+ from pathlib import Path
+ from typing import Any
+
+ import httpx
+
+ # Import external MCP package
+ # NOTE: Directory renamed from 'mcp/' to 'skill_seeker_mcp/' to avoid shadowing the external mcp package
+ MCP_AVAILABLE = False
+ Server = None
+ Tool = None
+ TextContent = None
+
+ try:
+     from mcp.server import Server
+     from mcp.types import TextContent, Tool
+
+     MCP_AVAILABLE = True
+ except ImportError as e:
+     if __name__ == "__main__":
+         print("❌ Error: mcp package not installed")
+         print("Install with: pip install mcp")
+         print(f"Import error: {e}")
+         sys.exit(1)
+
+
+ # Initialize MCP server (only if MCP is available)
+ app = Server("skill-seeker") if MCP_AVAILABLE and Server is not None else None
+
+ # Path to CLI tools
+ CLI_DIR = Path(__file__).parent.parent / "cli"
+
+ # Import config validator for submit_config validation
+ sys.path.insert(0, str(CLI_DIR))
+ try:
+     from config_validator import ConfigValidator
+ except ImportError:
+     ConfigValidator = None  # Graceful degradation if not available
+
+
+ # Helper decorator that works even when app is None
+ def safe_decorator(decorator_func):
+     """Returns the decorator if MCP is available, otherwise returns a no-op"""
+     if MCP_AVAILABLE and app is not None:
+         return decorator_func
+     else:
+         # Return a decorator that just returns the function unchanged
+         def noop_decorator(func):
+             return func
+
+         return noop_decorator
+
+
+ def run_subprocess_with_streaming(cmd, timeout=None):
+     """
+     Run subprocess with real-time output streaming.
+     Returns (stdout, stderr, returncode).
+
+     This solves the blocking issue where long-running processes (like scraping)
+     would cause MCP to appear frozen. Now we stream output as it comes.
+     """
+     try:
+         process = subprocess.Popen(
+             cmd,
+             stdout=subprocess.PIPE,
+             stderr=subprocess.PIPE,
+             text=True,
+             bufsize=1,  # Line buffered
+             universal_newlines=True,
+         )
+
+         stdout_lines = []
+         stderr_lines = []
+         start_time = time.time()
+
+         # Read output line by line as it comes
+         while True:
+             # Check timeout
+             if timeout and (time.time() - start_time) > timeout:
+                 process.kill()
+                 stderr_lines.append(f"\n⚠️ Process killed after {timeout}s timeout")
+                 break
+
+             # Check if process finished
+             if process.poll() is not None:
+                 break
+
+             # Read available output (non-blocking)
+             try:
+                 import select
+
+                 readable, _, _ = select.select([process.stdout, process.stderr], [], [], 0.1)
+
+                 if process.stdout in readable:
+                     line = process.stdout.readline()
+                     if line:
+                         stdout_lines.append(line)
+
+                 if process.stderr in readable:
+                     line = process.stderr.readline()
+                     if line:
+                         stderr_lines.append(line)
+             except Exception:
+                 # Fallback for Windows (no select)
+                 time.sleep(0.1)
+
+         # Get any remaining output
+         remaining_stdout, remaining_stderr = process.communicate()
+         if remaining_stdout:
+             stdout_lines.append(remaining_stdout)
+         if remaining_stderr:
+             stderr_lines.append(remaining_stderr)
+
+         stdout = "".join(stdout_lines)
+         stderr = "".join(stderr_lines)
+         returncode = process.returncode
+
+         return stdout, stderr, returncode
+
+     except Exception as e:
+         return "", f"Error running subprocess: {str(e)}", 1
+
+
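A minimal usage sketch for run_subprocess_with_streaming (not part of the package; the command and timeout are illustrative):

import sys

# Stream a hypothetical long-running CLI script, capped at 10 minutes.
cmd = [sys.executable, "long_running_script.py", "--verbose"]
stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=600)
if returncode == 0:
    print(stdout)
else:
    print(f"Subprocess failed ({returncode}):\n{stderr}")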
+ @safe_decorator(app.list_tools() if app else lambda: lambda f: f)
+ async def list_tools() -> list[Tool]:
+     """List available tools"""
+     return [
+         Tool(
+             name="generate_config",
+             description="Generate a config file for documentation scraping. Interactively creates a JSON config for any documentation website.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "name": {
+                         "type": "string",
+                         "description": "Skill name (lowercase, alphanumeric, hyphens, underscores)",
+                     },
+                     "url": {
+                         "type": "string",
+                         "description": "Base documentation URL (must include http:// or https://)",
+                     },
+                     "description": {
+                         "type": "string",
+                         "description": "Description of when to use this skill",
+                     },
+                     "max_pages": {
+                         "type": "integer",
+                         "description": "Maximum pages to scrape (default: 100, use -1 for unlimited)",
+                         "default": 100,
+                     },
+                     "unlimited": {
+                         "type": "boolean",
+                         "description": "Remove all limits - scrape all pages (default: false). Overrides max_pages.",
+                         "default": False,
+                     },
+                     "rate_limit": {
+                         "type": "number",
+                         "description": "Delay between requests in seconds (default: 0.5)",
+                         "default": 0.5,
+                     },
+                 },
+                 "required": ["name", "url", "description"],
+             },
+         ),
+         Tool(
+             name="estimate_pages",
+             description="Estimate how many pages will be scraped from a config. Fast preview without downloading content.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_path": {
+                         "type": "string",
+                         "description": "Path to config JSON file (e.g., configs/react.json)",
+                     },
+                     "max_discovery": {
+                         "type": "integer",
+                         "description": "Maximum pages to discover during estimation (default: 1000, use -1 for unlimited)",
+                         "default": 1000,
+                     },
+                     "unlimited": {
+                         "type": "boolean",
+                         "description": "Remove discovery limit - estimate all pages (default: false). Overrides max_discovery.",
+                         "default": False,
+                     },
+                 },
+                 "required": ["config_path"],
+             },
+         ),
+         Tool(
+             name="scrape_docs",
+             description="Scrape documentation and build Claude skill. Supports both single-source (legacy) and unified multi-source configs. Creates SKILL.md and reference files. Automatically detects llms.txt files for 10x faster processing. Falls back to HTML scraping if not available.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_path": {
+                         "type": "string",
+                         "description": "Path to config JSON file (e.g., configs/react.json or configs/godot_unified.json)",
+                     },
+                     "unlimited": {
+                         "type": "boolean",
+                         "description": "Remove page limit - scrape all pages (default: false). Overrides max_pages in config.",
+                         "default": False,
+                     },
+                     "enhance_local": {
+                         "type": "boolean",
+                         "description": "Open terminal for local enhancement with Claude Code (default: false)",
+                         "default": False,
+                     },
+                     "skip_scrape": {
+                         "type": "boolean",
+                         "description": "Skip scraping, use cached data (default: false)",
+                         "default": False,
+                     },
+                     "dry_run": {
+                         "type": "boolean",
+                         "description": "Preview what will be scraped without saving (default: false)",
+                         "default": False,
+                     },
+                     "merge_mode": {
+                         "type": "string",
+                         "description": "Override merge mode for unified configs: 'rule-based' or 'claude-enhanced' (default: from config)",
+                     },
+                 },
+                 "required": ["config_path"],
+             },
+         ),
+         Tool(
+             name="package_skill",
+             description="Package a skill directory into a .zip file ready for Claude upload. Automatically uploads if ANTHROPIC_API_KEY is set.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "skill_dir": {
+                         "type": "string",
+                         "description": "Path to skill directory (e.g., output/react/)",
+                     },
+                     "auto_upload": {
+                         "type": "boolean",
+                         "description": "Try to upload automatically if API key is available (default: true). If false, only package without upload attempt.",
+                         "default": True,
+                     },
+                 },
+                 "required": ["skill_dir"],
+             },
+         ),
+         Tool(
+             name="upload_skill",
+             description="Upload a skill .zip file to Claude automatically (requires ANTHROPIC_API_KEY)",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "skill_zip": {
+                         "type": "string",
+                         "description": "Path to skill .zip file (e.g., output/react.zip)",
+                     },
+                 },
+                 "required": ["skill_zip"],
+             },
+         ),
+         Tool(
+             name="list_configs",
+             description="List all available preset configurations.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {},
+             },
+         ),
+         Tool(
+             name="validate_config",
+             description="Validate a config file for errors.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_path": {
+                         "type": "string",
+                         "description": "Path to config JSON file",
+                     },
+                 },
+                 "required": ["config_path"],
+             },
+         ),
+         Tool(
+             name="split_config",
+             description="Split large documentation config into multiple focused skills. For 10K+ page documentation.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_path": {
+                         "type": "string",
+                         "description": "Path to config JSON file (e.g., configs/godot.json)",
+                     },
+                     "strategy": {
+                         "type": "string",
+                         "description": "Split strategy: auto, none, category, router, size (default: auto)",
+                         "default": "auto",
+                     },
+                     "target_pages": {
+                         "type": "integer",
+                         "description": "Target pages per skill (default: 5000)",
+                         "default": 5000,
+                     },
+                     "dry_run": {
+                         "type": "boolean",
+                         "description": "Preview without saving files (default: false)",
+                         "default": False,
+                     },
+                 },
+                 "required": ["config_path"],
+             },
+         ),
+         Tool(
+             name="generate_router",
+             description="Generate router/hub skill for split documentation. Creates intelligent routing to sub-skills.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_pattern": {
+                         "type": "string",
+                         "description": "Config pattern for sub-skills (e.g., 'configs/godot-*.json')",
+                     },
+                     "router_name": {
+                         "type": "string",
+                         "description": "Router skill name (optional, inferred from configs)",
+                     },
+                 },
+                 "required": ["config_pattern"],
+             },
+         ),
+         Tool(
+             name="scrape_pdf",
+             description="Scrape PDF documentation and build Claude skill. Extracts text, code, and images from PDF files.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_path": {
+                         "type": "string",
+                         "description": "Path to PDF config JSON file (e.g., configs/manual_pdf.json)",
+                     },
+                     "pdf_path": {
+                         "type": "string",
+                         "description": "Direct PDF path (alternative to config_path)",
+                     },
+                     "name": {
+                         "type": "string",
+                         "description": "Skill name (required with pdf_path)",
+                     },
+                     "description": {
+                         "type": "string",
+                         "description": "Skill description (optional)",
+                     },
+                     "from_json": {
+                         "type": "string",
+                         "description": "Build from extracted JSON file (e.g., output/manual_extracted.json)",
+                     },
+                 },
+                 "required": [],
+             },
+         ),
+         Tool(
+             name="scrape_github",
+             description="Scrape GitHub repository and build Claude skill. Extracts README, Issues, Changelog, Releases, and code structure.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "repo": {
+                         "type": "string",
+                         "description": "GitHub repository (owner/repo, e.g., facebook/react)",
+                     },
+                     "config_path": {
+                         "type": "string",
+                         "description": "Path to GitHub config JSON file (e.g., configs/react_github.json)",
+                     },
+                     "name": {
+                         "type": "string",
+                         "description": "Skill name (default: repo name)",
+                     },
+                     "description": {
+                         "type": "string",
+                         "description": "Skill description",
+                     },
+                     "token": {
+                         "type": "string",
+                         "description": "GitHub personal access token (or use GITHUB_TOKEN env var)",
+                     },
+                     "no_issues": {
+                         "type": "boolean",
+                         "description": "Skip GitHub issues extraction (default: false)",
+                         "default": False,
+                     },
+                     "no_changelog": {
+                         "type": "boolean",
+                         "description": "Skip CHANGELOG extraction (default: false)",
+                         "default": False,
+                     },
+                     "no_releases": {
+                         "type": "boolean",
+                         "description": "Skip releases extraction (default: false)",
+                         "default": False,
+                     },
+                     "max_issues": {
+                         "type": "integer",
+                         "description": "Maximum issues to fetch (default: 100)",
+                         "default": 100,
+                     },
+                     "scrape_only": {
+                         "type": "boolean",
+                         "description": "Only scrape, don't build skill (default: false)",
+                         "default": False,
+                     },
+                 },
+                 "required": [],
+             },
+         ),
+         Tool(
+             name="install_skill",
+             description="Complete one-command workflow: fetch config → scrape docs → AI enhance (MANDATORY) → package → upload. Enhancement required for quality (3/10→9/10). Takes 20-45 min depending on config size. Automatically uploads to Claude if ANTHROPIC_API_KEY is set.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_name": {
+                         "type": "string",
+                         "description": "Config name from API (e.g., 'react', 'django'). Mutually exclusive with config_path. Tool will fetch this config from the official API before scraping.",
+                     },
+                     "config_path": {
+                         "type": "string",
+                         "description": "Path to existing config JSON file (e.g., 'configs/custom.json'). Mutually exclusive with config_name. Use this if you already have a config file.",
+                     },
+                     "destination": {
+                         "type": "string",
+                         "description": "Output directory for skill files (default: 'output')",
+                         "default": "output",
+                     },
+                     "auto_upload": {
+                         "type": "boolean",
+                         "description": "Auto-upload to Claude after packaging (requires ANTHROPIC_API_KEY). Default: true. Set to false to skip upload.",
+                         "default": True,
+                     },
+                     "unlimited": {
+                         "type": "boolean",
+                         "description": "Remove page limits during scraping (default: false). WARNING: Can take hours for large sites.",
+                         "default": False,
+                     },
+                     "dry_run": {
+                         "type": "boolean",
+                         "description": "Preview workflow without executing (default: false). Shows all phases that would run.",
+                         "default": False,
+                     },
+                 },
+                 "required": [],
+             },
+         ),
+         Tool(
+             name="fetch_config",
+             description="Fetch config from API, git URL, or registered source. Supports three modes: (1) Named source from registry, (2) Direct git URL, (3) API (default). List available configs or download a specific one by name.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_name": {
+                         "type": "string",
+                         "description": "Name of the config to download (e.g., 'react', 'django', 'godot'). Required for git modes. Omit to list all available configs in API mode.",
+                     },
+                     "destination": {
+                         "type": "string",
+                         "description": "Directory to save the config file (default: 'configs/')",
+                         "default": "configs",
+                     },
+                     "list_available": {
+                         "type": "boolean",
+                         "description": "List all available configs from the API (only works in API mode, default: false)",
+                         "default": False,
+                     },
+                     "category": {
+                         "type": "string",
+                         "description": "Filter configs by category when listing in API mode (e.g., 'web-frameworks', 'game-engines', 'devops')",
+                     },
+                     "git_url": {
+                         "type": "string",
+                         "description": "Git repository URL containing configs. If provided, fetches from git instead of API. Supports HTTPS and SSH URLs. Example: 'https://github.com/myorg/configs.git'",
+                     },
+                     "source": {
+                         "type": "string",
+                         "description": "Named source from registry (highest priority). Use add_config_source to register sources first. Example: 'team', 'company'",
+                     },
+                     "branch": {
+                         "type": "string",
+                         "description": "Git branch to use (default: 'main'). Only used with git_url or source.",
+                         "default": "main",
+                     },
+                     "token": {
+                         "type": "string",
+                         "description": "Authentication token for private repos (optional). Prefer using environment variables (GITHUB_TOKEN, GITLAB_TOKEN, etc.).",
+                     },
+                     "refresh": {
+                         "type": "boolean",
+                         "description": "Force refresh cached git repository (default: false). Deletes cache and re-clones. Only used with git modes.",
+                         "default": False,
+                     },
+                 },
+                 "required": [],
+             },
+         ),
+         Tool(
+             name="submit_config",
+             description="Submit a custom config file to the community. Validates config (legacy or unified format) and creates a GitHub issue in skill-seekers-configs repo for review.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "config_path": {
+                         "type": "string",
+                         "description": "Path to config JSON file to submit (e.g., 'configs/myframework.json')",
+                     },
+                     "config_json": {
+                         "type": "string",
+                         "description": "Config JSON as string (alternative to config_path)",
+                     },
+                     "testing_notes": {
+                         "type": "string",
+                         "description": "Notes about testing (e.g., 'Tested with 20 pages, works well')",
+                     },
+                     "github_token": {
+                         "type": "string",
+                         "description": "GitHub personal access token (or use GITHUB_TOKEN env var)",
+                     },
+                 },
+                 "required": [],
+             },
+         ),
+         Tool(
+             name="add_config_source",
+             description="Register a git repository as a config source. Allows fetching configs from private/team repos. Use this to set up named sources that can be referenced by fetch_config. Supports GitHub, GitLab, Gitea, Bitbucket, and custom git servers.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "name": {
+                         "type": "string",
+                         "description": "Source identifier (lowercase, alphanumeric, hyphens/underscores allowed). Example: 'team', 'company-internal', 'my_configs'",
+                     },
+                     "git_url": {
+                         "type": "string",
+                         "description": "Git repository URL (HTTPS or SSH). Example: 'https://github.com/myorg/configs.git' or 'git@github.com:myorg/configs.git'",
+                     },
+                     "source_type": {
+                         "type": "string",
+                         "description": "Source type (default: 'github'). Options: 'github', 'gitlab', 'gitea', 'bitbucket', 'custom'",
+                         "default": "github",
+                     },
+                     "token_env": {
+                         "type": "string",
+                         "description": "Environment variable name for auth token (optional). Auto-detected if not provided. Example: 'GITHUB_TOKEN', 'GITLAB_TOKEN', 'MY_CUSTOM_TOKEN'",
+                     },
+                     "branch": {
+                         "type": "string",
+                         "description": "Git branch to use (default: 'main'). Example: 'main', 'master', 'develop'",
+                         "default": "main",
+                     },
+                     "priority": {
+                         "type": "integer",
+                         "description": "Source priority (lower = higher priority, default: 100). Used for conflict resolution when same config exists in multiple sources.",
+                         "default": 100,
+                     },
+                     "enabled": {
+                         "type": "boolean",
+                         "description": "Whether source is enabled (default: true)",
+                         "default": True,
+                     },
+                 },
+                 "required": ["name", "git_url"],
+             },
+         ),
+         Tool(
+             name="list_config_sources",
+             description="List all registered config sources. Shows git repositories that have been registered with add_config_source. Use this to see available sources for fetch_config.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "enabled_only": {
+                         "type": "boolean",
+                         "description": "Only show enabled sources (default: false)",
+                         "default": False,
+                     },
+                 },
+                 "required": [],
+             },
+         ),
+         Tool(
+             name="remove_config_source",
+             description="Remove a registered config source. Deletes the source from the registry. Does not delete cached git repository data.",
+             inputSchema={
+                 "type": "object",
+                 "properties": {
+                     "name": {
+                         "type": "string",
+                         "description": "Source identifier to remove. Example: 'team', 'company-internal'",
+                     },
+                 },
+                 "required": ["name"],
+             },
+         ),
+     ]
+
+
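For orientation, the arguments object a client would send when calling the generate_config tool, per the inputSchema above (values illustrative; only name, url, and description are required):

example_arguments = {
    "name": "react",
    "url": "https://react.dev/reference",
    "description": "Use when answering questions about React APIs",
    "max_pages": 200,   # optional, schema default 100
    "rate_limit": 0.5,  # optional, schema default 0.5
}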
+ @safe_decorator(app.call_tool() if app else lambda: lambda f: f)
+ async def call_tool(name: str, arguments: Any) -> list[TextContent]:
+     """Handle tool calls"""
+
+     try:
+         if name == "generate_config":
+             return await generate_config_tool(arguments)
+         elif name == "estimate_pages":
+             return await estimate_pages_tool(arguments)
+         elif name == "scrape_docs":
+             return await scrape_docs_tool(arguments)
+         elif name == "package_skill":
+             return await package_skill_tool(arguments)
+         elif name == "upload_skill":
+             return await upload_skill_tool(arguments)
+         elif name == "list_configs":
+             return await list_configs_tool(arguments)
+         elif name == "validate_config":
+             return await validate_config_tool(arguments)
+         elif name == "split_config":
+             return await split_config_tool(arguments)
+         elif name == "generate_router":
+             return await generate_router_tool(arguments)
+         elif name == "scrape_pdf":
+             return await scrape_pdf_tool(arguments)
+         elif name == "scrape_github":
+             return await scrape_github_tool(arguments)
+         elif name == "fetch_config":
+             return await fetch_config_tool(arguments)
+         elif name == "submit_config":
+             return await submit_config_tool(arguments)
+         elif name == "add_config_source":
+             return await add_config_source_tool(arguments)
+         elif name == "list_config_sources":
+             return await list_config_sources_tool(arguments)
+         elif name == "remove_config_source":
+             return await remove_config_source_tool(arguments)
+         elif name == "install_skill":
+             return await install_skill_tool(arguments)
+         else:
+             return [TextContent(type="text", text=f"Unknown tool: {name}")]
+
+     except Exception as e:
+         return [TextContent(type="text", text=f"Error: {str(e)}")]
+
+
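The if/elif chain above maps tool names to handlers one-to-one; an equivalent table-driven dispatch is sketched below (an alternative shape, not what the package ships; call_tool_via_table is a hypothetical name):

TOOL_HANDLERS = {
    "generate_config": generate_config_tool,
    "estimate_pages": estimate_pages_tool,
    "scrape_docs": scrape_docs_tool,
    # ...remaining handlers registered the same way
}

async def call_tool_via_table(name: str, arguments: Any) -> list[TextContent]:
    handler = TOOL_HANDLERS.get(name)
    if handler is None:
        return [TextContent(type="text", text=f"Unknown tool: {name}")]
    try:
        return await handler(arguments)
    except Exception as e:
        return [TextContent(type="text", text=f"Error: {str(e)}")]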
+ async def generate_config_tool(args: dict) -> list[TextContent]:
+     """Generate a config file"""
+     name = args["name"]
+     url = args["url"]
+     description = args["description"]
+     max_pages = args.get("max_pages", 100)
+     unlimited = args.get("unlimited", False)
+     rate_limit = args.get("rate_limit", 0.5)
+
+     # Handle unlimited mode
+     if unlimited or max_pages == -1:
+         max_pages = None
+         limit_msg = "unlimited (no page limit)"
+     else:
+         limit_msg = str(max_pages)
+
+     # Create config
+     config = {
+         "name": name,
+         "description": description,
+         "base_url": url,
+         "selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre code"},
+         "url_patterns": {"include": [], "exclude": []},
+         "categories": {},
+         "rate_limit": rate_limit,
+         "max_pages": max_pages,
+     }
+
+     # Save to configs directory
+     config_path = Path("configs") / f"{name}.json"
+     config_path.parent.mkdir(exist_ok=True)
+
+     with open(config_path, "w") as f:
+         json.dump(config, f, indent=2)
+
+     result = f"""✅ Config created: {config_path}
+
+ Configuration:
+ Name: {name}
+ URL: {url}
+ Max pages: {limit_msg}
+ Rate limit: {rate_limit}s
+
+ Next steps:
+ 1. Review/edit config: cat {config_path}
+ 2. Estimate pages: Use estimate_pages tool
+ 3. Scrape docs: Use scrape_docs tool
+
+ Note: Default selectors may need adjustment for your documentation site.
+ """
+
+     return [TextContent(type="text", text=result)]
+
+
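With the defaults above, a call with name="react" would write roughly this to configs/react.json (URL and description illustrative; selectors are the hard-coded defaults):

{
  "name": "react",
  "description": "Use when answering questions about React APIs",
  "base_url": "https://react.dev/reference",
  "selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre code"},
  "url_patterns": {"include": [], "exclude": []},
  "categories": {},
  "rate_limit": 0.5,
  "max_pages": 100
}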
+ async def estimate_pages_tool(args: dict) -> list[TextContent]:
+     """Estimate page count"""
+     config_path = args["config_path"]
+     max_discovery = args.get("max_discovery", 1000)
+     unlimited = args.get("unlimited", False)
+
+     # Handle unlimited mode
+     if unlimited or max_discovery == -1:
+         max_discovery = -1
+         timeout = 1800  # 30 minutes for unlimited discovery
+     else:
+         # Estimate: 0.5s per page discovered
+         timeout = max(300, max_discovery // 2)  # Minimum 5 minutes
+
+     # Run estimate_pages.py
+     cmd = [
+         sys.executable,
+         str(CLI_DIR / "estimate_pages.py"),
+         config_path,
+         "--max-discovery",
+         str(max_discovery),
+     ]
+
+     progress_msg = "🔄 Estimating page count...\n"
+     progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output)]
+     else:
+         return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
+
+
+ async def scrape_docs_tool(args: dict) -> list[TextContent]:
+     """Scrape documentation - auto-detects unified vs legacy format"""
+     config_path = args["config_path"]
+     unlimited = args.get("unlimited", False)
+     enhance_local = args.get("enhance_local", False)
+     skip_scrape = args.get("skip_scrape", False)
+     dry_run = args.get("dry_run", False)
+     merge_mode = args.get("merge_mode")
+
+     # Load config to detect format
+     with open(config_path) as f:
+         config = json.load(f)
+
+     # Detect if unified format (has 'sources' array)
+     is_unified = "sources" in config and isinstance(config["sources"], list)
+
+     # Handle unlimited mode by modifying config temporarily
+     if unlimited:
+         # Set max_pages to None (unlimited)
+         if is_unified:
+             # For unified configs, set max_pages on documentation sources
+             for source in config.get("sources", []):
+                 if source.get("type") == "documentation":
+                     source["max_pages"] = None
+         else:
+             # For legacy configs
+             config["max_pages"] = None
+
+         # Create temporary config file
+         temp_config_path = config_path.replace(".json", "_unlimited_temp.json")
+         with open(temp_config_path, "w") as f:
+             json.dump(config, f, indent=2)
+
+         config_to_use = temp_config_path
+     else:
+         config_to_use = config_path
+
+     # Choose scraper based on format
+     if is_unified:
+         scraper_script = "unified_scraper.py"
+         progress_msg = "🔄 Starting unified multi-source scraping...\n"
+         progress_msg += "📦 Config format: Unified (multiple sources)\n"
+     else:
+         scraper_script = "doc_scraper.py"
+         progress_msg = "🔄 Starting scraping process...\n"
+         progress_msg += "📦 Config format: Legacy (single source)\n"
+
+     # Build command
+     cmd = [sys.executable, str(CLI_DIR / scraper_script), "--config", config_to_use]
+
+     # Add merge mode for unified configs
+     if is_unified and merge_mode:
+         cmd.extend(["--merge-mode", merge_mode])
+
+     # Add --fresh to avoid user input prompts when existing data found
+     if not skip_scrape:
+         cmd.append("--fresh")
+
+     if enhance_local:
+         cmd.append("--enhance-local")
+     if skip_scrape:
+         cmd.append("--skip-scrape")
+     if dry_run:
+         cmd.append("--dry-run")
+
+     # Determine timeout based on operation type
+     if dry_run:
+         timeout = 300  # 5 minutes for dry run
+     elif skip_scrape:
+         timeout = 600  # 10 minutes for building from cache
+     elif unlimited:
+         timeout = None  # No timeout for unlimited mode (user explicitly requested)
+     else:
+         # Read config to estimate timeout
+         try:
+             if is_unified:
+                 # For unified configs, estimate based on all sources
+                 total_pages = 0
+                 for source in config.get("sources", []):
+                     if source.get("type") == "documentation":
+                         total_pages += source.get("max_pages", 500)
+                 max_pages = total_pages or 500
+             else:
+                 max_pages = config.get("max_pages", 500)
+
+             # Estimate: 30s per page + buffer
+             timeout = max(3600, max_pages * 35)  # Minimum 1 hour, or 35s per page
+         except Exception:
+             timeout = 14400  # Default: 4 hours
+
+     # Add progress message
+     if timeout:
+         progress_msg += f"⏱️ Maximum time allowed: {timeout // 60} minutes\n"
+     else:
+         progress_msg += "⏱️ Unlimited mode - no timeout\n"
+     progress_msg += "📝 Progress will be shown below:\n\n"
+
+     # Run scraper with streaming
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     # Clean up temporary config
+     if unlimited and Path(config_to_use).exists():
+         Path(config_to_use).unlink()
+
+     output = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output)]
+     else:
+         error_output = output + f"\n\n❌ Error:\n{stderr}"
+         return [TextContent(type="text", text=error_output)]
+
+
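The format detection above keys on a top-level "sources" array. Skeletons of both shapes, inferred from how this file reads "type", "base_url", "repo", "max_pages", and "merge_mode" (field values illustrative):

# Legacy (single source): max_pages lives at the top level.
legacy = {"name": "react", "base_url": "https://react.dev", "max_pages": 500}

# Unified (multi-source): each source entry carries its own settings.
unified = {
    "name": "godot",
    "sources": [
        {"type": "documentation", "base_url": "https://docs.godotengine.org", "max_pages": 500},
        {"type": "github", "repo": "godotengine/godot"},
    ],
    "merge_mode": "rule-based",
}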
863
+ async def package_skill_tool(args: dict) -> list[TextContent]:
864
+ """Package skill to .zip and optionally auto-upload"""
865
+ skill_dir = args["skill_dir"]
866
+ auto_upload = args.get("auto_upload", True)
867
+
868
+ # Check if API key exists - only upload if available
869
+ has_api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
870
+ should_upload = auto_upload and has_api_key
871
+
872
+ # Run package_skill.py
873
+ cmd = [
874
+ sys.executable,
875
+ str(CLI_DIR / "package_skill.py"),
876
+ skill_dir,
877
+ "--no-open", # Don't open folder in MCP context
878
+ "--skip-quality-check", # Skip interactive quality checks in MCP context
879
+ ]
880
+
881
+ # Add upload flag only if we have API key
882
+ if should_upload:
883
+ cmd.append("--upload")
884
+
885
+ # Timeout: 5 minutes for packaging + upload
886
+ timeout = 300
887
+
888
+ progress_msg = "📦 Packaging skill...\n"
889
+ if should_upload:
890
+ progress_msg += "📤 Will auto-upload if successful\n"
891
+ progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
892
+
893
+ stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
894
+
895
+ output = progress_msg + stdout
896
+
897
+ if returncode == 0:
898
+ if should_upload:
899
+ # Upload succeeded
900
+ output += "\n\n✅ Skill packaged and uploaded automatically!"
901
+ output += "\n Your skill is now available in Claude!"
902
+ elif auto_upload and not has_api_key:
903
+ # User wanted upload but no API key
904
+ output += "\n\n📝 Skill packaged successfully!"
905
+ output += "\n"
906
+ output += "\n💡 To enable automatic upload:"
907
+ output += "\n 1. Get API key from https://console.anthropic.com/"
908
+ output += "\n 2. Set: export ANTHROPIC_API_KEY=sk-ant-..."
909
+ output += "\n"
910
+ output += "\n📤 Manual upload:"
911
+ output += "\n 1. Find the .zip file in your output/ folder"
912
+ output += "\n 2. Go to https://claude.ai/skills"
913
+ output += "\n 3. Click 'Upload Skill' and select the .zip file"
914
+ else:
915
+ # auto_upload=False, just packaged
916
+ output += "\n\n✅ Skill packaged successfully!"
917
+ output += "\n Upload manually to https://claude.ai/skills"
918
+
919
+ return [TextContent(type="text", text=output)]
920
+ else:
921
+ return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
922
+
923
+
924
+ async def upload_skill_tool(args: dict) -> list[TextContent]:
925
+ """Upload skill .zip to Claude"""
926
+ skill_zip = args["skill_zip"]
927
+
928
+ # Run upload_skill.py
929
+ cmd = [sys.executable, str(CLI_DIR / "upload_skill.py"), skill_zip]
930
+
931
+ # Timeout: 5 minutes for upload
932
+ timeout = 300
933
+
934
+ progress_msg = "📤 Uploading skill to Claude...\n"
935
+ progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
936
+
937
+ stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
938
+
939
+ output = progress_msg + stdout
940
+
941
+ if returncode == 0:
942
+ return [TextContent(type="text", text=output)]
943
+ else:
944
+ return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
945
+
946
+
947
+ async def list_configs_tool(_args: dict) -> list[TextContent]:
948
+ """List available configs"""
949
+ configs_dir = Path("configs")
950
+
951
+ if not configs_dir.exists():
952
+ return [TextContent(type="text", text="No configs directory found")]
953
+
954
+ configs = list(configs_dir.glob("*.json"))
955
+
956
+ if not configs:
957
+ return [TextContent(type="text", text="No config files found")]
958
+
959
+ result = "📋 Available Configs:\n\n"
960
+
961
+ for config_file in sorted(configs):
962
+ try:
963
+ with open(config_file) as f:
964
+ config = json.load(f)
965
+ name = config.get("name", config_file.stem)
966
+ desc = config.get("description", "No description")
967
+ url = config.get("base_url", "")
968
+
969
+ result += f" • {config_file.name}\n"
970
+ result += f" Name: {name}\n"
971
+ result += f" URL: {url}\n"
972
+ result += f" Description: {desc}\n\n"
973
+ except Exception as e:
974
+ result += f" • {config_file.name} - Error reading: {e}\n\n"
975
+
976
+ return [TextContent(type="text", text=result)]
977
+
978
+
979
+ async def validate_config_tool(args: dict) -> list[TextContent]:
980
+ """Validate a config file - supports both legacy and unified formats"""
981
+ config_path = args["config_path"]
982
+
983
+ # Import validation classes
984
+ sys.path.insert(0, str(CLI_DIR))
985
+
986
+ try:
987
+ # Check if file exists
988
+ if not Path(config_path).exists():
989
+ return [
990
+ TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
991
+ ]
992
+
993
+ # Try unified config validator first
994
+ try:
995
+ from config_validator import validate_config
996
+
997
+ validator = validate_config(config_path)
998
+
999
+ result = "✅ Config is valid!\n\n"
1000
+
1001
+ # Show format
1002
+ if validator.is_unified:
1003
+ result += "📦 Format: Unified (multi-source)\n"
1004
+ result += f" Name: {validator.config['name']}\n"
1005
+ result += f" Sources: {len(validator.config.get('sources', []))}\n"
1006
+
1007
+ # Show sources
1008
+ for i, source in enumerate(validator.config.get("sources", []), 1):
1009
+ result += f"\n Source {i}: {source['type']}\n"
1010
+ if source["type"] == "documentation":
1011
+ result += f" URL: {source.get('base_url', 'N/A')}\n"
1012
+ result += f" Max pages: {source.get('max_pages', 'Not set')}\n"
1013
+ elif source["type"] == "github":
1014
+ result += f" Repo: {source.get('repo', 'N/A')}\n"
1015
+ result += (
1016
+ f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
1017
+ )
1018
+ elif source["type"] == "pdf":
1019
+ result += f" Path: {source.get('path', 'N/A')}\n"
1020
+
1021
+ # Show merge settings if applicable
1022
+ if validator.needs_api_merge():
1023
+ merge_mode = validator.config.get("merge_mode", "rule-based")
1024
+ result += f"\n Merge mode: {merge_mode}\n"
1025
+ result += " API merging: Required (docs + code sources)\n"
1026
+
1027
+ else:
1028
+ result += "📦 Format: Legacy (single source)\n"
1029
+ result += f" Name: {validator.config['name']}\n"
1030
+ result += f" Base URL: {validator.config.get('base_url', 'N/A')}\n"
1031
+ result += f" Max pages: {validator.config.get('max_pages', 'Not set')}\n"
1032
+ result += f" Rate limit: {validator.config.get('rate_limit', 'Not set')}s\n"
1033
+
1034
+ return [TextContent(type="text", text=result)]
1035
+
1036
+ except ImportError:
1037
+ # Fall back to legacy validation
1038
+ import json
1039
+
1040
+ from doc_scraper import validate_config
1041
+
1042
+ with open(config_path) as f:
1043
+ config = json.load(f)
1044
+
1045
+ # Validate config - returns (errors, warnings) tuple
1046
+ errors, warnings = validate_config(config)
1047
+
1048
+ if errors:
1049
+ result = "❌ Config validation failed:\n\n"
1050
+ for error in errors:
1051
+ result += f" • {error}\n"
1052
+ else:
1053
+ result = "✅ Config is valid!\n\n"
1054
+ result += "📦 Format: Legacy (single source)\n"
1055
+ result += f" Name: {config['name']}\n"
1056
+ result += f" Base URL: {config['base_url']}\n"
1057
+ result += f" Max pages: {config.get('max_pages', 'Not set')}\n"
1058
+ result += f" Rate limit: {config.get('rate_limit', 'Not set')}s\n"
1059
+
1060
+ if warnings:
1061
+ result += "\n⚠️ Warnings:\n"
1062
+ for warning in warnings:
1063
+ result += f" • {warning}\n"
1064
+
1065
+ return [TextContent(type="text", text=result)]
1066
+
1067
+ except Exception as e:
1068
+ return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
1069
+
1070
+
1071
+ async def split_config_tool(args: dict) -> list[TextContent]:
1072
+ """Split large config into multiple focused configs"""
1073
+ config_path = args["config_path"]
1074
+ strategy = args.get("strategy", "auto")
1075
+ target_pages = args.get("target_pages", 5000)
1076
+ dry_run = args.get("dry_run", False)
1077
+
1078
+ # Run split_config.py
1079
+ cmd = [
1080
+ sys.executable,
1081
+ str(CLI_DIR / "split_config.py"),
1082
+ config_path,
1083
+ "--strategy",
1084
+ strategy,
1085
+ "--target-pages",
1086
+ str(target_pages),
1087
+ ]
1088
+
1089
+ if dry_run:
1090
+ cmd.append("--dry-run")
1091
+
1092
+ # Timeout: 5 minutes for config splitting
1093
+ timeout = 300
1094
+
1095
+ progress_msg = "✂️ Splitting configuration...\n"
1096
+ progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
1097
+
1098
+ stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
1099
+
1100
+ output = progress_msg + stdout
1101
+
1102
+ if returncode == 0:
1103
+ return [TextContent(type="text", text=output)]
1104
+ else:
1105
+ return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
1106
+
1107
+
1108
+ async def generate_router_tool(args: dict) -> list[TextContent]:
1109
+ """Generate router skill for split documentation"""
1110
+ import glob
1111
+
1112
+ config_pattern = args["config_pattern"]
1113
+ router_name = args.get("router_name")
1114
+
1115
+ # Expand glob pattern
1116
+ config_files = glob.glob(config_pattern)
1117
+
1118
+ if not config_files:
1119
+ return [
1120
+ TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")
1121
+ ]
1122
+
1123
+ # Run generate_router.py
1124
+ cmd = [
1125
+ sys.executable,
1126
+ str(CLI_DIR / "generate_router.py"),
1127
+ ] + config_files
1128
+
1129
+ if router_name:
1130
+ cmd.extend(["--name", router_name])
1131
+
1132
+ # Timeout: 5 minutes for router generation
1133
+ timeout = 300
1134
+
1135
+ progress_msg = "🧭 Generating router skill...\n"
1136
+ progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
1137
+
1138
+ stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
1139
+
1140
+ output = progress_msg + stdout
1141
+
1142
+ if returncode == 0:
1143
+ return [TextContent(type="text", text=output)]
1144
+ else:
1145
+ return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
1146
+
1147
+
1148
+ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
1149
+ """Scrape PDF documentation and build skill"""
1150
+ config_path = args.get("config_path")
1151
+ pdf_path = args.get("pdf_path")
1152
+ name = args.get("name")
1153
+ description = args.get("description")
1154
+ from_json = args.get("from_json")
1155
+
1156
+ # Build command
1157
+ cmd = [sys.executable, str(CLI_DIR / "pdf_scraper.py")]
1158
+
1159
+ # Mode 1: Config file
1160
+ if config_path:
1161
+ cmd.extend(["--config", config_path])
1162
+
1163
+ # Mode 2: Direct PDF
1164
+ elif pdf_path and name:
1165
+ cmd.extend(["--pdf", pdf_path, "--name", name])
1166
+ if description:
1167
+ cmd.extend(["--description", description])
1168
+
1169
+ # Mode 3: From JSON
1170
+ elif from_json:
1171
+ cmd.extend(["--from-json", from_json])
1172
+
1173
+ else:
1174
+ return [
1175
+ TextContent(
1176
+ type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json"
1177
+ )
1178
+ ]
1179
+
1180
+ # Run pdf_scraper.py with streaming (can take a while)
1181
+ timeout = 600 # 10 minutes for PDF extraction
1182
+
1183
+ progress_msg = "📄 Scraping PDF documentation...\n"
1184
+ progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
1185
+
1186
+ stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
1187
+
1188
+ output = progress_msg + stdout
1189
+
1190
+ if returncode == 0:
1191
+ return [TextContent(type="text", text=output)]
1192
+ else:
1193
+ return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
1194
+
1195
+
1196
+ async def scrape_github_tool(args: dict) -> list[TextContent]:
1197
+ """Scrape GitHub repository to Claude skill (C1.11)"""
1198
+ repo = args.get("repo")
1199
+ config_path = args.get("config_path")
1200
+ name = args.get("name")
1201
+ description = args.get("description")
1202
+ token = args.get("token")
1203
+ no_issues = args.get("no_issues", False)
1204
+ no_changelog = args.get("no_changelog", False)
1205
+ no_releases = args.get("no_releases", False)
1206
+ max_issues = args.get("max_issues", 100)
1207
+ scrape_only = args.get("scrape_only", False)
1208
+
1209
+ # Build command
1210
+ cmd = [sys.executable, str(CLI_DIR / "github_scraper.py")]
1211
+
1212
+ # Mode 1: Config file
1213
+ if config_path:
1214
+ cmd.extend(["--config", config_path])
1215
+
1216
+ # Mode 2: Direct repo
1217
+ elif repo:
1218
+ cmd.extend(["--repo", repo])
1219
+ if name:
1220
+ cmd.extend(["--name", name])
1221
+ if description:
1222
+ cmd.extend(["--description", description])
1223
+ if token:
1224
+ cmd.extend(["--token", token])
1225
+ if no_issues:
1226
+ cmd.append("--no-issues")
1227
+ if no_changelog:
1228
+ cmd.append("--no-changelog")
1229
+ if no_releases:
1230
+ cmd.append("--no-releases")
1231
+ if max_issues != 100:
1232
+ cmd.extend(["--max-issues", str(max_issues)])
1233
+ if scrape_only:
1234
+ cmd.append("--scrape-only")
1235
+
1236
+ else:
1237
+ return [TextContent(type="text", text="❌ Error: Must specify --repo or --config")]
1238
+
1239
+ # Run github_scraper.py with streaming (can take a while)
1240
+ timeout = 600 # 10 minutes for GitHub scraping
1241
+
1242
+ progress_msg = "🐙 Scraping GitHub repository...\n"
1243
+ progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
1244
+
1245
+ stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
1246
+
1247
+ output = progress_msg + stdout
1248
+
1249
+ if returncode == 0:
1250
+ return [TextContent(type="text", text=output)]
1251
+ else:
1252
+ return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
1253
+
1254
+
1255
+ async def fetch_config_tool(args: dict) -> list[TextContent]:
1256
+ """Fetch config from API, git URL, or named source"""
1257
+ from skill_seekers.mcp.git_repo import GitConfigRepo
1258
+ from skill_seekers.mcp.source_manager import SourceManager
1259
+
1260
+ config_name = args.get("config_name")
1261
+ destination = args.get("destination", "configs")
1262
+ list_available = args.get("list_available", False)
1263
+ category = args.get("category")
1264
+
1265
+ # Git mode parameters
1266
+ source_name = args.get("source")
1267
+ git_url = args.get("git_url")
1268
+ branch = args.get("branch", "main")
1269
+ token = args.get("token")
1270
+ force_refresh = args.get("refresh", False)
1271
+
1272
+ try:
1273
+ # MODE 1: Named Source (highest priority)
1274
+ if source_name:
1275
+ if not config_name:
1276
+ return [
1277
+ TextContent(
1278
+ type="text",
1279
+ text="❌ Error: config_name is required when using source parameter",
1280
+ )
1281
+ ]
1282
+
1283
+ # Get source from registry
1284
+ source_manager = SourceManager()
1285
+ try:
1286
+ source = source_manager.get_source(source_name)
1287
+ except KeyError as e:
1288
+ return [TextContent(type="text", text=f"❌ {str(e)}")]
1289
+
1290
+ git_url = source["git_url"]
1291
+ branch = source.get("branch", branch)
1292
+ token_env = source.get("token_env")
1293
+
1294
+ # Get token from environment if not provided
1295
+ if not token and token_env:
1296
+ token = os.environ.get(token_env)
1297
+
1298
+ # Clone/pull repository
1299
+ git_repo = GitConfigRepo()
1300
+ try:
1301
+ repo_path = git_repo.clone_or_pull(
1302
+ source_name=source_name,
1303
+ git_url=git_url,
1304
+ branch=branch,
1305
+ token=token,
1306
+ force_refresh=force_refresh,
1307
+ )
1308
+ except Exception as e:
1309
+ return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]
1310
+
1311
+ # Load config from repository
1312
+ try:
1313
+ config_data = git_repo.get_config(repo_path, config_name)
1314
+ except FileNotFoundError as e:
1315
+ return [TextContent(type="text", text=f"❌ {str(e)}")]
1316
+ except ValueError as e:
1317
+ return [TextContent(type="text", text=f"❌ {str(e)}")]
1318
+
1319
+ # Save to destination
1320
+ dest_path = Path(destination)
1321
+ dest_path.mkdir(parents=True, exist_ok=True)
1322
+ config_file = dest_path / f"{config_name}.json"
1323
+
1324
+ with open(config_file, "w") as f:
1325
+ json.dump(config_data, f, indent=2)
1326
+
1327
+ result = f"""✅ Config fetched from git source successfully!
1328
+
1329
+ 📦 Config: {config_name}
1330
+ 📂 Saved to: {config_file}
1331
+ 🔗 Source: {source_name}
1332
+ 🌿 Branch: {branch}
1333
+ 📁 Repository: {git_url}
1334
+ 🔄 Refreshed: {"Yes (forced)" if force_refresh else "No (used cache)"}
1335
+
1336
+ Next steps:
1337
+ 1. Review config: cat {config_file}
1338
+ 2. Estimate pages: Use estimate_pages tool
1339
+ 3. Scrape docs: Use scrape_docs tool
1340
+
1341
+ 💡 Manage sources: Use add_config_source, list_config_sources, remove_config_source tools
1342
+ """
1343
+ return [TextContent(type="text", text=result)]
1344
+
1345
+ # MODE 2: Direct Git URL
1346
+ elif git_url:
1347
+ if not config_name:
1348
+ return [
1349
+ TextContent(
1350
+ type="text",
1351
+ text="❌ Error: config_name is required when using git_url parameter",
1352
+ )
1353
+ ]
1354
+
1355
+ # Clone/pull repository
1356
+ git_repo = GitConfigRepo()
1357
+ source_name_temp = f"temp_{config_name}"
1358
+
1359
+ try:
1360
+ repo_path = git_repo.clone_or_pull(
1361
+ source_name=source_name_temp,
1362
+ git_url=git_url,
1363
+ branch=branch,
1364
+ token=token,
1365
+ force_refresh=force_refresh,
1366
+ )
1367
+ except ValueError as e:
1368
+ return [TextContent(type="text", text=f"❌ Invalid git URL: {str(e)}")]
1369
+ except Exception as e:
1370
+ return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]
1371
+
1372
+ # Load config from repository
1373
+ try:
1374
+ config_data = git_repo.get_config(repo_path, config_name)
1375
+ except FileNotFoundError as e:
1376
+ return [TextContent(type="text", text=f"❌ {str(e)}")]
1377
+ except ValueError as e:
1378
+ return [TextContent(type="text", text=f"❌ {str(e)}")]
1379
+
1380
+ # Save to destination
1381
+ dest_path = Path(destination)
1382
+ dest_path.mkdir(parents=True, exist_ok=True)
1383
+ config_file = dest_path / f"{config_name}.json"
1384
+
1385
+ with open(config_file, "w") as f:
1386
+ json.dump(config_data, f, indent=2)
1387
+
1388
+ result = f"""✅ Config fetched from git URL successfully!
1389
+
1390
+ 📦 Config: {config_name}
1391
+ 📂 Saved to: {config_file}
1392
+ 📁 Repository: {git_url}
1393
+ 🌿 Branch: {branch}
1394
+ 🔄 Refreshed: {"Yes (forced)" if force_refresh else "No (used cache)"}
1395
+
1396
+ Next steps:
1397
+ 1. Review config: cat {config_file}
1398
+ 2. Estimate pages: Use estimate_pages tool
1399
+ 3. Scrape docs: Use scrape_docs tool
1400
+
1401
+ 💡 Register this source: Use add_config_source to save for future use
1402
+ """
1403
+ return [TextContent(type="text", text=result)]
1404
+
1405
+ # MODE 3: API (existing, backward compatible)
1406
+ else:
1407
+ API_BASE_URL = "https://api.skillseekersweb.com"
1408
+
1409
+ async with httpx.AsyncClient(timeout=30.0) as client:
1410
+ # List available configs if requested or no config_name provided
1411
+ if list_available or not config_name:
1412
+ # Build API URL with optional category filter
1413
+ list_url = f"{API_BASE_URL}/api/configs"
1414
+ params = {}
1415
+ if category:
1416
+ params["category"] = category
1417
+
1418
+ response = await client.get(list_url, params=params)
1419
+ response.raise_for_status()
1420
+ data = response.json()
1421
+
1422
+ configs = data.get("configs", [])
1423
+ total = data.get("total", 0)
1424
+ filters = data.get("filters")
1425
+
1426
+ # Format list output
1427
+ result = f"📋 Available Configs ({total} total)\n"
1428
+ if filters:
1429
+ result += f"🔍 Filters: {filters}\n"
1430
+ result += "\n"
1431
+
1432
+ # Group by category
1433
+ by_category = {}
1434
+ for config in configs:
1435
+ cat = config.get("category", "uncategorized")
1436
+ if cat not in by_category:
1437
+ by_category[cat] = []
1438
+ by_category[cat].append(config)
1439
+
1440
+ for cat, cat_configs in sorted(by_category.items()):
1441
+ result += f"\n**{cat.upper()}** ({len(cat_configs)} configs):\n"
+ for cfg in cat_configs:
+ name = cfg.get("name")
+ desc = cfg.get("description", "")[:60]
+ config_type = cfg.get("type", "unknown")
+ tags = ", ".join(cfg.get("tags", [])[:3])
+ result += f" • {name} [{config_type}] - {desc}{'...' if len(cfg.get('description', '')) > 60 else ''}\n"
+ if tags:
+ result += f" Tags: {tags}\n"
+
+ result += (
+ "\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
+ )
+ result += f"📚 API Docs: {API_BASE_URL}/docs\n"
+
+ return [TextContent(type="text", text=result)]
+
+ # Download specific config
+ if not config_name:
+ return [
+ TextContent(
+ type="text",
+ text="❌ Error: Please provide config_name or set list_available=true",
+ )
+ ]
+
+ # Get config details first
+ detail_url = f"{API_BASE_URL}/api/configs/{config_name}"
+ detail_response = await client.get(detail_url)
+
+ if detail_response.status_code == 404:
+ return [
+ TextContent(
+ type="text",
+ text=f"❌ Config '{config_name}' not found. Use list_available=true to see available configs.",
+ )
+ ]
+
+ detail_response.raise_for_status()
+ config_info = detail_response.json()
+
+ # Download the actual config file using the download_url from API response
+ download_url = config_info.get("download_url")
+ if not download_url:
+ return [
+ TextContent(
+ type="text",
+ text=f"❌ Config '{config_name}' has no download_url. Contact support.",
+ )
+ ]
+
+ download_response = await client.get(download_url)
+ download_response.raise_for_status()
+ config_data = download_response.json()
+
+ # Save to destination
+ dest_path = Path(destination)
+ dest_path.mkdir(parents=True, exist_ok=True)
+ config_file = dest_path / f"{config_name}.json"
+
+ with open(config_file, "w") as f:
+ json.dump(config_data, f, indent=2)
+
+ # Build result message
+ result = f"""✅ Config downloaded successfully!
+
+ 📦 Config: {config_name}
+ 📂 Saved to: {config_file}
+ 📊 Category: {config_info.get("category", "uncategorized")}
+ 🏷️ Tags: {", ".join(config_info.get("tags", []))}
+ 📄 Type: {config_info.get("type", "unknown")}
+ 📝 Description: {config_info.get("description", "No description")}
+
+ 🔗 Source: {config_info.get("primary_source", "N/A")}
+ 📏 Max pages: {config_info.get("max_pages", "N/A")}
+ 📦 File size: {config_info.get("file_size", "N/A")} bytes
+ 🕒 Last updated: {config_info.get("last_updated", "N/A")}
+
+ Next steps:
+ 1. Review config: cat {config_file}
+ 2. Estimate pages: Use estimate_pages tool
+ 3. Scrape docs: Use scrape_docs tool
+
+ 💡 More configs: Use list_available=true to see all available configs
+ """
+
+ return [TextContent(type="text", text=result)]
+
+ except httpx.HTTPError as e:
+ return [
+ TextContent(
+ type="text",
+ text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later.",
+ )
+ ]
+ except json.JSONDecodeError as e:
+ return [
+ TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")
+ ]
+ except Exception as e:
+ return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
+
+
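+ # --- Editor's usage sketch (illustrative only, not part of the package) ---
+ # A minimal way a caller might drive fetch_config_tool directly; the config
+ # name "react" and the destination directory are hypothetical stand-ins.
+ #
+ # import asyncio
+ #
+ # async def _demo_fetch():
+ #     result = await fetch_config_tool({"config_name": "react", "destination": "configs"})
+ #     print(result[0].text)  # human-readable status message
+ #
+ # asyncio.run(_demo_fetch())
+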
+ async def install_skill_tool(args: dict) -> list[TextContent]:
+ """
+ Complete skill installation workflow.
+
+ Orchestrates the complete workflow:
+ 1. Fetch config (if config_name provided)
+ 2. Scrape documentation
+ 3. AI Enhancement (MANDATORY - no skip option)
+ 4. Package to .zip
+ 5. Upload to Claude (optional)
+
+ Args:
+ config_name: Config to fetch from API (mutually exclusive with config_path)
+ config_path: Path to existing config (mutually exclusive with config_name)
+ destination: Output directory (default: "output")
+ auto_upload: Upload after packaging (default: True)
+ unlimited: Remove page limits (default: False)
+ dry_run: Preview only (default: False)
+
+ Returns:
+ List of TextContent with workflow progress and results
+ """
+ import json
+ import re
+
+ # Extract and validate inputs
+ config_name = args.get("config_name")
+ config_path = args.get("config_path")
+ destination = args.get("destination", "output")
+ auto_upload = args.get("auto_upload", True)
+ unlimited = args.get("unlimited", False)
+ dry_run = args.get("dry_run", False)
+
+ # Validation: Must provide exactly one of config_name or config_path
+ if not config_name and not config_path:
+ return [
+ TextContent(
+ type="text",
+ text="❌ Error: Must provide either config_name or config_path\n\nExamples:\n install_skill(config_name='react')\n install_skill(config_path='configs/custom.json')",
+ )
+ ]
+
+ if config_name and config_path:
+ return [
+ TextContent(
+ type="text",
+ text="❌ Error: Cannot provide both config_name and config_path\n\nChoose one:\n - config_name: Fetch from API (e.g., 'react')\n - config_path: Use existing file (e.g., 'configs/custom.json')",
+ )
+ ]
+
+ # Initialize output
+ output_lines = []
+ output_lines.append("🚀 SKILL INSTALLATION WORKFLOW")
+ output_lines.append("=" * 70)
+ output_lines.append("")
+
+ if dry_run:
+ output_lines.append("🔍 DRY RUN MODE - Preview only, no actions taken")
+ output_lines.append("")
+
+ # Track workflow state
+ workflow_state = {
+ "config_path": config_path,
+ "skill_name": None,
+ "skill_dir": None,
+ "zip_path": None,
+ "phases_completed": [],
+ }
+
+ try:
+ # ===== PHASE 1: Fetch Config (if needed) =====
+ if config_name:
+ output_lines.append("📥 PHASE 1/5: Fetch Config")
+ output_lines.append("-" * 70)
+ output_lines.append(f"Config: {config_name}")
+ output_lines.append(f"Destination: {destination}/")
+ output_lines.append("")
+
+ if not dry_run:
+ # Call fetch_config_tool directly
+ fetch_result = await fetch_config_tool(
+ {"config_name": config_name, "destination": destination}
+ )
+
+ # Parse result to extract config path
+ fetch_output = fetch_result[0].text
+ output_lines.append(fetch_output)
+ output_lines.append("")
+
+ # Extract config path from output
+ # Expected format: "✅ Config saved to: configs/react.json"
+ match = re.search(r"saved to:\s*(.+\.json)", fetch_output)
+ if match:
+ workflow_state["config_path"] = match.group(1).strip()
+ output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}")
+ else:
+ return [
+ TextContent(
+ type="text",
+ text="\n".join(output_lines) + "\n\n❌ Failed to fetch config",
+ )
+ ]
+
+ workflow_state["phases_completed"].append("fetch_config")
+ else:
+ output_lines.append(" [DRY RUN] Would fetch config from API")
+ workflow_state["config_path"] = f"{destination}/{config_name}.json"
+
+ output_lines.append("")
+
+ # ===== PHASE 2: Scrape Documentation =====
+ phase_num = "2/5" if config_name else "1/4"
+ output_lines.append(f"📄 PHASE {phase_num}: Scrape Documentation")
+ output_lines.append("-" * 70)
+ output_lines.append(f"Config: {workflow_state['config_path']}")
+ output_lines.append(f"Unlimited mode: {unlimited}")
+ output_lines.append("")
+
+ if not dry_run:
+ # Load config to get skill name
+ try:
+ with open(workflow_state["config_path"]) as f:
+ config = json.load(f)
+ workflow_state["skill_name"] = config.get("name", "unknown")
+ except Exception as e:
+ return [
+ TextContent(
+ type="text",
+ text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}",
+ )
+ ]
+
+ # Call scrape_docs_tool (does NOT include enhancement)
+ output_lines.append("Scraping documentation (this may take 20-45 minutes)...")
+ output_lines.append("")
+
+ scrape_result = await scrape_docs_tool(
+ {
+ "config_path": workflow_state["config_path"],
+ "unlimited": unlimited,
+ "enhance_local": False, # Enhancement is separate phase
+ "skip_scrape": False,
+ "dry_run": False,
+ }
+ )
+
+ scrape_output = scrape_result[0].text
+ output_lines.append(scrape_output)
+ output_lines.append("")
+
+ # Check for success
+ if "❌" in scrape_output:
+ return [
+ TextContent(
+ type="text",
+ text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above",
+ )
+ ]
+
+ workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}"
+ workflow_state["phases_completed"].append("scrape_docs")
+ else:
+ output_lines.append(" [DRY RUN] Would scrape documentation")
+ workflow_state["skill_name"] = "example"
+ workflow_state["skill_dir"] = f"{destination}/example"
+
+ output_lines.append("")
+
+ # ===== PHASE 3: AI Enhancement (MANDATORY) =====
+ phase_num = "3/5" if config_name else "2/4"
+ output_lines.append(f"✨ PHASE {phase_num}: AI Enhancement (MANDATORY)")
+ output_lines.append("-" * 70)
+ output_lines.append("⚠️ Enhancement is REQUIRED for quality (3/10→9/10 boost)")
+ output_lines.append(f"Skill directory: {workflow_state['skill_dir']}")
+ output_lines.append("Mode: Headless (runs in background)")
+ output_lines.append("Estimated time: 30-60 seconds")
+ output_lines.append("")
+
+ if not dry_run:
+ # Run enhance_skill_local in headless mode
+ # Build command directly
+ cmd = [
+ sys.executable,
+ str(CLI_DIR / "enhance_skill_local.py"),
+ workflow_state["skill_dir"],
+ # Headless is default, no flag needed
+ ]
+
+ timeout = 900 # 15 minutes max for enhancement
+
+ output_lines.append("Running AI enhancement...")
+
+ stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+ if returncode != 0:
+ output_lines.append(f"\n❌ Enhancement failed (exit code {returncode}):")
+ output_lines.append(stderr if stderr else stdout)
+ return [TextContent(type="text", text="\n".join(output_lines))]
+
+ output_lines.append(stdout)
+ workflow_state["phases_completed"].append("enhance_skill")
+ else:
+ output_lines.append(" [DRY RUN] Would enhance SKILL.md with Claude Code")
+
+ output_lines.append("")
+
+ # ===== PHASE 4: Package Skill =====
+ phase_num = "4/5" if config_name else "3/4"
+ output_lines.append(f"📦 PHASE {phase_num}: Package Skill")
+ output_lines.append("-" * 70)
+ output_lines.append(f"Skill directory: {workflow_state['skill_dir']}")
+ output_lines.append("")
+
+ if not dry_run:
+ # Call package_skill_tool (auto_upload=False, we handle upload separately)
+ package_result = await package_skill_tool(
+ {
+ "skill_dir": workflow_state["skill_dir"],
+ "auto_upload": False, # We handle upload in next phase
+ }
+ )
+
+ package_output = package_result[0].text
+ output_lines.append(package_output)
+ output_lines.append("")
+
+ # Extract zip path from output
+ # Expected format: "Saved to: output/react.zip"
+ match = re.search(r"Saved to:\s*(.+\.zip)", package_output)
+ if match:
+ workflow_state["zip_path"] = match.group(1).strip()
+ else:
+ # Fallback: construct zip path
+ workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}.zip"
+
+ workflow_state["phases_completed"].append("package_skill")
+ else:
+ output_lines.append(" [DRY RUN] Would package to .zip file")
+ workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}.zip"
+
+ output_lines.append("")
+
+ # ===== PHASE 5: Upload (Optional) =====
+ if auto_upload:
+ phase_num = "5/5" if config_name else "4/4"
+ output_lines.append(f"📤 PHASE {phase_num}: Upload to Claude")
+ output_lines.append("-" * 70)
+ output_lines.append(f"Zip file: {workflow_state['zip_path']}")
+ output_lines.append("")
+
+ # Check for API key
+ has_api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
+
+ if not dry_run:
+ if has_api_key:
+ # Call upload_skill_tool
+ upload_result = await upload_skill_tool(
+ {"skill_zip": workflow_state["zip_path"]}
+ )
+
+ upload_output = upload_result[0].text
+ output_lines.append(upload_output)
+
+ workflow_state["phases_completed"].append("upload_skill")
+ else:
+ output_lines.append("⚠️ ANTHROPIC_API_KEY not set - skipping upload")
+ output_lines.append("")
+ output_lines.append("To enable automatic upload:")
+ output_lines.append(" 1. Get API key from https://console.anthropic.com/")
+ output_lines.append(" 2. Set: export ANTHROPIC_API_KEY=sk-ant-...")
+ output_lines.append("")
+ output_lines.append("📤 Manual upload:")
+ output_lines.append(" 1. Go to https://claude.ai/skills")
+ output_lines.append(" 2. Click 'Upload Skill'")
+ output_lines.append(f" 3. Select: {workflow_state['zip_path']}")
+ else:
+ output_lines.append(" [DRY RUN] Would upload to Claude (if API key set)")
+
+ output_lines.append("")
+
+ # ===== WORKFLOW SUMMARY =====
+ output_lines.append("=" * 70)
+ output_lines.append("✅ WORKFLOW COMPLETE")
+ output_lines.append("=" * 70)
+ output_lines.append("")
+
+ if not dry_run:
+ output_lines.append("Phases completed:")
+ for phase in workflow_state["phases_completed"]:
+ output_lines.append(f" ✓ {phase}")
+ output_lines.append("")
+
+ output_lines.append("📁 Output:")
+ output_lines.append(f" Skill directory: {workflow_state['skill_dir']}")
+ if workflow_state["zip_path"]:
+ output_lines.append(f" Skill package: {workflow_state['zip_path']}")
+ output_lines.append("")
+
+ if auto_upload and has_api_key:
+ output_lines.append("🎉 Your skill is now available in Claude!")
+ output_lines.append(" Go to https://claude.ai/skills to use it")
+ elif auto_upload:
+ output_lines.append("📝 Manual upload required (see instructions above)")
+ else:
+ output_lines.append("📤 To upload:")
+ output_lines.append(" skill-seekers upload " + workflow_state["zip_path"])
+ else:
+ output_lines.append("This was a dry run. No actions were taken.")
+ output_lines.append("")
+ output_lines.append("To execute for real, remove the --dry-run flag:")
1854
+ if config_name:
1855
+ output_lines.append(f" install_skill(config_name='{config_name}')")
1856
+ else:
1857
+ output_lines.append(f" install_skill(config_path='{config_path}')")
1858
+
1859
+ return [TextContent(type="text", text="\n".join(output_lines))]
1860
+
1861
+ except Exception as e:
1862
+ output_lines.append("")
1863
+ output_lines.append(f"❌ Workflow failed: {str(e)}")
1864
+ output_lines.append("")
1865
+ output_lines.append("Phases completed before failure:")
1866
+ for phase in workflow_state["phases_completed"]:
1867
+ output_lines.append(f" ✓ {phase}")
1868
+ return [TextContent(type="text", text="\n".join(output_lines))]
1869
+
1870
+
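+ # --- Editor's usage sketch (illustrative only, not part of the package) ---
+ # Previewing the five-phase workflow with dry_run, then running it for real;
+ # "react" is a hypothetical config name.
+ #
+ # preview = await install_skill_tool({"config_name": "react", "dry_run": True})
+ # print(preview[0].text)  # phase-by-phase preview, no actions taken
+ #
+ # result = await install_skill_tool({"config_name": "react", "auto_upload": False})
+ # print(result[0].text)  # summary listing phases_completed and output paths
+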
+ async def submit_config_tool(args: dict) -> list[TextContent]:
+ """Submit a custom config to skill-seekers-configs repository via GitHub issue"""
+ try:
+ from github import Github, GithubException
+ except ImportError:
+ return [
+ TextContent(
+ type="text",
+ text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub",
+ )
+ ]
+
+ config_path = args.get("config_path")
+ config_json_str = args.get("config_json")
+ testing_notes = args.get("testing_notes", "")
+ github_token = args.get("github_token") or os.environ.get("GITHUB_TOKEN")
+
+ try:
+ # Load config data
+ if config_path:
+ config_file = Path(config_path)
+ if not config_file.exists():
+ return [
+ TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
+ ]
+
+ with open(config_file) as f:
+ config_data = json.load(f)
+ config_json_str = json.dumps(config_data, indent=2)
+ config_name = config_data.get("name", config_file.stem)
+
+ elif config_json_str:
+ try:
+ config_data = json.loads(config_json_str)
+ config_name = config_data.get("name", "unnamed")
+ except json.JSONDecodeError as e:
+ return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")]
+
+ else:
+ return [
+ TextContent(
+ type="text", text="❌ Error: Must provide either config_path or config_json"
+ )
+ ]
+
+ # Use ConfigValidator for comprehensive validation
+ if ConfigValidator is None:
+ return [
+ TextContent(
+ type="text",
+ text="❌ Error: ConfigValidator not available. Please ensure config_validator.py is in the CLI directory.",
+ )
+ ]
+
+ try:
+ validator = ConfigValidator(config_data)
+ validator.validate()
+
+ # Get format info
+ is_unified = validator.is_unified
+ config_name = config_data.get("name", "unnamed")
+
+ # Additional format validation (ConfigValidator only checks structure)
+ # Validate name format (alphanumeric, hyphens, underscores only)
+ if not re.match(r"^[a-zA-Z0-9_-]+$", config_name):
+ raise ValueError(
+ f"Invalid name format: '{config_name}'\nNames must contain only alphanumeric characters, hyphens, and underscores"
+ )
+
+ # Validate URL formats
+ if not is_unified:
+ # Legacy config - check base_url
+ base_url = config_data.get("base_url", "")
+ if base_url and not (
+ base_url.startswith("http://") or base_url.startswith("https://")
+ ):
+ raise ValueError(
+ f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://"
+ )
+ else:
+ # Unified config - check URLs in sources
+ for idx, source in enumerate(config_data.get("sources", [])):
+ if source.get("type") == "documentation":
+ source_url = source.get("base_url", "")
+ if source_url and not (
+ source_url.startswith("http://") or source_url.startswith("https://")
+ ):
+ raise ValueError(
+ f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://"
+ )
+
+ except ValueError as validation_error:
+ # Provide detailed validation feedback
+ error_msg = f"""❌ Config validation failed:
+
+ {str(validation_error)}
+
+ Please fix these issues and try again.
+
+ 💡 Validation help:
+ - Names: alphanumeric, hyphens, underscores only (e.g., "my-framework", "react_docs")
+ - URLs: must start with http:// or https://
+ - Selectors: should be a dict with keys like 'main_content', 'title', 'code_blocks'
+ - Rate limit: non-negative number (default: 0.5)
+ - Max pages: positive integer or -1 for unlimited
+
+ 📚 Example configs: https://github.com/yusufkaraaslan/skill-seekers-configs/tree/main/official
+ """
+ return [TextContent(type="text", text=error_msg)]
+
+ # Detect category based on config format and content
+ if is_unified:
+ # For unified configs, look at source types
+ source_types = [src.get("type") for src in config_data.get("sources", [])]
+ if (
+ ("documentation" in source_types and "github" in source_types)
+ or ("documentation" in source_types and "pdf" in source_types)
+ or len(source_types) > 1
+ ):
+ category = "multi-source"
+ else:
+ category = "unified"
+ else:
+ # For legacy configs, use name-based detection
+ name_lower = config_name.lower()
+ category = "other"
+ if any(
+ x in name_lower
+ for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]
+ ):
+ category = "web-frameworks"
+ elif any(x in name_lower for x in ["godot", "unity", "unreal"]):
+ category = "game-engines"
+ elif any(x in name_lower for x in ["kubernetes", "ansible", "docker"]):
+ category = "devops"
+ elif any(x in name_lower for x in ["tailwind", "bootstrap", "bulma"]):
+ category = "css-frameworks"
+
+ # Collect validation warnings
+ warnings = []
+ if not is_unified:
+ # Legacy config warnings
+ if "max_pages" not in config_data:
+ warnings.append("⚠️ No max_pages set - will use default (100)")
+ elif config_data.get("max_pages") in (None, -1):
+ warnings.append(
+ "⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours"
+ )
+ else:
+ # Unified config warnings
+ for src in config_data.get("sources", []):
+ if src.get("type") == "documentation" and "max_pages" not in src:
+ warnings.append(
+ "⚠️ No max_pages set for documentation source - will use default (100)"
+ )
+ elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1):
+ warnings.append("⚠️ Unlimited scraping enabled for documentation source")
+
+ # Check for GitHub token
+ if not github_token:
+ return [
+ TextContent(
+ type="text",
+ text="❌ Error: GitHub token required.\n\nProvide github_token parameter or set GITHUB_TOKEN environment variable.\n\nCreate token at: https://github.com/settings/tokens",
+ )
+ ]
+
+ # Create GitHub issue
+ try:
+ gh = Github(github_token)
+ repo = gh.get_repo("yusufkaraaslan/skill-seekers-configs")
+
+ # Build issue body
+ issue_body = f"""## Config Submission
+
+ ### Framework/Tool Name
+ {config_name}
+
+ ### Category
+ {category}
+
+ ### Config Format
+ {"Unified (multi-source)" if is_unified else "Legacy (single-source)"}
+
+ ### Configuration JSON
+ ```json
+ {config_json_str}
+ ```
+
+ ### Testing Results
+ {testing_notes if testing_notes else "Not provided"}
+
+ ### Documentation URL
+ {config_data.get("base_url") if not is_unified else "See sources in config"}
+
+ {"### Validation Warnings" if warnings else ""}
+ {chr(10).join(f"- {w}" for w in warnings) if warnings else ""}
+
+ ---
+
+ ### Checklist
+ - [x] Config validated with ConfigValidator
+ - [ ] Test scraping completed
+ - [ ] Added to appropriate category
+ - [ ] API updated
+ """
+
+ # Create issue
+ issue = repo.create_issue(
+ title=f"[CONFIG] {config_name}",
+ body=issue_body,
+ labels=["config-submission", "needs-review"],
+ )
+
+ result = f"""✅ Config submitted successfully!
+
+ 📝 Issue created: {issue.html_url}
+ 🏷️ Issue #{issue.number}
+ 📦 Config: {config_name}
+ 📊 Category: {category}
+ 🏷️ Labels: config-submission, needs-review
+
+ What happens next:
+ 1. Maintainers will review your config
+ 2. They'll test it with the actual documentation
+ 3. If approved, it will be added to official/{category}/
+ 4. The API will auto-update and your config becomes available!
+
+ 💡 Track your submission: {issue.html_url}
+ 📚 All configs: https://github.com/yusufkaraaslan/skill-seekers-configs
+ """
+
+ return [TextContent(type="text", text=result)]
+
+ except GithubException as e:
+ return [
+ TextContent(
+ type="text",
+ text=f"❌ GitHub Error: {str(e)}\n\nCheck your token permissions (needs 'repo' or 'public_repo' scope).",
+ )
+ ]
+
+ except Exception as e:
+ return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
+
+
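+ # --- Editor's usage sketch (illustrative only, not part of the package) ---
+ # Submitting a config file as a GitHub issue; the path and notes are
+ # hypothetical, and GITHUB_TOKEN is read from the environment when the
+ # github_token argument is omitted.
+ #
+ # result = await submit_config_tool({
+ #     "config_path": "configs/my-framework.json",
+ #     "testing_notes": "Scraped docs successfully with default settings",
+ # })
+ # print(result[0].text)  # issue URL and review checklist on success
+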
+ async def add_config_source_tool(args: dict) -> list[TextContent]:
+ """Register a git repository as a config source"""
+ from skill_seekers.mcp.source_manager import SourceManager
+
+ name = args.get("name")
+ git_url = args.get("git_url")
+ source_type = args.get("source_type", "github")
+ token_env = args.get("token_env")
+ branch = args.get("branch", "main")
+ priority = args.get("priority", 100)
+ enabled = args.get("enabled", True)
+
+ try:
+ # Validate required parameters
+ if not name:
+ return [TextContent(type="text", text="❌ Error: 'name' parameter is required")]
+ if not git_url:
+ return [TextContent(type="text", text="❌ Error: 'git_url' parameter is required")]
+
+ # Add source
+ source_manager = SourceManager()
+ source = source_manager.add_source(
+ name=name,
+ git_url=git_url,
+ source_type=source_type,
+ token_env=token_env,
+ branch=branch,
+ priority=priority,
+ enabled=enabled,
+ )
+
+ # Check if this is an update
+ is_update = "updated_at" in source and source["added_at"] != source["updated_at"]
+
+ result = f"""✅ Config source {"updated" if is_update else "registered"} successfully!
+
+ 📛 Name: {source["name"]}
+ 📁 Repository: {source["git_url"]}
+ 🔖 Type: {source["type"]}
+ 🌿 Branch: {source["branch"]}
+ 🔑 Token env: {source.get("token_env", "None")}
+ ⚡ Priority: {source["priority"]} (lower = higher priority)
+ ✓ Enabled: {source["enabled"]}
+ 🕒 Added: {source["added_at"][:19]}
+
+ Usage:
+ # Fetch config from this source
+ fetch_config(source="{source["name"]}", config_name="your-config")
+
+ # List all sources
+ list_config_sources()
+
+ # Remove this source
+ remove_config_source(name="{source["name"]}")
+
+ 💡 Make sure to set the {source.get("token_env") or "GIT_TOKEN"} environment variable for private repos
+ """
+
+ return [TextContent(type="text", text=result)]
+
+ except ValueError as e:
+ return [TextContent(type="text", text=f"❌ Validation Error: {str(e)}")]
+ except Exception as e:
+ return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
+
+
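+ # --- Editor's usage sketch (illustrative only, not part of the package) ---
+ # Registering a private team repository as a config source; the name, URL,
+ # and token variable are hypothetical.
+ #
+ # result = await add_config_source_tool({
+ #     "name": "team",
+ #     "git_url": "https://github.com/myorg/configs.git",
+ #     "token_env": "TEAM_GIT_TOKEN",  # env var holding the repo token
+ #     "priority": 10,  # lower number = higher priority
+ # })
+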
+ async def list_config_sources_tool(args: dict) -> list[TextContent]:
+ """List all registered config sources"""
+ from skill_seekers.mcp.source_manager import SourceManager
+
+ enabled_only = args.get("enabled_only", False)
+
+ try:
+ source_manager = SourceManager()
+ sources = source_manager.list_sources(enabled_only=enabled_only)
+
+ if not sources:
+ result = """📋 No config sources registered
+
+ To add a source:
+ add_config_source(
+ name="team",
+ git_url="https://github.com/myorg/configs.git"
+ )
+
+ 💡 Once added, use: fetch_config(source="team", config_name="...")
+ """
+ return [TextContent(type="text", text=result)]
+
+ # Format sources list
+ result = f"📋 Config Sources ({len(sources)} total"
+ if enabled_only:
+ result += ", enabled only"
+ result += ")\n\n"
+
+ for source in sources:
+ status_icon = "✓" if source.get("enabled", True) else "✗"
+ result += f"{status_icon} **{source['name']}**\n"
+ result += f" 📁 {source['git_url']}\n"
+ result += f" 🔖 Type: {source['type']} | 🌿 Branch: {source['branch']}\n"
+ result += f" 🔑 Token: {source.get('token_env', 'None')} | ⚡ Priority: {source['priority']}\n"
+ result += f" 🕒 Added: {source['added_at'][:19]}\n"
+ result += "\n"
+
+ result += """Usage:
+ # Fetch config from a source
+ fetch_config(source="SOURCE_NAME", config_name="CONFIG_NAME")
+
+ # Add new source
+ add_config_source(name="...", git_url="...")
+
+ # Remove source
+ remove_config_source(name="SOURCE_NAME")
+ """
+
+ return [TextContent(type="text", text=result)]
+
+ except Exception as e:
+ return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
+
+
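+ # --- Editor's usage sketch (illustrative only, not part of the package) ---
+ # result = await list_config_sources_tool({"enabled_only": True})
+ # print(result[0].text)  # formatted list of registered sources
+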
+ async def remove_config_source_tool(args: dict) -> list[TextContent]:
+ """Remove a registered config source"""
+ from skill_seekers.mcp.source_manager import SourceManager
+
+ name = args.get("name")
+
+ try:
+ # Validate required parameter
+ if not name:
+ return [TextContent(type="text", text="❌ Error: 'name' parameter is required")]
+
+ # Remove source
+ source_manager = SourceManager()
+ removed = source_manager.remove_source(name)
+
+ if removed:
+ result = f"""✅ Config source removed successfully!
+
+ 📛 Removed: {name}
+
+ ⚠️ Note: Cached git repository data is NOT deleted
+ To free up disk space, manually delete: ~/.skill-seekers/cache/{name}/
+
+ Next steps:
+ # List remaining sources
+ list_config_sources()
+
+ # Add a different source
+ add_config_source(name="...", git_url="...")
+ """
+ return [TextContent(type="text", text=result)]
+ else:
+ # Not found - show available sources
+ sources = source_manager.list_sources()
+ available = [s["name"] for s in sources]
+
+ result = f"""❌ Source '{name}' not found
+
+ Available sources: {", ".join(available) if available else "none"}
+
+ To see all sources:
+ list_config_sources()
+ """
+ return [TextContent(type="text", text=result)]
+
+ except Exception as e:
+ return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
+
+
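+ # --- Editor's usage sketch (illustrative only, not part of the package) ---
+ # result = await remove_config_source_tool({"name": "team"})  # "team" is hypothetical
+ # print(result[0].text)  # notes that cached repo data must be deleted manually
+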
+ async def main():
+ """Run the MCP server"""
+ if not MCP_AVAILABLE or app is None:
+ print("❌ Error: MCP server cannot start - MCP package not available")
+ sys.exit(1)
+
+ from mcp.server.stdio import stdio_server
+
+ async with stdio_server() as (read_stream, write_stream):
+ await app.run(read_stream, write_stream, app.create_initialization_options())
+
+
+ if __name__ == "__main__":
+ asyncio.run(main())
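+ # --- Editor's note (illustrative only, not part of the package) ---
+ # The server communicates over stdio, so an MCP client would typically launch
+ # it as a subprocess and attach to its stdin/stdout, e.g. (assuming the
+ # package is installed):
+ #
+ #     python -m skill_seekers.mcp.server
+ #
+ # The exact registration syntax depends on the MCP client configuration.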