skill-seekers 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Config Source Manager
|
|
4
|
+
Manages registry of custom config sources (git repositories)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SourceManager:
|
|
13
|
+
"""Manages config source registry at ~/.skill-seekers/sources.json"""
|
|
14
|
+
|
|
15
|
+
def __init__(self, config_dir: str | None = None):
|
|
16
|
+
"""
|
|
17
|
+
Initialize source manager.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
config_dir: Base config directory. Defaults to ~/.skill-seekers/
|
|
21
|
+
"""
|
|
22
|
+
if config_dir:
|
|
23
|
+
self.config_dir = Path(config_dir)
|
|
24
|
+
else:
|
|
25
|
+
self.config_dir = Path.home() / ".skill-seekers"
|
|
26
|
+
|
|
27
|
+
# Ensure config directory exists
|
|
28
|
+
self.config_dir.mkdir(parents=True, exist_ok=True)
|
|
29
|
+
|
|
30
|
+
# Registry file path
|
|
31
|
+
self.registry_file = self.config_dir / "sources.json"
|
|
32
|
+
|
|
33
|
+
# Initialize registry if it doesn't exist
|
|
34
|
+
if not self.registry_file.exists():
|
|
35
|
+
self._write_registry({"version": "1.0", "sources": []})
|
|
36
|
+
|
|
37
|
+
def add_source(
|
|
38
|
+
self,
|
|
39
|
+
name: str,
|
|
40
|
+
git_url: str,
|
|
41
|
+
source_type: str = "github",
|
|
42
|
+
token_env: str | None = None,
|
|
43
|
+
branch: str = "main",
|
|
44
|
+
priority: int = 100,
|
|
45
|
+
enabled: bool = True,
|
|
46
|
+
) -> dict:
|
|
47
|
+
"""
|
|
48
|
+
Add or update a config source.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
name: Source identifier (lowercase, alphanumeric + hyphens/underscores)
|
|
52
|
+
git_url: Git repository URL
|
|
53
|
+
source_type: Source type (github, gitlab, bitbucket, custom)
|
|
54
|
+
token_env: Environment variable name for auth token
|
|
55
|
+
branch: Git branch to use (default: main)
|
|
56
|
+
priority: Source priority (lower = higher priority, default: 100)
|
|
57
|
+
enabled: Whether source is enabled (default: True)
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
Source dictionary
|
|
61
|
+
|
|
62
|
+
Raises:
|
|
63
|
+
ValueError: If name is invalid or git_url is empty
|
|
64
|
+
"""
|
|
65
|
+
# Validate name
|
|
66
|
+
if not name or not name.replace("-", "").replace("_", "").isalnum():
|
|
67
|
+
raise ValueError(
|
|
68
|
+
f"Invalid source name '{name}'. Must be alphanumeric with optional hyphens/underscores."
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# Validate git_url
|
|
72
|
+
if not git_url or not git_url.strip():
|
|
73
|
+
raise ValueError("git_url cannot be empty")
|
|
74
|
+
|
|
75
|
+
# Auto-detect token_env if not provided
|
|
76
|
+
if token_env is None:
|
|
77
|
+
token_env = self._default_token_env(source_type)
|
|
78
|
+
|
|
79
|
+
# Create source entry
|
|
80
|
+
source = {
|
|
81
|
+
"name": name.lower(),
|
|
82
|
+
"git_url": git_url.strip(),
|
|
83
|
+
"type": source_type.lower(),
|
|
84
|
+
"token_env": token_env,
|
|
85
|
+
"branch": branch,
|
|
86
|
+
"enabled": enabled,
|
|
87
|
+
"priority": priority,
|
|
88
|
+
"added_at": datetime.now(timezone.utc).isoformat(),
|
|
89
|
+
"updated_at": datetime.now(timezone.utc).isoformat(),
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
# Load registry
|
|
93
|
+
registry = self._read_registry()
|
|
94
|
+
|
|
95
|
+
# Check if source exists
|
|
96
|
+
existing_index = None
|
|
97
|
+
for i, existing_source in enumerate(registry["sources"]):
|
|
98
|
+
if existing_source["name"] == source["name"]:
|
|
99
|
+
existing_index = i
|
|
100
|
+
# Preserve added_at timestamp
|
|
101
|
+
source["added_at"] = existing_source.get("added_at", source["added_at"])
|
|
102
|
+
break
|
|
103
|
+
|
|
104
|
+
# Add or update
|
|
105
|
+
if existing_index is not None:
|
|
106
|
+
registry["sources"][existing_index] = source
|
|
107
|
+
else:
|
|
108
|
+
registry["sources"].append(source)
|
|
109
|
+
|
|
110
|
+
# Sort by priority (lower first)
|
|
111
|
+
registry["sources"].sort(key=lambda s: s["priority"])
|
|
112
|
+
|
|
113
|
+
# Save registry
|
|
114
|
+
self._write_registry(registry)
|
|
115
|
+
|
|
116
|
+
return source
|
|
117
|
+
|
|
118
|
+
def get_source(self, name: str) -> dict:
|
|
119
|
+
"""
|
|
120
|
+
Get source by name.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
name: Source identifier
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
Source dictionary
|
|
127
|
+
|
|
128
|
+
Raises:
|
|
129
|
+
KeyError: If source not found
|
|
130
|
+
"""
|
|
131
|
+
registry = self._read_registry()
|
|
132
|
+
|
|
133
|
+
# Search for source (case-insensitive)
|
|
134
|
+
name_lower = name.lower()
|
|
135
|
+
for source in registry["sources"]:
|
|
136
|
+
if source["name"] == name_lower:
|
|
137
|
+
return source
|
|
138
|
+
|
|
139
|
+
# Not found - provide helpful error
|
|
140
|
+
available = [s["name"] for s in registry["sources"]]
|
|
141
|
+
raise KeyError(
|
|
142
|
+
f"Source '{name}' not found. Available sources: {', '.join(available) if available else 'none'}"
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
def list_sources(self, enabled_only: bool = False) -> list[dict]:
|
|
146
|
+
"""
|
|
147
|
+
List all config sources.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
enabled_only: If True, only return enabled sources
|
|
151
|
+
|
|
152
|
+
Returns:
|
|
153
|
+
List of source dictionaries (sorted by priority)
|
|
154
|
+
"""
|
|
155
|
+
registry = self._read_registry()
|
|
156
|
+
|
|
157
|
+
if enabled_only:
|
|
158
|
+
return [s for s in registry["sources"] if s.get("enabled", True)]
|
|
159
|
+
|
|
160
|
+
return registry["sources"]
|
|
161
|
+
|
|
162
|
+
def remove_source(self, name: str) -> bool:
|
|
163
|
+
"""
|
|
164
|
+
Remove source by name.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
name: Source identifier
|
|
168
|
+
|
|
169
|
+
Returns:
|
|
170
|
+
True if removed, False if not found
|
|
171
|
+
"""
|
|
172
|
+
registry = self._read_registry()
|
|
173
|
+
|
|
174
|
+
# Find source index
|
|
175
|
+
name_lower = name.lower()
|
|
176
|
+
for i, source in enumerate(registry["sources"]):
|
|
177
|
+
if source["name"] == name_lower:
|
|
178
|
+
# Remove source
|
|
179
|
+
del registry["sources"][i]
|
|
180
|
+
# Save registry
|
|
181
|
+
self._write_registry(registry)
|
|
182
|
+
return True
|
|
183
|
+
|
|
184
|
+
return False
|
|
185
|
+
|
|
186
|
+
def update_source(self, name: str, **kwargs) -> dict:
|
|
187
|
+
"""
|
|
188
|
+
Update specific fields of an existing source.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
name: Source identifier
|
|
192
|
+
**kwargs: Fields to update (git_url, branch, enabled, priority, etc.)
|
|
193
|
+
|
|
194
|
+
Returns:
|
|
195
|
+
Updated source dictionary
|
|
196
|
+
|
|
197
|
+
Raises:
|
|
198
|
+
KeyError: If source not found
|
|
199
|
+
"""
|
|
200
|
+
# Get existing source
|
|
201
|
+
source = self.get_source(name)
|
|
202
|
+
|
|
203
|
+
# Update allowed fields
|
|
204
|
+
allowed_fields = {"git_url", "type", "token_env", "branch", "enabled", "priority"}
|
|
205
|
+
for field, value in kwargs.items():
|
|
206
|
+
if field in allowed_fields:
|
|
207
|
+
source[field] = value
|
|
208
|
+
|
|
209
|
+
# Update timestamp
|
|
210
|
+
source["updated_at"] = datetime.now(timezone.utc).isoformat()
|
|
211
|
+
|
|
212
|
+
# Save changes
|
|
213
|
+
registry = self._read_registry()
|
|
214
|
+
for i, s in enumerate(registry["sources"]):
|
|
215
|
+
if s["name"] == source["name"]:
|
|
216
|
+
registry["sources"][i] = source
|
|
217
|
+
break
|
|
218
|
+
|
|
219
|
+
# Re-sort by priority
|
|
220
|
+
registry["sources"].sort(key=lambda s: s["priority"])
|
|
221
|
+
|
|
222
|
+
self._write_registry(registry)
|
|
223
|
+
|
|
224
|
+
return source
|
|
225
|
+
|
|
226
|
+
def _read_registry(self) -> dict:
|
|
227
|
+
"""
|
|
228
|
+
Read registry from file.
|
|
229
|
+
|
|
230
|
+
Returns:
|
|
231
|
+
Registry dictionary
|
|
232
|
+
"""
|
|
233
|
+
try:
|
|
234
|
+
with open(self.registry_file, encoding="utf-8") as f:
|
|
235
|
+
return json.load(f)
|
|
236
|
+
except json.JSONDecodeError as e:
|
|
237
|
+
raise ValueError(f"Corrupted registry file: {e}") from e
|
|
238
|
+
|
|
239
|
+
def _write_registry(self, registry: dict) -> None:
|
|
240
|
+
"""
|
|
241
|
+
Write registry to file atomically.
|
|
242
|
+
|
|
243
|
+
Args:
|
|
244
|
+
registry: Registry dictionary
|
|
245
|
+
"""
|
|
246
|
+
# Validate schema
|
|
247
|
+
if "version" not in registry or "sources" not in registry:
|
|
248
|
+
raise ValueError("Invalid registry schema")
|
|
249
|
+
|
|
250
|
+
# Atomic write: write to temp file, then rename
|
|
251
|
+
temp_file = self.registry_file.with_suffix(".tmp")
|
|
252
|
+
|
|
253
|
+
try:
|
|
254
|
+
with open(temp_file, "w", encoding="utf-8") as f:
|
|
255
|
+
json.dump(registry, f, indent=2, ensure_ascii=False)
|
|
256
|
+
|
|
257
|
+
# Atomic rename
|
|
258
|
+
temp_file.replace(self.registry_file)
|
|
259
|
+
|
|
260
|
+
except Exception as e:
|
|
261
|
+
# Clean up temp file on error
|
|
262
|
+
if temp_file.exists():
|
|
263
|
+
temp_file.unlink()
|
|
264
|
+
raise e
|
|
265
|
+
|
|
266
|
+
@staticmethod
|
|
267
|
+
def _default_token_env(source_type: str) -> str:
|
|
268
|
+
"""
|
|
269
|
+
Get default token environment variable name for source type.
|
|
270
|
+
|
|
271
|
+
Args:
|
|
272
|
+
source_type: Source type (github, gitlab, bitbucket, custom)
|
|
273
|
+
|
|
274
|
+
Returns:
|
|
275
|
+
Environment variable name (e.g., GITHUB_TOKEN)
|
|
276
|
+
"""
|
|
277
|
+
type_map = {
|
|
278
|
+
"github": "GITHUB_TOKEN",
|
|
279
|
+
"gitlab": "GITLAB_TOKEN",
|
|
280
|
+
"gitea": "GITEA_TOKEN",
|
|
281
|
+
"bitbucket": "BITBUCKET_TOKEN",
|
|
282
|
+
"custom": "GIT_TOKEN",
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
return type_map.get(source_type.lower(), "GIT_TOKEN")
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MCP Tool Implementations
|
|
3
|
+
|
|
4
|
+
This package contains modular tool implementations for the Skill Seekers MCP server.
|
|
5
|
+
Tools are organized by functionality:
|
|
6
|
+
|
|
7
|
+
- config_tools: Configuration management (generate, list, validate)
|
|
8
|
+
- scraping_tools: Scraping operations (docs, GitHub, PDF, estimation)
|
|
9
|
+
- packaging_tools: Skill packaging and upload
|
|
10
|
+
- splitting_tools: Config splitting and router generation
|
|
11
|
+
- source_tools: Config source management (fetch, submit, add/remove sources)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
__version__ = "2.7.2"
|
|
15
|
+
|
|
16
|
+
from .config_tools import (
|
|
17
|
+
generate_config as generate_config_impl,
|
|
18
|
+
)
|
|
19
|
+
from .config_tools import (
|
|
20
|
+
list_configs as list_configs_impl,
|
|
21
|
+
)
|
|
22
|
+
from .config_tools import (
|
|
23
|
+
validate_config as validate_config_impl,
|
|
24
|
+
)
|
|
25
|
+
from .packaging_tools import (
|
|
26
|
+
enhance_skill_tool as enhance_skill_impl,
|
|
27
|
+
)
|
|
28
|
+
from .packaging_tools import (
|
|
29
|
+
install_skill_tool as install_skill_impl,
|
|
30
|
+
)
|
|
31
|
+
from .packaging_tools import (
|
|
32
|
+
package_skill_tool as package_skill_impl,
|
|
33
|
+
)
|
|
34
|
+
from .packaging_tools import (
|
|
35
|
+
upload_skill_tool as upload_skill_impl,
|
|
36
|
+
)
|
|
37
|
+
from .scraping_tools import (
|
|
38
|
+
build_how_to_guides_tool as build_how_to_guides_impl,
|
|
39
|
+
)
|
|
40
|
+
from .scraping_tools import (
|
|
41
|
+
detect_patterns_tool as detect_patterns_impl,
|
|
42
|
+
)
|
|
43
|
+
from .scraping_tools import (
|
|
44
|
+
estimate_pages_tool as estimate_pages_impl,
|
|
45
|
+
)
|
|
46
|
+
from .scraping_tools import (
|
|
47
|
+
extract_config_patterns_tool as extract_config_patterns_impl,
|
|
48
|
+
)
|
|
49
|
+
from .scraping_tools import (
|
|
50
|
+
extract_test_examples_tool as extract_test_examples_impl,
|
|
51
|
+
)
|
|
52
|
+
from .scraping_tools import (
|
|
53
|
+
scrape_codebase_tool as scrape_codebase_impl,
|
|
54
|
+
)
|
|
55
|
+
from .scraping_tools import (
|
|
56
|
+
scrape_docs_tool as scrape_docs_impl,
|
|
57
|
+
)
|
|
58
|
+
from .scraping_tools import (
|
|
59
|
+
scrape_github_tool as scrape_github_impl,
|
|
60
|
+
)
|
|
61
|
+
from .scraping_tools import (
|
|
62
|
+
scrape_pdf_tool as scrape_pdf_impl,
|
|
63
|
+
)
|
|
64
|
+
from .source_tools import (
|
|
65
|
+
add_config_source_tool as add_config_source_impl,
|
|
66
|
+
)
|
|
67
|
+
from .source_tools import (
|
|
68
|
+
fetch_config_tool as fetch_config_impl,
|
|
69
|
+
)
|
|
70
|
+
from .source_tools import (
|
|
71
|
+
list_config_sources_tool as list_config_sources_impl,
|
|
72
|
+
)
|
|
73
|
+
from .source_tools import (
|
|
74
|
+
remove_config_source_tool as remove_config_source_impl,
|
|
75
|
+
)
|
|
76
|
+
from .source_tools import (
|
|
77
|
+
submit_config_tool as submit_config_impl,
|
|
78
|
+
)
|
|
79
|
+
from .splitting_tools import (
|
|
80
|
+
generate_router as generate_router_impl,
|
|
81
|
+
)
|
|
82
|
+
from .splitting_tools import (
|
|
83
|
+
split_config as split_config_impl,
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
__all__ = [
|
|
87
|
+
# Config tools
|
|
88
|
+
"generate_config_impl",
|
|
89
|
+
"list_configs_impl",
|
|
90
|
+
"validate_config_impl",
|
|
91
|
+
# Scraping tools
|
|
92
|
+
"estimate_pages_impl",
|
|
93
|
+
"scrape_docs_impl",
|
|
94
|
+
"scrape_github_impl",
|
|
95
|
+
"scrape_pdf_impl",
|
|
96
|
+
"scrape_codebase_impl",
|
|
97
|
+
"detect_patterns_impl",
|
|
98
|
+
"extract_test_examples_impl",
|
|
99
|
+
"build_how_to_guides_impl",
|
|
100
|
+
"extract_config_patterns_impl",
|
|
101
|
+
# Packaging tools
|
|
102
|
+
"package_skill_impl",
|
|
103
|
+
"upload_skill_impl",
|
|
104
|
+
"enhance_skill_impl",
|
|
105
|
+
"install_skill_impl",
|
|
106
|
+
# Splitting tools
|
|
107
|
+
"split_config_impl",
|
|
108
|
+
"generate_router_impl",
|
|
109
|
+
# Source tools
|
|
110
|
+
"fetch_config_impl",
|
|
111
|
+
"submit_config_impl",
|
|
112
|
+
"add_config_source_impl",
|
|
113
|
+
"list_config_sources_impl",
|
|
114
|
+
"remove_config_source_impl",
|
|
115
|
+
]
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Config management tools for Skill Seeker MCP Server.
|
|
3
|
+
|
|
4
|
+
This module provides tools for generating, listing, and validating configuration files
|
|
5
|
+
for documentation scraping.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
try:
    from mcp.types import TextContent
except ImportError:

    class TextContent:
        """Minimal stand-in for mcp.types.TextContent when MCP is not installed."""

        def __init__(self, type: str, text: str):
            # Keep the two fields the tools in this module pass in
            self.type, self.text = type, text


# Directory holding the CLI modules, located relative to this file
CLI_DIR = Path(__file__).parent.parent.parent / "cli"

# Put the CLI directory at the front of the import path so that
# config_validator can be imported as a top-level module
sys.path.insert(0, str(CLI_DIR))
try:
    from config_validator import ConfigValidator
except ImportError:
    # Left as None when the validator module cannot be imported
    ConfigValidator = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
async def generate_config(args: dict) -> list[TextContent]:
    """
    Generate a config file for documentation scraping.

    Builds a JSON config with default selectors and writes it to
    ``configs/<name>.json`` relative to the current working directory. The
    config can be further customized after creation.

    Args:
        args: Dictionary containing:
            - name (str): Skill name (lowercase, alphanumeric, hyphens, underscores)
            - url (str): Base documentation URL (must include http:// or https://)
            - description (str): Description of when to use this skill
            - max_pages (int, optional): Maximum pages to scrape (default: 100, use -1 for unlimited)
            - unlimited (bool, optional): Remove all limits - scrape all pages (default: False). Overrides max_pages.
            - rate_limit (float, optional): Delay between requests in seconds (default: 0.5)

    Returns:
        List[TextContent]: Success message with config path and next steps, or
        an error message when the name cannot be used as a file name.

    Raises:
        KeyError: If name, url or description is missing from args.
    """
    name = args["name"]
    url = args["url"]
    description = args["description"]
    max_pages = args.get("max_pages", 100)
    unlimited = args.get("unlimited", False)
    rate_limit = args.get("rate_limit", 0.5)

    # The name becomes part of the output path, so refuse anything that could
    # point outside the configs directory (path separators, "." / "..").
    name_text = str(name)
    if name_text in ("", ".", "..") or any(ch in name_text for ch in ("/", "\\", "\x00")):
        return [
            TextContent(
                type="text",
                text=f"❌ Error: Invalid skill name {name_text!r}: "
                "must not be empty or contain path separators",
            )
        ]

    # Handle unlimited mode: stored as null in the config
    if unlimited or max_pages == -1:
        max_pages = None
        limit_msg = "unlimited (no page limit)"
    else:
        limit_msg = str(max_pages)

    # Create config
    config = {
        "name": name,
        "description": description,
        "base_url": url,
        "selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre code"},
        "url_patterns": {"include": [], "exclude": []},
        "categories": {},
        "rate_limit": rate_limit,
        "max_pages": max_pages,
    }

    # Save to configs directory
    config_path = Path("configs") / f"{name}.json"
    config_path.parent.mkdir(exist_ok=True)

    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)

    result = f"""✅ Config created: {config_path}

Configuration:
Name: {name}
URL: {url}
Max pages: {limit_msg}
Rate limit: {rate_limit}s

Next steps:
1. Review/edit config: cat {config_path}
2. Estimate pages: Use estimate_pages tool
3. Scrape docs: Use scrape_docs tool

Note: Default selectors may need adjustment for your documentation site.
"""

    return [TextContent(type="text", text=result)]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
async def list_configs(_args: dict) -> list[TextContent]:
    """
    List all available preset configurations.

    Scans the ``configs`` directory (relative to the current working directory)
    and lists every ``*.json`` file with its basic information (name, URL,
    description). A file that cannot be read or parsed is listed with the
    error instead of aborting the whole listing.

    Args:
        _args: Dictionary (unused, no parameters required)

    Returns:
        List[TextContent]: Formatted list of available configs with details, or
        a short message if the directory or config files are missing.
    """
    configs_dir = Path("configs")

    if not configs_dir.exists():
        return [TextContent(type="text", text="No configs directory found")]

    config_files = sorted(configs_dir.glob("*.json"))

    if not config_files:
        return [TextContent(type="text", text="No config files found")]

    # Collect the pieces and join once instead of repeated string concatenation
    parts = ["📋 Available Configs:\n\n"]

    for config_file in config_files:
        try:
            # Config files are JSON, so read them as UTF-8 regardless of locale
            with open(config_file, encoding="utf-8") as f:
                config = json.load(f)
            name = config.get("name", config_file.stem)
            desc = config.get("description", "No description")
            url = config.get("base_url", "")

            parts.append(
                f" • {config_file.name}\n"
                f" Name: {name}\n"
                f" URL: {url}\n"
                f" Description: {desc}\n\n"
            )
        except Exception as e:
            # Deliberately broad: one bad file is reported inline, not fatal
            parts.append(f" • {config_file.name} - Error reading: {e}\n\n")

    return [TextContent(type="text", text="".join(parts))]
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
async def validate_config(args: dict) -> list[TextContent]:
    """
    Validate a config file for errors.

    Tries the unified validator from ``config_validator`` first and reports
    the detected format (unified multi-source or legacy single-source). If
    that import fails, falls back to ``doc_scraper.validate_config``, which
    returns ``(errors, warnings)`` for a legacy config.

    Args:
        args: Dictionary containing:
            - config_path (str): Path to config JSON file to validate

    Returns:
        List[TextContent]: Validation results with format details and any
        errors/warnings, or an error message. Exceptions raised during
        validation are reported as an error message rather than propagated.

    Raises:
        KeyError: If config_path is missing from args.
    """
    config_path = args["config_path"]

    # Make the CLI modules importable. Guarded so that repeated tool calls do
    # not keep prepending the same entry to sys.path.
    cli_dir = str(CLI_DIR)
    if cli_dir not in sys.path:
        sys.path.insert(0, cli_dir)

    try:
        # Check if file exists
        if not Path(config_path).exists():
            return [
                TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
            ]

        # Try unified config validator first
        try:
            # Aliased so the import does not shadow this function's own name
            from config_validator import validate_config as validate_unified_config

            validator = validate_unified_config(config_path)
            config = validator.config

            lines = ["✅ Config is valid!\n\n"]

            # Show format
            if validator.is_unified:
                sources = config.get("sources", [])
                lines.append("📦 Format: Unified (multi-source)\n")
                lines.append(f" Name: {config['name']}\n")
                lines.append(f" Sources: {len(sources)}\n")

                # Show sources
                for i, source in enumerate(sources, 1):
                    source_type = source["type"]
                    lines.append(f"\n Source {i}: {source_type}\n")
                    if source_type == "documentation":
                        lines.append(f" URL: {source.get('base_url', 'N/A')}\n")
                        lines.append(f" Max pages: {source.get('max_pages', 'Not set')}\n")
                    elif source_type == "github":
                        lines.append(f" Repo: {source.get('repo', 'N/A')}\n")
                        lines.append(
                            f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
                        )
                    elif source_type == "pdf":
                        lines.append(f" Path: {source.get('path', 'N/A')}\n")

                # Show merge settings if applicable
                if validator.needs_api_merge():
                    merge_mode = config.get("merge_mode", "rule-based")
                    lines.append(f"\n Merge mode: {merge_mode}\n")
                    lines.append(" API merging: Required (docs + code sources)\n")

            else:
                lines.append("📦 Format: Legacy (single source)\n")
                lines.append(f" Name: {config['name']}\n")
                lines.append(f" Base URL: {config.get('base_url', 'N/A')}\n")
                lines.append(f" Max pages: {config.get('max_pages', 'Not set')}\n")
                lines.append(f" Rate limit: {config.get('rate_limit', 'Not set')}s\n")

            return [TextContent(type="text", text="".join(lines))]

        except ImportError:
            # Fall back to legacy validation
            from doc_scraper import validate_config as validate_legacy_config

            with open(config_path, encoding="utf-8") as f:
                config = json.load(f)

            # Validate config - returns (errors, warnings) tuple
            errors, warnings = validate_legacy_config(config)

            if errors:
                lines = ["❌ Config validation failed:\n\n"]
                lines.extend(f" • {error}\n" for error in errors)
            else:
                lines = [
                    "✅ Config is valid!\n\n",
                    "📦 Format: Legacy (single source)\n",
                    f" Name: {config['name']}\n",
                    f" Base URL: {config['base_url']}\n",
                    f" Max pages: {config.get('max_pages', 'Not set')}\n",
                    f" Rate limit: {config.get('rate_limit', 'Not set')}s\n",
                ]

            if warnings:
                lines.append("\n⚠️ Warnings:\n")
                lines.extend(f" • {warning}\n" for warning in warnings)

            return [TextContent(type="text", text="".join(lines))]

    except Exception as e:
        # Tool boundary: report any failure as text instead of raising
        return [TextContent(type="text", text=f"❌ Error: {e}")]
|