skill-seekers 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,394 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified Config Validator
4
+
5
+ Validates unified config format that supports multiple sources:
6
+ - documentation (website scraping)
7
+ - github (repository scraping)
8
+ - pdf (PDF document scraping)
9
+
10
+ Also provides backward compatibility detection for legacy configs.
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ class ConfigValidator:
23
+ """
24
+ Validates unified config format and provides backward compatibility.
25
+ """
26
+
27
+ # Valid source types
28
+ VALID_SOURCE_TYPES = {"documentation", "github", "pdf"}
29
+
30
+ # Valid merge modes
31
+ VALID_MERGE_MODES = {"rule-based", "claude-enhanced"}
32
+
33
+ # Valid code analysis depth levels
34
+ VALID_DEPTH_LEVELS = {"surface", "deep", "full"}
35
+
36
+ # Valid AI modes for C3.x enhancement
37
+ VALID_AI_MODES = {"auto", "api", "local", "none"}
38
+
39
+ def __init__(self, config_or_path: dict[str, Any] | str):
40
+ """
41
+ Initialize validator with config dict or file path.
42
+
43
+ Args:
44
+ config_or_path: Either a config dict or path to config JSON file
45
+ """
46
+ if isinstance(config_or_path, dict):
47
+ self.config_path = None
48
+ self.config = config_or_path
49
+ else:
50
+ self.config_path = config_or_path
51
+ self.config = self._load_config()
52
+ self.is_unified = self._detect_format()
53
+
54
+ def _load_config(self) -> dict[str, Any]:
55
+ """Load JSON config file."""
56
+ try:
57
+ with open(self.config_path, encoding="utf-8") as f:
58
+ return json.load(f)
59
+ except FileNotFoundError as e:
60
+ raise ValueError(f"Config file not found: {self.config_path}") from e
61
+ except json.JSONDecodeError as e:
62
+ raise ValueError(f"Invalid JSON in config file: {e}") from e
63
+
64
+ def _detect_format(self) -> bool:
65
+ """
66
+ Detect if config is unified format or legacy.
67
+
68
+ Returns:
69
+ True if unified format (has 'sources' array)
70
+ False if legacy format
71
+ """
72
+ return "sources" in self.config and isinstance(self.config["sources"], list)
73
+
74
+ def validate(self) -> bool:
75
+ """
76
+ Validate config based on detected format.
77
+
78
+ Returns:
79
+ True if valid
80
+
81
+ Raises:
82
+ ValueError if invalid with detailed error message
83
+ """
84
+ if self.is_unified:
85
+ return self._validate_unified()
86
+ else:
87
+ return self._validate_legacy()
88
+
89
+ def _validate_unified(self) -> bool:
90
+ """Validate unified config format."""
91
+ logger.info("Validating unified config format...")
92
+
93
+ # Required top-level fields
94
+ if "name" not in self.config:
95
+ raise ValueError("Missing required field: 'name'")
96
+
97
+ if "description" not in self.config:
98
+ raise ValueError("Missing required field: 'description'")
99
+
100
+ if "sources" not in self.config:
101
+ raise ValueError("Missing required field: 'sources'")
102
+
103
+ # Validate sources array
104
+ sources = self.config["sources"]
105
+
106
+ if not isinstance(sources, list):
107
+ raise ValueError("'sources' must be an array")
108
+
109
+ if len(sources) == 0:
110
+ raise ValueError("'sources' array cannot be empty")
111
+
112
+ # Validate merge_mode (optional)
113
+ merge_mode = self.config.get("merge_mode", "rule-based")
114
+ if merge_mode not in self.VALID_MERGE_MODES:
115
+ raise ValueError(
116
+ f"Invalid merge_mode: '{merge_mode}'. Must be one of {self.VALID_MERGE_MODES}"
117
+ )
118
+
119
+ # Validate each source
120
+ for i, source in enumerate(sources):
121
+ self._validate_source(source, i)
122
+
123
+ logger.info(f"✅ Unified config valid: {len(sources)} sources")
124
+ return True
125
+
126
+ def _validate_source(self, source: dict[str, Any], index: int):
127
+ """Validate individual source configuration."""
128
+ # Check source has 'type' field
129
+ if "type" not in source:
130
+ raise ValueError(f"Source {index}: Missing required field 'type'")
131
+
132
+ source_type = source["type"]
133
+
134
+ if source_type not in self.VALID_SOURCE_TYPES:
135
+ raise ValueError(
136
+ f"Source {index}: Invalid type '{source_type}'. Must be one of {self.VALID_SOURCE_TYPES}"
137
+ )
138
+
139
+ # Type-specific validation
140
+ if source_type == "documentation":
141
+ self._validate_documentation_source(source, index)
142
+ elif source_type == "github":
143
+ self._validate_github_source(source, index)
144
+ elif source_type == "pdf":
145
+ self._validate_pdf_source(source, index)
146
+
147
+ def _validate_documentation_source(self, source: dict[str, Any], index: int):
148
+ """Validate documentation source configuration."""
149
+ if "base_url" not in source:
150
+ raise ValueError(f"Source {index} (documentation): Missing required field 'base_url'")
151
+
152
+ # Optional but recommended fields
153
+ if "selectors" not in source:
154
+ logger.warning(
155
+ f"Source {index} (documentation): No 'selectors' specified, using defaults"
156
+ )
157
+
158
+ if "max_pages" in source and not isinstance(source["max_pages"], int):
159
+ raise ValueError(f"Source {index} (documentation): 'max_pages' must be an integer")
160
+
161
+ def _validate_github_source(self, source: dict[str, Any], index: int):
162
+ """Validate GitHub source configuration."""
163
+ if "repo" not in source:
164
+ raise ValueError(f"Source {index} (github): Missing required field 'repo'")
165
+
166
+ # Validate repo format (owner/repo)
167
+ repo = source["repo"]
168
+ if "/" not in repo:
169
+ raise ValueError(
170
+ f"Source {index} (github): Invalid repo format '{repo}'. Must be 'owner/repo' (e.g., 'facebook/react')"
171
+ )
172
+
173
+ # Validate code_analysis_depth if specified
174
+ if "code_analysis_depth" in source:
175
+ depth = source["code_analysis_depth"]
176
+ if depth not in self.VALID_DEPTH_LEVELS:
177
+ raise ValueError(
178
+ f"Source {index} (github): Invalid code_analysis_depth '{depth}'. "
179
+ f"Must be one of {self.VALID_DEPTH_LEVELS}"
180
+ )
181
+
182
+ # Validate max_issues if specified
183
+ if "max_issues" in source and not isinstance(source["max_issues"], int):
184
+ raise ValueError(f"Source {index} (github): 'max_issues' must be an integer")
185
+
186
+ # Validate enable_codebase_analysis if specified (C3.5)
187
+ if "enable_codebase_analysis" in source and not isinstance(
188
+ source["enable_codebase_analysis"], bool
189
+ ):
190
+ raise ValueError(
191
+ f"Source {index} (github): 'enable_codebase_analysis' must be a boolean"
192
+ )
193
+
194
+ # Validate ai_mode if specified (C3.5)
195
+ if "ai_mode" in source:
196
+ ai_mode = source["ai_mode"]
197
+ if ai_mode not in self.VALID_AI_MODES:
198
+ raise ValueError(
199
+ f"Source {index} (github): Invalid ai_mode '{ai_mode}'. Must be one of {self.VALID_AI_MODES}"
200
+ )
201
+
202
+ def _validate_pdf_source(self, source: dict[str, Any], index: int):
203
+ """Validate PDF source configuration."""
204
+ if "path" not in source:
205
+ raise ValueError(f"Source {index} (pdf): Missing required field 'path'")
206
+
207
+ # Check if file exists
208
+ pdf_path = source["path"]
209
+ if not Path(pdf_path).exists():
210
+ logger.warning(f"Source {index} (pdf): File not found: {pdf_path}")
211
+
212
+ def _validate_legacy(self) -> bool:
213
+ """
214
+ Validate legacy config format (backward compatibility).
215
+
216
+ Legacy configs are the old format used by doc_scraper, github_scraper, pdf_scraper.
217
+ """
218
+ logger.info("Detected legacy config format (backward compatible)")
219
+
220
+ # Detect which legacy type based on fields
221
+ if "base_url" in self.config:
222
+ logger.info("Legacy type: documentation")
223
+ elif "repo" in self.config:
224
+ logger.info("Legacy type: github")
225
+ elif "pdf" in self.config or "path" in self.config:
226
+ logger.info("Legacy type: pdf")
227
+ else:
228
+ raise ValueError("Cannot detect legacy config type (missing base_url, repo, or pdf)")
229
+
230
+ return True
231
+
232
+ def convert_legacy_to_unified(self) -> dict[str, Any]:
233
+ """
234
+ Convert legacy config to unified format.
235
+
236
+ Returns:
237
+ Unified config dict
238
+ """
239
+ if self.is_unified:
240
+ logger.info("Config already in unified format")
241
+ return self.config
242
+
243
+ logger.info("Converting legacy config to unified format...")
244
+
245
+ # Detect legacy type and convert
246
+ if "base_url" in self.config:
247
+ return self._convert_legacy_documentation()
248
+ elif "repo" in self.config:
249
+ return self._convert_legacy_github()
250
+ elif "pdf" in self.config or "path" in self.config:
251
+ return self._convert_legacy_pdf()
252
+ else:
253
+ raise ValueError("Cannot convert: unknown legacy format")
254
+
255
+ def _convert_legacy_documentation(self) -> dict[str, Any]:
256
+ """Convert legacy documentation config to unified."""
257
+ unified = {
258
+ "name": self.config.get("name", "unnamed"),
259
+ "description": self.config.get("description", "Documentation skill"),
260
+ "merge_mode": "rule-based",
261
+ "sources": [
262
+ {
263
+ "type": "documentation",
264
+ **{k: v for k, v in self.config.items() if k not in ["name", "description"]},
265
+ }
266
+ ],
267
+ }
268
+ return unified
269
+
270
+ def _convert_legacy_github(self) -> dict[str, Any]:
271
+ """Convert legacy GitHub config to unified."""
272
+ unified = {
273
+ "name": self.config.get("name", "unnamed"),
274
+ "description": self.config.get("description", "GitHub repository skill"),
275
+ "merge_mode": "rule-based",
276
+ "sources": [
277
+ {
278
+ "type": "github",
279
+ **{k: v for k, v in self.config.items() if k not in ["name", "description"]},
280
+ }
281
+ ],
282
+ }
283
+ return unified
284
+
285
+ def _convert_legacy_pdf(self) -> dict[str, Any]:
286
+ """Convert legacy PDF config to unified."""
287
+ unified = {
288
+ "name": self.config.get("name", "unnamed"),
289
+ "description": self.config.get("description", "PDF document skill"),
290
+ "merge_mode": "rule-based",
291
+ "sources": [
292
+ {
293
+ "type": "pdf",
294
+ **{k: v for k, v in self.config.items() if k not in ["name", "description"]},
295
+ }
296
+ ],
297
+ }
298
+ return unified
299
+
300
+ def get_sources_by_type(self, source_type: str) -> list[dict[str, Any]]:
301
+ """
302
+ Get all sources of a specific type.
303
+
304
+ Args:
305
+ source_type: 'documentation', 'github', or 'pdf'
306
+
307
+ Returns:
308
+ List of sources matching the type
309
+ """
310
+ if not self.is_unified:
311
+ # For legacy, convert and get sources
312
+ unified = self.convert_legacy_to_unified()
313
+ sources = unified["sources"]
314
+ else:
315
+ sources = self.config["sources"]
316
+
317
+ return [s for s in sources if s.get("type") == source_type]
318
+
319
+ def has_multiple_sources(self) -> bool:
320
+ """Check if config has multiple sources (requires merging)."""
321
+ if not self.is_unified:
322
+ return False
323
+ return len(self.config["sources"]) > 1
324
+
325
+ def needs_api_merge(self) -> bool:
326
+ """
327
+ Check if config needs API merging.
328
+
329
+ Returns True if both documentation and github sources exist
330
+ with API extraction enabled.
331
+ """
332
+ if not self.has_multiple_sources():
333
+ return False
334
+
335
+ has_docs_api = any(
336
+ s.get("type") == "documentation" and s.get("extract_api", True)
337
+ for s in self.config["sources"]
338
+ )
339
+
340
+ has_github_code = any(
341
+ s.get("type") == "github" and s.get("include_code", False)
342
+ for s in self.config["sources"]
343
+ )
344
+
345
+ return has_docs_api and has_github_code
346
+
347
+
348
def validate_config(config_path: str) -> ConfigValidator:
    """
    Build a validator for a config file and run validation on it.

    Args:
        config_path: Path to config JSON file

    Returns:
        The ConfigValidator instance, after validate() has succeeded

    Raises:
        ValueError if the config cannot be loaded or is invalid
    """
    checked = ConfigValidator(config_path)
    # validate() raises on failure; its boolean result is not needed here.
    checked.validate()
    return checked
364
+
365
+
366
if __name__ == "__main__":
    import sys

    # A config path argument is mandatory; print usage and fail without it.
    if len(sys.argv) < 2:
        print("Usage: python config_validator.py <config.json>")
        sys.exit(1)

    target = sys.argv[1]

    try:
        checked = validate_config(target)
        layout = "Unified" if checked.is_unified else "Legacy"

        print("\n✅ Config valid!")
        print(f" Format: {layout}")
        print(f" Name: {checked.config.get('name')}")

        if checked.is_unified:
            entries = checked.config["sources"]
            print(f" Sources: {len(entries)}")
            # List each source with a 1-based position.
            for position, entry in enumerate(entries, start=1):
                print(f" {position}. {entry['type']}")

            if checked.needs_api_merge():
                mode = checked.config.get("merge_mode", "rule-based")
                print(f" ⚠️ API merge required (mode: {mode})")

    except ValueError as err:
        # Any validation or loading failure ends the script with status 1.
        print(f"\n❌ Config invalid: {err}")
        sys.exit(1)