skill-seekers 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,390 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Config Splitter for Large Documentation Sites
4
+
5
+ Splits large documentation configs into multiple smaller, focused skill configs.
6
+ Supports multiple splitting strategies: category-based, size-based, and automatic.
7
+ """
8
+
9
+ import argparse
10
+ import json
11
+ import sys
12
+ from collections import defaultdict
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+
17
class ConfigSplitter:
    """Split one large skill config into several smaller, focused configs.

    Two config shapes are handled:

    * *Unified* configs have a top-level ``sources`` list and are split into
      one output config per source.
    * *Documentation* configs (everything else) are split by category, by
      page count, or by category with an additional router config.

    Fatal problems are reported on stdout and terminate the process with
    ``sys.exit(1)``.
    """

    def __init__(self, config_path: str, strategy: str = "auto", target_pages: int = 5000):
        """Load the config and remember the split settings.

        Args:
            config_path: Path to the JSON config file to split.
            strategy: Requested strategy; ``"auto"`` lets
                :meth:`get_split_strategy` choose one.
            target_pages: ``max_pages`` value written into generated configs
                and the divisor used by :meth:`split_by_size`.
        """
        self.config_path = Path(config_path)
        self.strategy = strategy
        self.target_pages = target_pages
        self.config = self.load_config()
        self.base_name = self.config["name"]

    def load_config(self) -> dict[str, Any]:
        """Read and parse the JSON file at ``self.config_path``.

        Returns:
            The parsed config.

        Prints an error and exits with status 1 when the file does not exist
        or cannot be decoded/parsed as JSON.
        """
        try:
            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's locale encoding.
            with open(self.config_path, encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            print(f"❌ Error: Config file not found: {self.config_path}")
            sys.exit(1)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"❌ Error: Invalid JSON in config file: {e}")
            sys.exit(1)

    def is_unified_config(self) -> bool:
        """Return True when the config has a top-level ``sources`` key."""
        return "sources" in self.config

    def get_split_strategy(self) -> str:
        """Resolve the strategy that :meth:`split` should execute.

        Resolution order:

        1. Unified configs: ``auto`` resolves to ``none`` (zero or one source)
           or ``source``; an explicit ``source``/``none`` is honoured; any
           other request falls back to ``source`` with a warning.
        2. Documentation configs: a ``split_strategy`` stored in the config
           wins, unless it is ``none``, ``auto`` or null.
        3. Otherwise the constructor's strategy is used, with ``auto``
           resolved from ``max_pages`` and the ``categories`` mapping.

        Returns:
            The resolved strategy name. Values that are not recognised are
            passed through and rejected later by :meth:`split`.
        """
        if self.is_unified_config():
            if self.strategy == "auto":
                num_sources = len(self.config.get("sources", []))
                if num_sources <= 1:
                    print("ℹ️ Single source unified config - no splitting needed")
                    return "none"
                print(
                    f"ℹ️ Multi-source unified config ({num_sources} sources) - source split recommended"
                )
                return "source"

            # For unified configs, only 'source' and 'none' strategies are valid
            if self.strategy in ("source", "none"):
                return self.strategy

            print(f"⚠️ Warning: Strategy '{self.strategy}' not supported for unified configs")
            print("ℹ️ Using 'source' strategy instead")
            return "source"

        # A strategy pinned inside the config takes precedence. "auto" and a
        # JSON null are treated like "none" (i.e. "nothing pinned"): returning
        # them verbatim would only make split() abort with "Unknown strategy".
        config_strategy = self.config.get("split_strategy")
        if config_strategy not in (None, "none", "auto"):
            return config_strategy

        if self.strategy != "auto":
            return self.strategy

        # Auto-detect from the declared page budget.
        # NOTE(review): the 5000 / 10000 thresholds are fixed and do not
        # follow self.target_pages - confirm that this is intended.
        max_pages = self.config.get("max_pages", 500)
        has_categories = "categories" in self.config

        if max_pages < 5000:
            print(f"ℹ️ Small documentation ({max_pages} pages) - no splitting needed")
            return "none"
        if max_pages < 10000 and has_categories:
            print(f"ℹ️ Medium documentation ({max_pages} pages) - category split recommended")
            return "category"
        if has_categories and len(self.config["categories"]) >= 3:
            print(
                f"ℹ️ Large documentation ({max_pages} pages) - router + categories recommended"
            )
            return "router"

        print(f"ℹ️ Large documentation ({max_pages} pages) - size-based split")
        return "size"

    def split_by_category(self, create_router: bool = False) -> list[dict[str, Any]]:
        """Create one config per category.

        Each child config is a copy of the parent restricted to a single
        category. Category keywords that start with ``/`` are added to the
        child's ``url_patterns["include"]`` list.

        Args:
            create_router: When True, a router config built by
                :meth:`create_router_config` is inserted at index 0.

        Returns:
            The generated configs (router first, if requested).

        Prints an error and exits with status 1 when the config has no
        ``categories`` key.
        """
        if "categories" not in self.config:
            print("❌ Error: No categories defined in config")
            sys.exit(1)

        categories = self.config["categories"]
        split_categories = self.config.get("split_config", {}).get("split_by_categories")

        # If specific categories specified, use only those
        if split_categories:
            categories = {k: v for k, v in categories.items() if k in split_categories}

        configs = []

        for category_name, keywords in categories.items():
            new_config = self.config.copy()
            new_config["name"] = f"{self.base_name}-{category_name}"
            new_config["description"] = (
                f"{self.base_name.capitalize()} - {category_name.replace('_', ' ').title()}. {self.config.get('description', '')}"
            )

            # self.config.copy() is shallow: the nested url_patterns dict and
            # its include list are the parent's own objects. Work on private
            # copies so one category's paths do not leak into the parent
            # config or into the configs generated for later categories.
            url_patterns = dict(self.config.get("url_patterns", {}))
            includes = list(url_patterns.get("include", []))
            includes.extend(keyword for keyword in keywords if keyword.startswith("/"))

            if includes:
                # De-duplicate while keeping first-seen order so the output
                # is deterministic between runs.
                url_patterns["include"] = list(dict.fromkeys(includes))
                new_config["url_patterns"] = url_patterns

            # Keep only this category
            new_config["categories"] = {category_name: keywords}

            # Children must not be split again.
            new_config.pop("split_strategy", None)
            new_config.pop("split_config", None)

            # Adjust max_pages estimate (only when the parent declared one)
            if "max_pages" in new_config:
                new_config["max_pages"] = self.target_pages

            configs.append(new_config)

        print(f"✅ Created {len(configs)} category-based configs")

        if create_router:
            router_config = self.create_router_config(configs)
            configs.insert(0, router_config)
            print(f"✅ Created router config: {router_config['name']}")

        return configs

    def split_by_size(self) -> list[dict[str, Any]]:
        """Create ``ceil(max_pages / target_pages)`` numbered part configs.

        Returns:
            Configs named ``<base>-part1`` .. ``<base>-partN``, each with
            ``max_pages`` set to ``self.target_pages``.

        Prints an error and exits with status 1 when ``target_pages`` is not
        positive (the part count cannot be computed).
        """
        if self.target_pages <= 0:
            print("❌ Error: target_pages must be a positive integer")
            sys.exit(1)

        max_pages = self.config.get("max_pages", 500)
        # Ceiling division without floats.
        num_splits = (max_pages + self.target_pages - 1) // self.target_pages

        configs = []

        # NOTE(review): the parts differ only in name and description; nothing
        # here partitions the URL space between them - confirm how the
        # consumer of these configs tells the parts apart.
        for part_num in range(1, num_splits + 1):
            new_config = self.config.copy()
            new_config["name"] = f"{self.base_name}-part{part_num}"
            new_config["description"] = (
                f"{self.base_name.capitalize()} - Part {part_num}. {self.config.get('description', '')}"
            )
            new_config["max_pages"] = self.target_pages

            # Children must not be split again.
            new_config.pop("split_strategy", None)
            new_config.pop("split_config", None)

            configs.append(new_config)

        print(f"✅ Created {len(configs)} size-based configs ({self.target_pages} pages each)")
        return configs

    def split_by_source(self) -> list[dict[str, Any]]:
        """Create one unified config per entry of ``sources``.

        Output names are ``<base>-<type>``; the second and later sources of
        the same type get a ``-<n>`` suffix. ``merge_mode`` is copied from the
        parent when present.

        Returns:
            The generated single-source configs, in source order.

        Prints an error and exits with status 1 when the config is not
        unified or its ``sources`` list is empty.
        """
        if not self.is_unified_config():
            print("❌ Error: Config is not a unified config (missing 'sources' key)")
            sys.exit(1)

        sources = self.config.get("sources", [])
        if not sources:
            print("❌ Error: No sources defined in unified config")
            sys.exit(1)

        configs = []
        source_type_counts = defaultdict(int)

        for source in sources:
            source_type = source.get("type", "unknown")
            source_type_counts[source_type] += 1
            count = source_type_counts[source_type]

            new_config = {
                "name": f"{self.base_name}-{source_type}" + (f"-{count}" if count > 1 else ""),
                "description": f"{self.base_name.capitalize()} - {source_type.title()} source. {self.config.get('description', '')}",
                "sources": [source],  # Single source per config
            }

            if "merge_mode" in self.config:
                new_config["merge_mode"] = self.config["merge_mode"]

            configs.append(new_config)

        print(f"✅ Created {len(configs)} source-based configs")

        # Show breakdown by source type
        for source_type, count in source_type_counts.items():
            print(f" 📄 {count}x {source_type}")

        return configs

    def create_router_config(self, sub_configs: list[dict[str, Any]]) -> dict[str, Any]:
        """Build the router config that references the given sub-configs.

        Args:
            sub_configs: Category configs produced by
                :meth:`split_by_category`.

        Returns:
            A config flagged with ``_router`` that lists the sub-config names
            in ``_sub_skills`` and maps each name to its category names in
            ``_routing_keywords``.

        Raises:
            KeyError: If the parent config lacks ``base_url`` or
                ``selectors``.
        """
        router_name = self.config.get("split_config", {}).get("router_name", self.base_name)

        return {
            "name": router_name,
            "description": self.config.get("description", ""),
            "base_url": self.config["base_url"],
            "selectors": self.config["selectors"],
            "url_patterns": self.config.get("url_patterns", {}),
            "rate_limit": self.config.get("rate_limit", 0.5),
            "max_pages": 500,  # Router only needs overview pages
            "_router": True,
            "_sub_skills": [cfg["name"] for cfg in sub_configs],
            "_routing_keywords": {
                cfg["name"]: list(cfg.get("categories", {}).keys()) for cfg in sub_configs
            },
        }

    def split(self) -> list[dict[str, Any]]:
        """Resolve the strategy, print a summary header and run the split.

        Returns:
            The generated configs; ``[self.config]`` when the strategy
            resolves to ``none``.

        Prints an error and exits with status 1 for an unknown strategy.
        """
        strategy = self.get_split_strategy()

        config_type = "UNIFIED" if self.is_unified_config() else "DOCUMENTATION"
        print(f"\n{'=' * 60}")
        print(f"CONFIG SPLITTER: {self.base_name} ({config_type})")
        print(f"{'=' * 60}")
        print(f"Strategy: {strategy}")
        if not self.is_unified_config():
            print(f"Target pages per skill: {self.target_pages}")
        print("")

        if strategy == "none":
            print("ℹ️ No splitting required")
            return [self.config]

        if strategy == "source":
            return self.split_by_source()

        if strategy == "category":
            return self.split_by_category(create_router=False)

        if strategy == "router":
            # The config may opt out of the router while keeping the
            # category split.
            create_router = self.config.get("split_config", {}).get("create_router", True)
            return self.split_by_category(create_router=create_router)

        if strategy == "size":
            return self.split_by_size()

        print(f"❌ Error: Unknown strategy: {strategy}")
        sys.exit(1)

    def save_configs(
        self, configs: list[dict[str, Any]], output_dir: "Path | str | None" = None
    ) -> list[Path]:
        """Write each config to ``<output_dir>/<name>.json``.

        Args:
            configs: Configs to write; each must have a ``name`` key.
            output_dir: Target directory, created if missing. Defaults to the
                directory of the input config.

        Returns:
            The paths written, in the order of *configs*.
        """
        if output_dir is None:
            output_dir = self.config_path.parent

        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        saved_files = []

        for config in configs:
            filepath = output_dir / f"{config['name']}.json"

            # Explicit UTF-8 keeps the written files independent of the
            # platform's locale encoding, mirroring load_config().
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(config, f, indent=2)

            saved_files.append(filepath)
            print(f" 💾 Saved: {filepath}")

        return saved_files
296
+
297
+
298
def main():
    """Command-line entry point: parse arguments, split the config, report.

    With ``--dry-run`` only the names of the configs that would be written
    are listed; otherwise the configs are saved and follow-up commands are
    printed.
    """
    usage_epilog = """
Examples:
# Auto-detect strategy
python3 split_config.py configs/godot.json

# Use category-based split
python3 split_config.py configs/godot.json --strategy category

# Use router + categories
python3 split_config.py configs/godot.json --strategy router

# Custom target size
python3 split_config.py configs/godot.json --target-pages 3000

# Dry run (don't save files)
python3 split_config.py configs/godot.json --dry-run

Split Strategies:
none - No splitting (single skill)
auto - Automatically choose best strategy
source - Split unified configs by source type (docs, github, pdf)
category - Split by categories defined in config
router - Create router + category-based sub-skills
size - Split by page count

Config Types:
Documentation - Single base_url config (supports: category, router, size)
Unified - Multi-source config (supports: source)
"""

    def banner(title):
        # Section header: blank line, rule, title, rule.
        rule = "=" * 60
        print(f"\n{rule}")
        print(title)
        print(rule)

    parser = argparse.ArgumentParser(
        description="Split large documentation configs into multiple focused skills",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_epilog,
    )
    parser.add_argument("config", help="Path to config file (e.g., configs/godot.json)")
    parser.add_argument(
        "--strategy",
        choices=["auto", "none", "source", "category", "router", "size"],
        default="auto",
        help="Splitting strategy (default: auto)",
    )
    parser.add_argument(
        "--target-pages", type=int, default=5000, help="Target pages per skill (default: 5000)"
    )
    parser.add_argument(
        "--output-dir", help="Output directory for configs (default: same as input)"
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Show what would be created without saving files"
    )
    options = parser.parse_args()

    splitter = ConfigSplitter(options.config, options.strategy, options.target_pages)
    generated = splitter.split()

    if options.dry_run:
        # Report only; nothing is written to disk.
        banner("DRY RUN - No files saved")
        print(f"Would create {len(generated)} config files:")
        for entry in generated:
            suffix = " (ROUTER)" if entry.get("_router", False) else ""
            print(f" 📄 {entry['name']}.json{suffix}")
        return

    banner("SAVING CONFIGS")
    written = splitter.save_configs(generated, options.output_dir)

    banner("NEXT STEPS")
    print("1. Review generated configs")
    print("2. Scrape each config:")
    for written_path in written:
        print(f" skill-seekers scrape --config {written_path}")
    print("3. Package skills:")
    print(" skill-seekers-package-multi configs/<name>-*.json")
    print("")


if __name__ == "__main__":
    main()