skill_seekers-2.7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
skill_seekers/mcp/tools/scraping_tools.py
@@ -0,0 +1,842 @@
+ """
+ Scraping Tools Module for MCP Server
+
+ This module contains all scraping-related MCP tool implementations:
+ - estimate_pages_tool: Estimate page count before scraping
+ - scrape_docs_tool: Scrape documentation (legacy or unified)
+ - scrape_github_tool: Scrape GitHub repositories
+ - scrape_pdf_tool: Scrape PDF documentation
+ - scrape_codebase_tool: Analyze local codebase and extract code knowledge
+ - detect_patterns_tool: Detect design patterns in source code
+ - extract_test_examples_tool: Extract usage examples from test files
+ - build_how_to_guides_tool: Build how-to guides from workflow test examples
+ - extract_config_patterns_tool: Extract configuration patterns from config files
+
+ Extracted from server.py for better modularity and organization.
+ """
+
+ import json
+ import sys
+ from pathlib import Path
+
+ # MCP types - with graceful fallback for testing
+ try:
+     from mcp.types import TextContent
+ except ImportError:
+     # Graceful degradation: create a simple fallback class for testing
+     class TextContent:
+         """Fallback TextContent for when MCP is not installed"""
+
+         def __init__(self, type: str, text: str):
+             self.type = type
+             self.text = text
+
+
+ # Path to CLI tools
+ CLI_DIR = Path(__file__).parent.parent.parent / "cli"
+
+
+ def run_subprocess_with_streaming(cmd: list[str], timeout: int | None = None) -> tuple[str, str, int]:
+     """
+     Run a subprocess with real-time output streaming.
+
+     This solves the blocking issue where long-running processes (like scraping)
+     would cause MCP to appear frozen; output is streamed as it arrives instead.
+
+     Args:
+         cmd: Command list to execute
+         timeout: Optional timeout in seconds
+
+     Returns:
+         Tuple of (stdout, stderr, returncode)
+     """
+     import subprocess
+     import time
+
+     try:
+         process = subprocess.Popen(
+             cmd,
+             stdout=subprocess.PIPE,
+             stderr=subprocess.PIPE,
+             text=True,
+             bufsize=1,  # Line buffered
+             universal_newlines=True,
+         )
+
+         stdout_lines = []
+         stderr_lines = []
+         start_time = time.time()
+
+         # Read output line by line as it arrives
+         while True:
+             # Check timeout
+             if timeout and (time.time() - start_time) > timeout:
+                 process.kill()
+                 stderr_lines.append(f"\nāš ļø Process killed after {timeout}s timeout")
+                 break
+
+             # Check if process finished
+             if process.poll() is not None:
+                 break
+
+             # Read available output (non-blocking)
+             try:
+                 import select
+
+                 readable, _, _ = select.select([process.stdout, process.stderr], [], [], 0.1)
+
+                 if process.stdout in readable:
+                     line = process.stdout.readline()
+                     if line:
+                         stdout_lines.append(line)
+
+                 if process.stderr in readable:
+                     line = process.stderr.readline()
+                     if line:
+                         stderr_lines.append(line)
+             except Exception:
+                 # Fallback for Windows, where select() does not support pipes
+                 time.sleep(0.1)
+
+         # Collect any remaining output
+         remaining_stdout, remaining_stderr = process.communicate()
+         if remaining_stdout:
+             stdout_lines.append(remaining_stdout)
+         if remaining_stderr:
+             stderr_lines.append(remaining_stderr)
+
+         stdout = "".join(stdout_lines)
+         stderr = "".join(stderr_lines)
+         returncode = process.returncode
+
+         return stdout, stderr, returncode
+
+     except Exception as e:
+         return "", f"Error running subprocess: {str(e)}", 1
+
+
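The helper above is synchronous and self-contained, so it can be exercised outside the MCP server. A minimal sketch, assuming the wheel is installed; the command itself is just a placeholder:

    import sys

    from skill_seekers.mcp.tools.scraping_tools import run_subprocess_with_streaming

    # Placeholder command; any long-running CLI would stream the same way.
    stdout, stderr, returncode = run_subprocess_with_streaming(
        [sys.executable, "-c", "print('hello')"], timeout=30
    )
    print(stdout if returncode == 0 else stderr)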
+ async def estimate_pages_tool(args: dict) -> list[TextContent]:
+     """
+     Estimate page count from a config file.
+
+     Performs a fast preview without downloading content to estimate
+     how many pages will be scraped.
+
+     Args:
+         args: Dictionary containing:
+             - config_path (str): Path to config JSON file
+             - max_discovery (int, optional): Maximum pages to discover (default: 1000)
+             - unlimited (bool, optional): Remove discovery limit (default: False)
+
+     Returns:
+         List[TextContent]: Tool execution results
+     """
+     config_path = args["config_path"]
+     max_discovery = args.get("max_discovery", 1000)
+     unlimited = args.get("unlimited", False)
+
+     # Handle unlimited mode
+     if unlimited or max_discovery == -1:
+         max_discovery = -1
+         timeout = 1800  # 30 minutes for unlimited discovery
+     else:
+         # Estimate: 0.5s per page discovered
+         timeout = max(300, max_discovery // 2)  # Minimum 5 minutes
+
+     # Run estimate_pages.py
+     cmd = [
+         sys.executable,
+         str(CLI_DIR / "estimate_pages.py"),
+         config_path,
+         "--max-discovery",
+         str(max_discovery),
+     ]
+
+     progress_msg = "šŸ”„ Estimating page count...\n"
+     progress_msg += f"ā±ļø Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output)]
+     else:
+         return [TextContent(type="text", text=f"{output}\n\nāŒ Error:\n{stderr}")]
+
+
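Each tool takes a plain args dict and returns TextContent items, so it can be driven directly with asyncio. A minimal sketch; the config path is hypothetical:

    import asyncio

    from skill_seekers.mcp.tools.scraping_tools import estimate_pages_tool

    # "configs/react.json" is a placeholder; point this at a real config file.
    result = asyncio.run(
        estimate_pages_tool({"config_path": "configs/react.json", "max_discovery": 200})
    )
    print(result[0].text)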
+ async def scrape_docs_tool(args: dict) -> list[TextContent]:
+     """
+     Scrape documentation and build skill.
+
+     Auto-detects unified vs legacy format and routes to the appropriate scraper.
+     Supports both single-source (legacy) and unified multi-source configs.
+     Creates SKILL.md and reference files.
+
+     Args:
+         args: Dictionary containing:
+             - config_path (str): Path to config JSON file
+             - unlimited (bool, optional): Remove page limit (default: False)
+             - enhance_local (bool, optional): Open terminal for local enhancement (default: False)
+             - skip_scrape (bool, optional): Skip scraping, use cached data (default: False)
+             - dry_run (bool, optional): Preview without saving (default: False)
+             - merge_mode (str, optional): Override merge mode for unified configs
+
+     Returns:
+         List[TextContent]: Tool execution results
+     """
+     config_path = args["config_path"]
+     unlimited = args.get("unlimited", False)
+     enhance_local = args.get("enhance_local", False)
+     skip_scrape = args.get("skip_scrape", False)
+     dry_run = args.get("dry_run", False)
+     merge_mode = args.get("merge_mode")
+
+     # Load config to detect format
+     with open(config_path) as f:
+         config = json.load(f)
+
+     # Detect if unified format (has 'sources' array)
+     is_unified = "sources" in config and isinstance(config["sources"], list)
+
+     # Handle unlimited mode by modifying config temporarily
+     if unlimited:
+         # Set max_pages to None (unlimited)
+         if is_unified:
+             # For unified configs, set max_pages on documentation sources
+             for source in config.get("sources", []):
+                 if source.get("type") == "documentation":
+                     source["max_pages"] = None
+         else:
+             # For legacy configs
+             config["max_pages"] = None
+
+         # Create temporary config file
+         temp_config_path = config_path.replace(".json", "_unlimited_temp.json")
+         with open(temp_config_path, "w") as f:
+             json.dump(config, f, indent=2)
+
+         config_to_use = temp_config_path
+     else:
+         config_to_use = config_path
+
+     # Choose scraper based on format
+     if is_unified:
+         scraper_script = "unified_scraper.py"
+         progress_msg = "šŸ”„ Starting unified multi-source scraping...\n"
+         progress_msg += "šŸ“¦ Config format: Unified (multiple sources)\n"
+     else:
+         scraper_script = "doc_scraper.py"
+         progress_msg = "šŸ”„ Starting scraping process...\n"
+         progress_msg += "šŸ“¦ Config format: Legacy (single source)\n"
+
+     # Build command
+     cmd = [sys.executable, str(CLI_DIR / scraper_script), "--config", config_to_use]
+
+     # Add merge mode for unified configs
+     if is_unified and merge_mode:
+         cmd.extend(["--merge-mode", merge_mode])
+
+     # Add --fresh to avoid user input prompts when existing data is found
+     if not skip_scrape:
+         cmd.append("--fresh")
+
+     if enhance_local:
+         cmd.append("--enhance-local")
+     if skip_scrape:
+         cmd.append("--skip-scrape")
+     if dry_run:
+         cmd.append("--dry-run")
+
+     # Determine timeout based on operation type
+     if dry_run:
+         timeout = 300  # 5 minutes for dry run
+     elif skip_scrape:
+         timeout = 600  # 10 minutes for building from cache
+     elif unlimited:
+         timeout = None  # No timeout for unlimited mode (user explicitly requested)
+     else:
+         # Read config to estimate timeout
+         try:
+             if is_unified:
+                 # For unified configs, estimate based on all sources
+                 total_pages = 0
+                 for source in config.get("sources", []):
+                     if source.get("type") == "documentation":
+                         total_pages += source.get("max_pages", 500)
+                 max_pages = total_pages or 500
+             else:
+                 max_pages = config.get("max_pages", 500)
+
+             # Estimate: 30s per page plus buffer
+             timeout = max(3600, max_pages * 35)  # Minimum 1 hour, or 35s per page
+         except Exception:
+             timeout = 14400  # Default: 4 hours
+
+     # Add progress message
+     if timeout:
+         progress_msg += f"ā±ļø Maximum time allowed: {timeout // 60} minutes\n"
+     else:
+         progress_msg += "ā±ļø Unlimited mode - no timeout\n"
+     progress_msg += "šŸ“ Progress will be shown below:\n\n"
+
+     # Run scraper with streaming
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     # Clean up temporary config
+     if unlimited and Path(config_to_use).exists():
+         Path(config_to_use).unlink()
+
+     output = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output)]
+     else:
+         error_output = output + f"\n\nāŒ Error:\n{stderr}"
+         return [TextContent(type="text", text=error_output)]
+
+
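The unified-versus-legacy decision above hinges on one check: a top-level "sources" list. A sketch of a config shape that would route to unified_scraper.py; only "sources", "type", and "max_pages" are taken from the code here, the other fields are illustrative rather than the full schema:

    # Illustrative unified config: the top-level "sources" list is what matters.
    config = {
        "name": "example-skill",
        "sources": [
            {"type": "documentation", "url": "https://example.com/docs", "max_pages": 200},
        ],
    }
    is_unified = "sources" in config and isinstance(config["sources"], list)
    assert is_unified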
+ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
+     """
+     Scrape PDF documentation and build Claude skill.
+
+     Extracts text, code, and images from PDF files and builds
+     a skill package with organized references.
+
+     Args:
+         args: Dictionary containing:
+             - config_path (str, optional): Path to PDF config JSON file
+             - pdf_path (str, optional): Direct PDF path (alternative to config_path)
+             - name (str, optional): Skill name (required with pdf_path)
+             - description (str, optional): Skill description
+             - from_json (str, optional): Build from extracted JSON file
+
+     Returns:
+         List[TextContent]: Tool execution results
+     """
+     config_path = args.get("config_path")
+     pdf_path = args.get("pdf_path")
+     name = args.get("name")
+     description = args.get("description")
+     from_json = args.get("from_json")
+
+     # Build command
+     cmd = [sys.executable, str(CLI_DIR / "pdf_scraper.py")]
+
+     # Mode 1: Config file
+     if config_path:
+         cmd.extend(["--config", config_path])
+
+     # Mode 2: Direct PDF
+     elif pdf_path and name:
+         cmd.extend(["--pdf", pdf_path, "--name", name])
+         if description:
+             cmd.extend(["--description", description])
+
+     # Mode 3: From JSON
+     elif from_json:
+         cmd.extend(["--from-json", from_json])
+
+     else:
+         return [
+             TextContent(
+                 type="text", text="āŒ Error: Must specify --config, --pdf + --name, or --from-json"
+             )
+         ]
+
+     # Run pdf_scraper.py with streaming (can take a while)
+     timeout = 600  # 10 minutes for PDF extraction
+
+     progress_msg = "šŸ“„ Scraping PDF documentation...\n"
+     progress_msg += f"ā±ļø Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output)]
+     else:
+         return [TextContent(type="text", text=f"{output}\n\nāŒ Error:\n{stderr}")]
+
+
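The three input modes map one-to-one onto args-dict shapes; any one of these would be accepted (all paths are placeholders):

    # All paths below are placeholders.
    by_config = {"config_path": "configs/manual_pdf.json"}      # Mode 1: config file
    by_pdf = {"pdf_path": "docs/manual.pdf", "name": "manual"}  # Mode 2: direct PDF
    by_json = {"from_json": "output/manual_extracted.json"}    # Mode 3: prebuilt JSON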
+ async def scrape_github_tool(args: dict) -> list[TextContent]:
+     """
+     Scrape GitHub repository and build Claude skill.
+
+     Extracts README, Issues, Changelog, Releases, and code structure
+     from GitHub repositories to create comprehensive skills.
+
+     Args:
+         args: Dictionary containing:
+             - repo (str, optional): GitHub repository (owner/repo)
+             - config_path (str, optional): Path to GitHub config JSON file
+             - name (str, optional): Skill name (default: repo name)
+             - description (str, optional): Skill description
+             - token (str, optional): GitHub personal access token
+             - no_issues (bool, optional): Skip GitHub issues extraction (default: False)
+             - no_changelog (bool, optional): Skip CHANGELOG extraction (default: False)
+             - no_releases (bool, optional): Skip releases extraction (default: False)
+             - max_issues (int, optional): Maximum issues to fetch (default: 100)
+             - scrape_only (bool, optional): Only scrape, don't build skill (default: False)
+
+     Returns:
+         List[TextContent]: Tool execution results
+     """
+     repo = args.get("repo")
+     config_path = args.get("config_path")
+     name = args.get("name")
+     description = args.get("description")
+     token = args.get("token")
+     no_issues = args.get("no_issues", False)
+     no_changelog = args.get("no_changelog", False)
+     no_releases = args.get("no_releases", False)
+     max_issues = args.get("max_issues", 100)
+     scrape_only = args.get("scrape_only", False)
+
+     # Build command
+     cmd = [sys.executable, str(CLI_DIR / "github_scraper.py")]
+
+     # Mode 1: Config file
+     if config_path:
+         cmd.extend(["--config", config_path])
+
+     # Mode 2: Direct repo
+     elif repo:
+         cmd.extend(["--repo", repo])
+         if name:
+             cmd.extend(["--name", name])
+         if description:
+             cmd.extend(["--description", description])
+         if token:
+             cmd.extend(["--token", token])
+         if no_issues:
+             cmd.append("--no-issues")
+         if no_changelog:
+             cmd.append("--no-changelog")
+         if no_releases:
+             cmd.append("--no-releases")
+         if max_issues != 100:
+             cmd.extend(["--max-issues", str(max_issues)])
+         if scrape_only:
+             cmd.append("--scrape-only")
+
+     else:
+         return [TextContent(type="text", text="āŒ Error: Must specify --repo or --config")]
+
+     # Run github_scraper.py with streaming (can take a while)
+     timeout = 600  # 10 minutes for GitHub scraping
+
+     progress_msg = "šŸ™ Scraping GitHub repository...\n"
+     progress_msg += f"ā±ļø Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output)]
+     else:
+         return [TextContent(type="text", text=f"{output}\n\nāŒ Error:\n{stderr}")]
+
+
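A direct-repo sketch; "owner/repo" is a placeholder, and the token can be omitted at the cost of stricter GitHub rate limits:

    import asyncio

    from skill_seekers.mcp.tools.scraping_tools import scrape_github_tool

    result = asyncio.run(
        scrape_github_tool({"repo": "owner/repo", "max_issues": 50, "no_releases": True})
    )
    print(result[0].text)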
+ async def scrape_codebase_tool(args: dict) -> list[TextContent]:
+     """
+     Analyze local codebase and extract code knowledge.
+
+     Walks directory tree, analyzes code files, extracts signatures,
+     docstrings, and optionally generates API reference documentation
+     and dependency graphs.
+
+     Args:
+         args: Dictionary containing:
+             - directory (str): Directory to analyze
+             - output (str, optional): Output directory for results (default: output/codebase/)
+             - depth (str, optional): Analysis depth - surface, deep, full (default: deep)
+             - languages (str, optional): Comma-separated languages (e.g., "Python,JavaScript,C++")
+             - file_patterns (str, optional): Comma-separated file patterns (e.g., "*.py,src/**/*.js")
+             - build_api_reference (bool, optional): Generate API reference markdown (default: False)
+             - build_dependency_graph (bool, optional): Generate dependency graph and detect circular dependencies (default: False)
+
+     Returns:
+         List[TextContent]: Tool execution results
+
+     Example:
+         scrape_codebase(
+             directory="/path/to/repo",
+             depth="deep",
+             build_api_reference=True,
+             build_dependency_graph=True
+         )
+     """
+     directory = args.get("directory")
+     if not directory:
+         return [TextContent(type="text", text="āŒ Error: directory parameter is required")]
+
+     output = args.get("output", "output/codebase/")
+     depth = args.get("depth", "deep")
+     languages = args.get("languages", "")
+     file_patterns = args.get("file_patterns", "")
+     build_api_reference = args.get("build_api_reference", False)
+     build_dependency_graph = args.get("build_dependency_graph", False)
+
+     # Build command
+     cmd = [sys.executable, "-m", "skill_seekers.cli.codebase_scraper"]
+     cmd.extend(["--directory", directory])
+
+     if output:
+         cmd.extend(["--output", output])
+     if depth:
+         cmd.extend(["--depth", depth])
+     if languages:
+         cmd.extend(["--languages", languages])
+     if file_patterns:
+         cmd.extend(["--file-patterns", file_patterns])
+     if build_api_reference:
+         cmd.append("--build-api-reference")
+     if build_dependency_graph:
+         cmd.append("--build-dependency-graph")
+
+     timeout = 600  # 10 minutes for codebase analysis
+
+     progress_msg = "šŸ” Analyzing local codebase...\n"
+     progress_msg += f"šŸ“ Directory: {directory}\n"
+     progress_msg += f"ā±ļø Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output_text = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output_text)]
+     else:
+         return [TextContent(type="text", text=f"{output_text}\n\nāŒ Error:\n{stderr}")]
+
+
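Unlike the earlier tools, this one invokes the CLI via python -m, so the assembled command is easy to reproduce when debugging outside MCP. Roughly what the tool builds for a typical call (paths are placeholders):

    import sys

    cmd = [
        sys.executable, "-m", "skill_seekers.cli.codebase_scraper",
        "--directory", "/path/to/repo",
        "--output", "output/codebase/",
        "--depth", "deep",
        "--build-api-reference",
    ]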
+ async def detect_patterns_tool(args: dict) -> list[TextContent]:
+     """
+     Detect design patterns in source code.
+
+     Analyzes source files or directories to detect common design patterns
+     (Singleton, Factory, Observer, Strategy, Decorator, Builder, Adapter,
+     Command, Template Method, Chain of Responsibility).
+
+     Supports 11 languages: Python, JavaScript, TypeScript, C++, C, C#,
+     Go, Rust, Java, Ruby, PHP.
+
+     Args:
+         args: Dictionary containing:
+             - file (str, optional): Single file to analyze
+             - directory (str, optional): Directory to analyze (analyzes all source files)
+             - output (str, optional): Output directory for JSON results
+             - depth (str, optional): Detection depth - surface, deep, full (default: deep)
+             - json (bool, optional): Output JSON format (default: False)
+
+     Returns:
+         List[TextContent]: Pattern detection results
+
+     Example:
+         detect_patterns(file="src/database.py", depth="deep")
+         detect_patterns(directory="src/", output="patterns/", json=True)
+     """
+     file_path = args.get("file")
+     directory = args.get("directory")
+
+     if not file_path and not directory:
+         return [
+             TextContent(
+                 type="text", text="āŒ Error: Must specify either 'file' or 'directory' parameter"
+             )
+         ]
+
+     output = args.get("output", "")
+     depth = args.get("depth", "deep")
+     json_output = args.get("json", False)
+
+     # Build command
+     cmd = [sys.executable, "-m", "skill_seekers.cli.pattern_recognizer"]
+
+     if file_path:
+         cmd.extend(["--file", file_path])
+     if directory:
+         cmd.extend(["--directory", directory])
+     if output:
+         cmd.extend(["--output", output])
+     if depth:
+         cmd.extend(["--depth", depth])
+     if json_output:
+         cmd.append("--json")
+
+     timeout = 300  # 5 minutes for pattern detection
+
+     progress_msg = "šŸ” Detecting design patterns...\n"
+     if file_path:
+         progress_msg += f"šŸ“„ File: {file_path}\n"
+     if directory:
+         progress_msg += f"šŸ“ Directory: {directory}\n"
+     progress_msg += f"šŸŽÆ Detection depth: {depth}\n"
+     progress_msg += f"ā±ļø Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output_text = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output_text)]
+     else:
+         return [TextContent(type="text", text=f"{output_text}\n\nāŒ Error:\n{stderr}")]
+
+
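The args dict mirrors the docstring examples one-to-one; a sketch of the second example in tool form (paths are placeholders):

    import asyncio

    from skill_seekers.mcp.tools.scraping_tools import detect_patterns_tool

    result = asyncio.run(
        detect_patterns_tool({"directory": "src/", "output": "patterns/", "json": True})
    )
    print(result[0].text)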
+ async def extract_test_examples_tool(args: dict) -> list[TextContent]:
+     """
+     Extract usage examples from test files.
+
+     Analyzes test files to extract real API usage patterns including:
+     - Object instantiation with real parameters
+     - Method calls with expected behaviors
+     - Configuration examples
+     - Setup patterns from fixtures/setUp()
+     - Multi-step workflows from integration tests
+
+     Supports 9 languages: Python (AST-based deep analysis), JavaScript,
+     TypeScript, Go, Rust, Java, C#, PHP, Ruby (regex-based).
+
+     Args:
+         args: Dictionary containing:
+             - file (str, optional): Single test file to analyze
+             - directory (str, optional): Directory containing test files
+             - language (str, optional): Filter by language (python, javascript, etc.)
+             - min_confidence (float, optional): Minimum confidence threshold 0.0-1.0 (default: 0.5)
+             - max_per_file (int, optional): Maximum examples per file (default: 10)
+             - json (bool, optional): Output JSON format (default: False)
+             - markdown (bool, optional): Output Markdown format (default: False)
+
+     Returns:
+         List[TextContent]: Extracted test examples
+
+     Example:
+         extract_test_examples(directory="tests/", language="python")
+         extract_test_examples(file="tests/test_scraper.py", json=True)
+     """
+     file_path = args.get("file")
+     directory = args.get("directory")
+
+     if not file_path and not directory:
+         return [
+             TextContent(
+                 type="text", text="āŒ Error: Must specify either 'file' or 'directory' parameter"
+             )
+         ]
+
+     language = args.get("language", "")
+     min_confidence = args.get("min_confidence", 0.5)
+     max_per_file = args.get("max_per_file", 10)
+     json_output = args.get("json", False)
+     markdown_output = args.get("markdown", False)
+
+     # Build command
+     cmd = [sys.executable, "-m", "skill_seekers.cli.test_example_extractor"]
+
+     if directory:
+         cmd.append(directory)
+     if file_path:
+         cmd.extend(["--file", file_path])
+     if language:
+         cmd.extend(["--language", language])
+     if min_confidence:
+         cmd.extend(["--min-confidence", str(min_confidence)])
+     if max_per_file:
+         cmd.extend(["--max-per-file", str(max_per_file)])
+     if json_output:
+         cmd.append("--json")
+     if markdown_output:
+         cmd.append("--markdown")
+
+     timeout = 180  # 3 minutes for test example extraction
+
+     progress_msg = "🧪 Extracting usage examples from test files...\n"
+     if file_path:
+         progress_msg += f"šŸ“„ File: {file_path}\n"
+     if directory:
+         progress_msg += f"šŸ“ Directory: {directory}\n"
+     if language:
+         progress_msg += f"šŸ”¤ Language: {language}\n"
+     progress_msg += f"šŸŽÆ Min confidence: {min_confidence}\n"
+     progress_msg += f"šŸ“Š Max per file: {max_per_file}\n"
+     progress_msg += f"ā±ļø Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output_text = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output_text)]
+     else:
+         return [TextContent(type="text", text=f"{output_text}\n\nāŒ Error:\n{stderr}")]
+
+
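A sketch of an extraction run whose JSON output can feed the guide builder below; the directory is a placeholder, and json is requested so a machine-readable file is produced:

    import asyncio

    from skill_seekers.mcp.tools.scraping_tools import extract_test_examples_tool

    result = asyncio.run(
        extract_test_examples_tool({"directory": "tests/", "language": "python", "json": True})
    )
    print(result[0].text)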
+ async def build_how_to_guides_tool(args: dict) -> list[TextContent]:
+     """
+     Build how-to guides from workflow test examples.
+
+     Transforms workflow examples extracted from test files into step-by-step
+     educational guides. Automatically groups related workflows, extracts steps,
+     and generates comprehensive markdown guides.
+
+     Features:
+     - Python AST-based step extraction (heuristic for other languages)
+     - 4 grouping strategies: ai-tutorial-group, file-path, test-name, complexity
+     - Detects prerequisites, setup code, and verification points
+     - Generates troubleshooting tips and next steps
+     - Creates index with difficulty levels
+
+     Args:
+         args: Dictionary containing:
+             - input (str): Path to test_examples.json from extract_test_examples
+             - output (str, optional): Output directory for guides (default: output/codebase/tutorials)
+             - group_by (str, optional): Grouping strategy - ai-tutorial-group, file-path, test-name, complexity
+             - no_ai (bool, optional): Disable AI enhancement for grouping (default: False)
+             - json_output (bool, optional): Output JSON format alongside markdown (default: False)
+
+     Returns:
+         List[TextContent]: Guide building results
+
+     Example:
+         build_how_to_guides(
+             input="output/codebase/test_examples/test_examples.json",
+             group_by="ai-tutorial-group",
+             output="output/codebase/tutorials"
+         )
+     """
+     input_file = args.get("input")
+     if not input_file:
+         return [
+             TextContent(
+                 type="text",
+                 text="āŒ Error: input parameter is required (path to test_examples.json)",
+             )
+         ]
+
+     output = args.get("output", "output/codebase/tutorials")
+     group_by = args.get("group_by", "ai-tutorial-group")
+     no_ai = args.get("no_ai", False)
+     json_output = args.get("json_output", False)
+
+     # Build command
+     cmd = [sys.executable, "-m", "skill_seekers.cli.how_to_guide_builder"]
+     cmd.append(input_file)
+
+     if output:
+         cmd.extend(["--output", output])
+     if group_by:
+         cmd.extend(["--group-by", group_by])
+     if no_ai:
+         cmd.append("--no-ai")
+     if json_output:
+         cmd.append("--json-output")
+
+     timeout = 180  # 3 minutes for guide building
+
+     progress_msg = "šŸ“š Building how-to guides from workflow examples...\n"
+     progress_msg += f"šŸ“„ Input: {input_file}\n"
+     progress_msg += f"šŸ“ Output: {output}\n"
+     progress_msg += f"šŸ”€ Grouping: {group_by}\n"
+     if no_ai:
+         progress_msg += "🚫 AI enhancement disabled\n"
+     progress_msg += f"ā±ļø Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output_text = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output_text)]
+     else:
+         return [TextContent(type="text", text=f"{output_text}\n\nāŒ Error:\n{stderr}")]
+
+
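Chaining the two tools: the guide builder consumes the test_examples.json produced by the extractor. A sketch using the path from the docstring example; no_ai is set on the assumption that no Claude API key or CLI is available:

    import asyncio

    from skill_seekers.mcp.tools.scraping_tools import build_how_to_guides_tool

    result = asyncio.run(
        build_how_to_guides_tool({
            "input": "output/codebase/test_examples/test_examples.json",
            "group_by": "ai-tutorial-group",
            "no_ai": True,  # skip AI grouping when no API key / Claude CLI is set up
        })
    )
    print(result[0].text)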
+ async def extract_config_patterns_tool(args: dict) -> list[TextContent]:
+     """
+     Extract configuration patterns from config files (C3.4).
+
+     Analyzes configuration files in the codebase to extract settings,
+     detect common patterns (database, API, logging, cache, etc.), and
+     generate comprehensive documentation.
+
+     Supports 9 config formats: JSON, YAML, TOML, ENV, INI, Python modules,
+     JavaScript/TypeScript configs, Dockerfile, Docker Compose.
+
+     Detects 7 common patterns:
+     - Database configuration (host, port, credentials)
+     - API configuration (endpoints, keys, timeouts)
+     - Logging configuration (level, format, handlers)
+     - Cache configuration (backend, TTL, keys)
+     - Email configuration (SMTP, credentials)
+     - Authentication configuration (providers, secrets)
+     - Server configuration (host, port, workers)
+
+     Args:
+         args: Dictionary containing:
+             - directory (str): Directory to analyze
+             - output (str, optional): Output directory (default: output/codebase/config_patterns)
+             - max_files (int, optional): Maximum config files to process (default: 100)
+             - enhance (bool, optional): Enable AI enhancement - API mode (default: False, requires ANTHROPIC_API_KEY)
+             - enhance_local (bool, optional): Enable AI enhancement - LOCAL mode (default: False, uses Claude Code CLI)
+             - ai_mode (str, optional): AI mode - auto, api, local, none (default: none)
+             - json (bool, optional): Output JSON format (default: True)
+             - markdown (bool, optional): Output Markdown format (default: True)
+
+     Returns:
+         List[TextContent]: Config extraction results with optional AI enhancements
+
+     Example:
+         extract_config_patterns(directory=".", output="output/configs")
+         extract_config_patterns(directory="/path/to/repo", max_files=50, enhance_local=True)
+     """
+     directory = args.get("directory")
+     if not directory:
+         return [TextContent(type="text", text="āŒ Error: directory parameter is required")]
+
+     output = args.get("output", "output/codebase/config_patterns")
+     max_files = args.get("max_files", 100)
+     enhance = args.get("enhance", False)
+     enhance_local = args.get("enhance_local", False)
+     ai_mode = args.get("ai_mode", "none")
+     json_output = args.get("json", True)
+     markdown_output = args.get("markdown", True)
+
+     # Build command
+     cmd = [sys.executable, "-m", "skill_seekers.cli.config_extractor"]
+     cmd.extend(["--directory", directory])
+
+     if output:
+         cmd.extend(["--output", output])
+     if max_files:
+         cmd.extend(["--max-files", str(max_files)])
+     if enhance:
+         cmd.append("--enhance")
+     if enhance_local:
+         cmd.append("--enhance-local")
+     if ai_mode and ai_mode != "none":
+         cmd.extend(["--ai-mode", ai_mode])
+     if json_output:
+         cmd.append("--json")
+     if markdown_output:
+         cmd.append("--markdown")
+
+     # Adjust timeout for AI enhancement
+     timeout = 180  # 3 minutes base
+     if enhance or enhance_local or ai_mode != "none":
+         timeout = 360  # 6 minutes with AI enhancement
+
+     progress_msg = "āš™ļø Extracting configuration patterns...\n"
+     progress_msg += f"šŸ“ Directory: {directory}\n"
+     progress_msg += f"šŸ“„ Max files: {max_files}\n"
+     if enhance or enhance_local or (ai_mode and ai_mode != "none"):
+         progress_msg += f"šŸ¤– AI enhancement: {ai_mode if ai_mode != 'none' else ('api' if enhance else 'local')}\n"
+     progress_msg += f"ā±ļø Maximum time: {timeout // 60} minutes\n\n"
+
+     stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+     output_text = progress_msg + stdout
+
+     if returncode == 0:
+         return [TextContent(type="text", text=output_text)]
+     else:
+         return [TextContent(type="text", text=f"{output_text}\n\nāŒ Error:\n{stderr}")]
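Finally, a config-pattern sketch. Note how the progress line resolves the mode: an explicit ai_mode wins; otherwise enhance reports "api" and enhance_local reports "local" (the directory is a placeholder):

    import asyncio

    from skill_seekers.mcp.tools.scraping_tools import extract_config_patterns_tool

    result = asyncio.run(
        extract_config_patterns_tool({"directory": ".", "max_files": 50, "enhance_local": True})
    )
    print(result[0].text)  # progress header will report "šŸ¤– AI enhancement: local"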