skill-seekers 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1081 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test Example Extractor - Extract real usage examples from test files
4
+
5
+ Analyzes test files to extract meaningful code examples showing:
6
+ - Object instantiation with real parameters
7
+ - Method calls with expected behaviors
8
+ - Configuration examples
9
+ - Setup patterns from fixtures/setUp()
10
+ - Multi-step workflows from integration tests
11
+
12
+ Supports 9 languages:
13
+ - Python (AST-based, deep analysis)
14
+ - JavaScript, TypeScript, Go, Rust, Java, C#, PHP, Ruby (regex-based)
15
+
16
+ Example usage:
17
+ # Extract from directory
18
+ python test_example_extractor.py tests/ --language python
19
+
20
+ # Extract from single file
21
+ python test_example_extractor.py --file tests/test_scraper.py
22
+
23
+ # JSON output
24
+ python test_example_extractor.py tests/ --json > examples.json
25
+
26
+ # Filter by confidence
27
+ python test_example_extractor.py tests/ --min-confidence 0.7
28
+ """
29
+
30
+ import argparse
31
+ import ast
32
+ import hashlib
33
+ import json
34
+ import logging
35
+ import re
36
+ from dataclasses import asdict, dataclass, field
37
+ from pathlib import Path
38
+ from typing import Literal
39
+
40
# Module-wide logging: INFO and above, rendered as "LEVEL: message".
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)
43
+
44
+
45
+ # ============================================================================
46
+ # DATA MODELS
47
+ # ============================================================================
48
+
49
+
50
+ @dataclass
51
+ class TestExample:
52
+ """Single extracted usage example from test code"""
53
+
54
+ # Identity
55
+ example_id: str # Unique hash of example
56
+ test_name: str # Test function/method name
57
+ category: Literal["instantiation", "method_call", "config", "setup", "workflow"]
58
+
59
+ # Code
60
+ code: str # Actual example code
61
+ language: str # Programming language
62
+
63
+ # Context
64
+ description: str # What this demonstrates
65
+ expected_behavior: str # Expected outcome from assertions
66
+
67
+ # Source
68
+ file_path: str
69
+ line_start: int
70
+ line_end: int
71
+
72
+ # Quality
73
+ complexity_score: float # 0-1 scale (higher = more complex/valuable)
74
+ confidence: float # 0-1 scale (higher = more confident extraction)
75
+
76
+ # Optional fields (must come after required fields)
77
+ setup_code: str | None = None # Required setup code
78
+ tags: list[str] = field(default_factory=list) # ["pytest", "mock", "async"]
79
+ dependencies: list[str] = field(default_factory=list) # Imported modules
80
+ ai_analysis: dict | None = None # AI-generated analysis (C3.6)
81
+
82
+ def to_dict(self) -> dict:
83
+ """Convert to dictionary for JSON serialization"""
84
+ return asdict(self)
85
+
86
+ def to_markdown(self) -> str:
87
+ """Convert to markdown format"""
88
+ md = f"### {self.test_name}\n\n"
89
+ md += f"**Category**: {self.category} \n"
90
+ md += f"**Description**: {self.description} \n"
91
+ if self.expected_behavior:
92
+ md += f"**Expected**: {self.expected_behavior} \n"
93
+ md += f"**Confidence**: {self.confidence:.2f} \n"
94
+ if self.tags:
95
+ md += f"**Tags**: {', '.join(self.tags)} \n"
96
+
97
+ # Add AI analysis if available (C3.6)
98
+ if self.ai_analysis:
99
+ md += "\n**🤖 AI Analysis:** \n"
100
+ if self.ai_analysis.get("explanation"):
101
+ md += f"*{self.ai_analysis['explanation']}* \n"
102
+ if self.ai_analysis.get("best_practices"):
103
+ md += f"**Best Practices:** {', '.join(self.ai_analysis['best_practices'])} \n"
104
+ if self.ai_analysis.get("tutorial_group"):
105
+ md += f"**Tutorial Group:** {self.ai_analysis['tutorial_group']} \n"
106
+
107
+ md += f"\n```{self.language.lower()}\n"
108
+ if self.setup_code:
109
+ md += f"# Setup\n{self.setup_code}\n\n"
110
+ md += f"{self.code}\n```\n\n"
111
+ md += f"*Source: {self.file_path}:{self.line_start}*\n\n"
112
+ return md
113
+
114
+
115
+ @dataclass
116
+ class ExampleReport:
117
+ """Summary of test example extraction results"""
118
+
119
+ total_examples: int
120
+ examples_by_category: dict[str, int]
121
+ examples_by_language: dict[str, int]
122
+ examples: list[TestExample]
123
+ avg_complexity: float
124
+ high_value_count: int # confidence > 0.7
125
+ file_path: str | None = None # If single file
126
+ directory: str | None = None # If directory
127
+
128
+ def to_dict(self) -> dict:
129
+ """Convert to dictionary for JSON serialization"""
130
+ return {
131
+ "total_examples": self.total_examples,
132
+ "examples_by_category": self.examples_by_category,
133
+ "examples_by_language": self.examples_by_language,
134
+ "avg_complexity": self.avg_complexity,
135
+ "high_value_count": self.high_value_count,
136
+ "file_path": self.file_path,
137
+ "directory": self.directory,
138
+ "examples": [ex.to_dict() for ex in self.examples],
139
+ }
140
+
141
+ def to_markdown(self) -> str:
142
+ """Convert to markdown format"""
143
+ md = "# Test Example Extraction Report\n\n"
144
+ md += f"**Total Examples**: {self.total_examples} \n"
145
+ md += f"**High Value Examples** (confidence > 0.7): {self.high_value_count} \n"
146
+ md += f"**Average Complexity**: {self.avg_complexity:.2f} \n"
147
+
148
+ md += "\n## Examples by Category\n\n"
149
+ for category, count in sorted(self.examples_by_category.items()):
150
+ md += f"- **{category}**: {count}\n"
151
+
152
+ md += "\n## Examples by Language\n\n"
153
+ for language, count in sorted(self.examples_by_language.items()):
154
+ md += f"- **{language}**: {count}\n"
155
+
156
+ md += "\n## Extracted Examples\n\n"
157
+ for example in sorted(self.examples, key=lambda x: x.confidence, reverse=True):
158
+ md += example.to_markdown()
159
+
160
+ return md
161
+
162
+
163
+ # ============================================================================
164
+ # PYTHON TEST ANALYZER (AST-based)
165
+ # ============================================================================
166
+
167
+
168
+ class PythonTestAnalyzer:
169
+ """Deep AST-based test example extraction for Python"""
170
+
171
+ def __init__(self):
172
+ self.trivial_patterns = {
173
+ "assertTrue(True)",
174
+ "assertFalse(False)",
175
+ "assertEqual(1, 1)",
176
+ "assertIsNone(None)",
177
+ "assertIsNotNone(None)",
178
+ }
179
+
180
+ def extract(self, file_path: str, code: str) -> list[TestExample]:
181
+ """Extract examples from Python test file"""
182
+ examples = []
183
+
184
+ try:
185
+ tree = ast.parse(code)
186
+ except SyntaxError as e:
187
+ logger.warning(f"Failed to parse {file_path}: {e}")
188
+ return []
189
+
190
+ # Extract imports for dependency tracking
191
+ imports = self._extract_imports(tree)
192
+
193
+ # Find test classes (unittest.TestCase)
194
+ for node in ast.walk(tree):
195
+ if isinstance(node, ast.ClassDef):
196
+ if self._is_test_class(node):
197
+ examples.extend(self._extract_from_test_class(node, file_path, imports))
198
+
199
+ # Find test functions (pytest)
200
+ elif isinstance(node, ast.FunctionDef) and self._is_test_function(node):
201
+ examples.extend(self._extract_from_test_function(node, file_path, imports))
202
+
203
+ return examples
204
+
205
+ def _extract_imports(self, tree: ast.AST) -> list[str]:
206
+ """Extract imported modules"""
207
+ imports = []
208
+ for node in ast.walk(tree):
209
+ if isinstance(node, ast.Import):
210
+ imports.extend([alias.name for alias in node.names])
211
+ elif isinstance(node, ast.ImportFrom) and node.module:
212
+ imports.append(node.module)
213
+ return imports
214
+
215
+ def _is_test_class(self, node: ast.ClassDef) -> bool:
216
+ """Check if class is a test class"""
217
+ # unittest.TestCase pattern
218
+ for base in node.bases:
219
+ if (
220
+ isinstance(base, ast.Name)
221
+ and "Test" in base.id
222
+ or isinstance(base, ast.Attribute)
223
+ and base.attr == "TestCase"
224
+ ):
225
+ return True
226
+ return False
227
+
228
+ def _is_test_function(self, node: ast.FunctionDef) -> bool:
229
+ """Check if function is a test function"""
230
+ # pytest pattern: starts with test_
231
+ if node.name.startswith("test_"):
232
+ return True
233
+ # Has @pytest.mark decorator
234
+ for decorator in node.decorator_list:
235
+ if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse(decorator):
236
+ return True
237
+ return False
238
+
239
+ def _extract_from_test_class(
240
+ self, class_node: ast.ClassDef, file_path: str, imports: list[str]
241
+ ) -> list[TestExample]:
242
+ """Extract examples from unittest.TestCase class"""
243
+ examples = []
244
+
245
+ # Extract setUp method if exists
246
+ setup_code = self._extract_setup_method(class_node)
247
+
248
+ # Process each test method
249
+ for node in class_node.body:
250
+ if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
251
+ examples.extend(
252
+ self._analyze_test_body(node, file_path, imports, setup_code=setup_code)
253
+ )
254
+
255
+ return examples
256
+
257
+ def _extract_from_test_function(
258
+ self, func_node: ast.FunctionDef, file_path: str, imports: list[str]
259
+ ) -> list[TestExample]:
260
+ """Extract examples from pytest test function"""
261
+ # Check for fixture parameters
262
+ fixture_setup = self._extract_fixtures(func_node)
263
+
264
+ return self._analyze_test_body(func_node, file_path, imports, setup_code=fixture_setup)
265
+
266
+ def _extract_setup_method(self, class_node: ast.ClassDef) -> str | None:
267
+ """Extract setUp method code"""
268
+ for node in class_node.body:
269
+ if isinstance(node, ast.FunctionDef) and node.name == "setUp":
270
+ return ast.unparse(node.body)
271
+ return None
272
+
273
+ def _extract_fixtures(self, func_node: ast.FunctionDef) -> str | None:
274
+ """Extract pytest fixture parameters"""
275
+ if not func_node.args.args:
276
+ return None
277
+
278
+ # Skip 'self' parameter
279
+ params = [arg.arg for arg in func_node.args.args if arg.arg != "self"]
280
+ if params:
281
+ return f"# Fixtures: {', '.join(params)}"
282
+ return None
283
+
284
+ def _analyze_test_body(
285
+ self,
286
+ func_node: ast.FunctionDef,
287
+ file_path: str,
288
+ imports: list[str],
289
+ setup_code: str | None = None,
290
+ ) -> list[TestExample]:
291
+ """Analyze test function body for extractable patterns"""
292
+ examples = []
293
+
294
+ # Get docstring for description
295
+ docstring = ast.get_docstring(func_node) or func_node.name.replace("_", " ")
296
+
297
+ # Detect tags
298
+ tags = self._detect_tags(func_node, imports)
299
+
300
+ # Extract different pattern categories
301
+
302
+ # 1. Instantiation patterns
303
+ instantiations = self._find_instantiations(
304
+ func_node, file_path, docstring, setup_code, tags, imports
305
+ )
306
+ examples.extend(instantiations)
307
+
308
+ # 2. Method calls with assertions
309
+ method_calls = self._find_method_calls_with_assertions(
310
+ func_node, file_path, docstring, setup_code, tags, imports
311
+ )
312
+ examples.extend(method_calls)
313
+
314
+ # 3. Configuration dictionaries
315
+ configs = self._find_config_dicts(
316
+ func_node, file_path, docstring, setup_code, tags, imports
317
+ )
318
+ examples.extend(configs)
319
+
320
+ # 4. Multi-step workflows (integration tests)
321
+ workflows = self._find_workflows(func_node, file_path, docstring, setup_code, tags, imports)
322
+ examples.extend(workflows)
323
+
324
+ return examples
325
+
326
+ def _detect_tags(self, func_node: ast.FunctionDef, imports: list[str]) -> list[str]:
327
+ """Detect test tags (pytest, mock, async, etc.)"""
328
+ tags = []
329
+
330
+ # Check decorators
331
+ for decorator in func_node.decorator_list:
332
+ decorator_str = ast.unparse(decorator).lower()
333
+ if "pytest" in decorator_str:
334
+ tags.append("pytest")
335
+ if "mock" in decorator_str:
336
+ tags.append("mock")
337
+ if "async" in decorator_str or func_node.name.startswith("test_async"):
338
+ tags.append("async")
339
+
340
+ # Check if using unittest
341
+ if "unittest" in imports:
342
+ tags.append("unittest")
343
+
344
+ # Check function body for mock usage
345
+ func_str = ast.unparse(func_node).lower()
346
+ if "mock" in func_str or "patch" in func_str:
347
+ tags.append("mock")
348
+
349
+ return list(set(tags))
350
+
351
+ def _find_instantiations(
352
+ self,
353
+ func_node: ast.FunctionDef,
354
+ file_path: str,
355
+ description: str,
356
+ setup_code: str | None,
357
+ tags: list[str],
358
+ imports: list[str],
359
+ ) -> list[TestExample]:
360
+ """Find object instantiation patterns: obj = ClassName(...)"""
361
+ examples = []
362
+
363
+ for node in ast.walk(func_node):
364
+ # Check if meaningful instantiation
365
+ if (
366
+ isinstance(node, ast.Assign)
367
+ and isinstance(node.value, ast.Call)
368
+ and self._is_meaningful_instantiation(node)
369
+ ):
370
+ code = ast.unparse(node)
371
+
372
+ # Skip trivial or mock-only
373
+ if len(code) < 20 or "Mock()" in code:
374
+ continue
375
+
376
+ # Get class name
377
+ class_name = self._get_class_name(node.value)
378
+
379
+ example = TestExample(
380
+ example_id=self._generate_id(code),
381
+ test_name=func_node.name,
382
+ category="instantiation",
383
+ code=code,
384
+ language="Python",
385
+ description=f"Instantiate {class_name}: {description}",
386
+ expected_behavior=self._extract_assertion_after(func_node, node),
387
+ setup_code=setup_code,
388
+ file_path=file_path,
389
+ line_start=node.lineno,
390
+ line_end=node.end_lineno or node.lineno,
391
+ complexity_score=self._calculate_complexity(code),
392
+ confidence=0.8,
393
+ tags=tags,
394
+ dependencies=imports,
395
+ )
396
+ examples.append(example)
397
+
398
+ return examples
399
+
400
+ def _find_method_calls_with_assertions(
401
+ self,
402
+ func_node: ast.FunctionDef,
403
+ file_path: str,
404
+ description: str,
405
+ setup_code: str | None,
406
+ tags: list[str],
407
+ imports: list[str],
408
+ ) -> list[TestExample]:
409
+ """Find method calls followed by assertions"""
410
+ examples = []
411
+
412
+ statements = func_node.body
413
+ for i, stmt in enumerate(statements):
414
+ # Look for method calls and check if next statement is an assertion
415
+ if (
416
+ isinstance(stmt, ast.Expr)
417
+ and isinstance(stmt.value, ast.Call)
418
+ and i + 1 < len(statements)
419
+ ):
420
+ next_stmt = statements[i + 1]
421
+ if self._is_assertion(next_stmt):
422
+ method_call = ast.unparse(stmt)
423
+ assertion = ast.unparse(next_stmt)
424
+
425
+ code = f"{method_call}\n{assertion}"
426
+
427
+ # Skip trivial assertions
428
+ if any(trivial in assertion for trivial in self.trivial_patterns):
429
+ continue
430
+
431
+ example = TestExample(
432
+ example_id=self._generate_id(code),
433
+ test_name=func_node.name,
434
+ category="method_call",
435
+ code=code,
436
+ language="Python",
437
+ description=description,
438
+ expected_behavior=assertion,
439
+ setup_code=setup_code,
440
+ file_path=file_path,
441
+ line_start=stmt.lineno,
442
+ line_end=next_stmt.end_lineno or next_stmt.lineno,
443
+ complexity_score=self._calculate_complexity(code),
444
+ confidence=0.85,
445
+ tags=tags,
446
+ dependencies=imports,
447
+ )
448
+ examples.append(example)
449
+
450
+ return examples
451
+
452
+ def _find_config_dicts(
453
+ self,
454
+ func_node: ast.FunctionDef,
455
+ file_path: str,
456
+ description: str,
457
+ setup_code: str | None,
458
+ tags: list[str],
459
+ imports: list[str],
460
+ ) -> list[TestExample]:
461
+ """Find configuration dictionary patterns"""
462
+ examples = []
463
+
464
+ for node in ast.walk(func_node):
465
+ # Must have 2+ keys and be meaningful
466
+ if (
467
+ isinstance(node, ast.Assign)
468
+ and isinstance(node.value, ast.Dict)
469
+ and len(node.value.keys) >= 2
470
+ ):
471
+ code = ast.unparse(node)
472
+
473
+ # Check if looks like configuration
474
+ if self._is_config_dict(node.value):
475
+ example = TestExample(
476
+ example_id=self._generate_id(code),
477
+ test_name=func_node.name,
478
+ category="config",
479
+ code=code,
480
+ language="Python",
481
+ description=f"Configuration example: {description}",
482
+ expected_behavior=self._extract_assertion_after(func_node, node),
483
+ setup_code=setup_code,
484
+ file_path=file_path,
485
+ line_start=node.lineno,
486
+ line_end=node.end_lineno or node.lineno,
487
+ complexity_score=self._calculate_complexity(code),
488
+ confidence=0.75,
489
+ tags=tags,
490
+ dependencies=imports,
491
+ )
492
+ examples.append(example)
493
+
494
+ return examples
495
+
496
+ def _find_workflows(
497
+ self,
498
+ func_node: ast.FunctionDef,
499
+ file_path: str,
500
+ description: str,
501
+ setup_code: str | None,
502
+ tags: list[str],
503
+ imports: list[str],
504
+ ) -> list[TestExample]:
505
+ """Find multi-step workflow patterns (integration tests)"""
506
+ examples = []
507
+
508
+ # Check if this looks like an integration test (3+ meaningful steps)
509
+ if len(func_node.body) >= 3 and self._is_integration_test(func_node):
510
+ # Extract the full workflow
511
+ code = ast.unparse(func_node.body)
512
+
513
+ # Skip if too long (> 30 lines)
514
+ if code.count("\n") > 30:
515
+ return examples
516
+
517
+ example = TestExample(
518
+ example_id=self._generate_id(code),
519
+ test_name=func_node.name,
520
+ category="workflow",
521
+ code=code,
522
+ language="Python",
523
+ description=f"Workflow: {description}",
524
+ expected_behavior=self._extract_final_assertion(func_node),
525
+ setup_code=setup_code,
526
+ file_path=file_path,
527
+ line_start=func_node.lineno,
528
+ line_end=func_node.end_lineno or func_node.lineno,
529
+ complexity_score=min(1.0, len(func_node.body) / 10),
530
+ confidence=0.9,
531
+ tags=tags + ["workflow", "integration"],
532
+ dependencies=imports,
533
+ )
534
+ examples.append(example)
535
+
536
+ return examples
537
+
538
+ # Helper methods
539
+
540
+ def _is_meaningful_instantiation(self, node: ast.Assign) -> bool:
541
+ """Check if instantiation has meaningful parameters"""
542
+ if not isinstance(node.value, ast.Call):
543
+ return False
544
+
545
+ # Must have at least one argument or keyword argument
546
+ call = node.value
547
+ return bool(call.args or call.keywords)
548
+
549
+ def _get_class_name(self, call_node: ast.Call) -> str:
550
+ """Extract class name from Call node"""
551
+ if isinstance(call_node.func, ast.Name):
552
+ return call_node.func.id
553
+ elif isinstance(call_node.func, ast.Attribute):
554
+ return call_node.func.attr
555
+ return "UnknownClass"
556
+
557
+ def _is_assertion(self, node: ast.stmt) -> bool:
558
+ """Check if statement is an assertion"""
559
+ if isinstance(node, ast.Assert):
560
+ return True
561
+
562
+ if isinstance(node, ast.Expr) and isinstance(node.value, ast.Call):
563
+ call_str = ast.unparse(node.value).lower()
564
+ assertion_methods = ["assert", "expect", "should"]
565
+ return any(method in call_str for method in assertion_methods)
566
+
567
+ return False
568
+
569
+ def _is_config_dict(self, dict_node: ast.Dict) -> bool:
570
+ """Check if dictionary looks like configuration"""
571
+ # Keys should be strings
572
+ for key in dict_node.keys:
573
+ if not isinstance(key, ast.Constant) or not isinstance(key.value, str):
574
+ return False
575
+ return True
576
+
577
+ def _is_integration_test(self, func_node: ast.FunctionDef) -> bool:
578
+ """Check if test looks like an integration test"""
579
+ test_name = func_node.name.lower()
580
+ integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"]
581
+ return any(keyword in test_name for keyword in integration_keywords)
582
+
583
+ def _extract_assertion_after(self, func_node: ast.FunctionDef, target_node: ast.AST) -> str:
584
+ """Find assertion that follows the target node"""
585
+ found_target = False
586
+ for stmt in func_node.body:
587
+ if stmt == target_node:
588
+ found_target = True
589
+ continue
590
+ if found_target and self._is_assertion(stmt):
591
+ return ast.unparse(stmt)
592
+ return ""
593
+
594
+ def _extract_final_assertion(self, func_node: ast.FunctionDef) -> str:
595
+ """Extract the final assertion from test"""
596
+ for stmt in reversed(func_node.body):
597
+ if self._is_assertion(stmt):
598
+ return ast.unparse(stmt)
599
+ return ""
600
+
601
+ def _calculate_complexity(self, code: str) -> float:
602
+ """Calculate code complexity score (0-1)"""
603
+ # Simple heuristic: more lines + more parameters = more complex
604
+ lines = code.count("\n") + 1
605
+ params = code.count(",") + 1
606
+
607
+ complexity = min(1.0, (lines * 0.1) + (params * 0.05))
608
+ return round(complexity, 2)
609
+
610
+ def _generate_id(self, code: str) -> str:
611
+ """Generate unique ID for example"""
612
+ return hashlib.md5(code.encode()).hexdigest()[:8]
613
+
614
+
615
+ # ============================================================================
616
+ # GENERIC TEST ANALYZER (Regex-based for non-Python languages)
617
+ # ============================================================================
618
+
619
+
620
class GenericTestAnalyzer:
    """Regex-based test example extraction for non-Python languages."""

    # Language-specific regex patterns, keyed by lower-case language name.
    PATTERNS = {
        "javascript": {
            "instantiation": r"(?:const|let|var)\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
            "assertion": r"expect\(([^)]+)\)\.to(?:Equal|Be|Match)\(([^)]+)\)",
            "test_function": r'(?:test|it)\(["\']([^"\']+)["\']',
            "config": r"(?:const|let)\s+config\s*=\s*\{[\s\S]{20,500}?\}",
        },
        "typescript": {
            "instantiation": r"(?:const|let|var)\s+(\w+):\s*\w+\s*=\s*new\s+(\w+)\(([^)]*)\)",
            "assertion": r"expect\(([^)]+)\)\.to(?:Equal|Be|Match)\(([^)]+)\)",
            "test_function": r'(?:test|it)\(["\']([^"\']+)["\']',
            "config": r"(?:const|let)\s+config:\s*\w+\s*=\s*\{[\s\S]{20,500}?\}",
        },
        "go": {
            "instantiation": r"(\w+)\s*:=\s*(\w+)\{([^}]+)\}",
            "assertion": r't\.(?:Error|Fatal)(?:f)?\(["\']([^"\']+)["\']',
            "test_function": r"func\s+(Test\w+)\(t\s+\*testing\.T\)",
            "table_test": r"tests\s*:=\s*\[\]struct\s*\{[\s\S]{50,1000}?\}",
        },
        "rust": {
            "instantiation": r"let\s+(\w+)\s*=\s*(\w+)::new\(([^)]*)\)",
            "assertion": r"assert(?:_eq)?!\(([^)]+)\)",
            "test_function": r"#\[test\]\s*fn\s+(\w+)\(\)",
        },
        "java": {
            "instantiation": r"(\w+)\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
            "assertion": r"assert(?:Equals|True|False|NotNull)\(([^)]+)\)",
            "test_function": r"@Test\s+public\s+void\s+(\w+)\(\)",
        },
        "csharp": {
            "instantiation": r"var\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
            "assertion": r"Assert\.(?:AreEqual|IsTrue|IsFalse|IsNotNull)\(([^)]+)\)",
            "test_function": r"\[Test\]\s+public\s+void\s+(\w+)\(\)",
        },
        "php": {
            "instantiation": r"\$(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
            "assertion": r"\$this->assert(?:Equals|True|False|NotNull)\(([^)]+)\)",
            "test_function": r"public\s+function\s+(test\w+)\(\)",
        },
        "ruby": {
            "instantiation": r"(\w+)\s*=\s*(\w+)\.new\(([^)]*)\)",
            "assertion": r"expect\(([^)]+)\)\.to\s+(?:eq|be|match)\(([^)]+)\)",
            "test_function": r'(?:test|it)\s+["\']([^"\']+)["\']',
        },
    }

    # Display names / short forms that differ from the PATTERNS keys.
    # "C#" lower-cases to "c#", which would otherwise never match "csharp".
    _LANGUAGE_ALIASES = {
        "c#": "csharp",
        "cs": "csharp",
        "js": "javascript",
        "ts": "typescript",
    }

    # Categories extracted from each test body, in output order.
    _EXTRACTED_CATEGORIES = ("instantiation", "config")

    def extract(self, file_path: str, code: str, language: str) -> list["TestExample"]:
        """Extract examples from a test file using regex patterns.

        Args:
            file_path: Path recorded on every example (the file is not opened).
            code: Full source text of the file.
            language: Language name, case-insensitive (e.g. "JavaScript", "C#").

        Returns:
            For each detected test, its instantiation matches followed by its
            config matches; an empty list for unsupported languages.
        """
        patterns = self.PATTERNS.get(self._normalize_language(language))
        if patterns is None:
            logger.warning(f"Language {language} not supported for regex extraction")
            return []

        categories = [name for name in self._EXTRACTED_CATEGORIES if name in patterns]

        examples = []
        for test_name, body_start, test_body in self._iter_test_bodies(
            code, patterns["test_function"]
        ):
            for category in categories:
                for found in re.finditer(patterns[category], test_body):
                    # 1-based line of the match within the whole file.
                    line_number = code.count("\n", 0, body_start + found.start()) + 1
                    examples.append(
                        self._create_example(
                            test_name=test_name,
                            category=category,
                            code=found.group(0),
                            language=language,
                            file_path=file_path,
                            line_number=line_number,
                        )
                    )

        return examples

    def _normalize_language(self, language: str) -> str:
        """Map a language name to its ``PATTERNS`` key (lower-case, de-aliased)."""
        key = language.lower()
        return self._LANGUAGE_ALIASES.get(key, key)

    def _iter_test_bodies(self, code: str, test_pattern: str):
        """Yield ``(test_name, body_start, body)`` for each test declaration.

        The body is approximate: it runs from the end of the declaration
        match to the start of the next declaration (or the end of the file).
        """
        declarations = list(re.finditer(test_pattern, code))
        for index, declaration in enumerate(declarations):
            body_start = declaration.end()
            if index + 1 < len(declarations):
                body_end = declarations[index + 1].start()
            else:
                body_end = len(code)
            yield declaration.group(1), body_start, code[body_start:body_end]

    def _create_example(
        self,
        test_name: str,
        category: str,
        code: str,
        language: str,
        file_path: str,
        line_number: int,
    ) -> "TestExample":
        """Create a ``TestExample`` from one regex match.

        The ID is the first 8 hex digits of the MD5 of ``code``; complexity is
        0.1 per line capped at 1.0.
        """
        newline_count = code.count("\n")
        return TestExample(
            # Identifier only, not a security use of MD5.
            example_id=hashlib.md5(code.encode(), usedforsecurity=False).hexdigest()[:8],
            test_name=test_name,
            category=category,
            code=code,
            language=language,
            description=f"Test: {test_name}",
            expected_behavior="",
            file_path=file_path,
            line_start=line_number,
            line_end=line_number + newline_count,
            complexity_score=min(1.0, (newline_count + 1) * 0.1),
            confidence=0.6,  # Lower confidence for regex extraction
            tags=[],
            dependencies=[],
        )
746
+
747
+
748
+ # ============================================================================
749
+ # EXAMPLE QUALITY FILTER
750
+ # ============================================================================
751
+
752
+
753
class ExampleQualityFilter:
    """Filter out trivial or low-quality examples."""

    # Patterns that are only trivial as a whole statement. Matching them as
    # substrings would also reject "password", "bypass" or any "..." inside a
    # string literal.
    _PLACEHOLDER_STATEMENTS = frozenset({"pass", "..."})

    def __init__(self, min_confidence: float = 0.7, min_code_length: int = 20):
        """Configure the thresholds.

        Args:
            min_confidence: Examples below this confidence are dropped.
            min_code_length: Examples whose code has fewer characters are dropped.
        """
        self.min_confidence = min_confidence
        self.min_code_length = min_code_length

        # Trivial patterns to exclude
        self.trivial_patterns = [
            "Mock()",
            "MagicMock()",
            "assertTrue(True)",
            "assertFalse(False)",
            "assertEqual(1, 1)",
            "pass",
            "...",
        ]

    def filter(self, examples: list["TestExample"]) -> list["TestExample"]:
        """Return the examples that pass every quality check, in input order.

        An example is kept when its confidence and code length reach the
        configured minimums and its code contains no trivial pattern.
        """
        return [
            example
            for example in examples
            if example.confidence >= self.min_confidence
            and len(example.code) >= self.min_code_length
            and not self._is_trivial(example.code)
        ]

    def _is_trivial(self, code: str) -> bool:
        """Check if code contains a trivial pattern.

        ``pass`` and ``...`` count only when they make up an entire
        (stripped) line; every other pattern is a plain substring match.
        """
        stripped_lines = {line.strip() for line in code.splitlines()}
        for pattern in self.trivial_patterns:
            if pattern in self._PLACEHOLDER_STATEMENTS:
                if pattern in stripped_lines:
                    return True
            elif pattern in code:
                return True
        return False
795
+
796
+
797
+ # ============================================================================
798
+ # TEST EXAMPLE EXTRACTOR (Main Orchestrator)
799
+ # ============================================================================
800
+
801
+
802
+ class TestExampleExtractor:
803
+ """Main orchestrator for test example extraction"""
804
+
805
# Test file name patterns (glob-style), grouped by language.
# Every extension in LANGUAGE_MAP needs at least one pattern here.
TEST_PATTERNS = [
    "test_*.py",
    "*_test.py",
    "test*.js",
    "*test.js",
    # TypeScript mirrors the JavaScript naming; ".ts" is mapped in
    # LANGUAGE_MAP, so it needs matching file patterns.
    "test*.ts",
    "*test.ts",
    "*_test.go",
    "*_test.rs",
    "Test*.java",
    "Test*.cs",
    "*Test.php",
    "*_spec.rb",
]

# Language detection by file extension
LANGUAGE_MAP = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".go": "Go",
    ".rs": "Rust",
    ".java": "Java",
    ".cs": "C#",
    ".php": "PHP",
    ".rb": "Ruby",
}
831
+
832
+ def __init__(
833
+ self,
834
+ min_confidence: float = 0.7,
835
+ max_per_file: int = 10,
836
+ languages: list[str] | None = None,
837
+ enhance_with_ai: bool = True,
838
+ ):
839
+ self.python_analyzer = PythonTestAnalyzer()
840
+ self.generic_analyzer = GenericTestAnalyzer()
841
+ self.quality_filter = ExampleQualityFilter(min_confidence=min_confidence)
842
+ self.max_per_file = max_per_file
843
+ self.languages = [lang.lower() for lang in languages] if languages else None
844
+ self.enhance_with_ai = enhance_with_ai
845
+
846
+ # Initialize AI enhancer if enabled (C3.6)
847
+ self.ai_enhancer = None
848
+ if self.enhance_with_ai:
849
+ try:
850
+ from skill_seekers.cli.ai_enhancer import TestExampleEnhancer
851
+
852
+ self.ai_enhancer = TestExampleEnhancer()
853
+ except Exception as e:
854
+ logger.warning(f"⚠️ Failed to initialize AI enhancer: {e}")
855
+ self.enhance_with_ai = False
856
+
857
+ def extract_from_directory(self, directory: Path, recursive: bool = True) -> ExampleReport:
858
+ """Extract examples from all test files in directory"""
859
+ directory = Path(directory)
860
+
861
+ if not directory.exists():
862
+ raise FileNotFoundError(f"Directory not found: {directory}")
863
+
864
+ # Find test files
865
+ test_files = self._find_test_files(directory, recursive)
866
+
867
+ logger.info(f"Found {len(test_files)} test files in {directory}")
868
+
869
+ # Extract from each file
870
+ all_examples = []
871
+ for test_file in test_files:
872
+ examples = self.extract_from_file(test_file)
873
+ all_examples.extend(examples)
874
+
875
+ # Generate report
876
+ return self._create_report(all_examples, directory=str(directory))
877
+
878
+ def extract_from_file(self, file_path: Path) -> list[TestExample]:
879
+ """Extract examples from single test file"""
880
+ file_path = Path(file_path)
881
+
882
+ if not file_path.exists():
883
+ raise FileNotFoundError(f"File not found: {file_path}")
884
+
885
+ # Detect language
886
+ language = self._detect_language(file_path)
887
+
888
+ # Filter by language if specified
889
+ if self.languages and language.lower() not in self.languages:
890
+ return []
891
+
892
+ # Read file
893
+ try:
894
+ code = file_path.read_text(encoding="utf-8")
895
+ except UnicodeDecodeError:
896
+ logger.warning(f"Failed to read {file_path} (encoding error)")
897
+ return []
898
+
899
+ # Extract examples based on language
900
+ if language == "Python":
901
+ examples = self.python_analyzer.extract(str(file_path), code)
902
+ else:
903
+ examples = self.generic_analyzer.extract(str(file_path), code, language)
904
+
905
+ # Apply quality filter
906
+ filtered_examples = self.quality_filter.filter(examples)
907
+
908
+ # Limit per file
909
+ if len(filtered_examples) > self.max_per_file:
910
+ # Sort by confidence and take top N
911
+ filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[
912
+ : self.max_per_file
913
+ ]
914
+
915
+ logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}")
916
+
917
+ return filtered_examples
918
+
919
+ def _find_test_files(self, directory: Path, recursive: bool) -> list[Path]:
920
+ """Find test files in directory"""
921
+ test_files = []
922
+
923
+ for pattern in self.TEST_PATTERNS:
924
+ if recursive:
925
+ test_files.extend(directory.rglob(pattern))
926
+ else:
927
+ test_files.extend(directory.glob(pattern))
928
+
929
+ return list(set(test_files)) # Remove duplicates
930
+
931
+ def _detect_language(self, file_path: Path) -> str:
932
+ """Detect programming language from file extension"""
933
+ suffix = file_path.suffix.lower()
934
+ return self.LANGUAGE_MAP.get(suffix, "Unknown")
935
+
936
+ def _create_report(
937
+ self,
938
+ examples: list[TestExample],
939
+ file_path: str | None = None,
940
+ directory: str | None = None,
941
+ ) -> ExampleReport:
942
+ """Create summary report from examples"""
943
+ # Enhance examples with AI analysis (C3.6)
944
+ if self.enhance_with_ai and self.ai_enhancer and examples:
945
+ # Convert examples to dict format for AI processing
946
+ example_dicts = [ex.to_dict() for ex in examples]
947
+ enhanced_dicts = self.ai_enhancer.enhance_examples(example_dicts)
948
+
949
+ # Update examples with AI analysis
950
+ for i, example in enumerate(examples):
951
+ if i < len(enhanced_dicts) and "ai_analysis" in enhanced_dicts[i]:
952
+ example.ai_analysis = enhanced_dicts[i]["ai_analysis"]
953
+
954
+ # Count by category
955
+ examples_by_category = {}
956
+ for example in examples:
957
+ examples_by_category[example.category] = (
958
+ examples_by_category.get(example.category, 0) + 1
959
+ )
960
+
961
+ # Count by language
962
+ examples_by_language = {}
963
+ for example in examples:
964
+ examples_by_language[example.language] = (
965
+ examples_by_language.get(example.language, 0) + 1
966
+ )
967
+
968
+ # Calculate averages
969
+ avg_complexity = (
970
+ sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
971
+ )
972
+ high_value_count = sum(1 for ex in examples if ex.confidence > 0.7)
973
+
974
+ return ExampleReport(
975
+ total_examples=len(examples),
976
+ examples_by_category=examples_by_category,
977
+ examples_by_language=examples_by_language,
978
+ examples=examples,
979
+ avg_complexity=round(avg_complexity, 2),
980
+ high_value_count=high_value_count,
981
+ file_path=file_path,
982
+ directory=directory,
983
+ )
984
+
985
+
986
+ # ============================================================================
987
+ # COMMAND-LINE INTERFACE
988
+ # ============================================================================
989
+
990
+
991
def main():
    """Main entry point for CLI.

    Parses the command line, runs the extractor on a single file (``--file``)
    or on a directory, and prints the report as JSON (``--json``), Markdown
    (``--markdown``) or a short human-readable summary. When both a directory
    and ``--file`` are given, ``--file`` takes precedence.
    """
    parser = argparse.ArgumentParser(
        description="Extract usage examples from test files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Extract from directory
  %(prog)s tests/ --language python

  # Extract from single file
  %(prog)s --file tests/test_scraper.py

  # JSON output
  %(prog)s tests/ --json > examples.json

  # Filter by confidence
  %(prog)s tests/ --min-confidence 0.7

  # Only the top level of the directory
  %(prog)s tests/ --no-recursive
""",
    )

    parser.add_argument("directory", nargs="?", help="Directory containing test files")
    parser.add_argument("--file", help="Single test file to analyze")
    parser.add_argument(
        "--language", help="Filter by programming language (python, javascript, etc.)"
    )
    parser.add_argument(
        "--min-confidence",
        type=float,
        default=0.5,
        help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
    )
    parser.add_argument(
        "--max-per-file",
        type=int,
        default=10,
        help="Maximum examples per file (default: 10)",
    )
    parser.add_argument("--json", action="store_true", help="Output JSON format")
    parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
    parser.add_argument(
        "--recursive",
        dest="recursive",
        action="store_true",
        default=True,
        help="Search directory recursively (default: True)",
    )
    # --recursive alone could never turn recursion off (store_true with a True
    # default); this flag writes False to the same destination.
    parser.add_argument(
        "--no-recursive",
        dest="recursive",
        action="store_false",
        help="Search only the top level of the directory",
    )

    args = parser.parse_args()

    # Validate arguments
    if not args.directory and not args.file:
        parser.error("Either directory or --file must be specified")

    # Create extractor
    languages = [args.language] if args.language else None
    extractor = TestExampleExtractor(
        min_confidence=args.min_confidence,
        max_per_file=args.max_per_file,
        languages=languages,
    )

    # Extract examples
    if args.file:
        examples = extractor.extract_from_file(Path(args.file))
        report = extractor._create_report(examples, file_path=args.file)
    else:
        report = extractor.extract_from_directory(Path(args.directory), recursive=args.recursive)

    # Output results
    if args.json:
        print(json.dumps(report.to_dict(), indent=2))
    elif args.markdown:
        print(report.to_markdown())
    else:
        # Human-readable summary
        print("\nTest Example Extraction Results")
        print("=" * 50)
        print(f"Total Examples: {report.total_examples}")
        print(f"High Value (confidence > 0.7): {report.high_value_count}")
        print(f"Average Complexity: {report.avg_complexity:.2f}")
        print("\nExamples by Category:")
        for category, count in sorted(report.examples_by_category.items()):
            print(f" {category}: {count}")
        print("\nExamples by Language:")
        for language, count in sorted(report.examples_by_language.items()):
            print(f" {language}: {count}")
        print("\nUse --json or --markdown for detailed output")
1078
+
1079
+
1080
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()