skill-seekers 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1081 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test Example Extractor - Extract real usage examples from test files
|
|
4
|
+
|
|
5
|
+
Analyzes test files to extract meaningful code examples showing:
|
|
6
|
+
- Object instantiation with real parameters
|
|
7
|
+
- Method calls with expected behaviors
|
|
8
|
+
- Configuration examples
|
|
9
|
+
- Setup patterns from fixtures/setUp()
|
|
10
|
+
- Multi-step workflows from integration tests
|
|
11
|
+
|
|
12
|
+
Supports 9 languages:
|
|
13
|
+
- Python (AST-based, deep analysis)
|
|
14
|
+
- JavaScript, TypeScript, Go, Rust, Java, C#, PHP, Ruby (regex-based)
|
|
15
|
+
|
|
16
|
+
Example usage:
|
|
17
|
+
# Extract from directory
|
|
18
|
+
python test_example_extractor.py tests/ --language python
|
|
19
|
+
|
|
20
|
+
# Extract from single file
|
|
21
|
+
python test_example_extractor.py --file tests/test_scraper.py
|
|
22
|
+
|
|
23
|
+
# JSON output
|
|
24
|
+
python test_example_extractor.py tests/ --json > examples.json
|
|
25
|
+
|
|
26
|
+
# Filter by confidence
|
|
27
|
+
python test_example_extractor.py tests/ --min-confidence 0.7
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import argparse
|
|
31
|
+
import ast
|
|
32
|
+
import hashlib
|
|
33
|
+
import json
|
|
34
|
+
import logging
|
|
35
|
+
import re
|
|
36
|
+
from dataclasses import asdict, dataclass, field
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from typing import Literal
|
|
39
|
+
|
|
40
|
+
# Configure logging.
# NOTE: this is a module-level call, so it runs when the module is imported
# (not only when it is executed as a script). logging.basicConfig() only has
# an effect when the root logger has no handlers configured yet.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
# Module-level logger used by the analyzers below for warnings.
logger = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ============================================================================
|
|
46
|
+
# DATA MODELS
|
|
47
|
+
# ============================================================================
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
|
|
51
|
+
class TestExample:
|
|
52
|
+
"""Single extracted usage example from test code"""
|
|
53
|
+
|
|
54
|
+
# Identity
|
|
55
|
+
example_id: str # Unique hash of example
|
|
56
|
+
test_name: str # Test function/method name
|
|
57
|
+
category: Literal["instantiation", "method_call", "config", "setup", "workflow"]
|
|
58
|
+
|
|
59
|
+
# Code
|
|
60
|
+
code: str # Actual example code
|
|
61
|
+
language: str # Programming language
|
|
62
|
+
|
|
63
|
+
# Context
|
|
64
|
+
description: str # What this demonstrates
|
|
65
|
+
expected_behavior: str # Expected outcome from assertions
|
|
66
|
+
|
|
67
|
+
# Source
|
|
68
|
+
file_path: str
|
|
69
|
+
line_start: int
|
|
70
|
+
line_end: int
|
|
71
|
+
|
|
72
|
+
# Quality
|
|
73
|
+
complexity_score: float # 0-1 scale (higher = more complex/valuable)
|
|
74
|
+
confidence: float # 0-1 scale (higher = more confident extraction)
|
|
75
|
+
|
|
76
|
+
# Optional fields (must come after required fields)
|
|
77
|
+
setup_code: str | None = None # Required setup code
|
|
78
|
+
tags: list[str] = field(default_factory=list) # ["pytest", "mock", "async"]
|
|
79
|
+
dependencies: list[str] = field(default_factory=list) # Imported modules
|
|
80
|
+
ai_analysis: dict | None = None # AI-generated analysis (C3.6)
|
|
81
|
+
|
|
82
|
+
def to_dict(self) -> dict:
|
|
83
|
+
"""Convert to dictionary for JSON serialization"""
|
|
84
|
+
return asdict(self)
|
|
85
|
+
|
|
86
|
+
def to_markdown(self) -> str:
|
|
87
|
+
"""Convert to markdown format"""
|
|
88
|
+
md = f"### {self.test_name}\n\n"
|
|
89
|
+
md += f"**Category**: {self.category} \n"
|
|
90
|
+
md += f"**Description**: {self.description} \n"
|
|
91
|
+
if self.expected_behavior:
|
|
92
|
+
md += f"**Expected**: {self.expected_behavior} \n"
|
|
93
|
+
md += f"**Confidence**: {self.confidence:.2f} \n"
|
|
94
|
+
if self.tags:
|
|
95
|
+
md += f"**Tags**: {', '.join(self.tags)} \n"
|
|
96
|
+
|
|
97
|
+
# Add AI analysis if available (C3.6)
|
|
98
|
+
if self.ai_analysis:
|
|
99
|
+
md += "\n**🤖 AI Analysis:** \n"
|
|
100
|
+
if self.ai_analysis.get("explanation"):
|
|
101
|
+
md += f"*{self.ai_analysis['explanation']}* \n"
|
|
102
|
+
if self.ai_analysis.get("best_practices"):
|
|
103
|
+
md += f"**Best Practices:** {', '.join(self.ai_analysis['best_practices'])} \n"
|
|
104
|
+
if self.ai_analysis.get("tutorial_group"):
|
|
105
|
+
md += f"**Tutorial Group:** {self.ai_analysis['tutorial_group']} \n"
|
|
106
|
+
|
|
107
|
+
md += f"\n```{self.language.lower()}\n"
|
|
108
|
+
if self.setup_code:
|
|
109
|
+
md += f"# Setup\n{self.setup_code}\n\n"
|
|
110
|
+
md += f"{self.code}\n```\n\n"
|
|
111
|
+
md += f"*Source: {self.file_path}:{self.line_start}*\n\n"
|
|
112
|
+
return md
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@dataclass
class ExampleReport:
    """Summary of test example extraction results."""

    total_examples: int
    examples_by_category: dict[str, int]
    examples_by_language: dict[str, int]
    examples: list[TestExample]
    avg_complexity: float
    high_value_count: int  # confidence > 0.7
    file_path: str | None = None  # If single file
    directory: str | None = None  # If directory

    def to_dict(self) -> dict:
        """Convert to a dictionary for JSON serialization.

        Each contained example is serialized through its own ``to_dict()``.
        """
        return {
            "total_examples": self.total_examples,
            "examples_by_category": self.examples_by_category,
            "examples_by_language": self.examples_by_language,
            "avg_complexity": self.avg_complexity,
            "high_value_count": self.high_value_count,
            "file_path": self.file_path,
            "directory": self.directory,
            "examples": [ex.to_dict() for ex in self.examples],
        }

    def to_markdown(self) -> str:
        """Render the whole report as Markdown.

        Returns:
            A document with the summary figures, per-category and per-language
            counts (sorted by key), and every example rendered through its
            ``to_markdown()``, highest confidence first.
        """
        # Two trailing spaces before the newline form a Markdown hard line
        # break; a single space would merge the summary lines into one
        # paragraph.
        br = "  \n"

        parts = [
            "# Test Example Extraction Report\n\n",
            f"**Total Examples**: {self.total_examples}{br}",
            f"**High Value Examples** (confidence > 0.7): {self.high_value_count}{br}",
            f"**Average Complexity**: {self.avg_complexity:.2f}{br}",
        ]

        parts.append("\n## Examples by Category\n\n")
        parts.extend(
            f"- **{category}**: {count}\n"
            for category, count in sorted(self.examples_by_category.items())
        )

        parts.append("\n## Examples by Language\n\n")
        parts.extend(
            f"- **{language}**: {count}\n"
            for language, count in sorted(self.examples_by_language.items())
        )

        parts.append("\n## Extracted Examples\n\n")
        ranked = sorted(self.examples, key=lambda ex: ex.confidence, reverse=True)
        parts.extend(example.to_markdown() for example in ranked)

        return "".join(parts)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# ============================================================================
|
|
164
|
+
# PYTHON TEST ANALYZER (AST-based)
|
|
165
|
+
# ============================================================================
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class PythonTestAnalyzer:
|
|
169
|
+
"""Deep AST-based test example extraction for Python"""
|
|
170
|
+
|
|
171
|
+
def __init__(self):
|
|
172
|
+
self.trivial_patterns = {
|
|
173
|
+
"assertTrue(True)",
|
|
174
|
+
"assertFalse(False)",
|
|
175
|
+
"assertEqual(1, 1)",
|
|
176
|
+
"assertIsNone(None)",
|
|
177
|
+
"assertIsNotNone(None)",
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
def extract(self, file_path: str, code: str) -> list[TestExample]:
|
|
181
|
+
"""Extract examples from Python test file"""
|
|
182
|
+
examples = []
|
|
183
|
+
|
|
184
|
+
try:
|
|
185
|
+
tree = ast.parse(code)
|
|
186
|
+
except SyntaxError as e:
|
|
187
|
+
logger.warning(f"Failed to parse {file_path}: {e}")
|
|
188
|
+
return []
|
|
189
|
+
|
|
190
|
+
# Extract imports for dependency tracking
|
|
191
|
+
imports = self._extract_imports(tree)
|
|
192
|
+
|
|
193
|
+
# Find test classes (unittest.TestCase)
|
|
194
|
+
for node in ast.walk(tree):
|
|
195
|
+
if isinstance(node, ast.ClassDef):
|
|
196
|
+
if self._is_test_class(node):
|
|
197
|
+
examples.extend(self._extract_from_test_class(node, file_path, imports))
|
|
198
|
+
|
|
199
|
+
# Find test functions (pytest)
|
|
200
|
+
elif isinstance(node, ast.FunctionDef) and self._is_test_function(node):
|
|
201
|
+
examples.extend(self._extract_from_test_function(node, file_path, imports))
|
|
202
|
+
|
|
203
|
+
return examples
|
|
204
|
+
|
|
205
|
+
def _extract_imports(self, tree: ast.AST) -> list[str]:
|
|
206
|
+
"""Extract imported modules"""
|
|
207
|
+
imports = []
|
|
208
|
+
for node in ast.walk(tree):
|
|
209
|
+
if isinstance(node, ast.Import):
|
|
210
|
+
imports.extend([alias.name for alias in node.names])
|
|
211
|
+
elif isinstance(node, ast.ImportFrom) and node.module:
|
|
212
|
+
imports.append(node.module)
|
|
213
|
+
return imports
|
|
214
|
+
|
|
215
|
+
def _is_test_class(self, node: ast.ClassDef) -> bool:
|
|
216
|
+
"""Check if class is a test class"""
|
|
217
|
+
# unittest.TestCase pattern
|
|
218
|
+
for base in node.bases:
|
|
219
|
+
if (
|
|
220
|
+
isinstance(base, ast.Name)
|
|
221
|
+
and "Test" in base.id
|
|
222
|
+
or isinstance(base, ast.Attribute)
|
|
223
|
+
and base.attr == "TestCase"
|
|
224
|
+
):
|
|
225
|
+
return True
|
|
226
|
+
return False
|
|
227
|
+
|
|
228
|
+
def _is_test_function(self, node: ast.FunctionDef) -> bool:
|
|
229
|
+
"""Check if function is a test function"""
|
|
230
|
+
# pytest pattern: starts with test_
|
|
231
|
+
if node.name.startswith("test_"):
|
|
232
|
+
return True
|
|
233
|
+
# Has @pytest.mark decorator
|
|
234
|
+
for decorator in node.decorator_list:
|
|
235
|
+
if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse(decorator):
|
|
236
|
+
return True
|
|
237
|
+
return False
|
|
238
|
+
|
|
239
|
+
def _extract_from_test_class(
|
|
240
|
+
self, class_node: ast.ClassDef, file_path: str, imports: list[str]
|
|
241
|
+
) -> list[TestExample]:
|
|
242
|
+
"""Extract examples from unittest.TestCase class"""
|
|
243
|
+
examples = []
|
|
244
|
+
|
|
245
|
+
# Extract setUp method if exists
|
|
246
|
+
setup_code = self._extract_setup_method(class_node)
|
|
247
|
+
|
|
248
|
+
# Process each test method
|
|
249
|
+
for node in class_node.body:
|
|
250
|
+
if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
|
|
251
|
+
examples.extend(
|
|
252
|
+
self._analyze_test_body(node, file_path, imports, setup_code=setup_code)
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
return examples
|
|
256
|
+
|
|
257
|
+
def _extract_from_test_function(
|
|
258
|
+
self, func_node: ast.FunctionDef, file_path: str, imports: list[str]
|
|
259
|
+
) -> list[TestExample]:
|
|
260
|
+
"""Extract examples from pytest test function"""
|
|
261
|
+
# Check for fixture parameters
|
|
262
|
+
fixture_setup = self._extract_fixtures(func_node)
|
|
263
|
+
|
|
264
|
+
return self._analyze_test_body(func_node, file_path, imports, setup_code=fixture_setup)
|
|
265
|
+
|
|
266
|
+
def _extract_setup_method(self, class_node: ast.ClassDef) -> str | None:
|
|
267
|
+
"""Extract setUp method code"""
|
|
268
|
+
for node in class_node.body:
|
|
269
|
+
if isinstance(node, ast.FunctionDef) and node.name == "setUp":
|
|
270
|
+
return ast.unparse(node.body)
|
|
271
|
+
return None
|
|
272
|
+
|
|
273
|
+
def _extract_fixtures(self, func_node: ast.FunctionDef) -> str | None:
|
|
274
|
+
"""Extract pytest fixture parameters"""
|
|
275
|
+
if not func_node.args.args:
|
|
276
|
+
return None
|
|
277
|
+
|
|
278
|
+
# Skip 'self' parameter
|
|
279
|
+
params = [arg.arg for arg in func_node.args.args if arg.arg != "self"]
|
|
280
|
+
if params:
|
|
281
|
+
return f"# Fixtures: {', '.join(params)}"
|
|
282
|
+
return None
|
|
283
|
+
|
|
284
|
+
def _analyze_test_body(
|
|
285
|
+
self,
|
|
286
|
+
func_node: ast.FunctionDef,
|
|
287
|
+
file_path: str,
|
|
288
|
+
imports: list[str],
|
|
289
|
+
setup_code: str | None = None,
|
|
290
|
+
) -> list[TestExample]:
|
|
291
|
+
"""Analyze test function body for extractable patterns"""
|
|
292
|
+
examples = []
|
|
293
|
+
|
|
294
|
+
# Get docstring for description
|
|
295
|
+
docstring = ast.get_docstring(func_node) or func_node.name.replace("_", " ")
|
|
296
|
+
|
|
297
|
+
# Detect tags
|
|
298
|
+
tags = self._detect_tags(func_node, imports)
|
|
299
|
+
|
|
300
|
+
# Extract different pattern categories
|
|
301
|
+
|
|
302
|
+
# 1. Instantiation patterns
|
|
303
|
+
instantiations = self._find_instantiations(
|
|
304
|
+
func_node, file_path, docstring, setup_code, tags, imports
|
|
305
|
+
)
|
|
306
|
+
examples.extend(instantiations)
|
|
307
|
+
|
|
308
|
+
# 2. Method calls with assertions
|
|
309
|
+
method_calls = self._find_method_calls_with_assertions(
|
|
310
|
+
func_node, file_path, docstring, setup_code, tags, imports
|
|
311
|
+
)
|
|
312
|
+
examples.extend(method_calls)
|
|
313
|
+
|
|
314
|
+
# 3. Configuration dictionaries
|
|
315
|
+
configs = self._find_config_dicts(
|
|
316
|
+
func_node, file_path, docstring, setup_code, tags, imports
|
|
317
|
+
)
|
|
318
|
+
examples.extend(configs)
|
|
319
|
+
|
|
320
|
+
# 4. Multi-step workflows (integration tests)
|
|
321
|
+
workflows = self._find_workflows(func_node, file_path, docstring, setup_code, tags, imports)
|
|
322
|
+
examples.extend(workflows)
|
|
323
|
+
|
|
324
|
+
return examples
|
|
325
|
+
|
|
326
|
+
def _detect_tags(self, func_node: ast.FunctionDef, imports: list[str]) -> list[str]:
|
|
327
|
+
"""Detect test tags (pytest, mock, async, etc.)"""
|
|
328
|
+
tags = []
|
|
329
|
+
|
|
330
|
+
# Check decorators
|
|
331
|
+
for decorator in func_node.decorator_list:
|
|
332
|
+
decorator_str = ast.unparse(decorator).lower()
|
|
333
|
+
if "pytest" in decorator_str:
|
|
334
|
+
tags.append("pytest")
|
|
335
|
+
if "mock" in decorator_str:
|
|
336
|
+
tags.append("mock")
|
|
337
|
+
if "async" in decorator_str or func_node.name.startswith("test_async"):
|
|
338
|
+
tags.append("async")
|
|
339
|
+
|
|
340
|
+
# Check if using unittest
|
|
341
|
+
if "unittest" in imports:
|
|
342
|
+
tags.append("unittest")
|
|
343
|
+
|
|
344
|
+
# Check function body for mock usage
|
|
345
|
+
func_str = ast.unparse(func_node).lower()
|
|
346
|
+
if "mock" in func_str or "patch" in func_str:
|
|
347
|
+
tags.append("mock")
|
|
348
|
+
|
|
349
|
+
return list(set(tags))
|
|
350
|
+
|
|
351
|
+
def _find_instantiations(
|
|
352
|
+
self,
|
|
353
|
+
func_node: ast.FunctionDef,
|
|
354
|
+
file_path: str,
|
|
355
|
+
description: str,
|
|
356
|
+
setup_code: str | None,
|
|
357
|
+
tags: list[str],
|
|
358
|
+
imports: list[str],
|
|
359
|
+
) -> list[TestExample]:
|
|
360
|
+
"""Find object instantiation patterns: obj = ClassName(...)"""
|
|
361
|
+
examples = []
|
|
362
|
+
|
|
363
|
+
for node in ast.walk(func_node):
|
|
364
|
+
# Check if meaningful instantiation
|
|
365
|
+
if (
|
|
366
|
+
isinstance(node, ast.Assign)
|
|
367
|
+
and isinstance(node.value, ast.Call)
|
|
368
|
+
and self._is_meaningful_instantiation(node)
|
|
369
|
+
):
|
|
370
|
+
code = ast.unparse(node)
|
|
371
|
+
|
|
372
|
+
# Skip trivial or mock-only
|
|
373
|
+
if len(code) < 20 or "Mock()" in code:
|
|
374
|
+
continue
|
|
375
|
+
|
|
376
|
+
# Get class name
|
|
377
|
+
class_name = self._get_class_name(node.value)
|
|
378
|
+
|
|
379
|
+
example = TestExample(
|
|
380
|
+
example_id=self._generate_id(code),
|
|
381
|
+
test_name=func_node.name,
|
|
382
|
+
category="instantiation",
|
|
383
|
+
code=code,
|
|
384
|
+
language="Python",
|
|
385
|
+
description=f"Instantiate {class_name}: {description}",
|
|
386
|
+
expected_behavior=self._extract_assertion_after(func_node, node),
|
|
387
|
+
setup_code=setup_code,
|
|
388
|
+
file_path=file_path,
|
|
389
|
+
line_start=node.lineno,
|
|
390
|
+
line_end=node.end_lineno or node.lineno,
|
|
391
|
+
complexity_score=self._calculate_complexity(code),
|
|
392
|
+
confidence=0.8,
|
|
393
|
+
tags=tags,
|
|
394
|
+
dependencies=imports,
|
|
395
|
+
)
|
|
396
|
+
examples.append(example)
|
|
397
|
+
|
|
398
|
+
return examples
|
|
399
|
+
|
|
400
|
+
def _find_method_calls_with_assertions(
|
|
401
|
+
self,
|
|
402
|
+
func_node: ast.FunctionDef,
|
|
403
|
+
file_path: str,
|
|
404
|
+
description: str,
|
|
405
|
+
setup_code: str | None,
|
|
406
|
+
tags: list[str],
|
|
407
|
+
imports: list[str],
|
|
408
|
+
) -> list[TestExample]:
|
|
409
|
+
"""Find method calls followed by assertions"""
|
|
410
|
+
examples = []
|
|
411
|
+
|
|
412
|
+
statements = func_node.body
|
|
413
|
+
for i, stmt in enumerate(statements):
|
|
414
|
+
# Look for method calls and check if next statement is an assertion
|
|
415
|
+
if (
|
|
416
|
+
isinstance(stmt, ast.Expr)
|
|
417
|
+
and isinstance(stmt.value, ast.Call)
|
|
418
|
+
and i + 1 < len(statements)
|
|
419
|
+
):
|
|
420
|
+
next_stmt = statements[i + 1]
|
|
421
|
+
if self._is_assertion(next_stmt):
|
|
422
|
+
method_call = ast.unparse(stmt)
|
|
423
|
+
assertion = ast.unparse(next_stmt)
|
|
424
|
+
|
|
425
|
+
code = f"{method_call}\n{assertion}"
|
|
426
|
+
|
|
427
|
+
# Skip trivial assertions
|
|
428
|
+
if any(trivial in assertion for trivial in self.trivial_patterns):
|
|
429
|
+
continue
|
|
430
|
+
|
|
431
|
+
example = TestExample(
|
|
432
|
+
example_id=self._generate_id(code),
|
|
433
|
+
test_name=func_node.name,
|
|
434
|
+
category="method_call",
|
|
435
|
+
code=code,
|
|
436
|
+
language="Python",
|
|
437
|
+
description=description,
|
|
438
|
+
expected_behavior=assertion,
|
|
439
|
+
setup_code=setup_code,
|
|
440
|
+
file_path=file_path,
|
|
441
|
+
line_start=stmt.lineno,
|
|
442
|
+
line_end=next_stmt.end_lineno or next_stmt.lineno,
|
|
443
|
+
complexity_score=self._calculate_complexity(code),
|
|
444
|
+
confidence=0.85,
|
|
445
|
+
tags=tags,
|
|
446
|
+
dependencies=imports,
|
|
447
|
+
)
|
|
448
|
+
examples.append(example)
|
|
449
|
+
|
|
450
|
+
return examples
|
|
451
|
+
|
|
452
|
+
def _find_config_dicts(
|
|
453
|
+
self,
|
|
454
|
+
func_node: ast.FunctionDef,
|
|
455
|
+
file_path: str,
|
|
456
|
+
description: str,
|
|
457
|
+
setup_code: str | None,
|
|
458
|
+
tags: list[str],
|
|
459
|
+
imports: list[str],
|
|
460
|
+
) -> list[TestExample]:
|
|
461
|
+
"""Find configuration dictionary patterns"""
|
|
462
|
+
examples = []
|
|
463
|
+
|
|
464
|
+
for node in ast.walk(func_node):
|
|
465
|
+
# Must have 2+ keys and be meaningful
|
|
466
|
+
if (
|
|
467
|
+
isinstance(node, ast.Assign)
|
|
468
|
+
and isinstance(node.value, ast.Dict)
|
|
469
|
+
and len(node.value.keys) >= 2
|
|
470
|
+
):
|
|
471
|
+
code = ast.unparse(node)
|
|
472
|
+
|
|
473
|
+
# Check if looks like configuration
|
|
474
|
+
if self._is_config_dict(node.value):
|
|
475
|
+
example = TestExample(
|
|
476
|
+
example_id=self._generate_id(code),
|
|
477
|
+
test_name=func_node.name,
|
|
478
|
+
category="config",
|
|
479
|
+
code=code,
|
|
480
|
+
language="Python",
|
|
481
|
+
description=f"Configuration example: {description}",
|
|
482
|
+
expected_behavior=self._extract_assertion_after(func_node, node),
|
|
483
|
+
setup_code=setup_code,
|
|
484
|
+
file_path=file_path,
|
|
485
|
+
line_start=node.lineno,
|
|
486
|
+
line_end=node.end_lineno or node.lineno,
|
|
487
|
+
complexity_score=self._calculate_complexity(code),
|
|
488
|
+
confidence=0.75,
|
|
489
|
+
tags=tags,
|
|
490
|
+
dependencies=imports,
|
|
491
|
+
)
|
|
492
|
+
examples.append(example)
|
|
493
|
+
|
|
494
|
+
return examples
|
|
495
|
+
|
|
496
|
+
def _find_workflows(
|
|
497
|
+
self,
|
|
498
|
+
func_node: ast.FunctionDef,
|
|
499
|
+
file_path: str,
|
|
500
|
+
description: str,
|
|
501
|
+
setup_code: str | None,
|
|
502
|
+
tags: list[str],
|
|
503
|
+
imports: list[str],
|
|
504
|
+
) -> list[TestExample]:
|
|
505
|
+
"""Find multi-step workflow patterns (integration tests)"""
|
|
506
|
+
examples = []
|
|
507
|
+
|
|
508
|
+
# Check if this looks like an integration test (3+ meaningful steps)
|
|
509
|
+
if len(func_node.body) >= 3 and self._is_integration_test(func_node):
|
|
510
|
+
# Extract the full workflow
|
|
511
|
+
code = ast.unparse(func_node.body)
|
|
512
|
+
|
|
513
|
+
# Skip if too long (> 30 lines)
|
|
514
|
+
if code.count("\n") > 30:
|
|
515
|
+
return examples
|
|
516
|
+
|
|
517
|
+
example = TestExample(
|
|
518
|
+
example_id=self._generate_id(code),
|
|
519
|
+
test_name=func_node.name,
|
|
520
|
+
category="workflow",
|
|
521
|
+
code=code,
|
|
522
|
+
language="Python",
|
|
523
|
+
description=f"Workflow: {description}",
|
|
524
|
+
expected_behavior=self._extract_final_assertion(func_node),
|
|
525
|
+
setup_code=setup_code,
|
|
526
|
+
file_path=file_path,
|
|
527
|
+
line_start=func_node.lineno,
|
|
528
|
+
line_end=func_node.end_lineno or func_node.lineno,
|
|
529
|
+
complexity_score=min(1.0, len(func_node.body) / 10),
|
|
530
|
+
confidence=0.9,
|
|
531
|
+
tags=tags + ["workflow", "integration"],
|
|
532
|
+
dependencies=imports,
|
|
533
|
+
)
|
|
534
|
+
examples.append(example)
|
|
535
|
+
|
|
536
|
+
return examples
|
|
537
|
+
|
|
538
|
+
# Helper methods
|
|
539
|
+
|
|
540
|
+
def _is_meaningful_instantiation(self, node: ast.Assign) -> bool:
|
|
541
|
+
"""Check if instantiation has meaningful parameters"""
|
|
542
|
+
if not isinstance(node.value, ast.Call):
|
|
543
|
+
return False
|
|
544
|
+
|
|
545
|
+
# Must have at least one argument or keyword argument
|
|
546
|
+
call = node.value
|
|
547
|
+
return bool(call.args or call.keywords)
|
|
548
|
+
|
|
549
|
+
def _get_class_name(self, call_node: ast.Call) -> str:
|
|
550
|
+
"""Extract class name from Call node"""
|
|
551
|
+
if isinstance(call_node.func, ast.Name):
|
|
552
|
+
return call_node.func.id
|
|
553
|
+
elif isinstance(call_node.func, ast.Attribute):
|
|
554
|
+
return call_node.func.attr
|
|
555
|
+
return "UnknownClass"
|
|
556
|
+
|
|
557
|
+
def _is_assertion(self, node: ast.stmt) -> bool:
|
|
558
|
+
"""Check if statement is an assertion"""
|
|
559
|
+
if isinstance(node, ast.Assert):
|
|
560
|
+
return True
|
|
561
|
+
|
|
562
|
+
if isinstance(node, ast.Expr) and isinstance(node.value, ast.Call):
|
|
563
|
+
call_str = ast.unparse(node.value).lower()
|
|
564
|
+
assertion_methods = ["assert", "expect", "should"]
|
|
565
|
+
return any(method in call_str for method in assertion_methods)
|
|
566
|
+
|
|
567
|
+
return False
|
|
568
|
+
|
|
569
|
+
def _is_config_dict(self, dict_node: ast.Dict) -> bool:
|
|
570
|
+
"""Check if dictionary looks like configuration"""
|
|
571
|
+
# Keys should be strings
|
|
572
|
+
for key in dict_node.keys:
|
|
573
|
+
if not isinstance(key, ast.Constant) or not isinstance(key.value, str):
|
|
574
|
+
return False
|
|
575
|
+
return True
|
|
576
|
+
|
|
577
|
+
def _is_integration_test(self, func_node: ast.FunctionDef) -> bool:
|
|
578
|
+
"""Check if test looks like an integration test"""
|
|
579
|
+
test_name = func_node.name.lower()
|
|
580
|
+
integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"]
|
|
581
|
+
return any(keyword in test_name for keyword in integration_keywords)
|
|
582
|
+
|
|
583
|
+
def _extract_assertion_after(self, func_node: ast.FunctionDef, target_node: ast.AST) -> str:
|
|
584
|
+
"""Find assertion that follows the target node"""
|
|
585
|
+
found_target = False
|
|
586
|
+
for stmt in func_node.body:
|
|
587
|
+
if stmt == target_node:
|
|
588
|
+
found_target = True
|
|
589
|
+
continue
|
|
590
|
+
if found_target and self._is_assertion(stmt):
|
|
591
|
+
return ast.unparse(stmt)
|
|
592
|
+
return ""
|
|
593
|
+
|
|
594
|
+
def _extract_final_assertion(self, func_node: ast.FunctionDef) -> str:
|
|
595
|
+
"""Extract the final assertion from test"""
|
|
596
|
+
for stmt in reversed(func_node.body):
|
|
597
|
+
if self._is_assertion(stmt):
|
|
598
|
+
return ast.unparse(stmt)
|
|
599
|
+
return ""
|
|
600
|
+
|
|
601
|
+
def _calculate_complexity(self, code: str) -> float:
|
|
602
|
+
"""Calculate code complexity score (0-1)"""
|
|
603
|
+
# Simple heuristic: more lines + more parameters = more complex
|
|
604
|
+
lines = code.count("\n") + 1
|
|
605
|
+
params = code.count(",") + 1
|
|
606
|
+
|
|
607
|
+
complexity = min(1.0, (lines * 0.1) + (params * 0.05))
|
|
608
|
+
return round(complexity, 2)
|
|
609
|
+
|
|
610
|
+
def _generate_id(self, code: str) -> str:
|
|
611
|
+
"""Generate unique ID for example"""
|
|
612
|
+
return hashlib.md5(code.encode()).hexdigest()[:8]
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
# ============================================================================
|
|
616
|
+
# GENERIC TEST ANALYZER (Regex-based for non-Python languages)
|
|
617
|
+
# ============================================================================
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
class GenericTestAnalyzer:
|
|
621
|
+
"""Regex-based test example extraction for non-Python languages"""
|
|
622
|
+
|
|
623
|
+
# Language-specific regex patterns
|
|
624
|
+
PATTERNS = {
|
|
625
|
+
"javascript": {
|
|
626
|
+
"instantiation": r"(?:const|let|var)\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
|
|
627
|
+
"assertion": r"expect\(([^)]+)\)\.to(?:Equal|Be|Match)\(([^)]+)\)",
|
|
628
|
+
"test_function": r'(?:test|it)\(["\']([^"\']+)["\']',
|
|
629
|
+
"config": r"(?:const|let)\s+config\s*=\s*\{[\s\S]{20,500}?\}",
|
|
630
|
+
},
|
|
631
|
+
"typescript": {
|
|
632
|
+
"instantiation": r"(?:const|let|var)\s+(\w+):\s*\w+\s*=\s*new\s+(\w+)\(([^)]*)\)",
|
|
633
|
+
"assertion": r"expect\(([^)]+)\)\.to(?:Equal|Be|Match)\(([^)]+)\)",
|
|
634
|
+
"test_function": r'(?:test|it)\(["\']([^"\']+)["\']',
|
|
635
|
+
"config": r"(?:const|let)\s+config:\s*\w+\s*=\s*\{[\s\S]{20,500}?\}",
|
|
636
|
+
},
|
|
637
|
+
"go": {
|
|
638
|
+
"instantiation": r"(\w+)\s*:=\s*(\w+)\{([^}]+)\}",
|
|
639
|
+
"assertion": r't\.(?:Error|Fatal)(?:f)?\(["\']([^"\']+)["\']',
|
|
640
|
+
"test_function": r"func\s+(Test\w+)\(t\s+\*testing\.T\)",
|
|
641
|
+
"table_test": r"tests\s*:=\s*\[\]struct\s*\{[\s\S]{50,1000}?\}",
|
|
642
|
+
},
|
|
643
|
+
"rust": {
|
|
644
|
+
"instantiation": r"let\s+(\w+)\s*=\s*(\w+)::new\(([^)]*)\)",
|
|
645
|
+
"assertion": r"assert(?:_eq)?!\(([^)]+)\)",
|
|
646
|
+
"test_function": r"#\[test\]\s*fn\s+(\w+)\(\)",
|
|
647
|
+
},
|
|
648
|
+
"java": {
|
|
649
|
+
"instantiation": r"(\w+)\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
|
|
650
|
+
"assertion": r"assert(?:Equals|True|False|NotNull)\(([^)]+)\)",
|
|
651
|
+
"test_function": r"@Test\s+public\s+void\s+(\w+)\(\)",
|
|
652
|
+
},
|
|
653
|
+
"csharp": {
|
|
654
|
+
"instantiation": r"var\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
|
|
655
|
+
"assertion": r"Assert\.(?:AreEqual|IsTrue|IsFalse|IsNotNull)\(([^)]+)\)",
|
|
656
|
+
"test_function": r"\[Test\]\s+public\s+void\s+(\w+)\(\)",
|
|
657
|
+
},
|
|
658
|
+
"php": {
|
|
659
|
+
"instantiation": r"\$(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
|
|
660
|
+
"assertion": r"\$this->assert(?:Equals|True|False|NotNull)\(([^)]+)\)",
|
|
661
|
+
"test_function": r"public\s+function\s+(test\w+)\(\)",
|
|
662
|
+
},
|
|
663
|
+
"ruby": {
|
|
664
|
+
"instantiation": r"(\w+)\s*=\s*(\w+)\.new\(([^)]*)\)",
|
|
665
|
+
"assertion": r"expect\(([^)]+)\)\.to\s+(?:eq|be|match)\(([^)]+)\)",
|
|
666
|
+
"test_function": r'(?:test|it)\s+["\']([^"\']+)["\']',
|
|
667
|
+
},
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
def extract(self, file_path: str, code: str, language: str) -> list[TestExample]:
    """Extract usage examples from a test file using per-language regex patterns.

    The file is split into approximate test bodies: each body runs from the
    end of one ``test_function`` match to the start of the next one (or to
    the end of the file). Inside every body the ``instantiation`` pattern
    and, when the language defines one, the ``config`` pattern are applied.
    Other pattern keys present in ``PATTERNS`` are not consulted here.

    Args:
        file_path: Path of the analysed file, stored on each example.
        code: Full text of the test file.
        language: Language display name (case-insensitive), e.g. ``"Go"``.
            ``"C#"`` is accepted and mapped to the ``"csharp"`` pattern set.

    Returns:
        Examples in file order; for each test, instantiations come before
        configs. Empty when the language has no pattern set.
    """
    # Display names that do not lowercase to their PATTERNS key.
    # LANGUAGE_MAP reports ".cs" files as "C#", which would otherwise never
    # match the "csharp" entry.
    aliases = {"c#": "csharp"}
    language_lower = language.lower()
    language_key = aliases.get(language_lower, language_lower)

    if language_key not in self.PATTERNS:
        logger.warning(f"Language {language} not supported for regex extraction")
        return []

    patterns = self.PATTERNS[language_key]
    examples = []

    # Materialise the matches once so the start of the following test can be
    # looked up directly instead of re-searching a fresh slice per test.
    test_matches = list(re.finditer(patterns["test_function"], code))

    for index, match in enumerate(test_matches):
        test_name = match.group(1)

        # Approximate the test body: everything up to the next test header.
        start_pos = match.end()
        if index + 1 < len(test_matches):
            end_pos = test_matches[index + 1].start()
        else:
            end_pos = len(code)
        test_body = code[start_pos:end_pos]

        # "config" is optional: not every language defines it.
        for category in ("instantiation", "config"):
            category_pattern = patterns.get(category)
            if category_pattern is None:
                continue

            for found in re.finditer(category_pattern, test_body):
                # Offsets in test_body are relative to start_pos; convert to
                # a 1-based line number in the whole file.
                absolute_offset = start_pos + found.start()
                examples.append(
                    self._create_example(
                        test_name=test_name,
                        category=category,
                        code=found.group(0),
                        language=language,
                        file_path=file_path,
                        line_number=code[:absolute_offset].count("\n") + 1,
                    )
                )

    return examples
|
|
719
|
+
|
|
720
|
+
def _create_example(
    self,
    test_name: str,
    category: str,
    code: str,
    language: str,
    file_path: str,
    line_number: int,
) -> TestExample:
    """Wrap one regex-matched snippet in a ``TestExample`` record.

    The example id is the first 8 hex characters of the MD5 digest of the
    snippet text. ``line_end`` and ``complexity_score`` are both derived
    from the number of newlines in the snippet; the score grows by 0.1 per
    line and is capped at 1.0.
    """
    newline_count = code.count("\n")
    snippet_digest = hashlib.md5(code.encode()).hexdigest()
    line_span_score = (newline_count + 1) * 0.1

    return TestExample(
        example_id=snippet_digest[:8],
        test_name=test_name,
        category=category,
        code=code,
        language=language,
        description=f"Test: {test_name}",
        expected_behavior="",
        file_path=file_path,
        line_start=line_number,
        line_end=line_number + newline_count,
        complexity_score=min(1.0, line_span_score),
        # Regex extraction is less reliable than the other analyzers,
        # so it gets a fixed, lower confidence.
        confidence=0.6,
        tags=[],
        dependencies=[],
    )
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
# ============================================================================
|
|
749
|
+
# EXAMPLE QUALITY FILTER
|
|
750
|
+
# ============================================================================
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
class ExampleQualityFilter:
|
|
754
|
+
"""Filter out trivial or low-quality examples"""
|
|
755
|
+
|
|
756
|
+
def __init__(self, min_confidence: float = 0.7, min_code_length: int = 20):
|
|
757
|
+
self.min_confidence = min_confidence
|
|
758
|
+
self.min_code_length = min_code_length
|
|
759
|
+
|
|
760
|
+
# Trivial patterns to exclude
|
|
761
|
+
self.trivial_patterns = [
|
|
762
|
+
"Mock()",
|
|
763
|
+
"MagicMock()",
|
|
764
|
+
"assertTrue(True)",
|
|
765
|
+
"assertFalse(False)",
|
|
766
|
+
"assertEqual(1, 1)",
|
|
767
|
+
"pass",
|
|
768
|
+
"...",
|
|
769
|
+
]
|
|
770
|
+
|
|
771
|
+
def filter(self, examples: list[TestExample]) -> list[TestExample]:
|
|
772
|
+
"""Filter examples by quality criteria"""
|
|
773
|
+
filtered = []
|
|
774
|
+
|
|
775
|
+
for example in examples:
|
|
776
|
+
# Check confidence threshold
|
|
777
|
+
if example.confidence < self.min_confidence:
|
|
778
|
+
continue
|
|
779
|
+
|
|
780
|
+
# Check code length
|
|
781
|
+
if len(example.code) < self.min_code_length:
|
|
782
|
+
continue
|
|
783
|
+
|
|
784
|
+
# Check for trivial patterns
|
|
785
|
+
if self._is_trivial(example.code):
|
|
786
|
+
continue
|
|
787
|
+
|
|
788
|
+
filtered.append(example)
|
|
789
|
+
|
|
790
|
+
return filtered
|
|
791
|
+
|
|
792
|
+
def _is_trivial(self, code: str) -> bool:
|
|
793
|
+
"""Check if code contains trivial patterns"""
|
|
794
|
+
return any(pattern in code for pattern in self.trivial_patterns)
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
# ============================================================================
|
|
798
|
+
# TEST EXAMPLE EXTRACTOR (Main Orchestrator)
|
|
799
|
+
# ============================================================================
|
|
800
|
+
|
|
801
|
+
|
|
802
|
+
class TestExampleExtractor:
    """Main orchestrator for test example extraction.

    Finds test files, picks an analyzer by file extension, applies the
    quality filter and the per-file cap, and summarises the results in an
    ``ExampleReport``.
    """

    # Test file patterns (glob syntax, matched against file names)
    TEST_PATTERNS = [
        "test_*.py",
        "*_test.py",
        "test*.js",
        "*test.js",
        # TypeScript mirrors the JavaScript globs; ".ts" is in LANGUAGE_MAP,
        # so directory scans should be able to discover such files.
        "test*.ts",
        "*test.ts",
        "*_test.go",
        "*_test.rs",
        "Test*.java",
        "Test*.cs",
        "*Test.php",
        "*_spec.rb",
    ]

    # Language detection by extension
    LANGUAGE_MAP = {
        ".py": "Python",
        ".js": "JavaScript",
        ".ts": "TypeScript",
        ".go": "Go",
        ".rs": "Rust",
        ".java": "Java",
        ".cs": "C#",
        ".php": "PHP",
        ".rb": "Ruby",
    }

    def __init__(
        self,
        min_confidence: float = 0.7,
        max_per_file: int = 10,
        languages: list[str] | None = None,
        enhance_with_ai: bool = True,
    ):
        """Set up the analyzers, the quality filter and the optional AI enhancer.

        Args:
            min_confidence: Threshold passed to ``ExampleQualityFilter``.
            max_per_file: Maximum number of examples kept per file.
            languages: Optional language names to restrict extraction to;
                compared case-insensitively. ``None`` or empty means all.
            enhance_with_ai: Whether to try to attach AI analysis to examples
                when a report is created.
        """
        self.python_analyzer = PythonTestAnalyzer()
        self.generic_analyzer = GenericTestAnalyzer()
        self.quality_filter = ExampleQualityFilter(min_confidence=min_confidence)
        self.max_per_file = max_per_file
        self.languages = [lang.lower() for lang in languages] if languages else None
        self.enhance_with_ai = enhance_with_ai

        # Initialize AI enhancer if enabled (C3.6)
        self.ai_enhancer = None
        if self.enhance_with_ai:
            try:
                from skill_seekers.cli.ai_enhancer import TestExampleEnhancer

                self.ai_enhancer = TestExampleEnhancer()
            except Exception as e:
                # Best effort: extraction still works without the enhancer.
                logger.warning(f"⚠️ Failed to initialize AI enhancer: {e}")
                self.enhance_with_ai = False

    def extract_from_directory(self, directory: Path, recursive: bool = True) -> ExampleReport:
        """Extract examples from all test files in a directory.

        Args:
            directory: Directory to scan.
            recursive: Scan subdirectories as well when true.

        Returns:
            A report covering the examples of every discovered test file.

        Raises:
            FileNotFoundError: If ``directory`` does not exist.
        """
        directory = Path(directory)

        if not directory.exists():
            raise FileNotFoundError(f"Directory not found: {directory}")

        # Find test files
        test_files = self._find_test_files(directory, recursive)

        logger.info(f"Found {len(test_files)} test files in {directory}")

        # Extract from each file
        all_examples = []
        for test_file in test_files:
            all_examples.extend(self.extract_from_file(test_file))

        # Generate report
        return self._create_report(all_examples, directory=str(directory))

    def extract_from_file(self, file_path: Path) -> list[TestExample]:
        """Extract examples from a single test file.

        Returns an empty list when the file's language is excluded by the
        language filter or when the file cannot be decoded as UTF-8.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # Detect language
        language = self._detect_language(file_path)

        # Filter by language if specified
        if self.languages and language.lower() not in self.languages:
            return []

        # Read file
        try:
            code = file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            logger.warning(f"Failed to read {file_path} (encoding error)")
            return []

        # Extract examples based on language
        if language == "Python":
            examples = self.python_analyzer.extract(str(file_path), code)
        else:
            examples = self.generic_analyzer.extract(str(file_path), code, language)

        # Apply quality filter
        filtered_examples = self.quality_filter.filter(examples)

        # Limit per file
        if len(filtered_examples) > self.max_per_file:
            # Sort by confidence and take top N
            filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[
                : self.max_per_file
            ]

        logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}")

        return filtered_examples

    def _find_test_files(self, directory: Path, recursive: bool) -> list[Path]:
        """Find test files in a directory.

        Returns the regular files matching any entry of ``TEST_PATTERNS``,
        without duplicates and in sorted order so repeated runs process
        files in the same sequence.
        """
        globber = directory.rglob if recursive else directory.glob

        matches: set[Path] = set()
        for pattern in self.TEST_PATTERNS:
            # A directory can match a glob too; reading it later would fail,
            # so only regular files are collected.
            matches.update(path for path in globber(pattern) if path.is_file())

        return sorted(matches)

    def _detect_language(self, file_path: Path) -> str:
        """Detect programming language from the (case-insensitive) file extension.

        Returns ``"Unknown"`` for extensions missing from ``LANGUAGE_MAP``.
        """
        suffix = file_path.suffix.lower()
        return self.LANGUAGE_MAP.get(suffix, "Unknown")

    def _create_report(
        self,
        examples: list[TestExample],
        file_path: str | None = None,
        directory: str | None = None,
    ) -> ExampleReport:
        """Create a summary report from examples.

        When AI enhancement is active, each example whose enhanced dict
        carries an ``"ai_analysis"`` entry gets that value assigned in place
        before the statistics are computed.
        """
        # Enhance examples with AI analysis (C3.6)
        if self.enhance_with_ai and self.ai_enhancer and examples:
            # Convert examples to dict format for AI processing
            example_dicts = [ex.to_dict() for ex in examples]
            enhanced_dicts = self.ai_enhancer.enhance_examples(example_dicts)

            # Update examples with AI analysis; results are paired with
            # examples by position.
            for i, example in enumerate(examples):
                if i < len(enhanced_dicts) and "ai_analysis" in enhanced_dicts[i]:
                    example.ai_analysis = enhanced_dicts[i]["ai_analysis"]

        # Count by category and by language
        examples_by_category: dict[str, int] = {}
        examples_by_language: dict[str, int] = {}
        for example in examples:
            examples_by_category[example.category] = (
                examples_by_category.get(example.category, 0) + 1
            )
            examples_by_language[example.language] = (
                examples_by_language.get(example.language, 0) + 1
            )

        # Calculate averages
        avg_complexity = (
            sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
        )
        high_value_count = sum(1 for ex in examples if ex.confidence > 0.7)

        return ExampleReport(
            total_examples=len(examples),
            examples_by_category=examples_by_category,
            examples_by_language=examples_by_language,
            examples=examples,
            avg_complexity=round(avg_complexity, 2),
            high_value_count=high_value_count,
            file_path=file_path,
            directory=directory,
        )
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
# ============================================================================
|
|
987
|
+
# COMMAND-LINE INTERFACE
|
|
988
|
+
# ============================================================================
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
def main():
    """Main entry point for CLI.

    Parses the command line, extracts examples from either a single file
    (``--file``) or a directory, and prints the report as JSON, Markdown or
    a short human-readable summary.
    """
    parser = argparse.ArgumentParser(
        description="Extract usage examples from test files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Extract from directory
%(prog)s tests/ --language python

# Extract from single file
%(prog)s --file tests/test_scraper.py

# JSON output
%(prog)s tests/ --json > examples.json

# Filter by confidence
%(prog)s tests/ --min-confidence 0.7
""",
    )

    parser.add_argument("directory", nargs="?", help="Directory containing test files")
    parser.add_argument("--file", help="Single test file to analyze")
    parser.add_argument(
        "--language", help="Filter by programming language (python, javascript, etc.)"
    )
    parser.add_argument(
        "--min-confidence",
        type=float,
        default=0.5,
        help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
    )
    parser.add_argument(
        "--max-per-file",
        type=int,
        default=10,
        help="Maximum examples per file (default: 10)",
    )
    parser.add_argument("--json", action="store_true", help="Output JSON format")
    parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
    # A store_true flag that defaults to True can never be switched off.
    # BooleanOptionalAction keeps --recursive working and adds --no-recursive.
    parser.add_argument(
        "--recursive",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Search directory recursively; use --no-recursive for the top level only",
    )

    args = parser.parse_args()

    # Validate arguments
    if not args.directory and not args.file:
        parser.error("Either directory or --file must be specified")

    # Create extractor
    languages = [args.language] if args.language else None
    extractor = TestExampleExtractor(
        min_confidence=args.min_confidence,
        max_per_file=args.max_per_file,
        languages=languages,
    )

    # Extract examples; --file takes precedence when both are given
    if args.file:
        examples = extractor.extract_from_file(Path(args.file))
        report = extractor._create_report(examples, file_path=args.file)
    else:
        report = extractor.extract_from_directory(Path(args.directory), recursive=args.recursive)

    # Output results; --json wins over --markdown when both are set
    if args.json:
        print(json.dumps(report.to_dict(), indent=2))
    elif args.markdown:
        print(report.to_markdown())
    else:
        # Human-readable summary
        print("\nTest Example Extraction Results")
        print("=" * 50)
        print(f"Total Examples: {report.total_examples}")
        print(f"High Value (confidence > 0.7): {report.high_value_count}")
        print(f"Average Complexity: {report.avg_complexity:.2f}")
        print("\nExamples by Category:")
        for category, count in sorted(report.examples_by_category.items()):
            print(f" {category}: {count}")
        print("\nExamples by Language:")
        for language, count in sorted(report.examples_by_language.items()):
            print(f" {language}: {count}")
        print("\nUse --json or --markdown for detailed output")
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()
|