evalvault 1.72.1__py3-none-any.whl → 1.73.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. evalvault/adapters/inbound/api/routers/pipeline.py +6 -0
  2. evalvault/adapters/inbound/cli/commands/analyze.py +40 -1
  3. evalvault/adapters/inbound/cli/commands/pipeline.py +100 -0
  4. evalvault/adapters/inbound/cli/commands/regress.py +96 -0
  5. evalvault/adapters/inbound/cli/commands/stage.py +217 -24
  6. evalvault/adapters/outbound/analysis/__init__.py +4 -0
  7. evalvault/adapters/outbound/analysis/dataset_feature_analyzer_module.py +458 -0
  8. evalvault/adapters/outbound/analysis/pipeline_factory.py +1 -0
  9. evalvault/adapters/outbound/analysis/statistical_adapter.py +12 -6
  10. evalvault/adapters/outbound/improvement/pattern_detector.py +4 -0
  11. evalvault/adapters/outbound/storage/base_sql.py +160 -0
  12. evalvault/adapters/outbound/storage/postgres_adapter.py +132 -8
  13. evalvault/adapters/outbound/storage/postgres_schema.sql +15 -0
  14. evalvault/adapters/outbound/storage/schema.sql +18 -1
  15. evalvault/adapters/outbound/storage/sqlite_adapter.py +115 -1
  16. evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +23 -1
  17. evalvault/domain/entities/analysis.py +1 -0
  18. evalvault/domain/entities/analysis_pipeline.py +1 -0
  19. evalvault/domain/entities/stage.py +13 -0
  20. evalvault/domain/services/intent_classifier.py +13 -0
  21. evalvault/domain/services/pipeline_template_registry.py +22 -0
  22. evalvault/ports/outbound/storage_port.py +32 -0
  23. {evalvault-1.72.1.dist-info → evalvault-1.73.1.dist-info}/METADATA +2 -1
  24. {evalvault-1.72.1.dist-info → evalvault-1.73.1.dist-info}/RECORD +27 -26
  25. {evalvault-1.72.1.dist-info → evalvault-1.73.1.dist-info}/WHEEL +0 -0
  26. {evalvault-1.72.1.dist-info → evalvault-1.73.1.dist-info}/entry_points.txt +0 -0
  27. {evalvault-1.72.1.dist-info → evalvault-1.73.1.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,458 @@
1
+ """Dataset feature analyzer module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import itertools
6
+ import math
7
+ import re
8
+ from collections import Counter
9
+ from typing import Any, cast
10
+
11
+ import numpy as np
12
+ from scipy import stats
13
+
14
+ from evalvault.adapters.outbound.analysis.base_module import BaseAnalysisModule
15
+ from evalvault.adapters.outbound.analysis.pipeline_helpers import get_upstream_output
16
+ from evalvault.adapters.outbound.improvement.pattern_detector import PatternDetector
17
+ from evalvault.domain.entities import EvaluationRun
18
+
19
+ try:
20
+ from evalvault.adapters.outbound.nlp.korean import KiwiTokenizer
21
+ except Exception: # pragma: no cover - optional dependency
22
+ KiwiTokenizer = None # type: ignore[assignment]
23
+
24
+
25
+ _WORD_PATTERN = re.compile(r"[A-Za-z0-9가-힣]+")
26
+ _SENTENCE_PATTERN = re.compile(r"[.!?。!?]+")
27
+ _HANGUL_PATTERN = re.compile(r"[가-힣]")
28
+ _LATIN_PATTERN = re.compile(r"[A-Za-z]")
29
+
30
+
31
+ class DatasetFeatureAnalyzerModule(BaseAnalysisModule):
32
+ """Extract dataset features and analyze metric relationships."""
33
+
34
+ module_id = "dataset_feature_analyzer"
35
+ name = "Dataset Feature Analyzer"
36
+ description = "질문/답변/정답/컨텍스트의 특성을 추출하고 점수 연관성을 분석합니다."
37
+ input_types = ["run"]
38
+ output_types = ["dataset_feature_analysis"]
39
+ requires = ["data_loader"]
40
+ tags = ["analysis", "features", "nlp"]
41
+
42
+ def __init__(self) -> None:
43
+ self._detector = PatternDetector()
44
+ self._tokenizer = KiwiTokenizer() if KiwiTokenizer else None
45
+
46
+ def execute(
47
+ self,
48
+ inputs: dict[str, Any],
49
+ params: dict[str, Any] | None = None,
50
+ ) -> dict[str, Any]:
51
+ loader_output = get_upstream_output(inputs, "load_data", "data_loader") or {}
52
+ run = loader_output.get("run")
53
+
54
+ if not isinstance(run, EvaluationRun):
55
+ return {"summary": {"run_id": None, "feature_count": 0}}
56
+
57
+ params = params or {}
58
+ include_vectors = bool(params.get("include_vectors", False))
59
+ max_nodes = int(params.get("max_graph_nodes", 50))
60
+ max_edges = int(params.get("max_graph_edges", 200))
61
+ min_samples = int(params.get("min_samples", 5))
62
+
63
+ feature_vectors = self._build_feature_vectors(run)
64
+ feature_stats = self._summarize_features(feature_vectors)
65
+ correlation_report = self._analyze_correlations(feature_vectors, min_samples=min_samples)
66
+ importance_report = self._analyze_feature_importance(
67
+ feature_vectors, min_samples=min_samples
68
+ )
69
+ graph = self._build_entity_graph(feature_vectors, max_nodes=max_nodes, max_edges=max_edges)
70
+
71
+ summary = {
72
+ "run_id": run.run_id,
73
+ "total_cases": len(feature_vectors),
74
+ "feature_count": len(feature_stats),
75
+ "metrics": list(run.metrics_evaluated),
76
+ "analysis_methods": ["correlation", importance_report.get("method")],
77
+ }
78
+
79
+ payload: dict[str, Any] = {
80
+ "summary": summary,
81
+ "feature_stats": feature_stats,
82
+ "feature_correlations": correlation_report,
83
+ "feature_importance": importance_report,
84
+ "entity_graph": graph,
85
+ }
86
+ if include_vectors:
87
+ payload["feature_vectors"] = [
88
+ {
89
+ "test_case_id": v["test_case_id"],
90
+ "features": v["features"],
91
+ "metric_scores": v["metric_scores"],
92
+ }
93
+ for v in feature_vectors
94
+ ]
95
+
96
+ return payload
97
+
98
+ def _build_feature_vectors(self, run: EvaluationRun) -> list[dict[str, Any]]:
99
+ vectors = []
100
+ feature_vectors = self._detector.extract_feature_vectors(run)
101
+ for vector in feature_vectors:
102
+ features = dict(vector.features)
103
+ features.update(
104
+ self._text_features(vector.question, vector.answer, vector.ground_truth)
105
+ )
106
+ features.update(self._context_features(vector.contexts))
107
+ features.update(
108
+ self._overlap_features(
109
+ vector.question, vector.answer, vector.ground_truth, vector.contexts
110
+ )
111
+ )
112
+ features.update(
113
+ self._language_features(
114
+ vector.question, vector.answer, vector.ground_truth, vector.contexts
115
+ )
116
+ )
117
+
118
+ vectors.append(
119
+ {
120
+ "test_case_id": vector.test_case_id,
121
+ "features": features,
122
+ "metric_scores": dict(vector.metric_scores),
123
+ "tokens": self._extract_tokens(
124
+ vector.question, vector.answer, vector.ground_truth, vector.contexts
125
+ ),
126
+ }
127
+ )
128
+ return vectors
129
+
130
+ def _text_features(
131
+ self, question: str, answer: str, ground_truth: str | None
132
+ ) -> dict[str, float]:
133
+ q_stats = self._basic_stats(question)
134
+ a_stats = self._basic_stats(answer)
135
+ gt_stats = self._basic_stats(ground_truth or "")
136
+ return {
137
+ "question_char_count": q_stats["char_count"],
138
+ "question_word_count": q_stats["word_count"],
139
+ "question_sentence_count": q_stats["sentence_count"],
140
+ "question_unique_word_ratio": q_stats["unique_word_ratio"],
141
+ "answer_char_count": a_stats["char_count"],
142
+ "answer_word_count": a_stats["word_count"],
143
+ "answer_sentence_count": a_stats["sentence_count"],
144
+ "answer_unique_word_ratio": a_stats["unique_word_ratio"],
145
+ "ground_truth_char_count": gt_stats["char_count"],
146
+ "ground_truth_word_count": gt_stats["word_count"],
147
+ "ground_truth_sentence_count": gt_stats["sentence_count"],
148
+ "ground_truth_unique_word_ratio": gt_stats["unique_word_ratio"],
149
+ }
150
+
151
+ def _context_features(self, contexts: list[str]) -> dict[str, float]:
152
+ merged = " ".join([ctx for ctx in contexts if ctx])
153
+ stats = self._basic_stats(merged)
154
+ total_length = sum(len(ctx) for ctx in contexts if ctx)
155
+ avg_length = total_length / len(contexts) if contexts else 0.0
156
+ return {
157
+ "context_count": float(len(contexts)),
158
+ "context_total_char_count": float(total_length),
159
+ "context_avg_char_count": float(avg_length),
160
+ "context_word_count": stats["word_count"],
161
+ "context_sentence_count": stats["sentence_count"],
162
+ "context_unique_word_ratio": stats["unique_word_ratio"],
163
+ }
164
+
165
+ def _overlap_features(
166
+ self,
167
+ question: str,
168
+ answer: str,
169
+ ground_truth: str | None,
170
+ contexts: list[str],
171
+ ) -> dict[str, float]:
172
+ question_tokens = self._token_set(question)
173
+ answer_tokens = self._token_set(answer)
174
+ truth_tokens = self._token_set(ground_truth or "")
175
+ context_tokens = self._token_set(" ".join(contexts))
176
+
177
+ return {
178
+ "question_answer_jaccard": self._jaccard(question_tokens, answer_tokens),
179
+ "question_context_jaccard": self._jaccard(question_tokens, context_tokens),
180
+ "answer_context_jaccard": self._jaccard(answer_tokens, context_tokens),
181
+ "question_truth_jaccard": self._jaccard(question_tokens, truth_tokens),
182
+ "answer_truth_jaccard": self._jaccard(answer_tokens, truth_tokens),
183
+ "truth_context_jaccard": self._jaccard(truth_tokens, context_tokens),
184
+ }
185
+
186
+ def _language_features(
187
+ self,
188
+ question: str,
189
+ answer: str,
190
+ ground_truth: str | None,
191
+ contexts: list[str],
192
+ ) -> dict[str, float]:
193
+ merged_context = " ".join([ctx for ctx in contexts if ctx])
194
+ return {
195
+ "question_korean_ratio": self._char_ratio(question, _HANGUL_PATTERN),
196
+ "question_english_ratio": self._char_ratio(question, _LATIN_PATTERN),
197
+ "answer_korean_ratio": self._char_ratio(answer, _HANGUL_PATTERN),
198
+ "answer_english_ratio": self._char_ratio(answer, _LATIN_PATTERN),
199
+ "ground_truth_korean_ratio": self._char_ratio(ground_truth or "", _HANGUL_PATTERN),
200
+ "ground_truth_english_ratio": self._char_ratio(ground_truth or "", _LATIN_PATTERN),
201
+ "context_korean_ratio": self._char_ratio(merged_context, _HANGUL_PATTERN),
202
+ "context_english_ratio": self._char_ratio(merged_context, _LATIN_PATTERN),
203
+ }
204
+
205
+ def _basic_stats(self, text: str) -> dict[str, float]:
206
+ stripped = text.strip()
207
+ if not stripped:
208
+ return {
209
+ "char_count": 0.0,
210
+ "word_count": 0.0,
211
+ "sentence_count": 0.0,
212
+ "unique_word_ratio": 0.0,
213
+ }
214
+ words = self._tokenize_words(stripped)
215
+ sentences = [s for s in _SENTENCE_PATTERN.split(stripped) if s.strip()]
216
+ unique_ratio = len({w.lower() for w in words}) / len(words) if words else 0.0
217
+ return {
218
+ "char_count": float(len(stripped)),
219
+ "word_count": float(len(words)),
220
+ "sentence_count": float(max(len(sentences), 1)),
221
+ "unique_word_ratio": float(unique_ratio),
222
+ }
223
+
224
+ def _tokenize_words(self, text: str) -> list[str]:
225
+ if self._tokenizer and _HANGUL_PATTERN.search(text):
226
+ return self._tokenizer.tokenize(text)
227
+ return _WORD_PATTERN.findall(text)
228
+
229
+ def _token_set(self, text: str) -> set[str]:
230
+ return {token.lower() for token in self._tokenize_words(text) if token}
231
+
232
+ def _jaccard(self, left: set[str], right: set[str]) -> float:
233
+ if not left and not right:
234
+ return 0.0
235
+ union = left | right
236
+ if not union:
237
+ return 0.0
238
+ return len(left & right) / len(union)
239
+
240
+ def _char_ratio(self, text: str, pattern: re.Pattern[str]) -> float:
241
+ if not text:
242
+ return 0.0
243
+ count = len(pattern.findall(text))
244
+ return count / max(len(text), 1)
245
+
246
+ def _summarize_features(self, vectors: list[dict[str, Any]]) -> dict[str, dict[str, float]]:
247
+ if not vectors:
248
+ return {}
249
+ feature_names = sorted(vectors[0]["features"].keys())
250
+ stats_map: dict[str, dict[str, float]] = {}
251
+ for name in feature_names:
252
+ values = [v["features"].get(name, 0.0) for v in vectors]
253
+ arr = np.array(values, dtype=float)
254
+ stats_map[name] = {
255
+ "mean": float(arr.mean()),
256
+ "std": float(arr.std()),
257
+ "min": float(arr.min()),
258
+ "max": float(arr.max()),
259
+ "median": float(np.median(arr)),
260
+ }
261
+ return stats_map
262
+
263
+ def _analyze_correlations(
264
+ self,
265
+ vectors: list[dict[str, Any]],
266
+ *,
267
+ min_samples: int,
268
+ ) -> dict[str, list[dict[str, float | str]]]:
269
+ if not vectors:
270
+ return {}
271
+ feature_names = sorted(vectors[0]["features"].keys())
272
+ correlations: dict[str, list[dict[str, float | str]]] = {}
273
+ for metric in self._metric_names(vectors):
274
+ pairs = []
275
+ xs, ys = self._aligned_series(vectors, metric, feature_names)
276
+ for name, values in xs.items():
277
+ target = ys.get(name)
278
+ if target is None or len(target) < min_samples:
279
+ continue
280
+ if len(set(values)) <= 1:
281
+ continue
282
+ try:
283
+ result = stats.pearsonr(values, target)
284
+ corr = cast(float, getattr(result, "statistic", result[0]))
285
+ p_value = cast(float, getattr(result, "pvalue", result[1]))
286
+ except Exception:
287
+ continue
288
+ pairs.append(
289
+ {
290
+ "feature": name,
291
+ "correlation": corr,
292
+ "p_value": p_value,
293
+ }
294
+ )
295
+ pairs.sort(key=lambda item: abs(item["correlation"]), reverse=True)
296
+ correlations[metric] = pairs[:50]
297
+ return correlations
298
+
299
+ def _analyze_feature_importance(
300
+ self,
301
+ vectors: list[dict[str, Any]],
302
+ *,
303
+ min_samples: int,
304
+ ) -> dict[str, Any]:
305
+ if not vectors:
306
+ return {"method": "none", "metrics": {}}
307
+ feature_names = sorted(vectors[0]["features"].keys())
308
+ metrics: dict[str, list[dict[str, Any]]] = {}
309
+
310
+ xgb_regressor = None
311
+ try:
312
+ from xgboost import XGBRegressor
313
+
314
+ xgb_regressor = XGBRegressor
315
+ except Exception:
316
+ xgb_regressor = None
317
+
318
+ for metric in self._metric_names(vectors):
319
+ xs, ys = self._aligned_matrix(vectors, metric, feature_names)
320
+ if xs is None or ys is None or len(ys) < min_samples:
321
+ continue
322
+ importances: list[dict[str, Any]] = []
323
+ if xgb_regressor is not None and len(ys) >= max(min_samples, 10):
324
+ try:
325
+ model = xgb_regressor(
326
+ n_estimators=200,
327
+ max_depth=4,
328
+ learning_rate=0.1,
329
+ subsample=0.8,
330
+ colsample_bytree=0.8,
331
+ tree_method="hist",
332
+ objective="reg:squarederror",
333
+ random_state=42,
334
+ )
335
+ model.fit(xs, ys)
336
+ for name, score in zip(feature_names, model.feature_importances_, strict=True):
337
+ importances.append({"feature": name, "importance": float(score)})
338
+ except Exception:
339
+ importances = []
340
+
341
+ if not importances:
342
+ corr_map = self._simple_importance_from_correlation(xs, ys, feature_names)
343
+ importances = [{"feature": name, "importance": score} for name, score in corr_map]
344
+
345
+ importances.sort(key=lambda item: item["importance"], reverse=True)
346
+ metrics[metric] = importances[:50]
347
+
348
+ return {
349
+ "method": "xgboost" if xgb_regressor is not None else "correlation",
350
+ "metrics": metrics,
351
+ }
352
+
353
+ def _simple_importance_from_correlation(
354
+ self,
355
+ xs: np.ndarray,
356
+ ys: np.ndarray,
357
+ feature_names: list[str],
358
+ ) -> list[tuple[str, float]]:
359
+ importance: list[tuple[str, float]] = []
360
+ for idx, name in enumerate(feature_names):
361
+ values = xs[:, idx]
362
+ if len(set(values)) <= 1:
363
+ continue
364
+ try:
365
+ corr = float(np.corrcoef(values, ys)[0, 1])
366
+ except Exception:
367
+ continue
368
+ if math.isnan(corr):
369
+ continue
370
+ importance.append((name, abs(corr)))
371
+ return importance
372
+
373
+ def _metric_names(self, vectors: list[dict[str, Any]]) -> list[str]:
374
+ names = set()
375
+ for vector in vectors:
376
+ names.update(vector["metric_scores"].keys())
377
+ return sorted(names)
378
+
379
+ def _aligned_series(
380
+ self,
381
+ vectors: list[dict[str, Any]],
382
+ metric: str,
383
+ feature_names: list[str],
384
+ ) -> tuple[dict[str, list[float]], dict[str, list[float]]]:
385
+ xs: dict[str, list[float]] = {name: [] for name in feature_names}
386
+ ys: dict[str, list[float]] = {name: [] for name in feature_names}
387
+ for vector in vectors:
388
+ score = vector["metric_scores"].get(metric)
389
+ if score is None:
390
+ continue
391
+ for name in feature_names:
392
+ xs[name].append(float(vector["features"].get(name, 0.0)))
393
+ ys[name].append(float(score))
394
+ return xs, ys
395
+
396
+ def _aligned_matrix(
397
+ self,
398
+ vectors: list[dict[str, Any]],
399
+ metric: str,
400
+ feature_names: list[str],
401
+ ) -> tuple[np.ndarray | None, np.ndarray | None]:
402
+ rows: list[list[float]] = []
403
+ targets: list[float] = []
404
+ for vector in vectors:
405
+ score = vector["metric_scores"].get(metric)
406
+ if score is None:
407
+ continue
408
+ rows.append([float(vector["features"].get(name, 0.0)) for name in feature_names])
409
+ targets.append(float(score))
410
+ if not rows:
411
+ return None, None
412
+ return np.array(rows, dtype=float), np.array(targets, dtype=float)
413
+
414
+ def _extract_tokens(
415
+ self,
416
+ question: str,
417
+ answer: str,
418
+ ground_truth: str | None,
419
+ contexts: list[str],
420
+ ) -> list[str]:
421
+ texts = [question, answer, ground_truth or ""] + [ctx for ctx in contexts if ctx]
422
+ tokens: list[str] = []
423
+ for text in texts:
424
+ if self._tokenizer and _HANGUL_PATTERN.search(text):
425
+ tokens.extend(self._tokenizer.extract_keywords(text))
426
+ else:
427
+ tokens.extend(_WORD_PATTERN.findall(text.lower()))
428
+ return [t for t in tokens if t]
429
+
430
+ def _build_entity_graph(
431
+ self,
432
+ vectors: list[dict[str, Any]],
433
+ *,
434
+ max_nodes: int,
435
+ max_edges: int,
436
+ ) -> dict[str, Any]:
437
+ node_counts: Counter[str] = Counter()
438
+ edge_counts: Counter[tuple[str, str]] = Counter()
439
+ for vector in vectors:
440
+ tokens = vector.get("tokens") or []
441
+ unique_tokens = list({t for t in tokens if t})
442
+ node_counts.update(unique_tokens)
443
+ for left, right in itertools.combinations(sorted(unique_tokens), 2):
444
+ edge_counts[(left, right)] += 1
445
+
446
+ top_nodes = [node for node, _count in node_counts.most_common(max_nodes)]
447
+ node_set = set(top_nodes)
448
+
449
+ edges = [
450
+ {"source": left, "target": right, "weight": count}
451
+ for (left, right), count in edge_counts.most_common(max_edges)
452
+ if left in node_set and right in node_set
453
+ ]
454
+ nodes = [{"id": node, "count": node_counts[node]} for node in top_nodes]
455
+ return {
456
+ "nodes": nodes,
457
+ "edges": edges,
458
+ }
@@ -58,6 +58,7 @@ def build_analysis_pipeline_service(
58
58
  service.register_module(analysis_modules.DiagnosticPlaybookModule())
59
59
  service.register_module(analysis_modules.RootCauseAnalyzerModule())
60
60
  service.register_module(analysis_modules.PatternDetectorModule())
61
+ service.register_module(analysis_modules.DatasetFeatureAnalyzerModule())
61
62
  service.register_module(analysis_modules.MultiTurnAnalyzerModule())
62
63
  service.register_module(analysis_modules.TimeSeriesAnalyzerModule())
63
64
  service.register_module(analysis_modules.TimeSeriesAdvancedModule())
@@ -10,7 +10,7 @@ numpy/scipy 기반 통계 분석 기능을 제공합니다.
10
10
  from __future__ import annotations
11
11
 
12
12
  import logging
13
- from typing import Literal
13
+ from typing import Any, Literal, cast
14
14
 
15
15
  import numpy as np
16
16
  from scipy import stats
@@ -206,8 +206,10 @@ class StatisticalAnalysisAdapter(BaseAnalysisAdapter):
206
206
  row.append(0.0)
207
207
  else:
208
208
  try:
209
- corr, p_value = stats.pearsonr(scores_i, scores_j)
210
- row.append(float(corr))
209
+ result = cast(Any, stats.pearsonr(scores_i, scores_j))
210
+ corr = float(getattr(result, "statistic", result[0]))
211
+ p_value = float(getattr(result, "pvalue", result[1]))
212
+ row.append(corr)
211
213
 
212
214
  # 유의미한 상관관계만 기록 (i < j로 중복 방지)
213
215
  if i < j and p_value < 0.05 and abs(corr) >= 0.3:
@@ -338,10 +340,14 @@ class StatisticalAnalysisAdapter(BaseAnalysisAdapter):
338
340
  mean_b = float(arr_b.mean())
339
341
 
340
342
  # 통계 검정
341
- if test_type == "t-test":
342
- _, p_value = stats.ttest_ind(arr_a, arr_b)
343
+ if len(arr_a) < 2 or len(arr_b) < 2 or np.std(arr_a) == 0.0 and np.std(arr_b) == 0.0:
344
+ p_value = 1.0
345
+ elif test_type == "t-test":
346
+ result = cast(Any, stats.ttest_ind(arr_a, arr_b))
347
+ p_value = float(getattr(result, "pvalue", result[1]))
343
348
  elif test_type == "mann-whitney":
344
- _, p_value = stats.mannwhitneyu(arr_a, arr_b, alternative="two-sided")
349
+ result = cast(Any, stats.mannwhitneyu(arr_a, arr_b, alternative="two-sided"))
350
+ p_value = float(getattr(result, "pvalue", result[1]))
345
351
  else:
346
352
  raise ValueError(f"Unknown test type: {test_type}")
347
353
 
@@ -139,6 +139,10 @@ class PatternDetector:
139
139
 
140
140
  return results
141
141
 
142
+ def extract_feature_vectors(self, run: EvaluationRun) -> list[FeatureVector]:
143
+ """Expose feature vectors for downstream analysis."""
144
+ return self._extract_features(run)
145
+
142
146
  def _extract_features(self, run: EvaluationRun) -> list[FeatureVector]:
143
147
  """테스트 케이스별 피처 추출."""
144
148
  vectors = []