evalvault 1.57.1__py3-none-any.whl → 1.59.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. evalvault/adapters/inbound/api/routers/pipeline.py +48 -0
  2. evalvault/adapters/inbound/cli/commands/analyze.py +434 -179
  3. evalvault/adapters/inbound/cli/commands/pipeline.py +5 -1
  4. evalvault/adapters/inbound/cli/commands/run.py +628 -183
  5. evalvault/adapters/inbound/cli/commands/run_helpers.py +29 -30
  6. evalvault/adapters/inbound/cli/utils/analysis_io.py +2 -2
  7. evalvault/adapters/inbound/cli/utils/progress.py +2 -2
  8. evalvault/adapters/outbound/analysis/__init__.py +13 -3
  9. evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +2 -1
  10. evalvault/adapters/outbound/analysis/embedding_searcher_module.py +2 -1
  11. evalvault/adapters/outbound/analysis/hypothesis_generator_module.py +359 -0
  12. evalvault/adapters/outbound/analysis/llm_report_module.py +9 -9
  13. evalvault/adapters/outbound/analysis/network_analyzer_module.py +250 -0
  14. evalvault/adapters/outbound/analysis/pipeline_factory.py +3 -0
  15. evalvault/adapters/outbound/analysis/pipeline_helpers.py +1 -1
  16. evalvault/adapters/outbound/analysis/priority_summary_module.py +1 -1
  17. evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py +3 -2
  18. evalvault/adapters/outbound/analysis/timeseries_advanced_module.py +349 -0
  19. evalvault/adapters/outbound/benchmark/lm_eval_adapter.py +1 -1
  20. evalvault/adapters/outbound/documents/__init__.py +4 -0
  21. evalvault/adapters/outbound/documents/ocr/__init__.py +3 -0
  22. evalvault/adapters/outbound/documents/ocr/paddleocr_backend.py +112 -0
  23. evalvault/adapters/outbound/documents/pdf_extractor.py +50 -0
  24. evalvault/adapters/outbound/documents/versioned_loader.py +244 -0
  25. evalvault/adapters/outbound/improvement/insight_generator.py +23 -12
  26. evalvault/adapters/outbound/improvement/pattern_detector.py +16 -10
  27. evalvault/adapters/outbound/improvement/playbook_loader.py +21 -13
  28. evalvault/adapters/outbound/kg/graph_rag_retriever.py +2 -1
  29. evalvault/adapters/outbound/llm/__init__.py +63 -63
  30. evalvault/adapters/outbound/llm/instructor_factory.py +101 -7
  31. evalvault/adapters/outbound/llm/ollama_adapter.py +27 -27
  32. evalvault/adapters/outbound/llm/token_aware_chat.py +1 -1
  33. evalvault/adapters/outbound/report/__init__.py +2 -0
  34. evalvault/adapters/outbound/report/dashboard_generator.py +197 -0
  35. evalvault/adapters/outbound/report/llm_report_generator.py +4 -4
  36. evalvault/adapters/outbound/report/markdown_adapter.py +61 -63
  37. evalvault/adapters/outbound/storage/postgres_adapter.py +1 -1
  38. evalvault/adapters/outbound/tracer/open_rag_log_handler.py +3 -3
  39. evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +3 -3
  40. evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py +4 -4
  41. evalvault/config/settings.py +10 -0
  42. evalvault/domain/entities/analysis_pipeline.py +13 -3
  43. evalvault/domain/services/analysis_service.py +3 -3
  44. evalvault/domain/services/document_versioning.py +119 -0
  45. evalvault/domain/services/evaluator.py +1 -1
  46. evalvault/domain/services/pipeline_template_registry.py +197 -127
  47. evalvault/domain/services/retriever_context.py +56 -2
  48. evalvault/domain/services/visual_space_service.py +1 -1
  49. evalvault/ports/outbound/analysis_port.py +2 -2
  50. evalvault/ports/outbound/improvement_port.py +4 -0
  51. evalvault-1.59.0.dist-info/METADATA +327 -0
  52. {evalvault-1.57.1.dist-info → evalvault-1.59.0.dist-info}/RECORD +55 -45
  53. evalvault-1.57.1.dist-info/METADATA +0 -683
  54. {evalvault-1.57.1.dist-info → evalvault-1.59.0.dist-info}/WHEEL +0 -0
  55. {evalvault-1.57.1.dist-info → evalvault-1.59.0.dist-info}/entry_points.txt +0 -0
  56. {evalvault-1.57.1.dist-info → evalvault-1.59.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,327 @@
1
+ Metadata-Version: 2.4
2
+ Name: evalvault
3
+ Version: 1.59.0
4
+ Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
5
+ Project-URL: Homepage, https://github.com/ntts9990/EvalVault
6
+ Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
7
+ Project-URL: Repository, https://github.com/ntts9990/EvalVault.git
8
+ Project-URL: Issues, https://github.com/ntts9990/EvalVault/issues
9
+ Project-URL: Changelog, https://github.com/ntts9990/EvalVault/releases
10
+ Author: EvalVault Contributors
11
+ Maintainer: EvalVault Contributors
12
+ License: Apache-2.0
13
+ License-File: LICENSE.md
14
+ Keywords: ai,evaluation,langfuse,llm,machine-learning,nlp,observability,opentelemetry,phoenix,rag,ragas,retrieval-augmented-generation,testing
15
+ Classifier: Development Status :: 4 - Beta
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: License :: OSI Approved :: Apache Software License
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Software Development :: Quality Assurance
25
+ Classifier: Topic :: Software Development :: Testing
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.12
28
+ Requires-Dist: chardet
29
+ Requires-Dist: fastapi>=0.128.0
30
+ Requires-Dist: instructor
31
+ Requires-Dist: langchain-openai
32
+ Requires-Dist: langfuse
33
+ Requires-Dist: matplotlib<3.9.0,>=3.8.0
34
+ Requires-Dist: networkx
35
+ Requires-Dist: openai
36
+ Requires-Dist: openpyxl
37
+ Requires-Dist: pandas
38
+ Requires-Dist: pydantic
39
+ Requires-Dist: pydantic-settings
40
+ Requires-Dist: python-multipart
41
+ Requires-Dist: ragas==0.4.2
42
+ Requires-Dist: rich
43
+ Requires-Dist: truststore>=0.10.4
44
+ Requires-Dist: typer
45
+ Requires-Dist: uvicorn>=0.40.0
46
+ Requires-Dist: xlrd
47
+ Provides-Extra: analysis
48
+ Requires-Dist: scikit-learn>=1.3.0; extra == 'analysis'
49
+ Provides-Extra: anthropic
50
+ Requires-Dist: anthropic; extra == 'anthropic'
51
+ Requires-Dist: langchain-anthropic; extra == 'anthropic'
52
+ Provides-Extra: benchmark
53
+ Requires-Dist: datasets>=2.0.0; extra == 'benchmark'
54
+ Requires-Dist: lm-eval[api]>=0.4.0; extra == 'benchmark'
55
+ Provides-Extra: dashboard
56
+ Requires-Dist: matplotlib<3.9.0,>=3.8.0; extra == 'dashboard'
57
+ Provides-Extra: dev
58
+ Requires-Dist: anthropic; extra == 'dev'
59
+ Requires-Dist: arize-phoenix>=8.0.0; extra == 'dev'
60
+ Requires-Dist: datasets>=2.0.0; extra == 'dev'
61
+ Requires-Dist: faiss-cpu>=1.8.0; extra == 'dev'
62
+ Requires-Dist: ijson>=3.3.0; extra == 'dev'
63
+ Requires-Dist: kiwipiepy>=0.18.0; extra == 'dev'
64
+ Requires-Dist: langchain-anthropic; extra == 'dev'
65
+ Requires-Dist: lm-eval[api]>=0.4.0; extra == 'dev'
66
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'dev'
67
+ Requires-Dist: mkdocs>=1.5.0; extra == 'dev'
68
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'dev'
69
+ Requires-Dist: mlflow>=2.0.0; extra == 'dev'
70
+ Requires-Dist: openinference-instrumentation-langchain>=0.1.0; extra == 'dev'
71
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == 'dev'
72
+ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == 'dev'
73
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'dev'
74
+ Requires-Dist: psycopg[binary]>=3.0.0; extra == 'dev'
75
+ Requires-Dist: pydeps>=3.0.1; extra == 'dev'
76
+ Requires-Dist: pymdown-extensions>=10.7.0; extra == 'dev'
77
+ Requires-Dist: pytest; extra == 'dev'
78
+ Requires-Dist: pytest-asyncio; extra == 'dev'
79
+ Requires-Dist: pytest-cov; extra == 'dev'
80
+ Requires-Dist: pytest-html; extra == 'dev'
81
+ Requires-Dist: pytest-mock; extra == 'dev'
82
+ Requires-Dist: pytest-rerunfailures; extra == 'dev'
83
+ Requires-Dist: pytest-xdist; extra == 'dev'
84
+ Requires-Dist: python-multipart; extra == 'dev'
85
+ Requires-Dist: rank-bm25>=0.2.2; extra == 'dev'
86
+ Requires-Dist: ruff; extra == 'dev'
87
+ Requires-Dist: scikit-learn<1.4.0,>=1.3.0; extra == 'dev'
88
+ Requires-Dist: sentence-transformers>=5.2.0; extra == 'dev'
89
+ Provides-Extra: docs
90
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
91
+ Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
92
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'docs'
93
+ Requires-Dist: pymdown-extensions>=10.7.0; extra == 'docs'
94
+ Provides-Extra: korean
95
+ Requires-Dist: kiwipiepy>=0.18.0; extra == 'korean'
96
+ Requires-Dist: rank-bm25>=0.2.2; extra == 'korean'
97
+ Requires-Dist: sentence-transformers>=5.2.0; extra == 'korean'
98
+ Provides-Extra: mlflow
99
+ Requires-Dist: mlflow>=2.0.0; extra == 'mlflow'
100
+ Provides-Extra: perf
101
+ Requires-Dist: faiss-cpu>=1.8.0; extra == 'perf'
102
+ Requires-Dist: ijson>=3.3.0; extra == 'perf'
103
+ Provides-Extra: phoenix
104
+ Requires-Dist: arize-phoenix>=8.0.0; extra == 'phoenix'
105
+ Requires-Dist: openinference-instrumentation-langchain>=0.1.0; extra == 'phoenix'
106
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == 'phoenix'
107
+ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == 'phoenix'
108
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'phoenix'
109
+ Provides-Extra: postgres
110
+ Requires-Dist: psycopg[binary]>=3.0.0; extra == 'postgres'
111
+ Provides-Extra: timeseries
112
+ Requires-Dist: aeon>=1.3.0; extra == 'timeseries'
113
+ Requires-Dist: numba>=0.55.0; extra == 'timeseries'
114
+ Provides-Extra: web
115
+ Description-Content-Type: text/markdown
116
+
117
+ # EvalVault
118
+
119
+ RAG(Retrieval-Augmented Generation) 시스템을 대상으로 **평가(Eval) → 분석(Analysis) → 추적(Tracing) → 개선 루프**를 하나의 워크플로로 묶는 CLI + Web UI 플랫폼입니다.
120
+
121
+ [![PyPI](https://img.shields.io/pypi/v/evalvault.svg)](https://pypi.org/project/evalvault/)
122
+ [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
123
+ [![CI](https://github.com/ntts9990/EvalVault/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/ntts9990/EvalVault/actions/workflows/ci.yml)
124
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE.md)
125
+
126
+ English version? See `README.en.md`.
127
+
128
+ ---
129
+
130
+ ## Quick Links
131
+
132
+ - 문서 허브: `docs/INDEX.md`
133
+ - 사용자 가이드: `docs/guides/USER_GUIDE.md`
134
+ - 개발 가이드: `docs/guides/DEV_GUIDE.md`
135
+ - 상태/로드맵: `docs/STATUS.md`, `docs/ROADMAP.md`
136
+ - 개발 백서(설계/운영/품질 기준): `docs/new_whitepaper/INDEX.md`
137
+ - Open RAG Trace: `docs/architecture/open-rag-trace-spec.md`
138
+
139
+ ---
140
+
141
+ ## EvalVault가 해결하는 문제
142
+
143
+ RAG를 운영하다 보면 결국 아래 질문으로 귀결됩니다.
144
+
145
+ - “모델/프롬프트/리트리버를 바꿨는데, **진짜 좋아졌나?**”
146
+ - “좋아졌다면 **왜** 좋아졌고, 나빠졌다면 **어디서** 깨졌나?”
147
+ - “이 결론을 **재현 가능하게** 팀/CI에서 계속 검증할 수 있나?”
148
+
149
+ EvalVault는 위 질문을 **데이터셋 + 메트릭 + (선택)트레이싱** 관점에서 한 번에 답할 수 있게 설계했습니다.
150
+
151
+ ---
152
+
153
+ ## 핵심 개념
154
+
155
+ - **Run 단위**: 평가/분석/아티팩트/트레이스가 하나의 `run_id`로 묶입니다.
156
+ - **Dataset 중심**: threshold(합격 기준)는 데이터셋에 포함되어 “도메인별 합격 기준”을 유지합니다.
157
+ - **Artifacts-first**: 보고서(요약)뿐 아니라, 분석 모듈별 원본 결과(아티팩트)를 구조화된 디렉터리에 보존합니다.
158
+ - **Observability 옵션화**: Phoenix/Langfuse/MLflow는 “필요할 때 켜는” 방식으로, 실행 경로는 최대한 단순하게 유지합니다.
159
+
160
+ ---
161
+
162
+ ## 3분 Quickstart (CLI)
163
+
164
+ ```bash
165
+ uv sync --extra dev
166
+ cp .env.example .env
167
+
168
+ uv run evalvault run --mode simple tests/fixtures/e2e/insurance_qa_korean.json \
169
+ --metrics faithfulness,answer_relevancy \
170
+ --profile dev \
171
+ --db data/db/evalvault.db \
172
+ --auto-analyze
173
+ ```
174
+
175
+ - 결과는 `--db`에 저장되어 `history`, Web UI, 비교 분석에서 재사용됩니다.
176
+ - `--auto-analyze`는 요약 리포트 + 모듈별 아티팩트를 함께 생성합니다.
177
+
178
+ ---
179
+
180
+ ## Web UI (FastAPI + React)
181
+
182
+ ```bash
183
+ # API
184
+ uv run evalvault serve-api --reload
185
+
186
+ # Frontend
187
+ cd frontend
188
+ npm install
189
+ npm run dev
190
+ ```
191
+
192
+ 브라우저에서 `http://localhost:5173` 접속 후, Evaluation Studio에서 실행/히스토리/리포트를 확인합니다.
193
+
194
+ ---
195
+
196
+ ## 산출물(Artifacts) 경로
197
+
198
+ - 단일 실행 자동 분석:
199
+   - 요약 JSON: `reports/analysis/analysis_<RUN_ID>.json`
200
+   - 보고서: `reports/analysis/analysis_<RUN_ID>.md`
201
+   - 아티팩트 인덱스: `reports/analysis/artifacts/analysis_<RUN_ID>/index.json`
202
+   - 노드별 결과: `reports/analysis/artifacts/analysis_<RUN_ID>/<node_id>.json`
203
+
204
+ - A/B 비교 분석:
205
+   - 요약 JSON: `reports/comparison/comparison_<RUN_A>_<RUN_B>.json`
206
+   - 보고서: `reports/comparison/comparison_<RUN_A>_<RUN_B>.md`
207
+
208
+ ---
209
+
210
+ ## 데이터셋 포맷(요약)
211
+
212
+ ```json
213
+ {
214
+ "name": "insurance-qa",
215
+ "version": "1.0.0",
216
+ "thresholds": { "faithfulness": 0.8 },
217
+ "test_cases": [
218
+ {
219
+ "id": "tc-001",
220
+ "question": "...",
221
+ "answer": "...",
222
+ "contexts": ["..."]
223
+ }
224
+ ]
225
+ }
226
+ ```
227
+
228
+ - 필수 필드: `id`, `question`, `answer`, `contexts`
229
+ - `ground_truth`는 일부 메트릭에서 필요합니다.
230
+ - 템플릿: `docs/templates/dataset_template.json`, `docs/templates/dataset_template.csv`, `docs/templates/dataset_template.xlsx`
231
+ - 관련 문서: `docs/guides/USER_GUIDE.md`
232
+
233
+ ---
234
+
235
+ ## 지원 메트릭(대표)
236
+
237
+ - Ragas 계열: `faithfulness`, `answer_relevancy`, `context_precision`, `context_recall`, `factual_correctness`, `semantic_similarity`
238
+ - 커스텀 예시(도메인): `insurance_term_accuracy`
239
+
240
+ 정확한 옵션/운영 레시피는 `docs/guides/USER_GUIDE.md`를 기준으로 최신화합니다.
241
+
242
+ ---
243
+
244
+ ## RAGAS 0.4.2 데이터 전처리/후처리 (중요)
245
+
246
+ 아래 항목은 **RAGAS 0.4.2 기준**으로 EvalVault가 데이터와 점수를 안정화하기 위해 수행하는 처리들입니다. 모두 재현성과 품질 저하 방지를 위해 의도적으로 설계되었습니다.
247
+
248
+ ### 1) 데이터 전처리 (입력 안정화)
249
+ - **빈 질문/답변/컨텍스트 제거**: 평가 불가능한 케이스를 사전에 제거합니다. (`src/evalvault/domain/services/dataset_preprocessor.py`)
250
+ - **컨텍스트 정규화**: 공백 정리, 중복 제거, 길이 제한을 통해 컨텍스트 품질을 표준화합니다. (`src/evalvault/domain/services/dataset_preprocessor.py`)
251
+ - **레퍼런스 보완**: 레퍼런스가 필요한 메트릭에서 부족할 경우 질문/답변/컨텍스트 기반으로 보완합니다. (`src/evalvault/domain/services/dataset_preprocessor.py`)
252
+
253
+ **이유**: 입력 품질 편차로 인해 RAGAS 점수 분산이 커지는 것을 방지하고, 메트릭 실행 실패/왜곡을 줄입니다.
254
+
255
+ ### 2) 한국어/비영어권 대응 (프롬프트 언어 정렬)
256
+ - **한국어 데이터셋 자동 감지** 후 `answer_relevancy`, `factual_correctness`에 한국어 프롬프트를 기본 적용합니다. (`src/evalvault/domain/services/evaluator.py`)
257
+ - **사용자 프롬프트 오버라이드 지원**: 필요 시 YAML로 메트릭별 프롬프트를 덮어쓸 수 있습니다. (`src/evalvault/domain/services/ragas_prompt_overrides.py`)
258
+ - **외부 근거(비영어권 이슈)**:
259
+   - https://github.com/explodinggradients/ragas/issues/1829
260
+   - https://github.com/explodinggradients/ragas/issues/402
261
+ - **공식 문서(언어 이슈 직접 언급)**:
262
+   - https://docs.ragas.io/en/stable/howtos/customizations/metrics/_metrics_language_adaptation/
263
+
264
+ **이유**: 질문 생성/판정 프롬프트가 영어에 고정될 경우, 비영어 입력에서 언어 불일치로 점수 왜곡이 발생할 수 있으므로 이를 최소화합니다.
265
+
266
+ ### 3) 점수 후처리 (안정성 확보)
267
+ - **비숫자/NaN 점수는 0.0 처리**: 메트릭 실패가 전체 파이프라인을 중단시키지 않도록 방어합니다. (`src/evalvault/domain/services/evaluator.py`)
268
+ - **Faithfulness 폴백**: RAGAS가 실패하거나 한국어 텍스트에서 불안정할 경우, 한국어 전용 claim-level 분석으로 점수를 재구성합니다. (`src/evalvault/domain/services/evaluator.py`)
269
+
270
+ **이유**: LLM/임베딩 실패나 NaN으로 인해 결과가 끊기는 문제를 방지하고, 한국어에서 최소한의 신뢰도를 확보하기 위해서입니다.
271
+
272
+ ### 4) 요약/시각화 후처리 (비교 가능성 강화)
273
+ - **임계값 기준 정규화**: threshold를 0점 기준으로 정규화하여 성능 개선/악화를 직관적으로 표시합니다. (`src/evalvault/domain/services/visual_space_service.py`)
274
+ - **가중 합산**: `faithfulness`, `factual_correctness`, `answer_relevancy` 등을 가중 결합하여 축/지표로 요약합니다. (`src/evalvault/domain/services/visual_space_service.py`)
275
+
276
+ **이유**: 단일 지표만으로는 해석이 어려운 경우가 많아, 정책적 기준(임계값)과 함께 비교 가능한 요약 점수로 제공하기 위함입니다.
277
+
278
+ ---
279
+
280
+ ## 모델/프로필 설정(요약)
281
+
282
+ - 프로필 정의: `config/models.yaml`
283
+ - 공통 환경 변수(예):
284
+   - `EVALVAULT_PROFILE`
285
+   - `EVALVAULT_DB_PATH`
286
+   - `OPENAI_API_KEY` 또는 `OLLAMA_BASE_URL` 등
287
+ - 관련 문서: `docs/guides/USER_GUIDE.md`, `docs/guides/DEV_GUIDE.md`, `config/models.yaml`
288
+
289
+ ---
290
+
291
+ ## Open RAG Trace (외부 RAG 시스템까지 통합)
292
+
293
+ EvalVault는 OpenTelemetry + OpenInference 기반의 **Open RAG Trace** 스키마를 제공해, 외부/내부 RAG 시스템을 동일한 방식으로 계측/수집/분석할 수 있게 합니다.
294
+
295
+ - 스펙: `docs/architecture/open-rag-trace-spec.md`
296
+ - Collector: `docs/architecture/open-rag-trace-collector.md`
297
+ - 샘플/내부 래퍼: `docs/guides/open-rag-trace-samples.md`, `docs/guides/open-rag-trace-internal-adapter.md`
298
+ - 관련 문서: `docs/INDEX.md`, `docs/architecture/open-rag-trace-collector.md`
299
+
300
+ ---
301
+
302
+ ## 개발/기여
303
+
304
+ ```bash
305
+ uv run ruff check src/ tests/
306
+ uv run ruff format src/ tests/
307
+ uv run pytest tests -v
308
+ ```
309
+
310
+ - 기여 가이드: `CONTRIBUTING.md`
311
+ - 개발 루틴: `docs/guides/DEV_GUIDE.md`
312
+ - 관련 문서: `docs/STATUS.md`, `docs/ROADMAP.md`
313
+
314
+ ---
315
+
316
+ ## 문서
317
+
318
+ - `docs/INDEX.md`: 문서 허브
319
+ - `docs/STATUS.md`, `docs/ROADMAP.md`: 현재 상태/방향
320
+ - `docs/guides/USER_GUIDE.md`: 사용/운영 종합
321
+ - `docs/new_whitepaper/INDEX.md`: 설계/운영/품질 기준(전문가 관점)
322
+
323
+ ---
324
+
325
+ ## License
326
+
327
+ EvalVault is licensed under the [Apache 2.0](LICENSE.md) license.
@@ -12,13 +12,13 @@ evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUB
12
12
  evalvault/adapters/inbound/api/routers/config.py,sha256=CN-FH2cn0Ive-BD3WacWY6PFfuMtZEHP5_out3fvST4,3957
13
13
  evalvault/adapters/inbound/api/routers/domain.py,sha256=RsR7GIFMjccDN7vpG1uDyk9n1DnCTH18JDGAX7o4Qqc,3648
14
14
  evalvault/adapters/inbound/api/routers/knowledge.py,sha256=7mgyoUM1PepFb4X8_Ntn0vd7ZZYcNbM3_9nyD10g4Aw,5307
15
- evalvault/adapters/inbound/api/routers/pipeline.py,sha256=GKQaeR6FZVs_ManVMCn3vVBBONzA4rKHmvhLDxr5s5c,14702
15
+ evalvault/adapters/inbound/api/routers/pipeline.py,sha256=tWuXwM-AH_NVDzemtsxbi5Dyn5kYyc1vPFS1sg2TPuw,16655
16
16
  evalvault/adapters/inbound/api/routers/runs.py,sha256=W3QaSMN3ByqNLynh_uWkMv0_-NvsVKedbuKsEAAoZr0,33160
17
17
  evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675
18
18
  evalvault/adapters/inbound/cli/app.py,sha256=Gf_VWXK2aUzVL63F5ulqPd88MgO1n823uISGhGHsdEI,1813
19
19
  evalvault/adapters/inbound/cli/commands/__init__.py,sha256=ciIHbHgP0gtasVi4l5cHjVojERrb-uipga_E0EwCrqM,3431
20
20
  evalvault/adapters/inbound/cli/commands/agent.py,sha256=YlOYMEzzS1aSKDKD_a7UK3St18X6GXGkdTatrzyd8Zc,7555
21
- evalvault/adapters/inbound/cli/commands/analyze.py,sha256=DDel6Yz99Yau-Ypnufrx6ldErTdNDE3GTqEZp47664M,37240
21
+ evalvault/adapters/inbound/cli/commands/analyze.py,sha256=aMi1BEDOX3yhN-ppBftDssPQLB5TdzIfpx9U7CZEgWo,48932
22
22
  evalvault/adapters/inbound/cli/commands/api.py,sha256=YdbJ_-QEajnFcjTa7P2heLMjFKpeQ4nWP_p-HvfYkEo,1943
23
23
  evalvault/adapters/inbound/cli/commands/benchmark.py,sha256=RZ4nRTF7d6hDZug-Pw8dGcFEyWdOKclwqkvS-gN4VWo,41097
24
24
  evalvault/adapters/inbound/cli/commands/config.py,sha256=r3DH2a0-PgJIzpyB7teiykDulhUwUJUkiFWLrbjhF6k,7148
@@ -33,22 +33,22 @@ evalvault/adapters/inbound/cli/commands/kg.py,sha256=ycV9Xj6SUUJLTyTfLZcjXDVLcZq
33
33
  evalvault/adapters/inbound/cli/commands/langfuse.py,sha256=aExhZ5WYT0FzJI4v1sF-a1jqy9b1BF46_HBtfiQjVGI,4085
34
34
  evalvault/adapters/inbound/cli/commands/method.py,sha256=K1UacoKwV9w8sLeQK8qHyTuZqFZrlcj6yS_y2izfRlo,18853
35
35
  evalvault/adapters/inbound/cli/commands/phoenix.py,sha256=LQi3KTLq1ybjjBuz92oQ6lYyBS3mHrCHk0qe-7bqB4U,15611
36
- evalvault/adapters/inbound/cli/commands/pipeline.py,sha256=w-qcI88wxrWup63HKhj16jvqTev_nRPG7eMQQWQ8h08,7788
36
+ evalvault/adapters/inbound/cli/commands/pipeline.py,sha256=Hg3A2LGTLw_rjd6ZgT5lOVsTASXIyq2DimUna24FRv0,7936
37
37
  evalvault/adapters/inbound/cli/commands/prompts.py,sha256=6UwQtKJf3JYhcNI4tQqjjsL-sp_cmu2VV7gETkCcmkk,5490
38
- evalvault/adapters/inbound/cli/commands/run.py,sha256=o5xY9jOWcNI-eZM1XwwiWk4QKYjh58tQ5bjqyT-eylc,92472
39
- evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=Wy4nJmAuVWHmhltKOMOKFNK4k8oZxbc9Gq6Ejb2FJC4,39696
38
+ evalvault/adapters/inbound/cli/commands/run.py,sha256=6d_AnONUiroNMF1xZt8O1sbtqb5HcE53ZMAU-UOp1cA,115469
39
+ evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=50nYzf4DUniJd7fQgT2cyh_FWVTWZzW0UMXCg-EHBuY,39764
40
40
  evalvault/adapters/inbound/cli/commands/stage.py,sha256=oRC9c5CysLX90Iy5Ba1pc_00DaOBS78lcBvzkbdrGRM,17123
41
41
  evalvault/adapters/inbound/cli/utils/__init__.py,sha256=QPNKneZS-Z-tTnYYxtgJXgcJWY6puUlRQcKrn7Mlv1M,685
42
- evalvault/adapters/inbound/cli/utils/analysis_io.py,sha256=kVjEBaYqQ6GBTNJ9W-xpExyy4y1-jpz_m7S5y6i5eQs,13395
42
+ evalvault/adapters/inbound/cli/utils/analysis_io.py,sha256=RHkKEq4e-PtbtRDlXAJWU80RYHNPw-O5V9_GujdaGfc,13393
43
43
  evalvault/adapters/inbound/cli/utils/console.py,sha256=uikQ7igql04HH6zSpCk_uA9HpQmD0IcJxEAo3K9_FsE,1787
44
44
  evalvault/adapters/inbound/cli/utils/errors.py,sha256=RXLJZHadvENXDQ4ZzDyaL_0Z5IEfb_x0MxQba8dD20g,4908
45
45
  evalvault/adapters/inbound/cli/utils/formatters.py,sha256=-n1slpbvJxRv-NNbbXCFZ1da52GySA3bC3E6wdaAPE0,946
46
46
  evalvault/adapters/inbound/cli/utils/options.py,sha256=0vBmbpil1DCdO7Pktz_G1D-yMopNr0EcyOZgEE8-T2E,1799
47
47
  evalvault/adapters/inbound/cli/utils/presets.py,sha256=6XNVBnkkWpYwK-_UYd7AdVRPdgRNcbLZX22fF7gNlZY,3255
48
- evalvault/adapters/inbound/cli/utils/progress.py,sha256=cr2vT4u8dfBf7iI08-GfeRGFpZBEFACasNhzyu_k-eQ,11112
48
+ evalvault/adapters/inbound/cli/utils/progress.py,sha256=2kFKKZ4AWNvo9IcSLhu8qbslKfPHqKWLxYOImHVecqA,11136
49
49
  evalvault/adapters/inbound/cli/utils/validators.py,sha256=OnjNvi_iLtxjWBIrqtbGWAmx2d4zNm_-qi0Ce-3KHZk,1436
50
50
  evalvault/adapters/outbound/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- evalvault/adapters/outbound/analysis/__init__.py,sha256=hMycj6XMvXk0BKU8--42FP1dP7Fx8rowTYFZLpiwt-8,5796
51
+ evalvault/adapters/outbound/analysis/__init__.py,sha256=TLuS-eKfXg97_Db5td1nTZkD3BErRLZLic1v2EAM2sA,6185
52
52
  evalvault/adapters/outbound/analysis/analysis_report_module.py,sha256=xah3wgJErHD_Hpb1YAwWRsxr8xaC8SW--CpNA7IgfxI,3957
53
53
  evalvault/adapters/outbound/analysis/base_module.py,sha256=eUN77SSD2KR4WKU7gLY8TlVewETx_YIZvPT4LUnBv4o,2523
54
54
  evalvault/adapters/outbound/analysis/bm25_searcher_module.py,sha256=I8BsXrHaOVxgRTEVUVumSvEpSZ_sT6zsbTd2rlyW6x8,3701
@@ -59,25 +59,27 @@ evalvault/adapters/outbound/analysis/comparison_report_module.py,sha256=0tTMZB5q
59
59
  evalvault/adapters/outbound/analysis/data_loader_module.py,sha256=6X0-ZcFtEfonQnbJ0POqmHXstJ1Wq1NvpijtbKSeEm0,3749
60
60
  evalvault/adapters/outbound/analysis/detailed_report_module.py,sha256=59CjuNQthlroJyGEhQap3PgahWfzXciKx_DD10gHXjM,3897
61
61
  evalvault/adapters/outbound/analysis/diagnostic_playbook_module.py,sha256=qMgcBlwr2_a-pgth7zV1CWdGeUrzTeZjB4K9uyDnKcc,3993
62
- evalvault/adapters/outbound/analysis/embedding_analyzer_module.py,sha256=u52OrJhxTuSKqi8EaHgGy3EweOiC8J1e2L2ocOGjNps,7269
62
+ evalvault/adapters/outbound/analysis/embedding_analyzer_module.py,sha256=-lxNC-h4HUr1co9mNRHGO-gM3MgArT2LNBvjQoR_UkE,7286
63
63
  evalvault/adapters/outbound/analysis/embedding_distribution_module.py,sha256=AdCOFnmCk4luKw8NT6VBlrxaetYcNmhLTgKhjYxmDDg,2581
64
- evalvault/adapters/outbound/analysis/embedding_searcher_module.py,sha256=7_fwpBD571siKUWaFoKBPfkJkLWgy-C5NMfv2EkPZpM,4858
64
+ evalvault/adapters/outbound/analysis/embedding_searcher_module.py,sha256=j6w_jIGG9rbmVuAUtx2uEAX6wPXPjqmQyW0sy7R97dQ,4875
65
65
  evalvault/adapters/outbound/analysis/hybrid_rrf_module.py,sha256=kaHSc7z3Jg_KrRLBqPMTV_9XXsL6v1dmbz-3dDO6IMw,3255
66
66
  evalvault/adapters/outbound/analysis/hybrid_weighted_module.py,sha256=AO-7thmnFGerUDWd8l9ydxeAkHkACo7Raf9O0RfW_nE,3671
67
- evalvault/adapters/outbound/analysis/llm_report_module.py,sha256=MMMxlNk34MBlT8j44Lwj7g0op6eyMY5u8ISA9Kj4_OQ,38782
67
+ evalvault/adapters/outbound/analysis/hypothesis_generator_module.py,sha256=tx9fWgS0rBoK5eJPmwK5POoV78yN03hkFmWhCx71Ln0,13337
68
+ evalvault/adapters/outbound/analysis/llm_report_module.py,sha256=KjIM2MET6gl9jUpxRo0rDVIzqSXFw-I4y0QoG_TULFA,38773
68
69
  evalvault/adapters/outbound/analysis/low_performer_extractor_module.py,sha256=Pt0Tmtc5Etqp_3SBDCPAzqWI2EF9woSg0mmBucEHlQw,1291
69
70
  evalvault/adapters/outbound/analysis/model_analyzer_module.py,sha256=28rHdXBXYIFpLHixbbZcv6-j2QVgl3yaGN0vU1Q0gFc,2682
70
71
  evalvault/adapters/outbound/analysis/morpheme_analyzer_module.py,sha256=Hrh4mluMsOhQHPrliD2w0FVKokJpfikXOFKT6sNwk74,4158
71
72
  evalvault/adapters/outbound/analysis/morpheme_quality_checker_module.py,sha256=_uRKDXdwGbfYduf_3XT77vF8X3-_zW3stHYc3HKYQTE,2216
73
+ evalvault/adapters/outbound/analysis/network_analyzer_module.py,sha256=ITUVnt_CI5pHy5SAESBSi004yMtiAhGFsbhC61VTezk,8475
72
74
  evalvault/adapters/outbound/analysis/nlp_adapter.py,sha256=U7verYM4XTSPLTlb2z0b7yYzTP4kkNFl5LQ91XhXu_A,27432
73
75
  evalvault/adapters/outbound/analysis/nlp_analyzer_module.py,sha256=KtoMJNa4NE-91iTEpEWIid-mciaezwmhU2xlKbYl4fg,8221
74
76
  evalvault/adapters/outbound/analysis/pattern_detector_module.py,sha256=SyCDO_VS-r-tjGh8WrW-t1GCSC9ouxirdVk4NizFPXo,1882
75
- evalvault/adapters/outbound/analysis/pipeline_factory.py,sha256=T-seKrYmyNifZgoPUaiJE0ioTFexVp0qQfb1jkCOorA,3474
76
- evalvault/adapters/outbound/analysis/pipeline_helpers.py,sha256=aIyDV04YPSsxNzCYHG7UW8y7mQRBszrJebTUZ2wnny4,5756
77
- evalvault/adapters/outbound/analysis/priority_summary_module.py,sha256=OPX1DsUto4MjTFSzwdSan-xRPlT0FNCC1djWVu2oZT0,10803
77
+ evalvault/adapters/outbound/analysis/pipeline_factory.py,sha256=XvcCbKCN_otv1pGUzk0oE76RV19yFga8r6RngBvgEFo,3691
78
+ evalvault/adapters/outbound/analysis/pipeline_helpers.py,sha256=8E8IrYI5JvRrpnjxe0DS7srbPzB0XAxxXhLLYgfwsgU,5756
79
+ evalvault/adapters/outbound/analysis/priority_summary_module.py,sha256=o8Y0rfHjYYE9WNTwKtpJulwfvLA3MNMhYjdSg15Vacc,10802
78
80
  evalvault/adapters/outbound/analysis/ragas_evaluator_module.py,sha256=KQd8zdyMGKklZane077RRNPDOf6kqVDbj_R9Qbq-quA,7275
79
81
  evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py,sha256=D24GTaKabHacSBI-UqCd_jy61hnne8-QG1p4rqW1Bzk,5748
80
- evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py,sha256=9rxrwAp6kakJJuMreQm9OorwE6eXmirMtOK7SO4B8mQ,9296
82
+ evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py,sha256=_duIBlYhAsFygEpC7DuwoAqfTbVG2xgp70JjW1LJAGE,9312
81
83
  evalvault/adapters/outbound/analysis/retrieval_quality_checker_module.py,sha256=K1IJn4bvvz-BfqQmhd5Ik9oATjq_-G7V1AZSW8zKtSE,3121
82
84
  evalvault/adapters/outbound/analysis/root_cause_analyzer_module.py,sha256=UagHWb2d1vD7aCH0vLl3tSJx86gkkxNarrF-rwtEBhU,2811
83
85
  evalvault/adapters/outbound/analysis/run_analyzer_module.py,sha256=oTle2vHiNFFajCe8vQaB1SbKthbUxtd7oDGoPdW9JJY,2172
@@ -91,10 +93,11 @@ evalvault/adapters/outbound/analysis/statistical_analyzer_module.py,sha256=FzM6T
91
93
  evalvault/adapters/outbound/analysis/statistical_comparator_module.py,sha256=dxGVa3zvuBRab6KhRIVfg2IDB0PEGljBmohlSYI-BtE,1843
92
94
  evalvault/adapters/outbound/analysis/summary_report_module.py,sha256=HCoM8DMEGwwFQUjJLDWXnC6vE9Xo0YvhLRh9v3GMHRg,4279
93
95
  evalvault/adapters/outbound/analysis/time_series_analyzer_module.py,sha256=6PhFH_aLDS85VePar3M9-ClaxonjcfmXCnCBP4vhvT8,1888
96
+ evalvault/adapters/outbound/analysis/timeseries_advanced_module.py,sha256=V0t4rjLeb_EyfGUskXKbeKV5hd3tdK1OIbytELsSoR4,12650
94
97
  evalvault/adapters/outbound/analysis/trend_detector_module.py,sha256=b8rSKXxOH9NKaLbkG-APDeRpIxrYA-U5damse9R0smM,1977
95
98
  evalvault/adapters/outbound/analysis/verification_report_module.py,sha256=vFEPfsIKRGJmzZ2sRY9r-ELFZWY0UB0aLqhrQc0NRxk,1922
96
99
  evalvault/adapters/outbound/benchmark/__init__.py,sha256=fSL1fXtFaYmjwAHndThrwNA5bhvcKp4PaVltBI42WSg,171
97
- evalvault/adapters/outbound/benchmark/lm_eval_adapter.py,sha256=RAqVhPWCAhmWGPR7xNzYyx-hSeIquUqCesdhgaBy99M,12532
100
+ evalvault/adapters/outbound/benchmark/lm_eval_adapter.py,sha256=xFj_Cgny3JN_COA1o4PjNH-hZ_rAgdLtovvmrqualqs,12531
98
101
  evalvault/adapters/outbound/cache/__init__.py,sha256=LcsKzxnx1AnAwS07iSCdws11CfEYuxkUjRkogN0SviI,317
99
102
  evalvault/adapters/outbound/cache/hybrid_cache.py,sha256=AVhctQVOIbQWwvn_K0kxSq3lkhucuM7tezmSkPDbCrA,12711
100
103
  evalvault/adapters/outbound/cache/memory_cache.py,sha256=jvjIgXp7YRj08_AzBFaJ58jjXNzUlYbG_zX6fQJP4C0,3533
@@ -110,28 +113,33 @@ evalvault/adapters/outbound/dataset/templates.py,sha256=5gfae7kqs66SRAP-OyWX6N2c
110
113
  evalvault/adapters/outbound/dataset/thresholds.py,sha256=5Vodqar6QrCL7R_Pq0gTxv1pJh_OiH_3pNimvJaUQA4,2199
111
114
  evalvault/adapters/outbound/debug/__init__.py,sha256=p_5zw3ys7E-ZjmY_74LYiz5f4Gd6tT-o6T2SNDxFuQg,168
112
115
  evalvault/adapters/outbound/debug/report_renderer.py,sha256=84lR7kA369JYregP4ARkfIWCcYpbluJ5mISeV0iIGnk,5427
116
+ evalvault/adapters/outbound/documents/__init__.py,sha256=s_Fj5bi4NvZ5bPd5RoxFA4z-RnejHbP9_8Ceo4w3LK8,59
117
+ evalvault/adapters/outbound/documents/pdf_extractor.py,sha256=ugDzoeZbTCqFPOlbl3a5nznHiBLaK6a23u20N3ZfFBc,1189
118
+ evalvault/adapters/outbound/documents/versioned_loader.py,sha256=oNlwE40lfOFhX6uNCbxTLH_S1-zMy6kNq95SyRv2I1I,7786
119
+ evalvault/adapters/outbound/documents/ocr/__init__.py,sha256=i6JIdO9UyHNd6wlb3KnBxLfI10WLjlFqssKBTbYUf6E,39
120
+ evalvault/adapters/outbound/documents/ocr/paddleocr_backend.py,sha256=AORA9JUV5ux5QkrwXBg5xjzqJqgFUjTyyFrfqkUYdpw,3378
113
121
  evalvault/adapters/outbound/domain_memory/__init__.py,sha256=ksMX1IkNiDqQHLtJe9TOXiLC1iouGt6_QSdPLiALHHs,229
114
122
  evalvault/adapters/outbound/domain_memory/domain_memory_schema.sql,sha256=APlNhJNFZdcm7Sb2tvr7V8JMiLinmXkx1gd6pgTf9ZI,11268
115
123
  evalvault/adapters/outbound/domain_memory/sqlite_adapter.py,sha256=RWobnFgvxiItxFAr6niY89sT19O-cnExTbP0I7UAY78,85186
116
124
  evalvault/adapters/outbound/improvement/__init__.py,sha256=tXA6vaZOLvqwJpyjGMiC8WrvszMmvUPzJnHjvJhQxSI,1143
117
- evalvault/adapters/outbound/improvement/insight_generator.py,sha256=_rxVq7aOyhsMqKVljaSDNVcev4GZ8GWWS7JQ3u5l6K4,17144
118
- evalvault/adapters/outbound/improvement/pattern_detector.py,sha256=sEUelFjrAPdbTTPDqmlFLH2zSdTBwe0Fq-YyHv1eeI0,24215
119
- evalvault/adapters/outbound/improvement/playbook_loader.py,sha256=48jC7vn0c9rcypNV9i6wOyLRn3Xmi3WVY8xlTbEN1RI,7063
125
+ evalvault/adapters/outbound/improvement/insight_generator.py,sha256=U16l0euCZy0_08Zb_i0eijXSjS5t-iq0iMUfttwPqgI,17636
126
+ evalvault/adapters/outbound/improvement/pattern_detector.py,sha256=4Pc5yrsi2warhKdpWxL0Ba9Ms2sCvFeRVWU8jTeALZ8,24608
127
+ evalvault/adapters/outbound/improvement/playbook_loader.py,sha256=zXDpiTpYWtQvVrDeo149YHKIyhF6nUP34j0FVnlBCJo,7471
120
128
  evalvault/adapters/outbound/improvement/stage_metric_playbook_loader.py,sha256=JdmXQsackWqeWTnULE4gfTK8vAikGR27h-TVc03CGXk,1706
121
129
  evalvault/adapters/outbound/kg/__init__.py,sha256=fUCKOV080ZjiEob9s4TmXWf-IDa6GbIFQMLfH6gFCKg,567
122
- evalvault/adapters/outbound/kg/graph_rag_retriever.py,sha256=d2JTQgcNHI7kvAL757_NysqcaSiuvXPHNuxdhHk1_Ug,18556
130
+ evalvault/adapters/outbound/kg/graph_rag_retriever.py,sha256=_6qd8p_2TpHnppv8LUQQPxUdTPoE0QTQ-rCVnY1ap6c,18658
123
131
  evalvault/adapters/outbound/kg/networkx_adapter.py,sha256=wSZjMiMzpS9ZZLTRYjHm7X6TN9ITbuTWr10KyR6hFxg,21043
124
132
  evalvault/adapters/outbound/kg/parallel_kg_builder.py,sha256=sbR-bVpjTFhV47Ss8LYA3Ytk-THqo1HJz2YLfAelRyg,8886
125
133
  evalvault/adapters/outbound/kg/query_strategies.py,sha256=BDDft2Cc-xDvJWyIuyUTv0bPvaIiXJLd76YdjzMAZlk,19445
126
- evalvault/adapters/outbound/llm/__init__.py,sha256=iXYeEz6MbKL8celAW3PkgdynFC4kCvHEFdn0HSR32k0,4113
134
+ evalvault/adapters/outbound/llm/__init__.py,sha256=GDllWZNBVqmQSFOXsXFlE82qxgaZWGL4TChu61bEG7U,4523
127
135
  evalvault/adapters/outbound/llm/anthropic_adapter.py,sha256=Qjpwqt6eDDvrSAGynvO0NZQIrhfEFJqLJfqfYPjcwaY,7491
128
136
  evalvault/adapters/outbound/llm/azure_adapter.py,sha256=FR9Gimy-jonzQjtILGHKXAY_ycbjGKbKLFiD-OZ8EVs,3033
129
137
  evalvault/adapters/outbound/llm/base.py,sha256=c4cJJTKwJiBlGNCV7qomq5mQCrbc48Yi7c3vjlat7Q0,7674
130
- evalvault/adapters/outbound/llm/instructor_factory.py,sha256=aXJyJCV8lns5A7sAyt6HZvbZedv2DcHTBXK4pr6TV-g,1364
138
+ evalvault/adapters/outbound/llm/instructor_factory.py,sha256=6iJIjZbCJwDYEpV673R3lQOupzeWlvjCmhjYqthrueY,4497
131
139
  evalvault/adapters/outbound/llm/llm_relation_augmenter.py,sha256=jdeDumCi34KqAfRwLh9VTia9wiP5bvVwvHxJylLDOcg,3854
132
- evalvault/adapters/outbound/llm/ollama_adapter.py,sha256=vuGaDiY--sm5Zg_plf2kgvbjErs_S9jJLw74H23UgE0,10432
140
+ evalvault/adapters/outbound/llm/ollama_adapter.py,sha256=L7MRlPUyYxTSVHhCz7LHducTqBifTzmovNBi26SWtr8,10615
133
141
  evalvault/adapters/outbound/llm/openai_adapter.py,sha256=Zv4gi0AxoQrXzf2hBrQjLaQVM4WttHuO2raTCekR_Kk,5163
134
- evalvault/adapters/outbound/llm/token_aware_chat.py,sha256=BMUvgC55cooUKew8afw7CNU7bXr6-ZdLc0_nT-eFQzY,11948
142
+ evalvault/adapters/outbound/llm/token_aware_chat.py,sha256=HLmwTZkiwmUuopjFwDGpamIRC44PWnwkRA4O-pUBt_s,11937
135
143
  evalvault/adapters/outbound/llm/vllm_adapter.py,sha256=zwH_KKydABn0J-hCs6OB-3809X_qBx8FPQ2EhO1gkcY,3897
136
144
  evalvault/adapters/outbound/methods/__init__.py,sha256=3vyE9w3Ex2oMaO4ZE7Fy6xlHhJ6YQXHQNCvBiW9X2lM,345
137
145
  evalvault/adapters/outbound/methods/baseline_oracle.py,sha256=oUsF5sIiPY5vuDtrz0Ki05SnPlnVzn7APERP5v1KpPM,1308
@@ -148,21 +156,22 @@ evalvault/adapters/outbound/nlp/korean/korean_evaluation.py,sha256=Mxwu3zhtdm8Te
148
156
  evalvault/adapters/outbound/nlp/korean/korean_stopwords.py,sha256=UemEFCJudg2EpsHg8uU2eR-iCh34kw4ZSVCRvnEC6a4,4293
149
157
  evalvault/adapters/outbound/nlp/korean/toolkit.py,sha256=EYGpd89ilpn4Wg5t8pALYt4Qi0aDHYOfXGuYbQx7do0,4246
150
158
  evalvault/adapters/outbound/phoenix/sync_service.py,sha256=i6gHpNiZXKQ5yzV9B2TPb-P1N45k_Ck5ruzh3oqp4d8,9122
151
- evalvault/adapters/outbound/report/__init__.py,sha256=jY17ysqwkRbKCquNv94XUBI4QZQlrAzKyhb0xu87jdA,365
152
- evalvault/adapters/outbound/report/llm_report_generator.py,sha256=cjt-ydcufMLiAyYoNJ7NXuwqS9dQBQaA1Mlw-ki7hYY,24087
153
- evalvault/adapters/outbound/report/markdown_adapter.py,sha256=oRI4IJfVR9GrjcOq7IwtD_ngP2sy3kt2B8XO-wYT2jI,16610
159
+ evalvault/adapters/outbound/report/__init__.py,sha256=8OUduTHnWkBLHYrc7mBg45DnAwz0RgvSJmz1HqxVjLY,477
160
+ evalvault/adapters/outbound/report/dashboard_generator.py,sha256=Dcu18NTK4lS8XNKnnnquagpZkd-4TSf5Mb2isFNW5Pk,7800
161
+ evalvault/adapters/outbound/report/llm_report_generator.py,sha256=sp2YRCmPOhn08vb8Bq_ayo-ZjgyBBxRhzRFvzlaDhsA,24063
162
+ evalvault/adapters/outbound/report/markdown_adapter.py,sha256=5PS72h_qe4ZtYs-umhX5TqQL2k5SuDaCUc6rRw9AKRw,16761
154
163
  evalvault/adapters/outbound/storage/__init__.py,sha256=n5R6thAPTx1leSwv6od6nBWcLWFa-UYD6cOLzN89T8I,614
155
164
  evalvault/adapters/outbound/storage/base_sql.py,sha256=pPKlT2P7fCTMxCAIi0tzYr0d5rkroPTB4dHQNDDRxsI,18167
156
165
  evalvault/adapters/outbound/storage/benchmark_storage_adapter.py,sha256=Qgf9xSSIkYQRpG4uLzcUdoYO9LTQDQ4tFRkkMYer-WA,9803
157
- evalvault/adapters/outbound/storage/postgres_adapter.py,sha256=qRIb3h8FbrQVsSSkVaROQWcvdcS6YtH0bufL7jKdZwM,38134
166
+ evalvault/adapters/outbound/storage/postgres_adapter.py,sha256=ro3DrE2e4l2jimoPidcmY0xBufhqi5M6_4VF-Ta0e-I,38133
158
167
  evalvault/adapters/outbound/storage/postgres_schema.sql,sha256=BLDQ7ynzS9Aw6NxN3efALpG3eN1ZfsmBcEeWugHwF98,7075
159
168
  evalvault/adapters/outbound/storage/schema.sql,sha256=R9Y3j76qR3_UbbeX1olhHs1hbsLjS3YfiSFyUgqTM28,10057
160
169
  evalvault/adapters/outbound/storage/sqlite_adapter.py,sha256=cldMzXW9_0jdAC0YkwhFznleKX6yF4RkFNpmYc8_lZQ,47446
161
170
  evalvault/adapters/outbound/tracer/__init__.py,sha256=xrvQQuAvF_UI02mKLMV7GTrG3zn836n5zwCRrrmhq_U,1054
162
- evalvault/adapters/outbound/tracer/open_rag_log_handler.py,sha256=8aND6mDNUq8aqsvS_g0zPiXVJrafXfJLV6HHWgxyNDY,2824
163
- evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py,sha256=1YOwktXyrdHZmbDK6ESsvbfjdoyeueMpCnGX7U0L_NA,4781
171
+ evalvault/adapters/outbound/tracer/open_rag_log_handler.py,sha256=aq96FIWD-bBaSkq-bygWhQArC9LWghSwi-S03Mga0mI,2827
172
+ evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py,sha256=P-4PN1UweITXu5uN3LJVCEL3wRwiExzhgs3y2GN78xM,4784
164
173
  evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py,sha256=LFnk-3FSLwRz0pxp8gMZl3Kd1BIMv_4Gr21nbRQ53m4,2034
165
- evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py,sha256=euHcXGdWEPKnyqmoCu2cwO3lMCLkHP5ApMxiIHXKwTo,3518
174
+ evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py,sha256=D48Mbj-ioDKztjhV9513Q5DiUNiVdO60B_2sWMFEmnI,3520
166
175
  evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py,sha256=inmTAolAVsm0IrszE9VTJoI7HSvGGAnGNZVu_vZRAGg,741
167
176
  evalvault/adapters/outbound/tracker/__init__.py,sha256=Suu5BznOK5uTuD5_jS8JMZd8RPfQNlddLxHCBvMTm_4,358
168
177
  evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=Gejd3fOBwShfjbtjVcZK9sCJKRz6oB3OaN6KukOYN38,17782
@@ -175,12 +184,12 @@ evalvault/config/instrumentation.py,sha256=L8on9HjB6Ji8cSOJ6Pepsopfg9okDNMWF7LKZ
175
184
  evalvault/config/langfuse_support.py,sha256=DEzVMfMGGf1V45W_2oUG-NCDfsYI4UUdnYJIgBSrN2o,582
176
185
  evalvault/config/model_config.py,sha256=KlzDbGyDLeOGE7ElekFFk5YjjT5u8i6KO2B4EyZkLnI,3542
177
186
  evalvault/config/phoenix_support.py,sha256=e6RPWd6Qb7KU6Q8pLaYTpJGWULtvEEU6B0xHWyVyOH0,13604
178
- evalvault/config/settings.py,sha256=kY58QYFAFfAIx_MGlK8Rkktkpwe4sjhhHQHBOXIMDns,12594
187
+ evalvault/config/settings.py,sha256=fh_Kl95LpAtLPgRrL-W-nBpp8HwWuGaoDb4NR54hcbc,12863
179
188
  evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
180
189
  evalvault/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
190
  evalvault/domain/entities/__init__.py,sha256=C63BX4ytkh0FCEfNFIy0MDY2tcYxp9G430IdyBxqqAk,2794
182
191
  evalvault/domain/entities/analysis.py,sha256=gcMtumC66g-AIqb2LgfMpm5BMzwJhJkjg-zuybNoJCM,15208
183
- evalvault/domain/entities/analysis_pipeline.py,sha256=YHe_lNfekDhIE5oOnNnjD52KVhxXXpEUuBmy1qqaKI8,16581
192
+ evalvault/domain/entities/analysis_pipeline.py,sha256=hD9rFHMa4rUq0InRkSKhh6HQ9ZeNYAHKADzs-kWRP04,16845
184
193
  evalvault/domain/entities/benchmark.py,sha256=CVbz_eW7Y9eM7wG7xA_xmldTIs72csdoTmu3E0NKoMU,18475
185
194
  evalvault/domain/entities/benchmark_run.py,sha256=2ZJOq5Ny_pfvRKM7E4RuIKxfxvoYK-5PAtPDhOwra9k,5653
186
195
  evalvault/domain/entities/dataset.py,sha256=WsC_5ivGluy-o2nXxLGmoC8DYl5UafVSo2hSowb3rvs,1886
@@ -204,7 +213,7 @@ evalvault/domain/metrics/retrieval_rank.py,sha256=F55ByadJBowyKHKBmKAZ0T0qN_R1_7
204
213
  evalvault/domain/metrics/terms_dictionary.json,sha256=-ZQmpx6yMOYoAOpcLj-xK2LkAeCbAw0EUb6-syIOKS0,3801
205
214
  evalvault/domain/metrics/text_match.py,sha256=P-YTZs9ekDqEmxLNBP8eXnMRymPdC8V4dJPtwG2ajVM,10219
206
215
  evalvault/domain/services/__init__.py,sha256=a7YjsOuOzbY2yQvtDJg7W8dPJme31rI04Qc49w5iHKE,686
207
- evalvault/domain/services/analysis_service.py,sha256=_W8HqgJSb2Rlv-fEtLsI3JpfQ0DSAM-zo_z6jgoPyIg,10769
216
+ evalvault/domain/services/analysis_service.py,sha256=oUEtfJHB3bNJ_Ksygx-pjnLm4CTk7_rDvDbqfkAfFD4,10838
208
217
  evalvault/domain/services/async_batch_executor.py,sha256=qYFRl7CGmv56XppeRhInde7Fw0GESCoZh8V-Iv_1hQQ,11140
209
218
  evalvault/domain/services/batch_executor.py,sha256=cYA_Q1es46n_PYeyyfm0iM2b7GGVtDoOGoMxexrf6tI,1243
210
219
  evalvault/domain/services/benchmark_report_service.py,sha256=IF-zqtvpsJ0ONJWUEw4ghKiC7ka_PWxUBO10lPaDRmI,15083
@@ -215,10 +224,11 @@ evalvault/domain/services/cluster_map_builder.py,sha256=qPKMPj-eSqECJSCOKvv3ZETg
215
224
  evalvault/domain/services/dataset_preprocessor.py,sha256=v-shY5ky1oW0LJwBfdfP4VFh7TXBabpLD5rMOmtS-dQ,14235
216
225
  evalvault/domain/services/debug_report_service.py,sha256=SGdFh8tctAIq7RotFbg47eetxdYSS4Yju7-LOzpCMCM,4386
217
226
  evalvault/domain/services/document_chunker.py,sha256=u05N1xSBcJuJPUfP7WmpY_EyHuUMuGMsPSM9qs-ID8c,2494
227
+ evalvault/domain/services/document_versioning.py,sha256=M1qZaMpQ2exVT1wkVAmvEPPuoYibJDt0F7pYfTK7mvE,3323
218
228
  evalvault/domain/services/domain_learning_hook.py,sha256=rhKBmdnrJyfGzFNsNxzyv8jZO26-WOosHSmBV_9qdJg,7176
219
229
  evalvault/domain/services/embedding_overlay.py,sha256=ZTNxUPXpHGbQ3Uri5DD3feTUFn7qrhuNshhyCQEvRuM,3559
220
230
  evalvault/domain/services/entity_extractor.py,sha256=f3Rf5saK8QsgetLNK1Hbxzt8PtttJZCicSR63S8DJ5k,14141
221
- evalvault/domain/services/evaluator.py,sha256=9Yi2Cb-fRzV6CUdvBTziP9vJs7i6btCuFt77EQfjFzE,64476
231
+ evalvault/domain/services/evaluator.py,sha256=rxj0tVMwtDrekNFstT6baQDfemsH-c6XtHxX-9BLKmo,64476
222
232
  evalvault/domain/services/experiment_comparator.py,sha256=IBrxIwux-8GucwlLx6e5lUqB9miSPvBLGJK9ctoW7Y0,3299
223
233
  evalvault/domain/services/experiment_manager.py,sha256=2k-qGiAUyZuqqmcp4P-M3Z9HTXwwcqW5HQYKNkcIHuI,4863
224
234
  evalvault/domain/services/experiment_reporter.py,sha256=QYlVmCFSx8hKTPMezc7QjJE07b3MSQ82Q4QVucSHLVY,1420
@@ -231,13 +241,13 @@ evalvault/domain/services/memory_aware_evaluator.py,sha256=vTiYoxiMfZ_CMjSBjqwkB
231
241
  evalvault/domain/services/memory_based_analysis.py,sha256=oh2irCy3le7fWiTtL31SMEhPyu7fyBVz-giO2hlNifE,4499
232
242
  evalvault/domain/services/method_runner.py,sha256=pABqKZeaALpWZYDfzAbd-VOZt2djQggRNIPuuPQeUSw,3571
233
243
  evalvault/domain/services/pipeline_orchestrator.py,sha256=yriVlEVZYDtt0Vwt4Ae6xyW1H6Dj4Hxdn8XQSvQNSoQ,19436
234
- evalvault/domain/services/pipeline_template_registry.py,sha256=XfMFx7cOVqBr_S0XzCeDcfeBytc2J4uXxJ7oy6wy4FU,21694
244
+ evalvault/domain/services/pipeline_template_registry.py,sha256=c1rvYsTQU5MdAsmbZ7LlnuF6TD3p4IXlzgq_i18J3f8,24039
235
245
  evalvault/domain/services/prompt_manifest.py,sha256=5s5Kd6-_Dn-xrjjlU99CVo6njsPhvE50H5m_85U-H6U,5612
236
246
  evalvault/domain/services/prompt_registry.py,sha256=81tq__u2fFxTEG8bWnyJ2Qdb9N89jcqIdSfOAKEbEvg,3029
237
247
  evalvault/domain/services/prompt_status.py,sha256=r1dFLGz4SfRxXaxsULQsr0-HpJkG9YfZ_yLIxF1MMBo,6731
238
248
  evalvault/domain/services/ragas_prompt_overrides.py,sha256=4BecYE2KrreUBbIM3ssP9WzHcK_wRc8jW7CE_k58QOU,1412
239
249
  evalvault/domain/services/retrieval_metrics.py,sha256=dtrQPLMrXSyWLcgF8EGcLNFwzwA59WDzEh41JRToHAY,2980
240
- evalvault/domain/services/retriever_context.py,sha256=fdtB-_faOXQv5RJWUaCuUv_lF5XRAybvTfMgRVFpKnY,4684
250
+ evalvault/domain/services/retriever_context.py,sha256=ySQ-GuadiggS0LVAib4AxA_0JpasYz4S9hbjau0eyIA,6482
241
251
  evalvault/domain/services/stage_event_builder.py,sha256=ScTgyeRiH7z_rnNI_2p-i9szVRIRwUxGSJvpEj3zto4,9645
242
252
  evalvault/domain/services/stage_metric_guide_service.py,sha256=_JdRsBRWirO24qYFlh6hG-dkoWlX6_XWEYKf_uUlKIQ,8807
243
253
  evalvault/domain/services/stage_metric_service.py,sha256=KukIWWhWVOtclrET6uyWJ17jG76LfkKiqrUrDIDJ3gw,15327
@@ -246,7 +256,7 @@ evalvault/domain/services/synthetic_qa_generator.py,sha256=v0-M9VXM5oZ12AkfDyhOJ
246
256
  evalvault/domain/services/testset_generator.py,sha256=6IpiZ0pqhKEymo-AlUdfJjDkF2P1n8Md_QKV4nOheyg,4470
247
257
  evalvault/domain/services/threshold_profiles.py,sha256=YuOrD5CkXugAdSQYbMsFzS5VS1R201JOJtpKTs4dpXU,1296
248
258
  evalvault/domain/services/unified_report_service.py,sha256=lG3VpMLC1MTYUlcGl-MUEE4PUopkyrhcgj4_ye9c_vM,11829
249
- evalvault/domain/services/visual_space_service.py,sha256=BBcPKT79WjpPb3odzHGsvIbWodMngqxlnvOc6cdm0pc,32570
259
+ evalvault/domain/services/visual_space_service.py,sha256=xG2jxKuRuqmQgbWsXOqmytKr6pQ7igQujNgdpb5gyB0,32569
250
260
  evalvault/ports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
251
261
  evalvault/ports/inbound/__init__.py,sha256=2Wsc0vNzH8_ZaErk4OHxP93hRonLUkMbn3W28DtTDO0,562
252
262
  evalvault/ports/inbound/analysis_pipeline_port.py,sha256=RJfKtp22AYEqnmRk6RDawAK52rEmyAhuk0FUPJQUwQU,1758
@@ -256,13 +266,13 @@ evalvault/ports/inbound/web_port.py,sha256=kjDyNXkgRwbevmSnm25URk-qHjGN9K9ML83FA
256
266
  evalvault/ports/outbound/__init__.py,sha256=jEmLbY3lZ9osue6pG5dc345BdMikBEWq4cnX7ocEul0,3276
257
267
  evalvault/ports/outbound/analysis_cache_port.py,sha256=zPSdUVK_yw3PMWPII2YvS1WLmCGlg5bDScSuYINW9yc,1386
258
268
  evalvault/ports/outbound/analysis_module_port.py,sha256=QYzkvie9-BbONj8ZgiQUjm8I-bn8mgzlXTzIXMhehmQ,1881
259
- evalvault/ports/outbound/analysis_port.py,sha256=zAiYKotd93IVZnPc8odSHN_t5nHW5VhICbFKR7SKSyY,2088
269
+ evalvault/ports/outbound/analysis_port.py,sha256=gE-iXToTgdQomj9JwNZJY4nwut8q0J6EurUmJNsnptQ,2127
260
270
  evalvault/ports/outbound/benchmark_port.py,sha256=pgo3rNbvvJS8x03UxBVQPBBgxc7X5kfG70ZlIf3sopE,7173
261
271
  evalvault/ports/outbound/causal_analysis_port.py,sha256=IsyVdFrs66mHcOc-_VbxrZQriwMrDxx-5a_4ElX5Bp0,941
262
272
  evalvault/ports/outbound/dataset_port.py,sha256=OpEBlkvFwpSRbmi-Lt3wK7n0wljmQ6m985mjyNn_qFk,990
263
273
  evalvault/ports/outbound/domain_memory_port.py,sha256=SZFurqsoBmTw1Kt_pej-YpMbooVeyV35jekhaDRojus,23320
264
274
  evalvault/ports/outbound/embedding_port.py,sha256=ZHeKRMRBNjpZKWxsLKrD8jJz0M66JTwNcrJbkRaklK4,2034
265
- evalvault/ports/outbound/improvement_port.py,sha256=ejulfyNLfFpiIoiS5QczxBh-XH4YDyW-dE8HK0Yzye4,2254
275
+ evalvault/ports/outbound/improvement_port.py,sha256=fIXhcG4n6OJ1hdvWeqEoLBrVsCNdHZRgtEZjR8lf3qA,2325
266
276
  evalvault/ports/outbound/intent_classifier_port.py,sha256=gqMIk0rH6Z43ceuMMRX4vqXurgHZz-CJX2bR5PVAkjQ,2253
267
277
  evalvault/ports/outbound/korean_nlp_port.py,sha256=mJCnxBAkV8a5Nd_VX6QcjfDucY62er8GlaNO4HQA8q8,1572
268
278
  evalvault/ports/outbound/llm_port.py,sha256=qnVmQmkWgpeJ_eB_W1gLgXTWpowfDgYrXG5uKUhIsy8,3528
@@ -278,8 +288,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
278
288
  evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
279
289
  evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
280
290
  evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
281
- evalvault-1.57.1.dist-info/METADATA,sha256=Jtdtv4zOIruMC5Wmo5kEGwqZZMlyhnQbZENl6U5uog8,32284
282
- evalvault-1.57.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
283
- evalvault-1.57.1.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
284
- evalvault-1.57.1.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
285
- evalvault-1.57.1.dist-info/RECORD,,
291
+ evalvault-1.59.0.dist-info/METADATA,sha256=unwBGPN_vReQ3ohlNQZjMhPy8GBTxDqy1eSPvprX7dk,14058
292
+ evalvault-1.59.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
293
+ evalvault-1.59.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
294
+ evalvault-1.59.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
295
+ evalvault-1.59.0.dist-info/RECORD,,