evalvault 1.63.1__py3-none-any.whl → 1.65.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. evalvault/adapters/inbound/api/main.py +147 -9
  2. evalvault/adapters/inbound/api/routers/config.py +6 -1
  3. evalvault/adapters/inbound/api/routers/knowledge.py +62 -6
  4. evalvault/adapters/inbound/cli/commands/__init__.py +14 -7
  5. evalvault/adapters/inbound/cli/commands/artifacts.py +107 -0
  6. evalvault/adapters/inbound/cli/commands/calibrate_judge.py +283 -0
  7. evalvault/adapters/inbound/cli/commands/compare.py +290 -0
  8. evalvault/adapters/inbound/cli/commands/history.py +13 -85
  9. evalvault/adapters/inbound/cli/commands/ops.py +110 -0
  10. evalvault/adapters/inbound/cli/commands/profile_difficulty.py +160 -0
  11. evalvault/adapters/inbound/cli/commands/regress.py +251 -0
  12. evalvault/adapters/outbound/analysis/comparison_pipeline_adapter.py +49 -0
  13. evalvault/adapters/outbound/artifact_fs.py +16 -0
  14. evalvault/adapters/outbound/filesystem/__init__.py +3 -0
  15. evalvault/adapters/outbound/filesystem/difficulty_profile_writer.py +50 -0
  16. evalvault/adapters/outbound/filesystem/ops_snapshot_writer.py +13 -0
  17. evalvault/adapters/outbound/judge_calibration_adapter.py +36 -0
  18. evalvault/adapters/outbound/judge_calibration_reporter.py +57 -0
  19. evalvault/adapters/outbound/methods/external_command.py +22 -1
  20. evalvault/adapters/outbound/tracker/langfuse_adapter.py +40 -15
  21. evalvault/adapters/outbound/tracker/log_sanitizer.py +93 -0
  22. evalvault/adapters/outbound/tracker/mlflow_adapter.py +3 -2
  23. evalvault/adapters/outbound/tracker/phoenix_adapter.py +90 -37
  24. evalvault/config/secret_manager.py +118 -0
  25. evalvault/config/settings.py +141 -1
  26. evalvault/domain/entities/__init__.py +10 -0
  27. evalvault/domain/entities/judge_calibration.py +50 -0
  28. evalvault/domain/entities/stage.py +11 -3
  29. evalvault/domain/services/artifact_lint_service.py +268 -0
  30. evalvault/domain/services/benchmark_runner.py +1 -6
  31. evalvault/domain/services/dataset_preprocessor.py +26 -0
  32. evalvault/domain/services/difficulty_profile_reporter.py +25 -0
  33. evalvault/domain/services/difficulty_profiling_service.py +304 -0
  34. evalvault/domain/services/evaluator.py +2 -0
  35. evalvault/domain/services/judge_calibration_service.py +495 -0
  36. evalvault/domain/services/ops_snapshot_service.py +159 -0
  37. evalvault/domain/services/regression_gate_service.py +199 -0
  38. evalvault/domain/services/run_comparison_service.py +159 -0
  39. evalvault/domain/services/stage_event_builder.py +6 -1
  40. evalvault/domain/services/stage_metric_service.py +83 -18
  41. evalvault/ports/outbound/__init__.py +4 -0
  42. evalvault/ports/outbound/artifact_fs_port.py +12 -0
  43. evalvault/ports/outbound/comparison_pipeline_port.py +22 -0
  44. evalvault/ports/outbound/difficulty_profile_port.py +15 -0
  45. evalvault/ports/outbound/judge_calibration_port.py +22 -0
  46. evalvault/ports/outbound/ops_snapshot_port.py +8 -0
  47. {evalvault-1.63.1.dist-info → evalvault-1.65.0.dist-info}/METADATA +8 -1
  48. {evalvault-1.63.1.dist-info → evalvault-1.65.0.dist-info}/RECORD +51 -23
  49. {evalvault-1.63.1.dist-info → evalvault-1.65.0.dist-info}/WHEEL +0 -0
  50. {evalvault-1.63.1.dist-info → evalvault-1.65.0.dist-info}/entry_points.txt +0 -0
  51. {evalvault-1.63.1.dist-info → evalvault-1.65.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: evalvault
3
- Version: 1.63.1
3
+ Version: 1.65.0
4
4
  Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
5
5
  Project-URL: Homepage, https://github.com/ntts9990/EvalVault
6
6
  Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
@@ -111,6 +111,10 @@ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == 'phoenix'
111
111
  Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'phoenix'
112
112
  Provides-Extra: postgres
113
113
  Requires-Dist: psycopg[binary]>=3.0.0; extra == 'postgres'
114
+ Provides-Extra: secrets
115
+ Requires-Dist: boto3; extra == 'secrets'
116
+ Requires-Dist: google-cloud-secret-manager; extra == 'secrets'
117
+ Requires-Dist: hvac; extra == 'secrets'
114
118
  Provides-Extra: timeseries
115
119
  Requires-Dist: aeon>=1.3.0; extra == 'timeseries'
116
120
  Requires-Dist: numba>=0.55.0; extra == 'timeseries'
@@ -175,6 +179,9 @@ uv run evalvault run --mode simple tests/fixtures/e2e/insurance_qa_korean.json \
175
179
  --auto-analyze
176
180
  ```
177
181
 
182
+ - API 인증을 쓰려면 `.env`에 `API_AUTH_TOKENS`를 설정하세요.
183
+ - `secret://` 참조를 쓰면 `SECRET_PROVIDER`와 `--extra secrets`가 필요합니다.
184
+ - 레이트리밋은 `RATE_LIMIT_ENABLED`로 활성화합니다.
178
185
  - 결과는 기본 DB(`data/db/evalvault.db`)에 저장되어 `history`, Web UI, 비교 분석에서 재사용됩니다.
179
186
  - `--db`를 생략해도 기본 경로로 저장되며, 모든 데이터가 자동으로 엑셀로 내보내집니다.
180
187
  - `--auto-analyze`는 요약 리포트 + 모듈별 아티팩트를 함께 생성합니다.
@@ -6,36 +6,42 @@ evalvault/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
6
6
  evalvault/adapters/inbound/__init__.py,sha256=SG1svel1PwqetnqVpKFLSv612_WwGwLTbFpYgwk6FMw,166
7
7
  evalvault/adapters/inbound/api/__init__.py,sha256=LeVVttCA3tLKoHA2PO4z3y8VkfVcf3Bq8CZSzo91lf4,34
8
8
  evalvault/adapters/inbound/api/adapter.py,sha256=tYkJciUUFOK80QcSwzrqkXP1G4qUFItFV7uBYbjBGqU,68473
9
- evalvault/adapters/inbound/api/main.py,sha256=KdlAxKn0QfGI3UuoTrBDBbUs2xCvP8lnWOY1ce3svcU,2619
9
+ evalvault/adapters/inbound/api/main.py,sha256=lRuyg3aBs5jIk7tq4p4d7jrRkFpV_brZypoOq8s56Rk,6896
10
10
  evalvault/adapters/inbound/api/routers/__init__.py,sha256=q07_YF9TnBl68bqcRCvhPU4-zRTyvmPoHVehwO6W7QM,19
11
11
  evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUBgR1Ka4pdFQrXBJJ4K4Jyr4,4477
12
- evalvault/adapters/inbound/api/routers/config.py,sha256=CN-FH2cn0Ive-BD3WacWY6PFfuMtZEHP5_out3fvST4,3957
12
+ evalvault/adapters/inbound/api/routers/config.py,sha256=LygN0fVMr8NFtj5zuQXnVFhoafx56Txa98vpwtPa4Jc,4104
13
13
  evalvault/adapters/inbound/api/routers/domain.py,sha256=RsR7GIFMjccDN7vpG1uDyk9n1DnCTH18JDGAX7o4Qqc,3648
14
- evalvault/adapters/inbound/api/routers/knowledge.py,sha256=7mgyoUM1PepFb4X8_Ntn0vd7ZZYcNbM3_9nyD10g4Aw,5307
14
+ evalvault/adapters/inbound/api/routers/knowledge.py,sha256=yb_e7OEPtwldOAzHTGiWe7jShHw2JdpOFnzGPMceRsg,7109
15
15
  evalvault/adapters/inbound/api/routers/pipeline.py,sha256=8UgQzNFHcuqS61s69mOrPee4OMwfxVdvRWHJ2_qYBF0,17175
16
16
  evalvault/adapters/inbound/api/routers/runs.py,sha256=rydOvwWk24QIYafu3XYS3oL_VVCE_jHDmjADhA19T1s,40059
17
17
  evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675
18
18
  evalvault/adapters/inbound/cli/app.py,sha256=ytNgHRg9ZTAl33AkB1wIL8RKfQ_Cf8fsy0gSsLTs7Ew,1603
19
- evalvault/adapters/inbound/cli/commands/__init__.py,sha256=cNPPhsudTQWdlh_OJm9mU8LGBnJLGMswJBcIV9MAlkI,3530
19
+ evalvault/adapters/inbound/cli/commands/__init__.py,sha256=kw0SAEwOce1v92Pd6YpQjSYsdwLU95TQqbKGM44fNhY,3995
20
20
  evalvault/adapters/inbound/cli/commands/agent.py,sha256=YlOYMEzzS1aSKDKD_a7UK3St18X6GXGkdTatrzyd8Zc,7555
21
21
  evalvault/adapters/inbound/cli/commands/analyze.py,sha256=aMi1BEDOX3yhN-ppBftDssPQLB5TdzIfpx9U7CZEgWo,48932
22
22
  evalvault/adapters/inbound/cli/commands/api.py,sha256=YdbJ_-QEajnFcjTa7P2heLMjFKpeQ4nWP_p-HvfYkEo,1943
23
+ evalvault/adapters/inbound/cli/commands/artifacts.py,sha256=bE8FQxmnU0mMIAPx5en8aKrtfNNkrbWoLxIX4ZT9D5c,3776
23
24
  evalvault/adapters/inbound/cli/commands/benchmark.py,sha256=RZ4nRTF7d6hDZug-Pw8dGcFEyWdOKclwqkvS-gN4VWo,41097
24
25
  evalvault/adapters/inbound/cli/commands/calibrate.py,sha256=-UnT0LQH40U5lzMLqMJ7DOTLa3mt5P_fJL2XzqIkvu4,4223
26
+ evalvault/adapters/inbound/cli/commands/calibrate_judge.py,sha256=hJBlNl9Rt-ZtoIu-HKfudhZb2j2HOoEnRbiG4n5TOTE,10348
27
+ evalvault/adapters/inbound/cli/commands/compare.py,sha256=X_uyJoT_yQP43RTWMLCwMuHwhOb8wCqFShjy477V-2c,10384
25
28
  evalvault/adapters/inbound/cli/commands/config.py,sha256=Mv9IQHBFHZ3I2stUzHDgLDn-Znt_Awdy3j-sk5ruUmw,6069
26
29
  evalvault/adapters/inbound/cli/commands/debug.py,sha256=KU-hL1gLhpjV2ZybDQgGMwRfm-hCynkrqY4UzETfL9k,2234
27
30
  evalvault/adapters/inbound/cli/commands/domain.py,sha256=dL9iqBlnr5mDeS1unXW6uxE0qp6yfnxj-ls6k3EenwI,27279
28
31
  evalvault/adapters/inbound/cli/commands/experiment.py,sha256=jficaFOsZ9EMHrPHCOZjq6jpFrgmqCwmIo--wA_OcvQ,10389
29
32
  evalvault/adapters/inbound/cli/commands/gate.py,sha256=SxBSHALhekw9OVuJcuk64tkS8YMDDsgmhMALTE38wwY,9956
30
33
  evalvault/adapters/inbound/cli/commands/generate.py,sha256=7IPvd0WAwPxt9uaxmzqWCwt0b2VC_wXiVxyJ3lP-xys,8562
31
- evalvault/adapters/inbound/cli/commands/history.py,sha256=P8rK_nRJrmtG3y9obq3OSYtDZGZt2o3660_9JtbSPkg,11100
34
+ evalvault/adapters/inbound/cli/commands/history.py,sha256=3xf1l-I8IW-1Vtne9ypepDMDRRbwOpEvAjh4Qf9tV2w,8420
32
35
  evalvault/adapters/inbound/cli/commands/init.py,sha256=7q86fUeBVA08fU_N0lAV6Lakxirq4val2jIyALlDy3E,8822
33
36
  evalvault/adapters/inbound/cli/commands/kg.py,sha256=ycV9Xj6SUUJLTyTfLZcjXDVLcZqwo7Gw878ZhZAeDoc,19155
34
37
  evalvault/adapters/inbound/cli/commands/langfuse.py,sha256=aExhZ5WYT0FzJI4v1sF-a1jqy9b1BF46_HBtfiQjVGI,4085
35
38
  evalvault/adapters/inbound/cli/commands/method.py,sha256=OWdoofhvsDJchgNKnGGjXfIsZ-IHKZEo6RlmTsZRRYM,19124
39
+ evalvault/adapters/inbound/cli/commands/ops.py,sha256=2r6hdrZ7STnWMhtzYmv8jF_ukBq4HuKB1El6YnyxwrY,4035
36
40
  evalvault/adapters/inbound/cli/commands/phoenix.py,sha256=LQi3KTLq1ybjjBuz92oQ6lYyBS3mHrCHk0qe-7bqB4U,15611
37
41
  evalvault/adapters/inbound/cli/commands/pipeline.py,sha256=NeqWLzO9kRDuZd0pHAIHglP3F7VzoNOU4JI0QcSZ120,7788
42
+ evalvault/adapters/inbound/cli/commands/profile_difficulty.py,sha256=nOJH3iqgLAlXq4keLBj5oqpiRCg0jjGgT-7Q57HxEh8,6665
38
43
  evalvault/adapters/inbound/cli/commands/prompts.py,sha256=lddde5VbjYaqN_9gHPLNu6DWpg5fE-KqZzjN-XYwvJw,27153
44
+ evalvault/adapters/inbound/cli/commands/regress.py,sha256=Dy8hUOdjapxOW9Hoov0DHHblkMaExiqWfYS14CaC9Kk,8806
39
45
  evalvault/adapters/inbound/cli/commands/run.py,sha256=X19rgXhajhvZNA4c0JMmzmPatTxhZgfapuW07bZL9xA,119265
40
46
  evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=hu2TioocitUZzGR7HUwZ6gOeEJSvt5tGNjwXOlo4Eic,40336
41
47
  evalvault/adapters/inbound/cli/commands/stage.py,sha256=oRC9c5CysLX90Iy5Ba1pc_00DaOBS78lcBvzkbdrGRM,17123
@@ -52,6 +58,9 @@ evalvault/adapters/inbound/mcp/__init__.py,sha256=kctJsmaP4fY94T3WCOhgANk3TCLdfb
52
58
  evalvault/adapters/inbound/mcp/schemas.py,sha256=KUKm4gEc-UDyF8sUbyzAnAIzyZ6DcXsaCEIVR3oESNQ,4469
53
59
  evalvault/adapters/inbound/mcp/tools.py,sha256=fnvkWS5p93o3FNmUSbh3EW4jCAVwtBKHX6kDuEbXkK8,24219
54
60
  evalvault/adapters/outbound/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
+ evalvault/adapters/outbound/artifact_fs.py,sha256=nySk7-10B9rQpV9EslBmRUr0gFIi9V1E_E_wdC1WpIU,439
62
+ evalvault/adapters/outbound/judge_calibration_adapter.py,sha256=vu5dVJVo5AXa5ULcx8WE-6YgfD718BC8Ci1_HAdwy20,1155
63
+ evalvault/adapters/outbound/judge_calibration_reporter.py,sha256=R0nId6P1jYQ3M3636knPNFztuc8kewDBKsg2LLcUw58,2005
55
64
  evalvault/adapters/outbound/analysis/__init__.py,sha256=TLuS-eKfXg97_Db5td1nTZkD3BErRLZLic1v2EAM2sA,6185
56
65
  evalvault/adapters/outbound/analysis/analysis_report_module.py,sha256=xah3wgJErHD_Hpb1YAwWRsxr8xaC8SW--CpNA7IgfxI,3957
57
66
  evalvault/adapters/outbound/analysis/base_module.py,sha256=eUN77SSD2KR4WKU7gLY8TlVewETx_YIZvPT4LUnBv4o,2523
@@ -59,6 +68,7 @@ evalvault/adapters/outbound/analysis/bm25_searcher_module.py,sha256=I8BsXrHaOVxg
59
68
  evalvault/adapters/outbound/analysis/causal_adapter.py,sha256=Rt5QcoLDEjx8u_yidACz3u8SbAVYSJO6lLu6udwnd4U,27410
60
69
  evalvault/adapters/outbound/analysis/causal_analyzer_module.py,sha256=hBcTx7ZyUZ6HQ6I6W2VvSZ1ndatlgMen2KjKXk_Ltx4,6780
61
70
  evalvault/adapters/outbound/analysis/common.py,sha256=H1RqNBiOt7WRcHUM3jFydd3850GFQhEDUu8WBEhtMws,5734
71
+ evalvault/adapters/outbound/analysis/comparison_pipeline_adapter.py,sha256=D_ZPVgQHz3Cn3fxxl-TgLEoo_RNbhsOJCvqH-cJ2Lf4,1577
62
72
  evalvault/adapters/outbound/analysis/comparison_report_module.py,sha256=0tTMZB5qpGMaxlcWtTtTln7Y_jFEDFaaW7V-UyboBDM,2343
63
73
  evalvault/adapters/outbound/analysis/data_loader_module.py,sha256=6X0-ZcFtEfonQnbJ0POqmHXstJ1Wq1NvpijtbKSeEm0,3749
64
74
  evalvault/adapters/outbound/analysis/detailed_report_module.py,sha256=59CjuNQthlroJyGEhQap3PgahWfzXciKx_DD10gHXjM,3897
@@ -125,6 +135,9 @@ evalvault/adapters/outbound/documents/ocr/paddleocr_backend.py,sha256=AORA9JUV5u
125
135
  evalvault/adapters/outbound/domain_memory/__init__.py,sha256=ksMX1IkNiDqQHLtJe9TOXiLC1iouGt6_QSdPLiALHHs,229
126
136
  evalvault/adapters/outbound/domain_memory/domain_memory_schema.sql,sha256=APlNhJNFZdcm7Sb2tvr7V8JMiLinmXkx1gd6pgTf9ZI,11268
127
137
  evalvault/adapters/outbound/domain_memory/sqlite_adapter.py,sha256=RWobnFgvxiItxFAr6niY89sT19O-cnExTbP0I7UAY78,85186
138
+ evalvault/adapters/outbound/filesystem/__init__.py,sha256=eTQLuVPMpEctE92TtegKQT3wuJTIhiBS38BzfxRV-N0,122
139
+ evalvault/adapters/outbound/filesystem/difficulty_profile_writer.py,sha256=9qO9_3E-SL6ngDOia6zcw680S1fQloxo32f6hx76YHs,1626
140
+ evalvault/adapters/outbound/filesystem/ops_snapshot_writer.py,sha256=sOuUk8VD8yWwG8508uw1zqSnNsp3dQrF9bp9T2z-n48,448
128
141
  evalvault/adapters/outbound/improvement/__init__.py,sha256=tXA6vaZOLvqwJpyjGMiC8WrvszMmvUPzJnHjvJhQxSI,1143
129
142
  evalvault/adapters/outbound/improvement/insight_generator.py,sha256=U16l0euCZy0_08Zb_i0eijXSjS5t-iq0iMUfttwPqgI,17636
130
143
  evalvault/adapters/outbound/improvement/pattern_detector.py,sha256=uFFjWNy8A4KIihw_ANtL6At73RirwNnFnN4rFsEvcXk,24602
@@ -148,7 +161,7 @@ evalvault/adapters/outbound/llm/token_aware_chat.py,sha256=yYmynaniNrYxtvXL6ejTE
148
161
  evalvault/adapters/outbound/llm/vllm_adapter.py,sha256=OKb3Nda9OLMmHdvLjvkeJcQVeXf-B8TDibmAs7PS7kg,5157
149
162
  evalvault/adapters/outbound/methods/__init__.py,sha256=3vyE9w3Ex2oMaO4ZE7Fy6xlHhJ6YQXHQNCvBiW9X2lM,345
150
163
  evalvault/adapters/outbound/methods/baseline_oracle.py,sha256=oUsF5sIiPY5vuDtrz0Ki05SnPlnVzn7APERP5v1KpPM,1308
151
- evalvault/adapters/outbound/methods/external_command.py,sha256=gR2mlgr-SCAO3cS3I7pYgS8hL8JE8Y-0VZIhg7USazY,5287
164
+ evalvault/adapters/outbound/methods/external_command.py,sha256=hsWaqMG0u2JhsS736n0t8sobrGSJMNNp1tUL_M4zgyg,6118
152
165
  evalvault/adapters/outbound/methods/registry.py,sha256=Znd35eouoe8k2E0NfDpVlDBSNAAWmyQkqBhAwVWllGI,7635
153
166
  evalvault/adapters/outbound/nlp/__init__.py,sha256=9MQMIjEUU03T0ZZtG-Wjz0Bt2-esGEcfv1kT9W6_CBY,40
154
167
  evalvault/adapters/outbound/nlp/korean/__init__.py,sha256=3ZVFHDxS6jzXat-WhTvW3hnbGNaeFhhWVVN1TtEOlnE,2267
@@ -180,9 +193,10 @@ evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py,sha256=LFnk-3FSL
180
193
  evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py,sha256=D48Mbj-ioDKztjhV9513Q5DiUNiVdO60B_2sWMFEmnI,3520
181
194
  evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py,sha256=inmTAolAVsm0IrszE9VTJoI7HSvGGAnGNZVu_vZRAGg,741
182
195
  evalvault/adapters/outbound/tracker/__init__.py,sha256=Suu5BznOK5uTuD5_jS8JMZd8RPfQNlddLxHCBvMTm_4,358
183
- evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=Gejd3fOBwShfjbtjVcZK9sCJKRz6oB3OaN6KukOYN38,17782
184
- evalvault/adapters/outbound/tracker/mlflow_adapter.py,sha256=Wee1S7OPemPt5SoIdwBHuBdnXmLxNd3lcgQ9NNMKcDQ,7000
185
- evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=TNGU1RqpWwEEw5uQfx7-ClAh4C7wITwu_-X-fyVsCgc,22888
196
+ evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=oi6bnz2qjDSXEea_bACFRUEKk-tKw53d1xMfL4FE5dI,18749
197
+ evalvault/adapters/outbound/tracker/log_sanitizer.py,sha256=ilKTTSzsHslQYc-elnWu0Z3HKNNw1D1iI0_cCvYbo1M,2653
198
+ evalvault/adapters/outbound/tracker/mlflow_adapter.py,sha256=m4xj3XBULFYg27U3twKrldLhbLyLNefezmb2pCpHJrw,7180
199
+ evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=VmGkVuBX5Nae6zTaaSbpcA94spFJGmwjW0gOh2vIxnk,25332
186
200
  evalvault/config/__init__.py,sha256=UCgeDx62M2gOuFvdN29wWwny2fdH4bPY_uUC3-42eDw,1297
187
201
  evalvault/config/agent_types.py,sha256=EP2Pv3ZtOzDXIvIa-Hnd1to9JIbMUtGitrlwzZtx0Ys,13418
188
202
  evalvault/config/domain_config.py,sha256=rOgNA2T8NWlDzcEFC0shdUCCww0lI1E5fUm5QrKQSZI,9264
@@ -190,10 +204,11 @@ evalvault/config/instrumentation.py,sha256=L8on9HjB6Ji8cSOJ6Pepsopfg9okDNMWF7LKZ
190
204
  evalvault/config/langfuse_support.py,sha256=DEzVMfMGGf1V45W_2oUG-NCDfsYI4UUdnYJIgBSrN2o,582
191
205
  evalvault/config/model_config.py,sha256=KlzDbGyDLeOGE7ElekFFk5YjjT5u8i6KO2B4EyZkLnI,3542
192
206
  evalvault/config/phoenix_support.py,sha256=e6RPWd6Qb7KU6Q8pLaYTpJGWULtvEEU6B0xHWyVyOH0,13604
193
- evalvault/config/settings.py,sha256=T92GShlYKDaVinwbsbWX2DmNfm91Cvcvh8Te8pNOTsw,12875
207
+ evalvault/config/secret_manager.py,sha256=YjPMuNqeBrAR2BzCJvsBNUExaU4TBSFyZ8kVYZZifqA,4172
208
+ evalvault/config/settings.py,sha256=JKJf8t20sOHYnHoCfTxqupQixNgfmWYJhChiGMNz-W0,17617
194
209
  evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
195
210
  evalvault/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
196
- evalvault/domain/entities/__init__.py,sha256=RZi_6oQcq-2-sJcydfKOSr03vFxo-mF7CGHN9Ma4Cdg,3379
211
+ evalvault/domain/entities/__init__.py,sha256=wszRJ1Imdc5NJ1bQPC2udk-mAgFdlw4uZV5IPNjLpHQ,3669
197
212
  evalvault/domain/entities/analysis.py,sha256=gcMtumC66g-AIqb2LgfMpm5BMzwJhJkjg-zuybNoJCM,15208
198
213
  evalvault/domain/entities/analysis_pipeline.py,sha256=hD9rFHMa4rUq0InRkSKhh6HQ9ZeNYAHKADzs-kWRP04,16845
199
214
  evalvault/domain/entities/benchmark.py,sha256=CVbz_eW7Y9eM7wG7xA_xmldTIs72csdoTmu3E0NKoMU,18475
@@ -203,6 +218,7 @@ evalvault/domain/entities/debug.py,sha256=r92lgvOpq2svw70syJIo78muRAvrSn5h1JByH_
203
218
  evalvault/domain/entities/experiment.py,sha256=oWjbu0IJZ6oIRcnA-8ppeJDgp57Tv8ZjQ3UOZ0X9KJ8,2576
204
219
  evalvault/domain/entities/feedback.py,sha256=xiaZaUQhyuxyW_i2scXt8eKZshMC6tXe3981e-uukw8,1604
205
220
  evalvault/domain/entities/improvement.py,sha256=WHI7q1jXRxkuHhBWOrpk8UdLaH0UwjZVjRIDsqVDyZo,19322
221
+ evalvault/domain/entities/judge_calibration.py,sha256=fhQEI7g2nZuG1OliikhxgefcFAJldDqEmfTs9Mp-FPk,1234
206
222
  evalvault/domain/entities/kg.py,sha256=8awN1M4vxAGQZk_ZG8i2CXKTizQ8FA1VCLhUWHZq0U8,3001
207
223
  evalvault/domain/entities/memory.py,sha256=bfS75q8K8_jNrB7IYh4mjP8Lkyj-I0TVsmHCP0FuICw,8423
208
224
  evalvault/domain/entities/method.py,sha256=a3jZi7SjcpK3HeVyVwQkUMwpnmg2RbxCnH4NqYPLCOI,1157
@@ -210,7 +226,7 @@ evalvault/domain/entities/prompt.py,sha256=VzuUzqkqXv0FwTSNGyV5sSCft5sxTbG_Noq6Y
210
226
  evalvault/domain/entities/prompt_suggestion.py,sha256=Ep_XSjdYUj7pFSCMyeeZKs8yTnp74AVx05Zqr7829PE,1243
211
227
  evalvault/domain/entities/rag_trace.py,sha256=sZgnkG4fK6KOe3Np6TYAZ_tPnsRbOmucDSQns35U1n4,11868
212
228
  evalvault/domain/entities/result.py,sha256=OaGHMDLWMW2O4fNVuVTUvWFVBQ1iu93OD_oI3NumrCQ,10697
213
- evalvault/domain/entities/stage.py,sha256=dbVzhgpP_p2p2eDJBWe7mwyyl6zUTP9kEKN_YRUvufY,7183
229
+ evalvault/domain/entities/stage.py,sha256=UqS59sjoMs_bhMupNtvagbIx8QgHgFjWoRPhJ3uJP2s,7426
214
230
  evalvault/domain/metrics/__init__.py,sha256=fxjC5Z_8OuBIeMn80bYgnZZxpNoay2wH-qtG3NqCUvk,797
215
231
  evalvault/domain/metrics/analysis_registry.py,sha256=JZpBrBs7-JExHKYuEML6Vg_uYLm-WniBE3BfiU5OtJg,7641
216
232
  evalvault/domain/metrics/confidence.py,sha256=AX4oeN28OvmMkwD0pT-jskkOlXh87C1pe2W9P1sF69g,17224
@@ -224,21 +240,24 @@ evalvault/domain/metrics/terms_dictionary.json,sha256=-ZQmpx6yMOYoAOpcLj-xK2LkAe
224
240
  evalvault/domain/metrics/text_match.py,sha256=P-YTZs9ekDqEmxLNBP8eXnMRymPdC8V4dJPtwG2ajVM,10219
225
241
  evalvault/domain/services/__init__.py,sha256=X5Af1kf_vSt3S3mFwOV6OQdro-lFxwbVdNd7nJznkC8,1024
226
242
  evalvault/domain/services/analysis_service.py,sha256=oUEtfJHB3bNJ_Ksygx-pjnLm4CTk7_rDvDbqfkAfFD4,10838
243
+ evalvault/domain/services/artifact_lint_service.py,sha256=80P46weoj9lBxOqg_ViHZEQ6Cfo69XV4cniZlmMsti0,8434
227
244
  evalvault/domain/services/async_batch_executor.py,sha256=qYFRl7CGmv56XppeRhInde7Fw0GESCoZh8V-Iv_1hQQ,11140
228
245
  evalvault/domain/services/batch_executor.py,sha256=cYA_Q1es46n_PYeyyfm0iM2b7GGVtDoOGoMxexrf6tI,1243
229
246
  evalvault/domain/services/benchmark_report_service.py,sha256=IF-zqtvpsJ0ONJWUEw4ghKiC7ka_PWxUBO10lPaDRmI,15083
230
- evalvault/domain/services/benchmark_runner.py,sha256=iqirGDs-yemSmqLDnCPGA7Wug0ps6z0vRgRsgetypOM,26328
247
+ evalvault/domain/services/benchmark_runner.py,sha256=4tvQEDrfvp2fC2luUPuPBcRjEPLHdrdystLpe3PnBqM,26046
231
248
  evalvault/domain/services/benchmark_service.py,sha256=TrmnvBMAPmcs0PewGZcn2rxHbviZ8KxmDvJCeyqm28I,6286
232
249
  evalvault/domain/services/cache_metrics.py,sha256=FKNZoxym30lc1SxTGmTn3Pr-PDNoAqgC9_d_IdF_jOQ,3463
233
250
  evalvault/domain/services/cluster_map_builder.py,sha256=qPKMPj-eSqECJSCOKvv3ZETgIwxwiKWbU3d6_feCoDg,6885
234
- evalvault/domain/services/dataset_preprocessor.py,sha256=v-shY5ky1oW0LJwBfdfP4VFh7TXBabpLD5rMOmtS-dQ,14235
251
+ evalvault/domain/services/dataset_preprocessor.py,sha256=PnhLiPk0E9DIzjUr8N75296CCfl1AUXGv-lpaXBi0Ok,14797
235
252
  evalvault/domain/services/debug_report_service.py,sha256=SGdFh8tctAIq7RotFbg47eetxdYSS4Yju7-LOzpCMCM,4386
253
+ evalvault/domain/services/difficulty_profile_reporter.py,sha256=uIj9-eiO2dDvQ6tP-DJBddfBq8VT63st0wtNC8Co4NQ,680
254
+ evalvault/domain/services/difficulty_profiling_service.py,sha256=wB3T2iz_dZjvj7wiU2fnM0XT-doMNokV_YqSt24Wc6A,11078
236
255
  evalvault/domain/services/document_chunker.py,sha256=u05N1xSBcJuJPUfP7WmpY_EyHuUMuGMsPSM9qs-ID8c,2494
237
256
  evalvault/domain/services/document_versioning.py,sha256=M1qZaMpQ2exVT1wkVAmvEPPuoYibJDt0F7pYfTK7mvE,3323
238
257
  evalvault/domain/services/domain_learning_hook.py,sha256=rhKBmdnrJyfGzFNsNxzyv8jZO26-WOosHSmBV_9qdJg,7176
239
258
  evalvault/domain/services/embedding_overlay.py,sha256=ZTNxUPXpHGbQ3Uri5DD3feTUFn7qrhuNshhyCQEvRuM,3559
240
259
  evalvault/domain/services/entity_extractor.py,sha256=f3Rf5saK8QsgetLNK1Hbxzt8PtttJZCicSR63S8DJ5k,14141
241
- evalvault/domain/services/evaluator.py,sha256=YReil1mokTILyllAbG_QnFhob-15G5tNeWZZMbSc3yo,67551
260
+ evalvault/domain/services/evaluator.py,sha256=cKcPRoqkUFc00lgx_blbIFl0qJyPIDjPeCciWAPm7m8,67669
242
261
  evalvault/domain/services/experiment_comparator.py,sha256=IBrxIwux-8GucwlLx6e5lUqB9miSPvBLGJK9ctoW7Y0,3299
243
262
  evalvault/domain/services/experiment_manager.py,sha256=2k-qGiAUyZuqqmcp4P-M3Z9HTXwwcqW5HQYKNkcIHuI,4863
244
263
  evalvault/domain/services/experiment_reporter.py,sha256=QYlVmCFSx8hKTPMezc7QjJE07b3MSQ82Q4QVucSHLVY,1420
@@ -247,10 +266,12 @@ evalvault/domain/services/experiment_statistics.py,sha256=aOrqbBjB1swHPaFRziID1m
247
266
  evalvault/domain/services/holdout_splitter.py,sha256=Sos61Zy_bBjStt8LPHJ3KxDNda-OmX7AVUsT24K1n6Q,1910
248
267
  evalvault/domain/services/improvement_guide_service.py,sha256=gMoVFlDsprOEEfRGKmdbk9_Due62J63Q-rL2zr65Q0s,17881
249
268
  evalvault/domain/services/intent_classifier.py,sha256=hsWivDXqXJjCJEE-OI7eUGeYrewpYxlz67Z0TI3oskU,11707
269
+ evalvault/domain/services/judge_calibration_service.py,sha256=cOaAsbfMBlaDxoMAXe8MacDDRK0tCD-tXRnYjB6sEPs,19264
250
270
  evalvault/domain/services/kg_generator.py,sha256=oEugjPdn8Pb2Q3r5yAZl0dZJibNUkEherlRVquknB6k,24969
251
271
  evalvault/domain/services/memory_aware_evaluator.py,sha256=vTiYoxiMfZ_CMjSBjqwkBRdpiXRwQ2zXnQ2pXzVHYts,5249
252
272
  evalvault/domain/services/memory_based_analysis.py,sha256=oh2irCy3le7fWiTtL31SMEhPyu7fyBVz-giO2hlNifE,4499
253
273
  evalvault/domain/services/method_runner.py,sha256=pABqKZeaALpWZYDfzAbd-VOZt2djQggRNIPuuPQeUSw,3571
274
+ evalvault/domain/services/ops_snapshot_service.py,sha256=1CqJN2p3tM6SgzLCZKcVEM213fd1cDGexTRPG_3e59w,5138
254
275
  evalvault/domain/services/pipeline_orchestrator.py,sha256=yriVlEVZYDtt0Vwt4Ae6xyW1H6Dj4Hxdn8XQSvQNSoQ,19436
255
276
  evalvault/domain/services/pipeline_template_registry.py,sha256=aWqXLQ24grpSZo9M4tZLRo1ysD10c6hUpW3JupZH9e0,28083
256
277
  evalvault/domain/services/prompt_candidate_service.py,sha256=Ibyb5EaWK28Ju2HnTqHHGOoiA9Q-VwY3hjxVODALwGY,3997
@@ -260,12 +281,14 @@ evalvault/domain/services/prompt_scoring_service.py,sha256=SlvfuIbhj92RJu4RQAJ1B
260
281
  evalvault/domain/services/prompt_status.py,sha256=r1dFLGz4SfRxXaxsULQsr0-HpJkG9YfZ_yLIxF1MMBo,6731
261
282
  evalvault/domain/services/prompt_suggestion_reporter.py,sha256=Fc6sCPebUMk8SZVpjoJ6bCEun0ma-YmayEQnulBVv8s,10577
262
283
  evalvault/domain/services/ragas_prompt_overrides.py,sha256=4BecYE2KrreUBbIM3ssP9WzHcK_wRc8jW7CE_k58QOU,1412
284
+ evalvault/domain/services/regression_gate_service.py,sha256=qBMODgpizmEzqEL8_JX-FYSVyARiroMW7MFVzlz7gjc,6579
263
285
  evalvault/domain/services/retrieval_metrics.py,sha256=dtrQPLMrXSyWLcgF8EGcLNFwzwA59WDzEh41JRToHAY,2980
264
286
  evalvault/domain/services/retriever_context.py,sha256=ySQ-GuadiggS0LVAib4AxA_0JpasYz4S9hbjau0eyIA,6482
287
+ evalvault/domain/services/run_comparison_service.py,sha256=_NScltCRcY3zrvdyYDiPmssTxCDv1GyjCLdP3uAxJts,5631
265
288
  evalvault/domain/services/satisfaction_calibration_service.py,sha256=H7Z8opOyPHRO5qVIw-XDsNhIwdCteAS9_a3BTlfIqHg,11906
266
- evalvault/domain/services/stage_event_builder.py,sha256=ScTgyeRiH7z_rnNI_2p-i9szVRIRwUxGSJvpEj3zto4,9645
289
+ evalvault/domain/services/stage_event_builder.py,sha256=FAT34Wmylvd2Yz5rDlhaTh1lqSCDhGApCXMi7Hjkib0,9748
267
290
  evalvault/domain/services/stage_metric_guide_service.py,sha256=_JdRsBRWirO24qYFlh6hG-dkoWlX6_XWEYKf_uUlKIQ,8807
268
- evalvault/domain/services/stage_metric_service.py,sha256=KukIWWhWVOtclrET6uyWJ17jG76LfkKiqrUrDIDJ3gw,15327
291
+ evalvault/domain/services/stage_metric_service.py,sha256=_u6ThZ8rGw8H9h3TNpu0j8XhpIfukHSoyc1ZpCa3Z00,18031
269
292
  evalvault/domain/services/stage_summary_service.py,sha256=VVtuAr4vwzvmNFn8rqURJrhKFqAMG4CaBmyGiUk_xG0,1590
270
293
  evalvault/domain/services/synthetic_qa_generator.py,sha256=aiOTPoHZbKRTEeodABQ2I5lq8-Vs_kQtuzcGWd4MTGE,16526
271
294
  evalvault/domain/services/testset_generator.py,sha256=6IpiZ0pqhKEymo-AlUdfJjDkF2P1n8Md_QKV4nOheyg,4470
@@ -278,22 +301,27 @@ evalvault/ports/inbound/analysis_pipeline_port.py,sha256=RJfKtp22AYEqnmRk6RDawAK
278
301
  evalvault/ports/inbound/evaluator_port.py,sha256=rDvouIRUjBD7uICgrpeo11vNPvo27_0CdylRHPodPSE,1323
279
302
  evalvault/ports/inbound/learning_hook_port.py,sha256=EeJeMl3chcPHlj5mkLOj6tm8s_qdDRvoCwK1-0l70tI,3297
280
303
  evalvault/ports/inbound/web_port.py,sha256=b4uMhwOMLXy3LeILc7ZK3RR-XtoW4p4NzoTpj4syptg,5578
281
- evalvault/ports/outbound/__init__.py,sha256=0uPE6CXkoK6ECs3O4OZYAmVg5VAAHoF5rdb1eoj1NAc,3367
304
+ evalvault/ports/outbound/__init__.py,sha256=x3LseXtwX0NONM1mnhT3fMchz7U6gEDRUX0TDswpg5E,3591
282
305
  evalvault/ports/outbound/analysis_cache_port.py,sha256=zPSdUVK_yw3PMWPII2YvS1WLmCGlg5bDScSuYINW9yc,1386
283
306
  evalvault/ports/outbound/analysis_module_port.py,sha256=QYzkvie9-BbONj8ZgiQUjm8I-bn8mgzlXTzIXMhehmQ,1881
284
307
  evalvault/ports/outbound/analysis_port.py,sha256=gE-iXToTgdQomj9JwNZJY4nwut8q0J6EurUmJNsnptQ,2127
308
+ evalvault/ports/outbound/artifact_fs_port.py,sha256=SN966vwHiIjLA06MBWePr7V0NmafbiQbSLFlXAN3YKU,273
285
309
  evalvault/ports/outbound/benchmark_port.py,sha256=pgo3rNbvvJS8x03UxBVQPBBgxc7X5kfG70ZlIf3sopE,7173
286
310
  evalvault/ports/outbound/causal_analysis_port.py,sha256=IsyVdFrs66mHcOc-_VbxrZQriwMrDxx-5a_4ElX5Bp0,941
311
+ evalvault/ports/outbound/comparison_pipeline_port.py,sha256=IOLK6vZdzjSV6Qcvkl9GD-wRxx6Waa3dsYOCFdD1mXY,503
287
312
  evalvault/ports/outbound/dataset_port.py,sha256=OpEBlkvFwpSRbmi-Lt3wK7n0wljmQ6m985mjyNn_qFk,990
313
+ evalvault/ports/outbound/difficulty_profile_port.py,sha256=hQY-TR64WyUNnCxD9Mw-QraO3ZBw0VUP8KoCmVsQYBE,347
288
314
  evalvault/ports/outbound/domain_memory_port.py,sha256=SZFurqsoBmTw1Kt_pej-YpMbooVeyV35jekhaDRojus,23320
289
315
  evalvault/ports/outbound/embedding_port.py,sha256=ZHeKRMRBNjpZKWxsLKrD8jJz0M66JTwNcrJbkRaklK4,2034
290
316
  evalvault/ports/outbound/improvement_port.py,sha256=fIXhcG4n6OJ1hdvWeqEoLBrVsCNdHZRgtEZjR8lf3qA,2325
291
317
  evalvault/ports/outbound/intent_classifier_port.py,sha256=gqMIk0rH6Z43ceuMMRX4vqXurgHZz-CJX2bR5PVAkjQ,2253
318
+ evalvault/ports/outbound/judge_calibration_port.py,sha256=kShZ2MZGvgQZaY7XxwkmLXtquK_RFKcwuWRBfJOrILA,602
292
319
  evalvault/ports/outbound/korean_nlp_port.py,sha256=mJCnxBAkV8a5Nd_VX6QcjfDucY62er8GlaNO4HQA8q8,1572
293
320
  evalvault/ports/outbound/llm_factory_port.py,sha256=lzoDJi6A6ltk-t3N4oY8DSwMBMfnvXGgSduILOpzoas,305
294
321
  evalvault/ports/outbound/llm_port.py,sha256=YAW0i-41yT8KzMuzZGEO5yPDkHN0onGxj55eL0cdPHY,4393
295
322
  evalvault/ports/outbound/method_port.py,sha256=sntcKgwagAdJGxp0dI-S_bhBQcOW9QpnND3fOjrsX9E,1377
296
323
  evalvault/ports/outbound/nlp_analysis_port.py,sha256=QDJHAsSpynTenuaKp78t1s--U036mtYeUEX0p5vQw24,3046
324
+ evalvault/ports/outbound/ops_snapshot_port.py,sha256=6v72W41tlnxjkJfbfHhFiJMPlRSAQ-BvrI2T09_yddk,214
297
325
  evalvault/ports/outbound/relation_augmenter_port.py,sha256=cMcHQnmK111WzZr50vYr7affeHhOtpFZxPARwkg9xbk,651
298
326
  evalvault/ports/outbound/report_port.py,sha256=wgReSYL4SupXIoALFh0QFWfX2kzPftXpWTvGLCMd2B8,1315
299
327
  evalvault/ports/outbound/stage_storage_port.py,sha256=Nlf9upsXxgCABQB5cJdpLQYsoZNiGRAU5zE5D-Ptp2I,1201
@@ -304,8 +332,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
304
332
  evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
305
333
  evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
306
334
  evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
307
- evalvault-1.63.1.dist-info/METADATA,sha256=wkxUE5VIFBr2zmdSpRtcGs8Xwbry3ydY7U2XJFEEzjo,23879
308
- evalvault-1.63.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
309
- evalvault-1.63.1.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
310
- evalvault-1.63.1.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
311
- evalvault-1.63.1.dist-info/RECORD,,
335
+ evalvault-1.65.0.dist-info/METADATA,sha256=1f23dU3LtSQwrlalym1j8mH-vZaM7HWMzDh-ByjVOjo,24276
336
+ evalvault-1.65.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
337
+ evalvault-1.65.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
338
+ evalvault-1.65.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
339
+ evalvault-1.65.0.dist-info/RECORD,,