evalvault 1.75.0__tar.gz → 1.76.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (907)
  1. {evalvault-1.75.0 → evalvault-1.76.0}/PKG-INFO +1 -1
  2. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EXPERIMENT_TRACKING_STACK.md +16 -0
  3. {evalvault-1.75.0 → evalvault-1.76.0}/pyproject.toml +1 -1
  4. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/adapter.py +99 -63
  5. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/config.py +3 -1
  6. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/method.py +2 -2
  7. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/run.py +146 -28
  8. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/run_helpers.py +157 -55
  9. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/factory.py +1 -1
  10. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/phoenix/sync_service.py +99 -0
  11. evalvault-1.76.0/src/evalvault/adapters/outbound/tracker/mlflow_adapter.py +387 -0
  12. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/phoenix_adapter.py +158 -9
  13. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/instrumentation.py +8 -6
  14. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/phoenix_support.py +5 -0
  15. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/settings.py +40 -4
  16. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/evaluator.py +2 -0
  17. {evalvault-1.75.0 → evalvault-1.76.0}/uv.lock +1 -1
  18. evalvault-1.75.0/src/evalvault/adapters/outbound/tracker/mlflow_adapter.py +0 -232
  19. {evalvault-1.75.0 → evalvault-1.76.0}/.dockerignore +0 -0
  20. {evalvault-1.75.0 → evalvault-1.76.0}/.env.example +0 -0
  21. {evalvault-1.75.0 → evalvault-1.76.0}/.env.offline.example +0 -0
  22. {evalvault-1.75.0 → evalvault-1.76.0}/.github/workflows/ci.yml +0 -0
  23. {evalvault-1.75.0 → evalvault-1.76.0}/.github/workflows/regression-gate.yml +0 -0
  24. {evalvault-1.75.0 → evalvault-1.76.0}/.github/workflows/release.yml +0 -0
  25. {evalvault-1.75.0 → evalvault-1.76.0}/.github/workflows/stale.yml +0 -0
  26. {evalvault-1.75.0 → evalvault-1.76.0}/.gitignore +0 -0
  27. {evalvault-1.75.0 → evalvault-1.76.0}/.pre-commit-config.yaml +0 -0
  28. {evalvault-1.75.0 → evalvault-1.76.0}/.python-version +0 -0
  29. {evalvault-1.75.0 → evalvault-1.76.0}/AGENTS.md +0 -0
  30. {evalvault-1.75.0 → evalvault-1.76.0}/CHANGELOG.md +0 -0
  31. {evalvault-1.75.0 → evalvault-1.76.0}/CLAUDE.md +0 -0
  32. {evalvault-1.75.0 → evalvault-1.76.0}/CODE_OF_CONDUCT.md +0 -0
  33. {evalvault-1.75.0 → evalvault-1.76.0}/CONTRIBUTING.md +0 -0
  34. {evalvault-1.75.0 → evalvault-1.76.0}/Dockerfile +0 -0
  35. {evalvault-1.75.0 → evalvault-1.76.0}/LICENSE.md +0 -0
  36. {evalvault-1.75.0 → evalvault-1.76.0}/README.en.md +0 -0
  37. {evalvault-1.75.0 → evalvault-1.76.0}/README.md +0 -0
  38. {evalvault-1.75.0 → evalvault-1.76.0}/SECURITY.md +0 -0
  39. {evalvault-1.75.0 → evalvault-1.76.0}/agent/README.md +0 -0
  40. {evalvault-1.75.0 → evalvault-1.76.0}/agent/agent.py +0 -0
  41. {evalvault-1.75.0 → evalvault-1.76.0}/agent/client.py +0 -0
  42. {evalvault-1.75.0 → evalvault-1.76.0}/agent/config.py +0 -0
  43. {evalvault-1.75.0 → evalvault-1.76.0}/agent/main.py +0 -0
  44. {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/README.md +0 -0
  45. {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/shared/decisions.md +0 -0
  46. {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/shared/dependencies.md +0 -0
  47. {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/templates/coordinator_guide.md +0 -0
  48. {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/templates/work_log_template.md +0 -0
  49. {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory_integration.py +0 -0
  50. {evalvault-1.75.0 → evalvault-1.76.0}/agent/progress.py +0 -0
  51. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/app_spec.txt +0 -0
  52. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/baseline.txt +0 -0
  53. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/coding_prompt.md +0 -0
  54. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/existing_project_prompt.md +0 -0
  55. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/improvement/architecture_prompt.md +0 -0
  56. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/improvement/base_prompt.md +0 -0
  57. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/improvement/coordinator_prompt.md +0 -0
  58. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/improvement/observability_prompt.md +0 -0
  59. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/initializer_prompt.md +0 -0
  60. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/prompt_manifest.json +0 -0
  61. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/system.txt +0 -0
  62. {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts.py +0 -0
  63. {evalvault-1.75.0 → evalvault-1.76.0}/agent/requirements.txt +0 -0
  64. {evalvault-1.75.0 → evalvault-1.76.0}/agent/security.py +0 -0
  65. {evalvault-1.75.0 → evalvault-1.76.0}/config/domains/insurance/memory.yaml +0 -0
  66. {evalvault-1.75.0 → evalvault-1.76.0}/config/domains/insurance/terms_dictionary_en.json +0 -0
  67. {evalvault-1.75.0 → evalvault-1.76.0}/config/domains/insurance/terms_dictionary_ko.json +0 -0
  68. {evalvault-1.75.0 → evalvault-1.76.0}/config/methods.yaml +0 -0
  69. {evalvault-1.75.0 → evalvault-1.76.0}/config/models.yaml +0 -0
  70. {evalvault-1.75.0 → evalvault-1.76.0}/config/ragas_prompts_override.yaml +0 -0
  71. {evalvault-1.75.0 → evalvault-1.76.0}/config/regressions/ci.json +0 -0
  72. {evalvault-1.75.0 → evalvault-1.76.0}/config/regressions/default.json +0 -0
  73. {evalvault-1.75.0 → evalvault-1.76.0}/config/regressions/ux.json +0 -0
  74. {evalvault-1.75.0 → evalvault-1.76.0}/config/stage_metric_playbook.yaml +0 -0
  75. {evalvault-1.75.0 → evalvault-1.76.0}/config/stage_metric_thresholds.json +0 -0
  76. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/dummy_test_dataset.json +0 -0
  77. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean.csv +0 -0
  78. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean.json +0 -0
  79. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean_2.json +0 -0
  80. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean_3.json +0 -0
  81. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/ragas_ko90_en10.json +0 -0
  82. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/sample.json +0 -0
  83. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/visualization_20q_cluster_map.csv +0 -0
  84. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/visualization_20q_korean.json +0 -0
  85. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/visualization_2q_cluster_map.csv +0 -0
  86. {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/visualization_2q_korean.json +0 -0
  87. {evalvault-1.75.0 → evalvault-1.76.0}/data/kg/knowledge_graph.json +0 -0
  88. {evalvault-1.75.0 → evalvault-1.76.0}/data/rag/user_guide_bm25.json +0 -0
  89. {evalvault-1.75.0 → evalvault-1.76.0}/dataset_templates/dataset_template.csv +0 -0
  90. {evalvault-1.75.0 → evalvault-1.76.0}/dataset_templates/dataset_template.json +0 -0
  91. {evalvault-1.75.0 → evalvault-1.76.0}/dataset_templates/dataset_template.xlsx +0 -0
  92. {evalvault-1.75.0 → evalvault-1.76.0}/dataset_templates/method_input_template.json +0 -0
  93. {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.langfuse.yml +0 -0
  94. {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.offline.modelcache.yml +0 -0
  95. {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.offline.yml +0 -0
  96. {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.phoenix.yaml +0 -0
  97. {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.yml +0 -0
  98. {evalvault-1.75.0 → evalvault-1.76.0}/docs/INDEX.md +0 -0
  99. {evalvault-1.75.0 → evalvault-1.76.0}/docs/README.ko.md +0 -0
  100. {evalvault-1.75.0 → evalvault-1.76.0}/docs/ROADMAP.md +0 -0
  101. {evalvault-1.75.0 → evalvault-1.76.0}/docs/STATUS.md +0 -0
  102. {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/adapters/inbound.md +0 -0
  103. {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/adapters/outbound.md +0 -0
  104. {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/config.md +0 -0
  105. {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/domain/entities.md +0 -0
  106. {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/domain/metrics.md +0 -0
  107. {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/domain/services.md +0 -0
  108. {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/ports/inbound.md +0 -0
  109. {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/ports/outbound.md +0 -0
  110. {evalvault-1.75.0 → evalvault-1.76.0}/docs/architecture/open-rag-trace-collector.md +0 -0
  111. {evalvault-1.75.0 → evalvault-1.76.0}/docs/architecture/open-rag-trace-spec.md +0 -0
  112. {evalvault-1.75.0 → evalvault-1.76.0}/docs/getting-started/INSTALLATION.md +0 -0
  113. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/AGENTS_SYSTEM_GUIDE.md +0 -0
  114. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CHAINLIT_INTEGRATION_PLAN.md +0 -0
  115. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CI_REGRESSION_GATE.md +0 -0
  116. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CLI_MCP_PLAN.md +0 -0
  117. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CLI_PARALLEL_FEATURES_SPEC.md +0 -0
  118. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CLI_UX_REDESIGN.md +0 -0
  119. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/DEV_GUIDE.md +0 -0
  120. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/DOCS_REFRESH_PLAN.md +0 -0
  121. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EVALVAULT_DIAGNOSTIC_PLAYBOOK.md +0 -0
  122. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EVALVAULT_RUN_EXCEL_SHEETS.md +0 -0
  123. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EVALVAULT_WORK_PLAN.md +0 -0
  124. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EXTERNAL_TRACE_API_SPEC.md +0 -0
  125. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/Extension_2.md +0 -0
  126. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/Extension_Data_Difficulty_Profiling_Custom_Judge_Model.md +0 -0
  127. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/INSURANCE_SUMMARY_METRICS_PLAN.md +0 -0
  128. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/LENA_MVP_IMPLEMENTATION_PLAN.md +0 -0
  129. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/LENA_RAGAS_CALIBRATION_DEV_PLAN.md +0 -0
  130. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/MULTITURN_EVAL_GUIDE.md +0 -0
  131. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/NEXT_STEPS_EXECUTION_PLAN.md +0 -0
  132. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/OFFLINE_DOCKER.md +0 -0
  133. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/OFFLINE_MODELS.md +0 -0
  134. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/OPEN_RAG_TRACE_INTERNAL_ADAPTER.md +0 -0
  135. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/OPEN_RAG_TRACE_SAMPLES.md +0 -0
  136. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/P0_P3_EXECUTION_REPORT.md +0 -0
  137. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/P1_P4_WORK_PLAN.md +0 -0
  138. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/PARALLEL_WORK_APPROVAL_RULES.md +0 -0
  139. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/PRD_LENA.md +0 -0
  140. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/PROJECT_STATUS_AND_PLAN.md +0 -0
  141. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAGAS_HUMAN_FEEDBACK_CALIBRATION_GUIDE.md +0 -0
  142. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_CLI_WORKFLOW_TEMPLATES.md +0 -0
  143. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_NOISE_REDUCTION_GUIDE.md +0 -0
  144. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_PERFORMANCE_IMPLEMENTATION_LOG.md +0 -0
  145. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_PERFORMANCE_IMPROVEMENT_PROPOSAL.md +0 -0
  146. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_PGVECTOR_PREINDEX_PLAN.md +0 -0
  147. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RELEASE_CHECKLIST.md +0 -0
  148. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/USER_GUIDE.md +0 -0
  149. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/WEBUI_CLI_ROLLOUT_PLAN.md +0 -0
  150. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/WORKLOG_LAST_2_DAYS.md +0 -0
  151. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/_DEPRECATED_NOTICE.md +0 -0
  152. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/cli_process.md +0 -0
  153. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/prompt_suggestions_design.md +0 -0
  154. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/rag_human_feedback_calibration_implementation_plan.md +0 -0
  155. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/refactoring_strategy.md +0 -0
  156. {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/repeat_query.md +0 -0
  157. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/00_overview.md +0 -0
  158. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/01_architecture.md +0 -0
  159. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/02_data_and_metrics.md +0 -0
  160. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/03_workflows.md +0 -0
  161. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/04_operations.md +0 -0
  162. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/05_security.md +0 -0
  163. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/06_quality_and_testing.md +0 -0
  164. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/07_ux_and_product.md +0 -0
  165. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/08_roadmap.md +0 -0
  166. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/09_competitive_positioning.md +0 -0
  167. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/EXTERNAL.md +0 -0
  168. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/INDEX.md +0 -0
  169. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/WORKLOG_DOCS_CLEANUP_2026-01-29.md +0 -0
  170. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/appendix-coverage-matrix.md +0 -0
  171. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/appendix-file-inventory.md +0 -0
  172. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/appendix-roadmap.md +0 -0
  173. {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/appendix-taxonomy.md +0 -0
  174. {evalvault-1.75.0 → evalvault-1.76.0}/docs/mapping/component-to-whitepaper.yaml +0 -0
  175. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/00_frontmatter.md +0 -0
  176. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/01_overview.md +0 -0
  177. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/02_architecture.md +0 -0
  178. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/03_data_flow.md +0 -0
  179. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/04_components.md +0 -0
  180. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/05_expert_lenses.md +0 -0
  181. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/06_implementation.md +0 -0
  182. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/07_advanced.md +0 -0
  183. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/08_customization.md +0 -0
  184. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/09_quality.md +0 -0
  185. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/10_performance.md +0 -0
  186. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/11_security.md +0 -0
  187. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/12_operations.md +0 -0
  188. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/13_standards.md +0 -0
  189. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/14_roadmap.md +0 -0
  190. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/INDEX.md +0 -0
  191. {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/STYLE_GUIDE.md +0 -0
  192. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_000_master_plan.md +0 -0
  193. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_010_agent_playbook.md +0 -0
  194. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_020_logging_policy.md +0 -0
  195. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_030_phase0_responsibility_map.md +0 -0
  196. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_040_wbs_parallel_plan.md +0 -0
  197. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/logs/phase-0-baseline.md +0 -0
  198. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/logs/phase-1-evaluator.md +0 -0
  199. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/logs/phase-2-cli-run.md +0 -0
  200. {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/logs/phase-3-analysis.md +0 -0
  201. {evalvault-1.75.0 → evalvault-1.76.0}/docs/security_audit_worklog.md +0 -0
  202. {evalvault-1.75.0 → evalvault-1.76.0}/docs/stylesheets/extra.css +0 -0
  203. {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/dataset_template.csv +0 -0
  204. {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/dataset_template.json +0 -0
  205. {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/dataset_template.xlsx +0 -0
  206. {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/eval_report_templates.md +0 -0
  207. {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/kg_template.json +0 -0
  208. {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/otel_openinference_trace_example.json +0 -0
  209. {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/ragas_dataset_example_ko90_en10.json +0 -0
  210. {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/retriever_docs_template.json +0 -0
  211. {evalvault-1.75.0 → evalvault-1.76.0}/docs/tools/generate-whitepaper.py +0 -0
  212. {evalvault-1.75.0 → evalvault-1.76.0}/docs/web_ui_analysis_migration_plan.md +0 -0
  213. {evalvault-1.75.0 → evalvault-1.76.0}/dummy_test_dataset.json +0 -0
  214. {evalvault-1.75.0 → evalvault-1.76.0}/examples/README.md +0 -0
  215. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/README.md +0 -0
  216. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/faithfulness_test.json +0 -0
  217. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/insurance_qa_100.json +0 -0
  218. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/keyword_extraction_test.json +0 -0
  219. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/retrieval_test.json +0 -0
  220. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/comparison.json +0 -0
  221. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/full_results.json +0 -0
  222. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/leaderboard.json +0 -0
  223. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/results_mteb.json +0 -0
  224. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/retrieval_result.json +0 -0
  225. {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/run_korean_benchmark.py +0 -0
  226. {evalvault-1.75.0 → evalvault-1.76.0}/examples/kg_generator_demo.py +0 -0
  227. {evalvault-1.75.0 → evalvault-1.76.0}/examples/method_plugin_template/README.md +0 -0
  228. {evalvault-1.75.0 → evalvault-1.76.0}/examples/method_plugin_template/pyproject.toml +0 -0
  229. {evalvault-1.75.0 → evalvault-1.76.0}/examples/method_plugin_template/src/method_plugin_template/__init__.py +0 -0
  230. {evalvault-1.75.0 → evalvault-1.76.0}/examples/method_plugin_template/src/method_plugin_template/methods.py +0 -0
  231. {evalvault-1.75.0 → evalvault-1.76.0}/examples/stage_events.jsonl +0 -0
  232. {evalvault-1.75.0 → evalvault-1.76.0}/examples/usecase/comprehensive_workflow_test.py +0 -0
  233. {evalvault-1.75.0 → evalvault-1.76.0}/examples/usecase/insurance_eval_dataset.json +0 -0
  234. {evalvault-1.75.0 → evalvault-1.76.0}/examples/usecase/output/comprehensive_report.html +0 -0
  235. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/.env.example +0 -0
  236. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/.gitignore +0 -0
  237. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/Dockerfile +0 -0
  238. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/README.md +0 -0
  239. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/analysis-compare.spec.ts +0 -0
  240. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/analysis-lab.spec.ts +0 -0
  241. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/compare-runs.spec.ts +0 -0
  242. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/dashboard.spec.ts +0 -0
  243. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/domain-memory.spec.ts +0 -0
  244. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/evaluation-studio.spec.ts +0 -0
  245. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/judge-calibration.spec.ts +0 -0
  246. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/knowledge-base.spec.ts +0 -0
  247. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/mocks/intents.json +0 -0
  248. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/mocks/run_details.json +0 -0
  249. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/mocks/runs.json +0 -0
  250. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/run-details.spec.ts +0 -0
  251. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/eslint.config.js +0 -0
  252. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/index.html +0 -0
  253. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/nginx.conf +0 -0
  254. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/package-lock.json +0 -0
  255. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/package.json +0 -0
  256. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/playwright.config.ts +0 -0
  257. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/public/vite.svg +0 -0
  258. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/App.css +0 -0
  259. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/App.tsx +0 -0
  260. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/assets/react.svg +0 -0
  261. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/AnalysisNodeOutputs.tsx +0 -0
  262. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/InsightSpacePanel.tsx +0 -0
  263. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/Layout.tsx +0 -0
  264. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/MarkdownContent.tsx +0 -0
  265. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/PrioritySummaryPanel.tsx +0 -0
  266. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/SpaceLegend.tsx +0 -0
  267. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/SpacePlot2D.tsx +0 -0
  268. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/SpacePlot3D.tsx +0 -0
  269. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/StatusBadge.tsx +0 -0
  270. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ToastProvider.tsx +0 -0
  271. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/VirtualizedText.tsx +0 -0
  272. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Conversation.tsx +0 -0
  273. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Message.tsx +0 -0
  274. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/PromptInput.tsx +0 -0
  275. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Response.tsx +0 -0
  276. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/index.ts +0 -0
  277. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/config/ui.ts +0 -0
  278. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/config.ts +0 -0
  279. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/hooks/useInsightSpace.ts +0 -0
  280. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/index.css +0 -0
  281. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/main.tsx +0 -0
  282. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/AiSdkChat.tsx +0 -0
  283. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisCompareView.tsx +0 -0
  284. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisLab.tsx +0 -0
  285. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisResultView.tsx +0 -0
  286. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/Chat.tsx +0 -0
  287. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/CompareRuns.tsx +0 -0
  288. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/ComprehensiveAnalysis.tsx +0 -0
  289. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/CustomerReport.tsx +0 -0
  290. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/Dashboard.tsx +0 -0
  291. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/DomainMemory.tsx +0 -0
  292. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/EvaluationStudio.tsx +0 -0
  293. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/JudgeCalibration.tsx +0 -0
  294. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/KnowledgeBase.tsx +0 -0
  295. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/RunDetails.tsx +0 -0
  296. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/Settings.tsx +0 -0
  297. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/Visualization.tsx +0 -0
  298. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/VisualizationHome.tsx +0 -0
  299. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/services/api.ts +0 -0
  300. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/types/plotly.d.ts +0 -0
  301. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/cliCommandBuilder.ts +0 -0
  302. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/clipboard.ts +0 -0
  303. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/format.ts +0 -0
  304. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/phoenix.ts +0 -0
  305. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/runAnalytics.ts +0 -0
  306. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/score.ts +0 -0
  307. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/summaryMetrics.ts +0 -0
  308. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/tailwind.config.js +0 -0
  309. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/tsconfig.app.json +0 -0
  310. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/tsconfig.json +0 -0
  311. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/tsconfig.node.json +0 -0
  312. {evalvault-1.75.0 → evalvault-1.76.0}/frontend/vite.config.ts +0 -0
  313. {evalvault-1.75.0 → evalvault-1.76.0}/mkdocs.yml +0 -0
  314. {evalvault-1.75.0 → evalvault-1.76.0}/package-lock.json +0 -0
  315. {evalvault-1.75.0 → evalvault-1.76.0}/prompts/system_override.txt +0 -0
  316. {evalvault-1.75.0 → evalvault-1.76.0}/reports/.gitkeep +0 -0
  317. {evalvault-1.75.0 → evalvault-1.76.0}/reports/README.md +0 -0
  318. {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r1_smoke.md +0 -0
  319. {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r2_graphrag.md +0 -0
  320. {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r2_graphrag_openai.md +0 -0
  321. {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r3_bm25.md +0 -0
  322. {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r3_bm25_langfuse3.md +0 -0
  323. {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r3_dense_faiss.md +0 -0
  324. {evalvault-1.75.0 → evalvault-1.76.0}/reports/feature_verification_report.md +0 -0
  325. {evalvault-1.75.0 → evalvault-1.76.0}/reports/improvement_1d91a667-4288-4742-be3a-a8f5310c5140.md +0 -0
  326. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r2_graphrag_openai_stage_events.jsonl +0 -0
  327. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r2_graphrag_openai_stage_report.txt +0 -0
  328. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r2_graphrag_stage_events.jsonl +0 -0
  329. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r2_graphrag_stage_report.txt +0 -0
  330. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse2_stage_events.jsonl +0 -0
  331. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse3_stage_events.jsonl +0 -0
  332. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse_stage_events.jsonl +0 -0
  333. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_phoenix_stage_events.jsonl +0 -0
  334. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_stage_events.jsonl +0 -0
  335. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_stage_report.txt +0 -0
  336. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_dense_faiss_stage_events.jsonl +0 -0
  337. {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_dense_faiss_stage_report.txt +0 -0
  338. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/benchmark/download_kmmlu.py +0 -0
  339. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/ci/run_regression_gate.py +0 -0
  340. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/open_rag_trace_demo.py +0 -0
  341. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/open_rag_trace_integration_template.py +0 -0
  342. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/otel-collector-config.yaml +0 -0
  343. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/preindex_pgvector_runs.py +0 -0
  344. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/start_web_ui_with_phoenix.sh +0 -0
  345. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/validate_open_rag_trace.py +0 -0
  346. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/verify_dashboard_endpoint.sh +0 -0
  347. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev_seed_pipeline_results.py +0 -0
  348. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/__init__.py +0 -0
  349. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/__init__.py +0 -0
  350. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/ast_scanner.py +0 -0
  351. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/confidence_scorer.py +0 -0
  352. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/graph_builder.py +0 -0
  353. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/side_effect_detector.py +0 -0
  354. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/generate_api_docs.py +0 -0
  355. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/models/__init__.py +0 -0
  356. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/models/schema.py +0 -0
  357. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/renderer/__init__.py +0 -0
  358. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/renderer/html_generator.py +0 -0
  359. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/build_full_offline_bundle.sh +0 -0
  360. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/bundle_datasets.sh +0 -0
  361. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/bundle_model_cache.sh +0 -0
  362. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/export_api_base_only.sh +0 -0
  363. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/export_base_images.sh +0 -0
  364. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/export_images.sh +0 -0
  365. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/import_images.sh +0 -0
  366. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/load_base_images.sh +0 -0
  367. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/predownload_nlp_models.py +0 -0
  368. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/restore_datasets.sh +0 -0
  369. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/restore_model_cache.sh +0 -0
  370. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/smoke_test.sh +0 -0
  371. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/ops/phoenix_watch.py +0 -0
  372. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/backfill_langfuse_trace_url.py +0 -0
  373. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_dense_smoke.py +0 -0
  374. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_evalvault_run_dataset.json +0 -0
  375. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_retriever_docs.json +0 -0
  376. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_smoke_real.jsonl +0 -0
  377. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_stage_events_sample.jsonl +0 -0
  378. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/pipeline_template_inspect.py +0 -0
  379. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/reports/generate_release_notes.py +0 -0
  380. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/run_with_timeout.py +0 -0
  381. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/test_full_evaluation.py +0 -0
  382. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/tests/run_regressions.py +0 -0
  383. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/tests/run_retriever_stage_report_smoke.sh +0 -0
  384. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/validate_tutorials.py +0 -0
  385. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/verify_ragas_compliance.py +0 -0
  386. {evalvault-1.75.0 → evalvault-1.76.0}/scripts/verify_workflows.py +0 -0
  387. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/__init__.py +0 -0
  388. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/__init__.py +0 -0
  389. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/__init__.py +0 -0
  390. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/__init__.py +0 -0
  391. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/main.py +0 -0
  392. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/__init__.py +0 -0
  393. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/benchmark.py +0 -0
  394. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/calibration.py +0 -0
  395. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/chat.py +0 -0
  396. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/domain.py +0 -0
  397. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/knowledge.py +0 -0
  398. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/mcp.py +0 -0
  399. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/pipeline.py +0 -0
  400. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/runs.py +0 -0
  401. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/__init__.py +0 -0
  402. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/app.py +0 -0
  403. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/__init__.py +0 -0
  404. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/agent.py +0 -0
  405. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/analyze.py +0 -0
  406. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/api.py +0 -0
  407. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/artifacts.py +0 -0
  408. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/benchmark.py +0 -0
  409. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/calibrate.py +0 -0
  410. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/calibrate_judge.py +0 -0
  411. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/compare.py +0 -0
  412. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/config.py +0 -0
  413. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/debug.py +0 -0
  414. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/domain.py +0 -0
  415. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/experiment.py +0 -0
  416. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/gate.py +0 -0
  417. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/generate.py +0 -0
  418. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/graph_rag.py +0 -0
  419. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/history.py +0 -0
  420. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/init.py +0 -0
  421. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/kg.py +0 -0
  422. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/langfuse.py +0 -0
  423. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/ops.py +0 -0
  424. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/phoenix.py +0 -0
  425. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/pipeline.py +0 -0
  426. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/profile_difficulty.py +0 -0
  427. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/prompts.py +0 -0
  428. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/regress.py +0 -0
  429. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/stage.py +0 -0
  430. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/__init__.py +0 -0
  431. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/analysis_io.py +0 -0
  432. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/console.py +0 -0
  433. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/errors.py +0 -0
  434. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/formatters.py +0 -0
  435. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/options.py +0 -0
  436. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/presets.py +0 -0
  437. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/progress.py +0 -0
  438. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/validators.py +0 -0
  439. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/__init__.py +0 -0
  440. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/schemas.py +0 -0
  441. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/tools.py +0 -0
  442. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/__init__.py +0 -0
  443. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/__init__.py +0 -0
  444. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/analysis_report_module.py +0 -0
  445. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/base_module.py +0 -0
  446. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/bm25_searcher_module.py +0 -0
  447. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/causal_adapter.py +0 -0
  448. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/causal_analyzer_module.py +0 -0
  449. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/common.py +0 -0
  450. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/comparison_pipeline_adapter.py +0 -0
  451. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/comparison_report_module.py +0 -0
  452. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/data_loader_module.py +0 -0
  453. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/dataset_feature_analyzer_module.py +0 -0
  454. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/detailed_report_module.py +0 -0
  455. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/diagnostic_playbook_module.py +0 -0
  456. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +0 -0
  457. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_distribution_module.py +0 -0
  458. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_searcher_module.py +0 -0
  459. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hybrid_rrf_module.py +0 -0
  460. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hybrid_weighted_module.py +0 -0
  461. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hypothesis_generator_module.py +0 -0
  462. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/llm_report_module.py +0 -0
  463. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/low_performer_extractor_module.py +0 -0
  464. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/model_analyzer_module.py +0 -0
  465. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/morpheme_analyzer_module.py +0 -0
  466. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/morpheme_quality_checker_module.py +0 -0
  467. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/multiturn_analyzer_module.py +0 -0
  468. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/network_analyzer_module.py +0 -0
  469. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/nlp_adapter.py +0 -0
  470. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/nlp_analyzer_module.py +0 -0
  471. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pattern_detector_module.py +0 -0
  472. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pipeline_factory.py +0 -0
  473. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pipeline_helpers.py +0 -0
  474. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/priority_summary_module.py +0 -0
  475. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/ragas_evaluator_module.py +0 -0
  476. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py +0 -0
  477. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py +0 -0
  478. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_quality_checker_module.py +0 -0
  479. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/root_cause_analyzer_module.py +0 -0
  480. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_analyzer_module.py +0 -0
  481. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_change_detector_module.py +0 -0
  482. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_comparator_module.py +0 -0
  483. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_loader_module.py +0 -0
  484. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_metric_comparator_module.py +0 -0
  485. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/search_comparator_module.py +0 -0
  486. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_adapter.py +0 -0
  487. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_analyzer_module.py +0 -0
  488. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_comparator_module.py +0 -0
  489. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/summary_report_module.py +0 -0
  490. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/time_series_analyzer_module.py +0 -0
  491. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/timeseries_advanced_module.py +0 -0
  492. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/trend_detector_module.py +0 -0
  493. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/verification_report_module.py +0 -0
  494. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/artifact_fs.py +0 -0
  495. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/benchmark/__init__.py +0 -0
  496. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/benchmark/lm_eval_adapter.py +0 -0
  497. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/__init__.py +0 -0
  498. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/hybrid_cache.py +0 -0
  499. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/memory_cache.py +0 -0
  500. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/__init__.py +0 -0
  501. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/base.py +0 -0
  502. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/csv_loader.py +0 -0
  503. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/excel_loader.py +0 -0
  504. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/json_loader.py +0 -0
  505. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/loader_factory.py +0 -0
  506. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/method_input_loader.py +0 -0
  507. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/multiturn_json_loader.py +0 -0
  508. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/streaming_loader.py +0 -0
  509. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/templates.py +0 -0
  510. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/thresholds.py +0 -0
  511. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/debug/__init__.py +0 -0
  512. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/debug/report_renderer.py +0 -0
  513. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/__init__.py +0 -0
  514. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/ocr/__init__.py +0 -0
  515. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/ocr/paddleocr_backend.py +0 -0
  516. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/pdf_extractor.py +0 -0
  517. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/versioned_loader.py +0 -0
  518. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/__init__.py +0 -0
  519. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/domain_memory_schema.sql +0 -0
  520. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/factory.py +0 -0
  521. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/postgres_adapter.py +0 -0
  522. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +0 -0
  523. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/sqlite_adapter.py +0 -0
  524. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/__init__.py +0 -0
  525. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/difficulty_profile_writer.py +0 -0
  526. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/ops_snapshot_writer.py +0 -0
  527. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/__init__.py +0 -0
  528. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/insight_generator.py +0 -0
  529. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/pattern_detector.py +0 -0
  530. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/playbook_loader.py +0 -0
  531. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/stage_metric_playbook_loader.py +0 -0
  532. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/judge_calibration_adapter.py +0 -0
  533. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/judge_calibration_reporter.py +0 -0
  534. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/__init__.py +0 -0
  535. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/graph_rag_retriever.py +0 -0
  536. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/networkx_adapter.py +0 -0
  537. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/parallel_kg_builder.py +0 -0
  538. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/query_strategies.py +0 -0
  539. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/__init__.py +0 -0
  540. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/anthropic_adapter.py +0 -0
  541. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/azure_adapter.py +0 -0
  542. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/base.py +0 -0
  543. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/instructor_factory.py +0 -0
  544. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/llm_relation_augmenter.py +0 -0
  545. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/ollama_adapter.py +0 -0
  546. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/openai_adapter.py +0 -0
  547. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/token_aware_chat.py +0 -0
  548. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/vllm_adapter.py +0 -0
  549. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/__init__.py +0 -0
  550. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/baseline_oracle.py +0 -0
  551. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/external_command.py +0 -0
  552. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/registry.py +0 -0
  553. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/__init__.py +0 -0
  554. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/__init__.py +0 -0
  555. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/bm25_retriever.py +0 -0
  556. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/dense_retriever.py +0 -0
  557. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/document_chunker.py +0 -0
  558. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/hybrid_retriever.py +0 -0
  559. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/kiwi_tokenizer.py +0 -0
  560. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/korean_evaluation.py +0 -0
  561. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/korean_stopwords.py +0 -0
  562. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/toolkit.py +0 -0
  563. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/toolkit_factory.py +0 -0
  564. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/ops/__init__.py +0 -0
  565. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/ops/report_renderer.py +0 -0
  566. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/__init__.py +0 -0
  567. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/ci_report_formatter.py +0 -0
  568. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/dashboard_generator.py +0 -0
  569. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/llm_report_generator.py +0 -0
  570. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/markdown_adapter.py +0 -0
  571. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/pr_comment_formatter.py +0 -0
  572. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/retriever/__init__.py +0 -0
  573. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/retriever/graph_rag_adapter.py +0 -0
  574. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/retriever/pgvector_store.py +0 -0
  575. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/__init__.py +0 -0
  576. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/base_sql.py +0 -0
  577. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/benchmark_storage_adapter.py +0 -0
  578. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/factory.py +0 -0
  579. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/postgres_adapter.py +0 -0
  580. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/postgres_schema.sql +0 -0
  581. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/schema.sql +0 -0
  582. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/sqlite_adapter.py +0 -0
  583. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/__init__.py +0 -0
  584. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_log_handler.py +0 -0
  585. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +0 -0
  586. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py +0 -0
  587. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py +0 -0
  588. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py +0 -0
  589. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/__init__.py +0 -0
  590. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/langfuse_adapter.py +0 -0
  591. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/log_sanitizer.py +0 -0
  592. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/__init__.py +0 -0
  593. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/agent_types.py +0 -0
  594. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/domain_config.py +0 -0
  595. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/langfuse_support.py +0 -0
  596. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/model_config.py +0 -0
  597. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/playbooks/improvement_playbook.yaml +0 -0
  598. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/secret_manager.py +0 -0
  599. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/debug_ragas.py +0 -0
  600. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/debug_ragas_real.py +0 -0
  601. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/__init__.py +0 -0
  602. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/__init__.py +0 -0
  603. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/analysis.py +0 -0
  604. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/analysis_pipeline.py +0 -0
  605. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/benchmark.py +0 -0
  606. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/benchmark_run.py +0 -0
  607. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/dataset.py +0 -0
  608. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/debug.py +0 -0
  609. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/experiment.py +0 -0
  610. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/feedback.py +0 -0
  611. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/graph_rag.py +0 -0
  612. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/improvement.py +0 -0
  613. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/judge_calibration.py +0 -0
  614. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/kg.py +0 -0
  615. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/memory.py +0 -0
  616. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/method.py +0 -0
  617. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/multiturn.py +0 -0
  618. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/ops_report.py +0 -0
  619. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/prompt.py +0 -0
  620. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/prompt_suggestion.py +0 -0
  621. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/rag_trace.py +0 -0
  622. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/result.py +0 -0
  623. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/stage.py +0 -0
  624. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/__init__.py +0 -0
  625. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/analysis_registry.py +0 -0
  626. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/confidence.py +0 -0
  627. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/contextual_relevancy.py +0 -0
  628. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/entity_preservation.py +0 -0
  629. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/insurance.py +0 -0
  630. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/multiturn_metrics.py +0 -0
  631. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/no_answer.py +0 -0
  632. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/registry.py +0 -0
  633. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/retrieval_rank.py +0 -0
  634. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_accuracy.py +0 -0
  635. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_needs_followup.py +0 -0
  636. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_non_definitive.py +0 -0
  637. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_risk_coverage.py +0 -0
  638. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/terms_dictionary.json +0 -0
  639. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/text_match.py +0 -0
  640. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/__init__.py +0 -0
  641. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/analysis_service.py +0 -0
  642. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/artifact_lint_service.py +0 -0
  643. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/async_batch_executor.py +0 -0
  644. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/batch_executor.py +0 -0
  645. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_report_service.py +0 -0
  646. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_runner.py +0 -0
  647. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_service.py +0 -0
  648. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/cache_metrics.py +0 -0
  649. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/cluster_map_builder.py +0 -0
  650. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/custom_metric_snapshot.py +0 -0
  651. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/dataset_preprocessor.py +0 -0
  652. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/debug_report_service.py +0 -0
  653. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/difficulty_profile_reporter.py +0 -0
  654. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/difficulty_profiling_service.py +0 -0
  655. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/document_chunker.py +0 -0
  656. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/document_versioning.py +0 -0
  657. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/domain_learning_hook.py +0 -0
  658. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/embedding_overlay.py +0 -0
  659. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/entity_extractor.py +0 -0
  660. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_comparator.py +0 -0
  661. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_manager.py +0 -0
  662. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_reporter.py +0 -0
  663. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_repository.py +0 -0
  664. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_statistics.py +0 -0
  665. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/graph_rag_experiment.py +0 -0
  666. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/holdout_splitter.py +0 -0
  667. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/improvement_guide_service.py +0 -0
  668. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/intent_classifier.py +0 -0
  669. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/judge_calibration_service.py +0 -0
  670. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/kg_generator.py +0 -0
  671. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/memory_aware_evaluator.py +0 -0
  672. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/memory_based_analysis.py +0 -0
  673. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/method_runner.py +0 -0
  674. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/multiturn_evaluator.py +0 -0
  675. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/ops_report_service.py +0 -0
  676. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/ops_snapshot_service.py +0 -0
  677. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/pipeline_orchestrator.py +0 -0
  678. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/pipeline_template_registry.py +0 -0
  679. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_candidate_service.py +0 -0
  680. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_manifest.py +0 -0
  681. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_registry.py +0 -0
  682. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_scoring_service.py +0 -0
  683. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_status.py +0 -0
  684. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_suggestion_reporter.py +0 -0
  685. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/ragas_prompt_overrides.py +0 -0
  686. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/regression_gate_service.py +0 -0
  687. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/retrieval_metrics.py +0 -0
  688. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/retriever_context.py +0 -0
  689. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/run_comparison_service.py +0 -0
  690. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/satisfaction_calibration_service.py +0 -0
  691. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_event_builder.py +0 -0
  692. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_metric_guide_service.py +0 -0
  693. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_metric_service.py +0 -0
  694. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_summary_service.py +0 -0
  695. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/synthetic_qa_generator.py +0 -0
  696. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/testset_generator.py +0 -0
  697. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/threshold_profiles.py +0 -0
  698. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/unified_report_service.py +0 -0
  699. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/visual_space_service.py +0 -0
  700. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/mkdocs_helpers.py +0 -0
  701. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/__init__.py +0 -0
  702. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/__init__.py +0 -0
  703. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/analysis_pipeline_port.py +0 -0
  704. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/evaluator_port.py +0 -0
  705. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/learning_hook_port.py +0 -0
  706. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/multiturn_port.py +0 -0
  707. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/web_port.py +0 -0
  708. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/__init__.py +0 -0
  709. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_cache_port.py +0 -0
  710. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_module_port.py +0 -0
  711. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_port.py +0 -0
  712. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/artifact_fs_port.py +0 -0
  713. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/benchmark_port.py +0 -0
  714. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/causal_analysis_port.py +0 -0
  715. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/comparison_pipeline_port.py +0 -0
  716. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/dataset_port.py +0 -0
  717. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/difficulty_profile_port.py +0 -0
  718. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/domain_memory_port.py +0 -0
  719. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/embedding_port.py +0 -0
  720. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/graph_retriever_port.py +0 -0
  721. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/improvement_port.py +0 -0
  722. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/intent_classifier_port.py +0 -0
  723. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/judge_calibration_port.py +0 -0
  724. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/korean_nlp_port.py +0 -0
  725. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/llm_factory_port.py +0 -0
  726. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/llm_port.py +0 -0
  727. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/method_port.py +0 -0
  728. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/nlp_analysis_port.py +0 -0
  729. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/ops_snapshot_port.py +0 -0
  730. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/relation_augmenter_port.py +0 -0
  731. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/report_port.py +0 -0
  732. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/stage_storage_port.py +0 -0
  733. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/storage_port.py +0 -0
  734. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/tracer_port.py +0 -0
  735. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/tracker_port.py +0 -0
  736. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/reports/__init__.py +0 -0
  737. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/reports/release_notes.py +0 -0
  738. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/scripts/__init__.py +0 -0
  739. {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/scripts/regression_runner.py +0 -0
  740. {evalvault-1.75.0 → evalvault-1.76.0}/tests/__init__.py +0 -0
  741. {evalvault-1.75.0 → evalvault-1.76.0}/tests/conftest.py +0 -0
  742. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/README.md +0 -0
  743. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/benchmark/retrieval_ground_truth_min.json +0 -0
  744. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/benchmark/retrieval_ground_truth_multi.json +0 -0
  745. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/auto_insurance_qa_korean_full.json +0 -0
  746. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/callcenter_summary_5cases.json +0 -0
  747. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/comprehensive_dataset.json +0 -0
  748. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/edge_cases.json +0 -0
  749. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/edge_cases.xlsx +0 -0
  750. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/evaluation_test_sample.json +0 -0
  751. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_benchmark.json +0 -0
  752. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_multi_sample.json +0 -0
  753. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_retriever_docs.json +0 -0
  754. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_smoke.json +0 -0
  755. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_document.txt +0 -0
  756. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.csv +0 -0
  757. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.json +0 -0
  758. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.xlsx +0 -0
  759. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.csv +0 -0
  760. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.json +0 -0
  761. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.xlsx +0 -0
  762. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean_versioned_pdf.json +0 -0
  763. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/multiturn_benchmark.json +0 -0
  764. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/regression_baseline.json +0 -0
  765. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/run_mode_full_domain_memory.json +0 -0
  766. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/run_mode_simple.json +0 -0
  767. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/summary_eval_minimal.json +0 -0
  768. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/kg/minimal_graph.json +0 -0
  769. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.csv +0 -0
  770. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.json +0 -0
  771. {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.xlsx +0 -0
  772. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/__init__.py +0 -0
  773. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/benchmark/test_benchmark_service_integration.py +0 -0
  774. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/conftest.py +0 -0
  775. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_cli_integration.py +0 -0
  776. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_data_flow.py +0 -0
  777. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_e2e_scenarios.py +0 -0
  778. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_evaluation_flow.py +0 -0
  779. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_full_workflow.py +0 -0
  780. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_langfuse_flow.py +0 -0
  781. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_phoenix_flow.py +0 -0
  782. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_pipeline_api_contracts.py +0 -0
  783. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_storage_flow.py +0 -0
  784. {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_summary_eval_fixture.py +0 -0
  785. {evalvault-1.75.0 → evalvault-1.76.0}/tests/optional_deps.py +0 -0
  786. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/__init__.py +0 -0
  787. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/inbound/mcp/test_execute_tools.py +0 -0
  788. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/inbound/mcp/test_read_tools.py +0 -0
  789. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/documents/test_pdf_extractor.py +0 -0
  790. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/documents/test_versioned_loader.py +0 -0
  791. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/__init__.py +0 -0
  792. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_insight_generator.py +0 -0
  793. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_pattern_detector.py +0 -0
  794. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_playbook_loader.py +0 -0
  795. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_stage_metric_playbook_loader.py +0 -0
  796. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/kg/test_graph_rag_retriever.py +0 -0
  797. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/kg/test_parallel_kg_builder.py +0 -0
  798. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/retriever/test_graph_rag_adapter.py +0 -0
  799. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/storage/test_benchmark_storage_adapter.py +0 -0
  800. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/config/test_phoenix_support.py +0 -0
  801. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/conftest.py +0 -0
  802. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_analysis_metric_registry.py +0 -0
  803. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_confidence.py +0 -0
  804. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_contextual_relevancy.py +0 -0
  805. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_entity_preservation.py +0 -0
  806. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_metric_registry.py +0 -0
  807. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_multiturn_metrics.py +0 -0
  808. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_no_answer.py +0 -0
  809. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_retrieval_rank.py +0 -0
  810. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_text_match.py +0 -0
  811. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_cache_metrics.py +0 -0
  812. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_claim_level.py +0 -0
  813. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_dataset_preprocessor.py +0 -0
  814. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_document_versioning.py +0 -0
  815. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_evaluator_comprehensive.py +0 -0
  816. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_holdout_splitter.py +0 -0
  817. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_improvement_guide_service.py +0 -0
  818. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_judge_calibration_service.py +0 -0
  819. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_ops_snapshot_service.py +0 -0
  820. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_regression_gate_service.py +0 -0
  821. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_retrieval_metrics.py +0 -0
  822. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_retriever_context.py +0 -0
  823. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_stage_event_builder.py +0 -0
  824. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_stage_metric_guide_service.py +0 -0
  825. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_synthetic_qa_generator.py +0 -0
  826. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/test_embedding_overlay.py +0 -0
  827. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/test_prompt_manifest.py +0 -0
  828. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/test_prompt_status.py +0 -0
  829. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/reports/test_release_notes.py +0 -0
  830. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/scripts/test_regression_runner.py +0 -0
  831. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_agent_types.py +0 -0
  832. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_analysis_entities.py +0 -0
  833. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_analysis_modules.py +0 -0
  834. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_analysis_pipeline.py +0 -0
  835. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_analysis_service.py +0 -0
  836. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_anthropic_adapter.py +0 -0
  837. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_artifact_lint_service.py +0 -0
  838. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_async_batch_executor.py +0 -0
  839. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_azure_adapter.py +0 -0
  840. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_benchmark_helpers.py +0 -0
  841. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_benchmark_runner.py +0 -0
  842. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_causal_adapter.py +0 -0
  843. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_ci_gate_cli.py +0 -0
  844. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli.py +0 -0
  845. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_artifacts.py +0 -0
  846. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_calibrate_judge.py +0 -0
  847. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_domain.py +0 -0
  848. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_init.py +0 -0
  849. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_ops.py +0 -0
  850. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_progress.py +0 -0
  851. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_utils.py +0 -0
  852. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_data_loaders.py +0 -0
  853. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_difficulty_profiling_service.py +0 -0
  854. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_domain_config.py +0 -0
  855. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_domain_memory.py +0 -0
  856. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_entities.py +0 -0
  857. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_entities_kg.py +0 -0
  858. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_entity_extractor.py +0 -0
  859. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_evaluator.py +0 -0
  860. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_experiment.py +0 -0
  861. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_hybrid_cache.py +0 -0
  862. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_instrumentation.py +0 -0
  863. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_insurance_metric.py +0 -0
  864. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_intent_classifier.py +0 -0
  865. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_kg_generator.py +0 -0
  866. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_kg_networkx.py +0 -0
  867. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_kiwi_tokenizer.py +0 -0
  868. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_kiwi_warning_suppression.py +0 -0
  869. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_korean_dense.py +0 -0
  870. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_korean_evaluation.py +0 -0
  871. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_korean_retrieval.py +0 -0
  872. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_langfuse_tracker.py +0 -0
  873. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_llm_relation_augmenter.py +0 -0
  874. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_lm_eval_adapter.py +0 -0
  875. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_markdown_report.py +0 -0
  876. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_memory_cache.py +0 -0
  877. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_memory_services.py +0 -0
  878. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_method_plugins.py +0 -0
  879. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_mlflow_tracker.py +0 -0
  880. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_model_config.py +0 -0
  881. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_nlp_adapter.py +0 -0
  882. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_nlp_entities.py +0 -0
  883. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_ollama_adapter.py +0 -0
  884. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_openai_adapter.py +0 -0
  885. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_phoenix_adapter.py +0 -0
  886. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_pipeline_orchestrator.py +0 -0
  887. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_ports.py +0 -0
  888. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_postgres_storage.py +0 -0
  889. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_pr_comment_formatter.py +0 -0
  890. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_prompt_candidate_service.py +0 -0
  891. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_rag_trace_entities.py +0 -0
  892. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_regress_cli.py +0 -0
  893. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_run_comparison_service.py +0 -0
  894. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_run_memory_helpers.py +0 -0
  895. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_run_mode_fixtures.py +0 -0
  896. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_settings.py +0 -0
  897. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_sqlite_storage.py +0 -0
  898. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_cli.py +0 -0
  899. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_event_schema.py +0 -0
  900. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_metric_service.py +0 -0
  901. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_storage.py +0 -0
  902. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_summary_service.py +0 -0
  903. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_statistical_adapter.py +0 -0
  904. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_streaming_loader.py +0 -0
  905. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_summary_eval_fixture.py +0 -0
  906. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_testset_generator.py +0 -0
  907. {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_web_adapter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: evalvault
3
- Version: 1.75.0
3
+ Version: 1.76.0
4
4
  Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
5
5
  Project-URL: Homepage, https://github.com/ntts9990/EvalVault
6
6
  Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
@@ -14,6 +14,11 @@
14
14
  - MLflow: 실험/버전/아티팩트 저장
15
15
  - Phoenix: LLM 트레이싱/관찰/디버깅
16
16
 
17
+ ### 운영 규칙 (필수)
18
+ - 모든 평가 run은 **MLflow + Phoenix에 동시에 로깅**된다.
19
+ - tracker 옵션에서 둘 중 하나라도 누락되면 실행이 실패한다.
20
+ - 기본 tracker: `mlflow+phoenix`
21
+
17
22
  ### 라이선스/자가호스팅 참고
18
23
  - MLflow: Apache 2.0 (상업 사용 가능) https://raw.githubusercontent.com/mlflow/mlflow/master/LICENSE.txt
19
24
  - Phoenix: Elastic License 2.0 (자가호스팅 허용, 제3자에게 SaaS 제공 금지) https://raw.githubusercontent.com/Arize-ai/phoenix/main/LICENSE
@@ -111,3 +116,14 @@ metrics.json 구조 예시:
111
116
  ## EvalVault 연동 참고
112
117
  - MLflow 어댑터: src/evalvault/adapters/outbound/tracker/mlflow_adapter.py
113
118
  - Phoenix 어댑터: src/evalvault/adapters/outbound/tracker/phoenix_adapter.py
119
+
120
+ ## 설정 값
121
+ - `MLFLOW_TRACKING_URI`: MLflow tracking server URI
122
+ - `MLFLOW_EXPERIMENT_NAME`: 실험 이름 (기본: evalvault)
123
+ - `PHOENIX_ENDPOINT`: Phoenix OTLP endpoint (예: http://localhost:6006/v1/traces)
124
+ - `PHOENIX_API_TOKEN`: Phoenix API 토큰 (옵션)
125
+
126
+ ## CLI 기본 사용
127
+ ```bash
128
+ uv run evalvault run <DATASET> --tracker mlflow+phoenix
129
+ ```
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "evalvault"
3
- version = "1.75.0"
3
+ version = "1.76.0"
4
4
  description = "RAG evaluation system using Ragas with Phoenix/Langfuse tracing"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -24,7 +24,7 @@ from evalvault.adapters.outbound.judge_calibration_reporter import JudgeCalibrat
24
24
  from evalvault.adapters.outbound.ops.report_renderer import render_json, render_markdown
25
25
  from evalvault.adapters.outbound.report import MarkdownReportAdapter
26
26
  from evalvault.config.phoenix_support import PhoenixExperimentResolver
27
- from evalvault.config.settings import Settings
27
+ from evalvault.config.settings import Settings, resolve_tracker_providers
28
28
  from evalvault.domain.entities import (
29
29
  CalibrationResult,
30
30
  FeedbackSummary,
@@ -217,56 +217,83 @@ class WebUIAdapter:
217
217
  logger.warning(f"Failed to create LLM adapter for {model_id}: {e}, using default")
218
218
  return self._llm_adapter
219
219
 
220
- def _get_tracker(
220
+ def _get_trackers(
221
221
  self,
222
222
  settings: Settings,
223
223
  tracker_config: dict[str, Any] | None,
224
- ) -> tuple[str | None, Any | None]:
225
- provider = (tracker_config or {}).get("provider") or "none"
226
- provider = provider.lower()
227
-
228
- if provider in {"none", ""}:
229
- return None, None
230
-
231
- if provider == "langfuse":
232
- if not settings.langfuse_public_key or not settings.langfuse_secret_key:
233
- logger.warning("Langfuse credentials missing; skipping tracker logging.")
234
- return None, None
235
- from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter
236
-
237
- return provider, LangfuseAdapter(
238
- public_key=settings.langfuse_public_key,
239
- secret_key=settings.langfuse_secret_key,
240
- host=settings.langfuse_host,
241
- )
224
+ ) -> list[tuple[str, Any]]:
225
+ provider = (tracker_config or {}).get("provider") or settings.tracker_provider or "none"
226
+ providers = resolve_tracker_providers(provider)
227
+ if not providers or providers == ["none"]:
228
+ return []
229
+ required = {"mlflow", "phoenix"}
230
+ if not required.issubset(set(providers)):
231
+ raise RuntimeError("Tracker must include both mlflow and phoenix")
232
+
233
+ trackers: list[tuple[str, Any]] = []
234
+ for entry in providers:
235
+ if entry == "langfuse":
236
+ if not settings.langfuse_public_key or not settings.langfuse_secret_key:
237
+ raise RuntimeError("Langfuse credentials missing")
238
+ from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter
239
+
240
+ trackers.append(
241
+ (
242
+ entry,
243
+ LangfuseAdapter(
244
+ public_key=settings.langfuse_public_key,
245
+ secret_key=settings.langfuse_secret_key,
246
+ host=settings.langfuse_host,
247
+ ),
248
+ )
249
+ )
250
+ continue
242
251
 
243
- if provider == "phoenix":
244
- from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
252
+ if entry == "phoenix":
253
+ from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
245
254
 
246
- ensure_phoenix_instrumentation(settings, force=True)
247
- try:
248
- from evalvault.adapters.outbound.tracker.phoenix_adapter import PhoenixAdapter
249
- except ImportError as exc:
250
- logger.warning("Phoenix extras not installed: %s", exc)
251
- return None, None
252
- return provider, PhoenixAdapter(endpoint=settings.phoenix_endpoint)
253
-
254
- if provider == "mlflow":
255
- if not settings.mlflow_tracking_uri:
256
- logger.warning("MLflow tracking URI missing; skipping tracker logging.")
257
- return None, None
258
- try:
259
- from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
260
- except ImportError as exc:
261
- logger.warning("MLflow adapter unavailable: %s", exc)
262
- return None, None
263
- return provider, MLflowAdapter(
264
- tracking_uri=settings.mlflow_tracking_uri,
265
- experiment_name=settings.mlflow_experiment_name,
266
- )
255
+ ensure_phoenix_instrumentation(settings, force=True)
256
+ try:
257
+ from evalvault.adapters.outbound.tracker.phoenix_adapter import PhoenixAdapter
258
+ except ImportError as exc:
259
+ raise RuntimeError("Phoenix extras not installed") from exc
260
+ trackers.append(
261
+ (
262
+ entry,
263
+ PhoenixAdapter(
264
+ endpoint=settings.phoenix_endpoint,
265
+ project_name=getattr(settings, "phoenix_project_name", None),
266
+ annotations_enabled=getattr(
267
+ settings,
268
+ "phoenix_annotations_enabled",
269
+ True,
270
+ ),
271
+ ),
272
+ )
273
+ )
274
+ continue
275
+
276
+ if entry == "mlflow":
277
+ if not settings.mlflow_tracking_uri:
278
+ raise RuntimeError("MLflow tracking URI missing")
279
+ try:
280
+ from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
281
+ except ImportError as exc:
282
+ raise RuntimeError("MLflow adapter unavailable") from exc
283
+ trackers.append(
284
+ (
285
+ entry,
286
+ MLflowAdapter(
287
+ tracking_uri=settings.mlflow_tracking_uri,
288
+ experiment_name=settings.mlflow_experiment_name,
289
+ ),
290
+ )
291
+ )
292
+ continue
267
293
 
268
- logger.warning("Unknown tracker provider: %s", provider)
269
- return None, None
294
+ raise RuntimeError(f"Unknown tracker provider: {entry}")
295
+
296
+ return trackers
270
297
 
271
298
  @staticmethod
272
299
  def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
@@ -425,7 +452,11 @@ class WebUIAdapter:
425
452
  dataset.metadata["domain"] = requested_domain
426
453
 
427
454
  settings = self._settings or Settings()
428
- tracker_provider, tracker = self._get_tracker(settings, request.tracker_config)
455
+ try:
456
+ trackers = self._get_trackers(settings, request.tracker_config)
457
+ except RuntimeError as exc:
458
+ raise RuntimeError(f"Tracker configuration error: {exc}") from exc
459
+ tracker_providers = [provider for provider, _ in trackers]
429
460
  stage_store = bool(request.stage_store)
430
461
 
431
462
  retriever_instance = None
@@ -484,7 +515,7 @@ class WebUIAdapter:
484
515
  )
485
516
  from evalvault.domain.services.memory_aware_evaluator import MemoryAwareEvaluator
486
517
 
487
- tracer = PhoenixTracerAdapter() if tracker_provider == "phoenix" else None
518
+ tracer = PhoenixTracerAdapter() if "phoenix" in tracker_providers else None
488
519
  memory_adapter = build_domain_memory_adapter(
489
520
  settings=self._settings,
490
521
  db_path=Path(memory_db_path) if memory_db_path else None,
@@ -696,22 +727,27 @@ class WebUIAdapter:
696
727
  str(request.threshold_profile).strip().lower()
697
728
  )
698
729
 
699
- if tracker and tracker_provider:
700
- try:
701
- trace_id = tracker.log_evaluation_run(result)
702
- if tracker_provider == "phoenix":
703
- endpoint = settings.phoenix_endpoint or "http://localhost:6006/v1/traces"
704
- phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
705
- phoenix_meta.update(
706
- {
707
- "trace_id": trace_id,
708
- "endpoint": endpoint,
709
- "trace_url": self._build_phoenix_trace_url(endpoint, trace_id),
710
- "schema_version": 2,
711
- }
712
- )
713
- except Exception as exc:
714
- logger.warning("Tracker logging failed: %s", exc)
730
+ if trackers:
731
+ result.tracker_metadata.setdefault("tracker_providers", tracker_providers)
732
+ for provider, tracker in trackers:
733
+ try:
734
+ trace_id = tracker.log_evaluation_run(result)
735
+ provider_meta = result.tracker_metadata.setdefault(provider, {})
736
+ if isinstance(provider_meta, dict):
737
+ provider_meta.setdefault("trace_id", trace_id)
738
+ if provider == "phoenix":
739
+ endpoint = settings.phoenix_endpoint or "http://localhost:6006/v1/traces"
740
+ phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
741
+ phoenix_meta.update(
742
+ {
743
+ "trace_id": trace_id,
744
+ "endpoint": endpoint,
745
+ "trace_url": self._build_phoenix_trace_url(endpoint, trace_id),
746
+ "schema_version": 2,
747
+ }
748
+ )
749
+ except Exception as exc:
750
+ raise RuntimeError(f"Tracker logging failed for {provider}: {exc}") from exc
715
751
 
716
752
  if stage_store and self._storage and hasattr(self._storage, "save_stage_events"):
717
753
  try:
@@ -71,7 +71,9 @@ class ConfigUpdateRequest(BaseModel):
71
71
  phoenix_endpoint: str | None = None
72
72
  phoenix_enabled: bool | None = None
73
73
  phoenix_sample_rate: float | None = None
74
- tracker_provider: Literal["langfuse", "mlflow", "phoenix", "none"] | None = None
74
+ phoenix_project_name: str | None = None
75
+ phoenix_annotations_enabled: bool | None = None
76
+ tracker_provider: str | None = None
75
77
  postgres_host: str | None = None
76
78
  postgres_port: int | None = None
77
79
  postgres_database: str | None = None
@@ -31,7 +31,7 @@ from ..utils.validators import parse_csv_option, validate_choices
31
31
  from .run_helpers import (
32
32
  _display_results,
33
33
  _is_oss_open_model,
34
- _log_to_tracker,
34
+ _log_to_trackers,
35
35
  _resolve_thresholds,
36
36
  _save_results,
37
37
  _save_to_db,
@@ -419,7 +419,7 @@ def create_method_app(console: Console) -> typer.Typer:
419
419
  _display_results(result, console)
420
420
 
421
421
  if tracker and tracker != "none":
422
- _log_to_tracker(settings, result, console, tracker_type=tracker)
422
+ _log_to_trackers(settings, result, console, tracker_type=tracker)
423
423
 
424
424
  if eval_output:
425
425
  _save_results(eval_output, result, console)
@@ -33,7 +33,7 @@ from evalvault.adapters.outbound.phoenix.sync_service import (
33
33
  from evalvault.adapters.outbound.storage.factory import build_storage_adapter
34
34
  from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import PhoenixTracerAdapter
35
35
  from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
36
- from evalvault.config.settings import Settings, apply_profile
36
+ from evalvault.config.settings import Settings, apply_profile, resolve_tracker_providers
37
37
  from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
38
38
  from evalvault.domain.entities.multiturn import (
39
39
  MultiTurnConversationRecord,
@@ -86,7 +86,8 @@ from .run_helpers import (
86
86
  _display_results,
87
87
  _evaluate_streaming_run,
88
88
  _is_oss_open_model,
89
- _log_to_tracker,
89
+ _log_analysis_artifacts,
90
+ _log_to_trackers,
90
91
  _option_was_provided,
91
92
  _print_run_mode_banner,
92
93
  _resolve_thresholds,
@@ -178,6 +179,14 @@ def _log_duration(
178
179
  _log_timestamp(console, verbose, f"{message} ({elapsed:.2f}s)")
179
180
 
180
181
 
182
+ def _infer_phoenix_model_provider(model_name: str) -> str:
183
+ if not model_name:
184
+ return "OPENAI"
185
+ provider = model_name.split("/")[0].upper() if "/" in model_name else "OPENAI"
186
+ allowed = {"OPENAI", "AZURE_OPENAI", "ANTHROPIC", "GOOGLE", "DEEPSEEK", "XAI", "AWS", "OLLAMA"}
187
+ return provider if provider in allowed else "OPENAI"
188
+
189
+
181
190
  def register_run_commands(
182
191
  app: typer.Typer,
183
192
  console: Console,
@@ -358,10 +367,13 @@ def register_run_commands(
358
367
  help="Store stage events in the SQLite database (requires --db).",
359
368
  ),
360
369
  tracker: str = typer.Option(
361
- "none",
370
+ "mlflow+phoenix",
362
371
  "--tracker",
363
372
  "-t",
364
- help="Tracker to log results: 'langfuse', 'mlflow', 'phoenix', or 'none'.",
373
+ help=(
374
+ "Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
375
+ "or combinations like 'mlflow+phoenix'."
376
+ ),
365
377
  rich_help_panel="Simple mode preset",
366
378
  ),
367
379
  langfuse: bool = typer.Option(
@@ -667,13 +679,24 @@ def register_run_commands(
667
679
  tracker_override = _option_was_provided(ctx, "tracker") or langfuse
668
680
  selected_tracker = tracker
669
681
  if preset.default_tracker:
670
- if tracker_override and tracker != preset.default_tracker:
671
- print_cli_warning(
672
- console,
673
- f"Simple 모드는 tracker={preset.default_tracker}로 고정됩니다.",
674
- tips=["다른 Tracker 사용하려면 --mode full을 사용하세요."],
675
- )
676
- selected_tracker = preset.default_tracker
682
+ if tracker_override:
683
+ try:
684
+ providers = resolve_tracker_providers(tracker)
685
+ except ValueError as exc:
686
+ print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
687
+ raise typer.Exit(2) from exc
688
+ if providers == ["none"]:
689
+ selected_tracker = preset.default_tracker
690
+ elif preset.default_tracker not in providers:
691
+ print_cli_warning(
692
+ console,
693
+ f"Simple 모드는 tracker에 {preset.default_tracker}가 포함되어야 합니다.",
694
+ tips=["다른 Tracker를 사용하려면 --mode full을 사용하세요."],
695
+ )
696
+ providers.append(preset.default_tracker)
697
+ selected_tracker = "+".join(providers)
698
+ else:
699
+ selected_tracker = preset.default_tracker
677
700
  tracker = selected_tracker
678
701
 
679
702
  prompt_manifest_value = prompt_manifest
@@ -1646,10 +1669,29 @@ def register_run_commands(
1646
1669
  )
1647
1670
  raise typer.Exit(2) from exc
1648
1671
 
1672
+ effective_tracker = tracker
1673
+ if langfuse and tracker == "none" and not preset.default_tracker:
1674
+ effective_tracker = "langfuse"
1675
+ print_cli_warning(
1676
+ console,
1677
+ "--langfuse 플래그는 곧 제거됩니다.",
1678
+ tips=["대신 --tracker langfuse를 사용하세요."],
1679
+ )
1680
+
1681
+ try:
1682
+ effective_providers = resolve_tracker_providers(effective_tracker)
1683
+ except ValueError as exc:
1684
+ print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
1685
+ raise typer.Exit(2) from exc
1686
+
1649
1687
  phoenix_dataset_name = phoenix_dataset
1650
1688
  if phoenix_experiment and not phoenix_dataset_name:
1651
1689
  phoenix_dataset_name = f"{ds.name}:{ds.version}"
1652
1690
 
1691
+ auto_phoenix_sync = "phoenix" in effective_providers
1692
+ if auto_phoenix_sync and not phoenix_dataset_name:
1693
+ phoenix_dataset_name = f"{ds.name}:{ds.version}"
1694
+
1653
1695
  phoenix_dataset_description_value = phoenix_dataset_description
1654
1696
  if phoenix_dataset_name and not phoenix_dataset_description_value:
1655
1697
  desc_source = ds.metadata.get("description") if isinstance(ds.metadata, dict) else None
@@ -1659,13 +1701,20 @@ def register_run_commands(
1659
1701
  phoenix_dataset_result: dict[str, Any] | None = None
1660
1702
  phoenix_experiment_result: dict[str, Any] | None = None
1661
1703
 
1662
- if phoenix_dataset_name or phoenix_experiment:
1704
+ if phoenix_dataset_name or phoenix_experiment or auto_phoenix_sync:
1663
1705
  try:
1664
1706
  phoenix_sync_service = PhoenixSyncService(
1665
1707
  endpoint=settings.phoenix_endpoint,
1666
1708
  api_token=getattr(settings, "phoenix_api_token", None),
1667
1709
  )
1668
1710
  except PhoenixSyncError as exc:
1711
+ if auto_phoenix_sync:
1712
+ print_cli_error(
1713
+ console,
1714
+ "Phoenix Sync 서비스를 초기화할 수 없습니다.",
1715
+ details=str(exc),
1716
+ )
1717
+ raise typer.Exit(2) from exc
1669
1718
  print_cli_warning(
1670
1719
  console,
1671
1720
  "Phoenix Sync 서비스를 초기화할 수 없습니다.",
@@ -1673,19 +1722,10 @@ def register_run_commands(
1673
1722
  )
1674
1723
  phoenix_sync_service = None
1675
1724
 
1676
- effective_tracker = tracker
1677
- if langfuse and tracker == "none" and not preset.default_tracker:
1678
- effective_tracker = "langfuse"
1679
- print_cli_warning(
1680
- console,
1681
- "--langfuse 플래그는 곧 제거됩니다.",
1682
- tips=["대신 --tracker langfuse를 사용하세요."],
1683
- )
1684
-
1685
1725
  config_wants_phoenix = getattr(settings, "phoenix_enabled", False)
1686
1726
  if not isinstance(config_wants_phoenix, bool):
1687
1727
  config_wants_phoenix = False
1688
- should_enable_phoenix = effective_tracker == "phoenix" or config_wants_phoenix
1728
+ should_enable_phoenix = "phoenix" in effective_providers or config_wants_phoenix
1689
1729
  if should_enable_phoenix:
1690
1730
  ensure_phoenix_instrumentation(settings, console=console, force=True)
1691
1731
 
@@ -2032,6 +2072,9 @@ def register_run_commands(
2032
2072
  )
2033
2073
  if prompt_bundle:
2034
2074
  result.tracker_metadata["prompt_set"] = build_prompt_summary(prompt_bundle)
2075
+ result.tracker_metadata["prompt_set_detail"] = prompt_bundle.to_dict(
2076
+ include_content=True
2077
+ )
2035
2078
 
2036
2079
  if retriever_instance or used_versioned_prefill:
2037
2080
  retriever_tracker_meta: dict[str, Any] = {
@@ -2105,13 +2148,29 @@ def register_run_commands(
2105
2148
  )
2106
2149
  console.print(f"[dim]View datasets: {dataset_info.url}[/dim]")
2107
2150
  except PhoenixSyncError as exc:
2151
+ if auto_phoenix_sync:
2152
+ print_cli_error(
2153
+ console,
2154
+ "Phoenix Dataset 업로드에 실패했습니다.",
2155
+ details=str(exc),
2156
+ )
2157
+ raise typer.Exit(2) from exc
2108
2158
  print_cli_warning(
2109
2159
  console,
2110
2160
  "Phoenix Dataset 업로드에 실패했습니다.",
2111
2161
  tips=[str(exc)],
2112
2162
  )
2163
+ if auto_phoenix_sync and not phoenix_experiment:
2164
+ phoenix_experiment = f"{result.model_name}-{result.run_id[:8]}"
2113
2165
  if phoenix_experiment:
2114
2166
  if not phoenix_dataset_result:
2167
+ if auto_phoenix_sync:
2168
+ print_cli_error(
2169
+ console,
2170
+ "Dataset 업로드에 실패해 Phoenix Experiment 생성을 진행할 수 없습니다.",
2171
+ details="Phoenix dataset 업로드가 필요합니다.",
2172
+ )
2173
+ raise typer.Exit(2)
2115
2174
  print_cli_warning(
2116
2175
  console,
2117
2176
  "Dataset 업로드에 실패해 Phoenix Experiment 생성을 건너뜁니다.",
@@ -2169,6 +2228,41 @@ def register_run_commands(
2169
2228
  phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
2170
2229
  phoenix_meta.setdefault("schema_version", 2)
2171
2230
  phoenix_meta["prompts"] = prompt_metadata_entries
2231
+ if phoenix_sync_service and "phoenix" in effective_providers:
2232
+ try:
2233
+ prompt_set_summary = result.tracker_metadata.get("prompt_set") or {}
2234
+ prompt_set_name = prompt_set_summary.get("prompt_set_name")
2235
+ prompt_entries = list(prompt_metadata_entries)
2236
+ prompt_set_detail = result.tracker_metadata.get("prompt_set_detail")
2237
+ if isinstance(prompt_set_detail, dict):
2238
+ for item in prompt_set_detail.get("items", []):
2239
+ prompt = item.get("prompt") or {}
2240
+ if not isinstance(prompt, dict):
2241
+ continue
2242
+ prompt_entries.append(
2243
+ {
2244
+ "name": prompt.get("name"),
2245
+ "role": item.get("role"),
2246
+ "kind": prompt.get("kind"),
2247
+ "checksum": prompt.get("checksum"),
2248
+ "content": prompt.get("content"),
2249
+ "source": prompt.get("source"),
2250
+ }
2251
+ )
2252
+ synced = phoenix_sync_service.sync_prompts(
2253
+ prompt_entries=prompt_entries,
2254
+ model_name=result.model_name,
2255
+ model_provider=_infer_phoenix_model_provider(result.model_name),
2256
+ prompt_set_name=prompt_set_name,
2257
+ )
2258
+ if synced:
2259
+ phoenix_meta["prompts"] = synced
2260
+ except PhoenixSyncError as exc:
2261
+ print_cli_warning(
2262
+ console,
2263
+ "Phoenix Prompt 동기화에 실패했습니다.",
2264
+ tips=[str(exc)],
2265
+ )
2172
2266
 
2173
2267
  if stage_events or stage_store:
2174
2268
  stage_event_builder = StageEventBuilder()
@@ -2187,7 +2281,7 @@ def register_run_commands(
2187
2281
 
2188
2282
  if effective_tracker != "none":
2189
2283
  phoenix_opts = None
2190
- if effective_tracker == "phoenix":
2284
+ if "phoenix" in effective_providers:
2191
2285
  phoenix_opts = {
2192
2286
  "max_traces": phoenix_max_traces,
2193
2287
  "metadata": phoenix_trace_metadata or None,
@@ -2198,7 +2292,7 @@ def register_run_commands(
2198
2292
  verbose,
2199
2293
  f"Tracker 로깅 시작 ({effective_tracker})",
2200
2294
  )
2201
- _log_to_tracker(
2295
+ _log_to_trackers(
2202
2296
  settings,
2203
2297
  result,
2204
2298
  console,
@@ -2276,6 +2370,12 @@ def register_run_commands(
2276
2370
  pipeline_result,
2277
2371
  artifacts_dir=artifacts_dir,
2278
2372
  )
2373
+ result.tracker_metadata["analysis_artifacts"] = {
2374
+ "dir": artifact_index.get("dir"),
2375
+ "index": artifact_index.get("index"),
2376
+ "output": str(analysis_output_path),
2377
+ "report": str(analysis_report_path),
2378
+ }
2279
2379
  payload = serialize_pipeline_result(pipeline_result)
2280
2380
  payload["run_id"] = result.run_id
2281
2381
  payload["artifacts"] = artifact_index
@@ -2292,6 +2392,18 @@ def register_run_commands(
2292
2392
  "[green]자동 분석 상세 결과 저장:[/green] "
2293
2393
  f"{artifact_index['dir']} (index: {artifact_index['index']})\n"
2294
2394
  )
2395
+ if effective_tracker != "none":
2396
+ _log_analysis_artifacts(
2397
+ settings,
2398
+ result,
2399
+ console,
2400
+ effective_tracker,
2401
+ analysis_payload=payload,
2402
+ artifact_index=artifact_index,
2403
+ report_text=report_text,
2404
+ output_path=analysis_output_path,
2405
+ report_path=analysis_report_path,
2406
+ )
2295
2407
 
2296
2408
  @app.command(
2297
2409
  name="run-simple",
@@ -2395,10 +2507,13 @@ def register_run_commands(
2395
2507
  help="Store stage events in the SQLite database (requires --db).",
2396
2508
  ),
2397
2509
  tracker: str = typer.Option(
2398
- "none",
2510
+ "mlflow+phoenix",
2399
2511
  "--tracker",
2400
2512
  "-t",
2401
- help="Tracker to log results: 'langfuse', 'mlflow', 'phoenix', or 'none'.",
2513
+ help=(
2514
+ "Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
2515
+ "or combinations like 'mlflow+phoenix'."
2516
+ ),
2402
2517
  ),
2403
2518
  langfuse: bool = typer.Option(
2404
2519
  False,
@@ -2687,10 +2802,13 @@ def register_run_commands(
2687
2802
  help="Store stage events in the SQLite database (requires --db).",
2688
2803
  ),
2689
2804
  tracker: str = typer.Option(
2690
- "none",
2805
+ "mlflow+phoenix",
2691
2806
  "--tracker",
2692
2807
  "-t",
2693
- help="Tracker to log results: 'langfuse', 'mlflow', 'phoenix', or 'none'.",
2808
+ help=(
2809
+ "Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
2810
+ "or combinations like 'mlflow+phoenix'."
2811
+ ),
2694
2812
  ),
2695
2813
  langfuse: bool = typer.Option(
2696
2814
  False,