evalvault 1.74.0__tar.gz → 1.76.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (952)
  1. {evalvault-1.74.0 → evalvault-1.76.0}/.env.example +40 -9
  2. {evalvault-1.74.0 → evalvault-1.76.0}/.env.offline.example +1 -1
  3. {evalvault-1.74.0 → evalvault-1.76.0}/.gitignore +5 -0
  4. evalvault-1.76.0/PKG-INFO +221 -0
  5. {evalvault-1.74.0 → evalvault-1.76.0}/README.en.md +25 -27
  6. evalvault-1.76.0/README.md +95 -0
  7. {evalvault-1.74.0 → evalvault-1.76.0}/agent/README.md +2 -2
  8. {evalvault-1.74.0 → evalvault-1.76.0}/agent/memory/shared/dependencies.md +2 -2
  9. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/improvement/coordinator_prompt.md +2 -2
  10. evalvault-1.76.0/data/rag/user_guide_bm25.json +55 -0
  11. evalvault-1.76.0/docker-compose.offline.modelcache.yml +11 -0
  12. {evalvault-1.74.0 → evalvault-1.76.0}/docker-compose.offline.yml +1 -1
  13. {evalvault-1.74.0 → evalvault-1.76.0}/docker-compose.yml +1 -1
  14. evalvault-1.76.0/docs/INDEX.md +70 -0
  15. {evalvault-1.74.0 → evalvault-1.76.0}/docs/README.ko.md +11 -10
  16. evalvault-1.76.0/docs/ROADMAP.md +6 -0
  17. evalvault-1.76.0/docs/STATUS.md +6 -0
  18. {evalvault-1.74.0 → evalvault-1.76.0}/docs/api/adapters/inbound.md +2 -2
  19. {evalvault-1.74.0 → evalvault-1.76.0}/docs/api/domain/metrics.md +1 -1
  20. {evalvault-1.74.0 → evalvault-1.76.0}/docs/api/ports/outbound.md +1 -1
  21. evalvault-1.76.0/docs/getting-started/INSTALLATION.md +8 -0
  22. evalvault-1.76.0/docs/guides/CHAINLIT_INTEGRATION_PLAN.md +5 -0
  23. evalvault-1.76.0/docs/guides/CLI_MCP_PLAN.md +6 -0
  24. evalvault-1.76.0/docs/guides/CLI_UX_REDESIGN.md +5 -0
  25. evalvault-1.76.0/docs/guides/DEV_GUIDE.md +7 -0
  26. evalvault-1.76.0/docs/guides/DOCS_REFRESH_PLAN.md +6 -0
  27. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/EVALVAULT_DIAGNOSTIC_PLAYBOOK.md +3 -2
  28. evalvault-1.76.0/docs/guides/EVALVAULT_WORK_PLAN.md +5 -0
  29. evalvault-1.76.0/docs/guides/EXPERIMENT_TRACKING_STACK.md +129 -0
  30. evalvault-1.76.0/docs/guides/LENA_MVP_IMPLEMENTATION_PLAN.md +6 -0
  31. evalvault-1.76.0/docs/guides/LENA_RAGAS_CALIBRATION_DEV_PLAN.md +6 -0
  32. evalvault-1.76.0/docs/guides/NEXT_STEPS_EXECUTION_PLAN.md +8 -0
  33. evalvault-1.76.0/docs/guides/OFFLINE_MODELS.md +85 -0
  34. evalvault-1.76.0/docs/guides/P0_P3_EXECUTION_REPORT.md +6 -0
  35. evalvault-1.76.0/docs/guides/P1_P4_WORK_PLAN.md +5 -0
  36. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/PARALLEL_WORK_APPROVAL_RULES.md +1 -1
  37. evalvault-1.76.0/docs/guides/PRD_LENA.md +5 -0
  38. evalvault-1.76.0/docs/guides/PROJECT_STATUS_AND_PLAN.md +6 -0
  39. evalvault-1.76.0/docs/guides/RAG_CLI_WORKFLOW_TEMPLATES.md +8 -0
  40. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/RAG_NOISE_REDUCTION_GUIDE.md +4 -4
  41. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/RAG_PERFORMANCE_IMPLEMENTATION_LOG.md +1 -1
  42. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/RAG_PERFORMANCE_IMPROVEMENT_PROPOSAL.md +49 -11
  43. evalvault-1.76.0/docs/guides/RAG_PGVECTOR_PREINDEX_PLAN.md +94 -0
  44. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/RELEASE_CHECKLIST.md +5 -4
  45. evalvault-1.76.0/docs/guides/USER_GUIDE.md +12 -0
  46. evalvault-1.76.0/docs/guides/WEBUI_CLI_ROLLOUT_PLAN.md +6 -0
  47. evalvault-1.76.0/docs/guides/WORKLOG_LAST_2_DAYS.md +12 -0
  48. evalvault-1.76.0/docs/guides/_DEPRECATED_NOTICE.md +7 -0
  49. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/00_overview.md +9 -9
  50. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/01_architecture.md +32 -69
  51. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/02_data_and_metrics.md +2 -2
  52. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/03_workflows.md +25 -20
  53. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/04_operations.md +23 -13
  54. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/06_quality_and_testing.md +2 -2
  55. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/07_ux_and_product.md +20 -10
  56. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/08_roadmap.md +10 -3
  57. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/09_competitive_positioning.md +1 -1
  58. evalvault-1.76.0/docs/handbook/WORKLOG_DOCS_CLEANUP_2026-01-29.md +47 -0
  59. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/appendix-coverage-matrix.md +2 -0
  60. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/appendix-file-inventory.md +2 -0
  61. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/appendix-roadmap.md +5 -3
  62. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/appendix-taxonomy.md +17 -14
  63. evalvault-1.76.0/docs/new_whitepaper/00_frontmatter.md +12 -0
  64. evalvault-1.76.0/docs/new_whitepaper/01_overview.md +10 -0
  65. evalvault-1.76.0/docs/new_whitepaper/02_architecture.md +10 -0
  66. evalvault-1.76.0/docs/new_whitepaper/03_data_flow.md +10 -0
  67. evalvault-1.76.0/docs/new_whitepaper/04_components.md +10 -0
  68. evalvault-1.76.0/docs/new_whitepaper/05_expert_lenses.md +10 -0
  69. evalvault-1.76.0/docs/new_whitepaper/06_implementation.md +11 -0
  70. evalvault-1.76.0/docs/new_whitepaper/07_advanced.md +10 -0
  71. evalvault-1.76.0/docs/new_whitepaper/08_customization.md +10 -0
  72. evalvault-1.76.0/docs/new_whitepaper/09_quality.md +9 -0
  73. evalvault-1.76.0/docs/new_whitepaper/10_performance.md +10 -0
  74. evalvault-1.76.0/docs/new_whitepaper/11_security.md +10 -0
  75. evalvault-1.76.0/docs/new_whitepaper/12_operations.md +9 -0
  76. evalvault-1.76.0/docs/new_whitepaper/13_standards.md +11 -0
  77. evalvault-1.76.0/docs/new_whitepaper/14_roadmap.md +10 -0
  78. evalvault-1.76.0/docs/new_whitepaper/INDEX.md +8 -0
  79. evalvault-1.76.0/docs/new_whitepaper/STYLE_GUIDE.md +6 -0
  80. {evalvault-1.74.0 → evalvault-1.76.0}/docs/security_audit_worklog.md +1 -1
  81. {evalvault-1.74.0 → evalvault-1.76.0}/examples/README.md +2 -3
  82. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/App.tsx +2 -2
  83. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/JudgeCalibration.tsx +91 -18
  84. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/services/api.ts +18 -2
  85. {evalvault-1.74.0 → evalvault-1.76.0}/pyproject.toml +3 -1
  86. {evalvault-1.74.0 → evalvault-1.76.0}/reports/feature_verification_report.md +3 -3
  87. evalvault-1.76.0/scripts/dev/preindex_pgvector_runs.py +219 -0
  88. evalvault-1.76.0/scripts/offline/build_full_offline_bundle.sh +64 -0
  89. evalvault-1.76.0/scripts/offline/bundle_model_cache.sh +37 -0
  90. evalvault-1.76.0/scripts/offline/export_api_base_only.sh +33 -0
  91. evalvault-1.76.0/scripts/offline/predownload_nlp_models.py +116 -0
  92. evalvault-1.76.0/scripts/offline/restore_model_cache.sh +17 -0
  93. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/adapter.py +127 -80
  94. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/chat.py +303 -17
  95. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/config.py +3 -1
  96. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/domain.py +10 -5
  97. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
  98. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/runs.py +23 -4
  99. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
  100. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
  101. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
  102. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
  103. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/compare.py +2 -7
  104. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/debug.py +3 -2
  105. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/domain.py +12 -12
  106. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
  107. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/gate.py +3 -2
  108. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
  109. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/history.py +3 -12
  110. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/method.py +3 -4
  111. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/ops.py +2 -2
  112. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
  113. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
  114. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
  115. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/regress.py +5 -4
  116. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/run.py +188 -59
  117. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/run_helpers.py +181 -70
  118. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/stage.py +6 -25
  119. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/options.py +10 -4
  120. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/tools.py +11 -8
  121. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
  122. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
  123. evalvault-1.76.0/src/evalvault/adapters/outbound/domain_memory/__init__.py +11 -0
  124. evalvault-1.76.0/src/evalvault/adapters/outbound/domain_memory/factory.py +68 -0
  125. evalvault-1.76.0/src/evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
  126. evalvault-1.76.0/src/evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
  127. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/factory.py +1 -1
  128. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
  129. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
  130. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
  131. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/phoenix/sync_service.py +99 -0
  132. evalvault-1.76.0/src/evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
  133. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/base_sql.py +3 -2
  134. evalvault-1.76.0/src/evalvault/adapters/outbound/storage/factory.py +53 -0
  135. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/postgres_schema.sql +2 -0
  136. evalvault-1.76.0/src/evalvault/adapters/outbound/tracker/mlflow_adapter.py +387 -0
  137. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/phoenix_adapter.py +158 -9
  138. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/instrumentation.py +8 -6
  139. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/phoenix_support.py +5 -0
  140. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/settings.py +71 -11
  141. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/domain_learning_hook.py +2 -1
  142. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/evaluator.py +2 -0
  143. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/web_port.py +3 -1
  144. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/storage_port.py +2 -0
  145. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_ci_gate_cli.py +8 -8
  146. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_cli.py +42 -42
  147. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_cli_calibrate_judge.py +1 -1
  148. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_cli_domain.py +6 -6
  149. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_cli_utils.py +8 -4
  150. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_korean_dense.py +3 -3
  151. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_regress_cli.py +2 -2
  152. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_web_adapter.py +1 -1
  153. {evalvault-1.74.0 → evalvault-1.76.0}/uv.lock +17 -1
  154. evalvault-1.74.0/PKG-INFO +0 -585
  155. evalvault-1.74.0/README.md +0 -461
  156. evalvault-1.74.0/data/rag/user_guide_bm25.json +0 -3674
  157. evalvault-1.74.0/docs/INDEX.md +0 -78
  158. evalvault-1.74.0/docs/ROADMAP.md +0 -38
  159. evalvault-1.74.0/docs/STATUS.md +0 -40
  160. evalvault-1.74.0/docs/getting-started/INSTALLATION.md +0 -90
  161. evalvault-1.74.0/docs/guides/CHAINLIT_INTEGRATION_PLAN.md +0 -157
  162. evalvault-1.74.0/docs/guides/CLI_MCP_PLAN.md +0 -243
  163. evalvault-1.74.0/docs/guides/CLI_UX_REDESIGN.md +0 -50
  164. evalvault-1.74.0/docs/guides/DEV_GUIDE.md +0 -87
  165. evalvault-1.74.0/docs/guides/DOCS_REFRESH_PLAN.md +0 -96
  166. evalvault-1.74.0/docs/guides/EVALVAULT_WORK_PLAN.md +0 -109
  167. evalvault-1.74.0/docs/guides/LENA_MVP_IMPLEMENTATION_PLAN.md +0 -763
  168. evalvault-1.74.0/docs/guides/LENA_RAGAS_CALIBRATION_DEV_PLAN.md +0 -428
  169. evalvault-1.74.0/docs/guides/NEXT_STEPS_EXECUTION_PLAN.md +0 -93
  170. evalvault-1.74.0/docs/guides/P0_P3_EXECUTION_REPORT.md +0 -108
  171. evalvault-1.74.0/docs/guides/P1_P4_WORK_PLAN.md +0 -571
  172. evalvault-1.74.0/docs/guides/PRD_LENA.md +0 -637
  173. evalvault-1.74.0/docs/guides/PROJECT_STATUS_AND_PLAN.md +0 -291
  174. evalvault-1.74.0/docs/guides/RAG_CLI_WORKFLOW_TEMPLATES.md +0 -318
  175. evalvault-1.74.0/docs/guides/USER_GUIDE.md +0 -1515
  176. evalvault-1.74.0/docs/guides/WEBUI_CLI_ROLLOUT_PLAN.md +0 -185
  177. evalvault-1.74.0/docs/guides/WORKLOG_LAST_2_DAYS.md +0 -71
  178. evalvault-1.74.0/docs/new_whitepaper/00_frontmatter.md +0 -114
  179. evalvault-1.74.0/docs/new_whitepaper/01_overview.md +0 -140
  180. evalvault-1.74.0/docs/new_whitepaper/02_architecture.md +0 -172
  181. evalvault-1.74.0/docs/new_whitepaper/03_data_flow.md +0 -166
  182. evalvault-1.74.0/docs/new_whitepaper/04_components.md +0 -151
  183. evalvault-1.74.0/docs/new_whitepaper/05_expert_lenses.md +0 -123
  184. evalvault-1.74.0/docs/new_whitepaper/06_implementation.md +0 -139
  185. evalvault-1.74.0/docs/new_whitepaper/07_advanced.md +0 -289
  186. evalvault-1.74.0/docs/new_whitepaper/08_customization.md +0 -227
  187. evalvault-1.74.0/docs/new_whitepaper/09_quality.md +0 -188
  188. evalvault-1.74.0/docs/new_whitepaper/10_performance.md +0 -147
  189. evalvault-1.74.0/docs/new_whitepaper/11_security.md +0 -133
  190. evalvault-1.74.0/docs/new_whitepaper/12_operations.md +0 -209
  191. evalvault-1.74.0/docs/new_whitepaper/13_standards.md +0 -135
  192. evalvault-1.74.0/docs/new_whitepaper/14_roadmap.md +0 -112
  193. evalvault-1.74.0/docs/new_whitepaper/INDEX.md +0 -40
  194. evalvault-1.74.0/docs/new_whitepaper/STYLE_GUIDE.md +0 -112
  195. evalvault-1.74.0/reports/retrieval_benchmark_smoke_precision.csv +0 -4
  196. evalvault-1.74.0/reports/retrieval_benchmark_smoke_precision_graphrag.csv +0 -5
  197. evalvault-1.74.0/reports/retrieval_benchmark_smoke_precision_multi.csv +0 -4
  198. evalvault-1.74.0/src/evalvault/adapters/outbound/domain_memory/__init__.py +0 -7
  199. evalvault-1.74.0/src/evalvault/adapters/outbound/tracker/mlflow_adapter.py +0 -232
  200. {evalvault-1.74.0 → evalvault-1.76.0}/.dockerignore +0 -0
  201. {evalvault-1.74.0 → evalvault-1.76.0}/.github/workflows/ci.yml +0 -0
  202. {evalvault-1.74.0 → evalvault-1.76.0}/.github/workflows/regression-gate.yml +0 -0
  203. {evalvault-1.74.0 → evalvault-1.76.0}/.github/workflows/release.yml +0 -0
  204. {evalvault-1.74.0 → evalvault-1.76.0}/.github/workflows/stale.yml +0 -0
  205. {evalvault-1.74.0 → evalvault-1.76.0}/.pre-commit-config.yaml +0 -0
  206. {evalvault-1.74.0 → evalvault-1.76.0}/.python-version +0 -0
  207. {evalvault-1.74.0 → evalvault-1.76.0}/AGENTS.md +0 -0
  208. {evalvault-1.74.0 → evalvault-1.76.0}/CHANGELOG.md +0 -0
  209. {evalvault-1.74.0 → evalvault-1.76.0}/CLAUDE.md +0 -0
  210. {evalvault-1.74.0 → evalvault-1.76.0}/CODE_OF_CONDUCT.md +0 -0
  211. {evalvault-1.74.0 → evalvault-1.76.0}/CONTRIBUTING.md +0 -0
  212. {evalvault-1.74.0 → evalvault-1.76.0}/Dockerfile +0 -0
  213. {evalvault-1.74.0 → evalvault-1.76.0}/LICENSE.md +0 -0
  214. {evalvault-1.74.0 → evalvault-1.76.0}/SECURITY.md +0 -0
  215. {evalvault-1.74.0 → evalvault-1.76.0}/agent/agent.py +0 -0
  216. {evalvault-1.74.0 → evalvault-1.76.0}/agent/client.py +0 -0
  217. {evalvault-1.74.0 → evalvault-1.76.0}/agent/config.py +0 -0
  218. {evalvault-1.74.0 → evalvault-1.76.0}/agent/main.py +0 -0
  219. {evalvault-1.74.0 → evalvault-1.76.0}/agent/memory/README.md +0 -0
  220. {evalvault-1.74.0 → evalvault-1.76.0}/agent/memory/shared/decisions.md +0 -0
  221. {evalvault-1.74.0 → evalvault-1.76.0}/agent/memory/templates/coordinator_guide.md +0 -0
  222. {evalvault-1.74.0 → evalvault-1.76.0}/agent/memory/templates/work_log_template.md +0 -0
  223. {evalvault-1.74.0 → evalvault-1.76.0}/agent/memory_integration.py +0 -0
  224. {evalvault-1.74.0 → evalvault-1.76.0}/agent/progress.py +0 -0
  225. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/app_spec.txt +0 -0
  226. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/baseline.txt +0 -0
  227. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/coding_prompt.md +0 -0
  228. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/existing_project_prompt.md +0 -0
  229. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/improvement/architecture_prompt.md +0 -0
  230. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/improvement/base_prompt.md +0 -0
  231. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/improvement/observability_prompt.md +0 -0
  232. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/initializer_prompt.md +0 -0
  233. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/prompt_manifest.json +0 -0
  234. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts/system.txt +0 -0
  235. {evalvault-1.74.0 → evalvault-1.76.0}/agent/prompts.py +0 -0
  236. {evalvault-1.74.0 → evalvault-1.76.0}/agent/requirements.txt +0 -0
  237. {evalvault-1.74.0 → evalvault-1.76.0}/agent/security.py +0 -0
  238. {evalvault-1.74.0 → evalvault-1.76.0}/config/domains/insurance/memory.yaml +0 -0
  239. {evalvault-1.74.0 → evalvault-1.76.0}/config/domains/insurance/terms_dictionary_en.json +0 -0
  240. {evalvault-1.74.0 → evalvault-1.76.0}/config/domains/insurance/terms_dictionary_ko.json +0 -0
  241. {evalvault-1.74.0 → evalvault-1.76.0}/config/methods.yaml +0 -0
  242. {evalvault-1.74.0 → evalvault-1.76.0}/config/models.yaml +0 -0
  243. {evalvault-1.74.0 → evalvault-1.76.0}/config/ragas_prompts_override.yaml +0 -0
  244. {evalvault-1.74.0 → evalvault-1.76.0}/config/regressions/ci.json +0 -0
  245. {evalvault-1.74.0 → evalvault-1.76.0}/config/regressions/default.json +0 -0
  246. {evalvault-1.74.0 → evalvault-1.76.0}/config/regressions/ux.json +0 -0
  247. {evalvault-1.74.0 → evalvault-1.76.0}/config/stage_metric_playbook.yaml +0 -0
  248. {evalvault-1.74.0 → evalvault-1.76.0}/config/stage_metric_thresholds.json +0 -0
  249. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/dummy_test_dataset.json +0 -0
  250. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean.csv +0 -0
  251. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean.json +0 -0
  252. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean_2.json +0 -0
  253. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean_3.json +0 -0
  254. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/ragas_ko90_en10.json +0 -0
  255. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/sample.json +0 -0
  256. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/visualization_20q_cluster_map.csv +0 -0
  257. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/visualization_20q_korean.json +0 -0
  258. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/visualization_2q_cluster_map.csv +0 -0
  259. {evalvault-1.74.0 → evalvault-1.76.0}/data/datasets/visualization_2q_korean.json +0 -0
  260. {evalvault-1.74.0 → evalvault-1.76.0}/data/kg/knowledge_graph.json +0 -0
  261. {evalvault-1.74.0 → evalvault-1.76.0}/dataset_templates/dataset_template.csv +0 -0
  262. {evalvault-1.74.0 → evalvault-1.76.0}/dataset_templates/dataset_template.json +0 -0
  263. {evalvault-1.74.0 → evalvault-1.76.0}/dataset_templates/dataset_template.xlsx +0 -0
  264. {evalvault-1.74.0 → evalvault-1.76.0}/dataset_templates/method_input_template.json +0 -0
  265. {evalvault-1.74.0 → evalvault-1.76.0}/docker-compose.langfuse.yml +0 -0
  266. {evalvault-1.74.0 → evalvault-1.76.0}/docker-compose.phoenix.yaml +0 -0
  267. {evalvault-1.74.0 → evalvault-1.76.0}/docs/api/adapters/outbound.md +0 -0
  268. {evalvault-1.74.0 → evalvault-1.76.0}/docs/api/config.md +0 -0
  269. {evalvault-1.74.0 → evalvault-1.76.0}/docs/api/domain/entities.md +0 -0
  270. {evalvault-1.74.0 → evalvault-1.76.0}/docs/api/domain/services.md +0 -0
  271. {evalvault-1.74.0 → evalvault-1.76.0}/docs/api/ports/inbound.md +0 -0
  272. {evalvault-1.74.0 → evalvault-1.76.0}/docs/architecture/open-rag-trace-collector.md +0 -0
  273. {evalvault-1.74.0 → evalvault-1.76.0}/docs/architecture/open-rag-trace-spec.md +0 -0
  274. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/AGENTS_SYSTEM_GUIDE.md +0 -0
  275. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/CI_REGRESSION_GATE.md +0 -0
  276. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/CLI_PARALLEL_FEATURES_SPEC.md +0 -0
  277. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/EVALVAULT_RUN_EXCEL_SHEETS.md +0 -0
  278. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/EXTERNAL_TRACE_API_SPEC.md +0 -0
  279. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/Extension_2.md +0 -0
  280. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/Extension_Data_Difficulty_Profiling_Custom_Judge_Model.md +0 -0
  281. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/INSURANCE_SUMMARY_METRICS_PLAN.md +0 -0
  282. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/MULTITURN_EVAL_GUIDE.md +0 -0
  283. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/OFFLINE_DOCKER.md +0 -0
  284. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/OPEN_RAG_TRACE_INTERNAL_ADAPTER.md +0 -0
  285. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/OPEN_RAG_TRACE_SAMPLES.md +0 -0
  286. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/RAGAS_HUMAN_FEEDBACK_CALIBRATION_GUIDE.md +0 -0
  287. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/cli_process.md +0 -0
  288. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/prompt_suggestions_design.md +0 -0
  289. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/rag_human_feedback_calibration_implementation_plan.md +0 -0
  290. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/refactoring_strategy.md +0 -0
  291. {evalvault-1.74.0 → evalvault-1.76.0}/docs/guides/repeat_query.md +0 -0
  292. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/05_security.md +0 -0
  293. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/EXTERNAL.md +0 -0
  294. {evalvault-1.74.0 → evalvault-1.76.0}/docs/handbook/INDEX.md +0 -0
  295. {evalvault-1.74.0 → evalvault-1.76.0}/docs/mapping/component-to-whitepaper.yaml +0 -0
  296. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/REFAC_000_master_plan.md +0 -0
  297. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/REFAC_010_agent_playbook.md +0 -0
  298. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/REFAC_020_logging_policy.md +0 -0
  299. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/REFAC_030_phase0_responsibility_map.md +0 -0
  300. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/REFAC_040_wbs_parallel_plan.md +0 -0
  301. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/logs/phase-0-baseline.md +0 -0
  302. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/logs/phase-1-evaluator.md +0 -0
  303. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/logs/phase-2-cli-run.md +0 -0
  304. {evalvault-1.74.0 → evalvault-1.76.0}/docs/refactor/logs/phase-3-analysis.md +0 -0
  305. {evalvault-1.74.0 → evalvault-1.76.0}/docs/stylesheets/extra.css +0 -0
  306. {evalvault-1.74.0 → evalvault-1.76.0}/docs/templates/dataset_template.csv +0 -0
  307. {evalvault-1.74.0 → evalvault-1.76.0}/docs/templates/dataset_template.json +0 -0
  308. {evalvault-1.74.0 → evalvault-1.76.0}/docs/templates/dataset_template.xlsx +0 -0
  309. {evalvault-1.74.0 → evalvault-1.76.0}/docs/templates/eval_report_templates.md +0 -0
  310. {evalvault-1.74.0 → evalvault-1.76.0}/docs/templates/kg_template.json +0 -0
  311. {evalvault-1.74.0 → evalvault-1.76.0}/docs/templates/otel_openinference_trace_example.json +0 -0
  312. {evalvault-1.74.0 → evalvault-1.76.0}/docs/templates/ragas_dataset_example_ko90_en10.json +0 -0
  313. {evalvault-1.74.0 → evalvault-1.76.0}/docs/templates/retriever_docs_template.json +0 -0
  314. {evalvault-1.74.0 → evalvault-1.76.0}/docs/tools/generate-whitepaper.py +0 -0
  315. {evalvault-1.74.0 → evalvault-1.76.0}/docs/web_ui_analysis_migration_plan.md +0 -0
  316. {evalvault-1.74.0 → evalvault-1.76.0}/dummy_test_dataset.json +0 -0
  317. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/README.md +0 -0
  318. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/faithfulness_test.json +0 -0
  319. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/insurance_qa_100.json +0 -0
  320. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/keyword_extraction_test.json +0 -0
  321. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/retrieval_test.json +0 -0
  322. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/output/comparison.json +0 -0
  323. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/output/full_results.json +0 -0
  324. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/output/leaderboard.json +0 -0
  325. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/output/results_mteb.json +0 -0
  326. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/output/retrieval_result.json +0 -0
  327. {evalvault-1.74.0 → evalvault-1.76.0}/examples/benchmarks/run_korean_benchmark.py +0 -0
  328. {evalvault-1.74.0 → evalvault-1.76.0}/examples/kg_generator_demo.py +0 -0
  329. {evalvault-1.74.0 → evalvault-1.76.0}/examples/method_plugin_template/README.md +0 -0
  330. {evalvault-1.74.0 → evalvault-1.76.0}/examples/method_plugin_template/pyproject.toml +0 -0
  331. {evalvault-1.74.0 → evalvault-1.76.0}/examples/method_plugin_template/src/method_plugin_template/__init__.py +0 -0
  332. {evalvault-1.74.0 → evalvault-1.76.0}/examples/method_plugin_template/src/method_plugin_template/methods.py +0 -0
  333. {evalvault-1.74.0 → evalvault-1.76.0}/examples/stage_events.jsonl +0 -0
  334. {evalvault-1.74.0 → evalvault-1.76.0}/examples/usecase/comprehensive_workflow_test.py +0 -0
  335. {evalvault-1.74.0 → evalvault-1.76.0}/examples/usecase/insurance_eval_dataset.json +0 -0
  336. {evalvault-1.74.0 → evalvault-1.76.0}/examples/usecase/output/comprehensive_report.html +0 -0
  337. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/.env.example +0 -0
  338. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/.gitignore +0 -0
  339. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/Dockerfile +0 -0
  340. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/README.md +0 -0
  341. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/analysis-compare.spec.ts +0 -0
  342. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/analysis-lab.spec.ts +0 -0
  343. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/compare-runs.spec.ts +0 -0
  344. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/dashboard.spec.ts +0 -0
  345. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/domain-memory.spec.ts +0 -0
  346. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/evaluation-studio.spec.ts +0 -0
  347. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/judge-calibration.spec.ts +0 -0
  348. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/knowledge-base.spec.ts +0 -0
  349. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/mocks/intents.json +0 -0
  350. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/mocks/run_details.json +0 -0
  351. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/mocks/runs.json +0 -0
  352. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/e2e/run-details.spec.ts +0 -0
  353. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/eslint.config.js +0 -0
  354. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/index.html +0 -0
  355. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/nginx.conf +0 -0
  356. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/package-lock.json +0 -0
  357. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/package.json +0 -0
  358. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/playwright.config.ts +0 -0
  359. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/public/vite.svg +0 -0
  360. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/App.css +0 -0
  361. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/assets/react.svg +0 -0
  362. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/AnalysisNodeOutputs.tsx +0 -0
  363. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/InsightSpacePanel.tsx +0 -0
  364. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/Layout.tsx +0 -0
  365. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/MarkdownContent.tsx +0 -0
  366. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/PrioritySummaryPanel.tsx +0 -0
  367. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/SpaceLegend.tsx +0 -0
  368. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/SpacePlot2D.tsx +0 -0
  369. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/SpacePlot3D.tsx +0 -0
  370. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/StatusBadge.tsx +0 -0
  371. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/ToastProvider.tsx +0 -0
  372. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/VirtualizedText.tsx +0 -0
  373. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Conversation.tsx +0 -0
  374. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Message.tsx +0 -0
  375. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/PromptInput.tsx +0 -0
  376. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Response.tsx +0 -0
  377. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/index.ts +0 -0
  378. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/config/ui.ts +0 -0
  379. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/config.ts +0 -0
  380. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/hooks/useInsightSpace.ts +0 -0
  381. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/index.css +0 -0
  382. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/main.tsx +0 -0
  383. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/AiSdkChat.tsx +0 -0
  384. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisCompareView.tsx +0 -0
  385. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisLab.tsx +0 -0
  386. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisResultView.tsx +0 -0
  387. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/Chat.tsx +0 -0
  388. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/CompareRuns.tsx +0 -0
  389. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/ComprehensiveAnalysis.tsx +0 -0
  390. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/CustomerReport.tsx +0 -0
  391. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/Dashboard.tsx +0 -0
  392. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/DomainMemory.tsx +0 -0
  393. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/EvaluationStudio.tsx +0 -0
  394. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/KnowledgeBase.tsx +0 -0
  395. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/RunDetails.tsx +0 -0
  396. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/Settings.tsx +0 -0
  397. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/Visualization.tsx +0 -0
  398. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/pages/VisualizationHome.tsx +0 -0
  399. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/types/plotly.d.ts +0 -0
  400. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/utils/cliCommandBuilder.ts +0 -0
  401. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/utils/clipboard.ts +0 -0
  402. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/utils/format.ts +0 -0
  403. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/utils/phoenix.ts +0 -0
  404. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/utils/runAnalytics.ts +0 -0
  405. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/utils/score.ts +0 -0
  406. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/src/utils/summaryMetrics.ts +0 -0
  407. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/tailwind.config.js +0 -0
  408. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/tsconfig.app.json +0 -0
  409. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/tsconfig.json +0 -0
  410. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/tsconfig.node.json +0 -0
  411. {evalvault-1.74.0 → evalvault-1.76.0}/frontend/vite.config.ts +0 -0
  412. {evalvault-1.74.0 → evalvault-1.76.0}/mkdocs.yml +0 -0
  413. {evalvault-1.74.0 → evalvault-1.76.0}/package-lock.json +0 -0
  414. {evalvault-1.74.0 → evalvault-1.76.0}/prompts/system_override.txt +0 -0
  415. {evalvault-1.74.0 → evalvault-1.76.0}/reports/.gitkeep +0 -0
  416. {evalvault-1.74.0 → evalvault-1.76.0}/reports/README.md +0 -0
  417. {evalvault-1.74.0 → evalvault-1.76.0}/reports/debug_report_r1_smoke.md +0 -0
  418. {evalvault-1.74.0 → evalvault-1.76.0}/reports/debug_report_r2_graphrag.md +0 -0
  419. {evalvault-1.74.0 → evalvault-1.76.0}/reports/debug_report_r2_graphrag_openai.md +0 -0
  420. {evalvault-1.74.0 → evalvault-1.76.0}/reports/debug_report_r3_bm25.md +0 -0
  421. {evalvault-1.74.0 → evalvault-1.76.0}/reports/debug_report_r3_bm25_langfuse3.md +0 -0
  422. {evalvault-1.74.0 → evalvault-1.76.0}/reports/debug_report_r3_dense_faiss.md +0 -0
  423. {evalvault-1.74.0 → evalvault-1.76.0}/reports/improvement_1d91a667-4288-4742-be3a-a8f5310c5140.md +0 -0
  424. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r2_graphrag_openai_stage_events.jsonl +0 -0
  425. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r2_graphrag_openai_stage_report.txt +0 -0
  426. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r2_graphrag_stage_events.jsonl +0 -0
  427. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r2_graphrag_stage_report.txt +0 -0
  428. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse2_stage_events.jsonl +0 -0
  429. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse3_stage_events.jsonl +0 -0
  430. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse_stage_events.jsonl +0 -0
  431. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r3_bm25_phoenix_stage_events.jsonl +0 -0
  432. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r3_bm25_stage_events.jsonl +0 -0
  433. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r3_bm25_stage_report.txt +0 -0
  434. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r3_dense_faiss_stage_events.jsonl +0 -0
  435. {evalvault-1.74.0 → evalvault-1.76.0}/reports/r3_dense_faiss_stage_report.txt +0 -0
  436. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/benchmark/download_kmmlu.py +0 -0
  437. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/ci/run_regression_gate.py +0 -0
  438. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/dev/open_rag_trace_demo.py +0 -0
  439. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/dev/open_rag_trace_integration_template.py +0 -0
  440. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/dev/otel-collector-config.yaml +0 -0
  441. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/dev/start_web_ui_with_phoenix.sh +0 -0
  442. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/dev/validate_open_rag_trace.py +0 -0
  443. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/dev/verify_dashboard_endpoint.sh +0 -0
  444. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/dev_seed_pipeline_results.py +0 -0
  445. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/__init__.py +0 -0
  446. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/analyzer/__init__.py +0 -0
  447. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/analyzer/ast_scanner.py +0 -0
  448. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/analyzer/confidence_scorer.py +0 -0
  449. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/analyzer/graph_builder.py +0 -0
  450. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/analyzer/side_effect_detector.py +0 -0
  451. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/generate_api_docs.py +0 -0
  452. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/models/__init__.py +0 -0
  453. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/models/schema.py +0 -0
  454. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/renderer/__init__.py +0 -0
  455. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/docs/renderer/html_generator.py +0 -0
  456. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/offline/bundle_datasets.sh +0 -0
  457. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/offline/export_base_images.sh +0 -0
  458. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/offline/export_images.sh +0 -0
  459. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/offline/import_images.sh +0 -0
  460. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/offline/load_base_images.sh +0 -0
  461. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/offline/restore_datasets.sh +0 -0
  462. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/offline/smoke_test.sh +0 -0
  463. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/ops/phoenix_watch.py +0 -0
  464. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/perf/backfill_langfuse_trace_url.py +0 -0
  465. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/perf/r3_dense_smoke.py +0 -0
  466. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/perf/r3_evalvault_run_dataset.json +0 -0
  467. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/perf/r3_retriever_docs.json +0 -0
  468. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/perf/r3_smoke_real.jsonl +0 -0
  469. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/perf/r3_stage_events_sample.jsonl +0 -0
  470. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/pipeline_template_inspect.py +0 -0
  471. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/reports/generate_release_notes.py +0 -0
  472. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/run_with_timeout.py +0 -0
  473. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/test_full_evaluation.py +0 -0
  474. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/tests/run_regressions.py +0 -0
  475. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/tests/run_retriever_stage_report_smoke.sh +0 -0
  476. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/validate_tutorials.py +0 -0
  477. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/verify_ragas_compliance.py +0 -0
  478. {evalvault-1.74.0 → evalvault-1.76.0}/scripts/verify_workflows.py +0 -0
  479. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/__init__.py +0 -0
  480. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/__init__.py +0 -0
  481. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/__init__.py +0 -0
  482. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/__init__.py +0 -0
  483. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/main.py +0 -0
  484. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/__init__.py +0 -0
  485. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/benchmark.py +0 -0
  486. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/calibration.py +9 -9
  487. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/knowledge.py +0 -0
  488. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/mcp.py +0 -0
  489. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/__init__.py +0 -0
  490. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/app.py +0 -0
  491. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/__init__.py +0 -0
  492. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/agent.py +0 -0
  493. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/api.py +0 -0
  494. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/artifacts.py +0 -0
  495. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/config.py +0 -0
  496. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/generate.py +0 -0
  497. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/init.py +0 -0
  498. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/kg.py +0 -0
  499. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/langfuse.py +0 -0
  500. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/phoenix.py +0 -0
  501. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/__init__.py +0 -0
  502. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/analysis_io.py +0 -0
  503. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/console.py +0 -0
  504. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/errors.py +0 -0
  505. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/formatters.py +0 -0
  506. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/presets.py +0 -0
  507. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/progress.py +0 -0
  508. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/validators.py +0 -0
  509. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/__init__.py +0 -0
  510. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/schemas.py +0 -0
  511. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/__init__.py +0 -0
  512. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/__init__.py +0 -0
  513. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/analysis_report_module.py +0 -0
  514. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/base_module.py +0 -0
  515. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/bm25_searcher_module.py +0 -0
  516. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/causal_adapter.py +0 -0
  517. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/causal_analyzer_module.py +0 -0
  518. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/common.py +0 -0
  519. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/comparison_pipeline_adapter.py +0 -0
  520. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/comparison_report_module.py +0 -0
  521. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/data_loader_module.py +0 -0
  522. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/dataset_feature_analyzer_module.py +0 -0
  523. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/detailed_report_module.py +0 -0
  524. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/diagnostic_playbook_module.py +0 -0
  525. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_distribution_module.py +0 -0
  526. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hybrid_rrf_module.py +0 -0
  527. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hybrid_weighted_module.py +0 -0
  528. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hypothesis_generator_module.py +0 -0
  529. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/llm_report_module.py +0 -0
  530. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/low_performer_extractor_module.py +0 -0
  531. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/model_analyzer_module.py +0 -0
  532. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/morpheme_analyzer_module.py +0 -0
  533. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/morpheme_quality_checker_module.py +0 -0
  534. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/multiturn_analyzer_module.py +0 -0
  535. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/network_analyzer_module.py +0 -0
  536. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/nlp_adapter.py +0 -0
  537. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/nlp_analyzer_module.py +0 -0
  538. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pattern_detector_module.py +0 -0
  539. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pipeline_factory.py +0 -0
  540. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pipeline_helpers.py +0 -0
  541. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/priority_summary_module.py +0 -0
  542. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/ragas_evaluator_module.py +0 -0
  543. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py +0 -0
  544. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py +0 -0
  545. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_quality_checker_module.py +0 -0
  546. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/root_cause_analyzer_module.py +0 -0
  547. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_analyzer_module.py +0 -0
  548. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_change_detector_module.py +0 -0
  549. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_comparator_module.py +0 -0
  550. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_loader_module.py +0 -0
  551. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_metric_comparator_module.py +0 -0
  552. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/search_comparator_module.py +0 -0
  553. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_adapter.py +0 -0
  554. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_analyzer_module.py +0 -0
  555. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_comparator_module.py +0 -0
  556. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/summary_report_module.py +0 -0
  557. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/time_series_analyzer_module.py +0 -0
  558. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/timeseries_advanced_module.py +0 -0
  559. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/trend_detector_module.py +0 -0
  560. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/verification_report_module.py +0 -0
  561. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/artifact_fs.py +0 -0
  562. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/benchmark/__init__.py +0 -0
  563. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/benchmark/lm_eval_adapter.py +0 -0
  564. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/__init__.py +0 -0
  565. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/hybrid_cache.py +0 -0
  566. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/memory_cache.py +0 -0
  567. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/__init__.py +0 -0
  568. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/base.py +0 -0
  569. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/csv_loader.py +0 -0
  570. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/excel_loader.py +0 -0
  571. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/json_loader.py +0 -0
  572. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/loader_factory.py +0 -0
  573. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/method_input_loader.py +0 -0
  574. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/multiturn_json_loader.py +0 -0
  575. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/streaming_loader.py +0 -0
  576. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/templates.py +0 -0
  577. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/thresholds.py +0 -0
  578. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/debug/__init__.py +0 -0
  579. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/debug/report_renderer.py +0 -0
  580. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/__init__.py +0 -0
  581. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/ocr/__init__.py +0 -0
  582. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/ocr/paddleocr_backend.py +0 -0
  583. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/pdf_extractor.py +0 -0
  584. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/versioned_loader.py +0 -0
  585. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/domain_memory_schema.sql +0 -0
  586. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/sqlite_adapter.py +0 -0
  587. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/__init__.py +0 -0
  588. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/difficulty_profile_writer.py +0 -0
  589. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/ops_snapshot_writer.py +0 -0
  590. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/__init__.py +0 -0
  591. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/insight_generator.py +0 -0
  592. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/pattern_detector.py +0 -0
  593. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/playbook_loader.py +0 -0
  594. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/stage_metric_playbook_loader.py +0 -0
  595. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/judge_calibration_adapter.py +0 -0
  596. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/judge_calibration_reporter.py +0 -0
  597. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/__init__.py +0 -0
  598. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/graph_rag_retriever.py +0 -0
  599. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/networkx_adapter.py +0 -0
  600. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/parallel_kg_builder.py +0 -0
  601. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/query_strategies.py +0 -0
  602. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/__init__.py +0 -0
  603. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/anthropic_adapter.py +0 -0
  604. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/azure_adapter.py +0 -0
  605. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/base.py +0 -0
  606. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/instructor_factory.py +0 -0
  607. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/llm_relation_augmenter.py +0 -0
  608. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/ollama_adapter.py +0 -0
  609. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/openai_adapter.py +0 -0
  610. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/token_aware_chat.py +0 -0
  611. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/__init__.py +0 -0
  612. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/baseline_oracle.py +0 -0
  613. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/external_command.py +0 -0
  614. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/registry.py +0 -0
  615. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/__init__.py +0 -0
  616. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/__init__.py +0 -0
  617. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/bm25_retriever.py +0 -0
  618. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/document_chunker.py +0 -0
  619. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/hybrid_retriever.py +0 -0
  620. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/kiwi_tokenizer.py +0 -0
  621. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/korean_evaluation.py +0 -0
  622. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/korean_stopwords.py +0 -0
  623. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/toolkit_factory.py +0 -0
  624. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/ops/__init__.py +0 -0
  625. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/ops/report_renderer.py +0 -0
  626. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/__init__.py +0 -0
  627. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/ci_report_formatter.py +0 -0
  628. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/dashboard_generator.py +0 -0
  629. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/llm_report_generator.py +0 -0
  630. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/markdown_adapter.py +0 -0
  631. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/pr_comment_formatter.py +0 -0
  632. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/retriever/__init__.py +0 -0
  633. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/retriever/graph_rag_adapter.py +0 -0
  634. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/__init__.py +0 -0
  635. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/benchmark_storage_adapter.py +0 -0
  636. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/postgres_adapter.py +0 -0
  637. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/schema.sql +0 -0
  638. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/sqlite_adapter.py +0 -0
  639. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/__init__.py +0 -0
  640. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_log_handler.py +0 -0
  641. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +0 -0
  642. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py +0 -0
  643. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py +0 -0
  644. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py +0 -0
  645. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/__init__.py +0 -0
  646. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/langfuse_adapter.py +0 -0
  647. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/log_sanitizer.py +0 -0
  648. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/__init__.py +0 -0
  649. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/agent_types.py +0 -0
  650. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/domain_config.py +0 -0
  651. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/langfuse_support.py +0 -0
  652. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/model_config.py +0 -0
  653. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/playbooks/improvement_playbook.yaml +0 -0
  654. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/config/secret_manager.py +0 -0
  655. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/debug_ragas.py +0 -0
  656. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/debug_ragas_real.py +0 -0
  657. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/__init__.py +0 -0
  658. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/__init__.py +0 -0
  659. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/analysis.py +0 -0
  660. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/analysis_pipeline.py +0 -0
  661. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/benchmark.py +0 -0
  662. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/benchmark_run.py +0 -0
  663. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/dataset.py +0 -0
  664. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/debug.py +0 -0
  665. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/experiment.py +0 -0
  666. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/feedback.py +0 -0
  667. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/graph_rag.py +0 -0
  668. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/improvement.py +0 -0
  669. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/judge_calibration.py +0 -0
  670. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/kg.py +0 -0
  671. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/memory.py +0 -0
  672. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/method.py +0 -0
  673. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/multiturn.py +0 -0
  674. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/ops_report.py +0 -0
  675. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/prompt.py +0 -0
  676. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/prompt_suggestion.py +0 -0
  677. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/rag_trace.py +0 -0
  678. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/result.py +0 -0
  679. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/entities/stage.py +0 -0
  680. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/__init__.py +0 -0
  681. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/analysis_registry.py +0 -0
  682. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/confidence.py +0 -0
  683. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/contextual_relevancy.py +0 -0
  684. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/entity_preservation.py +0 -0
  685. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/insurance.py +0 -0
  686. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/multiturn_metrics.py +0 -0
  687. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/no_answer.py +0 -0
  688. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/registry.py +0 -0
  689. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/retrieval_rank.py +0 -0
  690. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_accuracy.py +0 -0
  691. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_needs_followup.py +0 -0
  692. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_non_definitive.py +0 -0
  693. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_risk_coverage.py +0 -0
  694. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/terms_dictionary.json +0 -0
  695. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/text_match.py +0 -0
  696. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/__init__.py +0 -0
  697. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/analysis_service.py +0 -0
  698. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/artifact_lint_service.py +0 -0
  699. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/async_batch_executor.py +0 -0
  700. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/batch_executor.py +0 -0
  701. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_report_service.py +0 -0
  702. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_runner.py +0 -0
  703. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_service.py +0 -0
  704. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/cache_metrics.py +0 -0
  705. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/cluster_map_builder.py +0 -0
  706. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/custom_metric_snapshot.py +0 -0
  707. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/dataset_preprocessor.py +0 -0
  708. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/debug_report_service.py +0 -0
  709. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/difficulty_profile_reporter.py +0 -0
  710. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/difficulty_profiling_service.py +0 -0
  711. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/document_chunker.py +0 -0
  712. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/document_versioning.py +0 -0
  713. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/embedding_overlay.py +0 -0
  714. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/entity_extractor.py +0 -0
  715. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_comparator.py +0 -0
  716. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_manager.py +0 -0
  717. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_reporter.py +0 -0
  718. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_repository.py +0 -0
  719. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_statistics.py +0 -0
  720. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/graph_rag_experiment.py +0 -0
  721. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/holdout_splitter.py +0 -0
  722. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/improvement_guide_service.py +0 -0
  723. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/intent_classifier.py +0 -0
  724. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/judge_calibration_service.py +0 -0
  725. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/kg_generator.py +0 -0
  726. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/memory_aware_evaluator.py +0 -0
  727. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/memory_based_analysis.py +0 -0
  728. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/method_runner.py +0 -0
  729. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/multiturn_evaluator.py +0 -0
  730. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/ops_report_service.py +0 -0
  731. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/ops_snapshot_service.py +0 -0
  732. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/pipeline_orchestrator.py +0 -0
  733. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/pipeline_template_registry.py +0 -0
  734. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_candidate_service.py +0 -0
  735. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_manifest.py +0 -0
  736. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_registry.py +0 -0
  737. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_scoring_service.py +0 -0
  738. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_status.py +0 -0
  739. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_suggestion_reporter.py +0 -0
  740. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/ragas_prompt_overrides.py +0 -0
  741. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/regression_gate_service.py +0 -0
  742. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/retrieval_metrics.py +0 -0
  743. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/retriever_context.py +0 -0
  744. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/run_comparison_service.py +0 -0
  745. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/satisfaction_calibration_service.py +0 -0
  746. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_event_builder.py +0 -0
  747. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_metric_guide_service.py +0 -0
  748. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_metric_service.py +0 -0
  749. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_summary_service.py +0 -0
  750. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/synthetic_qa_generator.py +0 -0
  751. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/testset_generator.py +0 -0
  752. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/threshold_profiles.py +0 -0
  753. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/unified_report_service.py +0 -0
  754. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/domain/services/visual_space_service.py +0 -0
  755. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/mkdocs_helpers.py +0 -0
  756. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/__init__.py +0 -0
  757. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/__init__.py +0 -0
  758. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/analysis_pipeline_port.py +0 -0
  759. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/evaluator_port.py +0 -0
  760. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/learning_hook_port.py +0 -0
  761. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/multiturn_port.py +0 -0
  762. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/__init__.py +0 -0
  763. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_cache_port.py +0 -0
  764. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_module_port.py +0 -0
  765. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_port.py +0 -0
  766. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/artifact_fs_port.py +0 -0
  767. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/benchmark_port.py +0 -0
  768. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/causal_analysis_port.py +0 -0
  769. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/comparison_pipeline_port.py +0 -0
  770. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/dataset_port.py +0 -0
  771. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/difficulty_profile_port.py +0 -0
  772. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/domain_memory_port.py +0 -0
  773. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/embedding_port.py +0 -0
  774. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/graph_retriever_port.py +0 -0
  775. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/improvement_port.py +0 -0
  776. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/intent_classifier_port.py +0 -0
  777. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/judge_calibration_port.py +0 -0
  778. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/korean_nlp_port.py +0 -0
  779. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/llm_factory_port.py +0 -0
  780. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/llm_port.py +0 -0
  781. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/method_port.py +0 -0
  782. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/nlp_analysis_port.py +0 -0
  783. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/ops_snapshot_port.py +0 -0
  784. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/relation_augmenter_port.py +0 -0
  785. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/report_port.py +0 -0
  786. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/stage_storage_port.py +0 -0
  787. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/tracer_port.py +0 -0
  788. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/tracker_port.py +0 -0
  789. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/reports/__init__.py +0 -0
  790. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/reports/release_notes.py +0 -0
  791. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/scripts/__init__.py +0 -0
  792. {evalvault-1.74.0 → evalvault-1.76.0}/src/evalvault/scripts/regression_runner.py +0 -0
  793. {evalvault-1.74.0 → evalvault-1.76.0}/tests/__init__.py +0 -0
  794. {evalvault-1.74.0 → evalvault-1.76.0}/tests/conftest.py +0 -0
  795. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/README.md +0 -0
  796. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/benchmark/retrieval_ground_truth_min.json +0 -0
  797. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/benchmark/retrieval_ground_truth_multi.json +0 -0
  798. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/auto_insurance_qa_korean_full.json +0 -0
  799. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/callcenter_summary_5cases.json +0 -0
  800. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/comprehensive_dataset.json +0 -0
  801. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/edge_cases.json +0 -0
  802. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/edge_cases.xlsx +0 -0
  803. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/evaluation_test_sample.json +0 -0
  804. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_benchmark.json +0 -0
  805. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_multi_sample.json +0 -0
  806. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_retriever_docs.json +0 -0
  807. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_smoke.json +0 -0
  808. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_document.txt +0 -0
  809. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.csv +0 -0
  810. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.json +0 -0
  811. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.xlsx +0 -0
  812. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.csv +0 -0
  813. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.json +0 -0
  814. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.xlsx +0 -0
  815. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean_versioned_pdf.json +0 -0
  816. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/multiturn_benchmark.json +0 -0
  817. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/regression_baseline.json +0 -0
  818. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/run_mode_full_domain_memory.json +0 -0
  819. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/run_mode_simple.json +0 -0
  820. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/e2e/summary_eval_minimal.json +0 -0
  821. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/kg/minimal_graph.json +0 -0
  822. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.csv +0 -0
  823. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.json +0 -0
  824. {evalvault-1.74.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.xlsx +0 -0
  825. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/__init__.py +0 -0
  826. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/benchmark/test_benchmark_service_integration.py +0 -0
  827. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/conftest.py +0 -0
  828. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_cli_integration.py +0 -0
  829. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_data_flow.py +0 -0
  830. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_e2e_scenarios.py +0 -0
  831. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_evaluation_flow.py +0 -0
  832. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_full_workflow.py +0 -0
  833. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_langfuse_flow.py +0 -0
  834. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_phoenix_flow.py +0 -0
  835. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_pipeline_api_contracts.py +0 -0
  836. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_storage_flow.py +0 -0
  837. {evalvault-1.74.0 → evalvault-1.76.0}/tests/integration/test_summary_eval_fixture.py +0 -0
  838. {evalvault-1.74.0 → evalvault-1.76.0}/tests/optional_deps.py +0 -0
  839. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/__init__.py +0 -0
  840. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/inbound/mcp/test_execute_tools.py +0 -0
  841. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/inbound/mcp/test_read_tools.py +0 -0
  842. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/documents/test_pdf_extractor.py +0 -0
  843. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/documents/test_versioned_loader.py +0 -0
  844. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/__init__.py +0 -0
  845. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_insight_generator.py +0 -0
  846. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_pattern_detector.py +0 -0
  847. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_playbook_loader.py +0 -0
  848. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_stage_metric_playbook_loader.py +0 -0
  849. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/kg/test_graph_rag_retriever.py +0 -0
  850. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/kg/test_parallel_kg_builder.py +0 -0
  851. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/retriever/test_graph_rag_adapter.py +0 -0
  852. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/storage/test_benchmark_storage_adapter.py +0 -0
  853. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/config/test_phoenix_support.py +0 -0
  854. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/conftest.py +0 -0
  855. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_analysis_metric_registry.py +0 -0
  856. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_confidence.py +0 -0
  857. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_contextual_relevancy.py +0 -0
  858. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_entity_preservation.py +0 -0
  859. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_metric_registry.py +0 -0
  860. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_multiturn_metrics.py +0 -0
  861. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_no_answer.py +0 -0
  862. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_retrieval_rank.py +0 -0
  863. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_text_match.py +0 -0
  864. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_cache_metrics.py +0 -0
  865. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_claim_level.py +0 -0
  866. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_dataset_preprocessor.py +0 -0
  867. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_document_versioning.py +0 -0
  868. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_evaluator_comprehensive.py +0 -0
  869. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_holdout_splitter.py +0 -0
  870. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_improvement_guide_service.py +0 -0
  871. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_judge_calibration_service.py +0 -0
  872. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_ops_snapshot_service.py +0 -0
  873. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_regression_gate_service.py +0 -0
  874. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_retrieval_metrics.py +0 -0
  875. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_retriever_context.py +0 -0
  876. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_stage_event_builder.py +0 -0
  877. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_stage_metric_guide_service.py +0 -0
  878. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/services/test_synthetic_qa_generator.py +0 -0
  879. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/test_embedding_overlay.py +0 -0
  880. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/test_prompt_manifest.py +0 -0
  881. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/domain/test_prompt_status.py +0 -0
  882. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/reports/test_release_notes.py +0 -0
  883. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/scripts/test_regression_runner.py +0 -0
  884. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_agent_types.py +0 -0
  885. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_analysis_entities.py +0 -0
  886. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_analysis_modules.py +0 -0
  887. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_analysis_pipeline.py +0 -0
  888. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_analysis_service.py +0 -0
  889. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_anthropic_adapter.py +0 -0
  890. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_artifact_lint_service.py +0 -0
  891. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_async_batch_executor.py +0 -0
  892. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_azure_adapter.py +0 -0
  893. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_benchmark_helpers.py +0 -0
  894. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_benchmark_runner.py +0 -0
  895. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_causal_adapter.py +0 -0
  896. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_cli_artifacts.py +0 -0
  897. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_cli_init.py +0 -0
  898. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_cli_ops.py +0 -0
  899. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_cli_progress.py +0 -0
  900. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_data_loaders.py +0 -0
  901. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_difficulty_profiling_service.py +0 -0
  902. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_domain_config.py +0 -0
  903. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_domain_memory.py +0 -0
  904. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_entities.py +0 -0
  905. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_entities_kg.py +0 -0
  906. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_entity_extractor.py +0 -0
  907. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_evaluator.py +0 -0
  908. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_experiment.py +0 -0
  909. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_hybrid_cache.py +0 -0
  910. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_instrumentation.py +0 -0
  911. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_insurance_metric.py +0 -0
  912. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_intent_classifier.py +0 -0
  913. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_kg_generator.py +0 -0
  914. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_kg_networkx.py +0 -0
  915. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_kiwi_tokenizer.py +0 -0
  916. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_kiwi_warning_suppression.py +0 -0
  917. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_korean_evaluation.py +0 -0
  918. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_korean_retrieval.py +0 -0
  919. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_langfuse_tracker.py +0 -0
  920. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_llm_relation_augmenter.py +0 -0
  921. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_lm_eval_adapter.py +0 -0
  922. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_markdown_report.py +0 -0
  923. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_memory_cache.py +0 -0
  924. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_memory_services.py +0 -0
  925. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_method_plugins.py +0 -0
  926. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_mlflow_tracker.py +0 -0
  927. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_model_config.py +0 -0
  928. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_nlp_adapter.py +0 -0
  929. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_nlp_entities.py +0 -0
  930. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_ollama_adapter.py +0 -0
  931. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_openai_adapter.py +0 -0
  932. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_phoenix_adapter.py +0 -0
  933. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_pipeline_orchestrator.py +0 -0
  934. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_ports.py +0 -0
  935. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_postgres_storage.py +0 -0
  936. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_pr_comment_formatter.py +0 -0
  937. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_prompt_candidate_service.py +0 -0
  938. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_rag_trace_entities.py +0 -0
  939. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_run_comparison_service.py +0 -0
  940. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_run_memory_helpers.py +0 -0
  941. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_run_mode_fixtures.py +0 -0
  942. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_settings.py +0 -0
  943. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_sqlite_storage.py +0 -0
  944. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_stage_cli.py +0 -0
  945. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_stage_event_schema.py +0 -0
  946. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_stage_metric_service.py +0 -0
  947. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_stage_storage.py +0 -0
  948. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_stage_summary_service.py +0 -0
  949. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_statistical_adapter.py +0 -0
  950. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_streaming_loader.py +0 -0
  951. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_summary_eval_fixture.py +0 -0
  952. {evalvault-1.74.0 → evalvault-1.76.0}/tests/unit/test_testset_generator.py +0 -0
@@ -16,9 +16,17 @@
16
16
  # - prod: 운영용 고성능 모델 (gpt-oss-safeguard:20b, qwen3-embedding:8b)
17
17
  # - openai: OpenAI API 사용 (gpt-5-mini, text-embedding-3-small)
18
18
  EVALVAULT_PROFILE=dev
19
- # SQLite DB 경로 (API/CLI 공통)
19
+ # 기본 스토리지: PostgreSQL + pgvector
20
+ # POSTGRES_HOST=localhost
21
+ # POSTGRES_PORT=5432
22
+ # POSTGRES_DATABASE=evalvault
23
+ # POSTGRES_USER=postgres
24
+ # POSTGRES_PASSWORD=your-password
25
+ # POSTGRES_CONNECTION_STRING=postgresql://user:pass@localhost:5432/evalvault
26
+ # SQLite를 쓰려면 아래를 명시하세요 (API/CLI 공통)
27
+ # DB_BACKEND=sqlite
20
28
  # EVALVAULT_DB_PATH=data/db/evalvault.db
21
- # 도메인 메모리 DB 경로
29
+ # 도메인 메모리 DB 경로 (SQLite 전용)
22
30
  # EVALVAULT_MEMORY_DB_PATH=data/db/evalvault_memory.db
23
31
 
24
32
  # ================================================
@@ -32,6 +40,21 @@ OLLAMA_TIMEOUT=120
32
40
  # - 예시: OLLAMA_TOOL_MODELS=gpt-oss:120b,gpt-oss-safeguard:120b,gpt-oss-safeguard:20b
33
41
  # OLLAMA_TOOL_MODELS=
34
42
 
43
+ # 라우팅/챗 모델 (선택)
44
+ # OLLAMA_ROUTER_MODEL=gemma3:1b
45
+ # OLLAMA_CHAT_MODEL=gemma3:1b
46
+ # OLLAMA_CHAT_TIMEOUT_SECONDS=180
47
+
48
+ # 간단 챗 모드 (RAG/도구 호출 없이 Ollama만 사용)
49
+ # EVALVAULT_CHAT_SIMPLE_MODE=true
50
+
51
+ # RAG 범위/성능 튜닝
52
+ # EVALVAULT_RAG_USER_GUIDE_LIMIT=10
53
+ # EVALVAULT_RAG_USE_HYBRID=false
54
+ # EVALVAULT_RAG_VECTOR_STORE=none
55
+ # EVALVAULT_RAG_EMBEDDING_PROFILE=
56
+ # EVALVAULT_CHAT_RUN_CONTEXT_ENABLED=false
57
+
35
58
  # ================================================
36
59
  # OpenAI 설정 (외부망)
37
60
  # ================================================
@@ -88,13 +111,21 @@ OPENAI_API_KEY=sk-your-api-key-here
88
111
  # MLFLOW_EXPERIMENT_NAME=evalvault
89
112
 
90
113
  # ================================================
91
- # PostgreSQL 설정 (선택 - 프로덕션 스토리지)
92
- # ================================================
93
- # POSTGRES_HOST=localhost
94
- # POSTGRES_PORT=5432
95
- # POSTGRES_DATABASE=evalvault
96
- # POSTGRES_USER=postgres
97
- # POSTGRES_PASSWORD=your-password
114
+ # RAG Retriever 설정
115
+ # ================================================
116
+ # 하이브리드 검색 사용 여부 (BM25 + Dense)
117
+ # EVALVAULT_RAG_USE_HYBRID=true
118
+ # 벡터 스토어 종류 (pgvector|memory)
119
+ # EVALVAULT_RAG_VECTOR_STORE=pgvector
120
+ # 임베딩 프로파일 (dev|prod)
121
+ # EVALVAULT_RAG_EMBEDDING_PROFILE=dev
122
+ # LLM 없이 컨텍스트만 반환
123
+ # EVALVAULT_RAG_LLM_ENABLED=false
124
+ # pgvector 인덱스 옵션 (성능 튜닝)
125
+ # EVALVAULT_RAG_PGVECTOR_INDEX=hnsw # hnsw|ivfflat|none
126
+ # EVALVAULT_RAG_PGVECTOR_INDEX_LISTS=100
127
+ # EVALVAULT_RAG_PGVECTOR_HNSW_M=16
128
+ # EVALVAULT_RAG_PGVECTOR_HNSW_EF_CONSTRUCTION=64
98
129
 
99
130
  # ================================================
100
131
  # API 인증 / CORS / Frontend 설정
@@ -14,7 +14,7 @@ EVALVAULT_PROFILE=dev
14
14
  # ================================================
15
15
  # PostgreSQL (core stack)
16
16
  # ================================================
17
- POSTGRES_IMAGE=postgres:16.4-alpine
17
+ POSTGRES_IMAGE=pgvector/pgvector:0.8.0-pg16
18
18
  POSTGRES_USER=evalvault
19
19
  POSTGRES_PASSWORD=evalvault
20
20
  POSTGRES_DB=evalvault
@@ -54,6 +54,7 @@ coverage.xml
54
54
  reports/*.html
55
55
  reports/*.xml
56
56
  reports/*.json
57
+ reports/*.csv
57
58
  reports/analysis/
58
59
  reports/analysis/**
59
60
  reports/comparison/
@@ -129,6 +130,7 @@ celerybeat-schedule
129
130
 
130
131
  # Environments
131
132
  .env
133
+ .env.offline
132
134
  .venv
133
135
  env/
134
136
  venv/
@@ -160,3 +162,6 @@ dmypy.json
160
162
  .LSOverride
161
163
  scratch/
162
164
  .sisyphus/
165
+
166
+ # Local artifacts
167
+ MagicMock/
@@ -0,0 +1,221 @@
1
+ Metadata-Version: 2.4
2
+ Name: evalvault
3
+ Version: 1.76.0
4
+ Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
5
+ Project-URL: Homepage, https://github.com/ntts9990/EvalVault
6
+ Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
7
+ Project-URL: Repository, https://github.com/ntts9990/EvalVault.git
8
+ Project-URL: Issues, https://github.com/ntts9990/EvalVault/issues
9
+ Project-URL: Changelog, https://github.com/ntts9990/EvalVault/releases
10
+ Author: EvalVault Contributors
11
+ Maintainer: EvalVault Contributors
12
+ License: Apache-2.0
13
+ License-File: LICENSE.md
14
+ Keywords: ai,evaluation,langfuse,llm,machine-learning,nlp,observability,opentelemetry,phoenix,rag,ragas,retrieval-augmented-generation,testing
15
+ Classifier: Development Status :: 4 - Beta
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: License :: OSI Approved :: Apache Software License
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Software Development :: Quality Assurance
25
+ Classifier: Topic :: Software Development :: Testing
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.12
28
+ Requires-Dist: chainlit>=2.9.5
29
+ Requires-Dist: chardet
30
+ Requires-Dist: fastapi>=0.128.0
31
+ Requires-Dist: instructor
32
+ Requires-Dist: langchain-openai
33
+ Requires-Dist: langfuse
34
+ Requires-Dist: matplotlib<3.9.0,>=3.8.0
35
+ Requires-Dist: networkx
36
+ Requires-Dist: openai
37
+ Requires-Dist: openpyxl
38
+ Requires-Dist: pandas
39
+ Requires-Dist: pydantic
40
+ Requires-Dist: pydantic-settings
41
+ Requires-Dist: pypdf>=4.3.0
42
+ Requires-Dist: python-multipart
43
+ Requires-Dist: ragas==0.4.2
44
+ Requires-Dist: rich
45
+ Requires-Dist: truststore>=0.10.4
46
+ Requires-Dist: typer
47
+ Requires-Dist: uvicorn>=0.40.0
48
+ Requires-Dist: xlrd
49
+ Provides-Extra: analysis
50
+ Requires-Dist: scikit-learn>=1.3.0; extra == 'analysis'
51
+ Requires-Dist: xgboost>=2.0.0; extra == 'analysis'
52
+ Provides-Extra: anthropic
53
+ Requires-Dist: anthropic; extra == 'anthropic'
54
+ Requires-Dist: langchain-anthropic; extra == 'anthropic'
55
+ Provides-Extra: benchmark
56
+ Requires-Dist: datasets>=2.0.0; extra == 'benchmark'
57
+ Requires-Dist: lm-eval[api]>=0.4.0; extra == 'benchmark'
58
+ Provides-Extra: dashboard
59
+ Requires-Dist: matplotlib<3.9.0,>=3.8.0; extra == 'dashboard'
60
+ Provides-Extra: dev
61
+ Requires-Dist: anthropic; extra == 'dev'
62
+ Requires-Dist: arize-phoenix>=8.0.0; extra == 'dev'
63
+ Requires-Dist: datasets>=2.0.0; extra == 'dev'
64
+ Requires-Dist: faiss-cpu>=1.8.0; extra == 'dev'
65
+ Requires-Dist: ijson>=3.3.0; extra == 'dev'
66
+ Requires-Dist: kiwipiepy>=0.18.0; extra == 'dev'
67
+ Requires-Dist: langchain-anthropic; extra == 'dev'
68
+ Requires-Dist: lm-eval[api]>=0.4.0; extra == 'dev'
69
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'dev'
70
+ Requires-Dist: mkdocs>=1.5.0; extra == 'dev'
71
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'dev'
72
+ Requires-Dist: mlflow>=2.0.0; extra == 'dev'
73
+ Requires-Dist: openinference-instrumentation-langchain>=0.1.0; extra == 'dev'
74
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == 'dev'
75
+ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == 'dev'
76
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'dev'
77
+ Requires-Dist: pgvector>=0.2.5; extra == 'dev'
78
+ Requires-Dist: psycopg[binary]>=3.0.0; extra == 'dev'
79
+ Requires-Dist: pydeps>=3.0.1; extra == 'dev'
80
+ Requires-Dist: pymdown-extensions>=10.7.0; extra == 'dev'
81
+ Requires-Dist: pytest; extra == 'dev'
82
+ Requires-Dist: pytest-asyncio; extra == 'dev'
83
+ Requires-Dist: pytest-cov; extra == 'dev'
84
+ Requires-Dist: pytest-html; extra == 'dev'
85
+ Requires-Dist: pytest-mock; extra == 'dev'
86
+ Requires-Dist: pytest-rerunfailures; extra == 'dev'
87
+ Requires-Dist: pytest-xdist; extra == 'dev'
88
+ Requires-Dist: python-multipart; extra == 'dev'
89
+ Requires-Dist: rank-bm25>=0.2.2; extra == 'dev'
90
+ Requires-Dist: ruff; extra == 'dev'
91
+ Requires-Dist: scikit-learn<1.4.0,>=1.3.0; extra == 'dev'
92
+ Requires-Dist: sentence-transformers>=5.2.0; extra == 'dev'
93
+ Requires-Dist: xgboost>=2.0.0; extra == 'dev'
94
+ Provides-Extra: docs
95
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
96
+ Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
97
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'docs'
98
+ Requires-Dist: pymdown-extensions>=10.7.0; extra == 'docs'
99
+ Provides-Extra: korean
100
+ Requires-Dist: kiwipiepy>=0.18.0; extra == 'korean'
101
+ Requires-Dist: rank-bm25>=0.2.2; extra == 'korean'
102
+ Requires-Dist: sentence-transformers>=5.2.0; extra == 'korean'
103
+ Provides-Extra: mlflow
104
+ Requires-Dist: mlflow>=2.0.0; extra == 'mlflow'
105
+ Provides-Extra: perf
106
+ Requires-Dist: faiss-cpu>=1.8.0; extra == 'perf'
107
+ Requires-Dist: ijson>=3.3.0; extra == 'perf'
108
+ Provides-Extra: phoenix
109
+ Requires-Dist: arize-phoenix>=8.0.0; extra == 'phoenix'
110
+ Requires-Dist: openinference-instrumentation-langchain>=0.1.0; extra == 'phoenix'
111
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == 'phoenix'
112
+ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == 'phoenix'
113
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'phoenix'
114
+ Provides-Extra: postgres
115
+ Requires-Dist: pgvector>=0.2.5; extra == 'postgres'
116
+ Requires-Dist: psycopg[binary]>=3.0.0; extra == 'postgres'
117
+ Provides-Extra: secrets
118
+ Requires-Dist: boto3; extra == 'secrets'
119
+ Requires-Dist: google-cloud-secret-manager; extra == 'secrets'
120
+ Requires-Dist: hvac; extra == 'secrets'
121
+ Provides-Extra: timeseries
122
+ Requires-Dist: aeon>=1.3.0; extra == 'timeseries'
123
+ Requires-Dist: numba>=0.55.0; extra == 'timeseries'
124
+ Provides-Extra: web
125
+ Description-Content-Type: text/markdown
126
+
127
+ # EvalVault
128
+
129
+ RAG(Retrieval-Augmented Generation) 시스템을 대상으로 **평가(Eval) → 분석(Analysis) → 추적(Tracing) → 개선 루프**를 하나의 워크플로로 묶는 CLI + Web UI 플랫폼입니다.
130
+
131
+ [![PyPI](https://img.shields.io/pypi/v/evalvault.svg)](https://pypi.org/project/evalvault/)
132
+ [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
133
+ [![CI](https://github.com/ntts9990/EvalVault/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/ntts9990/EvalVault/actions/workflows/ci.yml)
134
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE.md)
135
+
136
+ English version? See `README.en.md`.
137
+
138
+ ---
139
+
140
+ ## Quickstart (CLI)
141
+
142
+ ```bash
143
+ uv sync --extra dev
144
+ cp .env.example .env
145
+
146
+ uv run evalvault run --mode simple tests/fixtures/e2e/insurance_qa_korean.json \
147
+ --metrics faithfulness,answer_relevancy \
148
+ --profile dev \
149
+ --auto-analyze
150
+ ```
151
+
152
+ Tip: 기본 저장소는 Postgres+pgvector입니다. SQLite를 쓰려면 `--db` 또는 `DB_BACKEND=sqlite` + `EVALVAULT_DB_PATH`를 지정하세요.
153
+
154
+ ---
155
+
156
+ ## 핵심 기능
157
+
158
+ - **End-to-End 평가 루프**: Eval → Analysis → Tracing → Improvement를 한 흐름으로 실행
159
+ - **Dataset 중심 운영**: 합격 기준(threshold)을 데이터셋에 유지
160
+ - **Artifacts-first**: 보고서뿐 아니라 모듈별 원본 결과를 구조화 저장
161
+ - **옵션형 Observability**: Phoenix/Langfuse/MLflow는 필요할 때만 활성화
162
+ - **CLI + Web UI**: 동일 run_id 기반으로 히스토리/비교/리포트 통합
163
+
164
+ ---
165
+
166
+ ## 문서 허브
167
+
168
+ - 문서 인덱스: `docs/INDEX.md`
169
+ - 핸드북(교과서형): `docs/handbook/INDEX.md`
170
+ - 외부 요약본: `docs/handbook/EXTERNAL.md`
171
+ - 운영 가이드(로컬/도커/관측/런북): `docs/handbook/CHAPTERS/04_operations.md`
172
+ - 워크플로(실행/분석/비교/회귀): `docs/handbook/CHAPTERS/03_workflows.md`
173
+ - 품질/테스트/CI: `docs/handbook/CHAPTERS/06_quality_and_testing.md`
174
+ - 아키텍처: `docs/handbook/CHAPTERS/01_architecture.md`
175
+ - 오프라인/폐쇄망(Docker/모델 캐시): `docs/guides/OFFLINE_DOCKER.md`, `docs/guides/OFFLINE_MODELS.md`
176
+
177
+ 참고(호환성): `docs/guides/USER_GUIDE.md`, `docs/guides/DEV_GUIDE.md` 등 일부 문서는 과거 링크 호환을 위한 deprecated 스텁이며, 최신 내용은 handbook을 따릅니다.
178
+
179
+ ---
180
+
181
+ ## Web UI
182
+
183
+ ```bash
184
+ # API
185
+ uv run evalvault serve-api --reload
186
+
187
+ # Frontend
188
+ cd frontend
189
+ npm install
190
+ npm run dev
191
+ ```
192
+
193
+ 브라우저에서 `http://localhost:5173` 접속 후, Evaluation Studio에서 실행/히스토리/리포트를 확인합니다.
194
+
195
+ ---
196
+
197
+ ## 오프라인/폐쇄망
198
+
199
+ - Docker 이미지 번들: `docs/guides/OFFLINE_DOCKER.md`
200
+ - NLP 모델 캐시 번들: `docs/guides/OFFLINE_MODELS.md`
201
+
202
+ LLM 모델은 폐쇄망 내부 인프라가 관리하며, EvalVault는 **분석용 NLP 모델 캐시**만 번들에 포함합니다.
203
+
204
+ ---
205
+
206
+ ## 기여
207
+
208
+ ```bash
209
+ uv run ruff check src/ tests/
210
+ uv run ruff format src/ tests/
211
+ uv run pytest tests -v
212
+ ```
213
+
214
+ - 기여 가이드: `CONTRIBUTING.md`
215
+ - 개발/테스트 루틴: `AGENTS.md`, `docs/handbook/CHAPTERS/06_quality_and_testing.md`
216
+
217
+ ---
218
+
219
+ ## License
220
+
221
+ EvalVault is licensed under the [Apache 2.0](LICENSE.md) license.
@@ -43,20 +43,19 @@ Open `http://localhost:5173`, run an evaluation in Evaluation Studio (for exampl
43
43
  and insights.
44
44
 
45
45
  - LLM report language: `/api/v1/runs/{run_id}/report?language=en` (default: ko)
46
- - Details: `docs/guides/USER_GUIDE.md#보고서-언어-옵션`
46
+ - Details: `docs/handbook/CHAPTERS/00_overview.md`
47
47
  - Feedback aggregation: latest value per `rater_id` + `test_case_id` (cancellations excluded)
48
- - Details: `docs/guides/USER_GUIDE.md#피드백-집계-규칙`
48
+ - Details: `docs/handbook/CHAPTERS/02_data_and_metrics.md`
49
49
 
50
50
  **CLI (terminal view)**
51
51
  ```bash
52
52
  uv run evalvault run tests/fixtures/e2e/insurance_qa_korean.json \
53
53
  --metrics faithfulness,answer_relevancy \
54
- --profile dev \
55
- --db data/db/evalvault.db
56
- uv run evalvault history --db data/db/evalvault.db
57
- uv run evalvault analyze <RUN_ID> --db data/db/evalvault.db
54
+ --profile dev
55
+ uv run evalvault history
56
+ uv run evalvault analyze <RUN_ID>
58
57
  ```
59
- Tip: keep the same `--db` (or `EVALVAULT_DB_PATH`) so the Web UI can read the run.
58
+ Tip: Postgres is the default store. Use `--db` or `DB_BACKEND=sqlite` + `EVALVAULT_DB_PATH` for SQLite, and keep the same settings so the Web UI can read the run.
60
59
 
61
60
  ---
62
61
 
@@ -186,7 +185,7 @@ The core contract is **module-level spans (`rag.module`) + log events + shared a
186
185
  - Learn facts/behaviors from past runs to auto-tune thresholds and augment context
187
186
  - DAG-based analysis pipeline with statistical, NLP, and causal modules for multi-faceted interpretation
188
187
 
189
- See the [User Guide](docs/guides/USER_GUIDE.md) for end-to-end workflows, Phoenix/Langfuse integration, and troubleshooting.
188
+ See the [Handbook](docs/handbook/INDEX.md) for end-to-end workflows, operations, and troubleshooting.
190
189
 
191
190
  ---
192
191
 
@@ -228,7 +227,7 @@ uv sync --extra dev
228
227
  cp .env.example .env
229
228
  # set OPENAI_API_KEY or OLLAMA settings, LANGFUSE/PHOENIX keys, etc.
230
229
  ```
231
- Optional SQLite path override:
230
+ Optional database path override (only takes effect with the SQLite backend):
232
231
  ```bash
233
232
  # .env
234
233
  EVALVAULT_DB_PATH=/path/to/data/db/evalvault.db
@@ -247,10 +246,9 @@ uv sync --extra dev
247
246
  ```bash
248
247
  cp .env.example .env
249
248
  ollama pull gemma3:1b
250
- uv run evalvault run tests/fixtures/e2e/insurance_qa_korean.json \
251
- --metrics faithfulness \
252
- --db data/db/evalvault.db \
253
- --profile dev
249
+ uv run evalvault run tests/fixtures/e2e/insurance_qa_korean.json \
250
+ --metrics faithfulness \
251
+ --profile dev
254
252
  ```
255
253
  Tip: embedding metrics like `answer_relevancy` also need `qwen3-embedding:0.6b`.
256
254
 
@@ -258,9 +256,9 @@ uv sync --extra dev
258
256
  ```bash
259
257
  cp .env.example .env
260
258
  printf "\nEVALVAULT_PROFILE=vllm\nVLLM_BASE_URL=http://localhost:8001/v1\nVLLM_MODEL=gpt-oss-120b\n" >> .env
261
- uv run evalvault run tests/fixtures/e2e/insurance_qa_korean.json \
262
- --metrics faithfulness \
263
- --db data/db/evalvault.db
259
+ uv run evalvault run tests/fixtures/e2e/insurance_qa_korean.json \
260
+ --metrics faithfulness \
261
+ --profile dev
264
262
  ```
265
263
  Tip: embedding metrics require `VLLM_EMBEDDING_MODEL` and a `/v1/embeddings` endpoint.
266
264
  If you use Ollama models that support tool/function calling, list them in
@@ -295,20 +293,21 @@ uv sync --extra dev
295
293
 
296
294
  3. **Run an evaluation**
297
295
  ```bash
298
- uv run evalvault run tests/fixtures/sample_dataset.json \
299
- --metrics faithfulness,answer_relevancy \
300
- --profile dev \
301
- --db data/db/evalvault.db
296
+ uv run evalvault run tests/fixtures/sample_dataset.json \
297
+ --metrics faithfulness,answer_relevancy \
298
+ --profile dev
302
299
  ```
303
- Tip: `--db` stores results for `history/export/web`. Add `--tracker phoenix` only if
304
- Phoenix is configured (and `uv sync --extra phoenix` is installed).
300
+ Tip: For SQLite, pass `--db` (or set `DB_BACKEND=sqlite` + `EVALVAULT_DB_PATH`).
301
+ For Postgres, set `POSTGRES_*` or `POSTGRES_CONNECTION_STRING` so the Web UI can
302
+ read the same DB. Add `--tracker phoenix` only if Phoenix is configured
303
+ (and `uv sync --extra phoenix` is installed).
305
304
 
306
305
  4. **Inspect history**
307
306
  ```bash
308
- uv run evalvault history --db data/db/evalvault.db
307
+ uv run evalvault history
309
308
  ```
310
309
 
311
- More examples (parallel runs, dataset streaming, Langfuse logging, Phoenix dataset sync, prompt manifest diffs, etc.) live in the [User Guide](docs/guides/USER_GUIDE.md).
310
+ More examples (parallel runs, dataset streaming, Langfuse logging, Phoenix dataset sync, prompt manifest diffs, etc.) live in the [Handbook](docs/handbook/INDEX.md) and `examples/`.
312
311
 
313
312
  ---
314
313
 
@@ -372,9 +371,8 @@ On top of these, `StageMetricService` derives **pipeline-stage metrics** such as
372
371
 
373
372
  ## Documentation
374
373
  - [Docs Index](docs/INDEX.md): documentation hub.
375
- - [User Guide](docs/guides/USER_GUIDE.md): installation, configuration, CLI recipes, Web UI, Phoenix, automation.
376
- - [Dev Guide](docs/guides/DEV_GUIDE.md): local dev/test/lint routines.
377
- - [Developer Whitepaper](docs/new_whitepaper/INDEX.md): architecture, operations, and engineering standards.
374
+ - [Handbook](docs/handbook/INDEX.md): internal single source of truth (SSoT) for architecture, workflows, ops, and quality.
375
+ - [External Summary](docs/handbook/EXTERNAL.md): shareable overview.
378
376
  - [Open RAG Trace Spec](docs/architecture/open-rag-trace-spec.md): tracing schema and integration guide.
379
377
  - [CHANGELOG](CHANGELOG.md) for release history.
380
378
 
@@ -0,0 +1,95 @@
1
+ # EvalVault
2
+
3
+ RAG(Retrieval-Augmented Generation) 시스템을 대상으로 **평가(Eval) → 분석(Analysis) → 추적(Tracing) → 개선 루프**를 하나의 워크플로로 묶는 CLI + Web UI 플랫폼입니다.
4
+
5
+ [![PyPI](https://img.shields.io/pypi/v/evalvault.svg)](https://pypi.org/project/evalvault/)
6
+ [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
7
+ [![CI](https://github.com/ntts9990/EvalVault/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/ntts9990/EvalVault/actions/workflows/ci.yml)
8
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE.md)
9
+
10
+ English version? See `README.en.md`.
11
+
12
+ ---
13
+
14
+ ## Quickstart (CLI)
15
+
16
+ ```bash
17
+ uv sync --extra dev
18
+ cp .env.example .env
19
+
20
+ uv run evalvault run --mode simple tests/fixtures/e2e/insurance_qa_korean.json \
21
+ --metrics faithfulness,answer_relevancy \
22
+ --profile dev \
23
+ --auto-analyze
24
+ ```
25
+
26
+ Tip: 기본 저장소는 Postgres+pgvector입니다. SQLite를 쓰려면 `--db` 또는 `DB_BACKEND=sqlite` + `EVALVAULT_DB_PATH`를 지정하세요.
27
+
28
+ ---
29
+
30
+ ## 핵심 기능
31
+
32
+ - **End-to-End 평가 루프**: Eval → Analysis → Tracing → Improvement를 한 흐름으로 실행
33
+ - **Dataset 중심 운영**: 합격 기준(threshold)을 데이터셋에 유지
34
+ - **Artifacts-first**: 보고서뿐 아니라 모듈별 원본 결과를 구조화 저장
35
+ - **옵션형 Observability**: Phoenix/Langfuse/MLflow는 필요할 때만 활성화
36
+ - **CLI + Web UI**: 동일 run_id 기반으로 히스토리/비교/리포트 통합
37
+
38
+ ---
39
+
40
+ ## 문서 허브
41
+
42
+ - 문서 인덱스: `docs/INDEX.md`
43
+ - 핸드북(교과서형): `docs/handbook/INDEX.md`
44
+ - 외부 요약본: `docs/handbook/EXTERNAL.md`
45
+ - 운영 가이드(로컬/도커/관측/런북): `docs/handbook/CHAPTERS/04_operations.md`
46
+ - 워크플로(실행/분석/비교/회귀): `docs/handbook/CHAPTERS/03_workflows.md`
47
+ - 품질/테스트/CI: `docs/handbook/CHAPTERS/06_quality_and_testing.md`
48
+ - 아키텍처: `docs/handbook/CHAPTERS/01_architecture.md`
49
+ - 오프라인/폐쇄망(Docker/모델 캐시): `docs/guides/OFFLINE_DOCKER.md`, `docs/guides/OFFLINE_MODELS.md`
50
+
51
+ 참고(호환성): `docs/guides/USER_GUIDE.md`, `docs/guides/DEV_GUIDE.md` 등 일부 문서는 과거 링크 호환을 위한 deprecated 스텁이며, 최신 내용은 handbook을 따릅니다.
52
+
53
+ ---
54
+
55
+ ## Web UI
56
+
57
+ ```bash
58
+ # API
59
+ uv run evalvault serve-api --reload
60
+
61
+ # Frontend
62
+ cd frontend
63
+ npm install
64
+ npm run dev
65
+ ```
66
+
67
+ 브라우저에서 `http://localhost:5173` 접속 후, Evaluation Studio에서 실행/히스토리/리포트를 확인합니다.
68
+
69
+ ---
70
+
71
+ ## 오프라인/폐쇄망
72
+
73
+ - Docker 이미지 번들: `docs/guides/OFFLINE_DOCKER.md`
74
+ - NLP 모델 캐시 번들: `docs/guides/OFFLINE_MODELS.md`
75
+
76
+ LLM 모델은 폐쇄망 내부 인프라가 관리하며, EvalVault는 **분석용 NLP 모델 캐시**만 번들에 포함합니다.
77
+
78
+ ---
79
+
80
+ ## 기여
81
+
82
+ ```bash
83
+ uv run ruff check src/ tests/
84
+ uv run ruff format src/ tests/
85
+ uv run pytest tests -v
86
+ ```
87
+
88
+ - 기여 가이드: `CONTRIBUTING.md`
89
+ - 개발/테스트 루틴: `AGENTS.md`, `docs/handbook/CHAPTERS/06_quality_and_testing.md`
90
+
91
+ ---
92
+
93
+ ## License
94
+
95
+ EvalVault is licensed under the [Apache 2.0](LICENSE.md) license.
@@ -27,7 +27,7 @@ evalvault (PyPI Package) agent/ (Development Only)
27
27
 
28
28
  ### Development Mode (This Folder)
29
29
 
30
- Agents for improving EvalVault codebase based on the current roadmap and engineering standards (see `docs/ROADMAP.md`, `docs/new_whitepaper/INDEX.md`):
30
+ Agents for improving the EvalVault codebase based on the current roadmap and engineering standards (see `docs/handbook/CHAPTERS/08_roadmap.md`, `docs/handbook/INDEX.md`):
31
31
 
32
32
  | Agent Type | Focus | P-Levels |
33
33
  |------------|-------|----------|
@@ -299,7 +299,7 @@ The agent system follows the project documentation and current engineering stand
299
299
  - [Claude Agent SDK Docs](https://platform.claude.com/docs/en/agent-sdk/overview)
300
300
  - [Effective Harnesses](https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents)
301
301
  - [Docs Index](../docs/INDEX.md)
302
- - [Developer Whitepaper](../docs/new_whitepaper/INDEX.md)
302
+ - [Handbook](../docs/handbook/INDEX.md)
303
303
  - [Open RAG Trace Spec](../docs/architecture/open-rag-trace-spec.md)
304
304
  - [Agent Types Configuration](../src/evalvault/config/agent_types.py)
305
305
  - [nonstop-agent](https://github.com/seolcoding/nonstop-agent)
@@ -8,7 +8,7 @@
8
8
  | 문서 | 용도 |
9
9
  |------|------|
10
10
  | [docs/INDEX.md](../../../docs/INDEX.md) | 프로젝트 문서 허브(최신 링크) |
11
- | [Developer Whitepaper](../../../docs/new_whitepaper/INDEX.md) | 설계/운영/품질 기준 |
11
+ | [Handbook](../../../docs/handbook/INDEX.md) | 설계/운영/품질 기준(SSoT) |
12
12
  | [agent/README.md](../../README.md) | 에이전트 시스템 사용법 |
13
13
 
14
14
  ---
@@ -138,7 +138,7 @@ architecture (Storage Adapter)┘
138
138
  | `src/evalvault/ports/outbound/tracker_port.py` | `observability` | `rag-data` | 스키마 변경 공유 |
139
139
  | `src/evalvault/domain/entities/result.py` | `architecture` | - | 테스트 영향 체크 |
140
140
  | `docs/INDEX.md` | `coordinator` | All | 문서 구조/링크 변경 시 동기화 |
141
- | `docs/new_whitepaper/INDEX.md` | `coordinator` | All | 설계/운영 기준 변경 시 동기화 |
141
+ | `docs/handbook/INDEX.md` | `coordinator` | All | 설계/운영 기준 변경 시 동기화 |
142
142
  | `agent/memory/shared/decisions.md` | All | - | ADR 형식 준수 |
143
143
 
144
144
  ### Shared Namespaces
@@ -45,7 +45,7 @@ cat agent/memory/shared/decisions.md | tail -50
45
45
 
46
46
  ## PARALLEL EXECUTION GROUPS
47
47
 
48
- From the current roadmap/standards (`docs/ROADMAP.md`, `docs/new_whitepaper/INDEX.md`):
48
+ From the current roadmap/standards (`docs/handbook/CHAPTERS/08_roadmap.md`, `docs/handbook/INDEX.md`):
49
49
 
50
50
  ### Group A: Fully Independent (Can Run Together)
51
51
  - `performance`: Caching, batch processing
@@ -141,7 +141,7 @@ agent/memory/shared/
141
141
  docs/
142
142
  ├── ROADMAP.md # Public direction
143
143
  ├── STATUS.md # One-page snapshot
144
- └── new_whitepaper/ # Engineering standards
144
+ └── handbook/ # Engineering standards (SSoT)
145
145
 
146
146
  feature_list.json # Task tracking
147
147
  claude-progress.txt # Session progress
@@ -0,0 +1,55 @@
1
+ {
2
+ "version": 1,
3
+ "source": "docs/guides/USER_GUIDE.md",
4
+ "source_hash": "8b4302500e7f6656b363782eb76a5a8d7582f8f3304451bb31dd37109522270e",
5
+ "chunk_limit": 10,
6
+ "created_at": "2026-01-29T07:08:42.415275+00:00",
7
+ "documents": [
8
+ "# USER_GUIDE (Deprecated)\n\n이 파일은 과거의 “종합 사용자 가이드”였으나, `docs/handbook`이 최신/정답 문서입니다.\n\n핵심 진입점:\n\n- 전체 목차/내비게이션: `docs/handbook/INDEX.md` - 워크플로(평가 → 분석 → 비교): `docs/handbook/CHAPTERS/03_workflows.md` - 운영(로컬/도커/오프라인/DB): `docs/handbook/CHAPTERS/04_operations.md` - 데이터/메트릭/임계값/산출물: `docs/handbook/CHAPTERS/02_data_and_metrics.md`\n\n구식 내용은 혼선을 줄이기 위해 제거했습니다. 필요 시 Git 히스토리를 참고하세요."
9
+ ],
10
+ "tokens": [
11
+ [
12
+ "파일",
13
+ "과거",
14
+ "종합",
15
+ "사용자",
16
+ "가이드",
17
+ "최신",
18
+ "정답",
19
+ "문서",
20
+ "핵심",
21
+ "진입",
22
+ "점",
23
+ "전체",
24
+ "목차",
25
+ "내비게이션",
26
+ "워크",
27
+ "플로",
28
+ "평가",
29
+ "분석",
30
+ "비교",
31
+ "운영",
32
+ "로컬",
33
+ "도커",
34
+ "오프라인",
35
+ "데이터",
36
+ "메트릭",
37
+ "임계",
38
+ "값",
39
+ "산출",
40
+ "물",
41
+ "구식",
42
+ "내용",
43
+ "혼선",
44
+ "줄이다",
45
+ "위하다",
46
+ "제거",
47
+ "하",
48
+ "필요",
49
+ "시",
50
+ "히스토리",
51
+ "참고",
52
+ "하"
53
+ ]
54
+ ]
55
+ }
@@ -0,0 +1,11 @@
1
+ services:
2
+ evalvault-api:
3
+ environment:
4
+ HF_HOME: /app/model_cache/hf
5
+ HF_HUB_CACHE: /app/model_cache/hf/hub
6
+ TRANSFORMERS_CACHE: /app/model_cache/hf/hub
7
+ SENTENCE_TRANSFORMERS_HOME: /app/model_cache/sentence-transformers
8
+ HF_HUB_OFFLINE: "1"
9
+ TRANSFORMERS_OFFLINE: "1"
10
+ volumes:
11
+ - ./model_cache:/app/model_cache
@@ -17,7 +17,7 @@
17
17
  services:
18
18
  # PostgreSQL database for evaluation storage
19
19
  postgres:
20
- image: ${POSTGRES_IMAGE:-postgres:16.4-alpine}
20
+ image: ${POSTGRES_IMAGE:-pgvector/pgvector:0.8.0-pg16}
21
21
  container_name: evalvault-postgres
22
22
  pull_policy: never
23
23
  restart: unless-stopped