evalvault 1.75.0__tar.gz → 1.76.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evalvault-1.75.0 → evalvault-1.76.0}/PKG-INFO +1 -1
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EXPERIMENT_TRACKING_STACK.md +16 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/pyproject.toml +1 -1
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/adapter.py +99 -63
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/config.py +3 -1
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/method.py +2 -2
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/run.py +146 -28
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/run_helpers.py +157 -55
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/factory.py +1 -1
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/phoenix/sync_service.py +99 -0
- evalvault-1.76.0/src/evalvault/adapters/outbound/tracker/mlflow_adapter.py +387 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/phoenix_adapter.py +158 -9
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/instrumentation.py +8 -6
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/phoenix_support.py +5 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/settings.py +40 -4
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/evaluator.py +2 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/uv.lock +1 -1
- evalvault-1.75.0/src/evalvault/adapters/outbound/tracker/mlflow_adapter.py +0 -232
- {evalvault-1.75.0 → evalvault-1.76.0}/.dockerignore +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.env.example +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.env.offline.example +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.github/workflows/ci.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.github/workflows/regression-gate.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.github/workflows/release.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.github/workflows/stale.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.gitignore +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.pre-commit-config.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/.python-version +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/AGENTS.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/CHANGELOG.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/CLAUDE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/CODE_OF_CONDUCT.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/CONTRIBUTING.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/Dockerfile +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/LICENSE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/README.en.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/SECURITY.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/agent.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/client.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/config.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/main.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/shared/decisions.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/shared/dependencies.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/templates/coordinator_guide.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory/templates/work_log_template.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/memory_integration.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/progress.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/app_spec.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/baseline.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/coding_prompt.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/existing_project_prompt.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/improvement/architecture_prompt.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/improvement/base_prompt.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/improvement/coordinator_prompt.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/improvement/observability_prompt.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/initializer_prompt.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/prompt_manifest.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts/system.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/prompts.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/requirements.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/agent/security.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/domains/insurance/memory.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/domains/insurance/terms_dictionary_en.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/domains/insurance/terms_dictionary_ko.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/methods.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/models.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/ragas_prompts_override.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/regressions/ci.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/regressions/default.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/regressions/ux.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/stage_metric_playbook.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/config/stage_metric_thresholds.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/dummy_test_dataset.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean.csv +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean_2.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/insurance_qa_korean_3.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/ragas_ko90_en10.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/sample.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/visualization_20q_cluster_map.csv +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/visualization_20q_korean.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/visualization_2q_cluster_map.csv +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/datasets/visualization_2q_korean.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/kg/knowledge_graph.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/data/rag/user_guide_bm25.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/dataset_templates/dataset_template.csv +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/dataset_templates/dataset_template.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/dataset_templates/dataset_template.xlsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/dataset_templates/method_input_template.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.langfuse.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.offline.modelcache.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.offline.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.phoenix.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docker-compose.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/INDEX.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/README.ko.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/ROADMAP.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/STATUS.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/adapters/inbound.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/adapters/outbound.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/config.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/domain/entities.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/domain/metrics.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/domain/services.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/ports/inbound.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/api/ports/outbound.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/architecture/open-rag-trace-collector.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/architecture/open-rag-trace-spec.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/getting-started/INSTALLATION.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/AGENTS_SYSTEM_GUIDE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CHAINLIT_INTEGRATION_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CI_REGRESSION_GATE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CLI_MCP_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CLI_PARALLEL_FEATURES_SPEC.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/CLI_UX_REDESIGN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/DEV_GUIDE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/DOCS_REFRESH_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EVALVAULT_DIAGNOSTIC_PLAYBOOK.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EVALVAULT_RUN_EXCEL_SHEETS.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EVALVAULT_WORK_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/EXTERNAL_TRACE_API_SPEC.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/Extension_2.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/Extension_Data_Difficulty_Profiling_Custom_Judge_Model.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/INSURANCE_SUMMARY_METRICS_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/LENA_MVP_IMPLEMENTATION_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/LENA_RAGAS_CALIBRATION_DEV_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/MULTITURN_EVAL_GUIDE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/NEXT_STEPS_EXECUTION_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/OFFLINE_DOCKER.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/OFFLINE_MODELS.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/OPEN_RAG_TRACE_INTERNAL_ADAPTER.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/OPEN_RAG_TRACE_SAMPLES.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/P0_P3_EXECUTION_REPORT.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/P1_P4_WORK_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/PARALLEL_WORK_APPROVAL_RULES.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/PRD_LENA.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/PROJECT_STATUS_AND_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAGAS_HUMAN_FEEDBACK_CALIBRATION_GUIDE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_CLI_WORKFLOW_TEMPLATES.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_NOISE_REDUCTION_GUIDE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_PERFORMANCE_IMPLEMENTATION_LOG.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_PERFORMANCE_IMPROVEMENT_PROPOSAL.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RAG_PGVECTOR_PREINDEX_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/RELEASE_CHECKLIST.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/USER_GUIDE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/WEBUI_CLI_ROLLOUT_PLAN.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/WORKLOG_LAST_2_DAYS.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/_DEPRECATED_NOTICE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/cli_process.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/prompt_suggestions_design.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/rag_human_feedback_calibration_implementation_plan.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/refactoring_strategy.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/guides/repeat_query.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/00_overview.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/01_architecture.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/02_data_and_metrics.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/03_workflows.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/04_operations.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/05_security.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/06_quality_and_testing.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/07_ux_and_product.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/08_roadmap.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/CHAPTERS/09_competitive_positioning.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/EXTERNAL.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/INDEX.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/WORKLOG_DOCS_CLEANUP_2026-01-29.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/appendix-coverage-matrix.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/appendix-file-inventory.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/appendix-roadmap.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/handbook/appendix-taxonomy.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/mapping/component-to-whitepaper.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/00_frontmatter.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/01_overview.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/02_architecture.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/03_data_flow.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/04_components.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/05_expert_lenses.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/06_implementation.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/07_advanced.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/08_customization.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/09_quality.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/10_performance.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/11_security.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/12_operations.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/13_standards.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/14_roadmap.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/INDEX.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/new_whitepaper/STYLE_GUIDE.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_000_master_plan.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_010_agent_playbook.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_020_logging_policy.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_030_phase0_responsibility_map.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/REFAC_040_wbs_parallel_plan.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/logs/phase-0-baseline.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/logs/phase-1-evaluator.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/logs/phase-2-cli-run.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/refactor/logs/phase-3-analysis.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/security_audit_worklog.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/stylesheets/extra.css +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/dataset_template.csv +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/dataset_template.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/dataset_template.xlsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/eval_report_templates.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/kg_template.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/otel_openinference_trace_example.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/ragas_dataset_example_ko90_en10.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/templates/retriever_docs_template.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/tools/generate-whitepaper.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/docs/web_ui_analysis_migration_plan.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/dummy_test_dataset.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/faithfulness_test.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/insurance_qa_100.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/keyword_extraction_test.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/korean_rag/retrieval_test.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/comparison.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/full_results.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/leaderboard.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/results_mteb.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/output/retrieval_result.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/benchmarks/run_korean_benchmark.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/kg_generator_demo.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/method_plugin_template/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/method_plugin_template/pyproject.toml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/method_plugin_template/src/method_plugin_template/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/method_plugin_template/src/method_plugin_template/methods.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/usecase/comprehensive_workflow_test.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/usecase/insurance_eval_dataset.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/examples/usecase/output/comprehensive_report.html +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/.env.example +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/.gitignore +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/Dockerfile +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/analysis-compare.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/analysis-lab.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/compare-runs.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/dashboard.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/domain-memory.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/evaluation-studio.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/judge-calibration.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/knowledge-base.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/mocks/intents.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/mocks/run_details.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/mocks/runs.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/e2e/run-details.spec.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/eslint.config.js +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/index.html +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/nginx.conf +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/package-lock.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/package.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/playwright.config.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/public/vite.svg +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/App.css +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/App.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/assets/react.svg +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/AnalysisNodeOutputs.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/InsightSpacePanel.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/Layout.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/MarkdownContent.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/PrioritySummaryPanel.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/SpaceLegend.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/SpacePlot2D.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/SpacePlot3D.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/StatusBadge.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ToastProvider.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/VirtualizedText.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Conversation.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Message.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/PromptInput.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/Response.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/components/ai-elements/index.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/config/ui.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/config.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/hooks/useInsightSpace.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/index.css +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/main.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/AiSdkChat.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisCompareView.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisLab.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/AnalysisResultView.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/Chat.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/CompareRuns.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/ComprehensiveAnalysis.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/CustomerReport.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/Dashboard.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/DomainMemory.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/EvaluationStudio.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/JudgeCalibration.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/KnowledgeBase.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/RunDetails.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/Settings.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/Visualization.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/pages/VisualizationHome.tsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/services/api.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/types/plotly.d.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/cliCommandBuilder.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/clipboard.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/format.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/phoenix.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/runAnalytics.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/score.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/src/utils/summaryMetrics.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/tailwind.config.js +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/tsconfig.app.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/tsconfig.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/tsconfig.node.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/frontend/vite.config.ts +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/mkdocs.yml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/package-lock.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/prompts/system_override.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/.gitkeep +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r1_smoke.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r2_graphrag.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r2_graphrag_openai.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r3_bm25.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r3_bm25_langfuse3.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/debug_report_r3_dense_faiss.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/feature_verification_report.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/improvement_1d91a667-4288-4742-be3a-a8f5310c5140.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r2_graphrag_openai_stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r2_graphrag_openai_stage_report.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r2_graphrag_stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r2_graphrag_stage_report.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse2_stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse3_stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_langfuse_stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_phoenix_stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_bm25_stage_report.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_dense_faiss_stage_events.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/reports/r3_dense_faiss_stage_report.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/benchmark/download_kmmlu.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/ci/run_regression_gate.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/open_rag_trace_demo.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/open_rag_trace_integration_template.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/otel-collector-config.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/preindex_pgvector_runs.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/start_web_ui_with_phoenix.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/validate_open_rag_trace.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev/verify_dashboard_endpoint.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/dev_seed_pipeline_results.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/ast_scanner.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/confidence_scorer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/graph_builder.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/analyzer/side_effect_detector.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/generate_api_docs.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/models/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/models/schema.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/renderer/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/docs/renderer/html_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/build_full_offline_bundle.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/bundle_datasets.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/bundle_model_cache.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/export_api_base_only.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/export_base_images.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/export_images.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/import_images.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/load_base_images.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/predownload_nlp_models.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/restore_datasets.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/restore_model_cache.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/offline/smoke_test.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/ops/phoenix_watch.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/backfill_langfuse_trace_url.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_dense_smoke.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_evalvault_run_dataset.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_retriever_docs.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_smoke_real.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/perf/r3_stage_events_sample.jsonl +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/pipeline_template_inspect.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/reports/generate_release_notes.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/run_with_timeout.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/test_full_evaluation.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/tests/run_regressions.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/tests/run_retriever_stage_report_smoke.sh +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/validate_tutorials.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/verify_ragas_compliance.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/scripts/verify_workflows.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/main.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/benchmark.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/calibration.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/chat.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/domain.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/knowledge.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/mcp.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/pipeline.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/api/routers/runs.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/app.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/agent.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/analyze.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/api.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/artifacts.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/benchmark.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/calibrate.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/calibrate_judge.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/compare.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/config.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/debug.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/domain.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/experiment.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/gate.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/generate.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/graph_rag.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/history.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/init.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/kg.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/langfuse.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/ops.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/phoenix.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/pipeline.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/profile_difficulty.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/prompts.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/regress.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/commands/stage.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/analysis_io.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/console.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/errors.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/formatters.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/options.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/presets.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/progress.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/cli/utils/validators.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/schemas.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/inbound/mcp/tools.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/analysis_report_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/base_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/bm25_searcher_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/causal_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/causal_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/common.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/comparison_pipeline_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/comparison_report_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/data_loader_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/dataset_feature_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/detailed_report_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/diagnostic_playbook_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_distribution_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/embedding_searcher_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hybrid_rrf_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hybrid_weighted_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/hypothesis_generator_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/llm_report_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/low_performer_extractor_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/model_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/morpheme_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/morpheme_quality_checker_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/multiturn_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/network_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/nlp_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/nlp_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pattern_detector_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pipeline_factory.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/pipeline_helpers.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/priority_summary_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/ragas_evaluator_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/retrieval_quality_checker_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/root_cause_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_change_detector_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_comparator_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_loader_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/run_metric_comparator_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/search_comparator_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/statistical_comparator_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/summary_report_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/time_series_analyzer_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/timeseries_advanced_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/trend_detector_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/analysis/verification_report_module.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/artifact_fs.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/benchmark/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/benchmark/lm_eval_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/hybrid_cache.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/cache/memory_cache.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/base.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/csv_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/excel_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/json_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/loader_factory.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/method_input_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/multiturn_json_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/streaming_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/templates.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/dataset/thresholds.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/debug/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/debug/report_renderer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/ocr/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/ocr/paddleocr_backend.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/pdf_extractor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/documents/versioned_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/domain_memory_schema.sql +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/factory.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/postgres_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/domain_memory/sqlite_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/difficulty_profile_writer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/filesystem/ops_snapshot_writer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/insight_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/pattern_detector.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/playbook_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/improvement/stage_metric_playbook_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/judge_calibration_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/judge_calibration_reporter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/graph_rag_retriever.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/networkx_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/parallel_kg_builder.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/kg/query_strategies.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/anthropic_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/azure_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/base.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/instructor_factory.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/llm_relation_augmenter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/ollama_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/openai_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/token_aware_chat.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/llm/vllm_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/baseline_oracle.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/external_command.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/methods/registry.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/bm25_retriever.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/dense_retriever.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/document_chunker.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/hybrid_retriever.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/kiwi_tokenizer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/korean_evaluation.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/korean_stopwords.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/toolkit.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/nlp/korean/toolkit_factory.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/ops/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/ops/report_renderer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/ci_report_formatter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/dashboard_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/llm_report_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/markdown_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/report/pr_comment_formatter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/retriever/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/retriever/graph_rag_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/retriever/pgvector_store.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/base_sql.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/benchmark_storage_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/factory.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/postgres_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/postgres_schema.sql +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/schema.sql +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/storage/sqlite_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_log_handler.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/langfuse_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/adapters/outbound/tracker/log_sanitizer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/agent_types.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/domain_config.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/langfuse_support.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/model_config.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/playbooks/improvement_playbook.yaml +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/config/secret_manager.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/debug_ragas.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/debug_ragas_real.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/analysis.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/analysis_pipeline.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/benchmark.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/benchmark_run.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/dataset.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/debug.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/experiment.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/feedback.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/graph_rag.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/improvement.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/judge_calibration.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/kg.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/memory.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/method.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/multiturn.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/ops_report.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/prompt.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/prompt_suggestion.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/rag_trace.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/result.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/entities/stage.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/analysis_registry.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/confidence.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/contextual_relevancy.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/entity_preservation.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/insurance.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/multiturn_metrics.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/no_answer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/registry.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/retrieval_rank.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_accuracy.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_needs_followup.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_non_definitive.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/summary_risk_coverage.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/terms_dictionary.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/metrics/text_match.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/analysis_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/artifact_lint_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/async_batch_executor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/batch_executor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_report_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_runner.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/benchmark_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/cache_metrics.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/cluster_map_builder.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/custom_metric_snapshot.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/dataset_preprocessor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/debug_report_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/difficulty_profile_reporter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/difficulty_profiling_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/document_chunker.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/document_versioning.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/domain_learning_hook.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/embedding_overlay.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/entity_extractor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_comparator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_manager.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_reporter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_repository.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/experiment_statistics.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/graph_rag_experiment.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/holdout_splitter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/improvement_guide_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/intent_classifier.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/judge_calibration_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/kg_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/memory_aware_evaluator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/memory_based_analysis.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/method_runner.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/multiturn_evaluator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/ops_report_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/ops_snapshot_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/pipeline_orchestrator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/pipeline_template_registry.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_candidate_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_manifest.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_registry.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_scoring_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_status.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/prompt_suggestion_reporter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/ragas_prompt_overrides.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/regression_gate_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/retrieval_metrics.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/retriever_context.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/run_comparison_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/satisfaction_calibration_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_event_builder.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_metric_guide_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_metric_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/stage_summary_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/synthetic_qa_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/testset_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/threshold_profiles.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/unified_report_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/domain/services/visual_space_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/mkdocs_helpers.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/analysis_pipeline_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/evaluator_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/learning_hook_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/multiturn_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/inbound/web_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_cache_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_module_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/analysis_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/artifact_fs_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/benchmark_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/causal_analysis_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/comparison_pipeline_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/dataset_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/difficulty_profile_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/domain_memory_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/embedding_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/graph_retriever_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/improvement_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/intent_classifier_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/judge_calibration_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/korean_nlp_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/llm_factory_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/llm_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/method_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/nlp_analysis_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/ops_snapshot_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/relation_augmenter_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/report_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/stage_storage_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/storage_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/tracer_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/ports/outbound/tracker_port.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/reports/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/reports/release_notes.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/scripts/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/src/evalvault/scripts/regression_runner.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/conftest.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/README.md +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/benchmark/retrieval_ground_truth_min.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/benchmark/retrieval_ground_truth_multi.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/auto_insurance_qa_korean_full.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/callcenter_summary_5cases.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/comprehensive_dataset.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/edge_cases.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/edge_cases.xlsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/evaluation_test_sample.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_benchmark.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_multi_sample.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_retriever_docs.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/graphrag_smoke.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_document.txt +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.csv +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_english.xlsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.csv +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean.xlsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/insurance_qa_korean_versioned_pdf.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/multiturn_benchmark.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/regression_baseline.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/run_mode_full_domain_memory.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/run_mode_simple.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/e2e/summary_eval_minimal.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/kg/minimal_graph.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.csv +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.json +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/fixtures/sample_dataset.xlsx +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/benchmark/test_benchmark_service_integration.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/conftest.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_cli_integration.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_data_flow.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_e2e_scenarios.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_evaluation_flow.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_full_workflow.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_langfuse_flow.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_phoenix_flow.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_pipeline_api_contracts.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_storage_flow.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/integration/test_summary_eval_fixture.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/optional_deps.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/inbound/mcp/test_execute_tools.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/inbound/mcp/test_read_tools.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/documents/test_pdf_extractor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/documents/test_versioned_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/__init__.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_insight_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_pattern_detector.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_playbook_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/improvement/test_stage_metric_playbook_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/kg/test_graph_rag_retriever.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/kg/test_parallel_kg_builder.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/retriever/test_graph_rag_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/adapters/outbound/storage/test_benchmark_storage_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/config/test_phoenix_support.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/conftest.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_analysis_metric_registry.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_confidence.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_contextual_relevancy.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_entity_preservation.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_metric_registry.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_multiturn_metrics.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_no_answer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_retrieval_rank.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/metrics/test_text_match.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_cache_metrics.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_claim_level.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_dataset_preprocessor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_document_versioning.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_evaluator_comprehensive.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_holdout_splitter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_improvement_guide_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_judge_calibration_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_ops_snapshot_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_regression_gate_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_retrieval_metrics.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_retriever_context.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_stage_event_builder.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_stage_metric_guide_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/services/test_synthetic_qa_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/test_embedding_overlay.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/test_prompt_manifest.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/domain/test_prompt_status.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/reports/test_release_notes.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/scripts/test_regression_runner.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_agent_types.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_analysis_entities.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_analysis_modules.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_analysis_pipeline.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_analysis_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_anthropic_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_artifact_lint_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_async_batch_executor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_azure_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_benchmark_helpers.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_benchmark_runner.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_causal_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_ci_gate_cli.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_artifacts.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_calibrate_judge.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_domain.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_init.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_ops.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_progress.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_cli_utils.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_data_loaders.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_difficulty_profiling_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_domain_config.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_domain_memory.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_entities.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_entities_kg.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_entity_extractor.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_evaluator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_experiment.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_hybrid_cache.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_instrumentation.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_insurance_metric.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_intent_classifier.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_kg_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_kg_networkx.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_kiwi_tokenizer.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_kiwi_warning_suppression.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_korean_dense.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_korean_evaluation.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_korean_retrieval.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_langfuse_tracker.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_llm_relation_augmenter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_lm_eval_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_markdown_report.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_memory_cache.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_memory_services.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_method_plugins.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_mlflow_tracker.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_model_config.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_nlp_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_nlp_entities.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_ollama_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_openai_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_phoenix_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_pipeline_orchestrator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_ports.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_postgres_storage.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_pr_comment_formatter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_prompt_candidate_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_rag_trace_entities.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_regress_cli.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_run_comparison_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_run_memory_helpers.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_run_mode_fixtures.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_settings.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_sqlite_storage.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_cli.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_event_schema.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_metric_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_storage.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_stage_summary_service.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_statistical_adapter.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_streaming_loader.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_summary_eval_fixture.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_testset_generator.py +0 -0
- {evalvault-1.75.0 → evalvault-1.76.0}/tests/unit/test_web_adapter.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: evalvault
|
|
3
|
-
Version: 1.75.0
|
|
3
|
+
Version: 1.76.0
|
|
4
4
|
Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
|
|
5
5
|
Project-URL: Homepage, https://github.com/ntts9990/EvalVault
|
|
6
6
|
Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
|
|
@@ -14,6 +14,11 @@
|
|
|
14
14
|
- MLflow: 실험/버전/아티팩트 저장
|
|
15
15
|
- Phoenix: LLM 트레이싱/관찰/디버깅
|
|
16
16
|
|
|
17
|
+
### 운영 규칙 (필수)
|
|
18
|
+
- 모든 평가 run은 **MLflow + Phoenix에 동시에 로깅**된다.
|
|
19
|
+
- tracker 옵션에서 둘 중 하나라도 누락되면 실행이 실패한다.
|
|
20
|
+
- 기본 tracker: `mlflow+phoenix`
|
|
21
|
+
|
|
17
22
|
### 라이선스/자가호스팅 참고
|
|
18
23
|
- MLflow: Apache 2.0 (상업 사용 가능) https://raw.githubusercontent.com/mlflow/mlflow/master/LICENSE.txt
|
|
19
24
|
- Phoenix: Elastic License 2.0 (자가호스팅 허용, 제3자에게 SaaS 제공 금지) https://raw.githubusercontent.com/Arize-ai/phoenix/main/LICENSE
|
|
@@ -111,3 +116,14 @@ metrics.json 구조 예시:
|
|
|
111
116
|
## EvalVault 연동 참고
|
|
112
117
|
- MLflow 어댑터: src/evalvault/adapters/outbound/tracker/mlflow_adapter.py
|
|
113
118
|
- Phoenix 어댑터: src/evalvault/adapters/outbound/tracker/phoenix_adapter.py
|
|
119
|
+
|
|
120
|
+
## 설정 값
|
|
121
|
+
- `MLFLOW_TRACKING_URI`: MLflow tracking server URI
|
|
122
|
+
- `MLFLOW_EXPERIMENT_NAME`: 실험 이름 (기본: evalvault)
|
|
123
|
+
- `PHOENIX_ENDPOINT`: Phoenix OTLP endpoint (예: http://localhost:6006/v1/traces)
|
|
124
|
+
- `PHOENIX_API_TOKEN`: Phoenix API 토큰 (옵션)
|
|
125
|
+
|
|
126
|
+
## CLI 기본 사용
|
|
127
|
+
```bash
|
|
128
|
+
uv run evalvault run <DATASET> --tracker mlflow+phoenix
|
|
129
|
+
```
|
|
@@ -24,7 +24,7 @@ from evalvault.adapters.outbound.judge_calibration_reporter import JudgeCalibrat
|
|
|
24
24
|
from evalvault.adapters.outbound.ops.report_renderer import render_json, render_markdown
|
|
25
25
|
from evalvault.adapters.outbound.report import MarkdownReportAdapter
|
|
26
26
|
from evalvault.config.phoenix_support import PhoenixExperimentResolver
|
|
27
|
-
from evalvault.config.settings import Settings
|
|
27
|
+
from evalvault.config.settings import Settings, resolve_tracker_providers
|
|
28
28
|
from evalvault.domain.entities import (
|
|
29
29
|
CalibrationResult,
|
|
30
30
|
FeedbackSummary,
|
|
@@ -217,56 +217,83 @@ class WebUIAdapter:
|
|
|
217
217
|
logger.warning(f"Failed to create LLM adapter for {model_id}: {e}, using default")
|
|
218
218
|
return self._llm_adapter
|
|
219
219
|
|
|
220
|
-
def
|
|
220
|
+
def _get_trackers(
|
|
221
221
|
self,
|
|
222
222
|
settings: Settings,
|
|
223
223
|
tracker_config: dict[str, Any] | None,
|
|
224
|
-
) -> tuple[str
|
|
225
|
-
provider = (tracker_config or {}).get("provider") or "none"
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
224
|
+
) -> list[tuple[str, Any]]:
|
|
225
|
+
provider = (tracker_config or {}).get("provider") or settings.tracker_provider or "none"
|
|
226
|
+
providers = resolve_tracker_providers(provider)
|
|
227
|
+
if not providers or providers == ["none"]:
|
|
228
|
+
return []
|
|
229
|
+
required = {"mlflow", "phoenix"}
|
|
230
|
+
if not required.issubset(set(providers)):
|
|
231
|
+
raise RuntimeError("Tracker must include both mlflow and phoenix")
|
|
232
|
+
|
|
233
|
+
trackers: list[tuple[str, Any]] = []
|
|
234
|
+
for entry in providers:
|
|
235
|
+
if entry == "langfuse":
|
|
236
|
+
if not settings.langfuse_public_key or not settings.langfuse_secret_key:
|
|
237
|
+
raise RuntimeError("Langfuse credentials missing")
|
|
238
|
+
from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter
|
|
239
|
+
|
|
240
|
+
trackers.append(
|
|
241
|
+
(
|
|
242
|
+
entry,
|
|
243
|
+
LangfuseAdapter(
|
|
244
|
+
public_key=settings.langfuse_public_key,
|
|
245
|
+
secret_key=settings.langfuse_secret_key,
|
|
246
|
+
host=settings.langfuse_host,
|
|
247
|
+
),
|
|
248
|
+
)
|
|
249
|
+
)
|
|
250
|
+
continue
|
|
242
251
|
|
|
243
|
-
|
|
244
|
-
|
|
252
|
+
if entry == "phoenix":
|
|
253
|
+
from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
|
|
245
254
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
255
|
+
ensure_phoenix_instrumentation(settings, force=True)
|
|
256
|
+
try:
|
|
257
|
+
from evalvault.adapters.outbound.tracker.phoenix_adapter import PhoenixAdapter
|
|
258
|
+
except ImportError as exc:
|
|
259
|
+
raise RuntimeError("Phoenix extras not installed") from exc
|
|
260
|
+
trackers.append(
|
|
261
|
+
(
|
|
262
|
+
entry,
|
|
263
|
+
PhoenixAdapter(
|
|
264
|
+
endpoint=settings.phoenix_endpoint,
|
|
265
|
+
project_name=getattr(settings, "phoenix_project_name", None),
|
|
266
|
+
annotations_enabled=getattr(
|
|
267
|
+
settings,
|
|
268
|
+
"phoenix_annotations_enabled",
|
|
269
|
+
True,
|
|
270
|
+
),
|
|
271
|
+
),
|
|
272
|
+
)
|
|
273
|
+
)
|
|
274
|
+
continue
|
|
275
|
+
|
|
276
|
+
if entry == "mlflow":
|
|
277
|
+
if not settings.mlflow_tracking_uri:
|
|
278
|
+
raise RuntimeError("MLflow tracking URI missing")
|
|
279
|
+
try:
|
|
280
|
+
from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
|
|
281
|
+
except ImportError as exc:
|
|
282
|
+
raise RuntimeError("MLflow adapter unavailable") from exc
|
|
283
|
+
trackers.append(
|
|
284
|
+
(
|
|
285
|
+
entry,
|
|
286
|
+
MLflowAdapter(
|
|
287
|
+
tracking_uri=settings.mlflow_tracking_uri,
|
|
288
|
+
experiment_name=settings.mlflow_experiment_name,
|
|
289
|
+
),
|
|
290
|
+
)
|
|
291
|
+
)
|
|
292
|
+
continue
|
|
267
293
|
|
|
268
|
-
|
|
269
|
-
|
|
294
|
+
raise RuntimeError(f"Unknown tracker provider: {entry}")
|
|
295
|
+
|
|
296
|
+
return trackers
|
|
270
297
|
|
|
271
298
|
@staticmethod
|
|
272
299
|
def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
|
|
@@ -425,7 +452,11 @@ class WebUIAdapter:
|
|
|
425
452
|
dataset.metadata["domain"] = requested_domain
|
|
426
453
|
|
|
427
454
|
settings = self._settings or Settings()
|
|
428
|
-
|
|
455
|
+
try:
|
|
456
|
+
trackers = self._get_trackers(settings, request.tracker_config)
|
|
457
|
+
except RuntimeError as exc:
|
|
458
|
+
raise RuntimeError(f"Tracker configuration error: {exc}") from exc
|
|
459
|
+
tracker_providers = [provider for provider, _ in trackers]
|
|
429
460
|
stage_store = bool(request.stage_store)
|
|
430
461
|
|
|
431
462
|
retriever_instance = None
|
|
@@ -484,7 +515,7 @@ class WebUIAdapter:
|
|
|
484
515
|
)
|
|
485
516
|
from evalvault.domain.services.memory_aware_evaluator import MemoryAwareEvaluator
|
|
486
517
|
|
|
487
|
-
tracer = PhoenixTracerAdapter() if
|
|
518
|
+
tracer = PhoenixTracerAdapter() if "phoenix" in tracker_providers else None
|
|
488
519
|
memory_adapter = build_domain_memory_adapter(
|
|
489
520
|
settings=self._settings,
|
|
490
521
|
db_path=Path(memory_db_path) if memory_db_path else None,
|
|
@@ -696,22 +727,27 @@ class WebUIAdapter:
|
|
|
696
727
|
str(request.threshold_profile).strip().lower()
|
|
697
728
|
)
|
|
698
729
|
|
|
699
|
-
if
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
730
|
+
if trackers:
|
|
731
|
+
result.tracker_metadata.setdefault("tracker_providers", tracker_providers)
|
|
732
|
+
for provider, tracker in trackers:
|
|
733
|
+
try:
|
|
734
|
+
trace_id = tracker.log_evaluation_run(result)
|
|
735
|
+
provider_meta = result.tracker_metadata.setdefault(provider, {})
|
|
736
|
+
if isinstance(provider_meta, dict):
|
|
737
|
+
provider_meta.setdefault("trace_id", trace_id)
|
|
738
|
+
if provider == "phoenix":
|
|
739
|
+
endpoint = settings.phoenix_endpoint or "http://localhost:6006/v1/traces"
|
|
740
|
+
phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
|
|
741
|
+
phoenix_meta.update(
|
|
742
|
+
{
|
|
743
|
+
"trace_id": trace_id,
|
|
744
|
+
"endpoint": endpoint,
|
|
745
|
+
"trace_url": self._build_phoenix_trace_url(endpoint, trace_id),
|
|
746
|
+
"schema_version": 2,
|
|
747
|
+
}
|
|
748
|
+
)
|
|
749
|
+
except Exception as exc:
|
|
750
|
+
raise RuntimeError(f"Tracker logging failed for {provider}: {exc}") from exc
|
|
715
751
|
|
|
716
752
|
if stage_store and self._storage and hasattr(self._storage, "save_stage_events"):
|
|
717
753
|
try:
|
|
@@ -71,7 +71,9 @@ class ConfigUpdateRequest(BaseModel):
|
|
|
71
71
|
phoenix_endpoint: str | None = None
|
|
72
72
|
phoenix_enabled: bool | None = None
|
|
73
73
|
phoenix_sample_rate: float | None = None
|
|
74
|
-
|
|
74
|
+
phoenix_project_name: str | None = None
|
|
75
|
+
phoenix_annotations_enabled: bool | None = None
|
|
76
|
+
tracker_provider: str | None = None
|
|
75
77
|
postgres_host: str | None = None
|
|
76
78
|
postgres_port: int | None = None
|
|
77
79
|
postgres_database: str | None = None
|
|
@@ -31,7 +31,7 @@ from ..utils.validators import parse_csv_option, validate_choices
|
|
|
31
31
|
from .run_helpers import (
|
|
32
32
|
_display_results,
|
|
33
33
|
_is_oss_open_model,
|
|
34
|
-
|
|
34
|
+
_log_to_trackers,
|
|
35
35
|
_resolve_thresholds,
|
|
36
36
|
_save_results,
|
|
37
37
|
_save_to_db,
|
|
@@ -419,7 +419,7 @@ def create_method_app(console: Console) -> typer.Typer:
|
|
|
419
419
|
_display_results(result, console)
|
|
420
420
|
|
|
421
421
|
if tracker and tracker != "none":
|
|
422
|
-
|
|
422
|
+
_log_to_trackers(settings, result, console, tracker_type=tracker)
|
|
423
423
|
|
|
424
424
|
if eval_output:
|
|
425
425
|
_save_results(eval_output, result, console)
|
|
@@ -33,7 +33,7 @@ from evalvault.adapters.outbound.phoenix.sync_service import (
|
|
|
33
33
|
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
34
34
|
from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import PhoenixTracerAdapter
|
|
35
35
|
from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
|
|
36
|
-
from evalvault.config.settings import Settings, apply_profile
|
|
36
|
+
from evalvault.config.settings import Settings, apply_profile, resolve_tracker_providers
|
|
37
37
|
from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
|
|
38
38
|
from evalvault.domain.entities.multiturn import (
|
|
39
39
|
MultiTurnConversationRecord,
|
|
@@ -86,7 +86,8 @@ from .run_helpers import (
|
|
|
86
86
|
_display_results,
|
|
87
87
|
_evaluate_streaming_run,
|
|
88
88
|
_is_oss_open_model,
|
|
89
|
-
|
|
89
|
+
_log_analysis_artifacts,
|
|
90
|
+
_log_to_trackers,
|
|
90
91
|
_option_was_provided,
|
|
91
92
|
_print_run_mode_banner,
|
|
92
93
|
_resolve_thresholds,
|
|
@@ -178,6 +179,14 @@ def _log_duration(
|
|
|
178
179
|
_log_timestamp(console, verbose, f"{message} ({elapsed:.2f}s)")
|
|
179
180
|
|
|
180
181
|
|
|
182
|
+
def _infer_phoenix_model_provider(model_name: str) -> str:
|
|
183
|
+
if not model_name:
|
|
184
|
+
return "OPENAI"
|
|
185
|
+
provider = model_name.split("/")[0].upper() if "/" in model_name else "OPENAI"
|
|
186
|
+
allowed = {"OPENAI", "AZURE_OPENAI", "ANTHROPIC", "GOOGLE", "DEEPSEEK", "XAI", "AWS", "OLLAMA"}
|
|
187
|
+
return provider if provider in allowed else "OPENAI"
|
|
188
|
+
|
|
189
|
+
|
|
181
190
|
def register_run_commands(
|
|
182
191
|
app: typer.Typer,
|
|
183
192
|
console: Console,
|
|
@@ -358,10 +367,13 @@ def register_run_commands(
|
|
|
358
367
|
help="Store stage events in the SQLite database (requires --db).",
|
|
359
368
|
),
|
|
360
369
|
tracker: str = typer.Option(
|
|
361
|
-
"
|
|
370
|
+
"mlflow+phoenix",
|
|
362
371
|
"--tracker",
|
|
363
372
|
"-t",
|
|
364
|
-
help=
|
|
373
|
+
help=(
|
|
374
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
375
|
+
"or combinations like 'mlflow+phoenix'."
|
|
376
|
+
),
|
|
365
377
|
rich_help_panel="Simple mode preset",
|
|
366
378
|
),
|
|
367
379
|
langfuse: bool = typer.Option(
|
|
@@ -667,13 +679,24 @@ def register_run_commands(
|
|
|
667
679
|
tracker_override = _option_was_provided(ctx, "tracker") or langfuse
|
|
668
680
|
selected_tracker = tracker
|
|
669
681
|
if preset.default_tracker:
|
|
670
|
-
if tracker_override
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
682
|
+
if tracker_override:
|
|
683
|
+
try:
|
|
684
|
+
providers = resolve_tracker_providers(tracker)
|
|
685
|
+
except ValueError as exc:
|
|
686
|
+
print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
|
|
687
|
+
raise typer.Exit(2) from exc
|
|
688
|
+
if providers == ["none"]:
|
|
689
|
+
selected_tracker = preset.default_tracker
|
|
690
|
+
elif preset.default_tracker not in providers:
|
|
691
|
+
print_cli_warning(
|
|
692
|
+
console,
|
|
693
|
+
f"Simple 모드는 tracker에 {preset.default_tracker}가 포함되어야 합니다.",
|
|
694
|
+
tips=["다른 Tracker를 사용하려면 --mode full을 사용하세요."],
|
|
695
|
+
)
|
|
696
|
+
providers.append(preset.default_tracker)
|
|
697
|
+
selected_tracker = "+".join(providers)
|
|
698
|
+
else:
|
|
699
|
+
selected_tracker = preset.default_tracker
|
|
677
700
|
tracker = selected_tracker
|
|
678
701
|
|
|
679
702
|
prompt_manifest_value = prompt_manifest
|
|
@@ -1646,10 +1669,29 @@ def register_run_commands(
|
|
|
1646
1669
|
)
|
|
1647
1670
|
raise typer.Exit(2) from exc
|
|
1648
1671
|
|
|
1672
|
+
effective_tracker = tracker
|
|
1673
|
+
if langfuse and tracker == "none" and not preset.default_tracker:
|
|
1674
|
+
effective_tracker = "langfuse"
|
|
1675
|
+
print_cli_warning(
|
|
1676
|
+
console,
|
|
1677
|
+
"--langfuse 플래그는 곧 제거됩니다.",
|
|
1678
|
+
tips=["대신 --tracker langfuse를 사용하세요."],
|
|
1679
|
+
)
|
|
1680
|
+
|
|
1681
|
+
try:
|
|
1682
|
+
effective_providers = resolve_tracker_providers(effective_tracker)
|
|
1683
|
+
except ValueError as exc:
|
|
1684
|
+
print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
|
|
1685
|
+
raise typer.Exit(2) from exc
|
|
1686
|
+
|
|
1649
1687
|
phoenix_dataset_name = phoenix_dataset
|
|
1650
1688
|
if phoenix_experiment and not phoenix_dataset_name:
|
|
1651
1689
|
phoenix_dataset_name = f"{ds.name}:{ds.version}"
|
|
1652
1690
|
|
|
1691
|
+
auto_phoenix_sync = "phoenix" in effective_providers
|
|
1692
|
+
if auto_phoenix_sync and not phoenix_dataset_name:
|
|
1693
|
+
phoenix_dataset_name = f"{ds.name}:{ds.version}"
|
|
1694
|
+
|
|
1653
1695
|
phoenix_dataset_description_value = phoenix_dataset_description
|
|
1654
1696
|
if phoenix_dataset_name and not phoenix_dataset_description_value:
|
|
1655
1697
|
desc_source = ds.metadata.get("description") if isinstance(ds.metadata, dict) else None
|
|
@@ -1659,13 +1701,20 @@ def register_run_commands(
|
|
|
1659
1701
|
phoenix_dataset_result: dict[str, Any] | None = None
|
|
1660
1702
|
phoenix_experiment_result: dict[str, Any] | None = None
|
|
1661
1703
|
|
|
1662
|
-
if phoenix_dataset_name or phoenix_experiment:
|
|
1704
|
+
if phoenix_dataset_name or phoenix_experiment or auto_phoenix_sync:
|
|
1663
1705
|
try:
|
|
1664
1706
|
phoenix_sync_service = PhoenixSyncService(
|
|
1665
1707
|
endpoint=settings.phoenix_endpoint,
|
|
1666
1708
|
api_token=getattr(settings, "phoenix_api_token", None),
|
|
1667
1709
|
)
|
|
1668
1710
|
except PhoenixSyncError as exc:
|
|
1711
|
+
if auto_phoenix_sync:
|
|
1712
|
+
print_cli_error(
|
|
1713
|
+
console,
|
|
1714
|
+
"Phoenix Sync 서비스를 초기화할 수 없습니다.",
|
|
1715
|
+
details=str(exc),
|
|
1716
|
+
)
|
|
1717
|
+
raise typer.Exit(2) from exc
|
|
1669
1718
|
print_cli_warning(
|
|
1670
1719
|
console,
|
|
1671
1720
|
"Phoenix Sync 서비스를 초기화할 수 없습니다.",
|
|
@@ -1673,19 +1722,10 @@ def register_run_commands(
|
|
|
1673
1722
|
)
|
|
1674
1723
|
phoenix_sync_service = None
|
|
1675
1724
|
|
|
1676
|
-
effective_tracker = tracker
|
|
1677
|
-
if langfuse and tracker == "none" and not preset.default_tracker:
|
|
1678
|
-
effective_tracker = "langfuse"
|
|
1679
|
-
print_cli_warning(
|
|
1680
|
-
console,
|
|
1681
|
-
"--langfuse 플래그는 곧 제거됩니다.",
|
|
1682
|
-
tips=["대신 --tracker langfuse를 사용하세요."],
|
|
1683
|
-
)
|
|
1684
|
-
|
|
1685
1725
|
config_wants_phoenix = getattr(settings, "phoenix_enabled", False)
|
|
1686
1726
|
if not isinstance(config_wants_phoenix, bool):
|
|
1687
1727
|
config_wants_phoenix = False
|
|
1688
|
-
should_enable_phoenix =
|
|
1728
|
+
should_enable_phoenix = "phoenix" in effective_providers or config_wants_phoenix
|
|
1689
1729
|
if should_enable_phoenix:
|
|
1690
1730
|
ensure_phoenix_instrumentation(settings, console=console, force=True)
|
|
1691
1731
|
|
|
@@ -2032,6 +2072,9 @@ def register_run_commands(
|
|
|
2032
2072
|
)
|
|
2033
2073
|
if prompt_bundle:
|
|
2034
2074
|
result.tracker_metadata["prompt_set"] = build_prompt_summary(prompt_bundle)
|
|
2075
|
+
result.tracker_metadata["prompt_set_detail"] = prompt_bundle.to_dict(
|
|
2076
|
+
include_content=True
|
|
2077
|
+
)
|
|
2035
2078
|
|
|
2036
2079
|
if retriever_instance or used_versioned_prefill:
|
|
2037
2080
|
retriever_tracker_meta: dict[str, Any] = {
|
|
@@ -2105,13 +2148,29 @@ def register_run_commands(
|
|
|
2105
2148
|
)
|
|
2106
2149
|
console.print(f"[dim]View datasets: {dataset_info.url}[/dim]")
|
|
2107
2150
|
except PhoenixSyncError as exc:
|
|
2151
|
+
if auto_phoenix_sync:
|
|
2152
|
+
print_cli_error(
|
|
2153
|
+
console,
|
|
2154
|
+
"Phoenix Dataset 업로드에 실패했습니다.",
|
|
2155
|
+
details=str(exc),
|
|
2156
|
+
)
|
|
2157
|
+
raise typer.Exit(2) from exc
|
|
2108
2158
|
print_cli_warning(
|
|
2109
2159
|
console,
|
|
2110
2160
|
"Phoenix Dataset 업로드에 실패했습니다.",
|
|
2111
2161
|
tips=[str(exc)],
|
|
2112
2162
|
)
|
|
2163
|
+
if auto_phoenix_sync and not phoenix_experiment:
|
|
2164
|
+
phoenix_experiment = f"{result.model_name}-{result.run_id[:8]}"
|
|
2113
2165
|
if phoenix_experiment:
|
|
2114
2166
|
if not phoenix_dataset_result:
|
|
2167
|
+
if auto_phoenix_sync:
|
|
2168
|
+
print_cli_error(
|
|
2169
|
+
console,
|
|
2170
|
+
"Dataset 업로드에 실패해 Phoenix Experiment 생성을 진행할 수 없습니다.",
|
|
2171
|
+
details="Phoenix dataset 업로드가 필요합니다.",
|
|
2172
|
+
)
|
|
2173
|
+
raise typer.Exit(2)
|
|
2115
2174
|
print_cli_warning(
|
|
2116
2175
|
console,
|
|
2117
2176
|
"Dataset 업로드에 실패해 Phoenix Experiment 생성을 건너뜁니다.",
|
|
@@ -2169,6 +2228,41 @@ def register_run_commands(
|
|
|
2169
2228
|
phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
|
|
2170
2229
|
phoenix_meta.setdefault("schema_version", 2)
|
|
2171
2230
|
phoenix_meta["prompts"] = prompt_metadata_entries
|
|
2231
|
+
if phoenix_sync_service and "phoenix" in effective_providers:
|
|
2232
|
+
try:
|
|
2233
|
+
prompt_set_summary = result.tracker_metadata.get("prompt_set") or {}
|
|
2234
|
+
prompt_set_name = prompt_set_summary.get("prompt_set_name")
|
|
2235
|
+
prompt_entries = list(prompt_metadata_entries)
|
|
2236
|
+
prompt_set_detail = result.tracker_metadata.get("prompt_set_detail")
|
|
2237
|
+
if isinstance(prompt_set_detail, dict):
|
|
2238
|
+
for item in prompt_set_detail.get("items", []):
|
|
2239
|
+
prompt = item.get("prompt") or {}
|
|
2240
|
+
if not isinstance(prompt, dict):
|
|
2241
|
+
continue
|
|
2242
|
+
prompt_entries.append(
|
|
2243
|
+
{
|
|
2244
|
+
"name": prompt.get("name"),
|
|
2245
|
+
"role": item.get("role"),
|
|
2246
|
+
"kind": prompt.get("kind"),
|
|
2247
|
+
"checksum": prompt.get("checksum"),
|
|
2248
|
+
"content": prompt.get("content"),
|
|
2249
|
+
"source": prompt.get("source"),
|
|
2250
|
+
}
|
|
2251
|
+
)
|
|
2252
|
+
synced = phoenix_sync_service.sync_prompts(
|
|
2253
|
+
prompt_entries=prompt_entries,
|
|
2254
|
+
model_name=result.model_name,
|
|
2255
|
+
model_provider=_infer_phoenix_model_provider(result.model_name),
|
|
2256
|
+
prompt_set_name=prompt_set_name,
|
|
2257
|
+
)
|
|
2258
|
+
if synced:
|
|
2259
|
+
phoenix_meta["prompts"] = synced
|
|
2260
|
+
except PhoenixSyncError as exc:
|
|
2261
|
+
print_cli_warning(
|
|
2262
|
+
console,
|
|
2263
|
+
"Phoenix Prompt 동기화에 실패했습니다.",
|
|
2264
|
+
tips=[str(exc)],
|
|
2265
|
+
)
|
|
2172
2266
|
|
|
2173
2267
|
if stage_events or stage_store:
|
|
2174
2268
|
stage_event_builder = StageEventBuilder()
|
|
@@ -2187,7 +2281,7 @@ def register_run_commands(
|
|
|
2187
2281
|
|
|
2188
2282
|
if effective_tracker != "none":
|
|
2189
2283
|
phoenix_opts = None
|
|
2190
|
-
if
|
|
2284
|
+
if "phoenix" in effective_providers:
|
|
2191
2285
|
phoenix_opts = {
|
|
2192
2286
|
"max_traces": phoenix_max_traces,
|
|
2193
2287
|
"metadata": phoenix_trace_metadata or None,
|
|
@@ -2198,7 +2292,7 @@ def register_run_commands(
|
|
|
2198
2292
|
verbose,
|
|
2199
2293
|
f"Tracker 로깅 시작 ({effective_tracker})",
|
|
2200
2294
|
)
|
|
2201
|
-
|
|
2295
|
+
_log_to_trackers(
|
|
2202
2296
|
settings,
|
|
2203
2297
|
result,
|
|
2204
2298
|
console,
|
|
@@ -2276,6 +2370,12 @@ def register_run_commands(
|
|
|
2276
2370
|
pipeline_result,
|
|
2277
2371
|
artifacts_dir=artifacts_dir,
|
|
2278
2372
|
)
|
|
2373
|
+
result.tracker_metadata["analysis_artifacts"] = {
|
|
2374
|
+
"dir": artifact_index.get("dir"),
|
|
2375
|
+
"index": artifact_index.get("index"),
|
|
2376
|
+
"output": str(analysis_output_path),
|
|
2377
|
+
"report": str(analysis_report_path),
|
|
2378
|
+
}
|
|
2279
2379
|
payload = serialize_pipeline_result(pipeline_result)
|
|
2280
2380
|
payload["run_id"] = result.run_id
|
|
2281
2381
|
payload["artifacts"] = artifact_index
|
|
@@ -2292,6 +2392,18 @@ def register_run_commands(
|
|
|
2292
2392
|
"[green]자동 분석 상세 결과 저장:[/green] "
|
|
2293
2393
|
f"{artifact_index['dir']} (index: {artifact_index['index']})\n"
|
|
2294
2394
|
)
|
|
2395
|
+
if effective_tracker != "none":
|
|
2396
|
+
_log_analysis_artifacts(
|
|
2397
|
+
settings,
|
|
2398
|
+
result,
|
|
2399
|
+
console,
|
|
2400
|
+
effective_tracker,
|
|
2401
|
+
analysis_payload=payload,
|
|
2402
|
+
artifact_index=artifact_index,
|
|
2403
|
+
report_text=report_text,
|
|
2404
|
+
output_path=analysis_output_path,
|
|
2405
|
+
report_path=analysis_report_path,
|
|
2406
|
+
)
|
|
2295
2407
|
|
|
2296
2408
|
@app.command(
|
|
2297
2409
|
name="run-simple",
|
|
@@ -2395,10 +2507,13 @@ def register_run_commands(
|
|
|
2395
2507
|
help="Store stage events in the SQLite database (requires --db).",
|
|
2396
2508
|
),
|
|
2397
2509
|
tracker: str = typer.Option(
|
|
2398
|
-
"
|
|
2510
|
+
"mlflow+phoenix",
|
|
2399
2511
|
"--tracker",
|
|
2400
2512
|
"-t",
|
|
2401
|
-
help=
|
|
2513
|
+
help=(
|
|
2514
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
2515
|
+
"or combinations like 'mlflow+phoenix'."
|
|
2516
|
+
),
|
|
2402
2517
|
),
|
|
2403
2518
|
langfuse: bool = typer.Option(
|
|
2404
2519
|
False,
|
|
@@ -2687,10 +2802,13 @@ def register_run_commands(
|
|
|
2687
2802
|
help="Store stage events in the SQLite database (requires --db).",
|
|
2688
2803
|
),
|
|
2689
2804
|
tracker: str = typer.Option(
|
|
2690
|
-
"
|
|
2805
|
+
"mlflow+phoenix",
|
|
2691
2806
|
"--tracker",
|
|
2692
2807
|
"-t",
|
|
2693
|
-
help=
|
|
2808
|
+
help=(
|
|
2809
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
2810
|
+
"or combinations like 'mlflow+phoenix'."
|
|
2811
|
+
),
|
|
2694
2812
|
),
|
|
2695
2813
|
langfuse: bool = typer.Option(
|
|
2696
2814
|
False,
|