evalvault 1.61.0__tar.gz → 1.62.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evalvault-1.61.0 → evalvault-1.62.0}/PKG-INFO +3 -1
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/guides/DEV_GUIDE.md +9 -0
- evalvault-1.62.0/docs/guides/rag_human_feedback_calibration_implementation_plan.md +218 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/RunDetails.tsx +303 -15
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/services/api.ts +64 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/pyproject.toml +3 -1
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/adapter.py +29 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/routers/runs.py +129 -6
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/__init__.py +2 -0
- evalvault-1.62.0/src/evalvault/adapters/inbound/cli/commands/calibrate.py +111 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/nlp_adapter.py +46 -2
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/nlp_analyzer_module.py +1 -1
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/storage/base_sql.py +91 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/storage/postgres_adapter.py +22 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/storage/postgres_schema.sql +14 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/storage/schema.sql +15 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/storage/sqlite_adapter.py +25 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/__init__.py +12 -0
- evalvault-1.62.0/src/evalvault/domain/entities/feedback.py +58 -0
- evalvault-1.62.0/src/evalvault/domain/services/satisfaction_calibration_service.py +328 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/storage_port.py +10 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_sqlite_storage.py +28 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/uv.lock +24 -1
- {evalvault-1.61.0 → evalvault-1.62.0}/.cursor/worktrees.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.dockerignore +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.env.example +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/ISSUE_TEMPLATE/question.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/dependabot.yml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/pull_request_template.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/stale.yml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/workflows/ci.yml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/workflows/release.yml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.github/workflows/stale.yml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.gitignore +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.pre-commit-config.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/.python-version +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/AGENTS.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/CHANGELOG.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/CLAUDE.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/CODE_OF_CONDUCT.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/CONTRIBUTING.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/Dockerfile +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/LICENSE.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/README.en.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/SECURITY.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/agent.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/client.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/config.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/main.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/memory/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/memory/shared/decisions.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/memory/shared/dependencies.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/memory/templates/coordinator_guide.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/memory/templates/work_log_template.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/memory_integration.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/progress.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/app_spec.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/baseline.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/coding_prompt.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/existing_project_prompt.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/improvement/architecture_prompt.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/improvement/base_prompt.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/improvement/coordinator_prompt.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/improvement/observability_prompt.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/initializer_prompt.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/prompt_manifest.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts/system.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/prompts.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/requirements.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/agent/security.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/domains/insurance/memory.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/domains/insurance/terms_dictionary_en.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/domains/insurance/terms_dictionary_ko.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/methods.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/models.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/regressions/default.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/regressions/ux.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/stage_metric_playbook.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/config/stage_metric_thresholds.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/dummy_test_dataset.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/insurance_qa_korean.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/insurance_qa_korean.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/insurance_qa_korean_2.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/insurance_qa_korean_3.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/sample.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/visualization_20q_cluster_map.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/visualization_20q_korean.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/visualization_2q_cluster_map.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/datasets/visualization_2q_korean.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/kg/knowledge_graph.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/raw/The Complete Guide to Mastering Suno Advanced Strategies for Professional Music Generation.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/raw/edge_cases.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/raw/run_mode_full_domain_memory.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/data/raw/sample_rag_knowledge.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/dataset_templates/dataset_template.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/dataset_templates/dataset_template.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/dataset_templates/dataset_template.xlsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/dataset_templates/method_input_template.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docker-compose.langfuse.yml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docker-compose.phoenix.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docker-compose.yml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/INDEX.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/README.ko.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/ROADMAP.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/STATUS.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/api/adapters/inbound.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/api/adapters/outbound.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/api/config.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/api/domain/entities.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/api/domain/metrics.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/api/domain/services.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/api/ports/inbound.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/api/ports/outbound.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/architecture/open-rag-trace-collector.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/architecture/open-rag-trace-spec.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/getting-started/INSTALLATION.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/guides/AGENTS_SYSTEM_GUIDE.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/guides/CLI_MCP_PLAN.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/guides/OPEN_RAG_TRACE_INTERNAL_ADAPTER.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/guides/OPEN_RAG_TRACE_SAMPLES.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/guides/RAG_HUMAN_FEEDBACK_CALIBRATION.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/guides/RELEASE_CHECKLIST.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/guides/USER_GUIDE.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/mapping/component-to-whitepaper.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/00_frontmatter.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/01_overview.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/02_architecture.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/03_data_flow.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/04_components.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/05_expert_lenses.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/06_implementation.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/07_advanced.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/08_customization.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/09_quality.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/10_performance.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/11_security.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/12_operations.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/13_standards.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/14_roadmap.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/INDEX.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/new_whitepaper/STYLE_GUIDE.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/stylesheets/extra.css +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/templates/dataset_template.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/templates/dataset_template.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/templates/dataset_template.xlsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/templates/kg_template.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/templates/retriever_docs_template.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/tools/generate-whitepaper.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/docs/web_ui_analysis_migration_plan.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/dummy_test_dataset.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/korean_rag/faithfulness_test.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/korean_rag/insurance_qa_100.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/korean_rag/keyword_extraction_test.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/korean_rag/retrieval_test.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/output/comparison.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/output/full_results.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/output/leaderboard.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/output/results_mteb.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/output/retrieval_result.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/benchmarks/run_korean_benchmark.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/kg_generator_demo.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/method_plugin_template/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/method_plugin_template/pyproject.toml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/method_plugin_template/src/method_plugin_template/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/method_plugin_template/src/method_plugin_template/methods.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/usecase/comprehensive_workflow_test.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/usecase/insurance_eval_dataset.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/examples/usecase/output/comprehensive_report.html +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/.env.example +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/.gitignore +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/analysis-compare.spec.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/analysis-lab.spec.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/compare-runs.spec.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/dashboard.spec.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/domain-memory.spec.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/evaluation-studio.spec.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/knowledge-base.spec.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/mocks/intents.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/mocks/run_details.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/mocks/runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/e2e/run-details.spec.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/eslint.config.js +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/index.html +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/package-lock.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/package.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/playwright.config.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/public/vite.svg +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/App.css +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/App.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/assets/react.svg +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/AnalysisNodeOutputs.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/InsightSpacePanel.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/Layout.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/MarkdownContent.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/PrioritySummaryPanel.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/SpaceLegend.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/SpacePlot2D.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/SpacePlot3D.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/StatusBadge.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/components/VirtualizedText.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/config/ui.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/config.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/hooks/useInsightSpace.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/index.css +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/main.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/AnalysisCompareView.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/AnalysisLab.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/AnalysisResultView.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/CompareRuns.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/ComprehensiveAnalysis.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/CustomerReport.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/Dashboard.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/DomainMemory.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/EvaluationStudio.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/KnowledgeBase.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/Settings.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/Visualization.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/pages/VisualizationHome.tsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/types/plotly.d.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/utils/format.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/utils/phoenix.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/utils/runAnalytics.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/utils/score.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/src/utils/summaryMetrics.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/tailwind.config.js +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/tsconfig.app.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/tsconfig.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/tsconfig.node.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/frontend/vite.config.ts +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/mkdocs.yml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/package-lock.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/.gitkeep +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_0aa9fab0-6c2c-4c1c-b228-202a38a2f00c.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_0aa9fab0-6c2c-4c1c-b228-202a38a2f00c.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_2163f844-ee2c-4630-9ba8-35cd9954d92e.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_2163f844-ee2c-4630-9ba8-35cd9954d92e.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_4516d358-2797-4c46-9f14-c1d975588025.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_4516d358-2797-4c46-9f14-c1d975588025.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_9fbf4776-9f5b-4c4b-ba08-c556032cee86.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_9fbf4776-9f5b-4c4b-ba08-c556032cee86.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/causal_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/diagnostic.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/final_output.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/index.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/load_data.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/load_runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/low_samples.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/nlp_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/pattern_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/priority_summary.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/ragas_eval.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/report.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/root_cause.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/statistics.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/time_series.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4516d358-2797-4c46-9f14-c1d975588025/trend_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/causal_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/diagnostic.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/final_output.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/index.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/load_data.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/load_runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/low_samples.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/nlp_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/pattern_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/priority_summary.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/ragas_eval.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/report.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/root_cause.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/statistics.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/time_series.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_4792d785-a8ea-4fd3-8a0c-dcbf1889f5fb/trend_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/causal_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/diagnostic.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/final_output.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/index.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/load_data.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/load_runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/low_samples.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/nlp_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/pattern_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/priority_summary.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/ragas_eval.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/report.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/root_cause.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/statistics.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/time_series.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_8f825b22-87f1-4d9b-b3a0-8ff65dbec2c5/trend_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/causal_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/diagnostic.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/final_output.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/index.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/load_data.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/load_runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/low_samples.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/nlp_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/pattern_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/priority_summary.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/ragas_eval.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/report.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/root_cause.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/statistics.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/time_series.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_e2f7e6bb-a86e-4f6a-8002-0c6f1a831775/trend_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/causal_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/diagnostic.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/final_output.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/index.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/load_data.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/load_runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/low_samples.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/nlp_analysis.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/pattern_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/priority_summary.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/ragas_eval.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/report.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/root_cause.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/statistics.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/time_series.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/analysis/artifacts/analysis_f1287e90-43b6-42c8-b3ac-e6cb3e06a71e/trend_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_0aa9fab0_f1287e90/final_output.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_0aa9fab0_f1287e90/index.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_0aa9fab0_f1287e90/load_runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_0aa9fab0_f1287e90/report.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_0aa9fab0_f1287e90/run_change_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_0aa9fab0_f1287e90/run_metric_comparison.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_8f825b22_4516d358/final_output.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_8f825b22_4516d358/index.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_8f825b22_4516d358/load_runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_8f825b22_4516d358/report.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_8f825b22_4516d358/run_change_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_8f825b22_4516d358/run_metric_comparison.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_f1287e90_8f825b22/final_output.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_f1287e90_8f825b22/index.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_f1287e90_8f825b22/load_runs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_f1287e90_8f825b22/report.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_f1287e90_8f825b22/run_change_detection.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/artifacts/comparison_f1287e90_8f825b22/run_metric_comparison.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_0aa9fab0_9fbf4776.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_0aa9fab0_9fbf4776.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_0aa9fab0_f1287e90.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_0aa9fab0_f1287e90.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_8f825b22_4516d358.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_8f825b22_4516d358.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_9fbf4776_a491fa0e.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_9fbf4776_a491fa0e.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_f1287e90_8f825b22.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/comparison/comparison_f1287e90_8f825b22.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/debug_report_r1_smoke.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/debug_report_r2_graphrag.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/debug_report_r2_graphrag_openai.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/debug_report_r3_bm25.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/debug_report_r3_bm25_langfuse3.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/debug_report_r3_dense_faiss.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/improvement_1d91a667-4288-4742-be3a-a8f5310c5140.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r2_graphrag_openai_stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r2_graphrag_openai_stage_report.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r2_graphrag_stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r2_graphrag_stage_report.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r3_bm25_langfuse2_stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r3_bm25_langfuse3_stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r3_bm25_langfuse_stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r3_bm25_phoenix_stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r3_bm25_stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r3_bm25_stage_report.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r3_dense_faiss_stage_events.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/r3_dense_faiss_stage_report.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/retrieval_benchmark_smoke_precision.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/retrieval_benchmark_smoke_precision_graphrag.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/reports/retrieval_benchmark_smoke_precision_multi.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/benchmark/download_kmmlu.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/dev/open_rag_trace_demo.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/dev/open_rag_trace_integration_template.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/dev/otel-collector-config.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/dev/start_web_ui_with_phoenix.sh +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/dev/validate_open_rag_trace.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/dev_seed_pipeline_results.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/analyzer/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/analyzer/ast_scanner.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/analyzer/confidence_scorer.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/analyzer/graph_builder.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/analyzer/side_effect_detector.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/generate_api_docs.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/models/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/models/schema.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/renderer/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/docs/renderer/html_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/ops/phoenix_watch.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/perf/backfill_langfuse_trace_url.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/perf/r3_dense_smoke.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/perf/r3_evalvault_run_dataset.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/perf/r3_retriever_docs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/perf/r3_smoke_real.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/perf/r3_stage_events_sample.jsonl +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/pipeline_template_inspect.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/reports/generate_release_notes.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/run_with_timeout.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/test_full_evaluation.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/tests/run_regressions.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/tests/run_retriever_stage_report_smoke.sh +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/validate_tutorials.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/verify_ragas_compliance.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/scripts/verify_workflows.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/main.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/routers/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/routers/benchmark.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/routers/config.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/routers/domain.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/routers/knowledge.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/api/routers/pipeline.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/app.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/agent.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/analyze.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/api.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/benchmark.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/config.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/debug.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/domain.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/experiment.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/gate.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/generate.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/history.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/init.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/kg.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/langfuse.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/method.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/phoenix.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/pipeline.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/prompts.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/run.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/run_helpers.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/commands/stage.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/analysis_io.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/console.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/errors.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/formatters.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/options.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/presets.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/progress.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/cli/utils/validators.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/mcp/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/mcp/schemas.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/inbound/mcp/tools.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/analysis_report_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/base_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/bm25_searcher_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/causal_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/causal_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/common.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/comparison_report_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/data_loader_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/detailed_report_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/diagnostic_playbook_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/embedding_distribution_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/embedding_searcher_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/hybrid_rrf_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/hybrid_weighted_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/hypothesis_generator_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/llm_report_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/low_performer_extractor_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/model_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/morpheme_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/morpheme_quality_checker_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/network_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/pattern_detector_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/pipeline_factory.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/pipeline_helpers.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/priority_summary_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/ragas_evaluator_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/retrieval_quality_checker_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/root_cause_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/run_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/run_change_detector_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/run_comparator_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/run_loader_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/run_metric_comparator_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/search_comparator_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/statistical_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/statistical_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/statistical_comparator_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/summary_report_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/time_series_analyzer_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/timeseries_advanced_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/trend_detector_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/analysis/verification_report_module.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/benchmark/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/benchmark/lm_eval_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/cache/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/cache/hybrid_cache.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/cache/memory_cache.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/base.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/csv_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/excel_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/json_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/loader_factory.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/method_input_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/streaming_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/templates.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/dataset/thresholds.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/debug/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/debug/report_renderer.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/documents/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/documents/ocr/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/documents/ocr/paddleocr_backend.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/documents/pdf_extractor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/documents/versioned_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/domain_memory/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/domain_memory/domain_memory_schema.sql +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/domain_memory/sqlite_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/improvement/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/improvement/insight_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/improvement/pattern_detector.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/improvement/playbook_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/improvement/stage_metric_playbook_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/kg/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/kg/graph_rag_retriever.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/kg/networkx_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/kg/parallel_kg_builder.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/kg/query_strategies.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/anthropic_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/azure_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/base.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/instructor_factory.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/llm_relation_augmenter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/ollama_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/openai_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/token_aware_chat.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/llm/vllm_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/methods/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/methods/baseline_oracle.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/methods/external_command.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/methods/registry.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/bm25_retriever.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/dense_retriever.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/document_chunker.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/hybrid_retriever.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/kiwi_tokenizer.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/korean_evaluation.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/korean_stopwords.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/nlp/korean/toolkit.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/phoenix/sync_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/report/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/report/dashboard_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/report/llm_report_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/report/markdown_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/storage/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/storage/benchmark_storage_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracer/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracer/open_rag_log_handler.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracker/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracker/langfuse_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracker/mlflow_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/adapters/outbound/tracker/phoenix_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/agent_types.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/domain_config.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/instrumentation.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/langfuse_support.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/model_config.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/phoenix_support.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/playbooks/improvement_playbook.yaml +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/config/settings.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/debug_ragas.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/debug_ragas_real.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/analysis.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/analysis_pipeline.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/benchmark.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/benchmark_run.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/dataset.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/debug.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/experiment.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/improvement.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/kg.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/memory.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/method.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/prompt.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/rag_trace.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/result.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/entities/stage.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/analysis_registry.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/confidence.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/contextual_relevancy.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/entity_preservation.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/insurance.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/no_answer.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/registry.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/retrieval_rank.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/terms_dictionary.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/metrics/text_match.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/analysis_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/async_batch_executor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/batch_executor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/benchmark_report_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/benchmark_runner.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/benchmark_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/cache_metrics.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/cluster_map_builder.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/dataset_preprocessor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/debug_report_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/document_chunker.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/document_versioning.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/domain_learning_hook.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/embedding_overlay.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/entity_extractor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/evaluator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/experiment_comparator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/experiment_manager.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/experiment_reporter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/experiment_repository.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/experiment_statistics.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/improvement_guide_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/intent_classifier.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/kg_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/memory_aware_evaluator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/memory_based_analysis.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/method_runner.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/pipeline_orchestrator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/pipeline_template_registry.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/prompt_manifest.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/prompt_registry.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/prompt_status.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/ragas_prompt_overrides.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/retrieval_metrics.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/retriever_context.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/stage_event_builder.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/stage_metric_guide_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/stage_metric_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/stage_summary_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/synthetic_qa_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/testset_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/threshold_profiles.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/unified_report_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/domain/services/visual_space_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/mkdocs_helpers.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/inbound/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/inbound/analysis_pipeline_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/inbound/evaluator_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/inbound/learning_hook_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/inbound/web_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/analysis_cache_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/analysis_module_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/analysis_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/benchmark_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/causal_analysis_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/dataset_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/domain_memory_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/embedding_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/improvement_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/intent_classifier_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/korean_nlp_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/llm_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/method_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/nlp_analysis_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/relation_augmenter_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/report_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/stage_storage_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/tracer_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/ports/outbound/tracker_port.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/reports/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/reports/release_notes.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/scripts/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/src/evalvault/scripts/regression_runner.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/conftest.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/README.md +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/benchmark/retrieval_ground_truth_min.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/benchmark/retrieval_ground_truth_multi.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/auto_insurance_qa_korean_full.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/comprehensive_dataset.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/edge_cases.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/edge_cases.xlsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/evaluation_test_sample.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/graphrag_retriever_docs.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/graphrag_smoke.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/insurance_document.txt +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/insurance_qa_english.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/insurance_qa_english.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/insurance_qa_english.xlsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/insurance_qa_korean.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/insurance_qa_korean.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/insurance_qa_korean.xlsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/insurance_qa_korean_versioned_pdf.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/run_mode_full_domain_memory.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/run_mode_simple.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/e2e/summary_eval_minimal.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/kg/minimal_graph.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/sample_dataset.csv +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/sample_dataset.json +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/fixtures/sample_dataset.xlsx +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/benchmark/test_benchmark_service_integration.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/conftest.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_cli_integration.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_data_flow.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_e2e_scenarios.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_evaluation_flow.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_full_workflow.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_langfuse_flow.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_phoenix_flow.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_pipeline_api_contracts.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_storage_flow.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/integration/test_summary_eval_fixture.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/optional_deps.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/inbound/mcp/test_execute_tools.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/inbound/mcp/test_read_tools.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/documents/test_pdf_extractor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/documents/test_versioned_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/improvement/__init__.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/improvement/test_insight_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/improvement/test_pattern_detector.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/improvement/test_playbook_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/improvement/test_stage_metric_playbook_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/kg/test_graph_rag_retriever.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/kg/test_parallel_kg_builder.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/adapters/outbound/storage/test_benchmark_storage_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/config/test_phoenix_support.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/conftest.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/metrics/test_analysis_metric_registry.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/metrics/test_confidence.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/metrics/test_contextual_relevancy.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/metrics/test_entity_preservation.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/metrics/test_metric_registry.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/metrics/test_no_answer.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/metrics/test_retrieval_rank.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/metrics/test_text_match.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_cache_metrics.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_claim_level.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_dataset_preprocessor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_document_versioning.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_evaluator_comprehensive.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_improvement_guide_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_retrieval_metrics.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_retriever_context.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_stage_event_builder.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_stage_metric_guide_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/services/test_synthetic_qa_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/test_embedding_overlay.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/test_prompt_manifest.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/domain/test_prompt_status.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/reports/test_release_notes.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/scripts/test_regression_runner.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_agent_types.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_analysis_entities.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_analysis_modules.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_analysis_pipeline.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_analysis_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_anthropic_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_async_batch_executor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_azure_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_benchmark_helpers.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_benchmark_runner.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_causal_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_cli.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_cli_domain.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_cli_init.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_cli_progress.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_cli_utils.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_data_loaders.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_domain_config.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_domain_memory.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_entities.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_entities_kg.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_entity_extractor.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_evaluator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_experiment.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_hybrid_cache.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_instrumentation.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_insurance_metric.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_intent_classifier.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_kg_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_kg_networkx.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_kiwi_tokenizer.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_kiwi_warning_suppression.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_korean_dense.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_korean_evaluation.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_korean_retrieval.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_langfuse_tracker.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_llm_relation_augmenter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_lm_eval_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_markdown_report.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_memory_cache.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_memory_services.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_method_plugins.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_mlflow_tracker.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_model_config.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_nlp_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_nlp_entities.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_ollama_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_openai_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_phoenix_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_pipeline_orchestrator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_ports.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_postgres_storage.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_rag_trace_entities.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_run_memory_helpers.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_run_mode_fixtures.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_settings.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_stage_cli.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_stage_metric_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_stage_storage.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_stage_summary_service.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_statistical_adapter.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_streaming_loader.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_summary_eval_fixture.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_testset_generator.py +0 -0
- {evalvault-1.61.0 → evalvault-1.62.0}/tests/unit/test_web_adapter.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: evalvault
|
|
3
|
-
Version: 1.61.0
|
|
3
|
+
Version: 1.62.0
|
|
4
4
|
Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
|
|
5
5
|
Project-URL: Homepage, https://github.com/ntts9990/EvalVault
|
|
6
6
|
Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
|
|
@@ -46,6 +46,7 @@ Requires-Dist: uvicorn>=0.40.0
|
|
|
46
46
|
Requires-Dist: xlrd
|
|
47
47
|
Provides-Extra: analysis
|
|
48
48
|
Requires-Dist: scikit-learn>=1.3.0; extra == 'analysis'
|
|
49
|
+
Requires-Dist: xgboost>=2.0.0; extra == 'analysis'
|
|
49
50
|
Provides-Extra: anthropic
|
|
50
51
|
Requires-Dist: anthropic; extra == 'anthropic'
|
|
51
52
|
Requires-Dist: langchain-anthropic; extra == 'anthropic'
|
|
@@ -86,6 +87,7 @@ Requires-Dist: rank-bm25>=0.2.2; extra == 'dev'
|
|
|
86
87
|
Requires-Dist: ruff; extra == 'dev'
|
|
87
88
|
Requires-Dist: scikit-learn<1.4.0,>=1.3.0; extra == 'dev'
|
|
88
89
|
Requires-Dist: sentence-transformers>=5.2.0; extra == 'dev'
|
|
90
|
+
Requires-Dist: xgboost>=2.0.0; extra == 'dev'
|
|
89
91
|
Provides-Extra: docs
|
|
90
92
|
Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
|
|
91
93
|
Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
|
|
@@ -62,6 +62,15 @@ npm run dev
|
|
|
62
62
|
|
|
63
63
|
---
|
|
64
64
|
|
|
65
|
+
## 타입체크 (Pyright 비활성화)
|
|
66
|
+
|
|
67
|
+
EvalVault는 Ruff만 사용합니다. Pyright/Pylance 경고가 보이면 에디터 설정을 끄세요.
|
|
68
|
+
|
|
69
|
+
- VS Code: 확장(“Pylance”, “Pyright”) 비활성화 또는 제거
|
|
70
|
+
- VS Code 설정 예시: `"python.analysis.typeCheckingMode": "off"`
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
65
74
|
## 문서 작업 규칙 (Docs)
|
|
66
75
|
|
|
67
76
|
- `docs/`는 **현재 프로젝트에 필요한 문서만** 유지합니다. (중복/과거 정보는 삭제)
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# RAG 인간 피드백 보정: 상세 구현 계획서
|
|
2
|
+
|
|
3
|
+
본 문서는 `docs/guides/rag_human_feedback_calibration.md`의 설계를 기반으로 EvalVault에 **사람 만족도 보정(calibration) 기능**을 구현하기 위한 상세 실행 계획을 정리합니다.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 1. 목표/성공 기준
|
|
8
|
+
|
|
9
|
+
### 목표
|
|
10
|
+
- 대표 샘플 기반 인간 평가 수집 → 보정 모델 학습 → 전체 결과에 보정 점수 적용.
|
|
11
|
+
- RAGAS 점수와 사용자 만족도 괴리를 줄이고, 이해 가능한 보정 지표를 제공.
|
|
12
|
+
|
|
13
|
+
### 성공 기준
|
|
14
|
+
- DB에 `satisfaction_feedback` 저장/조회 가능.
|
|
15
|
+
- Run 상세 응답에 `calibrated_satisfaction`, `imputed`, `imputation_source` 포함.
|
|
16
|
+
- CLI `evalvault calibrate` 실행 시 보정 모델 성능 요약 출력.
|
|
17
|
+
- Web UI에서 평가 입력/조회/보정 점수 표시.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## 2. 전제 및 스코프
|
|
22
|
+
|
|
23
|
+
### 전제
|
|
24
|
+
- 문서에 제시된 정책을 기본값으로 채택:
|
|
25
|
+
- 만족도 라벨: 1~5
|
|
26
|
+
- Thumb 피드백: up/down/none (약한 레이블)
|
|
27
|
+
- 보정 점수: `calibrated_satisfaction`
|
|
28
|
+
- 결측치 보정 규칙: thumb → 매핑, 없으면 모델 예측
|
|
29
|
+
|
|
30
|
+
### 스코프
|
|
31
|
+
- 백엔드: StoragePort, SQL 스키마, API, CLI, 도메인 서비스
|
|
32
|
+
- 프론트엔드: RunDetails UI에 만족도 평가 탭 + 보정 점수 표시
|
|
33
|
+
- 모델: 선형 회귀 + XGBoost 회귀(선형은 설명용)
|
|
34
|
+
|
|
35
|
+
### 비스코프(초기)
|
|
36
|
+
- 실시간 온라인 학습, A/B 실험 자동 트리거
|
|
37
|
+
- 자동 평가자(LLM Judge) 연동
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## 3. 아키텍처 개요
|
|
42
|
+
|
|
43
|
+
### 데이터 플로우
|
|
44
|
+
1) 대표 샘플 선정(클러스터링) → 2) 인간 평가 수집 → 3) 피처 생성 → 4) 모델 학습 → 5) 보정 점수 추정 → 6) UI 표시
|
|
45
|
+
|
|
46
|
+
### 재사용 가능한 기존 컴포넌트
|
|
47
|
+
- 클러스터링: `src/evalvault/domain/services/cluster_map_builder.py`
|
|
48
|
+
- NLP 피처 패턴: `src/evalvault/adapters/outbound/analysis/nlp_adapter.py`
|
|
49
|
+
- Storage 어댑터 패턴: `src/evalvault/adapters/outbound/storage/*_adapter.py`
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## 4. 데이터 모델/스키마 설계
|
|
54
|
+
|
|
55
|
+
### 신규 테이블
|
|
56
|
+
`src/evalvault/adapters/outbound/storage/schema.sql`
|
|
57
|
+
|
|
58
|
+
`satisfaction_feedback`
|
|
59
|
+
- `id` (PK)
|
|
60
|
+
- `run_id`
|
|
61
|
+
- `test_case_id`
|
|
62
|
+
- `satisfaction_score` (1~5, nullable)
|
|
63
|
+
- `thumb_feedback` (`up`/`down`/`none`)
|
|
64
|
+
- `comment` (nullable)
|
|
65
|
+
- `rater_id` (nullable)
|
|
66
|
+
- `created_at`
|
|
67
|
+
|
|
68
|
+
### 결과 확장
|
|
69
|
+
- 테스트 케이스 결과: `calibrated_satisfaction`, `imputed`, `imputation_source`
|
|
70
|
+
- run summary: `avg_satisfaction_score`, `thumb_up_rate`, `imputed_ratio`
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## 5. StoragePort/Adapter 설계
|
|
75
|
+
|
|
76
|
+
### StoragePort 확장
|
|
77
|
+
`src/evalvault/ports/outbound/storage_port.py`
|
|
78
|
+
- `save_feedback(...)`
|
|
79
|
+
- `list_feedback(run_id)`
|
|
80
|
+
- `get_feedback_summary(run_id)`
|
|
81
|
+
|
|
82
|
+
### 어댑터 확장
|
|
83
|
+
- `src/evalvault/adapters/outbound/storage/sqlite_adapter.py`
|
|
84
|
+
- `src/evalvault/adapters/outbound/storage/postgres_adapter.py`
|
|
85
|
+
|
|
86
|
+
### 마이그레이션
|
|
87
|
+
- 기존 DB에 `satisfaction_feedback` 테이블 추가
|
|
88
|
+
- 인덱스: `run_id`, `test_case_id`
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## 6. API 설계 (FastAPI)
|
|
93
|
+
|
|
94
|
+
### 라우터 확장
|
|
95
|
+
`src/evalvault/adapters/inbound/api/routers/runs.py`
|
|
96
|
+
|
|
97
|
+
- `POST /api/v1/runs/{run_id}/feedback`
|
|
98
|
+
- 요청: `test_case_id`, `satisfaction_score?`, `thumb_feedback?`, `comment?`, `rater_id?`
|
|
99
|
+
|
|
100
|
+
- `GET /api/v1/runs/{run_id}/feedback`
|
|
101
|
+
- 응답: 피드백 리스트
|
|
102
|
+
|
|
103
|
+
- `GET /api/v1/runs/{run_id}`
|
|
104
|
+
- summary에 `avg_satisfaction_score`, `thumb_up_rate`, `imputed_ratio` 포함
|
|
105
|
+
- results[].metrics에 `calibrated_satisfaction`, `imputed`, `imputation_source` 포함
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## 7. CLI 설계
|
|
110
|
+
|
|
111
|
+
### 명령
|
|
112
|
+
`src/evalvault/adapters/inbound/cli/commands/calibrate.py`
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
evalvault calibrate --run-id <ID> [--model linear|xgb|both] [--write-back]
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### 출력
|
|
119
|
+
- 모델 성능 요약: Pearson/Spearman, MAE
|
|
120
|
+
- 피처 중요도(가능 시)
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## 8. Web UI 설계
|
|
125
|
+
|
|
126
|
+
`frontend/src/pages/RunDetails.tsx`
|
|
127
|
+
|
|
128
|
+
### UI 기능
|
|
129
|
+
- 탭: `만족도 평가`
|
|
130
|
+
- 별점(1~5), thumb up/down, 코멘트 입력
|
|
131
|
+
- 테스트 케이스별 저장
|
|
132
|
+
|
|
133
|
+
### 표시
|
|
134
|
+
- Summary 카드: 평균 만족도, Thumb Up 비율, 보정 비율
|
|
135
|
+
- 메트릭 표에 `calibrated_satisfaction` 컬럼 추가
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## 9. 보정/결측치 처리 규칙
|
|
140
|
+
|
|
141
|
+
1. `satisfaction_score` 있음 → 그대로 사용
|
|
142
|
+
2. 없고 `thumb_feedback` 있음 → 약한 레이블 매핑
|
|
143
|
+
- `up = 4.0`, `down = 2.0`
|
|
144
|
+
3. 둘 다 없으면 모델 예측값 사용
|
|
145
|
+
4. 모든 점수는 1~5로 클리핑
|
|
146
|
+
5. `imputed` 및 `imputation_source` 필드 표시
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## 10. 모델/피처 설계
|
|
151
|
+
|
|
152
|
+
### 피처
|
|
153
|
+
- RAGAS: `faithfulness`, `answer_relevancy`, `context_precision`, `context_recall`
|
|
154
|
+
- 한국어 피처:
|
|
155
|
+
- 답변 길이
|
|
156
|
+
- 질문 키워드 누락률
|
|
157
|
+
- 형태소 다양성(TTR)
|
|
158
|
+
|
|
159
|
+
### 모델
|
|
160
|
+
- 기본: 선형회귀 (설명용)
|
|
161
|
+
- 출력: XGBoost 회귀 (예측 성능용)
|
|
162
|
+
|
|
163
|
+
### 의존성
|
|
164
|
+
- `scikit-learn`은 이미 존재
|
|
165
|
+
- `xgboost`는 `pyproject.toml`의 optional dependencies에 추가 필요
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## 11. 대표 샘플링 전략
|
|
170
|
+
|
|
171
|
+
### 1차 버전
|
|
172
|
+
- `cluster_map_builder.py`의 KMeans + TF-IDF 임베딩 활용
|
|
173
|
+
- 클러스터 당 centroid 가까운 케이스 1개씩 선택
|
|
174
|
+
|
|
175
|
+
### 확장 버전
|
|
176
|
+
- 불확실성 기반 샘플 추가 (예측값 2.4~2.6 등)
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## 12. 테스트/검증 계획
|
|
181
|
+
|
|
182
|
+
### 단위 테스트
|
|
183
|
+
- StoragePort: save/list 피드백 동작
|
|
184
|
+
- 보정 모델: 학습/예측 결과 shape 및 범위
|
|
185
|
+
|
|
186
|
+
### 통합 테스트
|
|
187
|
+
- API 엔드포인트: 저장/조회 동작
|
|
188
|
+
|
|
189
|
+
### 품질 지표
|
|
190
|
+
- 상관계수, MAE
|
|
191
|
+
- Inter-rater agreement(가능 시): Cohen/Fleiss Kappa
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## 13. 단계별 일정(제안)
|
|
196
|
+
|
|
197
|
+
1. **DB/Storage 레이어 확장**
|
|
198
|
+
2. **도메인 서비스(모델/보정 로직) 구현**
|
|
199
|
+
3. **API 확장**
|
|
200
|
+
4. **CLI 구현**
|
|
201
|
+
5. **UI 통합**
|
|
202
|
+
6. **테스트 및 검증**
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## 14. 리스크 및 대응
|
|
207
|
+
|
|
208
|
+
- **라벨 노이즈**: 평가 가이드 문서화 + 다중 평가자 평균
|
|
209
|
+
- **샘플 편향**: 대표 샘플링 + 운영 중 추가 샘플링
|
|
210
|
+
- **모델 과적합**: 단순 모델 우선, 교차검증
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## 15. 참고 문서
|
|
215
|
+
|
|
216
|
+
- `docs/guides/rag_human_feedback_calibration.md`
|
|
217
|
+
- `src/evalvault/domain/services/cluster_map_builder.py`
|
|
218
|
+
- `src/evalvault/adapters/outbound/analysis/nlp_adapter.py`
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
import { useEffect, useState } from "react";
|
|
2
|
+
import { useEffect, useState } from "react";
|
|
2
3
|
import { useParams, Link, useLocation } from "react-router-dom";
|
|
3
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
fetchRunDetails,
|
|
6
|
+
fetchRunFeedback,
|
|
7
|
+
saveRunFeedback,
|
|
8
|
+
fetchRunFeedbackSummary,
|
|
9
|
+
type RunDetailsResponse,
|
|
10
|
+
type FeedbackResponse
|
|
11
|
+
} from "../services/api";
|
|
4
12
|
import { Layout } from "../components/Layout";
|
|
5
13
|
import { InsightSpacePanel } from "../components/InsightSpacePanel";
|
|
6
14
|
import { formatScore, normalizeScore, safeAverage } from "../utils/score";
|
|
@@ -15,10 +23,171 @@ import {
|
|
|
15
23
|
MessageSquare,
|
|
16
24
|
BookOpen,
|
|
17
25
|
ExternalLink,
|
|
26
|
+
ThumbsUp,
|
|
27
|
+
ThumbsDown,
|
|
28
|
+
Star,
|
|
29
|
+
Save,
|
|
18
30
|
} from "lucide-react";
|
|
19
31
|
import { BarChart, Bar, XAxis, YAxis, Tooltip, ResponsiveContainer, Cell } from "recharts";
|
|
20
32
|
import { SUMMARY_METRICS, SUMMARY_METRIC_THRESHOLDS } from "../utils/summaryMetrics";
|
|
21
33
|
|
|
34
|
+
function FeedbackItem({
|
|
35
|
+
result,
|
|
36
|
+
feedback,
|
|
37
|
+
onSave,
|
|
38
|
+
}: {
|
|
39
|
+
result: RunDetailsResponse["results"][number];
|
|
40
|
+
feedback?: FeedbackResponse;
|
|
41
|
+
onSave: (
|
|
42
|
+
id: string,
|
|
43
|
+
score: number | null,
|
|
44
|
+
thumb: "up" | "down" | "none" | null,
|
|
45
|
+
comment: string | null
|
|
46
|
+
) => void;
|
|
47
|
+
}) {
|
|
48
|
+
const [score, setScore] = useState<number | null>(feedback?.satisfaction_score ?? null);
|
|
49
|
+
const resolveThumb = (value: string | null | undefined): "up" | "down" | "none" => {
|
|
50
|
+
if (value === "up" || value === "down") {
|
|
51
|
+
return value;
|
|
52
|
+
}
|
|
53
|
+
return "none";
|
|
54
|
+
};
|
|
55
|
+
const [thumb, setThumb] = useState<"up" | "down" | "none" | null>(
|
|
56
|
+
resolveThumb(feedback?.thumb_feedback)
|
|
57
|
+
);
|
|
58
|
+
const [comment, setComment] = useState<string>(feedback?.comment ?? "");
|
|
59
|
+
const [isDirty, setIsDirty] = useState(false);
|
|
60
|
+
|
|
61
|
+
useEffect(() => {
|
|
62
|
+
setScore(feedback?.satisfaction_score ?? null);
|
|
63
|
+
setThumb(resolveThumb(feedback?.thumb_feedback));
|
|
64
|
+
setComment(feedback?.comment ?? "");
|
|
65
|
+
setIsDirty(false);
|
|
66
|
+
}, [feedback]);
|
|
67
|
+
|
|
68
|
+
const handleSave = () => {
|
|
69
|
+
onSave(result.test_case_id, score, thumb, comment || null);
|
|
70
|
+
setIsDirty(false);
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
return (
|
|
74
|
+
<div className="bg-card border border-border rounded-xl p-4 transition-all hover:border-primary/50">
|
|
75
|
+
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
|
76
|
+
<div className="space-y-3">
|
|
77
|
+
<div>
|
|
78
|
+
<h4 className="text-xs font-semibold text-muted-foreground uppercase tracking-wider mb-1">
|
|
79
|
+
Question
|
|
80
|
+
</h4>
|
|
81
|
+
<p className="text-sm font-medium text-foreground line-clamp-2">
|
|
82
|
+
{result.question}
|
|
83
|
+
</p>
|
|
84
|
+
</div>
|
|
85
|
+
<div>
|
|
86
|
+
<h4 className="text-xs font-semibold text-muted-foreground uppercase tracking-wider mb-1">
|
|
87
|
+
Answer
|
|
88
|
+
</h4>
|
|
89
|
+
<p className="text-sm text-muted-foreground line-clamp-3">
|
|
90
|
+
{result.answer}
|
|
91
|
+
</p>
|
|
92
|
+
</div>
|
|
93
|
+
{result.calibrated_satisfaction !== null && result.calibrated_satisfaction !== undefined && (
|
|
94
|
+
<div className="flex items-center gap-2 mt-2">
|
|
95
|
+
<span className="text-xs font-mono text-muted-foreground bg-secondary px-2 py-1 rounded">
|
|
96
|
+
Calibrated: {result.calibrated_satisfaction.toFixed(2)}
|
|
97
|
+
</span>
|
|
98
|
+
{result.imputed && (
|
|
99
|
+
<span className="text-[10px] text-amber-500 border border-amber-500/30 px-1.5 rounded">
|
|
100
|
+
Imputed
|
|
101
|
+
</span>
|
|
102
|
+
)}
|
|
103
|
+
</div>
|
|
104
|
+
)}
|
|
105
|
+
</div>
|
|
106
|
+
|
|
107
|
+
<div className="space-y-4 border-l border-border/50 pl-0 lg:pl-6">
|
|
108
|
+
<div className="flex items-center justify-between">
|
|
109
|
+
<div className="flex items-center gap-4">
|
|
110
|
+
<div className="flex items-center gap-1">
|
|
111
|
+
{[1, 2, 3, 4, 5].map((s) => (
|
|
112
|
+
<button
|
|
113
|
+
key={s}
|
|
114
|
+
onClick={() => {
|
|
115
|
+
setScore(s);
|
|
116
|
+
setIsDirty(true);
|
|
117
|
+
}}
|
|
118
|
+
className={`p-1 transition-colors ${
|
|
119
|
+
(score ?? 0) >= s
|
|
120
|
+
? "text-yellow-400"
|
|
121
|
+
: "text-muted-foreground/30 hover:text-yellow-400/50"
|
|
122
|
+
}`}
|
|
123
|
+
>
|
|
124
|
+
<Star
|
|
125
|
+
className="w-5 h-5"
|
|
126
|
+
fill={(score ?? 0) >= s ? "currentColor" : "none"}
|
|
127
|
+
/>
|
|
128
|
+
</button>
|
|
129
|
+
))}
|
|
130
|
+
</div>
|
|
131
|
+
|
|
132
|
+
<div className="flex items-center gap-2 border-l border-border pl-4">
|
|
133
|
+
<button
|
|
134
|
+
onClick={() => {
|
|
135
|
+
setThumb(thumb === "up" ? "none" : "up");
|
|
136
|
+
setIsDirty(true);
|
|
137
|
+
}}
|
|
138
|
+
className={`p-2 rounded-full transition-colors ${
|
|
139
|
+
thumb === "up"
|
|
140
|
+
? "bg-emerald-500/10 text-emerald-500"
|
|
141
|
+
: "hover:bg-secondary text-muted-foreground"
|
|
142
|
+
}`}
|
|
143
|
+
>
|
|
144
|
+
<ThumbsUp className="w-4 h-4" />
|
|
145
|
+
</button>
|
|
146
|
+
<button
|
|
147
|
+
onClick={() => {
|
|
148
|
+
setThumb(thumb === "down" ? "none" : "down");
|
|
149
|
+
setIsDirty(true);
|
|
150
|
+
}}
|
|
151
|
+
className={`p-2 rounded-full transition-colors ${
|
|
152
|
+
thumb === "down"
|
|
153
|
+
? "bg-rose-500/10 text-rose-500"
|
|
154
|
+
: "hover:bg-secondary text-muted-foreground"
|
|
155
|
+
}`}
|
|
156
|
+
>
|
|
157
|
+
<ThumbsDown className="w-4 h-4" />
|
|
158
|
+
</button>
|
|
159
|
+
</div>
|
|
160
|
+
</div>
|
|
161
|
+
|
|
162
|
+
<button
|
|
163
|
+
onClick={handleSave}
|
|
164
|
+
disabled={!isDirty}
|
|
165
|
+
className={`flex items-center gap-2 px-3 py-1.5 rounded-lg text-xs font-semibold transition-all ${
|
|
166
|
+
isDirty
|
|
167
|
+
? "bg-primary text-primary-foreground shadow-md hover:bg-primary/90"
|
|
168
|
+
: "bg-secondary text-muted-foreground opacity-50 cursor-not-allowed"
|
|
169
|
+
}`}
|
|
170
|
+
>
|
|
171
|
+
<Save className="w-3.5 h-3.5" />
|
|
172
|
+
Save
|
|
173
|
+
</button>
|
|
174
|
+
</div>
|
|
175
|
+
|
|
176
|
+
<textarea
|
|
177
|
+
value={comment}
|
|
178
|
+
onChange={(e) => {
|
|
179
|
+
setComment(e.target.value);
|
|
180
|
+
setIsDirty(true);
|
|
181
|
+
}}
|
|
182
|
+
placeholder="Add a comment about this result..."
|
|
183
|
+
className="w-full h-20 p-3 bg-secondary/20 border border-border rounded-lg text-sm focus:outline-none focus:ring-1 focus:ring-primary/50 resize-none"
|
|
184
|
+
/>
|
|
185
|
+
</div>
|
|
186
|
+
</div>
|
|
187
|
+
</div>
|
|
188
|
+
);
|
|
189
|
+
}
|
|
190
|
+
|
|
22
191
|
export function RunDetails() {
|
|
23
192
|
const { id } = useParams<{ id: string }>();
|
|
24
193
|
const location = useLocation();
|
|
@@ -26,8 +195,11 @@ export function RunDetails() {
|
|
|
26
195
|
const [loading, setLoading] = useState(true);
|
|
27
196
|
const [error, setError] = useState<string | null>(null);
|
|
28
197
|
// Tabs
|
|
29
|
-
const [activeTab, setActiveTab] = useState<"overview" | "performance">("overview");
|
|
198
|
+
const [activeTab, setActiveTab] = useState<"overview" | "performance" | "feedback">("overview");
|
|
30
199
|
const [expandedCases, setExpandedCases] = useState<Set<string>>(new Set());
|
|
200
|
+
const [feedbackMap, setFeedbackMap] = useState<Record<string, FeedbackResponse>>({});
|
|
201
|
+
const [loadingFeedback, setLoadingFeedback] = useState(false);
|
|
202
|
+
|
|
31
203
|
const summaryMetricSet = new Set(SUMMARY_METRICS);
|
|
32
204
|
|
|
33
205
|
const previewPrompt = (content?: string) => {
|
|
@@ -52,6 +224,20 @@ export function RunDetails() {
|
|
|
52
224
|
loadDetails();
|
|
53
225
|
}, [id]);
|
|
54
226
|
|
|
227
|
+
// Load the per-case feedback for this run whenever the Feedback tab is active.
// The response list is indexed by test_case_id so each case can look up its
// own entry directly from feedbackMap.
useEffect(() => {
  if (activeTab !== "feedback" || !id) return undefined;

  // Set by the cleanup below. A response that arrives after the tab or run id
  // changed must not overwrite newer state, and its `finally` must not clear
  // the loading flag that a newer in-flight request has just raised.
  let cancelled = false;

  setLoadingFeedback(true);
  fetchRunFeedback(id)
    .then((feedbacks) => {
      if (cancelled) return;
      const map: Record<string, FeedbackResponse> = {};
      feedbacks.forEach((f) => {
        map[f.test_case_id] = f;
      });
      setFeedbackMap(map);
    })
    .catch((err) => console.error("Failed to load feedback", err))
    .finally(() => {
      if (!cancelled) setLoadingFeedback(false);
    });

  return () => {
    cancelled = true;
  };
}, [activeTab, id]);
|
|
240
|
+
|
|
55
241
|
useEffect(() => {
|
|
56
242
|
if (!data || !location.hash) return;
|
|
57
243
|
const match = location.hash.match(/^#case-(.+)$/);
|
|
@@ -113,6 +299,44 @@ export function RunDetails() {
|
|
|
113
299
|
setExpandedCases(newSet);
|
|
114
300
|
};
|
|
115
301
|
|
|
302
|
+
// Persist the feedback for a single test case, then refresh the run-level
// summary figures shown on the page.
//
// Phase 1 (save) is the critical step: on failure it is logged, the user is
// alerted, and nothing else runs. Phase 2 (summary refresh) is best-effort:
// a failure there is only logged, because the feedback itself is already saved.
const handleSaveFeedback = async (
  caseId: string,
  score: number | null,
  thumb: "up" | "down" | "none" | null,
  comment: string | null
) => {
  // Without a run id there is nothing to attach the feedback to.
  if (!id) return;

  // Phase 1: save the feedback and mirror the stored record into local state.
  try {
    const saved = await saveRunFeedback(id, {
      test_case_id: caseId,
      satisfaction_score: score,
      thumb_feedback: thumb,
      comment,
    });
    setFeedbackMap((current) => ({ ...current, [caseId]: saved }));
  } catch (e) {
    console.error("Failed to save feedback", e);
    alert("Failed to save feedback");
    return;
  }

  // Phase 2: pull the recomputed aggregates and merge them into the summary.
  try {
    const { avg_satisfaction_score, thumb_up_rate } = await fetchRunFeedbackSummary(id);
    setData((current) =>
      current
        ? {
            ...current,
            summary: {
              ...current.summary,
              avg_satisfaction_score,
              thumb_up_rate,
            },
          }
        : current
    );
  } catch (summaryErr) {
    console.error("Failed to update feedback summary", summaryErr);
  }
};
|
|
339
|
+
|
|
116
340
|
// Prepare chart data
|
|
117
341
|
const metricScores = data?.summary.metrics_evaluated?.map(metric => {
|
|
118
342
|
if (!data?.results) return { name: metric, score: 0 };
|
|
@@ -219,6 +443,12 @@ export function RunDetails() {
|
|
|
219
443
|
>
|
|
220
444
|
Performance
|
|
221
445
|
</button>
|
|
446
|
+
<button
|
|
447
|
+
onClick={() => setActiveTab("feedback")}
|
|
448
|
+
className={`tab-pill ${activeTab === "feedback" ? "tab-pill-active" : "tab-pill-inactive"}`}
|
|
449
|
+
>
|
|
450
|
+
Feedback
|
|
451
|
+
</button>
|
|
222
452
|
</div>
|
|
223
453
|
|
|
224
454
|
{summary.phoenix_drift != null && (
|
|
@@ -306,7 +536,7 @@ export function RunDetails() {
|
|
|
306
536
|
</div>
|
|
307
537
|
)}
|
|
308
538
|
|
|
309
|
-
{activeTab === "overview"
|
|
539
|
+
{activeTab === "overview" && (
|
|
310
540
|
<>
|
|
311
541
|
{/* Charts & Summary Grid (Overview) */}
|
|
312
542
|
<div className="grid grid-cols-1 lg:grid-cols-3 gap-6 mb-8">
|
|
@@ -402,8 +632,9 @@ export function RunDetails() {
|
|
|
402
632
|
</div>
|
|
403
633
|
)}
|
|
404
634
|
</>
|
|
405
|
-
)
|
|
406
|
-
|
|
635
|
+
)}
|
|
636
|
+
|
|
637
|
+
{activeTab === "performance" && (
|
|
407
638
|
/* Performance Tab Content */
|
|
408
639
|
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-8 animate-in fade-in duration-300">
|
|
409
640
|
{/* Latency Analysis */}
|
|
@@ -457,11 +688,59 @@ export function RunDetails() {
|
|
|
457
688
|
</div>
|
|
458
689
|
)}
|
|
459
690
|
|
|
460
|
-
{
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
691
|
+
{activeTab === "feedback" && (
  /* Feedback Tab Content */
  <div className="animate-in fade-in duration-300">
    {/* Run-level feedback summary cards */}
    <div className="grid grid-cols-1 md:grid-cols-3 gap-6 mb-8">
      <div className="surface-panel p-6">
        <h3 className="font-semibold text-muted-foreground text-sm mb-2">Avg. Satisfaction</h3>
        <p className="text-3xl font-bold text-foreground">
          {/* Nullish check (not truthiness) so a numeric 0 still renders as a score, like the sibling cards. */}
          {summary.avg_satisfaction_score != null
            ? summary.avg_satisfaction_score.toFixed(2)
            : "N/A"}
          <span className="text-sm font-normal text-muted-foreground ml-2">/ 5.0</span>
        </p>
      </div>
      <div className="surface-panel p-6">
        <h3 className="font-semibold text-muted-foreground text-sm mb-2">Thumb Up Rate</h3>
        <p className="text-3xl font-bold text-emerald-500">
          {summary.thumb_up_rate != null
            ? `${(summary.thumb_up_rate * 100).toFixed(1)}%`
            : "N/A"}
        </p>
      </div>
      <div className="surface-panel p-6">
        <h3 className="font-semibold text-muted-foreground text-sm mb-2">Imputed Ratio</h3>
        <p className="text-3xl font-bold text-amber-500">
          {/* A missing ratio is shown as 0.0% rather than N/A. */}
          {summary.imputed_ratio != null
            ? `${(summary.imputed_ratio * 100).toFixed(1)}%`
            : "0.0%"}
        </p>
        <p className="text-xs text-muted-foreground mt-1">Cases with auto-calibrated feedback</p>
      </div>
    </div>

    {/* One editable feedback row per test case */}
    <div className="space-y-4">
      {loadingFeedback ? (
        <div className="text-center py-10 text-muted-foreground">Loading feedback...</div>
      ) : (
        /* Same `results || []` guard as the Test Case Explorer, so a run without results renders an empty list. */
        (results || []).map((result) => (
          <FeedbackItem
            key={result.test_case_id}
            result={result}
            feedback={feedbackMap[result.test_case_id]}
            onSave={handleSaveFeedback}
          />
        ))
      )}
    </div>
  </div>
)}
|
|
736
|
+
|
|
737
|
+
{activeTab !== "feedback" && (
|
|
738
|
+
<>
|
|
739
|
+
{/* Test Case Explorer */}
|
|
740
|
+
<h3 className="font-semibold text-xl mb-4">Test Case Explorer</h3>
|
|
741
|
+
<div className="space-y-4">
|
|
742
|
+
{(results || []).map((result) => {
|
|
743
|
+
const isExpanded = expandedCases.has(result.test_case_id);
|
|
465
744
|
const allPassed = result.metrics.every(m => m.passed);
|
|
466
745
|
|
|
467
746
|
return (
|
|
@@ -485,7 +764,14 @@ export function RunDetails() {
|
|
|
485
764
|
</div>
|
|
486
765
|
<div className="flex-1 min-w-0">
|
|
487
766
|
<p className="font-medium text-foreground line-clamp-1">{result.question}</p>
|
|
488
|
-
<
|
|
767
|
+
<div className="flex items-center gap-2 mt-1">
|
|
768
|
+
<p className="text-sm text-muted-foreground line-clamp-1">{result.answer}</p>
|
|
769
|
+
{result.calibrated_satisfaction !== null && result.calibrated_satisfaction !== undefined && (
|
|
770
|
+
<span className="shrink-0 px-1.5 py-0.5 rounded bg-secondary text-[10px] font-mono text-muted-foreground border border-border">
|
|
771
|
+
Satisf: {result.calibrated_satisfaction.toFixed(1)}
|
|
772
|
+
</span>
|
|
773
|
+
)}
|
|
774
|
+
</div>
|
|
489
775
|
</div>
|
|
490
776
|
|
|
491
777
|
<div className="flex items-center gap-3">
|
|
@@ -595,10 +881,12 @@ export function RunDetails() {
|
|
|
595
881
|
</div>
|
|
596
882
|
</div>
|
|
597
883
|
)}
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
884
|
+
</div>
|
|
885
|
+
);
|
|
886
|
+
})}
|
|
887
|
+
</div>
|
|
888
|
+
</>
|
|
889
|
+
)}
|
|
602
890
|
</div>
|
|
603
891
|
</Layout>
|
|
604
892
|
);
|