judgeval 0.0.55__tar.gz → 0.22.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval-0.22.3/.github/ISSUE_TEMPLATE/config.yml +5 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/.github/pull_request_template.md +1 -8
- judgeval-0.22.3/.github/workflows/ci.yaml +148 -0
- judgeval-0.22.3/.github/workflows/claude-code-review.yml +35 -0
- judgeval-0.22.3/.github/workflows/claude.yml +40 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/.github/workflows/lint.yaml +0 -13
- judgeval-0.22.3/.github/workflows/mypy.yaml +25 -0
- judgeval-0.22.3/.github/workflows/pre-commit-autoupdate.yaml +38 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/.github/workflows/release.yaml +36 -4
- {judgeval-0.0.55 → judgeval-0.22.3}/.pre-commit-config.yaml +7 -5
- judgeval-0.22.3/CONTRIBUTING.md +10 -0
- judgeval-0.22.3/PKG-INFO +266 -0
- judgeval-0.22.3/README.md +238 -0
- judgeval-0.22.3/assets/agent_trace_example.png +0 -0
- judgeval-0.22.3/assets/brand/company.jpg +0 -0
- judgeval-0.22.3/assets/brand/company_banner.jpg +0 -0
- judgeval-0.22.3/assets/brand/darkmode.svg +7 -0
- judgeval-0.22.3/assets/brand/full_logo.png +0 -0
- judgeval-0.22.3/assets/brand/icon.png +0 -0
- judgeval-0.22.3/assets/brand/lightmode.svg +7 -0
- judgeval-0.22.3/assets/brand/white_background.png +0 -0
- judgeval-0.22.3/assets/custom_scorer_online_abm.png +0 -0
- judgeval-0.22.3/assets/errors.png +0 -0
- judgeval-0.22.3/assets/logo_darkmode.svg +7 -0
- judgeval-0.22.3/assets/logo_lightmode.svg +7 -0
- judgeval-0.22.3/assets/online_eval.png +0 -0
- judgeval-0.22.3/assets/product_shot.png +0 -0
- judgeval-0.22.3/assets/quickstart_trajectory_ss.png +0 -0
- judgeval-0.22.3/assets/test.png +0 -0
- judgeval-0.22.3/assets/tests.png +0 -0
- judgeval-0.22.3/pyproject.toml +101 -0
- judgeval-0.22.3/scripts/api_generator.py +365 -0
- judgeval-0.22.3/scripts/api_generator_v1.py +468 -0
- judgeval-0.22.3/scripts/openapi_transform.py +126 -0
- judgeval-0.22.3/scripts/update_types.sh +38 -0
- judgeval-0.22.3/src/judgeval/__init__.py +178 -0
- judgeval-0.22.3/src/judgeval/api/__init__.py +523 -0
- judgeval-0.22.3/src/judgeval/api/api_types.py +416 -0
- judgeval-0.22.3/src/judgeval/cli.py +112 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/constants.py +7 -57
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/data/__init__.py +1 -3
- judgeval-0.22.3/src/judgeval/data/evaluation_run.py +125 -0
- judgeval-0.22.3/src/judgeval/data/example.py +35 -0
- judgeval-0.22.3/src/judgeval/data/judgment_types.py +459 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/data/result.py +12 -19
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/data/scorer_data.py +5 -28
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/data/scripts/openapi_transform.py +4 -4
- judgeval-0.22.3/src/judgeval/data/trace.py +121 -0
- judgeval-0.22.3/src/judgeval/dataset/__init__.py +264 -0
- judgeval-0.22.3/src/judgeval/env.py +53 -0
- judgeval-0.22.3/src/judgeval/evaluation/__init__.py +347 -0
- judgeval-0.22.3/src/judgeval/exceptions.py +28 -0
- judgeval-0.22.3/src/judgeval/integrations/langgraph/__init__.py +13 -0
- judgeval-0.22.3/src/judgeval/integrations/openlit/__init__.py +51 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/judges/__init__.py +2 -2
- judgeval-0.22.3/src/judgeval/judges/litellm_judge.py +129 -0
- judgeval-0.22.3/src/judgeval/judges/together_judge.py +136 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/judges/utils.py +7 -20
- judgeval-0.22.3/src/judgeval/judgment_attribute_keys.py +55 -0
- {judgeval-0.0.55/src/judgeval/common → judgeval-0.22.3/src/judgeval}/logger.py +24 -8
- judgeval-0.22.3/src/judgeval/prompt/__init__.py +330 -0
- judgeval-0.22.3/src/judgeval/scorers/__init__.py +29 -0
- judgeval-0.22.3/src/judgeval/scorers/agent_scorer.py +17 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/api_scorer.py +21 -23
- judgeval-0.22.3/src/judgeval/scorers/base_scorer.py +97 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/example_scorer.py +1 -3
- judgeval-0.22.3/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +25 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
- judgeval-0.22.3/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +327 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/score.py +65 -47
- judgeval-0.22.3/src/judgeval/scorers/utils.py +14 -0
- judgeval-0.22.3/src/judgeval/tracer/__init__.py +1123 -0
- judgeval-0.22.3/src/judgeval/tracer/constants.py +1 -0
- judgeval-0.22.3/src/judgeval/tracer/exporters/__init__.py +40 -0
- judgeval-0.22.3/src/judgeval/tracer/exporters/s3.py +119 -0
- judgeval-0.22.3/src/judgeval/tracer/exporters/store.py +59 -0
- judgeval-0.22.3/src/judgeval/tracer/exporters/utils.py +32 -0
- judgeval-0.22.3/src/judgeval/tracer/keys.py +63 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/__init__.py +7 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/config.py +78 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/constants.py +9 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_anthropic/config.py +6 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_google/config.py +6 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_google/generate_content.py +127 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_google/wrapper.py +30 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_openai/__init__.py +3 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_openai/config.py +6 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_openai/responses.py +506 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_openai/utils.py +42 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_together/config.py +6 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/llm_together/wrapper.py +52 -0
- judgeval-0.22.3/src/judgeval/tracer/llm/providers.py +19 -0
- judgeval-0.22.3/src/judgeval/tracer/managers.py +167 -0
- judgeval-0.22.3/src/judgeval/tracer/processors/__init__.py +220 -0
- judgeval-0.22.3/src/judgeval/tracer/utils.py +19 -0
- judgeval-0.22.3/src/judgeval/trainer/__init__.py +14 -0
- judgeval-0.22.3/src/judgeval/trainer/base_trainer.py +122 -0
- judgeval-0.22.3/src/judgeval/trainer/config.py +123 -0
- judgeval-0.22.3/src/judgeval/trainer/console.py +144 -0
- judgeval-0.22.3/src/judgeval/trainer/fireworks_trainer.py +392 -0
- judgeval-0.22.3/src/judgeval/trainer/trainable_model.py +252 -0
- judgeval-0.22.3/src/judgeval/trainer/trainer.py +70 -0
- judgeval-0.22.3/src/judgeval/utils/async_utils.py +39 -0
- judgeval-0.22.3/src/judgeval/utils/decorators/__init__.py +0 -0
- judgeval-0.22.3/src/judgeval/utils/decorators/dont_throw.py +37 -0
- judgeval-0.22.3/src/judgeval/utils/decorators/use_once.py +13 -0
- judgeval-0.22.3/src/judgeval/utils/file_utils.py +97 -0
- judgeval-0.22.3/src/judgeval/utils/guards.py +36 -0
- judgeval-0.22.3/src/judgeval/utils/meta.py +27 -0
- judgeval-0.22.3/src/judgeval/utils/project.py +15 -0
- judgeval-0.22.3/src/judgeval/utils/serialize.py +253 -0
- judgeval-0.22.3/src/judgeval/utils/testing.py +70 -0
- judgeval-0.22.3/src/judgeval/utils/url.py +10 -0
- {judgeval-0.0.55/src/judgeval → judgeval-0.22.3/src/judgeval/utils}/version_check.py +5 -3
- judgeval-0.22.3/src/judgeval/utils/wrappers/README.md +3 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/__init__.py +15 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/py.typed +0 -0
- judgeval-0.22.3/src/judgeval/utils/wrappers/utils.py +35 -0
- judgeval-0.22.3/src/judgeval/v1/__init__.py +88 -0
- judgeval-0.22.3/src/judgeval/v1/data/__init__.py +7 -0
- judgeval-0.22.3/src/judgeval/v1/data/example.py +44 -0
- judgeval-0.22.3/src/judgeval/v1/data/scorer_data.py +42 -0
- judgeval-0.22.3/src/judgeval/v1/data/scoring_result.py +44 -0
- judgeval-0.22.3/src/judgeval/v1/datasets/__init__.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/datasets/dataset.py +152 -0
- judgeval-0.22.3/src/judgeval/v1/datasets/dataset_factory.py +88 -0
- judgeval-0.22.3/src/judgeval/v1/evaluation/__init__.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/evaluation/evaluation.py +184 -0
- judgeval-0.22.3/src/judgeval/v1/evaluation/evaluation_factory.py +17 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/__init__.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/__init__.py +7 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/config.py +78 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/constants.py +11 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
- judgeval-0.22.3/src/judgeval/v1/instrumentation/llm/providers.py +19 -0
- judgeval-0.22.3/src/judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
- judgeval-0.22.3/src/judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
- judgeval-0.22.3/src/judgeval/v1/integrations/langgraph/__init__.py +13 -0
- judgeval-0.22.3/src/judgeval/v1/integrations/openlit/__init__.py +47 -0
- judgeval-0.22.3/src/judgeval/v1/internal/api/__init__.py +525 -0
- judgeval-0.22.3/src/judgeval/v1/internal/api/api_types.py +416 -0
- judgeval-0.22.3/src/judgeval/v1/prompts/__init__.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/prompts/prompt.py +29 -0
- judgeval-0.22.3/src/judgeval/v1/prompts/prompt_factory.py +189 -0
- judgeval-0.22.3/src/judgeval/v1/py.typed +0 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/__init__.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/api_scorer.py +82 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/base_scorer.py +17 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/built_in/__init__.py +17 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/built_in/faithfulness.py +28 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
- judgeval-0.22.3/src/judgeval/v1/scorers/scorers_factory.py +49 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/__init__.py +7 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/base_tracer.py +520 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/exporters/__init__.py +14 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/exporters/span_store.py +50 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/processors/__init__.py +6 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/tracer.py +61 -0
- judgeval-0.22.3/src/judgeval/v1/tracer/tracer_factory.py +36 -0
- judgeval-0.22.3/src/judgeval/v1/trainers/__init__.py +5 -0
- judgeval-0.22.3/src/judgeval/v1/trainers/base_trainer.py +62 -0
- judgeval-0.22.3/src/judgeval/v1/trainers/config.py +123 -0
- judgeval-0.22.3/src/judgeval/v1/trainers/console.py +144 -0
- judgeval-0.22.3/src/judgeval/v1/trainers/fireworks_trainer.py +392 -0
- judgeval-0.22.3/src/judgeval/v1/trainers/trainable_model.py +252 -0
- judgeval-0.22.3/src/judgeval/v1/trainers/trainers_factory.py +37 -0
- judgeval-0.22.3/src/judgeval/v1/utils.py +18 -0
- judgeval-0.22.3/src/judgeval/version.py +5 -0
- judgeval-0.22.3/src/judgeval/warnings.py +4 -0
- judgeval-0.22.3/update_version.py +35 -0
- judgeval-0.22.3/uv.lock +5786 -0
- judgeval-0.0.55/.github/workflows/ci.yaml +0 -163
- judgeval-0.0.55/PKG-INFO +0 -1384
- judgeval-0.0.55/README.md +0 -1354
- judgeval-0.0.55/assets/logo-dark.svg +0 -23
- judgeval-0.0.55/assets/logo-light.svg +0 -18
- judgeval-0.0.55/assets/new_darkmode.svg +0 -29
- judgeval-0.0.55/assets/new_lightmode.svg +0 -34
- judgeval-0.0.55/assets/product_shot.png +0 -0
- judgeval-0.0.55/pyproject.toml +0 -77
- judgeval-0.0.55/src/.coveragerc +0 -4
- judgeval-0.0.55/src/judgeval/__init__.py +0 -13
- judgeval-0.0.55/src/judgeval/clients.py +0 -34
- judgeval-0.0.55/src/judgeval/common/__init__.py +0 -13
- judgeval-0.0.55/src/judgeval/common/exceptions.py +0 -27
- judgeval-0.0.55/src/judgeval/common/s3_storage.py +0 -98
- judgeval-0.0.55/src/judgeval/common/tracer.py +0 -3215
- judgeval-0.0.55/src/judgeval/common/utils.py +0 -940
- judgeval-0.0.55/src/judgeval/data/datasets/__init__.py +0 -4
- judgeval-0.0.55/src/judgeval/data/datasets/dataset.py +0 -341
- judgeval-0.0.55/src/judgeval/data/datasets/eval_dataset_client.py +0 -341
- judgeval-0.0.55/src/judgeval/data/example.py +0 -61
- judgeval-0.0.55/src/judgeval/data/judgment_types.py +0 -214
- judgeval-0.0.55/src/judgeval/data/tool.py +0 -5
- judgeval-0.0.55/src/judgeval/data/trace.py +0 -135
- judgeval-0.0.55/src/judgeval/data/trace_run.py +0 -40
- judgeval-0.0.55/src/judgeval/evaluation_run.py +0 -77
- judgeval-0.0.55/src/judgeval/integrations/langgraph.py +0 -964
- judgeval-0.0.55/src/judgeval/judges/litellm_judge.py +0 -68
- judgeval-0.0.55/src/judgeval/judges/mixture_of_judges.py +0 -286
- judgeval-0.0.55/src/judgeval/judges/together_judge.py +0 -65
- judgeval-0.0.55/src/judgeval/judgment_client.py +0 -563
- judgeval-0.0.55/src/judgeval/rules.py +0 -521
- judgeval-0.0.55/src/judgeval/run_evaluation.py +0 -1086
- judgeval-0.0.55/src/judgeval/scorers/__init__.py +0 -33
- judgeval-0.0.55/src/judgeval/scorers/agent_scorer.py +0 -21
- judgeval-0.0.55/src/judgeval/scorers/base_scorer.py +0 -98
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -47
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +0 -73
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
- judgeval-0.0.55/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -53
- judgeval-0.0.55/src/judgeval/scorers/utils.py +0 -131
- judgeval-0.0.55/src/judgeval/tracer/__init__.py +0 -3
- judgeval-0.0.55/src/judgeval/utils/alerts.py +0 -93
- judgeval-0.0.55/src/judgeval/utils/file_utils.py +0 -51
- judgeval-0.0.55/src/judgeval/utils/requests.py +0 -29
- judgeval-0.0.55/src/update_types.sh +0 -14
- judgeval-0.0.55/update_version.py +0 -32
- judgeval-0.0.55/uv.lock +0 -3789
- {judgeval-0.0.55 → judgeval-0.22.3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/.gitignore +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/LICENSE.md +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/agent.gif +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/data.gif +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/document.gif +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/experiments_page.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/trace.gif +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/trace_demo.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/trace_screenshot.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/pytest.ini +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.0.55 → judgeval-0.22.3}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
|
@@ -10,14 +10,7 @@
|
|
|
10
10
|
-->
|
|
11
11
|
- [ ] 1. ...
|
|
12
12
|
|
|
13
|
-
## 🎥 Demo of Changes
|
|
14
|
-
|
|
15
|
-
<!-- Add a short 1-3 minute video describing/demoing the changes -->
|
|
16
|
-
|
|
17
13
|
## ✅ Checklist
|
|
18
14
|
|
|
19
|
-
- [ ] Tagged Linear ticket in PR title. Ie. PR Title (JUD-XXXX)
|
|
20
|
-
- [ ] Video demo of changes
|
|
21
|
-
- [ ] Reviewers assigned
|
|
22
15
|
- [ ] Docs updated ([if necessary](https://github.com/JudgmentLabs/docs))
|
|
23
|
-
- [ ]
|
|
16
|
+
- [ ] Changelogs are updated ([if necessary](https://github.com/JudgmentLabs/docs/tree/main/content/docs/changelog/%28weekly%29))
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
types: [opened, synchronize, reopened]
|
|
6
|
+
|
|
7
|
+
permissions: read-all
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
validate-branch:
|
|
11
|
+
uses: ./.github/workflows/merge-branch-check.yaml
|
|
12
|
+
|
|
13
|
+
run-tests:
|
|
14
|
+
needs: [validate-branch]
|
|
15
|
+
if: needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped'
|
|
16
|
+
strategy:
|
|
17
|
+
fail-fast: false
|
|
18
|
+
matrix:
|
|
19
|
+
os: [ubuntu-latest, macos-latest]
|
|
20
|
+
python-version:
|
|
21
|
+
- "3.10"
|
|
22
|
+
- "3.11"
|
|
23
|
+
- "3.12"
|
|
24
|
+
- "3.13"
|
|
25
|
+
name: Unit Tests
|
|
26
|
+
runs-on: ${{ matrix.os }}
|
|
27
|
+
env:
|
|
28
|
+
PYTHONPATH: "."
|
|
29
|
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
30
|
+
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
|
31
|
+
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
|
32
|
+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
33
|
+
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
|
34
|
+
JUDGMENT_DEV: true
|
|
35
|
+
|
|
36
|
+
steps:
|
|
37
|
+
- name: Checkout code
|
|
38
|
+
uses: actions/checkout@v4
|
|
39
|
+
|
|
40
|
+
- name: Set up Python
|
|
41
|
+
uses: actions/setup-python@v4
|
|
42
|
+
with:
|
|
43
|
+
python-version: ${{ matrix.python-version }}
|
|
44
|
+
|
|
45
|
+
- name: Install dependencies
|
|
46
|
+
run: |
|
|
47
|
+
pip install uv
|
|
48
|
+
uv sync --dev
|
|
49
|
+
|
|
50
|
+
- name: Install Claude Code CLI
|
|
51
|
+
run: |
|
|
52
|
+
npm install -g @anthropic-ai/claude-code
|
|
53
|
+
|
|
54
|
+
- name: Run tests
|
|
55
|
+
run: |
|
|
56
|
+
cd src
|
|
57
|
+
export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
|
|
58
|
+
export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
|
|
59
|
+
uv run pytest tests -n auto
|
|
60
|
+
|
|
61
|
+
run-e2e-tests:
|
|
62
|
+
needs: [validate-branch]
|
|
63
|
+
if: "(github.base_ref == 'staging' || github.base_ref == 'main') && !contains(github.actor, '[bot]') && (needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped')"
|
|
64
|
+
strategy:
|
|
65
|
+
fail-fast: false
|
|
66
|
+
matrix:
|
|
67
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
68
|
+
name: E2E Tests
|
|
69
|
+
runs-on: ubuntu-latest
|
|
70
|
+
env:
|
|
71
|
+
TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
|
|
72
|
+
steps:
|
|
73
|
+
- name: Configure AWS Credentials
|
|
74
|
+
uses: aws-actions/configure-aws-credentials@v4
|
|
75
|
+
with:
|
|
76
|
+
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
|
77
|
+
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
|
78
|
+
aws-region: us-west-1
|
|
79
|
+
|
|
80
|
+
- name: Checkout code
|
|
81
|
+
uses: actions/checkout@v4
|
|
82
|
+
|
|
83
|
+
- name: Set env based on branch
|
|
84
|
+
run: |
|
|
85
|
+
if [ "${{ github.base_ref }}" = "main" ]; then
|
|
86
|
+
echo "TARGET_ENV=main" >> "$GITHUB_ENV"
|
|
87
|
+
echo "BASE_URL=https://api.judgmentlabs.ai" >> "$GITHUB_ENV"
|
|
88
|
+
echo "SECRETS_PATH=prod/api-keys/e2e-tests" >> "$GITHUB_ENV"
|
|
89
|
+
echo "COVERAGE_ARTIFACT=coverage-html-production-${{ matrix.python-version }}" >> "$GITHUB_ENV"
|
|
90
|
+
else
|
|
91
|
+
echo "TARGET_ENV=staging" >> "$GITHUB_ENV"
|
|
92
|
+
echo "BASE_URL=https://staging.api.judgmentlabs.ai" >> "$GITHUB_ENV"
|
|
93
|
+
echo "SECRETS_PATH=stg/api-keys/e2e-tests" >> "$GITHUB_ENV"
|
|
94
|
+
echo "COVERAGE_ARTIFACT=coverage-html-staging-${{ matrix.python-version }}" >> "$GITHUB_ENV"
|
|
95
|
+
fi
|
|
96
|
+
|
|
97
|
+
- name: Restore uv cache
|
|
98
|
+
uses: actions/cache/restore@v4
|
|
99
|
+
id: restore-uv-cache
|
|
100
|
+
with:
|
|
101
|
+
path: ~/.cache/uv/
|
|
102
|
+
key: ${{ runner.os }}-uv-judgment-${{ hashFiles('./**/uv.lock') }}
|
|
103
|
+
restore-keys: |
|
|
104
|
+
${{ runner.os }}-uv-judgment-
|
|
105
|
+
${{ runner.os }}-uv-
|
|
106
|
+
|
|
107
|
+
- name: Set up Python
|
|
108
|
+
uses: actions/setup-python@v4
|
|
109
|
+
with:
|
|
110
|
+
python-version: ${{ matrix.python-version }}
|
|
111
|
+
|
|
112
|
+
- name: Install judgeval dependencies
|
|
113
|
+
run: |
|
|
114
|
+
pip install uv
|
|
115
|
+
uv sync --dev
|
|
116
|
+
|
|
117
|
+
- name: Check if server is running
|
|
118
|
+
run: |
|
|
119
|
+
if ! curl -s "$BASE_URL/health" > /dev/null; then
|
|
120
|
+
echo "Judgment server ($BASE_URL) is not running properly. Check CloudWatch logs."
|
|
121
|
+
exit 1
|
|
122
|
+
else
|
|
123
|
+
echo "Server is running."
|
|
124
|
+
fi
|
|
125
|
+
|
|
126
|
+
- name: Run E2E tests
|
|
127
|
+
working-directory: src
|
|
128
|
+
run: |
|
|
129
|
+
SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id "$SECRETS_PATH" --query SecretString --output text)
|
|
130
|
+
export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
|
|
131
|
+
export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
|
|
132
|
+
export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
|
|
133
|
+
export JUDGMENT_API_URL="$BASE_URL"
|
|
134
|
+
timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
|
|
135
|
+
|
|
136
|
+
- name: Upload coverage HTML report
|
|
137
|
+
if: always()
|
|
138
|
+
uses: actions/upload-artifact@v4
|
|
139
|
+
with:
|
|
140
|
+
name: ${{ env.COVERAGE_ARTIFACT }}
|
|
141
|
+
path: src/htmlcov
|
|
142
|
+
|
|
143
|
+
- name: Save uv cache
|
|
144
|
+
uses: actions/cache/save@v4
|
|
145
|
+
if: always() && steps.restore-uv-cache.outputs.cache-hit != 'true'
|
|
146
|
+
with:
|
|
147
|
+
path: ~/.cache/uv/
|
|
148
|
+
key: ${{ runner.os }}-uv-judgment-${{ hashFiles('./**/uv.lock') }}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: Claude Code Review
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
issue_comment:
|
|
5
|
+
types: [created]
|
|
6
|
+
jobs:
|
|
7
|
+
claude-review:
|
|
8
|
+
if: github.event.issue.pull_request && contains(github.event.comment.body, '/claude review')
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
pull-requests: read
|
|
13
|
+
issues: read
|
|
14
|
+
id-token: write
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- name: Checkout repository
|
|
18
|
+
uses: actions/checkout@v4
|
|
19
|
+
with:
|
|
20
|
+
fetch-depth: 1
|
|
21
|
+
|
|
22
|
+
- name: Run Claude Code Review
|
|
23
|
+
id: claude-review
|
|
24
|
+
uses: anthropics/claude-code-action@beta
|
|
25
|
+
with:
|
|
26
|
+
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
27
|
+
direct_prompt: |
|
|
28
|
+
Please review this pull request and provide feedback on:
|
|
29
|
+
- Code quality and best practices
|
|
30
|
+
- Potential bugs or issues
|
|
31
|
+
- Performance considerations
|
|
32
|
+
- Security concerns
|
|
33
|
+
- Test coverage
|
|
34
|
+
|
|
35
|
+
Be constructive and helpful in your feedback.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: Claude Code
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
issue_comment:
|
|
5
|
+
types: [created]
|
|
6
|
+
pull_request_review_comment:
|
|
7
|
+
types: [created]
|
|
8
|
+
issues:
|
|
9
|
+
types: [opened, assigned]
|
|
10
|
+
pull_request_review:
|
|
11
|
+
types: [submitted]
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
claude:
|
|
15
|
+
if: |
|
|
16
|
+
(github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
|
|
17
|
+
(github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
|
|
18
|
+
(github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
|
|
19
|
+
(github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
permissions:
|
|
22
|
+
contents: read
|
|
23
|
+
pull-requests: read
|
|
24
|
+
issues: read
|
|
25
|
+
id-token: write
|
|
26
|
+
actions: read
|
|
27
|
+
steps:
|
|
28
|
+
- name: Checkout repository
|
|
29
|
+
uses: actions/checkout@v4
|
|
30
|
+
with:
|
|
31
|
+
fetch-depth: 1
|
|
32
|
+
|
|
33
|
+
- name: Run Claude Code
|
|
34
|
+
id: claude
|
|
35
|
+
uses: anthropics/claude-code-action@beta
|
|
36
|
+
with:
|
|
37
|
+
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
38
|
+
|
|
39
|
+
additional_permissions: |
|
|
40
|
+
actions: read
|
|
@@ -10,20 +10,11 @@ jobs:
|
|
|
10
10
|
steps:
|
|
11
11
|
- uses: actions/checkout@v4
|
|
12
12
|
|
|
13
|
-
- name: Set up Python
|
|
14
|
-
uses: actions/setup-python@v5
|
|
15
|
-
with:
|
|
16
|
-
python-version: '3.11'
|
|
17
|
-
|
|
18
13
|
- name: Install ruff
|
|
19
14
|
uses: astral-sh/ruff-action@v3
|
|
20
15
|
with:
|
|
21
16
|
args: "--version"
|
|
22
17
|
|
|
23
|
-
- name: Install mypy and dependencies
|
|
24
|
-
run: |
|
|
25
|
-
pip install mypy types-requests types-PyYAML
|
|
26
|
-
|
|
27
18
|
- name: Run ruff formatter
|
|
28
19
|
if: always()
|
|
29
20
|
run: ruff format --check .
|
|
@@ -31,7 +22,3 @@ jobs:
|
|
|
31
22
|
- name: Run ruff linter
|
|
32
23
|
if: always()
|
|
33
24
|
run: ruff check .
|
|
34
|
-
|
|
35
|
-
- name: Run mypy
|
|
36
|
-
if: always()
|
|
37
|
-
run: mypy --explicit-package-bases --ignore-missing-imports .
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
name: MyPy Check
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
branches: [ main, staging ]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
mypy:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v4
|
|
12
|
+
|
|
13
|
+
- name: Set up Python
|
|
14
|
+
uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: '3.11'
|
|
17
|
+
|
|
18
|
+
- name: Install dependencies
|
|
19
|
+
run: |
|
|
20
|
+
pip install uv
|
|
21
|
+
uv sync --dev
|
|
22
|
+
|
|
23
|
+
- name: Run mypy
|
|
24
|
+
if: always()
|
|
25
|
+
run: uv run mypy ./src/judgeval/
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: Pre-commit auto-update
|
|
2
|
+
on:
|
|
3
|
+
schedule:
|
|
4
|
+
- cron: '0 0 * * 1' # Weekly on Monday at midnight UTC
|
|
5
|
+
workflow_dispatch:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
auto-update:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- name: Checkout repository
|
|
12
|
+
uses: actions/checkout@v4
|
|
13
|
+
with:
|
|
14
|
+
ref: staging
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
uses: actions/setup-python@v4
|
|
18
|
+
with:
|
|
19
|
+
python-version: '3.11'
|
|
20
|
+
|
|
21
|
+
- name: Install and update pre-commit
|
|
22
|
+
run: |
|
|
23
|
+
pip install pre-commit
|
|
24
|
+
pre-commit autoupdate
|
|
25
|
+
|
|
26
|
+
- name: Create Pull Request
|
|
27
|
+
uses: peter-evans/create-pull-request@v7
|
|
28
|
+
with:
|
|
29
|
+
commit-message: 'chore: update pre-commit hooks'
|
|
30
|
+
title: 'chore: update pre-commit hooks'
|
|
31
|
+
body: |
|
|
32
|
+
Auto-generated PR to update pre-commit hook versions.
|
|
33
|
+
|
|
34
|
+
Please review the changes and merge if everything looks good.
|
|
35
|
+
|
|
36
|
+
Updated by GitHub Actions on {{ date }}.
|
|
37
|
+
branch: update-pre-commit-hooks
|
|
38
|
+
base: staging
|
|
@@ -28,6 +28,18 @@ jobs:
|
|
|
28
28
|
version=$(curl -s https://pypi.org/pypi/judgeval/json | jq -r .info.version)
|
|
29
29
|
echo "latest_version=$version" >> $GITHUB_OUTPUT
|
|
30
30
|
|
|
31
|
+
- name: Determine bump type (minor if commit message starts with [Bump Minor Version], else patch)
|
|
32
|
+
id: bump_type
|
|
33
|
+
run: |
|
|
34
|
+
# Get the latest commit message
|
|
35
|
+
commit_message=$(git log -1 --pretty=%B)
|
|
36
|
+
# Default bump type
|
|
37
|
+
bump_type=patch
|
|
38
|
+
if [[ "$commit_message" == "[Bump Minor Version]"* ]]; then
|
|
39
|
+
bump_type=minor
|
|
40
|
+
fi
|
|
41
|
+
echo "bump_type=$bump_type" >> $GITHUB_OUTPUT
|
|
42
|
+
|
|
31
43
|
- name: Bump version and create new tag
|
|
32
44
|
id: bump_tag
|
|
33
45
|
run: |
|
|
@@ -37,9 +49,29 @@ jobs:
|
|
|
37
49
|
# Extract version numbers
|
|
38
50
|
IFS='.' read -r major minor patch <<< "$latest_version"
|
|
39
51
|
|
|
40
|
-
#
|
|
41
|
-
|
|
42
|
-
|
|
52
|
+
# Set major version (manually modify this if you want to change the major version)
|
|
53
|
+
new_major_version=0
|
|
54
|
+
|
|
55
|
+
# Validate that new major version is greater than or equal to current major version
|
|
56
|
+
if [ "$new_major_version" -lt "$major" ]; then
|
|
57
|
+
echo "Error: New major version ($new_major_version) must be greater than or equal to current major version ($major)"
|
|
58
|
+
exit 1
|
|
59
|
+
fi
|
|
60
|
+
|
|
61
|
+
if [ "$new_major_version" -ne "$major" ]; then # If major version changed, set minor and patch to 0
|
|
62
|
+
echo "Major version bumped, setting minor and patch to 0"
|
|
63
|
+
minor=0
|
|
64
|
+
patch=0
|
|
65
|
+
elif [ "${{ steps.bump_type.outputs.bump_type }}" = "minor" ]; then
|
|
66
|
+
echo "Minor version bumped, setting patch to 0"
|
|
67
|
+
minor=$((minor + 1))
|
|
68
|
+
patch=0
|
|
69
|
+
else
|
|
70
|
+
echo "Patch version bumped"
|
|
71
|
+
patch=$((patch + 1))
|
|
72
|
+
fi
|
|
73
|
+
|
|
74
|
+
new_version="$new_major_version.$minor.$patch"
|
|
43
75
|
|
|
44
76
|
echo "New version: $new_version"
|
|
45
77
|
echo "new_version=$new_version" >> $GITHUB_OUTPUT
|
|
@@ -59,7 +91,7 @@ jobs:
|
|
|
59
91
|
env:
|
|
60
92
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
61
93
|
|
|
62
|
-
- name: Bump pyproject.toml version
|
|
94
|
+
- name: Bump pyproject.toml and version.py version
|
|
63
95
|
run: |
|
|
64
96
|
python update_version.py ${{ steps.bump_tag.outputs.new_version }}
|
|
65
97
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
repos:
|
|
2
2
|
- repo: https://github.com/astral-sh/uv-pre-commit
|
|
3
|
-
rev: 0.7
|
|
3
|
+
rev: 0.9.7
|
|
4
4
|
hooks:
|
|
5
5
|
- id: uv-lock
|
|
6
6
|
|
|
7
7
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
8
|
-
rev: v0.
|
|
8
|
+
rev: v0.14.3
|
|
9
9
|
hooks:
|
|
10
10
|
- id: ruff
|
|
11
11
|
name: ruff (linter)
|
|
@@ -14,8 +14,10 @@ repos:
|
|
|
14
14
|
name: ruff (formatter)
|
|
15
15
|
|
|
16
16
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
17
|
-
rev: v1.
|
|
17
|
+
rev: v1.18.2
|
|
18
18
|
hooks:
|
|
19
19
|
- id: mypy
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
language: system
|
|
21
|
+
# These next two lines allow commits even if mypy fails, REMOVE once we fix all mypy errors
|
|
22
|
+
verbose: true
|
|
23
|
+
entry: bash -c 'mypy src/judgeval/ || true'
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Contribute to Judgeval
|
|
2
|
+
|
|
3
|
+
There are many ways to contribute to Judgeval:
|
|
4
|
+
|
|
5
|
+
- Submit [bug reports](https://github.com/JudgmentLabs/judgeval/issues) and [feature requests](https://github.com/JudgmentLabs/judgeval/issues)
|
|
6
|
+
- Review the documentation and submit [Pull Requests](https://github.com/JudgmentLabs/judgeval/pulls) to improve it
|
|
7
|
+
- Speak or write about Judgment and let us know!
|
|
8
|
+
|
|
9
|
+
<!-- Contributors collage -->
|
|
10
|
+
[Contributors](https://github.com/JudgmentLabs/judgeval/graphs/contributors)
|