judgeval 0.0.39__tar.gz → 0.0.40__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval-0.0.40/.github/workflows/ci-staging.yaml +103 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/.github/workflows/ci.yaml +13 -3
- judgeval-0.0.40/.github/workflows/merge-to-main.yaml +32 -0
- judgeval-0.0.40/.github/workflows/release.yaml +92 -0
- judgeval-0.0.40/PKG-INFO +1441 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/Pipfile +1 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/Pipfile.lock +85 -4
- judgeval-0.0.40/README.md +1413 -0
- judgeval-0.0.40/assets/experiments_pagev2.png +0 -0
- judgeval-0.0.40/assets/new_darkmode.svg +29 -0
- judgeval-0.0.40/assets/new_lightmode.svg +34 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/monitoring/tracing.mdx +1 -1
- {judgeval-0.0.39 → judgeval-0.0.40}/pyproject.toml +2 -2
- judgeval-0.0.40/src/.coveragerc +4 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/clients.py +6 -4
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/common/tracer.py +361 -236
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/constants.py +2 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/__init__.py +2 -1
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/example.py +7 -7
- judgeval-0.0.40/src/judgeval/data/tool.py +47 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/trace.py +26 -38
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/trace_run.py +2 -1
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/evaluation_run.py +4 -7
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/judgment_client.py +25 -6
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/run_evaluation.py +50 -16
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/__init__.py +4 -1
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorer.py +8 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +4 -0
- judgeval-0.0.40/src/judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +124 -0
- judgeval-0.0.40/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +20 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +1 -1
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/prompt_scorer.py +5 -164
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/score.py +15 -15
- judgeval-0.0.40/update_version.py +32 -0
- judgeval-0.0.39/PKG-INFO +0 -247
- judgeval-0.0.39/README.md +0 -219
- judgeval-0.0.39/src/judgeval/data/tool.py +0 -19
- {judgeval-0.0.39 → judgeval-0.0.40}/.github/pull_request_template.md +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/.gitignore +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/LICENSE.md +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/experiments_page.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/logo-dark.svg +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/logo-light.svg +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/assets/trace_screenshot.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/README.md +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/alerts/notifications.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/alerts/platform_notifications.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/alerts/rules.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/api_reference/judgment_client.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/api_reference/trace.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/changelog/2025-04-21.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/clustering/clustering.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/compliance/certifications.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/development.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/essentials/code.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/essentials/images.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/essentials/markdown.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/essentials/navigation.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/essentials/reusable-snippets.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/essentials/settings.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/data_datasets.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/data_examples.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/data_sequences.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/experiment_comparisons.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/introduction.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/judges.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/agent/derailment.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/custom_scorers.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/answer_correctness.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/answer_relevancy.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/comparison.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/contextual_precision.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/contextual_recall.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/contextual_relevancy.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/execution_order.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/faithfulness.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/groundedness.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/json_correctness.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/default/summarization.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/scorers/introduction.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/evaluation/unit_testing.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/favicon.svg +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/getting_started.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/annotation_queue_ui.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/basic_trace_example.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/checks-passed.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/cluster.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/cluster_button.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/create_aggressive_scorer.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/create_scorer.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/dashboard_annotation_queue_button.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/evaluation_diagram.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/experiment-comparison-page-2.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/experiment-page-comparison.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/experiment-popout-comparison.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/experiments-page-comparison-2.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/experiments-page-comparison.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/export-dataset.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/hero-dark.svg +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/hero-light.svg +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/notifications_page.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/online_eval_fault.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/reports_modal.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/synth_data_button.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/synth_data_window.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/images/trace_ss.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/integration/langgraph.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/introduction.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/judgment_cli/installation.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/judgment_cli/self-hosting.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/judgment_cli/supabase-org-id.png +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/logo/dark.svg +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/logo/light.svg +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/mint.json +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/monitoring/annotations.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/monitoring/introduction.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/monitoring/production_insights.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/monitoring/tracing_s3.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/notebooks/create_dataset.ipynb +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/notebooks/create_scorer.ipynb +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/notebooks/demo.ipynb +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/notebooks/prompt_scorer.ipynb +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/notebooks/quickstart.ipynb +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/optimization/osiris_agent.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/quickstart.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/self_hosting/get_started.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/snippets/snippet-intro.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/docs/synthetic_data/synthetic_data.mdx +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/pytest.ini +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/__init__.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/common/__init__.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/common/exceptions.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/common/logger.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/common/s3_storage.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/common/utils.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/custom_example.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/datasets/__init__.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/datasets/dataset.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/datasets/eval_dataset_client.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/result.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/integrations/langgraph.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/judges/mixture_of_judges.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/rules.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/api_scorer.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/tracer/__init__.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/utils/alerts.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/utils/data_utils.py +0 -0
- {judgeval-0.0.39 → judgeval-0.0.40}/src/judgeval/version_check.py +0 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
|
2
|
+
name: Staging CI Tests
|
3
|
+
|
4
|
+
on:
|
5
|
+
pull_request:
|
6
|
+
types: [opened, synchronize, reopened]
|
7
|
+
branches:
|
8
|
+
- staging
|
9
|
+
|
10
|
+
permissions: read-all
|
11
|
+
|
12
|
+
jobs:
|
13
|
+
run-tests:
|
14
|
+
strategy:
|
15
|
+
fail-fast: false
|
16
|
+
matrix:
|
17
|
+
os: [ubuntu-latest, macos-latest]
|
18
|
+
python-version:
|
19
|
+
- "3.11"
|
20
|
+
name: Test
|
21
|
+
runs-on: ${{ matrix.os }}
|
22
|
+
env:
|
23
|
+
PYTHONPATH: "."
|
24
|
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
25
|
+
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
26
|
+
JUDGMENT_DEV: true
|
27
|
+
|
28
|
+
steps:
|
29
|
+
- name: Checkout code
|
30
|
+
uses: actions/checkout@v4
|
31
|
+
|
32
|
+
- name: Set up Python
|
33
|
+
uses: actions/setup-python@v4
|
34
|
+
with:
|
35
|
+
python-version: ${{ matrix.python-version }}
|
36
|
+
|
37
|
+
- name: Install dependencies
|
38
|
+
run: |
|
39
|
+
pip install pipenv
|
40
|
+
pipenv install --dev
|
41
|
+
|
42
|
+
|
43
|
+
- name: Run tests
|
44
|
+
run: |
|
45
|
+
cd src
|
46
|
+
pipenv run pytest tests
|
47
|
+
|
48
|
+
run-e2e-tests-staging:
|
49
|
+
if: "!contains(github.actor, '[bot]')" # Exclude if the actor is a bot
|
50
|
+
name: Staging E2E Tests
|
51
|
+
runs-on: ubuntu-latest
|
52
|
+
env:
|
53
|
+
TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
|
54
|
+
steps:
|
55
|
+
- name: Wait for turn
|
56
|
+
uses: softprops/turnstyle@v2
|
57
|
+
with:
|
58
|
+
poll-interval-seconds: 10
|
59
|
+
same-branch-only: false
|
60
|
+
job-to-wait-for: "Staging E2E Tests"
|
61
|
+
|
62
|
+
- name: Configure AWS Credentials
|
63
|
+
uses: aws-actions/configure-aws-credentials@v4
|
64
|
+
with:
|
65
|
+
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
66
|
+
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
67
|
+
aws-region: us-west-1
|
68
|
+
|
69
|
+
- name: Checkout code
|
70
|
+
uses: actions/checkout@v4
|
71
|
+
|
72
|
+
- name: Set up Python
|
73
|
+
uses: actions/setup-python@v4
|
74
|
+
with:
|
75
|
+
python-version: "3.11"
|
76
|
+
|
77
|
+
- name: Install judgeval dependencies
|
78
|
+
run: |
|
79
|
+
pip install pipenv
|
80
|
+
pipenv install --dev
|
81
|
+
|
82
|
+
- name: Check if server is running
|
83
|
+
run: |
|
84
|
+
if ! curl -s https://staging.api.judgmentlabs.ai/health > /dev/null; then
|
85
|
+
echo "Staging Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
|
86
|
+
exit 1
|
87
|
+
else
|
88
|
+
echo "Staging server is running."
|
89
|
+
fi
|
90
|
+
|
91
|
+
- name: Run E2E tests
|
92
|
+
working-directory: src
|
93
|
+
run: |
|
94
|
+
SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions-stg-judgeval/api-keys/judgeval --query SecretString --output text)
|
95
|
+
export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
|
96
|
+
timeout ${TEST_TIMEOUT_SECONDS}s pipenv run pytest --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
|
97
|
+
|
98
|
+
- name: Upload coverage HTML report
|
99
|
+
if: always()
|
100
|
+
uses: actions/upload-artifact@v4
|
101
|
+
with:
|
102
|
+
name: coverage-html
|
103
|
+
path: src/htmlcov
|
@@ -1,3 +1,4 @@
|
|
1
|
+
|
1
2
|
name: CI Tests
|
2
3
|
|
3
4
|
on:
|
@@ -48,6 +49,8 @@ jobs:
|
|
48
49
|
if: "!contains(github.actor, '[bot]')" # Exclude if the actor is a bot
|
49
50
|
name: E2E Tests
|
50
51
|
runs-on: ubuntu-latest
|
52
|
+
env:
|
53
|
+
TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
|
51
54
|
steps:
|
52
55
|
- name: Wait for turn
|
53
56
|
uses: softprops/turnstyle@v2
|
@@ -78,7 +81,7 @@ jobs:
|
|
78
81
|
|
79
82
|
- name: Check if server is running
|
80
83
|
run: |
|
81
|
-
if ! curl -s
|
84
|
+
if ! curl -s https://api.judgmentlabs.ai/health > /dev/null; then
|
82
85
|
echo "Production Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
|
83
86
|
exit 1
|
84
87
|
else
|
@@ -88,6 +91,13 @@ jobs:
|
|
88
91
|
- name: Run E2E tests
|
89
92
|
working-directory: src
|
90
93
|
run: |
|
91
|
-
SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions/api-keys/judgeval --query SecretString --output text)
|
94
|
+
SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions-judgeval/api-keys/judgeval --query SecretString --output text)
|
92
95
|
export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
|
93
|
-
pipenv run pytest --durations=0 ./e2etests
|
96
|
+
timeout ${TEST_TIMEOUT_SECONDS}s pipenv run pytest --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
|
97
|
+
|
98
|
+
- name: Upload coverage HTML report
|
99
|
+
if: always()
|
100
|
+
uses: actions/upload-artifact@v4
|
101
|
+
with:
|
102
|
+
name: coverage-html
|
103
|
+
path: src/htmlcov
|
@@ -0,0 +1,32 @@
|
|
1
|
+
name: Enforce Main Branch Protection
|
2
|
+
|
3
|
+
on:
|
4
|
+
pull_request:
|
5
|
+
types: [opened, synchronize, reopened, edited]
|
6
|
+
|
7
|
+
jobs:
|
8
|
+
validate-branch:
|
9
|
+
runs-on: ubuntu-latest
|
10
|
+
steps:
|
11
|
+
- name: Check branch name
|
12
|
+
run: |
|
13
|
+
# Get the base and source branch names
|
14
|
+
BASE_BRANCH="${{ github.base_ref }}"
|
15
|
+
SOURCE_BRANCH="${{ github.head_ref }}"
|
16
|
+
|
17
|
+
echo "BASE_BRANCH: $BASE_BRANCH"
|
18
|
+
echo "SOURCE_BRANCH: $SOURCE_BRANCH"
|
19
|
+
|
20
|
+
# Only run validation if the base branch is main
|
21
|
+
if [[ "$BASE_BRANCH" != "main" ]]; then
|
22
|
+
echo "Skipping branch validation - not targeting main branch"
|
23
|
+
exit 0
|
24
|
+
fi
|
25
|
+
|
26
|
+
# Check if the source branch is staging or starts with hotfix/
|
27
|
+
if [[ "$SOURCE_BRANCH" != "staging" && ! "$SOURCE_BRANCH" =~ ^hotfix/ ]]; then
|
28
|
+
echo "::error::Pull requests to main can only be created from 'staging' or 'hotfix/*' branches. Current branch: $SOURCE_BRANCH"
|
29
|
+
exit 1
|
30
|
+
fi
|
31
|
+
|
32
|
+
echo "Branch validation passed. Source branch: $SOURCE_BRANCH"
|
@@ -0,0 +1,92 @@
|
|
1
|
+
name: Release on Main Merge
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches:
|
6
|
+
- main
|
7
|
+
|
8
|
+
jobs:
|
9
|
+
release:
|
10
|
+
runs-on: ubuntu-latest
|
11
|
+
outputs:
|
12
|
+
new_version: ${{ steps.bump_tag.outputs.new_version }}
|
13
|
+
|
14
|
+
steps:
|
15
|
+
- name: Checkout code
|
16
|
+
uses: actions/checkout@v4
|
17
|
+
with:
|
18
|
+
fetch-depth: 0
|
19
|
+
|
20
|
+
- name: Install Python
|
21
|
+
uses: actions/setup-python@v4
|
22
|
+
with:
|
23
|
+
python-version: 3.11
|
24
|
+
|
25
|
+
- name: Get latest version
|
26
|
+
id: get_version
|
27
|
+
run: |
|
28
|
+
version=$(curl -s https://pypi.org/pypi/judgeval/json | jq -r .info.version)
|
29
|
+
echo "latest_version=$version" >> $GITHUB_OUTPUT
|
30
|
+
|
31
|
+
- name: Bump version and create new tag
|
32
|
+
id: bump_tag
|
33
|
+
run: |
|
34
|
+
latest_version=${{ steps.get_version.outputs.latest_version }}
|
35
|
+
echo "Latest version: $latest_version"
|
36
|
+
|
37
|
+
# Extract version numbers
|
38
|
+
IFS='.' read -r major minor patch <<< "$latest_version"
|
39
|
+
|
40
|
+
# Bump patch version
|
41
|
+
patch=$((patch + 1))
|
42
|
+
new_version="$major.$minor.$patch"
|
43
|
+
|
44
|
+
echo "New version: $new_version"
|
45
|
+
echo "new_version=$new_version" >> $GITHUB_OUTPUT
|
46
|
+
|
47
|
+
git config user.name "github-actions"
|
48
|
+
git config user.email "github-actions@github.com"
|
49
|
+
git tag v$new_version
|
50
|
+
git push origin v$new_version
|
51
|
+
|
52
|
+
- name: Create GitHub release
|
53
|
+
uses: softprops/action-gh-release@v2
|
54
|
+
with:
|
55
|
+
tag_name: v${{ steps.bump_tag.outputs.new_version }}
|
56
|
+
generate_release_notes: true
|
57
|
+
body: |
|
58
|
+
You can find this package release on PyPI: https://pypi.org/project/judgeval/${{ steps.bump_tag.outputs.new_version }}/
|
59
|
+
env:
|
60
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
61
|
+
|
62
|
+
- name: Bump pyproject.toml version
|
63
|
+
run: |
|
64
|
+
python update_version.py ${{ steps.bump_tag.outputs.new_version }}
|
65
|
+
|
66
|
+
- name: Build PyPI package
|
67
|
+
run: |
|
68
|
+
python -m pip install --upgrade build
|
69
|
+
python -m build
|
70
|
+
|
71
|
+
- name: Create PyPI release
|
72
|
+
run: |
|
73
|
+
python -m pip install --upgrade twine
|
74
|
+
python -m twine upload --repository pypi -u ${{ secrets.PYPI_USERNAME }} -p ${{ secrets.PYPI_PASSWORD }} dist/*
|
75
|
+
|
76
|
+
cleanup:
|
77
|
+
needs: release
|
78
|
+
if: failure()
|
79
|
+
runs-on: ubuntu-latest
|
80
|
+
steps:
|
81
|
+
- name: Checkout code
|
82
|
+
uses: actions/checkout@v4
|
83
|
+
|
84
|
+
- name: Authenticate GitHub CLI
|
85
|
+
run: echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token
|
86
|
+
|
87
|
+
- name: Delete tag and release
|
88
|
+
run: |
|
89
|
+
gh release delete v${{ needs.release.outputs.new_version }} --yes
|
90
|
+
git push --delete origin v${{ needs.release.outputs.new_version }}
|
91
|
+
env:
|
92
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|