judgeval 0.13.0__tar.gz → 0.14.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval-0.14.0/.github/workflows/ci.yaml +141 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/PKG-INFO +1 -1
- {judgeval-0.13.0 → judgeval-0.14.0}/pyproject.toml +1 -1
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/version.py +1 -1
- judgeval-0.13.0/.github/workflows/ci.yaml +0 -176
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/pull_request_template.md +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/claude-code-review.yml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/claude.yml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/release.yaml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.gitignore +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/.pre-commit-config.yaml +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/LICENSE.md +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/README.md +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/agent.gif +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/agent_trace_example.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/company.jpg +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/company_banner.jpg +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/darkmode.svg +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/full_logo.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/icon.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/lightmode.svg +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/white_background.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/data.gif +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/document.gif +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/errors.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/experiments_page.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/logo-dark.svg +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/logo-light.svg +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/new_darkmode.svg +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/new_lightmode.svg +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/online_eval.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/product_shot.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/test.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/tests.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/trace.gif +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/trace_demo.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/trace_screenshot.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/pytest.ini +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/scripts/api_generator.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/scripts/openapi_transform.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/scripts/update_types.sh +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/api/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/api/api_types.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/cli.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/constants.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/evaluation_run.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/example.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/judgment_types.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/result.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/trace.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/dataset/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/env.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/evaluation/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/exceptions.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/integrations/langgraph/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/logger.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/api_scorer.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/base_scorer.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/score.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/constants.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/exporters/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/exporters/s3.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/exporters/store.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/exporters/utils.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/keys.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/anthropic/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/google/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/groq/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/openai/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/providers.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/together/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/local_eval_queue.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/managers.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/processors/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/utils.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/__init__.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/config.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/console.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/trainable_model.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/trainer.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/async_utils.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/decorators.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/file_utils.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/guards.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/meta.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/serialize.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/testing.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/url.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/version_check.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/warnings.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/update_version.py +0 -0
- {judgeval-0.13.0 → judgeval-0.14.0}/uv.lock +0 -0
@@ -0,0 +1,141 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
pull_request:
|
5
|
+
types: [opened, synchronize, reopened]
|
6
|
+
|
7
|
+
permissions: read-all
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
validate-branch:
|
11
|
+
uses: ./.github/workflows/merge-branch-check.yaml
|
12
|
+
|
13
|
+
run-tests:
|
14
|
+
needs: [validate-branch]
|
15
|
+
if: needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped'
|
16
|
+
strategy:
|
17
|
+
fail-fast: false
|
18
|
+
matrix:
|
19
|
+
os: [ubuntu-latest, macos-latest]
|
20
|
+
python-version:
|
21
|
+
- "3.10"
|
22
|
+
- "3.11"
|
23
|
+
- "3.12"
|
24
|
+
- "3.13"
|
25
|
+
name: Unit Tests
|
26
|
+
runs-on: ${{ matrix.os }}
|
27
|
+
env:
|
28
|
+
PYTHONPATH: "."
|
29
|
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
30
|
+
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
31
|
+
JUDGMENT_DEV: true
|
32
|
+
|
33
|
+
steps:
|
34
|
+
- name: Checkout code
|
35
|
+
uses: actions/checkout@v4
|
36
|
+
|
37
|
+
- name: Set up Python
|
38
|
+
uses: actions/setup-python@v4
|
39
|
+
with:
|
40
|
+
python-version: ${{ matrix.python-version }}
|
41
|
+
|
42
|
+
- name: Install dependencies
|
43
|
+
run: |
|
44
|
+
pip install uv
|
45
|
+
uv sync --dev
|
46
|
+
|
47
|
+
- name: Run tests
|
48
|
+
run: |
|
49
|
+
cd src
|
50
|
+
export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
|
51
|
+
export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
|
52
|
+
uv run pytest tests
|
53
|
+
|
54
|
+
run-e2e-tests:
|
55
|
+
needs: [validate-branch]
|
56
|
+
if: "(github.base_ref == 'staging' || github.base_ref == 'main') && !contains(github.actor, '[bot]') && (needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped')"
|
57
|
+
strategy:
|
58
|
+
fail-fast: false
|
59
|
+
matrix:
|
60
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
61
|
+
name: E2E Tests
|
62
|
+
runs-on: ubuntu-latest
|
63
|
+
env:
|
64
|
+
TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
|
65
|
+
steps:
|
66
|
+
- name: Configure AWS Credentials
|
67
|
+
uses: aws-actions/configure-aws-credentials@v4
|
68
|
+
with:
|
69
|
+
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
70
|
+
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
71
|
+
aws-region: us-west-1
|
72
|
+
|
73
|
+
- name: Checkout code
|
74
|
+
uses: actions/checkout@v4
|
75
|
+
|
76
|
+
- name: Set env based on branch
|
77
|
+
run: |
|
78
|
+
if [ "${{ github.base_ref }}" = "main" ]; then
|
79
|
+
echo "TARGET_ENV=main" >> "$GITHUB_ENV"
|
80
|
+
echo "BASE_URL=https://api.judgmentlabs.ai" >> "$GITHUB_ENV"
|
81
|
+
echo "SECRETS_PATH=prod/api-keys/e2e-tests" >> "$GITHUB_ENV"
|
82
|
+
echo "COVERAGE_ARTIFACT=coverage-html-production-${{ matrix.python-version }}" >> "$GITHUB_ENV"
|
83
|
+
else
|
84
|
+
echo "TARGET_ENV=staging" >> "$GITHUB_ENV"
|
85
|
+
echo "BASE_URL=https://staging.api.judgmentlabs.ai" >> "$GITHUB_ENV"
|
86
|
+
echo "SECRETS_PATH=stg/api-keys/e2e-tests" >> "$GITHUB_ENV"
|
87
|
+
echo "COVERAGE_ARTIFACT=coverage-html-staging-${{ matrix.python-version }}" >> "$GITHUB_ENV"
|
88
|
+
fi
|
89
|
+
|
90
|
+
- name: Restore uv cache
|
91
|
+
uses: actions/cache/restore@v4
|
92
|
+
id: restore-uv-cache
|
93
|
+
with:
|
94
|
+
path: ~/.cache/uv/
|
95
|
+
key: ${{ runner.os }}-uv-judgment-${{ hashFiles('./**/uv.lock') }}
|
96
|
+
restore-keys: |
|
97
|
+
${{ runner.os }}-uv-judgment-
|
98
|
+
${{ runner.os }}-uv-
|
99
|
+
|
100
|
+
- name: Set up Python
|
101
|
+
uses: actions/setup-python@v4
|
102
|
+
with:
|
103
|
+
python-version: ${{ matrix.python-version }}
|
104
|
+
|
105
|
+
- name: Install judgeval dependencies
|
106
|
+
run: |
|
107
|
+
pip install uv
|
108
|
+
uv sync --dev
|
109
|
+
|
110
|
+
- name: Check if server is running
|
111
|
+
run: |
|
112
|
+
if ! curl -s "$BASE_URL/health" > /dev/null; then
|
113
|
+
echo "Judgment server ($BASE_URL) is not running properly. Check CloudWatch logs."
|
114
|
+
exit 1
|
115
|
+
else
|
116
|
+
echo "Server is running."
|
117
|
+
fi
|
118
|
+
|
119
|
+
- name: Run E2E tests
|
120
|
+
working-directory: src
|
121
|
+
run: |
|
122
|
+
SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id "$SECRETS_PATH" --query SecretString --output text)
|
123
|
+
export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
|
124
|
+
export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
|
125
|
+
export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
|
126
|
+
export JUDGMENT_API_URL="$BASE_URL"
|
127
|
+
timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
|
128
|
+
|
129
|
+
- name: Upload coverage HTML report
|
130
|
+
if: always()
|
131
|
+
uses: actions/upload-artifact@v4
|
132
|
+
with:
|
133
|
+
name: ${{ env.COVERAGE_ARTIFACT }}
|
134
|
+
path: src/htmlcov
|
135
|
+
|
136
|
+
- name: Save uv cache
|
137
|
+
uses: actions/cache/save@v4
|
138
|
+
if: always() && steps.restore-uv-cache.outputs.cache-hit != 'true'
|
139
|
+
with:
|
140
|
+
path: ~/.cache/uv/
|
141
|
+
key: ${{ runner.os }}-uv-judgment-${{ hashFiles('./**/uv.lock') }}
|
@@ -1,176 +0,0 @@
|
|
1
|
-
name: CI
|
2
|
-
|
3
|
-
on:
|
4
|
-
pull_request:
|
5
|
-
types: [opened, synchronize, reopened]
|
6
|
-
|
7
|
-
permissions: read-all
|
8
|
-
|
9
|
-
jobs:
|
10
|
-
validate-branch:
|
11
|
-
uses: ./.github/workflows/merge-branch-check.yaml
|
12
|
-
|
13
|
-
run-tests:
|
14
|
-
needs: [validate-branch]
|
15
|
-
if: needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped'
|
16
|
-
strategy:
|
17
|
-
fail-fast: false
|
18
|
-
matrix:
|
19
|
-
os: [ubuntu-latest, macos-latest]
|
20
|
-
python-version:
|
21
|
-
- "3.10"
|
22
|
-
- "3.11"
|
23
|
-
- "3.12"
|
24
|
-
- "3.13"
|
25
|
-
name: Unit Tests
|
26
|
-
runs-on: ${{ matrix.os }}
|
27
|
-
env:
|
28
|
-
PYTHONPATH: "."
|
29
|
-
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
30
|
-
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
31
|
-
JUDGMENT_DEV: true
|
32
|
-
|
33
|
-
steps:
|
34
|
-
- name: Checkout code
|
35
|
-
uses: actions/checkout@v4
|
36
|
-
|
37
|
-
- name: Set up Python
|
38
|
-
uses: actions/setup-python@v4
|
39
|
-
with:
|
40
|
-
python-version: ${{ matrix.python-version }}
|
41
|
-
|
42
|
-
- name: Install dependencies
|
43
|
-
run: |
|
44
|
-
pip install uv
|
45
|
-
uv sync --dev
|
46
|
-
|
47
|
-
- name: Run tests
|
48
|
-
run: |
|
49
|
-
cd src
|
50
|
-
export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
|
51
|
-
export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
|
52
|
-
uv run pytest tests
|
53
|
-
|
54
|
-
run-e2e-tests-staging:
|
55
|
-
needs: [validate-branch]
|
56
|
-
if: "github.base_ref == 'staging' && !contains(github.actor, '[bot]') && (needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped')"
|
57
|
-
strategy:
|
58
|
-
fail-fast: false
|
59
|
-
matrix:
|
60
|
-
python-version:
|
61
|
-
- "3.10"
|
62
|
-
- "3.11"
|
63
|
-
- "3.12"
|
64
|
-
- "3.13"
|
65
|
-
name: Staging E2E Tests
|
66
|
-
runs-on: ubuntu-latest
|
67
|
-
env:
|
68
|
-
TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
|
69
|
-
steps:
|
70
|
-
- name: Configure AWS Credentials
|
71
|
-
uses: aws-actions/configure-aws-credentials@v4
|
72
|
-
with:
|
73
|
-
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
74
|
-
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
75
|
-
aws-region: us-west-1
|
76
|
-
|
77
|
-
- name: Checkout code
|
78
|
-
uses: actions/checkout@v4
|
79
|
-
|
80
|
-
- name: Set up Python
|
81
|
-
uses: actions/setup-python@v4
|
82
|
-
with:
|
83
|
-
python-version: ${{ matrix.python-version }}
|
84
|
-
|
85
|
-
- name: Install judgeval dependencies
|
86
|
-
run: |
|
87
|
-
pip install uv
|
88
|
-
uv sync --dev
|
89
|
-
|
90
|
-
- name: Check if server is running
|
91
|
-
run: |
|
92
|
-
if ! curl -s https://staging.api.judgmentlabs.ai/health > /dev/null; then
|
93
|
-
echo "Staging Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
|
94
|
-
exit 1
|
95
|
-
else
|
96
|
-
echo "Staging server is running."
|
97
|
-
fi
|
98
|
-
|
99
|
-
- name: Run E2E tests
|
100
|
-
working-directory: src
|
101
|
-
run: |
|
102
|
-
SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id stg/api-keys/e2e-tests --query SecretString --output text)
|
103
|
-
export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
|
104
|
-
export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
|
105
|
-
export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
|
106
|
-
export JUDGMENT_API_URL=https://staging.api.judgmentlabs.ai
|
107
|
-
timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
|
108
|
-
|
109
|
-
- name: Upload coverage HTML report (staging)
|
110
|
-
if: always()
|
111
|
-
uses: actions/upload-artifact@v4
|
112
|
-
with:
|
113
|
-
name: coverage-html-staging-${{ matrix.python-version }}
|
114
|
-
path: src/htmlcov
|
115
|
-
|
116
|
-
run-e2e-tests-main:
|
117
|
-
needs: [validate-branch]
|
118
|
-
if: "github.base_ref == 'main' && !contains(github.actor, '[bot]') && needs.validate-branch.result == 'success'"
|
119
|
-
strategy:
|
120
|
-
fail-fast: false
|
121
|
-
matrix:
|
122
|
-
python-version:
|
123
|
-
- "3.10"
|
124
|
-
- "3.11"
|
125
|
-
- "3.12"
|
126
|
-
- "3.13"
|
127
|
-
name: Production E2E Tests
|
128
|
-
runs-on: ubuntu-latest
|
129
|
-
env:
|
130
|
-
TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
|
131
|
-
steps:
|
132
|
-
- name: Configure AWS Credentials
|
133
|
-
uses: aws-actions/configure-aws-credentials@v4
|
134
|
-
with:
|
135
|
-
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
136
|
-
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
137
|
-
aws-region: us-west-1
|
138
|
-
|
139
|
-
- name: Checkout code
|
140
|
-
uses: actions/checkout@v4
|
141
|
-
|
142
|
-
- name: Set up Python
|
143
|
-
uses: actions/setup-python@v4
|
144
|
-
with:
|
145
|
-
python-version: ${{ matrix.python-version }}
|
146
|
-
|
147
|
-
- name: Install judgeval dependencies
|
148
|
-
run: |
|
149
|
-
pip install uv
|
150
|
-
uv sync --dev
|
151
|
-
|
152
|
-
- name: Check if server is running
|
153
|
-
run: |
|
154
|
-
if ! curl -s https://api.judgmentlabs.ai/health > /dev/null; then
|
155
|
-
echo "Production Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
|
156
|
-
exit 1
|
157
|
-
else
|
158
|
-
echo "Production server is running."
|
159
|
-
fi
|
160
|
-
|
161
|
-
- name: Run E2E tests
|
162
|
-
working-directory: src
|
163
|
-
run: |
|
164
|
-
SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id prod/api-keys/e2e-tests --query SecretString --output text)
|
165
|
-
export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
|
166
|
-
export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
|
167
|
-
export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
|
168
|
-
export JUDGMENT_API_URL=https://api.judgmentlabs.ai
|
169
|
-
timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
|
170
|
-
|
171
|
-
- name: Upload coverage HTML report (production)
|
172
|
-
if: always()
|
173
|
-
uses: actions/upload-artifact@v4
|
174
|
-
with:
|
175
|
-
name: coverage-html-production-${{ matrix.python-version }}
|
176
|
-
path: src/htmlcov
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{judgeval-0.13.0 → judgeval-0.14.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|