judgeval 0.13.0__tar.gz → 0.14.0__tar.gz

This diff shows the content changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the versions exactly as they appear in their respective public registries.
Files changed (134)
  1. judgeval-0.14.0/.github/workflows/ci.yaml +141 -0
  2. {judgeval-0.13.0 → judgeval-0.14.0}/PKG-INFO +1 -1
  3. {judgeval-0.13.0 → judgeval-0.14.0}/pyproject.toml +1 -1
  4. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/version.py +1 -1
  5. judgeval-0.13.0/.github/workflows/ci.yaml +0 -176
  6. {judgeval-0.13.0 → judgeval-0.14.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  7. {judgeval-0.13.0 → judgeval-0.14.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  8. {judgeval-0.13.0 → judgeval-0.14.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  9. {judgeval-0.13.0 → judgeval-0.14.0}/.github/pull_request_template.md +0 -0
  10. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/blocked-pr.yaml +0 -0
  11. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/claude-code-review.yml +0 -0
  12. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/claude.yml +0 -0
  13. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/lint.yaml +0 -0
  14. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/merge-branch-check.yaml +0 -0
  15. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/mypy.yaml +0 -0
  16. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  17. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/release.yaml +0 -0
  18. {judgeval-0.13.0 → judgeval-0.14.0}/.github/workflows/validate-branch.yaml +0 -0
  19. {judgeval-0.13.0 → judgeval-0.14.0}/.gitignore +0 -0
  20. {judgeval-0.13.0 → judgeval-0.14.0}/.pre-commit-config.yaml +0 -0
  21. {judgeval-0.13.0 → judgeval-0.14.0}/LICENSE.md +0 -0
  22. {judgeval-0.13.0 → judgeval-0.14.0}/README.md +0 -0
  23. {judgeval-0.13.0 → judgeval-0.14.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  24. {judgeval-0.13.0 → judgeval-0.14.0}/assets/agent.gif +0 -0
  25. {judgeval-0.13.0 → judgeval-0.14.0}/assets/agent_trace_example.png +0 -0
  26. {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/company.jpg +0 -0
  27. {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/company_banner.jpg +0 -0
  28. {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/darkmode.svg +0 -0
  29. {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/full_logo.png +0 -0
  30. {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/icon.png +0 -0
  31. {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/lightmode.svg +0 -0
  32. {judgeval-0.13.0 → judgeval-0.14.0}/assets/brand/white_background.png +0 -0
  33. {judgeval-0.13.0 → judgeval-0.14.0}/assets/data.gif +0 -0
  34. {judgeval-0.13.0 → judgeval-0.14.0}/assets/dataset_clustering_screenshot.png +0 -0
  35. {judgeval-0.13.0 → judgeval-0.14.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
  36. {judgeval-0.13.0 → judgeval-0.14.0}/assets/datasets_preview_screenshot.png +0 -0
  37. {judgeval-0.13.0 → judgeval-0.14.0}/assets/document.gif +0 -0
  38. {judgeval-0.13.0 → judgeval-0.14.0}/assets/error_analysis_dashboard.png +0 -0
  39. {judgeval-0.13.0 → judgeval-0.14.0}/assets/errors.png +0 -0
  40. {judgeval-0.13.0 → judgeval-0.14.0}/assets/experiments_dashboard_screenshot.png +0 -0
  41. {judgeval-0.13.0 → judgeval-0.14.0}/assets/experiments_page.png +0 -0
  42. {judgeval-0.13.0 → judgeval-0.14.0}/assets/experiments_pagev2.png +0 -0
  43. {judgeval-0.13.0 → judgeval-0.14.0}/assets/logo-dark.svg +0 -0
  44. {judgeval-0.13.0 → judgeval-0.14.0}/assets/logo-light.svg +0 -0
  45. {judgeval-0.13.0 → judgeval-0.14.0}/assets/monitoring_screenshot.png +0 -0
  46. {judgeval-0.13.0 → judgeval-0.14.0}/assets/new_darkmode.svg +0 -0
  47. {judgeval-0.13.0 → judgeval-0.14.0}/assets/new_lightmode.svg +0 -0
  48. {judgeval-0.13.0 → judgeval-0.14.0}/assets/online_eval.png +0 -0
  49. {judgeval-0.13.0 → judgeval-0.14.0}/assets/product_shot.png +0 -0
  50. {judgeval-0.13.0 → judgeval-0.14.0}/assets/test.png +0 -0
  51. {judgeval-0.13.0 → judgeval-0.14.0}/assets/tests.png +0 -0
  52. {judgeval-0.13.0 → judgeval-0.14.0}/assets/trace.gif +0 -0
  53. {judgeval-0.13.0 → judgeval-0.14.0}/assets/trace_demo.png +0 -0
  54. {judgeval-0.13.0 → judgeval-0.14.0}/assets/trace_screenshot.png +0 -0
  55. {judgeval-0.13.0 → judgeval-0.14.0}/assets/trace_screenshot_old.png +0 -0
  56. {judgeval-0.13.0 → judgeval-0.14.0}/pytest.ini +0 -0
  57. {judgeval-0.13.0 → judgeval-0.14.0}/scripts/api_generator.py +0 -0
  58. {judgeval-0.13.0 → judgeval-0.14.0}/scripts/openapi_transform.py +0 -0
  59. {judgeval-0.13.0 → judgeval-0.14.0}/scripts/update_types.sh +0 -0
  60. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/__init__.py +0 -0
  61. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/api/__init__.py +0 -0
  62. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/api/api_types.py +0 -0
  63. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/cli.py +0 -0
  64. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/constants.py +0 -0
  65. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/__init__.py +0 -0
  66. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/evaluation_run.py +0 -0
  67. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/example.py +0 -0
  68. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/judgment_types.py +0 -0
  69. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/result.py +0 -0
  70. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/scorer_data.py +0 -0
  71. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  72. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  73. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/data/trace.py +0 -0
  74. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/dataset/__init__.py +0 -0
  75. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/env.py +0 -0
  76. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/evaluation/__init__.py +0 -0
  77. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/exceptions.py +0 -0
  78. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/integrations/langgraph/__init__.py +0 -0
  79. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/__init__.py +0 -0
  80. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/base_judge.py +0 -0
  81. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/litellm_judge.py +0 -0
  82. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/together_judge.py +0 -0
  83. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/judges/utils.py +0 -0
  84. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/logger.py +0 -0
  85. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/__init__.py +0 -0
  86. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/agent_scorer.py +0 -0
  87. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/api_scorer.py +0 -0
  88. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/base_scorer.py +0 -0
  89. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/example_scorer.py +0 -0
  90. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/exceptions.py +0 -0
  91. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  92. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  93. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  94. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  95. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  96. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  97. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
  98. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/score.py +0 -0
  99. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/scorers/utils.py +0 -0
  100. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/__init__.py +0 -0
  101. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/constants.py +0 -0
  102. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/exporters/__init__.py +0 -0
  103. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/exporters/s3.py +0 -0
  104. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/exporters/store.py +0 -0
  105. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/exporters/utils.py +0 -0
  106. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/keys.py +0 -0
  107. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/__init__.py +0 -0
  108. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/anthropic/__init__.py +0 -0
  109. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/google/__init__.py +0 -0
  110. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/groq/__init__.py +0 -0
  111. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/openai/__init__.py +0 -0
  112. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/providers.py +0 -0
  113. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/llm/together/__init__.py +0 -0
  114. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/local_eval_queue.py +0 -0
  115. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/managers.py +0 -0
  116. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/processors/__init__.py +0 -0
  117. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/tracer/utils.py +0 -0
  118. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/__init__.py +0 -0
  119. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/config.py +0 -0
  120. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/console.py +0 -0
  121. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/trainable_model.py +0 -0
  122. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/trainer/trainer.py +0 -0
  123. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/async_utils.py +0 -0
  124. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/decorators.py +0 -0
  125. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/file_utils.py +0 -0
  126. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/guards.py +0 -0
  127. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/meta.py +0 -0
  128. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/serialize.py +0 -0
  129. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/testing.py +0 -0
  130. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/url.py +0 -0
  131. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/utils/version_check.py +0 -0
  132. {judgeval-0.13.0 → judgeval-0.14.0}/src/judgeval/warnings.py +0 -0
  133. {judgeval-0.13.0 → judgeval-0.14.0}/update_version.py +0 -0
  134. {judgeval-0.13.0 → judgeval-0.14.0}/uv.lock +0 -0
@@ -0,0 +1,141 @@
+ name: CI
+
+ on:
+   pull_request:
+     types: [opened, synchronize, reopened]
+
+ permissions: read-all
+
+ jobs:
+   validate-branch:
+     uses: ./.github/workflows/merge-branch-check.yaml
+
+   run-tests:
+     needs: [validate-branch]
+     if: needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped'
+     strategy:
+       fail-fast: false
+       matrix:
+         os: [ubuntu-latest, macos-latest]
+         python-version:
+           - "3.10"
+           - "3.11"
+           - "3.12"
+           - "3.13"
+     name: Unit Tests
+     runs-on: ${{ matrix.os }}
+     env:
+       PYTHONPATH: "."
+       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+       TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
+       JUDGMENT_DEV: true
+
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Set up Python
+         uses: actions/setup-python@v4
+         with:
+           python-version: ${{ matrix.python-version }}
+
+       - name: Install dependencies
+         run: |
+           pip install uv
+           uv sync --dev
+
+       - name: Run tests
+         run: |
+           cd src
+           export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
+           export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
+           uv run pytest tests
+
+   run-e2e-tests:
+     needs: [validate-branch]
+     if: "(github.base_ref == 'staging' || github.base_ref == 'main') && !contains(github.actor, '[bot]') && (needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped')"
+     strategy:
+       fail-fast: false
+       matrix:
+         python-version: ["3.10", "3.11", "3.12", "3.13"]
+     name: E2E Tests
+     runs-on: ubuntu-latest
+     env:
+       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
+     steps:
+       - name: Configure AWS Credentials
+         uses: aws-actions/configure-aws-credentials@v4
+         with:
+           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+           aws-region: us-west-1
+
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Set env based on branch
+         run: |
+           if [ "${{ github.base_ref }}" = "main" ]; then
+             echo "TARGET_ENV=main" >> "$GITHUB_ENV"
+             echo "BASE_URL=https://api.judgmentlabs.ai" >> "$GITHUB_ENV"
+             echo "SECRETS_PATH=prod/api-keys/e2e-tests" >> "$GITHUB_ENV"
+             echo "COVERAGE_ARTIFACT=coverage-html-production-${{ matrix.python-version }}" >> "$GITHUB_ENV"
+           else
+             echo "TARGET_ENV=staging" >> "$GITHUB_ENV"
+             echo "BASE_URL=https://staging.api.judgmentlabs.ai" >> "$GITHUB_ENV"
+             echo "SECRETS_PATH=stg/api-keys/e2e-tests" >> "$GITHUB_ENV"
+             echo "COVERAGE_ARTIFACT=coverage-html-staging-${{ matrix.python-version }}" >> "$GITHUB_ENV"
+           fi
+
+       - name: Restore uv cache
+         uses: actions/cache/restore@v4
+         id: restore-uv-cache
+         with:
+           path: ~/.cache/uv/
+           key: ${{ runner.os }}-uv-judgment-${{ hashFiles('./**/uv.lock') }}
+           restore-keys: |
+             ${{ runner.os }}-uv-judgment-
+             ${{ runner.os }}-uv-
+
+       - name: Set up Python
+         uses: actions/setup-python@v4
+         with:
+           python-version: ${{ matrix.python-version }}
+
+       - name: Install judgeval dependencies
+         run: |
+           pip install uv
+           uv sync --dev
+
+       - name: Check if server is running
+         run: |
+           if ! curl -s "$BASE_URL/health" > /dev/null; then
+             echo "Judgment server ($BASE_URL) is not running properly. Check CloudWatch logs."
+             exit 1
+           else
+             echo "Server is running."
+           fi
+
+       - name: Run E2E tests
+         working-directory: src
+         run: |
+           SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id "$SECRETS_PATH" --query SecretString --output text)
+           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
+           export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
+           export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
+           export JUDGMENT_API_URL="$BASE_URL"
+           timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
+
+       - name: Upload coverage HTML report
+         if: always()
+         uses: actions/upload-artifact@v4
+         with:
+           name: ${{ env.COVERAGE_ARTIFACT }}
+           path: src/htmlcov
+
+       - name: Save uv cache
+         uses: actions/cache/save@v4
+         if: always() && steps.restore-uv-cache.outputs.cache-hit != 'true'
+         with:
+           path: ~/.cache/uv/
+           key: ${{ runner.os }}-uv-judgment-${{ hashFiles('./**/uv.lock') }}
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: judgeval
- Version: 0.13.0
+ Version: 0.14.0
  Summary: Judgeval Package
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
  [project]
  name = "judgeval"
- version = "0.13.0"
+ version = "0.14.0"
  authors = [
      { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
      { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -1,4 +1,4 @@
- __version__ = "0.13.0"
+ __version__ = "0.14.0"


  def get_version() -> str:
@@ -1,176 +0,0 @@
- name: CI
-
- on:
-   pull_request:
-     types: [opened, synchronize, reopened]
-
- permissions: read-all
-
- jobs:
-   validate-branch:
-     uses: ./.github/workflows/merge-branch-check.yaml
-
-   run-tests:
-     needs: [validate-branch]
-     if: needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped'
-     strategy:
-       fail-fast: false
-       matrix:
-         os: [ubuntu-latest, macos-latest]
-         python-version:
-           - "3.10"
-           - "3.11"
-           - "3.12"
-           - "3.13"
-     name: Unit Tests
-     runs-on: ${{ matrix.os }}
-     env:
-       PYTHONPATH: "."
-       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-       TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
-       JUDGMENT_DEV: true
-
-     steps:
-       - name: Checkout code
-         uses: actions/checkout@v4
-
-       - name: Set up Python
-         uses: actions/setup-python@v4
-         with:
-           python-version: ${{ matrix.python-version }}
-
-       - name: Install dependencies
-         run: |
-           pip install uv
-           uv sync --dev
-
-       - name: Run tests
-         run: |
-           cd src
-           export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
-           export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
-           uv run pytest tests
-
-   run-e2e-tests-staging:
-     needs: [validate-branch]
-     if: "github.base_ref == 'staging' && !contains(github.actor, '[bot]') && (needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped')"
-     strategy:
-       fail-fast: false
-       matrix:
-         python-version:
-           - "3.10"
-           - "3.11"
-           - "3.12"
-           - "3.13"
-     name: Staging E2E Tests
-     runs-on: ubuntu-latest
-     env:
-       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
-     steps:
-       - name: Configure AWS Credentials
-         uses: aws-actions/configure-aws-credentials@v4
-         with:
-           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-           aws-region: us-west-1
-
-       - name: Checkout code
-         uses: actions/checkout@v4
-
-       - name: Set up Python
-         uses: actions/setup-python@v4
-         with:
-           python-version: ${{ matrix.python-version }}
-
-       - name: Install judgeval dependencies
-         run: |
-           pip install uv
-           uv sync --dev
-
-       - name: Check if server is running
-         run: |
-           if ! curl -s https://staging.api.judgmentlabs.ai/health > /dev/null; then
-             echo "Staging Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
-             exit 1
-           else
-             echo "Staging server is running."
-           fi
-
-       - name: Run E2E tests
-         working-directory: src
-         run: |
-           SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id stg/api-keys/e2e-tests --query SecretString --output text)
-           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
-           export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
-           export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
-           export JUDGMENT_API_URL=https://staging.api.judgmentlabs.ai
-           timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
-
-       - name: Upload coverage HTML report (staging)
-         if: always()
-         uses: actions/upload-artifact@v4
-         with:
-           name: coverage-html-staging-${{ matrix.python-version }}
-           path: src/htmlcov
-
-   run-e2e-tests-main:
-     needs: [validate-branch]
-     if: "github.base_ref == 'main' && !contains(github.actor, '[bot]') && needs.validate-branch.result == 'success'"
-     strategy:
-       fail-fast: false
-       matrix:
-         python-version:
-           - "3.10"
-           - "3.11"
-           - "3.12"
-           - "3.13"
-     name: Production E2E Tests
-     runs-on: ubuntu-latest
-     env:
-       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
-     steps:
-       - name: Configure AWS Credentials
-         uses: aws-actions/configure-aws-credentials@v4
-         with:
-           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-           aws-region: us-west-1
-
-       - name: Checkout code
-         uses: actions/checkout@v4
-
-       - name: Set up Python
-         uses: actions/setup-python@v4
-         with:
-           python-version: ${{ matrix.python-version }}
-
-       - name: Install judgeval dependencies
-         run: |
-           pip install uv
-           uv sync --dev
-
-       - name: Check if server is running
-         run: |
-           if ! curl -s https://api.judgmentlabs.ai/health > /dev/null; then
-             echo "Production Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
-             exit 1
-           else
-             echo "Production server is running."
-           fi
-
-       - name: Run E2E tests
-         working-directory: src
-         run: |
-           SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id prod/api-keys/e2e-tests --query SecretString --output text)
-           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
-           export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
-           export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
-           export JUDGMENT_API_URL=https://api.judgmentlabs.ai
-           timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
-
-       - name: Upload coverage HTML report (production)
-         if: always()
-         uses: actions/upload-artifact@v4
-         with:
-           name: coverage-html-production-${{ matrix.python-version }}
-           path: src/htmlcov