judgeval 0.9.3__tar.gz → 0.9.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/ci.yaml +25 -20
  2. {judgeval-0.9.3 → judgeval-0.9.4}/PKG-INFO +2 -2
  3. {judgeval-0.9.3 → judgeval-0.9.4}/pyproject.toml +3 -2
  4. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/__init__.py +27 -43
  5. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/processors/__init__.py +84 -6
  6. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/serialize.py +7 -1
  7. {judgeval-0.9.3 → judgeval-0.9.4}/uv.lock +2591 -2039
  8. {judgeval-0.9.3 → judgeval-0.9.4}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  9. {judgeval-0.9.3 → judgeval-0.9.4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  10. {judgeval-0.9.3 → judgeval-0.9.4}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  11. {judgeval-0.9.3 → judgeval-0.9.4}/.github/pull_request_template.md +0 -0
  12. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/blocked-pr.yaml +0 -0
  13. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/claude-code-review.yml +0 -0
  14. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/claude.yml +0 -0
  15. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/lint.yaml +0 -0
  16. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/merge-branch-check.yaml +0 -0
  17. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/mypy.yaml +0 -0
  18. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  19. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/release.yaml +0 -0
  20. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/validate-branch.yaml +0 -0
  21. {judgeval-0.9.3 → judgeval-0.9.4}/.gitignore +0 -0
  22. {judgeval-0.9.3 → judgeval-0.9.4}/.pre-commit-config.yaml +0 -0
  23. {judgeval-0.9.3 → judgeval-0.9.4}/LICENSE.md +0 -0
  24. {judgeval-0.9.3 → judgeval-0.9.4}/README.md +0 -0
  25. {judgeval-0.9.3 → judgeval-0.9.4}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  26. {judgeval-0.9.3 → judgeval-0.9.4}/assets/agent.gif +0 -0
  27. {judgeval-0.9.3 → judgeval-0.9.4}/assets/agent_trace_example.png +0 -0
  28. {judgeval-0.9.3 → judgeval-0.9.4}/assets/data.gif +0 -0
  29. {judgeval-0.9.3 → judgeval-0.9.4}/assets/dataset_clustering_screenshot.png +0 -0
  30. {judgeval-0.9.3 → judgeval-0.9.4}/assets/dataset_clustering_screenshot_dm.png +0 -0
  31. {judgeval-0.9.3 → judgeval-0.9.4}/assets/datasets_preview_screenshot.png +0 -0
  32. {judgeval-0.9.3 → judgeval-0.9.4}/assets/document.gif +0 -0
  33. {judgeval-0.9.3 → judgeval-0.9.4}/assets/error_analysis_dashboard.png +0 -0
  34. {judgeval-0.9.3 → judgeval-0.9.4}/assets/errors.png +0 -0
  35. {judgeval-0.9.3 → judgeval-0.9.4}/assets/experiments_dashboard_screenshot.png +0 -0
  36. {judgeval-0.9.3 → judgeval-0.9.4}/assets/experiments_page.png +0 -0
  37. {judgeval-0.9.3 → judgeval-0.9.4}/assets/experiments_pagev2.png +0 -0
  38. {judgeval-0.9.3 → judgeval-0.9.4}/assets/logo-dark.svg +0 -0
  39. {judgeval-0.9.3 → judgeval-0.9.4}/assets/logo-light.svg +0 -0
  40. {judgeval-0.9.3 → judgeval-0.9.4}/assets/monitoring_screenshot.png +0 -0
  41. {judgeval-0.9.3 → judgeval-0.9.4}/assets/new_darkmode.svg +0 -0
  42. {judgeval-0.9.3 → judgeval-0.9.4}/assets/new_lightmode.svg +0 -0
  43. {judgeval-0.9.3 → judgeval-0.9.4}/assets/online_eval.png +0 -0
  44. {judgeval-0.9.3 → judgeval-0.9.4}/assets/product_shot.png +0 -0
  45. {judgeval-0.9.3 → judgeval-0.9.4}/assets/test.png +0 -0
  46. {judgeval-0.9.3 → judgeval-0.9.4}/assets/tests.png +0 -0
  47. {judgeval-0.9.3 → judgeval-0.9.4}/assets/trace.gif +0 -0
  48. {judgeval-0.9.3 → judgeval-0.9.4}/assets/trace_demo.png +0 -0
  49. {judgeval-0.9.3 → judgeval-0.9.4}/assets/trace_screenshot.png +0 -0
  50. {judgeval-0.9.3 → judgeval-0.9.4}/assets/trace_screenshot_old.png +0 -0
  51. {judgeval-0.9.3 → judgeval-0.9.4}/pytest.ini +0 -0
  52. {judgeval-0.9.3 → judgeval-0.9.4}/scripts/api_generator.py +0 -0
  53. {judgeval-0.9.3 → judgeval-0.9.4}/scripts/openapi_transform.py +0 -0
  54. {judgeval-0.9.3 → judgeval-0.9.4}/scripts/update_types.sh +0 -0
  55. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/__init__.py +0 -0
  56. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/api/__init__.py +0 -0
  57. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/api/api_types.py +0 -0
  58. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/cli.py +0 -0
  59. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/constants.py +0 -0
  60. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/__init__.py +0 -0
  61. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/evaluation_run.py +0 -0
  62. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/example.py +0 -0
  63. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/judgment_types.py +0 -0
  64. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/result.py +0 -0
  65. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/scorer_data.py +0 -0
  66. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  67. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  68. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/tool.py +0 -0
  69. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/trace.py +0 -0
  70. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/trace_run.py +0 -0
  71. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/dataset/__init__.py +0 -0
  72. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/env.py +0 -0
  73. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/evaluation/__init__.py +0 -0
  74. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/exceptions.py +0 -0
  75. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/integrations/langgraph/__init__.py +0 -0
  76. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/__init__.py +0 -0
  77. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/base_judge.py +0 -0
  78. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/litellm_judge.py +0 -0
  79. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/together_judge.py +0 -0
  80. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/utils.py +0 -0
  81. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/logger.py +0 -0
  82. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/__init__.py +0 -0
  83. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/agent_scorer.py +0 -0
  84. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/api_scorer.py +0 -0
  85. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/base_scorer.py +0 -0
  86. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/example_scorer.py +0 -0
  87. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/exceptions.py +0 -0
  88. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  89. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  90. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  91. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  92. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
  93. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  94. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  95. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
  96. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -0
  97. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
  98. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/score.py +0 -0
  99. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/trace_api_scorer.py +0 -0
  100. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/utils.py +0 -0
  101. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/constants.py +0 -0
  102. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/exporters/__init__.py +0 -0
  103. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/exporters/s3.py +0 -0
  104. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/exporters/store.py +0 -0
  105. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/exporters/utils.py +0 -0
  106. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/keys.py +0 -0
  107. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/llm/__init__.py +0 -0
  108. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/llm/providers.py +0 -0
  109. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/local_eval_queue.py +0 -0
  110. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/managers.py +0 -0
  111. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/utils.py +0 -0
  112. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/__init__.py +0 -0
  113. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/config.py +0 -0
  114. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/console.py +0 -0
  115. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/trainable_model.py +0 -0
  116. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/trainer.py +0 -0
  117. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/async_utils.py +0 -0
  118. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/decorators.py +0 -0
  119. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/file_utils.py +0 -0
  120. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/guards.py +0 -0
  121. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/meta.py +0 -0
  122. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/testing.py +0 -0
  123. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/url.py +0 -0
  124. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/version_check.py +0 -0
  125. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/version.py +0 -0
  126. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/warnings.py +0 -0
  127. {judgeval-0.9.3 → judgeval-0.9.4}/update_version.py +0 -0
.github/workflows/ci.yaml
@@ -18,7 +18,10 @@ jobs:
       matrix:
         os: [ubuntu-latest, macos-latest]
         python-version:
+          - "3.10"
           - "3.11"
+          - "3.12"
+          - "3.13"
     name: Unit Tests
     runs-on: ${{ matrix.os }}
     env:
@@ -49,18 +52,19 @@ jobs:
   run-e2e-tests-staging:
     needs: [validate-branch]
     if: "github.base_ref == 'staging' && !contains(github.actor, '[bot]') && (needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped')"
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          - "3.13"
     name: Staging E2E Tests
     runs-on: ubuntu-latest
     env:
       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
     steps:
-      - name: Wait for turn
-        uses: softprops/turnstyle@v2
-        with:
-          poll-interval-seconds: 10
-          same-branch-only: false
-          job-to-wait-for: "Staging E2E Tests"
-
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v4
         with:
@@ -74,7 +78,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.11"
+          python-version: ${{ matrix.python-version }}

       - name: Install judgeval dependencies
         run: |
@@ -95,30 +99,31 @@ jobs:
         run: |
           SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions-stg-judgeval/api-keys/judgeval --query SecretString --output text)
           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
-          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
+          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests

       - name: Upload coverage HTML report (staging)
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: coverage-html-staging
+          name: coverage-html-staging-${{ matrix.python-version }}
           path: src/htmlcov

   run-e2e-tests-main:
     needs: [validate-branch]
     if: "github.base_ref == 'main' && !contains(github.actor, '[bot]') && needs.validate-branch.result == 'success'"
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          - "3.13"
     name: Production E2E Tests
     runs-on: ubuntu-latest
     env:
       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
     steps:
-      - name: Wait for turn
-        uses: softprops/turnstyle@v2
-        with:
-          poll-interval-seconds: 10
-          same-branch-only: false
-          job-to-wait-for: "Production E2E Tests"
-
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v4
         with:
@@ -132,7 +137,7 @@ jobs:
       - name: Set up Python
        uses: actions/setup-python@v4
         with:
-          python-version: "3.11"
+          python-version: ${{ matrix.python-version }}

       - name: Install judgeval dependencies
         run: |
@@ -153,11 +158,11 @@ jobs:
         run: |
           SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions-judgeval/api-keys/judgeval --query SecretString --output text)
           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
-          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
+          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests

       - name: Upload coverage HTML report (production)
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: coverage-html-production
+          name: coverage-html-production-${{ matrix.python-version }}
           path: src/htmlcov
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.9.3
+Version: 0.9.4
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -9,7 +9,7 @@ License-Expression: Apache-2.0
 License-File: LICENSE.md
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.11
+Requires-Python: >=3.10
 Requires-Dist: boto3>=1.40.11
 Requires-Dist: click<8.2.0
 Requires-Dist: dotenv
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "judgeval"
-version = "0.9.3"
+version = "0.9.4"
 authors = [
     { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
     { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -8,7 +8,7 @@ authors = [
 ]
 description = "Judgeval Package"
 readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.10"
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
@@ -75,6 +75,7 @@ dev = [
     "pytest-cov>=6.2.1",
     "types-tqdm>=4.67.0.20250809",
     "pytest-asyncio>=1.1.0",
+    "pytest-xdist>=3.8.0",
 ]


src/judgeval/tracer/__init__.py
@@ -57,7 +57,7 @@ from judgeval.utils.serialize import safe_serialize
 from judgeval.version import get_version
 from judgeval.warnings import JudgmentWarning

-from judgeval.tracer.keys import AttributeKeys, ResourceKeys, InternalAttributeKeys
+from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
 from judgeval.api import JudgmentSyncClient
 from judgeval.tracer.llm import wrap_provider
 from judgeval.utils.url import url_for
@@ -65,6 +65,7 @@ from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
 from judgeval.tracer.processors import (
     JudgmentSpanProcessor,
     NoOpJudgmentSpanProcessor,
+    NoOpSpanProcessor,
 )
 from judgeval.tracer.utils import set_span_attribute, TraceScorerConfig

@@ -85,19 +86,6 @@ class AgentContext(TypedDict):
     parent_agent_id: str | None


-def resolve_project_id(
-    api_key: str, organization_id: str, project_name: str
-) -> str | None:
-    try:
-        client = JudgmentSyncClient(
-            api_key=api_key,
-            organization_id=organization_id,
-        )
-        return client.projects_resolve({"project_name": project_name})["project_id"]
-    except Exception:
-        return None
-
-
 class Tracer:
     _active_tracers: List[Tracer] = []

@@ -188,38 +176,20 @@ class Tracer:
         self.cost_context = ContextVar("current_cost_context", default=None)

         if self.enable_monitoring:
-            project_id = resolve_project_id(
-                self.api_key, self.organization_id, self.project_name
-            )
-
-            resource_attributes = resource_attributes or {}
-            resource_attributes.update(
-                {
-                    ResourceKeys.SERVICE_NAME: self.project_name,
-                    ResourceKeys.TELEMETRY_SDK_NAME: "judgeval",
-                    ResourceKeys.TELEMETRY_SDK_VERSION: get_version(),
-                }
-            )
-
-            if project_id is not None:
-                resource_attributes[ResourceKeys.JUDGMENT_PROJECT_ID] = project_id
-            else:
-                judgeval_logger.error(
-                    f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/projects. Skipping Judgment export."
-                )
-
-            resource = Resource.create(resource_attributes)
-
             self.judgment_processor = JudgmentSpanProcessor(
                 self,
-                self.api_url,
+                self.project_name,
                 self.api_key,
                 self.organization_id,
                 max_queue_size=2**18,
                 export_timeout_millis=30000,
+                resource_attributes=resource_attributes,
             )
-            self.processors.append(self.judgment_processor)
+
+            resource = Resource.create(self.judgment_processor.resource_attributes)
             self.provider = TracerProvider(resource=resource)
+
+            self.processors.append(self.judgment_processor)
             for processor in self.processors:
                 self.provider.add_span_processor(processor)

@@ -253,6 +223,14 @@ class Tracer:
     def get_current_cost_context(self):
         return self.cost_context

+    def get_processor(self):
+        """Get the judgment span processor instance.
+
+        Returns:
+            The JudgmentSpanProcessor or NoOpJudgmentSpanProcessor instance used by this tracer.
+        """
+        return self.judgment_processor
+
     def set_customer_id(self, customer_id: str) -> None:
         span = self.get_current_span()
         if span and span.is_recording():
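The new Tracer.get_processor accessor exposes the span processor that the tracer registered. A minimal usage sketch, assuming Tracer keyword arguments inferred from attributes referenced in this diff (project_name, api_key, organization_id) rather than a documented constructor signature:

    from judgeval.tracer import Tracer

    tracer = Tracer(
        project_name="my-project",   # hypothetical placeholder values
        api_key="sk-...",
        organization_id="org_123",
    )

    # Returns the JudgmentSpanProcessor, or the no-op variant when monitoring is off.
    processor = tracer.get_processor()
    # On the real processor this is the BatchSpanProcessor method inherited by
    # JudgmentSpanProcessor; flushing before shutdown mirrors the atexit path below.
    processor.force_flush(timeout_millis=5000)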
@@ -913,11 +891,7 @@ class Tracer:
         proper cleanup before program termination.
         """
         try:
-            success = self.force_flush(timeout_millis=30000)
-            if not success:
-                judgeval_logger.warning(
-                    "Some spans may not have been exported before program exit"
-                )
+            self.force_flush(timeout_millis=30000)
         except Exception as e:
             judgeval_logger.warning(f"Error during atexit flush: {e}")

@@ -1074,3 +1048,13 @@ def format_inputs(
         return inputs
     except Exception:
         return {}
+
+
+# Export processor classes for direct access
+__all__ = [
+    "Tracer",
+    "wrap",
+    "JudgmentSpanProcessor",
+    "NoOpJudgmentSpanProcessor",
+    "NoOpSpanProcessor",
+]
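With the module-level __all__ added at the end of src/judgeval/tracer/__init__.py, the processor classes are re-exported from the tracer package. A short sketch of the imports this enables:

    from judgeval.tracer import (
        Tracer,
        wrap,
        JudgmentSpanProcessor,
        NoOpJudgmentSpanProcessor,
        NoOpSpanProcessor,
    )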
src/judgeval/tracer/processors/__init__.py
@@ -6,8 +6,13 @@ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor, SpanContext
 from opentelemetry.sdk.trace.export import (
     BatchSpanProcessor,
 )
+from opentelemetry.sdk.resources import Resource
 from judgeval.tracer.exporters import JudgmentSpanExporter
-from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
+from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys, ResourceKeys
+from judgeval.api import JudgmentSyncClient
+from judgeval.logger import judgeval_logger
+from judgeval.utils.url import url_for
+from judgeval.version import get_version

 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -31,15 +36,27 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
     def __init__(
         self,
         tracer: Tracer,
-        endpoint: str,
+        project_name: str,
         api_key: str,
         organization_id: str,
         /,
         *,
         max_queue_size: int = 2**18,
         export_timeout_millis: int = 30000,
+        resource_attributes: Optional[dict[str, Any]] = None,
     ):
         self.tracer = tracer
+        self.project_name = project_name
+        self.api_key = api_key
+        self.organization_id = organization_id
+
+        # Resolve project_id
+        self.project_id = self._resolve_project_id()
+
+        # Set up resource attributes with project_id
+        self._setup_resource_attributes(resource_attributes or {})
+
+        endpoint = url_for("/otel/v1/traces")
         super().__init__(
             JudgmentSpanExporter(
                 endpoint=endpoint,
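The processor now takes a project name instead of a pre-built endpoint and resolves the project id and OTLP endpoint itself via url_for("/otel/v1/traces"). A minimal construction sketch with placeholder credentials and a hypothetical extra resource attribute:

    from judgeval.tracer.processors import JudgmentSpanProcessor

    processor = JudgmentSpanProcessor(
        tracer,                      # an existing judgeval Tracer instance
        "my-project",                # project_name (0.9.3 passed an endpoint here)
        "sk-...",                    # api_key (placeholder)
        "org_123",                   # organization_id (placeholder)
        max_queue_size=2**18,
        export_timeout_millis=30000,
        resource_attributes={"deployment.environment": "staging"},  # optional, merged with defaults
    )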
@@ -53,6 +70,38 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
             defaultdict(dict)
         )

+    def _resolve_project_id(self) -> str | None:
+        """Resolve project_id from project_name using the API."""
+        try:
+            client = JudgmentSyncClient(
+                api_key=self.api_key,
+                organization_id=self.organization_id,
+            )
+            return client.projects_resolve({"project_name": self.project_name})[
+                "project_id"
+            ]
+        except Exception:
+            return None
+
+    def _setup_resource_attributes(self, resource_attributes: dict[str, Any]) -> None:
+        """Set up resource attributes including project_id."""
+        resource_attributes.update(
+            {
+                ResourceKeys.SERVICE_NAME: self.project_name,
+                ResourceKeys.TELEMETRY_SDK_NAME: "judgeval",
+                ResourceKeys.TELEMETRY_SDK_VERSION: get_version(),
+            }
+        )
+
+        if self.project_id is not None:
+            resource_attributes[ResourceKeys.JUDGMENT_PROJECT_ID] = self.project_id
+        else:
+            judgeval_logger.error(
+                f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/projects. Skipping Judgment export."
+            )
+
+        self.resource_attributes = resource_attributes
+
     def _get_span_key(self, span_context: SpanContext) -> tuple[int, int]:
         return (span_context.trace_id, span_context.span_id)

@@ -103,11 +152,18 @@ class JudgmentSpanProcessor(BatchSpanProcessor):

         attributes = dict(current_span.attributes or {})
         attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = current_update_id
+
+        existing_resource_attrs = (
+            dict(current_span.resource.attributes) if current_span.resource else {}
+        )
+        merged_resource_attrs = {**existing_resource_attrs, **self.resource_attributes}
+        merged_resource = Resource.create(merged_resource_attrs)
+
         partial_span = ReadableSpan(
             name=current_span.name,
             context=span_context,
             parent=current_span.parent,
-            resource=current_span.resource,
+            resource=merged_resource,
             attributes=attributes,
             events=current_span.events,
             links=current_span.links,
@@ -137,11 +193,20 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
         attributes = dict(span.attributes or {})
         attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = 20

+        existing_resource_attrs = (
+            dict(span.resource.attributes) if span.resource else {}
+        )
+        merged_resource_attrs = {
+            **existing_resource_attrs,
+            **self.resource_attributes,
+        }
+        merged_resource = Resource.create(merged_resource_attrs)
+
         final_span = ReadableSpan(
             name=span.name,
             context=span.context,
             parent=span.parent,
-            resource=span.resource,
+            resource=merged_resource,
             attributes=attributes,
             events=span.events,
             links=span.links,
@@ -160,7 +225,7 @@ class JudgmentSpanProcessor(BatchSpanProcessor):

 class NoOpJudgmentSpanProcessor(JudgmentSpanProcessor):
     def __init__(self):
-        super().__init__(None, "", "", "")  # type: ignore[arg-type]
+        pass

     def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
         pass
@@ -177,5 +242,18 @@ class NoOpJudgmentSpanProcessor(JudgmentSpanProcessor):
     def emit_partial(self) -> None:
         pass

+    def set_internal_attribute(
+        self, span_context: SpanContext, key: str, value: Any
+    ) -> None:
+        pass
+
+    def get_internal_attribute(
+        self, span_context: SpanContext, key: str, default: Any = None
+    ) -> Any:
+        return default
+
+    def increment_update_id(self, span_context: SpanContext) -> int:
+        return 0
+

-__all__ = ("NoOpSpanProcessor", "JudgmentSpanProcessor", "NoOpJudgmentSpanProcessor")
+__all__ = ["NoOpSpanProcessor", "JudgmentSpanProcessor", "NoOpJudgmentSpanProcessor"]
src/judgeval/utils/serialize.py
@@ -19,6 +19,8 @@ from pydantic import BaseModel
 from pydantic.types import SecretBytes, SecretStr
 import orjson

+from judgeval.logger import judgeval_logger
+

 """
 This module contains the encoders used by jsonable_encoder to convert Python objects to JSON serializable data types.
@@ -244,4 +246,8 @@ encoders_by_class_tuples = generate_encoders_by_class_tuples(ENCODERS_BY_TYPE)

 # Seralize arbitrary object to a json string
 def safe_serialize(obj: Any) -> str:
-    return orjson.dumps(json_encoder(obj)).decode()
+    try:
+        return orjson.dumps(json_encoder(obj)).decode()
+    except Exception as e:
+        judgeval_logger.warning(f"Error serializing object: {e}")
+        return orjson.dumps(repr(obj)).decode()
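safe_serialize is now non-raising: if json_encoder/orjson cannot handle an object, the error is logged through judgeval_logger and the JSON-encoded repr() of the object is returned instead. A small sketch of the intended behaviour:

    from judgeval.utils.serialize import safe_serialize

    print(safe_serialize({"status": "ok", "attempts": 3}))  # normal path, returns a JSON string
    # For objects the encoder cannot serialize, 0.9.4 returns
    # orjson.dumps(repr(obj)).decode() instead of propagating the exception.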