judgeval 0.16.6__tar.gz → 0.16.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of judgeval might be problematic. Click here for more details.
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/ci.yaml +3 -1
- {judgeval-0.16.6 → judgeval-0.16.8}/.pre-commit-config.yaml +2 -2
- {judgeval-0.16.6 → judgeval-0.16.8}/PKG-INFO +1 -1
- {judgeval-0.16.6 → judgeval-0.16.8}/pyproject.toml +2 -2
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/api/api_types.py +1 -2
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/judgment_types.py +1 -2
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/__init__.py +7 -52
- judgeval-0.16.8/src/judgeval/tracer/llm/config.py +78 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/llm/constants.py +0 -1
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/config.py +6 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_google/config.py +6 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_google/generate_content.py +125 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_google/wrapper.py +30 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/config.py +6 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/responses.py +444 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_together/config.py +6 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/llm_together/wrapper.py +52 -0
- judgeval-0.16.8/src/judgeval/tracer/llm/providers.py +19 -0
- judgeval-0.16.8/src/judgeval/utils/decorators/dont_throw.py +37 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/README.md +3 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/__init__.py +15 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval-0.16.8/src/judgeval/utils/wrappers/utils.py +35 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/version.py +1 -1
- {judgeval-0.16.6 → judgeval-0.16.8}/uv.lock +744 -626
- judgeval-0.16.6/src/judgeval/tracer/llm/config.py +0 -110
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_anthropic/config.py +0 -20
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +0 -640
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_google/config.py +0 -24
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_google/wrapper.py +0 -465
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_groq/config.py +0 -23
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_openai/config.py +0 -32
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_openai/wrapper.py +0 -661
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_together/__init__.py +0 -0
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_together/config.py +0 -23
- judgeval-0.16.6/src/judgeval/tracer/llm/llm_together/wrapper.py +0 -503
- judgeval-0.16.6/src/judgeval/tracer/llm/providers.py +0 -63
- judgeval-0.16.6/src/judgeval/tracer/local_eval_queue.py +0 -199
- judgeval-0.16.6/src/judgeval/utils/decorators/__init__.py +0 -0
- judgeval-0.16.6/src/judgeval/utils/decorators/dont_throw.py +0 -21
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/pull_request_template.md +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/claude-code-review.yml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/claude.yml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/release.yaml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/.gitignore +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/CONTRIBUTING.md +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/LICENSE.md +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/README.md +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/Screenshot 2025-05-17 at 8.14.27/342/200/257PM.png" +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/agent.gif +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/agent_trace_example.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/brand/company.jpg +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/brand/company_banner.jpg +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/brand/darkmode.svg +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/brand/full_logo.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/brand/icon.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/brand/lightmode.svg +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/brand/white_background.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/custom_scorer_online_abm.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/data.gif +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/document.gif +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/errors.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/experiments_page.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/logo_darkmode.svg +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/logo_lightmode.svg +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/online_eval.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/product_shot.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/quickstart_trajectory_ss.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/test.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/tests.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/trace.gif +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/trace_demo.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/trace_screenshot.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/pytest.ini +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/scripts/api_generator.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/scripts/openapi_transform.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/scripts/update_types.sh +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/api/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/cli.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/constants.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/evaluation_run.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/example.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/result.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/trace.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/dataset/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/env.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/evaluation/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/exceptions.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/integrations/langgraph/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/integrations/openlit/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/logger.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/api_scorer.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/base_scorer.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/score.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/constants.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/exporters/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/exporters/s3.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/exporters/store.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/exporters/utils.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/keys.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/llm/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/llm/llm_anthropic/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/llm/llm_openai/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/managers.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/processors/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/utils.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/trainer/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/trainer/config.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/trainer/console.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/trainer/trainable_model.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/trainer/trainer.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/async_utils.py +0 -0
- {judgeval-0.16.6/src/judgeval/tracer/llm/llm_google → judgeval-0.16.8/src/judgeval/utils/decorators}/__init__.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/decorators/use_once.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/file_utils.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/guards.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/meta.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/serialize.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/testing.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/url.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/utils/version_check.py +0 -0
- /judgeval-0.16.6/src/judgeval/tracer/llm/llm_groq/__init__.py → /judgeval-0.16.8/src/judgeval/utils/wrappers/py.typed +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/warnings.py +0 -0
- {judgeval-0.16.6 → judgeval-0.16.8}/update_version.py +0 -0
|
@@ -28,6 +28,8 @@ jobs:
|
|
|
28
28
|
PYTHONPATH: "."
|
|
29
29
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
30
30
|
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
|
31
|
+
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
|
32
|
+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
31
33
|
JUDGMENT_DEV: true
|
|
32
34
|
|
|
33
35
|
steps:
|
|
@@ -49,7 +51,7 @@ jobs:
|
|
|
49
51
|
cd src
|
|
50
52
|
export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
|
|
51
53
|
export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
|
|
52
|
-
uv run pytest tests
|
|
54
|
+
uv run pytest tests -n auto
|
|
53
55
|
|
|
54
56
|
run-e2e-tests:
|
|
55
57
|
needs: [validate-branch]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "judgeval"
|
|
3
|
-
version = "0.16.6"
|
|
3
|
+
version = "0.16.8"
|
|
4
4
|
authors = [
|
|
5
5
|
{ name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
|
|
6
6
|
{ name = "Alex Shan", email = "alex@judgmentlabs.ai" },
|
|
@@ -19,7 +19,7 @@ license-files = ["LICENSE.md"]
|
|
|
19
19
|
dependencies = [
|
|
20
20
|
"dotenv",
|
|
21
21
|
"httpx>=0.28.1",
|
|
22
|
-
"litellm>=1.75.0",
|
|
22
|
+
"litellm>=1.75.0",
|
|
23
23
|
"opentelemetry-exporter-otlp>=1.36.0",
|
|
24
24
|
"opentelemetry-sdk>=1.36.0",
|
|
25
25
|
"orjson>=3.9.0",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# generated by datamodel-codegen:
|
|
2
2
|
# filename: .openapi.json
|
|
3
|
-
# timestamp: 2025-10-
|
|
3
|
+
# timestamp: 2025-10-15T19:25:00+00:00
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
|
|
@@ -94,7 +94,6 @@ class ResolveProjectNameRequest(TypedDict):
|
|
|
94
94
|
|
|
95
95
|
class ResolveProjectNameResponse(TypedDict):
|
|
96
96
|
project_id: str
|
|
97
|
-
project_created: bool
|
|
98
97
|
|
|
99
98
|
|
|
100
99
|
class TraceIdRequest(TypedDict):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# generated by datamodel-codegen:
|
|
2
2
|
# filename: .openapi.json
|
|
3
|
-
# timestamp: 2025-10-
|
|
3
|
+
# timestamp: 2025-10-15T19:24:59+00:00
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
from typing import Annotated, Any, Dict, List, Optional, Union
|
|
@@ -101,7 +101,6 @@ class ResolveProjectNameRequest(BaseModel):
|
|
|
101
101
|
|
|
102
102
|
class ResolveProjectNameResponse(BaseModel):
|
|
103
103
|
project_id: Annotated[str, Field(title="Project Id")]
|
|
104
|
-
project_created: Annotated[bool, Field(title="Project Created")]
|
|
105
104
|
|
|
106
105
|
|
|
107
106
|
class TraceIdRequest(BaseModel):
|
|
@@ -66,7 +66,6 @@ from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
|
|
|
66
66
|
from judgeval.api import JudgmentSyncClient
|
|
67
67
|
from judgeval.tracer.llm import wrap_provider
|
|
68
68
|
from judgeval.utils.url import url_for
|
|
69
|
-
from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
|
|
70
69
|
from judgeval.tracer.processors import (
|
|
71
70
|
JudgmentSpanProcessor,
|
|
72
71
|
NoOpJudgmentSpanProcessor,
|
|
@@ -99,7 +98,6 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
99
98
|
"enable_evaluation",
|
|
100
99
|
"resource_attributes",
|
|
101
100
|
"api_client",
|
|
102
|
-
"local_eval_queue",
|
|
103
101
|
"judgment_processor",
|
|
104
102
|
"tracer",
|
|
105
103
|
"agent_context",
|
|
@@ -113,7 +111,6 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
113
111
|
enable_evaluation: bool
|
|
114
112
|
resource_attributes: Optional[Dict[str, Any]]
|
|
115
113
|
api_client: JudgmentSyncClient
|
|
116
|
-
local_eval_queue: LocalEvaluationQueue
|
|
117
114
|
judgment_processor: JudgmentSpanProcessor
|
|
118
115
|
tracer: ABCTracer
|
|
119
116
|
agent_context: ContextVar[Optional[AgentContext]]
|
|
@@ -148,7 +145,6 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
148
145
|
api_key=self.api_key,
|
|
149
146
|
organization_id=self.organization_id,
|
|
150
147
|
)
|
|
151
|
-
self.local_eval_queue = LocalEvaluationQueue()
|
|
152
148
|
|
|
153
149
|
if initialize:
|
|
154
150
|
self.initialize()
|
|
@@ -159,14 +155,10 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
159
155
|
|
|
160
156
|
self.judgment_processor = NoOpJudgmentSpanProcessor()
|
|
161
157
|
if self.enable_monitoring:
|
|
162
|
-
project_id
|
|
158
|
+
project_id = Tracer._resolve_project_id(
|
|
163
159
|
self.project_name, self.api_key, self.organization_id
|
|
164
|
-
)
|
|
160
|
+
)
|
|
165
161
|
if project_id:
|
|
166
|
-
if project_created:
|
|
167
|
-
judgeval_logger.info(
|
|
168
|
-
f"Project {self.project_name} was autocreated successfully."
|
|
169
|
-
)
|
|
170
162
|
self.judgment_processor = self.get_processor(
|
|
171
163
|
tracer=self,
|
|
172
164
|
project_name=self.project_name,
|
|
@@ -190,9 +182,6 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
190
182
|
get_version(),
|
|
191
183
|
)
|
|
192
184
|
|
|
193
|
-
if self.enable_evaluation and self.enable_monitoring:
|
|
194
|
-
self.local_eval_queue.start_workers()
|
|
195
|
-
|
|
196
185
|
self._initialized = True
|
|
197
186
|
atexit.register(self._atexit_flush)
|
|
198
187
|
return self
|
|
@@ -240,14 +229,14 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
240
229
|
@staticmethod
|
|
241
230
|
def _resolve_project_id(
|
|
242
231
|
project_name: str, api_key: str, organization_id: str
|
|
243
|
-
) ->
|
|
232
|
+
) -> str:
|
|
244
233
|
"""Resolve project_id from project_name using the API."""
|
|
245
234
|
client = JudgmentSyncClient(
|
|
246
235
|
api_key=api_key,
|
|
247
236
|
organization_id=organization_id,
|
|
248
237
|
)
|
|
249
238
|
response = client.projects_resolve({"project_name": project_name})
|
|
250
|
-
return response["project_id"]
|
|
239
|
+
return response["project_id"]
|
|
251
240
|
|
|
252
241
|
def get_current_span(self):
|
|
253
242
|
return get_current_span()
|
|
@@ -299,6 +288,7 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
299
288
|
)
|
|
300
289
|
current_agent_context["is_agent_entry_point"] = False
|
|
301
290
|
|
|
291
|
+
@dont_throw
|
|
302
292
|
def record_instance_state(self, record_point: Literal["before", "after"], span):
|
|
303
293
|
current_agent_context = self.agent_context.get()
|
|
304
294
|
|
|
@@ -955,45 +945,10 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
955
945
|
eval_run.model_dump(warnings=False) # type: ignore
|
|
956
946
|
)
|
|
957
947
|
else:
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
def wait_for_completion(self, timeout: Optional[float] = 30.0) -> bool:
|
|
962
|
-
"""Wait for all evaluations and span processing to complete.
|
|
963
|
-
|
|
964
|
-
This method blocks until all queued evaluations are processed and
|
|
965
|
-
all pending spans are flushed to the server.
|
|
966
|
-
|
|
967
|
-
Args:
|
|
968
|
-
timeout: Maximum time to wait in seconds. Defaults to 30 seconds.
|
|
969
|
-
None means wait indefinitely.
|
|
970
|
-
|
|
971
|
-
Returns:
|
|
972
|
-
True if all processing completed within the timeout, False otherwise.
|
|
973
|
-
|
|
974
|
-
"""
|
|
975
|
-
try:
|
|
976
|
-
judgeval_logger.debug(
|
|
977
|
-
"Waiting for all evaluations and spans to complete..."
|
|
948
|
+
judgeval_logger.warning(
|
|
949
|
+
"The scorer provided is not hosted, skipping evaluation."
|
|
978
950
|
)
|
|
979
951
|
|
|
980
|
-
# Wait for all queued evaluation work to complete
|
|
981
|
-
eval_completed = self.local_eval_queue.wait_for_completion()
|
|
982
|
-
if not eval_completed:
|
|
983
|
-
judgeval_logger.warning(
|
|
984
|
-
f"Local evaluation queue did not complete within {timeout} seconds"
|
|
985
|
-
)
|
|
986
|
-
return False
|
|
987
|
-
|
|
988
|
-
self.force_flush()
|
|
989
|
-
|
|
990
|
-
judgeval_logger.debug("All evaluations and spans completed successfully")
|
|
991
|
-
return True
|
|
992
|
-
|
|
993
|
-
except Exception as e:
|
|
994
|
-
judgeval_logger.warning(f"Error while waiting for completion: {e}")
|
|
995
|
-
return False
|
|
996
|
-
|
|
997
952
|
|
|
998
953
|
def wrap(client: ApiClient) -> ApiClient:
|
|
999
954
|
try:
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
from judgeval.logger import judgeval_logger
|
|
4
|
+
|
|
5
|
+
from judgeval.tracer.llm.constants import ProviderType
|
|
6
|
+
from judgeval.tracer.llm.providers import (
|
|
7
|
+
HAS_OPENAI,
|
|
8
|
+
HAS_TOGETHER,
|
|
9
|
+
HAS_ANTHROPIC,
|
|
10
|
+
HAS_GOOGLE_GENAI,
|
|
11
|
+
ApiClient,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from judgeval.tracer import Tracer
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _detect_provider(client: ApiClient) -> ProviderType:
    """Classify ``client`` by the SDK class it is an instance of.

    Each SDK is imported lazily, and only when its ``HAS_*`` flag is truthy.
    The checks run in a fixed order: OpenAI, Anthropic, Together, Google
    GenAI. The first ``isinstance`` match decides the result.

    Args:
        client: The API client object to classify.

    Returns:
        The matching ``ProviderType`` member, or ``ProviderType.DEFAULT``
        (after logging a warning) when no known SDK class matches.
    """
    if HAS_OPENAI:
        from openai import AsyncOpenAI as _AsyncOpenAI, OpenAI as _OpenAI

        openai_classes = (_OpenAI, _AsyncOpenAI)
        if isinstance(client, openai_classes):
            return ProviderType.OPENAI

    if HAS_ANTHROPIC:
        from anthropic import (
            Anthropic as _Anthropic,
            AsyncAnthropic as _AsyncAnthropic,
        )

        anthropic_classes = (_Anthropic, _AsyncAnthropic)
        if isinstance(client, anthropic_classes):
            return ProviderType.ANTHROPIC

    if HAS_TOGETHER:
        from together import (  # type: ignore[import-untyped]
            AsyncTogether as _AsyncTogether,
            Together as _Together,
        )

        together_classes = (_Together, _AsyncTogether)
        if isinstance(client, together_classes):
            return ProviderType.TOGETHER

    if HAS_GOOGLE_GENAI:
        from google.genai import Client as _GoogleClient

        if isinstance(client, _GoogleClient):
            return ProviderType.GOOGLE

    # Nothing matched: warn, then report DEFAULT to the caller.
    unknown_client_message = (
        f"Unknown client type {type(client)}, Trying to wrap as OpenAI-compatible. "
        "If this is a mistake or you think we should support this client, please file an issue at https://github.com/JudgmentLabs/judgeval/issues!"
    )
    judgeval_logger.warning(unknown_client_message)
    return ProviderType.DEFAULT
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
    """
    Wraps an API client to add tracing capabilities.

    The provider is determined with ``_detect_provider`` and the client is
    handed to that provider's wrapper, which is imported only when needed.
    Anthropic, Together and Google GenAI clients each have their own wrapper.
    OpenAI clients, and any client that is not recognised, go through the
    OpenAI wrapper.

    Args:
        tracer: The tracer passed through to the provider wrapper.
        client: The API client to wrap.

    Returns:
        Whatever the selected provider wrapper returns for ``client``.
    """
    detected = _detect_provider(client)

    if detected == ProviderType.ANTHROPIC:
        from .llm_anthropic.wrapper import wrap_anthropic_client

        return wrap_anthropic_client(tracer, client)

    if detected == ProviderType.TOGETHER:
        from .llm_together.wrapper import wrap_together_client

        return wrap_together_client(tracer, client)

    if detected == ProviderType.GOOGLE:
        from .llm_google.wrapper import wrap_google_client

        return wrap_google_client(tracer, client)

    # ProviderType.OPENAI and every other value (unknown clients are treated
    # as OpenAI-compatible) share the OpenAI wrapper.
    from .llm_openai.wrapper import wrap_openai_client

    return wrap_openai_client(tracer, client)
|