judgeval 0.11.0__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/release.yaml +1 -1
- {judgeval-0.11.0 → judgeval-0.12.0}/PKG-INFO +1 -1
- {judgeval-0.11.0 → judgeval-0.12.0}/pyproject.toml +1 -1
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/__init__.py +3 -3
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/evaluation_run.py +5 -3
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/__init__.py +6 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/api_scorer.py +8 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -2
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -2
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -2
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +4 -2
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/__init__.py +6 -6
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/utils.py +2 -2
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/trainer/trainer.py +4 -4
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/version.py +1 -1
- judgeval-0.12.0/update_version.py +35 -0
- judgeval-0.11.0/update_version.py +0 -32
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/pull_request_template.md +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/ci.yaml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/claude-code-review.yml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/claude.yml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.gitignore +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/.pre-commit-config.yaml +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/LICENSE.md +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/README.md +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/agent.gif +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/agent_trace_example.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/data.gif +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/document.gif +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/errors.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/experiments_page.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/logo-dark.svg +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/logo-light.svg +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/new_darkmode.svg +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/new_lightmode.svg +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/online_eval.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/product_shot.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/test.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/tests.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/trace.gif +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/trace_demo.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/trace_screenshot.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/pytest.ini +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/scripts/api_generator.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/scripts/openapi_transform.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/scripts/update_types.sh +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/api/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/api/api_types.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/cli.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/constants.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/example.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/judgment_types.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/result.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/trace.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/dataset/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/env.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/evaluation/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/exceptions.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/integrations/langgraph/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/logger.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/base_scorer.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/score.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/constants.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/exporters/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/exporters/s3.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/exporters/store.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/exporters/utils.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/keys.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/llm/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/llm/providers.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/local_eval_queue.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/managers.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/processors/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/trainer/__init__.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/trainer/config.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/trainer/console.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/trainer/trainable_model.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/async_utils.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/decorators.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/file_utils.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/guards.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/meta.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/serialize.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/testing.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/url.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/utils/version_check.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/warnings.py +0 -0
- {judgeval-0.11.0 → judgeval-0.12.0}/uv.lock +0 -0

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/__init__.py
@@ -5,8 +5,8 @@ from judgeval.evaluation import run_eval
 from judgeval.data.evaluation_run import ExampleEvaluationRun
 
 
-from typing import List, Optional, Union
-from judgeval.scorers import
+from typing import List, Optional, Union, Sequence
+from judgeval.scorers import ExampleAPIScorerConfig
 from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.data.example import Example
 from judgeval.logger import judgeval_logger
@@ -39,7 +39,7 @@ class JudgmentClient(metaclass=SingletonMeta):
     def run_evaluation(
         self,
         examples: List[Example],
-        scorers:
+        scorers: Sequence[Union[ExampleAPIScorerConfig, ExampleScorer]],
         project_name: str = "default_project",
         eval_run_name: str = "default_eval_run",
         model: str = JUDGMENT_DEFAULT_GPT_MODEL,
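
Taken together, these two hunks mean JudgmentClient.run_evaluation now accepts any Sequence of example-level scorers (ExampleAPIScorerConfig or ExampleScorer) rather than a concrete list type. A minimal usage sketch, assuming the Example field names (input, actual_output, retrieval_context) and the JUDGMENT_* environment-variable credentials from earlier judgeval releases:

from judgeval import JudgmentClient
from judgeval.data import Example
from judgeval.scorers import FaithfulnessScorer, AnswerRelevancyScorer

# JudgmentClient is a singleton; credentials are assumed to come from the
# JUDGMENT_* environment variables as in previous releases.
client = JudgmentClient()

example = Example(
    input="Where is the Eiffel Tower?",  # field names assumed, not shown in this diff
    actual_output="The Eiffel Tower is in Paris.",
    retrieval_context=["The Eiffel Tower is located in Paris, France."],
)

# A tuple is accepted now that `scorers` is typed as Sequence[...]; both
# entries are ExampleAPIScorerConfig subclasses after this release.
results = client.run_evaluation(
    examples=[example],
    scorers=(FaithfulnessScorer(threshold=0.7), AnswerRelevancyScorer(threshold=0.7)),
    project_name="default_project",
    eval_run_name="sequence_scorers_demo",
)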

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/data/evaluation_run.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Union, Tuple
+from typing import List, Optional, Union, Tuple, Sequence
 from pydantic import field_validator, model_validator, Field, BaseModel
 from datetime import datetime, timezone
 import uuid
@@ -19,8 +19,10 @@ class EvaluationRun(BaseModel):
         default_factory=lambda: datetime.now(timezone.utc).isoformat()
     )
     custom_scorers: List[ExampleScorer] = Field(default_factory=list)
-    judgment_scorers:
-    scorers:
+    judgment_scorers: Sequence[APIScorerConfig] = Field(default_factory=list)
+    scorers: Sequence[Union[ExampleScorer, APIScorerConfig]] = Field(
+        default_factory=list
+    )
     model: str
 
     def __init__(

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/__init__.py
@@ -1,7 +1,10 @@
 from judgeval.scorers.api_scorer import (
     APIScorerConfig,
+    ExampleAPIScorerConfig,
+    TraceAPIScorerConfig,
 )
 from judgeval.scorers.base_scorer import BaseScorer
+from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.scorers.judgeval_scorers.api_scorers import (
     FaithfulnessScorer,
     AnswerRelevancyScorer,
@@ -13,7 +16,10 @@ from judgeval.scorers.judgeval_scorers.api_scorers import (
 
 __all__ = [
     "APIScorerConfig",
+    "ExampleAPIScorerConfig",
+    "TraceAPIScorerConfig",
     "BaseScorer",
+    "ExampleScorer",
     "TracePromptScorer",
     "PromptScorer",
     "FaithfulnessScorer",

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/api_scorer.py
@@ -63,3 +63,11 @@ class APIScorerConfig(BaseModel):
 
     def __str__(self):
         return f"JudgmentScorer(score_type={self.score_type.value}, threshold={self.threshold})"
+
+
+class ExampleAPIScorerConfig(APIScorerConfig):
+    pass
+
+
+class TraceAPIScorerConfig(APIScorerConfig):
+    pass
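
The two subclasses added here are plain markers (each body is just `pass`), but they are what the client, tracer, and trainer hunks below key their isinstance checks on. A quick sanity sketch of the resulting hierarchy, using only names exported in this diff:

from judgeval.scorers import (
    APIScorerConfig,
    ExampleAPIScorerConfig,
    TraceAPIScorerConfig,
    FaithfulnessScorer,
)

# Both markers simply specialize APIScorerConfig.
assert issubclass(ExampleAPIScorerConfig, APIScorerConfig)
assert issubclass(TraceAPIScorerConfig, APIScorerConfig)

# Built-in example-level scorers are re-based onto the new marker
# (see the api_scorers hunks that follow).
assert issubclass(FaithfulnessScorer, ExampleAPIScorerConfig)
assert not issubclass(FaithfulnessScorer, TraceAPIScorerConfig)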

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py
@@ -6,13 +6,13 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.api_scorer import
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
 
 
-class AnswerCorrectnessScorer(
+class AnswerCorrectnessScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.ANSWER_CORRECTNESS
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py
@@ -1,10 +1,10 @@
-from judgeval.scorers.api_scorer import
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
 
 
-class AnswerRelevancyScorer(
+class AnswerRelevancyScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.ANSWER_RELEVANCY
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py
@@ -6,13 +6,13 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.api_scorer import
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
 
 
-class FaithfulnessScorer(
+class FaithfulnessScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.FAITHFULNESS
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py
@@ -6,12 +6,12 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.api_scorer import
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 
 
-class InstructionAdherenceScorer(
+class InstructionAdherenceScorer(ExampleAPIScorerConfig):
     def __init__(self, threshold: float):
         super().__init__(
             threshold=threshold,

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py
@@ -1,5 +1,7 @@
 from judgeval.scorers.api_scorer import (
     APIScorerConfig,
+    ExampleAPIScorerConfig,
+    TraceAPIScorerConfig,
 )
 from judgeval.constants import APIScorerType
 from typing import Dict, Any, Optional
@@ -282,9 +284,9 @@ class BasePromptScorer(ABC, APIScorerConfig):
         return base
 
 
-class PromptScorer(BasePromptScorer,
+class PromptScorer(BasePromptScorer, ExampleAPIScorerConfig):
     pass
 
 
-class TracePromptScorer(BasePromptScorer,
+class TracePromptScorer(BasePromptScorer, TraceAPIScorerConfig):
     pass
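
With this change the two prompt-scorer variants land on opposite sides of the new split: PromptScorer is an example-level scorer and TracePromptScorer a trace-level one. A small check, using only the exports shown in the judgeval.scorers hunk above:

from judgeval.scorers import (
    ExampleAPIScorerConfig,
    TraceAPIScorerConfig,
    PromptScorer,
    TracePromptScorer,
)

# PromptScorer satisfies the example-level checks in JudgmentClient/Tracer,
# while TracePromptScorer satisfies the trace-level check.
assert issubclass(PromptScorer, ExampleAPIScorerConfig)
assert issubclass(TracePromptScorer, TraceAPIScorerConfig)
assert not issubclass(PromptScorer, TraceAPIScorerConfig)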

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/__init__.py
@@ -43,7 +43,7 @@ from judgeval.env import (
     JUDGMENT_ORG_ID,
 )
 from judgeval.logger import judgeval_logger
-from judgeval.scorers.api_scorer import
+from judgeval.scorers.api_scorer import TraceAPIScorerConfig, ExampleAPIScorerConfig
 from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.tracer.constants import JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME
 from judgeval.tracer.managers import (
@@ -328,7 +328,7 @@ class Tracer:
         run_condition = scorer_config.run_condition
         sampling_rate = scorer_config.sampling_rate
 
-        if not isinstance(scorer, (
+        if not isinstance(scorer, (TraceAPIScorerConfig)):
             judgeval_logger.error(
                 "Scorer must be an instance of TraceAPIScorerConfig, got %s, skipping evaluation."
                 % type(scorer)
@@ -861,7 +861,7 @@ class Tracer:
         self,
         /,
         *,
-        scorer: Union[
+        scorer: Union[ExampleAPIScorerConfig, ExampleScorer],
         example: Example,
         model: str = JUDGMENT_DEFAULT_GPT_MODEL,
         sampling_rate: float = 1.0,
@@ -870,9 +870,9 @@ class Tracer:
             judgeval_logger.info("Evaluation is not enabled, skipping evaluation")
             return
 
-        if not isinstance(scorer, (
+        if not isinstance(scorer, (ExampleAPIScorerConfig, ExampleScorer)):
             judgeval_logger.error(
-                "Scorer must be an instance of ExampleAPIScorerConfig or
+                "Scorer must be an instance of ExampleAPIScorerConfig or ExampleScorer, got %s, skipping evaluation."
                 % type(scorer)
             )
             return
@@ -901,7 +901,7 @@ class Tracer:
         span_context = self.get_current_span().get_span_context()
         trace_id = format(span_context.trace_id, "032x")
         span_id = format(span_context.span_id, "016x")
-        hosted_scoring = isinstance(scorer,
+        hosted_scoring = isinstance(scorer, ExampleAPIScorerConfig) or (
             isinstance(scorer, ExampleScorer) and scorer.server_hosted
         )
         eval_run_name = f"async_evaluate_{span_id}" # note this name doesnt matter because we don't save the experiment only the example and scorer_data
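
The last hunk above decides whether scoring runs server-side: any ExampleAPIScorerConfig is hosted, while a custom ExampleScorer is hosted only if it is flagged server_hosted. A standalone sketch of that routing (a hypothetical helper mirroring the diff, not part of the package):

from typing import Union

from judgeval.scorers import ExampleAPIScorerConfig, ExampleScorer


def is_hosted_scoring(scorer: Union[ExampleAPIScorerConfig, ExampleScorer]) -> bool:
    # Mirrors the hosted_scoring expression in the tracer hunk above.
    return isinstance(scorer, ExampleAPIScorerConfig) or (
        isinstance(scorer, ExampleScorer) and scorer.server_hosted
    )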

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/tracer/utils.py
@@ -2,7 +2,7 @@ from typing import Any
 from opentelemetry.trace import Span
 from pydantic import BaseModel
 from typing import Callable, Optional
-from judgeval.scorers.api_scorer import
+from judgeval.scorers.api_scorer import TraceAPIScorerConfig
 from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL
 
 
@@ -14,7 +14,7 @@ def set_span_attribute(span: Span, name: str, value: Any):
 
 
 class TraceScorerConfig(BaseModel):
-    scorer:
+    scorer: TraceAPIScorerConfig
     model: str = JUDGMENT_DEFAULT_GPT_MODEL
     sampling_rate: float = 1.0
     run_condition: Optional[Callable[..., bool]] = None

{judgeval-0.11.0 → judgeval-0.12.0}/src/judgeval/trainer/trainer.py
@@ -10,7 +10,7 @@ from judgeval.tracer.exporters.store import SpanStore
 from judgeval.tracer.exporters import InMemorySpanExporter
 from judgeval.tracer.keys import AttributeKeys
 from judgeval import JudgmentClient
-from judgeval.scorers import
+from judgeval.scorers import ExampleScorer, ExampleAPIScorerConfig
 from judgeval.data import Example
 from .console import _spinner_progress, _print_progress, _print_progress_update
 from judgeval.exceptions import JudgmentRuntimeError
@@ -156,7 +156,7 @@ class JudgmentTrainer:
     async def generate_rollouts_and_rewards(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[
+        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
         prompts: List[Any],
         num_prompts_per_step: Optional[int] = None,
         num_generations_per_prompt: Optional[int] = None,
@@ -266,7 +266,7 @@ class JudgmentTrainer:
     async def run_reinforcement_learning(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[
+        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
         prompts: List[Any],
     ) -> ModelConfig:
         """
@@ -372,7 +372,7 @@ class JudgmentTrainer:
     async def train(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[
+        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
         prompts: List[Any],
         rft_provider: Optional[str] = None,
     ) -> ModelConfig:

judgeval-0.12.0/update_version.py
@@ -0,0 +1,35 @@
+import sys
+
+if len(sys.argv) != 2:
+    print("Usage: python set_version.py <new_version>")
+    sys.exit(1)
+
+new_version = sys.argv[1]
+version_placeholder = "0.0.0"
+
+
+def update_file(file_path, placeholder, new_val):
+    try:
+        with open(file_path, "r") as f:
+            content = f.read()
+    except IOError as e:
+        print(f"Error: Failed to read '{file_path}': {e}")
+        sys.exit(1)
+
+    if placeholder not in content:
+        print(f"Warning: No version placeholder '{placeholder}' found in '{file_path}'")
+        sys.exit(1)
+
+    new_content = content.replace(placeholder, new_val, 1)
+
+    try:
+        with open(file_path, "w") as f:
+            f.write(new_content)
+    except IOError as e:
+        print(f"Error: Failed to write to '{file_path}': {e}")
+        sys.exit(1)
+
+
+files_to_update = ["pyproject.toml", "src/judgeval/version.py"]
+for file in files_to_update:
+    update_file(file, version_placeholder, new_version)
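
The rewritten script now patches every file in files_to_update, and str.replace(placeholder, new_val, 1) swaps only the first occurrence of the placeholder per file. A minimal illustration of that semantics (plain Python, not from the package):

content = 'version = "0.0.0"\nfallback = "0.0.0"\n'

# Only the first "0.0.0" is rewritten, matching update_file() above.
print(content.replace("0.0.0", "0.12.0", 1))
# version = "0.12.0"
# fallback = "0.0.0"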

judgeval-0.11.0/update_version.py
@@ -1,32 +0,0 @@
-import sys
-
-if len(sys.argv) != 2:
-    print("Usage: python set_version.py <new_version>")
-    sys.exit(1)
-
-new_version = sys.argv[1]
-version_placeholder = "0.0.0"
-found = False
-
-try:
-    with open("pyproject.toml", "r") as f:
-        lines = f.readlines()
-except IOError as e:
-    print(f"Error: Failed to read 'pyproject.toml': {e}")
-    sys.exit(1)
-
-try:
-    with open("pyproject.toml", "w") as f:
-        for line in lines:  # Assumes 'lines' was successfully read earlier
-            if not found and version_placeholder in line:
-                f.write(line.replace(version_placeholder, new_version))
-                found = True
-            else:
-                f.write(line)
-except IOError as e:
-    print(f"Error: Failed to write to 'pyproject.toml': {e}")
-    sys.exit(1)
-
-if not found:
-    print("Warning: No '<version_placeholder>' found in pyproject.toml")
-    sys.exit(1)