judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +173 -10
- judgeval/api/__init__.py +523 -0
- judgeval/api/api_types.py +413 -0
- judgeval/cli.py +112 -0
- judgeval/constants.py +7 -30
- judgeval/data/__init__.py +1 -3
- judgeval/data/evaluation_run.py +125 -0
- judgeval/data/example.py +14 -40
- judgeval/data/judgment_types.py +396 -146
- judgeval/data/result.py +11 -18
- judgeval/data/scorer_data.py +3 -26
- judgeval/data/scripts/openapi_transform.py +5 -5
- judgeval/data/trace.py +115 -194
- judgeval/dataset/__init__.py +335 -0
- judgeval/env.py +55 -0
- judgeval/evaluation/__init__.py +346 -0
- judgeval/exceptions.py +28 -0
- judgeval/integrations/langgraph/__init__.py +13 -0
- judgeval/integrations/openlit/__init__.py +51 -0
- judgeval/judges/__init__.py +2 -2
- judgeval/judges/litellm_judge.py +77 -16
- judgeval/judges/together_judge.py +88 -17
- judgeval/judges/utils.py +7 -20
- judgeval/judgment_attribute_keys.py +55 -0
- judgeval/{common/logger.py → logger.py} +24 -8
- judgeval/prompt/__init__.py +330 -0
- judgeval/scorers/__init__.py +11 -11
- judgeval/scorers/agent_scorer.py +15 -19
- judgeval/scorers/api_scorer.py +21 -23
- judgeval/scorers/base_scorer.py +54 -36
- judgeval/scorers/example_scorer.py +1 -3
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
- judgeval/scorers/score.py +64 -47
- judgeval/scorers/utils.py +2 -107
- judgeval/tracer/__init__.py +1111 -2
- judgeval/tracer/constants.py +1 -0
- judgeval/tracer/exporters/__init__.py +40 -0
- judgeval/tracer/exporters/s3.py +119 -0
- judgeval/tracer/exporters/store.py +59 -0
- judgeval/tracer/exporters/utils.py +32 -0
- judgeval/tracer/keys.py +63 -0
- judgeval/tracer/llm/__init__.py +7 -0
- judgeval/tracer/llm/config.py +78 -0
- judgeval/tracer/llm/constants.py +9 -0
- judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
- judgeval/tracer/llm/llm_anthropic/config.py +6 -0
- judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
- judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval/tracer/llm/llm_google/config.py +6 -0
- judgeval/tracer/llm/llm_google/generate_content.py +127 -0
- judgeval/tracer/llm/llm_google/wrapper.py +30 -0
- judgeval/tracer/llm/llm_openai/__init__.py +3 -0
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
- judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
- judgeval/tracer/llm/llm_openai/config.py +6 -0
- judgeval/tracer/llm/llm_openai/responses.py +506 -0
- judgeval/tracer/llm/llm_openai/utils.py +42 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
- judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
- judgeval/tracer/llm/llm_together/config.py +6 -0
- judgeval/tracer/llm/llm_together/wrapper.py +52 -0
- judgeval/tracer/llm/providers.py +19 -0
- judgeval/tracer/managers.py +167 -0
- judgeval/tracer/processors/__init__.py +220 -0
- judgeval/tracer/utils.py +19 -0
- judgeval/trainer/__init__.py +14 -0
- judgeval/trainer/base_trainer.py +122 -0
- judgeval/trainer/config.py +123 -0
- judgeval/trainer/console.py +144 -0
- judgeval/trainer/fireworks_trainer.py +392 -0
- judgeval/trainer/trainable_model.py +252 -0
- judgeval/trainer/trainer.py +70 -0
- judgeval/utils/async_utils.py +39 -0
- judgeval/utils/decorators/__init__.py +0 -0
- judgeval/utils/decorators/dont_throw.py +37 -0
- judgeval/utils/decorators/use_once.py +13 -0
- judgeval/utils/file_utils.py +74 -28
- judgeval/utils/guards.py +36 -0
- judgeval/utils/meta.py +27 -0
- judgeval/utils/project.py +15 -0
- judgeval/utils/serialize.py +253 -0
- judgeval/utils/testing.py +70 -0
- judgeval/utils/url.py +10 -0
- judgeval/{version_check.py → utils/version_check.py} +5 -3
- judgeval/utils/wrappers/README.md +3 -0
- judgeval/utils/wrappers/__init__.py +15 -0
- judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval/utils/wrappers/py.typed +0 -0
- judgeval/utils/wrappers/utils.py +35 -0
- judgeval/v1/__init__.py +88 -0
- judgeval/v1/data/__init__.py +7 -0
- judgeval/v1/data/example.py +44 -0
- judgeval/v1/data/scorer_data.py +42 -0
- judgeval/v1/data/scoring_result.py +44 -0
- judgeval/v1/datasets/__init__.py +6 -0
- judgeval/v1/datasets/dataset.py +214 -0
- judgeval/v1/datasets/dataset_factory.py +94 -0
- judgeval/v1/evaluation/__init__.py +6 -0
- judgeval/v1/evaluation/evaluation.py +182 -0
- judgeval/v1/evaluation/evaluation_factory.py +17 -0
- judgeval/v1/instrumentation/__init__.py +6 -0
- judgeval/v1/instrumentation/llm/__init__.py +7 -0
- judgeval/v1/instrumentation/llm/config.py +78 -0
- judgeval/v1/instrumentation/llm/constants.py +11 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
- judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
- judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
- judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
- judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
- judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
- judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
- judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
- judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
- judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
- judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
- judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
- judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
- judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
- judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
- judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
- judgeval/v1/instrumentation/llm/providers.py +19 -0
- judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
- judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
- judgeval/v1/integrations/langgraph/__init__.py +13 -0
- judgeval/v1/integrations/openlit/__init__.py +47 -0
- judgeval/v1/internal/api/__init__.py +525 -0
- judgeval/v1/internal/api/api_types.py +413 -0
- judgeval/v1/prompts/__init__.py +6 -0
- judgeval/v1/prompts/prompt.py +29 -0
- judgeval/v1/prompts/prompt_factory.py +189 -0
- judgeval/v1/py.typed +0 -0
- judgeval/v1/scorers/__init__.py +6 -0
- judgeval/v1/scorers/api_scorer.py +82 -0
- judgeval/v1/scorers/base_scorer.py +17 -0
- judgeval/v1/scorers/built_in/__init__.py +17 -0
- judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
- judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
- judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
- judgeval/v1/scorers/built_in/faithfulness.py +28 -0
- judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
- judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
- judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
- judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
- judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
- judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
- judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
- judgeval/v1/scorers/scorers_factory.py +49 -0
- judgeval/v1/tracer/__init__.py +7 -0
- judgeval/v1/tracer/base_tracer.py +520 -0
- judgeval/v1/tracer/exporters/__init__.py +14 -0
- judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
- judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
- judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
- judgeval/v1/tracer/exporters/span_store.py +50 -0
- judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
- judgeval/v1/tracer/processors/__init__.py +6 -0
- judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
- judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
- judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
- judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
- judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
- judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
- judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
- judgeval/v1/tracer/tracer.py +67 -0
- judgeval/v1/tracer/tracer_factory.py +38 -0
- judgeval/v1/trainers/__init__.py +5 -0
- judgeval/v1/trainers/base_trainer.py +62 -0
- judgeval/v1/trainers/config.py +123 -0
- judgeval/v1/trainers/console.py +144 -0
- judgeval/v1/trainers/fireworks_trainer.py +392 -0
- judgeval/v1/trainers/trainable_model.py +252 -0
- judgeval/v1/trainers/trainers_factory.py +37 -0
- judgeval/v1/utils.py +18 -0
- judgeval/version.py +5 -0
- judgeval/warnings.py +4 -0
- judgeval-0.23.0.dist-info/METADATA +266 -0
- judgeval-0.23.0.dist-info/RECORD +201 -0
- judgeval-0.23.0.dist-info/entry_points.txt +2 -0
- judgeval/clients.py +0 -34
- judgeval/common/__init__.py +0 -13
- judgeval/common/api/__init__.py +0 -3
- judgeval/common/api/api.py +0 -352
- judgeval/common/api/constants.py +0 -165
- judgeval/common/exceptions.py +0 -27
- judgeval/common/storage/__init__.py +0 -6
- judgeval/common/storage/s3_storage.py +0 -98
- judgeval/common/tracer/__init__.py +0 -31
- judgeval/common/tracer/constants.py +0 -22
- judgeval/common/tracer/core.py +0 -1916
- judgeval/common/tracer/otel_exporter.py +0 -108
- judgeval/common/tracer/otel_span_processor.py +0 -234
- judgeval/common/tracer/span_processor.py +0 -37
- judgeval/common/tracer/span_transformer.py +0 -211
- judgeval/common/tracer/trace_manager.py +0 -92
- judgeval/common/utils.py +0 -940
- judgeval/data/datasets/__init__.py +0 -4
- judgeval/data/datasets/dataset.py +0 -341
- judgeval/data/datasets/eval_dataset_client.py +0 -214
- judgeval/data/tool.py +0 -5
- judgeval/data/trace_run.py +0 -37
- judgeval/evaluation_run.py +0 -75
- judgeval/integrations/langgraph.py +0 -843
- judgeval/judges/mixture_of_judges.py +0 -286
- judgeval/judgment_client.py +0 -369
- judgeval/rules.py +0 -521
- judgeval/run_evaluation.py +0 -684
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
- judgeval/utils/alerts.py +0 -93
- judgeval/utils/requests.py +0 -50
- judgeval-0.1.0.dist-info/METADATA +0 -202
- judgeval-0.1.0.dist-info/RECORD +0 -73
- {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
- {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from typing import List, Optional, Union, Tuple, Sequence
|
|
2
|
+
from pydantic import field_validator, model_validator, Field, BaseModel
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
import uuid
|
|
5
|
+
|
|
6
|
+
from judgeval.data import Example
|
|
7
|
+
from judgeval.scorers import APIScorerConfig
|
|
8
|
+
from judgeval.scorers.example_scorer import ExampleScorer
|
|
9
|
+
from judgeval.constants import ACCEPTABLE_MODELS
|
|
10
|
+
from judgeval.data.judgment_types import (
|
|
11
|
+
ExampleEvaluationRun as ExampleEvaluationRunJudgmentType,
|
|
12
|
+
TraceEvaluationRun as TraceEvaluationRunJudgmentType,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class EvaluationRun(BaseModel):
    """
    Base configuration shared by every evaluation run.

    By default each instance receives a freshly generated UUID ``id`` and a
    UTC ISO-8601 ``created_at`` timestamp. Scorers handed to the constructor
    through ``scorers`` are split by type into ``custom_scorers``
    (``ExampleScorer`` instances) and ``judgment_scorers``
    (``APIScorerConfig`` instances). Exactly one of those two lists must be
    non-empty once the model has been built.
    """

    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    created_at: str = Field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    custom_scorers: List[ExampleScorer] = Field(default_factory=list)
    judgment_scorers: Sequence[APIScorerConfig] = Field(default_factory=list)
    scorers: Sequence[Union[ExampleScorer, APIScorerConfig]] = Field(
        default_factory=list
    )
    model: Optional[str] = None

    def __init__(
        self,
        scorers: Optional[List[Union[ExampleScorer, APIScorerConfig]]] = None,
        **kwargs,
    ):
        """
        Initialize EvaluationRun with automatic scorer classification.

        Args:
            scorers: List of scorers that will be automatically sorted into
                custom_scorers or judgment_scorers. Items that are neither an
                ExampleScorer nor an APIScorerConfig end up in neither list.
                The argument itself is not forwarded, so the ``scorers`` field
                keeps its default.
            **kwargs: Other initialization arguments. When ``scorers`` is
                given, any ``custom_scorers`` / ``judgment_scorers`` entries
                in here are overwritten.
        """
        if scorers is not None:
            # Always set both fields as lists (even if empty) so that the
            # scorer-list validator sees a consistent pair.
            kwargs["custom_scorers"] = [
                s for s in scorers if isinstance(s, ExampleScorer)
            ]
            kwargs["judgment_scorers"] = [
                s for s in scorers if isinstance(s, APIScorerConfig)
            ]

        super().__init__(**kwargs)

    def model_dump(self, **kwargs):
        """
        Dump the model to a dict, re-dumping both scorer lists explicitly.

        Each scorer's own ``model_dump()`` is called without arguments, so
        ``kwargs`` only affect the remaining fields.
        """
        data = super().model_dump(**kwargs)
        data["custom_scorers"] = [s.model_dump() for s in self.custom_scorers]
        data["judgment_scorers"] = [s.model_dump() for s in self.judgment_scorers]

        return data

    @model_validator(mode="after")
    def validate_scorer_lists(self):
        """
        Require exactly one of custom_scorers / judgment_scorers to be filled.

        Written as an instance method, which is the form pydantic documents
        for ``mode="after"`` validators.

        Raises:
            ValueError: If both lists are empty, or if both are non-empty.
        """
        has_custom = bool(self.custom_scorers)
        has_judgment = bool(self.judgment_scorers)

        if not has_custom and not has_judgment:
            raise ValueError(
                "At least one of custom_scorers or judgment_scorers must be provided."
            )

        if has_custom and has_judgment:
            raise ValueError(
                "Only one of custom_scorers or judgment_scorers can be provided, not both."
            )

        return self

    @field_validator("model")
    @classmethod
    def validate_model(cls, v, values):
        """
        Reject model names that are not listed in ACCEPTABLE_MODELS.

        ``None`` passes through unchanged. ``values`` is the second argument
        pydantic supplies to field validators and is not used here.

        Raises:
            ValueError: If ``v`` is a string that is not in ACCEPTABLE_MODELS.
        """
        if isinstance(v, str) and v not in ACCEPTABLE_MODELS:
            raise ValueError(
                f"Model name {v} not recognized. Please select a valid model name."
            )
        return v
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ExampleEvaluationRun(EvaluationRun, ExampleEvaluationRunJudgmentType):  # type: ignore
    """
    Evaluation run that bundles the examples to score with their scorers.

    Args:
        project_name (str): The name of the project the evaluation results belong to
        eval_name (str): A name for this evaluation run
        examples (List[Example]): The examples to evaluate; must not be empty
        scorers (List[Union[ExampleScorer, APIScorerConfig]]): A list of scorers to use for evaluation
        model (str): The model used as a judge when using LLM as a Judge
    """

    examples: List[Example]  # type: ignore

    @field_validator("examples")
    def validate_examples(cls, v):
        """Reject an empty list and any entry that is not an Example."""
        if not v:
            raise ValueError("Examples cannot be empty.")

        # Only the first offending entry is reported.
        wrong_types = [type(entry) for entry in v if not isinstance(entry, Example)]
        if wrong_types:
            raise ValueError(f"Item of type {wrong_types[0]} is not a Example")

        return v

    def model_dump(self, **kwargs):
        """Dump the run to a dict, re-dumping every example with its own model_dump()."""
        dumped = super().model_dump(**kwargs)
        dumped["examples"] = [entry.model_dump() for entry in self.examples]
        return dumped
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class TraceEvaluationRun(EvaluationRun, TraceEvaluationRunJudgmentType):  # type: ignore
    """
    Evaluation run that targets recorded traces instead of examples.

    ``trace_and_span_ids`` holds pairs of string IDs and must not be empty.
    """

    trace_and_span_ids: List[Tuple[str, str]]  # type: ignore

    @field_validator("trace_and_span_ids")
    def validate_trace_and_span_ids(cls, v):
        """Return the ID pairs unchanged, or raise when none were given."""
        if v:
            return v
        raise ValueError("Trace and span IDs are required for trace evaluations.")
|
judgeval/data/example.py
CHANGED
|
@@ -4,7 +4,10 @@ Classes for representing examples in a dataset.
|
|
|
4
4
|
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from datetime import datetime
|
|
7
|
-
from
|
|
7
|
+
from typing import Dict, Any, Optional
|
|
8
|
+
from judgeval.data.judgment_types import Example as JudgmentExample
|
|
9
|
+
from uuid import uuid4
|
|
10
|
+
from pydantic import Field
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
class ExampleParams(str, Enum):
|
|
@@ -15,47 +18,18 @@ class ExampleParams(str, Enum):
|
|
|
15
18
|
RETRIEVAL_CONTEXT = "retrieval_context"
|
|
16
19
|
TOOLS_CALLED = "tools_called"
|
|
17
20
|
EXPECTED_TOOLS = "expected_tools"
|
|
18
|
-
REASONING = "reasoning"
|
|
19
21
|
ADDITIONAL_METADATA = "additional_metadata"
|
|
20
22
|
|
|
21
23
|
|
|
22
|
-
class Example(
|
|
23
|
-
example_id: str =
|
|
24
|
+
class Example(JudgmentExample):
|
|
25
|
+
example_id: str = Field(default_factory=lambda: str(uuid4()))
|
|
26
|
+
created_at: str = Field(default_factory=lambda: datetime.now().isoformat())
|
|
27
|
+
name: Optional[str] = None
|
|
24
28
|
|
|
25
|
-
def
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
super().__init__(**data)
|
|
29
|
-
self.example_id = None
|
|
29
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
30
|
+
data = super().model_dump(warnings=False)
|
|
31
|
+
return data
|
|
30
32
|
|
|
31
|
-
def
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"actual_output": self.actual_output,
|
|
35
|
-
"expected_output": self.expected_output,
|
|
36
|
-
"context": self.context,
|
|
37
|
-
"retrieval_context": self.retrieval_context,
|
|
38
|
-
"additional_metadata": self.additional_metadata,
|
|
39
|
-
"tools_called": self.tools_called,
|
|
40
|
-
"expected_tools": self.expected_tools,
|
|
41
|
-
"name": self.name,
|
|
42
|
-
"example_id": self.example_id,
|
|
43
|
-
"example_index": self.example_index,
|
|
44
|
-
"created_at": self.created_at,
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
def __str__(self):
|
|
48
|
-
return (
|
|
49
|
-
f"Example(input={self.input}, "
|
|
50
|
-
f"actual_output={self.actual_output}, "
|
|
51
|
-
f"expected_output={self.expected_output}, "
|
|
52
|
-
f"context={self.context}, "
|
|
53
|
-
f"retrieval_context={self.retrieval_context}, "
|
|
54
|
-
f"additional_metadata={self.additional_metadata}, "
|
|
55
|
-
f"tools_called={self.tools_called}, "
|
|
56
|
-
f"expected_tools={self.expected_tools}, "
|
|
57
|
-
f"name={self.name}, "
|
|
58
|
-
f"example_id={self.example_id}, "
|
|
59
|
-
f"example_index={self.example_index}, "
|
|
60
|
-
f"created_at={self.created_at}, "
|
|
61
|
-
)
|
|
33
|
+
def get_fields(self):
|
|
34
|
+
excluded = {"example_id", "name", "created_at"}
|
|
35
|
+
return self.model_dump(exclude=excluded)
|