judgeval 0.0.21__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
judgeval/data/__init__.py CHANGED
@@ -2,8 +2,7 @@ from judgeval.data.example import Example, ExampleParams
  from judgeval.data.api_example import ProcessExample, create_process_example
  from judgeval.data.scorer_data import ScorerData, create_scorer_data
  from judgeval.data.result import ScoringResult, generate_scoring_result
- from judgeval.data.ground_truth import GroundTruthExample
-
+ from judgeval.data.custom_example import CustomExample
  __all__ = [
      "Example",
      "ExampleParams",
@@ -13,5 +12,5 @@ __all__ = [
      "create_scorer_data",
      "ScoringResult",
      "generate_scoring_result",
-     "GroundTruthExample",
+     "CustomExample",
  ]
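
The net effect on the public API: `GroundTruthExample` is no longer importable from `judgeval.data`, and `CustomExample` takes its place in `__all__`. A minimal sketch of the updated import (usage illustrative, not taken from the diff):

    # 0.0.21 (no longer works): from judgeval.data import GroundTruthExample
    from judgeval.data import CustomExample

    ex = CustomExample(name="smoke-test")
    print(ex.example_id, ex.timestamp)  # auto-populated by __init__
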
judgeval/data/custom_example.py ADDED
@@ -0,0 +1,98 @@
+ from pydantic import BaseModel, Field, field_validator
+ from typing import Optional, Dict, Any
+ from uuid import uuid4
+ from datetime import datetime
+ import json
+ import warnings
+
+ # Brainstorming what are the requirements for the fields?
+ class CustomExample(BaseModel):
+     name: Optional[str] = None
+     additional_metadata: Optional[Dict[str, Any]] = None
+     example_id: str = Field(default_factory=lambda: str(uuid4()))
+     example_index: Optional[int] = None
+     timestamp: Optional[str] = None
+     trace_id: Optional[str] = None
+
+     model_config = {
+         "extra": "allow", # Allow extra fields with any types
+     }
+
+     def __init__(self, **data):
+         if 'example_id' not in data:
+             data['example_id'] = str(uuid4())
+         # Set timestamp if not provided
+         if 'timestamp' not in data:
+             data['timestamp'] = datetime.now().isoformat()
+         super().__init__(**data)
+
+     @field_validator('additional_metadata', mode='before')
+     @classmethod
+     def validate_additional_metadata(cls, v):
+         if v is not None and not isinstance(v, dict):
+             raise ValueError(f"Additional metadata must be a dictionary or None but got {v} of type {type(v)}")
+         return v
+
+     @field_validator('example_index', mode='before')
+     @classmethod
+     def validate_example_index(cls, v):
+         if v is not None and not isinstance(v, int):
+             raise ValueError(f"Example index must be an integer or None but got {v} of type {type(v)}")
+         return v
+
+     @field_validator('timestamp', mode='before')
+     @classmethod
+     def validate_timestamp(cls, v):
+         if v is not None and not isinstance(v, str):
+             raise ValueError(f"Timestamp must be a string or None but got {v} of type {type(v)}")
+         return v
+
+     @field_validator('trace_id', mode='before')
+     @classmethod
+     def validate_trace_id(cls, v):
+         if v is not None and not isinstance(v, str):
+             raise ValueError(f"Trace ID must be a string or None but got {v} of type {type(v)}")
+         return v
+
+     def to_dict(self):
+         return self.model_dump()
+
+     def __str__(self):
+         return str(self.model_dump())
+
+     def model_dump(self, **kwargs):
+         """
+         Custom serialization that handles special cases for fields that might fail standard serialization.
+         """
+         data = super().model_dump(**kwargs)
+
+         # Get all fields including custom ones
+         all_fields = self.__dict__
+
+         for field_name, value in all_fields.items():
+             try:
+                 # Check if the field has its own serialization method
+                 if hasattr(value, 'to_dict'):
+                     data[field_name] = value.to_dict()
+                 elif hasattr(value, 'model_dump'):
+                     data[field_name] = value.model_dump()
+                 # Field is already in data from super().model_dump()
+                 elif field_name in data:
+                     continue
+                 else:
+                     # Try standard JSON serialization
+                     json.dumps(value)
+                     data[field_name] = value
+             except (TypeError, OverflowError, ValueError):
+                 # Handle non-serializable objects
+                 try:
+                     # Try converting to string
+                     data[field_name] = str(value)
+                 except Exception as _:
+                     # If all else fails, store as None and optionally warn
+                     warnings.warn(f"Could not serialize field {field_name}, setting to None")
+                     data[field_name] = None
+
+         return data
+
+
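
Because `model_config` sets `"extra": "allow"`, a `CustomExample` accepts arbitrary keyword fields, and the overridden `model_dump` degrades gracefully: it prefers a field's own `to_dict()`/`model_dump()`, then plain JSON serialization, then `str()`, and finally `None` plus a warning. A short sketch of that behavior (the extra field names here are illustrative):

    from judgeval.data import CustomExample

    ex = CustomExample(name="run-1", score=0.92, payload={"k": "v"})
    d = ex.model_dump()
    # d includes name, example_id, and timestamp, plus the extra
    # "score" and "payload" fields supplied at construction time.
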
judgeval/data/datasets/dataset.py CHANGED
@@ -7,12 +7,11 @@ import yaml
  from dataclasses import dataclass, field
  from typing import List, Union, Literal

- from judgeval.data import Example, GroundTruthExample
+ from judgeval.data import Example
  from judgeval.common.logger import debug, error, warning, info

  @dataclass
  class EvalDataset:
-     ground_truths: List[GroundTruthExample]
      examples: List[Example]
      _alias: Union[str, None] = field(default=None)
      _id: Union[str, None] = field(default=None)
@@ -21,13 +20,11 @@ class EvalDataset:
      def __init__(self,
                   judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
                   organization_id: str = os.getenv("JUDGMENT_ORG_ID"),
-                  ground_truths: List[GroundTruthExample] = [],
                   examples: List[Example] = [],
                   ):
-         debug(f"Initializing EvalDataset with {len(ground_truths)} ground truths and {len(examples)} examples")
+         debug(f"Initializing EvalDataset with {len(examples)} examples")
          if not judgment_api_key:
              warning("No judgment_api_key provided")
-         self.ground_truths = ground_truths
          self.examples = examples
          self._alias = None
          self._id = None
@@ -37,38 +34,13 @@ class EvalDataset:
      def add_from_json(self, file_path: str) -> None:
          debug(f"Loading dataset from JSON file: {file_path}")
          """
-         Adds examples and ground truths from a JSON file.
+         Adds examples from a JSON file.

-         The format of the JSON file is expected to be a dictionary with two keys: "examples" and "ground_truths".
-         The value of each key is a list of dictionaries, where each dictionary represents an example or ground truth.
+         The format of the JSON file is expected to be a dictionary with one key: "examples".
+         The value of the key is a list of dictionaries, where each dictionary represents an example.

          The JSON file is expected to have the following format:
          {
-             "ground_truths": [
-                 {
-                     "input": "test input",
-                     "actual_output": null,
-                     "expected_output": "expected output",
-                     "context": [
-                         "context1"
-                     ],
-                     "retrieval_context": [
-                         "retrieval1"
-                     ],
-                     "additional_metadata": {
-                         "key": "value"
-                     },
-                     "comments": "test comment",
-                     "tools_called": [
-                         "tool1"
-                     ],
-                     "expected_tools": [
-                         "tool1"
-                     ],
-                     "source_file": "test.py",
-                     "trace_id": "094121"
-                 }
-             ],
              "examples": [
                  {
                      "input": "test input",
@@ -103,7 +75,6 @@ class EvalDataset:
              with open(file_path, "r") as file:
                  payload = json.load(file)
                  examples = payload.get("examples", [])
-                 ground_truths = payload.get("ground_truths", [])
          except FileNotFoundError:
              error(f"JSON file not found: {file_path}")
              raise FileNotFoundError(f"The file {file_path} was not found.")
@@ -111,21 +82,17 @@ class EvalDataset:
              error(f"Invalid JSON file: {file_path}")
              raise ValueError(f"The file {file_path} is not a valid JSON file.")

-         info(f"Added {len(examples)} examples and {len(ground_truths)} ground truths from JSON")
+         info(f"Added {len(examples)} examples from JSON")
          new_examples = [Example(**e) for e in examples]
          for e in new_examples:
              self.add_example(e)
-
-         new_ground_truths = [GroundTruthExample(**g) for g in ground_truths]
-         for g in new_ground_truths:
-             self.add_ground_truth(g)

      def add_from_csv(
          self,
          file_path: str,
      ) -> None:
          """
-         Add Examples and GroundTruthExamples from a CSV file.
+         Add Examples from a CSV file.
          """
          try:
              import pandas as pd
@@ -144,14 +111,14 @@ class EvalDataset:
          "expected_tools", "name", "comments", "source_file", "example", \
          "trace_id"

-         We want to collect the examples and ground truths separately which can
+         We want to collect the examples separately which can
          be determined by the "example" column. If the value is True, then it is an
-         example, otherwise it is a ground truth.
+         example

          We also assume that if there are multiple retrieval contexts or contexts, they are separated by semicolons.
          This can be adjusted using the `context_delimiter` and `retrieval_context_delimiter` parameters.
          """
-         examples, ground_truths = [], []
+         examples = []

          for _, row in df.iterrows():
              data = {
@@ -174,49 +141,20 @@ class EvalDataset:
                      examples.append(e)
                  else:
                      raise ValueError("Every example must have an 'input' and 'actual_output' field.")
-             else:
-                 # GroundTruthExample has `comments` and `source_file` fields
-                 data["comments"] = row["comments"] if pd.notna(row["comments"]) else None
-                 data["source_file"] = row["source_file"] if pd.notna(row["source_file"]) else None
-                 # every GroundTruthExample has `input` field
-                 if data["input"] is not None:
-                     g = GroundTruthExample(**data)
-                     ground_truths.append(g)
-                 else:
-                     raise ValueError("Every ground truth must have an 'input' field.")
+

          for e in examples:
              self.add_example(e)

-         for g in ground_truths:
-             self.add_ground_truth(g)
-
      def add_from_yaml(self, file_path: str) -> None:
          debug(f"Loading dataset from YAML file: {file_path}")
          """
-         Adds examples and ground truths from a YAML file.
+         Adds examples from a YAML file.

-         The format of the YAML file is expected to be a dictionary with two keys: "examples" and "ground_truths".
-         The value of each key is a list of dictionaries, where each dictionary represents an example or ground truth.
+         The format of the YAML file is expected to be a dictionary with one key: "examples".
+         The value of the key is a list of dictionaries, where each dictionary represents an example.

          The YAML file is expected to have the following format:
-         ground_truths:
-         - input: "test input"
-           actual_output: null
-           expected_output: "expected output"
-           context:
-           - "context1"
-           retrieval_context:
-           - "retrieval1"
-           additional_metadata:
-             key: "value"
-           comments: "test comment"
-           tools_called:
-           - "tool1"
-           expected_tools:
-           - "tool1"
-           source_file: "test.py"
-           trace_id: "094121"
          examples:
          - input: "test input"
            actual_output: "test output"
@@ -244,7 +182,6 @@ class EvalDataset:
              if payload is None:
                  raise ValueError("The YAML file is empty.")
              examples = payload.get("examples", [])
-             ground_truths = payload.get("ground_truths", [])
          except FileNotFoundError:
              error(f"YAML file not found: {file_path}")
              raise FileNotFoundError(f"The file {file_path} was not found.")
@@ -252,25 +189,18 @@ class EvalDataset:
              error(f"Invalid YAML file: {file_path}")
              raise ValueError(f"The file {file_path} is not a valid YAML file.")

-         info(f"Added {len(examples)} examples and {len(ground_truths)} ground truths from YAML")
+         info(f"Added {len(examples)} examples from YAML")
          new_examples = [Example(**e) for e in examples]
          for e in new_examples:
              self.add_example(e)

-         new_ground_truths = [GroundTruthExample(**g) for g in ground_truths]
-         for g in new_ground_truths:
-             self.add_ground_truth(g)
-
      def add_example(self, e: Example) -> None:
          self.examples = self.examples + [e]
          # TODO if we need to add rank, then we need to do it here
-
-     def add_ground_truth(self, g: GroundTruthExample) -> None:
-         self.ground_truths = self.ground_truths + [g]

      def save_as(self, file_type: Literal["json", "csv", "yaml"], dir_path: str, save_name: str = None) -> None:
          """
-         Saves the dataset as a file. Save both the ground truths and examples.
+         Saves the dataset as a file. Save only the examples.

          Args:
              file_type (Literal["json", "csv"]): The file type to save the dataset as.
@@ -285,7 +215,6 @@ class EvalDataset:
              with open(complete_path, "w") as file:
                  json.dump(
                      {
-                         "ground_truths": [g.to_dict() for g in self.ground_truths],
                          "examples": [e.to_dict() for e in self.examples],
                      },
                      file,
@@ -319,24 +248,7 @@ class EvalDataset:
                      ]
                  )

-             for g in self.ground_truths:
-                 writer.writerow(
-                     [
-                         g.input,
-                         g.actual_output,
-                         g.expected_output,
-                         ";".join(g.context),
-                         ";".join(g.retrieval_context),
-                         g.additional_metadata,
-                         ";".join(g.tools_called),
-                         ";".join(g.expected_tools),
-                         None, # GroundTruthExample does not have name
-                         g.comments,
-                         g.source_file,
-                         False, # Adding a GroundTruthExample, not an Example
-                         g.trace_id
-                     ]
-                 )
+
          elif file_type == "yaml":
              with open(complete_path, "w") as file:
                  yaml_data = {
@@ -358,24 +270,6 @@ class EvalDataset:
                          }
                          for e in self.examples
                      ],
-                     "ground_truths": [
-                         {
-                             "input": g.input,
-                             "actual_output": g.actual_output,
-                             "expected_output": g.expected_output,
-                             "context": g.context,
-                             "retrieval_context": g.retrieval_context,
-                             "additional_metadata": g.additional_metadata,
-                             "tools_called": g.tools_called,
-                             "expected_tools": g.expected_tools,
-                             "name": None, # GroundTruthExample does not have name
-                             "comments": g.comments,
-                             "source_file": g.source_file,
-                             "example": False, # Adding a GroundTruthExample, not an Example
-                             "trace_id": g.trace_id
-                         }
-                         for g in self.ground_truths
-                     ]
                  }
                  yaml.dump(yaml_data, file, default_flow_style=False)
          else:
@@ -391,7 +285,6 @@ class EvalDataset:
      def __str__(self):
          return (
              f"{self.__class__.__name__}("
-             f"ground_truths={self.ground_truths}, "
              f"examples={self.examples}, "
              f"_alias={self._alias}, "
              f"_id={self._id}"
judgeval/data/datasets/eval_dataset_client.py CHANGED
@@ -11,7 +11,7 @@ from judgeval.constants import (
      JUDGMENT_DATASETS_EDIT_API_URL,
      JUDGMENT_DATASETS_EXPORT_JSONL_API_URL
  )
- from judgeval.data import Example, GroundTruthExample
+ from judgeval.data import Example
  from judgeval.data.datasets import EvalDataset


@@ -35,7 +35,6 @@ class EvalDatasetClient:
          Mock request:
          dataset = {
              "alias": alias,
-             "ground_truths": [...],
              "examples": [...],
              "overwrite": overwrite
          } ==>
@@ -55,7 +54,6 @@ class EvalDatasetClient:
          )
          content = {
              "alias": alias,
-             "ground_truths": [g.to_dict() for g in dataset.ground_truths],
              "examples": [e.to_dict() for e in dataset.examples],
              "overwrite": overwrite,
          }
@@ -102,7 +100,6 @@ class EvalDatasetClient:
          }
          ==>
          {
-             "ground_truths": [...],
              "examples": [...],
              "_alias": alias,
              "_id": "..." # ID of the dataset
@@ -142,7 +139,6 @@ class EvalDatasetClient:

          info(f"Successfully pulled dataset with alias '{alias}'")
          payload = response.json()
-         dataset.ground_truths = [GroundTruthExample(**g) for g in payload.get("ground_truths", [])]
          dataset.examples = [Example(**e) for e in payload.get("examples", [])]
          dataset._alias = payload.get("_alias")
          dataset._id = payload.get("_id")
@@ -164,8 +160,8 @@ class EvalDatasetClient:
          }
          ==>
          {
-             "test_dataset_1": {"examples_count": len(dataset1.examples), "ground_truths_count": len(dataset1.ground_truths)},
-             "test_dataset_2": {"examples_count": len(dataset2.examples), "ground_truths_count": len(dataset2.ground_truths)},
+             "test_dataset_1": {"examples_count": len(dataset1.examples)},
+             "test_dataset_2": {"examples_count": len(dataset2.examples)},
              ...
          }
          """
@@ -209,15 +205,14 @@ class EvalDatasetClient:

          return payload

-     def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+     def edit_dataset(self, alias: str, examples: List[Example]) -> bool:
          """
-         Edits the dataset on Judgment platform by adding new examples and ground truths
+         Edits the dataset on Judgment platform by adding new examples

          Mock request:
          {
              "alias": alias,
              "examples": [...],
-             "ground_truths": [...],
              "judgment_api_key": self.judgment_api_key
          }
          """
@@ -234,7 +229,6 @@ class EvalDatasetClient:
          content = {
              "alias": alias,
              "examples": [e.to_dict() for e in examples],
-             "ground_truths": [g.to_dict() for g in ground_truths],
          }

          try:
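
On the wire, the push and edit payloads drop the `ground_truths` key entirely; per the mock requests above, an edit request body now reduces to (alias value hypothetical):

    content = {
        "alias": "my-dataset",
        "examples": [e.to_dict() for e in examples],
    }
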
judgeval/data/datasets/utils.py CHANGED
@@ -1,73 +0,0 @@
- from typing import List, Optional
-
- from judgeval.data import Example, GroundTruthExample
-
-
- def examples_to_ground_truths(examples: List[Example]) -> List[GroundTruthExample]:
-     """
-     Convert a list of `Example` objects to a list of `GroundTruthExample` objects.
-
-     Args:
-         examples (List[Example]): A list of `Example` objects to convert.
-
-     Returns:
-         List[GroundTruthExample]: A list of `GroundTruthExample` objects.
-     """
-
-     if not isinstance(examples, list):
-         raise TypeError("Input should be a list of `Example` objects")
-
-     ground_truths = []
-     ground_truths = []
-     for e in examples:
-         g_truth = {
-             "input": e.input,
-             "actual_output": e.actual_output,
-             "expected_output": e.expected_output,
-             "context": e.context,
-             "retrieval_context": e.retrieval_context,
-             "tools_called": e.tools_called,
-             "expected_tools": e.expected_tools,
-         }
-         ground_truths.append(GroundTruthExample(**g_truth))
-     return ground_truths
-
-
- def ground_truths_to_examples(
-     ground_truths: List[GroundTruthExample],
-     _alias: Optional[str] = None,
-     _id: Optional[str] = None,
- ) -> List[Example]:
-     """
-     Converts a list of `GroundTruthExample` objects to a list of `Example` objects.
-
-     Args:
-         ground_truths (List[GroundTruthExample]): A list of `GroundTruthExample` objects to convert.
-         _alias (Optional[str]): The alias of the dataset.
-         _id (Optional[str]): The ID of the dataset.
-
-     Returns:
-         List[Example]: A list of `Example` objects.
-     """
-
-     if not isinstance(ground_truths, list):
-         raise TypeError("Input should be a list of `GroundTruthExample` objects")
-
-     examples = []
-     for index, ground_truth in enumerate(ground_truths):
-         e = Example(
-             input=ground_truth.input,
-             actual_output=ground_truth.actual_output,
-             expected_output=ground_truth.expected_output,
-             context=ground_truth.context,
-             retrieval_context=ground_truth.retrieval_context,
-             additional_metadata=ground_truth.additional_metadata,
-             tools_called=ground_truth.tools_called,
-             expected_tools=ground_truth.expected_tools,
-             comments=ground_truth.comments,
-             _dataset_alias=_alias,
-             _dataset_id=_id,
-             _dataset_rank=index,
-         )
-         examples.append(e)
-     return examples
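
The deleted converters have no direct replacement. Callers still holding ground-truth-style records can build `Example` objects directly, since `Example` accepts the same fields the old helper mapped over; a hedged dict-based equivalent (helper name and input shape are assumptions, not library API):

    from judgeval.data import Example

    def records_to_examples(records: list[dict]) -> list[Example]:
        # records are plain dicts using the old GroundTruthExample field names
        return [Example(**r) for r in records]
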
judgeval/data/ground_truth.py CHANGED
@@ -1,54 +0,0 @@
- from pydantic import BaseModel
- from typing import Optional, Dict, List
-
-
- class GroundTruthExample(BaseModel):
-     """
-     GroundTruthExample is the atomic unit of a `Dataset`. It is essentially the same
-     as an `Example`, but the `actual_output` field is optional to enable users to
-     run their workflow on the `input` field at test-time to evaluate their current
-     workflow's performance.
-     """
-     input: str
-     actual_output: Optional[str] = None
-     expected_output: Optional[str] = None
-     context: Optional[List[str]] = None
-     retrieval_context: Optional[List[str]] = None
-     additional_metadata: Optional[Dict] = None
-     comments: Optional[str] = None
-     tools_called: Optional[List[str]] = None
-     expected_tools: Optional[List[str]] = None
-     source_file: Optional[str] = None
-     trace_id: Optional[str] = None
-
-     def to_dict(self):
-         return {
-             "input": self.input,
-             "actual_output": self.actual_output,
-             "expected_output": self.expected_output,
-             "context": self.context,
-             "retrieval_context": self.retrieval_context,
-             "additional_metadata": self.additional_metadata,
-             "comments": self.comments,
-             "tools_called": self.tools_called,
-             "expected_tools": self.expected_tools,
-             "source_file": self.source_file,
-             "trace_id": self.trace_id,
-         }
-
-     def __str__(self):
-         return (
-             f"{self.__class__.__name__}("
-             f"input={self.input}, "
-             f"actual_output={self.actual_output}, "
-             f"expected_output={self.expected_output}, "
-             f"context={self.context}, "
-             f"retrieval_context={self.retrieval_context}, "
-             f"additional_metadata={self.additional_metadata}, "
-             f"comments={self.comments}, "
-             f"tools_called={self.tools_called}, "
-             f"expected_tools={self.expected_tools}, "
-             f"source_file={self.source_file}, "
-             f"trace_id={self.trace_id}"
-             f")"
-         )
judgeval/judgment_client.py CHANGED
@@ -10,7 +10,6 @@ from judgeval.data.datasets import EvalDataset, EvalDatasetClient
  from judgeval.data import (
      ScoringResult,
      Example,
-     GroundTruthExample
  )
  from judgeval.scorers import (
      APIJudgmentScorer,
@@ -283,11 +282,11 @@ class JudgmentClient:
          """
          return self.eval_dataset_client.pull_all_user_dataset_stats()

-     def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+     def edit_dataset(self, alias: str, examples: List[Example]) -> bool:
          """
-         Edits the dataset on Judgment platform by adding new examples and ground truths
+         Edits the dataset on Judgment platform by adding new examples
          """
-         return self.eval_dataset_client.edit_dataset(alias, examples, ground_truths)
+         return self.eval_dataset_client.edit_dataset(alias, examples)

      # Maybe add option where you can pass in the EvaluationRun object and it will pull the eval results from the backend
      def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Union[str, List[ScoringResult]]]]:
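
For `JudgmentClient` callers this is the visible breaking change in 0.0.22: `edit_dataset` loses its `ground_truths` argument. Migration sketch (alias and variables hypothetical):

    # 0.0.21: client.edit_dataset("my-dataset", examples, ground_truths)
    client.edit_dataset("my-dataset", examples)  # 0.0.22
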
{judgeval-0.0.21.dist-info → judgeval-0.0.22.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: judgeval
- Version: 0.0.21
+ Version: 0.0.22
  Summary: Judgeval Package
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
{judgeval-0.0.21.dist-info → judgeval-0.0.22.dist-info}/RECORD RENAMED
@@ -2,7 +2,7 @@ judgeval/__init__.py,sha256=dtXxsCmI4eEsZdGSUMy8P_pA0bc2-OSGAgb2C__yJoA,252
  judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
  judgeval/constants.py,sha256=VhJppAECTUDQwzC_FpzJw2wPlkYoogsadHxaJIY_J8U,5073
  judgeval/evaluation_run.py,sha256=RgJD60lJsunNQzObjo7iXnAzXWgubCLOAAuuamAAuoI,6354
- judgeval/judgment_client.py,sha256=5lqp9X67qPzBUu7kQYETslsc3L5JjxrDVgVLslF07A0,24173
+ judgeval/judgment_client.py,sha256=e-2e4KK-xy8-WLgzg8H0D6pZC8By9IWdu2iK-lHe39A,24076
  judgeval/rules.py,sha256=ebsiDEBVAnYTQxwVNvh_RpmKeWBnjQXgHs8KofTjcAs,15526
  judgeval/run_evaluation.py,sha256=YOzkyeWl-r3vaz0jB5nM-1VULi7ALmJ9_f58ENqexXk,23827
  judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
@@ -10,16 +10,17 @@ judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E
  judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
  judgeval/common/tracer.py,sha256=WFjFNf3NZ2BN8UAu2MG0F3Om9LgJNma3m_GrxyXgJqE,46655
  judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
- judgeval/data/__init__.py,sha256=QykVE22Qf-I2f1g-jC9-iQyLNXgDmX1-vHbCgZg8Ra8,558
+ judgeval/data/__init__.py,sha256=6ADbugtS3AporRv23Hxm67qcghU4tj0OScS8t3xLd6U,549
  judgeval/data/api_example.py,sha256=dzkrQ0xno08y6qNfqL2djXbapUyc2B2aQ5iANn0o4CY,3667
+ judgeval/data/custom_example.py,sha256=C-j9iVenBy52dwnL6PIjJAdKsBO1ajKjsaRr4RJthUo,3676
  judgeval/data/example.py,sha256=BhGBhamFWgH6wtvrRYM8dGtDfXh-cDxDhtNL5Gbdz_M,5892
- judgeval/data/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
+ judgeval/data/ground_truth.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  judgeval/data/result.py,sha256=4fgjKtUmT3br7K6fkRiNIxTGKUuwMeGyRLqzkpxwXKE,4436
  judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
  judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
- judgeval/data/datasets/dataset.py,sha256=LrBK8y3y1R9_BKmXxTzdXMMIQvXlq7tf7TM-u7jgSxE,16839
- judgeval/data/datasets/eval_dataset_client.py,sha256=QsfHyFC4WePV7uJGYUVjiIwtk1Ie_VpWUrnd2Q4kKdU,11479
- judgeval/data/datasets/utils.py,sha256=6DpGCPmGFNOKIGNcVCOSjTOdWemrpAuYnlo778sGG7g,2455
+ judgeval/data/datasets/dataset.py,sha256=DjJNy-qvviXMGBl_JhiBzvgiJH1_3rYtAWeHP6Daw6E,11897
+ judgeval/data/datasets/eval_dataset_client.py,sha256=B4bRy0Di2oFlaBbvp4_hRx2g_9e6Cs0y3ZUT9reMyhw,10926
+ judgeval/data/datasets/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
  judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
  judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6KTg,2424
@@ -88,7 +89,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py
  judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=Qk7lwHgRPYeGoxTOyclAh1VfGItfvHJ6l1t7Nk3SWFM,20927
  judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
  judgeval/utils/alerts.py,sha256=RgW5R9Dn3Jtim0OyAYDbNzjoX2s6SA4Mw16GyyaikjI,1424
- judgeval-0.0.21.dist-info/METADATA,sha256=jQW4w6jGNaHvPWTcqX3ZGr_SKeCpNl7DsNr-cwrYHsA,1378
- judgeval-0.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- judgeval-0.0.21.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
- judgeval-0.0.21.dist-info/RECORD,,
+ judgeval-0.0.22.dist-info/METADATA,sha256=1bpJcDJOKSGkGbbNVMFwUKwA932c2j1MCLxUA1PdD4o,1378
+ judgeval-0.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ judgeval-0.0.22.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+ judgeval-0.0.22.dist-info/RECORD,,