python-flexeval 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flexeval/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.0"
1
+ __version__ = "0.3.0"
@@ -0,0 +1,107 @@
1
+ import json
2
+ from collections import UserDict
3
+
4
+
5
class JsonViewDict(UserDict):
    """Dictionary view over a JSON document stored in a text attribute.

    The view is populated by decoding ``getattr(model_instance,
    text_field_attr_name)``. Every mutating operation re-encodes the whole
    dictionary and assigns the resulting string back to that attribute, so
    the text attribute always mirrors the dictionary contents.

    Args:
        model_instance: Object that owns the text attribute.
        text_field_attr_name: Name of the attribute holding the JSON text.
        json_dumps_fn: Callable used to encode the dictionary to text.
        json_loads_fn: Callable used to decode the stored text.
    """

    def __init__(
        self,
        model_instance,
        text_field_attr_name,
        json_dumps_fn=json.dumps,
        json_loads_fn=json.loads,
    ):
        self.model_instance = model_instance
        self.text_field_attr_name = text_field_attr_name
        self.json_dumps_fn = json_dumps_fn
        self.json_loads_fn = json_loads_fn

        # Start with an empty mapping and fill ``self.data`` directly:
        # going through ``update()`` would write back to the model, and
        # merely creating a view should not modify the field it reads.
        super().__init__()

        text_value = getattr(model_instance, text_field_attr_name)
        if text_value is None or text_value == "":
            # An unset/blank field is treated as an empty document.
            initial_data = None
        else:
            initial_data = self.json_loads_fn(text_value)
        if initial_data is not None:
            self.data.update(initial_data)

    def _sync_to_model(self):
        """Encode the current data and store it on the model attribute."""
        json_str = self.json_dumps_fn(self.data)
        setattr(self.model_instance, self.text_field_attr_name, json_str)

    # Override mutating methods to trigger sync
    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        self._sync_to_model()

    def __delitem__(self, key):
        super().__delitem__(key)
        self._sync_to_model()

    def __ior__(self, other):
        # ``view |= other`` mutates ``self.data`` in place without going
        # through ``__setitem__``, so it needs its own sync.
        result = super().__ior__(other)
        self._sync_to_model()
        return result

    def clear(self):
        super().clear()
        self._sync_to_model()

    def pop(self, key, *args):
        result = super().pop(key, *args)
        self._sync_to_model()
        return result

    def popitem(self):
        result = super().popitem()
        self._sync_to_model()
        return result

    def setdefault(self, key, default=None):
        result = super().setdefault(key, default)
        self._sync_to_model()
        return result

    def update(self, *args, **kwargs):
        # Update the backing dict in one step and sync once; the inherited
        # implementation assigns key by key, which would re-encode the
        # whole document for every key.
        self.data.update(*args, **kwargs)
        self._sync_to_model()
60
+
61
+
62
class JsonView:
    """Descriptor that provides dict-like access to a JSON text field.

    Reading the attribute returns a ``JsonViewDict`` bound to the instance;
    the view is created on first access and cached on the instance under a
    private attribute name. Assigning a mapping to the attribute replaces
    the whole stored document.

    Example:
        class SomeModel(pw.Model):
            some_field = pw.TextField(default="{}")
            some_field_dict = JsonView(text_field_attr_name="some_field")

        m = SomeModel()
        m.some_field_dict["chosen_mistake"] = "whatever"
    """

    def __init__(self, text_field_attr_name):
        self.text_field_attr_name = text_field_attr_name
        # Filled in by __set_name__ once the descriptor is bound to a class.
        self.attr_name = None

    def __set_name__(self, owner, name):
        """Called when the descriptor is assigned to a class attribute."""
        self.attr_name = f"_{name}_dict"

    def __get__(self, instance, owner):
        """Return the cached view for ``instance``, creating it if needed.

        Raises:
            ValueError: If the linked text attribute is missing on
                ``instance``.
        """
        if instance is None:
            # Class-level access returns the descriptor itself.
            return self

        # Check if we already have a cached JsonViewDict
        if not hasattr(instance, self.attr_name):
            if not hasattr(instance, self.text_field_attr_name):
                raise ValueError(
                    f"Failed to link this JsonView to field '{self.text_field_attr_name}' because it doesn't exist on this model instance."
                )
            # Cache a new JsonViewDict
            json_dict = JsonViewDict(instance, self.text_field_attr_name)
            setattr(instance, self.attr_name, json_dict)

        return getattr(instance, self.attr_name)

    def __set__(self, instance, value):
        """Allow setting the entire dict.

        The stored document is replaced by ``value``; keys that were
        present before the assignment are discarded.

        Raises:
            ValueError: If ``value`` is not a mapping.
        """
        from collections.abc import Mapping

        # Accept any mapping (plain dicts, UserDict subclasses such as
        # another JsonViewDict), not just ``dict`` instances.
        if not isinstance(value, Mapping):
            raise ValueError(
                f"This JsonView must be a dictionary to set linked field '{self.text_field_attr_name}' correctly."
            )

        # Snapshot first so the assignment is unaffected by the clear below.
        replacement = dict(value)
        json_dict = JsonViewDict(instance, self.text_field_attr_name)
        # The fresh view starts out with the currently stored document;
        # drop it so the assignment replaces rather than merges.
        json_dict.clear()
        json_dict.update(replacement)
        setattr(instance, self.attr_name, json_dict)
@@ -10,6 +10,7 @@ from flexeval.classes.dataset import Dataset
10
10
  from flexeval.classes.eval_set_run import EvalSetRun
11
11
  from flexeval.classes.thread import Thread
12
12
  from flexeval.classes.turn import Turn
13
+ from flexeval.classes.jsonview import JsonView
13
14
  from flexeval.configuration import completion_functions
14
15
 
15
16
  logger = logging.getLogger(__name__)
@@ -34,6 +35,10 @@ class Message(BaseModel):
34
35
  content = pw.TextField()
35
36
  context = pw.TextField(null=True) # Previous messages
36
37
 
38
+ # metadata
39
+ metadata = pw.TextField(default="{}", null=False)
40
+ metadata_dict = JsonView("metadata")
41
+
37
42
  # helpers
38
43
  system_prompt = pw.TextField(null=True)
39
44
  is_flexeval_completion = pw.BooleanField(null=True)
@@ -3,6 +3,7 @@ import peewee as pw
3
3
  from flexeval.classes.base import BaseModel
4
4
  from flexeval.classes.dataset import Dataset
5
5
  from flexeval.classes.eval_set_run import EvalSetRun
6
+ from flexeval.classes.jsonview import JsonView
6
7
 
7
8
 
8
9
  class Thread(BaseModel):
@@ -20,6 +21,9 @@ class Thread(BaseModel):
20
21
 
21
22
  system_prompt = pw.TextField(null=True)
22
23
 
24
+ metadata = pw.TextField(default="{}", null=False)
25
+ metadata_dict = JsonView("metadata")
26
+
23
27
  def __init__(self, **kwargs):
24
28
  super().__init__(**kwargs)
25
29
  self.metrics_to_evaluate = []
flexeval/data_loader.py CHANGED
@@ -54,18 +54,13 @@ def load_jsonl(
54
54
  max(1, nb_evaluations_per_thread)
55
55
  ): # duplicate stored threads for averaged evaluation results
56
56
  if thread_id in selected_thread_ids:
57
- thread_object = Thread.create(
58
- evalsetrun=dataset.evalsetrun,
59
- dataset=dataset,
60
- jsonl_thread_id=thread_id,
61
- eval_run_thread_id=str(thread_id)
62
- + "_"
63
- + str(thread_eval_run_id),
64
- )
57
+ thread_json = json.loads(thread)
58
+ # extract any metadata
59
+ thread_metadata = thread_json.copy()
60
+ del thread_metadata["input"]
65
61
 
66
- # Context
67
62
  context = []
68
- thread_input = json.loads(thread)["input"]
63
+ thread_input = thread_json["input"]
69
64
 
70
65
  # Get system prompt used in the thread - assuming only 1
71
66
  for message in thread_input:
@@ -78,15 +73,35 @@ def load_jsonl(
78
73
  # Add the system prompt as context
79
74
  context.append({"role": "system", "content": system_prompt})
80
75
 
76
+ thread_object: Thread = Thread.create(
77
+ evalsetrun=dataset.evalsetrun,
78
+ dataset=dataset,
79
+ jsonl_thread_id=thread_id,
80
+ eval_run_thread_id=str(thread_id)
81
+ + "_"
82
+ + str(thread_eval_run_id),
83
+ system_prompt=system_prompt,
84
+ metadata=json.dumps(thread_metadata),
85
+ )
86
+
81
87
  # Create messages
82
88
  index_in_thread = 0
83
89
  for message in thread_input:
90
+ if not isinstance(message, dict):
91
+ raise ValueError(
92
+ f"Can't load unknown object type; expected dict. Check JSONL format: {message}"
93
+ )
84
94
  role = message.get("role", None)
85
95
  if role != "system":
86
96
  # System message shouldn't be added as a separate message
87
97
  system_prompt_for_this_message = ""
88
98
  if role != "user":
89
99
  system_prompt_for_this_message = system_prompt
100
+ message_metadata = message.copy()
101
+ if "content" in message_metadata:
102
+ del message_metadata["content"]
103
+ if "role" in message_metadata:
104
+ del message_metadata["role"]
90
105
  Message.create(
91
106
  evalsetrun=dataset.evalsetrun,
92
107
  dataset=dataset,
@@ -95,9 +110,9 @@ def load_jsonl(
95
110
  role=role,
96
111
  content=message.get("content", None),
97
112
  context=json.dumps(context),
98
- metadata=message.get("metadata", None),
99
113
  is_flexeval_completion=False,
100
114
  system_prompt=system_prompt_for_this_message,
115
+ metadata=json.dumps(message_metadata),
101
116
  )
102
117
  # Update context
103
118
  context.append(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-flexeval
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: FlexEval is a tool for designing custom metrics, completion functions, and LLM-graded rubrics for evaluating the behavior of LLM-powered systems.
5
5
  Project-URL: Homepage, https://digitalharborfoundation.github.io/FlexEval/
6
6
  Project-URL: GitHub, https://github.com/DigitalHarborFoundation/FlexEval
@@ -1,11 +1,11 @@
1
- flexeval/__about__.py,sha256=Zn1KFblwuFHiDRdRAiRnDBRkbPttWh44jKa5zG2ov0E,22
1
+ flexeval/__about__.py,sha256=VrXpHDu3erkzwl_WXrqINBm9xWkcyUy53IQOj042dOs,22
2
2
  flexeval/__init__.py,sha256=UXI_xdSxnGAK2plDODBbPF3df-N7E9YJ418QHK7XN-Q,391
3
3
  flexeval/__main__.py,sha256=c9NQqsea3e-_6b736gBeIO3O_zdXQ1wtY3-Scj5NiPg,126
4
4
  flexeval/cli.py,sha256=RwtRk121OivbLQyYpYxJ7PugPIYQ8J4qXHFN2SxxPy4,2985
5
5
  flexeval/completions.py,sha256=pi_tYK4m3vKSqAC1ym9Jc3e4srcQSXfx-mX4qI5qisQ,5686
6
6
  flexeval/compute_metrics.py,sha256=4X6XFk0qUKcaCDllNeJreuhlnDHmfRPlsf0f8fWFOxA,37277
7
7
  flexeval/config.yaml,sha256=dpkFdW0rKf7StGoVeIGaCNw0n0yOfYWig0xmIfsDdbg,530
8
- flexeval/data_loader.py,sha256=EKc6wdpQuhrB2ai2U_fQxojzt1RR716ELisiZXpfu58,25311
8
+ flexeval/data_loader.py,sha256=UP-HWqh5o_euqT2GvTbUYmA-yJcbTKtmug4w63w2CbA,26153
9
9
  flexeval/db_utils.py,sha256=2jgqexLCAqShvgPrImZz12UkMZtfERhP8iXjratXYok,1612
10
10
  flexeval/dependency_graph.py,sha256=SaG9gjkw2Q0NykqQWs4JzPkv5sMj2aXXmhjJ7yRkV4Q,10539
11
11
  flexeval/eval_schema.json,sha256=BQetj8O0_4rorj3Mpqk-sj_SCaRkGMrvBUcxhuw6zLE,13111
@@ -20,9 +20,10 @@ flexeval/classes/base.py,sha256=xxkTa8joPe39CFwveeTPW56LW-x7rsi5oBAIxrvM5iI,944
20
20
  flexeval/classes/dataset.py,sha256=Y_EdEIuhx526SSvkqk2tFBzkOgBkVY-5FeraYMtU5lo,2913
21
21
  flexeval/classes/eval_runner.py,sha256=ZvCpyaD7lorDK_mYJSZqQbvI6FfLbIWRFHNarWTAMQU,6270
22
22
  flexeval/classes/eval_set_run.py,sha256=fq_wBOaxuq7dLxiZIw76WGIwhRBNbQWDUhpiK0wDG_A,1116
23
- flexeval/classes/message.py,sha256=zuDm_v1gmK49Fw5m-HTWiqndrI_xtLotlXD8nhRDDTg,7518
23
+ flexeval/classes/jsonview.py,sha256=3XJTh46ODfqdNbrXYDEV6kRO8KbeiHJo5pb4aJrbHRY,3459
24
+ flexeval/classes/message.py,sha256=gDejDfaHGQKgS_CpJqjPAVzpiRD2JddKo17Yi1wVeiw,7676
24
25
  flexeval/classes/metric.py,sha256=d8l39_QwnQDmTJvy9TIulU4p0jqD7ldMUi4m5zfK2Es,2806
25
- flexeval/classes/thread.py,sha256=LchsK9mmrY4K-zSTMAAmywlzPVwnpZ7rOHqBGPIlda8,2779
26
+ flexeval/classes/thread.py,sha256=cFQu3Mwzk8-Def8xccB8F6zKv64Srvhz5n83yLELvKo,2922
26
27
  flexeval/classes/tool_call.py,sha256=CteT2Hajor0PlHEEn7apfZux5_mremSIDrQmZ0iB7K0,1748
27
28
  flexeval/classes/turn.py,sha256=kLmgnYQ-4a8sydzGK1HTQRyUDXZIedmt_NFR3shLJFE,8635
28
29
  flexeval/configuration/__init__.py,sha256=wP_gpYyaEp5DxCSH8-4KHchH07JMZZOk8eCFMfd5LBw,75
@@ -41,8 +42,8 @@ flexeval/schema/eval_schema.py,sha256=iHMbanW4Ef_sp51KiaZKeP3Dn4Z6pWCGa7N2SPvsFK
41
42
  flexeval/schema/evalrun_schema.py,sha256=M7JY01DhlLzwZc2jJTIeGPs9vt6TFMPir51MFhtRllA,3526
42
43
  flexeval/schema/rubric_schema.py,sha256=uxcf7MHWKW3EmABUnWeCinGUP6LBjskiq7zkEPHmAvU,1615
43
44
  flexeval/schema/schema_utils.py,sha256=Fg1foqRA-9X-hl_vqIF3bpYdE51hNEgdw739Q-s3iQc,698
44
- python_flexeval-0.2.0.dist-info/METADATA,sha256=bEifn06Ok5-8YllS4uYxBN2KNuZvf7vJg8b_GarkttU,5599
45
- python_flexeval-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
46
- python_flexeval-0.2.0.dist-info/entry_points.txt,sha256=wSyluqXhrX3xySVYAtM-Kv23p4OauKQCSBuNNfzEGtI,52
47
- python_flexeval-0.2.0.dist-info/licenses/LICENSE,sha256=OlAu_c13gw6-fJ9UdhZBMeNr5STLrnWG_0Hv0SCXtu4,1082
48
- python_flexeval-0.2.0.dist-info/RECORD,,
45
+ python_flexeval-0.3.0.dist-info/METADATA,sha256=xBbeZrF4aEdl94pg-L2P_Di6cxtxA3aZnu6fxFjUf-8,5599
46
+ python_flexeval-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
47
+ python_flexeval-0.3.0.dist-info/entry_points.txt,sha256=wSyluqXhrX3xySVYAtM-Kv23p4OauKQCSBuNNfzEGtI,52
48
+ python_flexeval-0.3.0.dist-info/licenses/LICENSE,sha256=OlAu_c13gw6-fJ9UdhZBMeNr5STLrnWG_0Hv0SCXtu4,1082
49
+ python_flexeval-0.3.0.dist-info/RECORD,,