edsl 0.1.40.dev2__py3-none-any.whl → 0.1.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +1 -0
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +1 -1
- edsl/agents/Invigilator.py +6 -4
- edsl/agents/InvigilatorBase.py +2 -1
- edsl/agents/QuestionTemplateReplacementsBuilder.py +7 -2
- edsl/coop/coop.py +37 -2
- edsl/data/Cache.py +7 -0
- edsl/data/RemoteCacheSync.py +16 -16
- edsl/enums.py +3 -0
- edsl/exceptions/jobs.py +1 -9
- edsl/exceptions/language_models.py +8 -4
- edsl/exceptions/questions.py +8 -11
- edsl/inference_services/DeepSeekService.py +18 -0
- edsl/inference_services/registry.py +2 -0
- edsl/jobs/AnswerQuestionFunctionConstructor.py +1 -1
- edsl/jobs/Jobs.py +42 -34
- edsl/jobs/JobsPrompts.py +11 -1
- edsl/jobs/JobsRemoteInferenceHandler.py +1 -0
- edsl/jobs/JobsRemoteInferenceLogger.py +1 -1
- edsl/jobs/interviews/Interview.py +2 -6
- edsl/jobs/interviews/InterviewExceptionEntry.py +14 -4
- edsl/jobs/loggers/HTMLTableJobLogger.py +6 -1
- edsl/jobs/results_exceptions_handler.py +2 -7
- edsl/jobs/runners/JobsRunnerAsyncio.py +18 -6
- edsl/jobs/runners/JobsRunnerStatus.py +2 -1
- edsl/jobs/tasks/TaskHistory.py +49 -17
- edsl/language_models/LanguageModel.py +7 -4
- edsl/language_models/ModelList.py +1 -1
- edsl/language_models/key_management/KeyLookupBuilder.py +7 -3
- edsl/language_models/model.py +49 -0
- edsl/questions/QuestionBudget.py +2 -2
- edsl/questions/QuestionDict.py +343 -0
- edsl/questions/QuestionExtract.py +1 -1
- edsl/questions/__init__.py +1 -0
- edsl/questions/answer_validator_mixin.py +29 -0
- edsl/questions/derived/QuestionLinearScale.py +1 -1
- edsl/questions/descriptors.py +49 -5
- edsl/questions/question_registry.py +1 -1
- edsl/questions/templates/dict/__init__.py +0 -0
- edsl/questions/templates/dict/answering_instructions.jinja +21 -0
- edsl/questions/templates/dict/question_presentation.jinja +1 -0
- edsl/results/Result.py +25 -3
- edsl/results/Results.py +17 -5
- edsl/scenarios/FileStore.py +32 -0
- edsl/scenarios/PdfExtractor.py +3 -6
- edsl/scenarios/Scenario.py +2 -1
- edsl/scenarios/handlers/csv.py +11 -0
- edsl/surveys/Survey.py +5 -1
- edsl/templates/error_reporting/base.html +2 -4
- edsl/templates/error_reporting/exceptions_table.html +35 -0
- edsl/templates/error_reporting/interview_details.html +67 -53
- edsl/templates/error_reporting/interviews.html +4 -17
- edsl/templates/error_reporting/overview.html +31 -5
- edsl/templates/error_reporting/performance_plot.html +1 -1
- {edsl-0.1.40.dev2.dist-info → edsl-0.1.42.dist-info}/METADATA +1 -1
- {edsl-0.1.40.dev2.dist-info → edsl-0.1.42.dist-info}/RECORD +59 -53
- {edsl-0.1.40.dev2.dist-info → edsl-0.1.42.dist-info}/LICENSE +0 -0
- {edsl-0.1.40.dev2.dist-info → edsl-0.1.42.dist-info}/WHEEL +0 -0
@@ -0,0 +1,343 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Union, Optional, Dict, List, Any, Type
|
3
|
+
from pydantic import BaseModel, Field, field_validator
|
4
|
+
from jinja2 import Environment, FileSystemLoader, TemplateNotFound
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
from edsl.questions.QuestionBase import QuestionBase
|
8
|
+
from edsl.questions.descriptors import (
|
9
|
+
AnswerKeysDescriptor,
|
10
|
+
ValueTypesDescriptor,
|
11
|
+
ValueDescriptionsDescriptor,
|
12
|
+
QuestionTextDescriptor,
|
13
|
+
)
|
14
|
+
from edsl.questions.response_validator_abc import ResponseValidatorABC
|
15
|
+
from edsl.exceptions.questions import QuestionCreationValidationError
|
16
|
+
from edsl.questions.decorators import inject_exception
|
17
|
+
|
18
|
+
|
19
|
+
class DictResponseValidator(ResponseValidatorABC):
    """Validator configuration for dictionary-valued answers.

    The class only declares data: the names of the parameters the validator
    needs, plus sample responses that should pass and fail. Each example is a
    tuple of ``(response, parameters)`` with, for the failing examples, the
    expected error message as a third element.
    """

    # Names the validator expects to be supplied with.
    required_params = ["answer_keys", "permissive"]

    # A well-formed answer: every key is present with a value of its type.
    valid_examples = [
        (
            dict(
                answer=dict(
                    name="Hot Chocolate",
                    num_ingredients=5,
                    ingredients=["milk", "cocoa", "sugar"],
                )
            ),
            dict(
                answer_keys=["name", "num_ingredients", "ingredients"],
                value_types=["str", "int", "list[str]"],
            ),
        )
    ]

    invalid_examples = [
        # A scalar of the wrong type: "name" is declared as a string.
        (
            dict(answer=dict(name=123)),
            dict(answer_keys=["name"], value_types=["str"]),
            "Key 'name' has value of type int, expected str",
        ),
        # A scalar where a list is declared.
        (
            dict(answer=dict(ingredients="milk")),
            dict(answer_keys=["ingredients"], value_types=["list"]),
            "Key 'ingredients' should be a list, got str",
        ),
    ]
|
49
|
+
|
50
|
+
|
51
|
+
class QuestionDict(QuestionBase):
    """A question whose answer is a dictionary with a fixed set of keys.

    ``answer_keys`` names the keys the answer must contain. ``value_types``
    and ``value_descriptions`` are optional lists that, when given, must have
    one entry per answer key. Unless ``permissive`` is set, the generated
    response model checks each value against its declared type.
    """

    question_type = "dict"
    question_text: str = QuestionTextDescriptor()
    answer_keys: List[str] = AnswerKeysDescriptor()
    value_types: Optional[List[str]] = ValueTypesDescriptor()
    value_descriptions: Optional[List[str]] = ValueDescriptionsDescriptor()

    _response_model = None
    response_validator_class = DictResponseValidator

    def _get_default_answer(self) -> Dict[str, Any]:
        """Build a placeholder answer containing every key in ``answer_keys``.

        Returns:
            A dict with one entry per answer key. When ``value_types`` is
            set, ``str``/``int``/``float`` keys get a sample value, lists get
            a one-element sample (or an empty list when the element type is
            unknown or unspecified), and any other type gets ``None``.
            Without ``value_types`` every key maps to ``None``.
        """
        if not self.value_types:
            # No type information: the keys are still required by answer
            # validation, so emit each of them with a null placeholder rather
            # than an unrelated fixed dictionary.
            return {key: None for key in self.answer_keys}

        scalar_samples = {"str": "sample_string", "int": 1, "float": 1.0}
        answer: Dict[str, Any] = {}
        for key, type_str in zip(self.answer_keys, self.value_types):
            if not type_str.startswith("list"):
                answer[key] = scalar_samples.get(type_str)
                continue

            if "[" not in type_str:
                answer[key] = []
                continue

            start = type_str.index("[") + 1
            element_type = type_str[start:type_str.rindex("]")].strip().lower()
            if element_type in scalar_samples:
                answer[key] = [scalar_samples[element_type]]
            else:
                answer[key] = []

        return answer

    @staticmethod
    def _check_value_type(key: Any, value: Any, type_str: str) -> None:
        """Raise ``ValueError`` if ``value`` does not match ``type_str``.

        Args:
            key: The answer key, used only in error messages.
            value: The value supplied for that key.
            type_str: Lower-cased type name such as ``"str"``, ``"list"`` or
                ``"list[int]"``. Unrecognised names are accepted unchecked.
        """
        python_types = {"str": str, "int": int, "float": float, "list": list}

        if not type_str.startswith("list"):
            expected_type = python_types.get(type_str)
            if expected_type and not isinstance(value, expected_type):
                raise ValueError(
                    f"Key '{key}' has value of type {type(value).__name__}, expected {type_str}"
                )
            return

        if not isinstance(value, list):
            raise ValueError(
                f"Key '{key}' should be a list, got {type(value).__name__}"
            )

        # Only a parameterized list ("list[...]") constrains its elements.
        if "[" not in type_str:
            return

        start = type_str.index("[") + 1
        element_type = type_str[start:type_str.rindex("]")].lower().strip()
        expected_type = python_types.get(element_type)
        if expected_type is None:
            return

        for i, elem in enumerate(value):
            if not isinstance(elem, expected_type):
                raise ValueError(
                    f"List element at index {i} for key '{key}' "
                    f"has type {type(elem).__name__}, expected {element_type}"
                )

    def create_response_model(
        self,
    ) -> Type[BaseModel]:
        """Create the pydantic model used to validate a response.

        Returns:
            A ``BaseModel`` subclass with an ``answer`` dict and an optional
            ``comment``. Its validator rejects answers that lack any of the
            question's ``answer_keys`` and, unless the question is
            permissive, values that do not match ``value_types``.
        """
        # Local import keeps the file's import block untouched.
        from copy import deepcopy

        default_answer = self._get_default_answer()
        question = self  # captured by the validator closure below

        class DictResponse(BaseModel):
            # Deep copy so nested sample lists are never shared between
            # model instances.
            answer: Dict[str, Any] = Field(
                default_factory=lambda: deepcopy(default_answer)
            )
            comment: Optional[str] = None

            @field_validator("answer")
            @classmethod
            def validate_answer(cls, v):
                # Ensure all keys exist
                missing_keys = set(question.answer_keys) - set(v.keys())
                if missing_keys:
                    raise ValueError(f"Missing required keys: {missing_keys}")

                # Validate value types if not permissive. Every answer key is
                # known to be present at this point.
                if not question.permissive and question.value_types:
                    for key, type_str in zip(
                        question.answer_keys, question.value_types
                    ):
                        question._check_value_type(key, v[key], type_str.lower())
                return v

            model_config = {
                "json_schema_extra": {
                    "examples": [{
                        "answer": default_answer,
                        "comment": None
                    }]
                }
            }

        DictResponse.__name__ = "DictResponse"
        return DictResponse

    def __init__(
        self,
        question_name: str,
        question_text: str,
        answer_keys: List[str],
        value_types: Optional[List[Union[str, type]]] = None,
        value_descriptions: Optional[List[str]] = None,
        include_comment: bool = True,
        question_presentation: Optional[str] = None,
        answering_instructions: Optional[str] = None,
        permissive: bool = False,
    ):
        """Initialize the question.

        Args:
            question_name: Name of the question.
            question_text: Text of the question.
            answer_keys: Keys the answer dictionary must contain.
            value_types: Optional type (or type name) per answer key.
            value_descriptions: Optional description per answer key.
            include_comment: Passed to the templates to toggle the comment
                instruction.
            question_presentation: Custom presentation text; the bundled
                template is rendered when omitted.
            answering_instructions: Custom instructions; the bundled template
                is rendered when omitted.
            permissive: When true, value types are not enforced.

        Raises:
            QuestionCreationValidationError: If ``value_types`` or
                ``value_descriptions`` is given with a length different from
                ``answer_keys``.
        """
        self.question_name = question_name
        self.question_text = question_text
        self.answer_keys = answer_keys
        self.value_types = self._normalize_value_types(value_types)
        self.value_descriptions = value_descriptions
        self.include_comment = include_comment
        self.permissive = permissive

        # Validate lengths before rendering: the templates index
        # value_types / value_descriptions by answer-key position.
        if self.value_types and len(self.value_types) != len(self.answer_keys):
            raise QuestionCreationValidationError(
                "Length of value_types must match length of answer_keys."
            )
        if self.value_descriptions and len(self.value_descriptions) != len(self.answer_keys):
            raise QuestionCreationValidationError(
                "Length of value_descriptions must match length of answer_keys."
            )

        self.question_presentation = question_presentation or self._render_template(
            "question_presentation.jinja"
        )
        self.answering_instructions = answering_instructions or self._render_template(
            "answering_instructions.jinja"
        )

    @staticmethod
    def _normalize_value_types(value_types: Optional[List[Union[str, type]]]) -> Optional[List[str]]:
        """Convert all value_types to lower-case string names.

        Strings are lower-cased (``"List[ Str ]"`` becomes ``"list[str]"``),
        plain classes become their lower-cased name, and generic aliases such
        as ``list[str]`` or ``typing.List[str]`` keep their arguments
        (``"list[str]"``).

        Returns:
            The normalized names, or ``None`` when ``value_types`` is empty
            or ``None``.

        Raises:
            QuestionCreationValidationError: If an entry cannot be normalized.
        """
        if not value_types:
            return None

        # Local import keeps the file's import block untouched.
        from typing import get_args, get_origin

        def describe(obj) -> str:
            """Render a type or generic alias as a lower-case name."""
            origin = get_origin(obj)
            if origin is None:
                return getattr(obj, "__name__", str(obj)).lower()
            name = getattr(origin, "__name__", str(origin)).lower()
            args = get_args(obj)
            if not args:
                return name
            return f"{name}[{', '.join(describe(arg) for arg in args)}]"

        def normalize_type(t) -> str:
            # Handle string inputs
            if isinstance(t, str):
                t = t.lower()
                if t.startswith("list"):
                    if "[" in t:
                        # Normalize the inner type
                        inner_type = t[t.index("[") + 1:t.rindex("]")].strip().lower()
                        return f"list[{inner_type}]"
                    return "list"
                return t

            # Generic aliases must be examined before falling back to
            # ``__name__``: they expose their origin's name, which would drop
            # the element type.
            if get_origin(t) is not None or hasattr(t, "__name__"):
                return describe(t)

            raise QuestionCreationValidationError(
                f"Invalid type in value_types: {t}. Must be a type or string."
            )

        normalized = []
        for t in value_types:
            try:
                normalized.append(normalize_type(t))
            except Exception as e:
                raise QuestionCreationValidationError(
                    f"Error normalizing type {t}: {str(e)}"
                ) from e

        return normalized

    def _render_template(self, template_name: str) -> str:
        """Render a template from the ``templates/dict`` directory using Jinja.

        Returns:
            The rendered text, or a "not found" message when the template
            does not exist.
        """
        template_dir = Path(__file__).parent / "templates" / "dict"
        try:
            env = Environment(loader=FileSystemLoader(template_dir))
            template = env.get_template(template_name)
            return template.render(
                question_name=self.question_name,
                question_text=self.question_text,
                answer_keys=self.answer_keys,
                value_types=self.value_types,
                value_descriptions=self.value_descriptions,
                include_comment=self.include_comment,
            )
        except TemplateNotFound:
            # NOTE(review): the message is returned as the rendered text, so a
            # missing template ends up in the prompt instead of raising.
            return f"Template {template_name} not found in {template_dir}."

    def to_dict(self, add_edsl_version: bool = True) -> dict:
        """Serialize to JSON-compatible dictionary."""
        # NOTE(review): ``add_edsl_version`` is accepted but not used here;
        # confirm whether version metadata should be added when it is true.
        return {
            "question_type": self.question_type,
            "question_name": self.question_name,
            "question_text": self.question_text,
            "answer_keys": self.answer_keys,
            "value_types": self.value_types or [],
            "value_descriptions": self.value_descriptions or [],
            "include_comment": self.include_comment,
            "permissive": self.permissive,
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'QuestionDict':
        """Recreate from a dictionary produced by ``to_dict``."""
        return cls(
            question_name=data["question_name"],
            question_text=data["question_text"],
            answer_keys=data["answer_keys"],
            value_types=data.get("value_types"),
            value_descriptions=data.get("value_descriptions"),
            include_comment=data.get("include_comment", True),
            permissive=data.get("permissive", False),
        )

    @classmethod
    @inject_exception
    def example(cls) -> 'QuestionDict':
        """Return an example question."""
        return cls(
            question_name="example",
            question_text="Please provide a simple recipe for hot chocolate.",
            answer_keys=["title", "ingredients", "num_ingredients", "instructions"],
            value_types=["str", "list[str]", "int", "str"],
            value_descriptions=[
                "The title of the recipe.",
                "A list of ingredients.",
                "The number of ingredients.",
                "The instructions for making the recipe."
            ],
        )

    def _simulate_answer(self) -> dict:
        """Simulate an answer for the question."""
        return {
            "answer": self._get_default_answer(),
            "comment": None
        }
|
340
|
+
|
341
|
+
if __name__ == "__main__":
    # Manual smoke check: build the example question and show its
    # serialized form.
    example_question = QuestionDict.example()
    print(example_question.to_dict())
|
@@ -50,7 +50,7 @@ def extract_json(text, expected_keys, verbose=False):
|
|
50
50
|
|
51
51
|
def dict_to_pydantic_model(input_dict: Dict[str, Any]) -> Any:
|
52
52
|
field_definitions = {
|
53
|
-
key: (
|
53
|
+
key: (type(value), Field(default=value)) for key, value in input_dict.items()
|
54
54
|
}
|
55
55
|
|
56
56
|
DynamicModel = create_model("DynamicModel", **field_definitions)
|
edsl/questions/__init__.py
CHANGED
@@ -12,6 +12,7 @@ from edsl.questions.QuestionFreeText import QuestionFreeText
|
|
12
12
|
from edsl.questions.QuestionFunctional import QuestionFunctional
|
13
13
|
from edsl.questions.QuestionList import QuestionList
|
14
14
|
from edsl.questions.QuestionMatrix import QuestionMatrix
|
15
|
+
from edsl.questions.QuestionDict import QuestionDict
|
15
16
|
from edsl.questions.QuestionMultipleChoice import QuestionMultipleChoice
|
16
17
|
from edsl.questions.QuestionNumerical import QuestionNumerical
|
17
18
|
from edsl.questions.QuestionBudget import QuestionBudget
|
@@ -324,6 +324,35 @@ class AnswerValidatorMixin:
|
|
324
324
|
f"Must be one of: {valid_options}"
|
325
325
|
)
|
326
326
|
|
327
|
+
def _validate_answer_dict(self, answer: dict[str, Any]) -> None:
|
328
|
+
"""Validate QuestionDict-specific answer.
|
329
|
+
|
330
|
+
Check that answer["answer"]:
|
331
|
+
- is a dictionary
|
332
|
+
- has all required answer_keys as keys
|
333
|
+
"""
|
334
|
+
value = answer.get("answer")
|
335
|
+
|
336
|
+
# Check that answer is a dictionary
|
337
|
+
if not isinstance(value, dict):
|
338
|
+
raise QuestionAnswerValidationError(
|
339
|
+
f"Dict answer must be a dictionary mapping values to specified keys (got {value})"
|
340
|
+
)
|
341
|
+
|
342
|
+
# Check that all required answer keys are present
|
343
|
+
required_keys = set(self.answer_keys)
|
344
|
+
provided_keys = set(value.keys())
|
345
|
+
|
346
|
+
if missing_keys := (required_keys - provided_keys):
|
347
|
+
raise QuestionAnswerValidationError(
|
348
|
+
f"Missing required keys: {missing_keys}"
|
349
|
+
)
|
350
|
+
|
351
|
+
if extra_keys := (provided_keys - required_keys):
|
352
|
+
raise QuestionAnswerValidationError(
|
353
|
+
f"Unexpected keys: {extra_keys}"
|
354
|
+
)
|
355
|
+
|
327
356
|
|
328
357
|
if __name__ == "__main__":
|
329
358
|
pass
|
@@ -36,7 +36,7 @@ class QuestionLinearScale(QuestionMultipleChoice):
|
|
36
36
|
question_name=question_name,
|
37
37
|
question_text=question_text,
|
38
38
|
question_options=question_options,
|
39
|
-
use_code=False, # question linear scale will have
|
39
|
+
use_code=False, # question linear scale will have its own code
|
40
40
|
include_comment=include_comment,
|
41
41
|
)
|
42
42
|
self.question_options = question_options
|
edsl/questions/descriptors.py
CHANGED
@@ -302,10 +302,10 @@ class QuestionOptionsDescriptor(BaseDescriptor):
|
|
302
302
|
raise QuestionCreationValidationError(
|
303
303
|
f"Question options must be a list (got {value})."
|
304
304
|
)
|
305
|
-
if len(value) > Settings.MAX_NUM_OPTIONS:
|
306
|
-
|
307
|
-
|
308
|
-
|
305
|
+
# if len(value) > Settings.MAX_NUM_OPTIONS:
|
306
|
+
# raise QuestionCreationValidationError(
|
307
|
+
# f"Too many question options (got {value})."
|
308
|
+
# )
|
309
309
|
if len(value) < Settings.MIN_NUM_OPTIONS:
|
310
310
|
raise QuestionCreationValidationError(
|
311
311
|
f"Too few question options (got {value})."
|
@@ -408,7 +408,7 @@ class QuestionTextDescriptor(BaseDescriptor):
|
|
408
408
|
# Automatically replace single braces with double braces
|
409
409
|
# This is here because if the user is using an f-string, the double brace will get converted to a single brace.
|
410
410
|
# This undoes that.
|
411
|
-
value = re.sub(r"\{([^\{\}]+)\}", r"{{\1}}", value)
|
411
|
+
# value = re.sub(r"\{([^\{\}]+)\}", r"{{\1}}", value)
|
412
412
|
return value
|
413
413
|
|
414
414
|
# iterate through all doubles braces and check if they are valid python identifiers
|
@@ -421,6 +421,50 @@ class QuestionTextDescriptor(BaseDescriptor):
|
|
421
421
|
return None
|
422
422
|
|
423
423
|
|
424
|
+
class ValueTypesDescriptor(BaseDescriptor):
    """Descriptor for ``value_types``: accepts ``None`` or a list."""

    def validate(self, value, instance):
        """Return ``None`` for ``None``, else the list with items stringified.

        Raises:
            QuestionCreationValidationError: If ``value`` is neither ``None``
                nor a list.
        """
        if value is None:
            return None
        if isinstance(value, list):
            # Each item is passed through str(); nothing else is checked.
            return list(map(str, value))
        raise QuestionCreationValidationError(
            f"`value_types` must be a list or None (got {value})."
        )
|
435
|
+
|
436
|
+
|
437
|
+
class ValueDescriptionsDescriptor(BaseDescriptor):
    """Descriptor for ``value_descriptions``: ``None`` or a list of strings."""

    def validate(self, value, instance):
        """Return ``value`` unchanged when it is ``None`` or a list of strings.

        Raises:
            QuestionCreationValidationError: If ``value`` is not a list, or
                if any item in it is not a string.
        """
        if value is None:
            return None
        if not isinstance(value, list):
            raise QuestionCreationValidationError(
                f"`value_descriptions` must be a list or None (got {value})."
            )
        for entry in value:
            if not isinstance(entry, str):
                raise QuestionCreationValidationError(
                    f"`value_descriptions` must be a list of strings (got {value})."
                )
        return value
|
451
|
+
|
452
|
+
|
453
|
+
class AnswerKeysDescriptor(BaseDescriptor):
    """Descriptor checking that ``answer_keys`` is a list of strings or integers."""

    def validate(self, value, instance):
        """Check that ``value`` is a list whose items are all ``str`` or ``int``.

        Nothing is returned when the value is acceptable.

        Raises:
            QuestionCreationValidationError: If ``value`` is not a list, or
                if it contains an item that is neither a string nor an
                integer.
        """
        if not isinstance(value, list):
            raise QuestionCreationValidationError(
                f"`answer_keys` must be a list (got {value})."
            )
        if any(not isinstance(item, (str, int)) for item in value):
            raise QuestionCreationValidationError(
                f"`answer_keys` must be a list of strings or integers (got {value})."
            )
|
466
|
+
|
467
|
+
|
424
468
|
if __name__ == "__main__":
|
425
469
|
import doctest
|
426
470
|
|
@@ -96,7 +96,7 @@ class Question(metaclass=Meta):
|
|
96
96
|
|
97
97
|
>>> from edsl import Question
|
98
98
|
>>> Question.list_question_types()
|
99
|
-
['checkbox', 'extract', 'free_text', 'functional', 'likert_five', 'linear_scale', 'list', 'matrix', 'multiple_choice', 'numerical', 'rank', 'top_k', 'yes_no']
|
99
|
+
['checkbox', 'dict', 'extract', 'free_text', 'functional', 'likert_five', 'linear_scale', 'list', 'matrix', 'multiple_choice', 'numerical', 'rank', 'top_k', 'yes_no']
|
100
100
|
"""
|
101
101
|
return [
|
102
102
|
q
|
File without changes
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Please respond with a dictionary using the following keys: {{ answer_keys | join(', ') }}.
|
2
|
+
|
3
|
+
{% if value_descriptions %}
|
4
|
+
Here are descriptions of the values to provide:
|
5
|
+
{% for idx in range(answer_keys | length) %}
|
6
|
+
- "{{ answer_keys[idx] }}": "{{ value_descriptions[idx] }}"
|
7
|
+
{% endfor %}
|
8
|
+
{% endif %}
|
9
|
+
|
10
|
+
{% if value_types %}
|
11
|
+
The values should be formatted in the following types:
|
12
|
+
{% for idx in range(answer_keys | length) %}
|
13
|
+
- "{{ answer_keys[idx] }}": "{{ value_types[idx] }}"
|
14
|
+
{% endfor %}
|
15
|
+
{% endif %}
|
16
|
+
|
17
|
+
If you do not have a value for a given key, use "null".
|
18
|
+
|
19
|
+
{% if include_comment %}
|
20
|
+
After the answer, you can put a comment explaining your response on the next line.
|
21
|
+
{% endif %}
|
@@ -0,0 +1 @@
|
|
1
|
+
{{question_text}}
|
edsl/results/Result.py
CHANGED
@@ -56,6 +56,7 @@ class Result(Base, UserDict):
|
|
56
56
|
comments_dict: Optional[dict] = None,
|
57
57
|
cache_used_dict: Optional[dict[QuestionName, bool]] = None,
|
58
58
|
indices: Optional[dict] = None,
|
59
|
+
cache_keys: Optional[dict[QuestionName, str]] = None,
|
59
60
|
):
|
60
61
|
"""Initialize a Result object.
|
61
62
|
|
@@ -77,7 +78,6 @@ class Result(Base, UserDict):
|
|
77
78
|
self.question_to_attributes = (
|
78
79
|
question_to_attributes or self._create_question_to_attributes(survey)
|
79
80
|
)
|
80
|
-
|
81
81
|
data = {
|
82
82
|
"agent": agent,
|
83
83
|
"scenario": scenario,
|
@@ -86,10 +86,11 @@ class Result(Base, UserDict):
|
|
86
86
|
"answer": answer,
|
87
87
|
"prompt": prompt or {},
|
88
88
|
"raw_model_response": raw_model_response or {},
|
89
|
-
"question_to_attributes": question_to_attributes,
|
89
|
+
"question_to_attributes": self.question_to_attributes,
|
90
90
|
"generated_tokens": generated_tokens or {},
|
91
91
|
"comments_dict": comments_dict or {},
|
92
92
|
"cache_used_dict": cache_used_dict or {},
|
93
|
+
"cache_keys": cache_keys or {},
|
93
94
|
}
|
94
95
|
super().__init__(**data)
|
95
96
|
self.indices = indices
|
@@ -152,7 +153,9 @@ class Result(Base, UserDict):
|
|
152
153
|
@staticmethod
|
153
154
|
def _create_model_sub_dict(model) -> dict:
|
154
155
|
return {
|
155
|
-
"model": model.parameters
|
156
|
+
"model": model.parameters
|
157
|
+
| {"model": model.model}
|
158
|
+
| {"inference_service": model._inference_service_},
|
156
159
|
}
|
157
160
|
|
158
161
|
@staticmethod
|
@@ -163,6 +166,7 @@ class Result(Base, UserDict):
|
|
163
166
|
|
164
167
|
def _construct_sub_dicts(self) -> dict[str, dict]:
|
165
168
|
"""Construct a dictionary of sub-dictionaries for the Result object."""
|
169
|
+
|
166
170
|
sub_dicts_needing_new_keys = {
|
167
171
|
"question_text": {},
|
168
172
|
"question_options": {},
|
@@ -181,6 +185,8 @@ class Result(Base, UserDict):
|
|
181
185
|
f"{k}_cache_used": v for k, v in self.data["cache_used_dict"].items()
|
182
186
|
}
|
183
187
|
|
188
|
+
cache_keys = {f"{k}_cache_key": v for k, v in self.data["cache_keys"].items()}
|
189
|
+
|
184
190
|
d = {
|
185
191
|
**self._create_agent_sub_dict(self.data["agent"]),
|
186
192
|
**self._create_model_sub_dict(self.data["model"]),
|
@@ -195,11 +201,13 @@ class Result(Base, UserDict):
|
|
195
201
|
"question_options": sub_dicts_needing_new_keys["question_options"],
|
196
202
|
"question_type": sub_dicts_needing_new_keys["question_type"],
|
197
203
|
"cache_used": new_cache_dict,
|
204
|
+
"cache_keys": cache_keys,
|
198
205
|
}
|
199
206
|
if hasattr(self, "indices") and self.indices is not None:
|
200
207
|
d["agent"].update({"agent_index": self.indices["agent"]})
|
201
208
|
d["scenario"].update({"scenario_index": self.indices["scenario"]})
|
202
209
|
d["model"].update({"model_index": self.indices["model"]})
|
210
|
+
|
203
211
|
return d
|
204
212
|
|
205
213
|
@property
|
@@ -358,6 +366,10 @@ class Result(Base, UserDict):
|
|
358
366
|
else prompt_obj.to_dict()
|
359
367
|
)
|
360
368
|
d[key] = new_prompt_dict
|
369
|
+
|
370
|
+
if self.indices is not None:
|
371
|
+
d["indices"] = self.indices
|
372
|
+
|
361
373
|
if add_edsl_version:
|
362
374
|
from edsl import __version__
|
363
375
|
|
@@ -406,6 +418,8 @@ class Result(Base, UserDict):
|
|
406
418
|
generated_tokens=json_dict.get("generated_tokens", {}),
|
407
419
|
comments_dict=json_dict.get("comments_dict", {}),
|
408
420
|
cache_used_dict=json_dict.get("cache_used_dict", {}),
|
421
|
+
cache_keys=json_dict.get("cache_keys", {}),
|
422
|
+
indices = json_dict.get("indices", None)
|
409
423
|
)
|
410
424
|
return result
|
411
425
|
|
@@ -459,6 +473,12 @@ class Result(Base, UserDict):
|
|
459
473
|
question_results[result.question_name] = result
|
460
474
|
return question_results
|
461
475
|
|
476
|
+
def get_cache_keys(model_response_objects) -> dict[str, str]:
    """Map each response's question name to the cache key stored on it.

    Args:
        model_response_objects: Iterable of objects exposing
            ``question_name`` and ``cache_key`` attributes.

    Returns:
        A dict keyed by question name. If a name occurs more than once, the
        last response wins.
    """
    return {
        response.question_name: response.cache_key
        for response in model_response_objects
    }
|
481
|
+
|
462
482
|
def get_generated_tokens_dict(answer_key_names) -> dict[str, str]:
|
463
483
|
generated_tokens_dict = {
|
464
484
|
k + "_generated_tokens": question_results[k].generated_tokens
|
@@ -523,6 +543,7 @@ class Result(Base, UserDict):
|
|
523
543
|
generated_tokens_dict = get_generated_tokens_dict(answer_key_names)
|
524
544
|
comments_dict = get_comments_dict(answer_key_names)
|
525
545
|
answer_dict = {k: extracted_answers[k] for k in answer_key_names}
|
546
|
+
cache_keys = get_cache_keys(model_response_objects)
|
526
547
|
|
527
548
|
question_name_to_prompts = get_question_name_to_prompts(model_response_objects)
|
528
549
|
prompt_dictionary = get_prompt_dictionary(
|
@@ -546,6 +567,7 @@ class Result(Base, UserDict):
|
|
546
567
|
comments_dict=comments_dict,
|
547
568
|
cache_used_dict=cache_used_dictionary,
|
548
569
|
indices=interview.indices,
|
570
|
+
cache_keys=cache_keys,
|
549
571
|
)
|
550
572
|
result.interview_hash = interview.initial_hash
|
551
573
|
return result
|