edsl 0.1.30.dev5__py3-none-any.whl → 0.1.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/agents/Invigilator.py +7 -2
- edsl/agents/PromptConstructionMixin.py +18 -1
- edsl/config.py +4 -0
- edsl/conjure/Conjure.py +6 -0
- edsl/coop/coop.py +4 -0
- edsl/coop/utils.py +9 -1
- edsl/data/CacheHandler.py +3 -4
- edsl/enums.py +2 -0
- edsl/inference_services/DeepInfraService.py +6 -91
- edsl/inference_services/GroqService.py +18 -0
- edsl/inference_services/InferenceServicesCollection.py +13 -5
- edsl/inference_services/OpenAIService.py +64 -21
- edsl/inference_services/registry.py +2 -1
- edsl/jobs/Jobs.py +80 -33
- edsl/jobs/buckets/TokenBucket.py +15 -7
- edsl/jobs/interviews/Interview.py +41 -19
- edsl/jobs/interviews/InterviewExceptionEntry.py +101 -0
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +58 -40
- edsl/jobs/interviews/interview_exception_tracking.py +68 -10
- edsl/jobs/runners/JobsRunnerAsyncio.py +112 -81
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -237
- edsl/jobs/runners/JobsRunnerStatusMixin.py +291 -35
- edsl/jobs/tasks/QuestionTaskCreator.py +2 -3
- edsl/jobs/tasks/TaskCreators.py +8 -2
- edsl/jobs/tasks/TaskHistory.py +145 -1
- edsl/language_models/LanguageModel.py +133 -75
- edsl/language_models/ModelList.py +8 -2
- edsl/language_models/registry.py +16 -0
- edsl/questions/QuestionFunctional.py +8 -7
- edsl/questions/QuestionMultipleChoice.py +15 -12
- edsl/questions/QuestionNumerical.py +0 -1
- edsl/questions/descriptors.py +6 -4
- edsl/results/DatasetExportMixin.py +185 -78
- edsl/results/Result.py +13 -11
- edsl/results/Results.py +19 -16
- edsl/results/ResultsToolsMixin.py +1 -1
- edsl/scenarios/Scenario.py +14 -0
- edsl/scenarios/ScenarioList.py +59 -21
- edsl/scenarios/ScenarioListExportMixin.py +16 -5
- edsl/scenarios/ScenarioListPdfMixin.py +3 -0
- edsl/surveys/Survey.py +11 -8
- {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/METADATA +4 -2
- {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/RECORD +46 -44
- {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/LICENSE +0 -0
- {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/WHEEL +0 -0
edsl/jobs/tasks/TaskHistory.py
CHANGED
@@ -11,6 +11,8 @@ class TaskHistory:
 
         [Interview.exceptions, Interview.exceptions, Interview.exceptions, ...]
 
+        >>> _ = TaskHistory.example()
+        ...
         """
 
         self.total_interviews = interviews
@@ -18,8 +20,26 @@ class TaskHistory:
 
         self._interviews = {index: i for index, i in enumerate(self.total_interviews)}
 
+    @classmethod
+    def example(cls):
+        from edsl.jobs.interviews.Interview import Interview
+
+        from edsl.jobs.Jobs import Jobs
+
+        j = Jobs.example(throw_exception_probability=1, test_model=True)
+
+        from edsl.config import CONFIG
+
+        results = j.run(print_exceptions=False, skip_retry=True, cache = False)
+
+        return cls(results.task_history.total_interviews)
+
     @property
     def exceptions(self):
+        """
+        >>> len(TaskHistory.example().exceptions)
+        4
+        """
         return [i.exceptions for k, i in self._interviews.items() if i.exceptions != {}]
 
     @property
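Reviewer note: the new example() hook plus the exceptions doctest give the class a self-contained smoke test. A minimal usage sketch, assuming an edsl install at this version (the count of 4 comes from the shipped doctest, not from any API guarantee):

from edsl.jobs.tasks.TaskHistory import TaskHistory

# Build a TaskHistory from a test-model job that always raises.
history = TaskHistory.example()
print(history.has_exceptions)   # True
print(len(history.exceptions))  # 4 in the shipped doctest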
@@ -42,7 +62,12 @@ class TaskHistory:
 
     @property
     def has_exceptions(self) -> bool:
-        """Return True if there are any exceptions."""
+        """Return True if there are any exceptions.
+
+        >>> TaskHistory.example().has_exceptions
+        True
+
+        """
         return len(self.exceptions) > 0
 
     def _repr_html_(self):
@@ -216,6 +241,47 @@ class TaskHistory:
         }
         """
 
+    @property
+    def exceptions_by_type(self) -> dict:
+        """Return a dictionary of exceptions by type."""
+        exceptions_by_type = {}
+        for interview in self.total_interviews:
+            for question_name, exceptions in interview.exceptions.items():
+                for exception in exceptions:
+                    exception_type = exception["exception"]
+                    if exception_type in exceptions_by_type:
+                        exceptions_by_type[exception_type] += 1
+                    else:
+                        exceptions_by_type[exception_type] = 1
+        return exceptions_by_type
+
+    @property
+    def exceptions_by_question_name(self) -> dict:
+        """Return a dictionary of exceptions tallied by question name."""
+        exceptions_by_question_name = {}
+        for interview in self.total_interviews:
+            for question_name, exceptions in interview.exceptions.items():
+                if question_name not in exceptions_by_question_name:
+                    exceptions_by_question_name[question_name] = 0
+                exceptions_by_question_name[question_name] += len(exceptions)
+
+        for question in self.total_interviews[0].survey.questions:
+            if question.question_name not in exceptions_by_question_name:
+                exceptions_by_question_name[question.question_name] = 0
+        return exceptions_by_question_name
+
+    @property
+    def exceptions_by_model(self) -> dict:
+        """Return a dictionary of exceptions tallied by model and question name."""
+        exceptions_by_model = {}
+        for interview in self.total_interviews:
+            model = interview.model
+            if model not in exceptions_by_model:
+                exceptions_by_model[model.model] = 0
+            if interview.exceptions != {}:
+                exceptions_by_model[model.model] += len(interview.exceptions)
+        return exceptions_by_model
+
     def html(
         self,
         filename: Optional[str] = None,
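Reviewer note: the three new properties are plain tallies over interview.exceptions, which maps a question name to a list of exception entries. The same counting pattern, written standalone with collections.Counter over hypothetical data, just to show the shape being iterated:

from collections import Counter

# Hypothetical stand-ins for interview.exceptions dicts.
interviews = [
    {"how_feeling": [{"exception": "ValueError"}, {"exception": "ValueError"}]},
    {"age": [{"exception": "KeyError"}]},
]

by_type = Counter(
    entry["exception"]
    for exceptions in interviews
    for entries in exceptions.values()
    for entry in entries
)
print(by_type)  # Counter({'ValueError': 2, 'KeyError': 1})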
@@ -236,6 +302,8 @@ class TaskHistory:
         if css is None:
             css = self.css()
 
+        models_used = set([i.model for index, i in self._interviews.items()])
+
         template = Template(
             """
             <!DOCTYPE html>
@@ -249,6 +317,69 @@ class TaskHistory:
             </style>
             </head>
             <body>
+            <h1>Overview</h1>
+            <p>There were {{ interviews|length }} total interviews. The number of interviews with exceptions was {{ num_exceptions }}.</p>
+            <p>The models used were: {{ models_used }}.</p>
+            <p>For documentation on dealing with exceptions on Expected Parrot,
+            see <a href="https://docs.expectedparrot.com/en/latest/exceptions.html">here</a>.</p>
+
+            <h2>Exceptions by Type</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>Exception Type</th>
+                        <th>Number</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {% for exception_type, exceptions in exceptions_by_type.items() %}
+                    <tr>
+                        <td>{{ exception_type }}</td>
+                        <td>{{ exceptions }}</td>
+                    </tr>
+                    {% endfor %}
+                </tbody>
+            </table>
+
+
+            <h2>Exceptions by Model</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>Model</th>
+                        <th>Number</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {% for model, exceptions in exceptions_by_model.items() %}
+                    <tr>
+                        <td>{{ model }}</td>
+                        <td>{{ exceptions }}</td>
+                    </tr>
+                    {% endfor %}
+                </tbody>
+            </table>
+
+
+            <h2>Exceptions by Question Name</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>Question Name</th>
+                        <th>Number of Exceptions</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {% for question_name, exception_count in exceptions_by_question_name.items() %}
+                    <tr>
+                        <td>{{ question_name }}</td>
+                        <td>{{ exception_count }}</td>
+                    </tr>
+                    {% endfor %}
+                </tbody>
+            </table>
+
+
             {% for index, interview in interviews.items() %}
             {% if interview.exceptions != {} %}
             <div class="interview">Interview: {{ index }} </div>
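Reviewer note: the overview tables are plain Jinja2 over the tally dicts added above. A minimal sketch of the same render pattern, outside the class:

from jinja2 import Template

table = Template(
    "<table>\n"
    "{% for exception_type, count in exceptions_by_type.items() %}"
    "<tr><td>{{ exception_type }}</td><td>{{ count }}</td></tr>\n"
    "{% endfor %}"
    "</table>"
)
print(table.render(exceptions_by_type={"ValueError": 2, "KeyError": 1}))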
@@ -296,11 +427,18 @@ class TaskHistory:
             """
         )
 
+        # breakpoint()
+
         # Render the template with data
         output = template.render(
             interviews=self._interviews,
             css=css,
+            num_exceptions=len(self.exceptions),
             performance_plot_html=performance_plot_html,
+            exceptions_by_type=self.exceptions_by_type,
+            exceptions_by_question_name=self.exceptions_by_question_name,
+            exceptions_by_model=self.exceptions_by_model,
+            models_used=models_used,
         )
 
         # Save the rendered output to a file
@@ -344,3 +482,9 @@ class TaskHistory:
 
         if return_link:
             return filename
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(optionflags=doctest.ELLIPSIS)
edsl/language_models/LanguageModel.py
CHANGED
@@ -7,9 +7,37 @@ import asyncio
 import json
 import time
 import os
+import hashlib
 from typing import Coroutine, Any, Callable, Type, List, get_type_hints
 from abc import ABC, abstractmethod
 
+
+class IntendedModelCallOutcome:
+    "This is a tuple-like class that holds the response, cache_used, and cache_key."
+
+    def __init__(self, response: dict, cache_used: bool, cache_key: str):
+        self.response = response
+        self.cache_used = cache_used
+        self.cache_key = cache_key
+
+    def __iter__(self):
+        """Iterate over the class attributes.
+
+        >>> a, b, c = IntendedModelCallOutcome({'answer': "yes"}, True, 'x1289')
+        >>> a
+        {'answer': 'yes'}
+        """
+        yield self.response
+        yield self.cache_used
+        yield self.cache_key
+
+    def __len__(self):
+        return 3
+
+    def __repr__(self):
+        return f"IntendedModelCallOutcome(response = {self.response}, cache_used = {self.cache_used}, cache_key = '{self.cache_key}')"
+
+
 from edsl.config import CONFIG
 
 from edsl.utilities.decorators import sync_wrapper, jupyter_nb_handler
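Reviewer note: IntendedModelCallOutcome exists so call sites that used to unpack a (response, cache_used, cache_key) tuple keep working. A quick demonstration of why __iter__ and __len__ matter, with the class above in scope (values are illustrative):

outcome = IntendedModelCallOutcome(
    response={"message": '{"answer": "Hello world"}'},
    cache_used=False,
    cache_key="24ff6ac2bc2f1729f817f261e0792577",
)

# __iter__ preserves tuple-style unpacking at old call sites.
response, cache_used, cache_key = outcome
assert len(outcome) == 3  # __len__ mirrors the old 3-tuple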
@@ -96,6 +124,11 @@ class LanguageModel(
         # Skip the API key check. Sometimes this is useful for testing.
         self._api_token = None
 
+    def ask_question(self, question):
+        user_prompt = question.get_instructions().render(question.data).text
+        system_prompt = "You are a helpful agent pretending to be a human."
+        return self.execute_model_call(user_prompt, system_prompt)
+
     @property
     def api_token(self) -> str:
         if not hasattr(self, "_api_token"):
@@ -149,7 +182,7 @@ class LanguageModel(
         key_value = os.getenv(key_name)
         return key_value is not None
 
-    def __hash__(self):
+    def __hash__(self) -> str:
         """Allow the model to be used as a key in a dictionary."""
         from edsl.utilities.utilities import dict_hash
 
@@ -216,19 +249,25 @@ class LanguageModel(
         >>> LanguageModel._overide_default_parameters(passed_parameter_dict={"temperature": 0.5}, default_parameter_dict={"temperature":0.9, "max_tokens": 1000})
         {'temperature': 0.5, 'max_tokens': 1000}
         """
-        parameters = dict({})
-
-
-
-
-
-
+        # parameters = dict({})
+
+        return {
+            parameter_name: passed_parameter_dict.get(parameter_name, default_value)
+            for parameter_name, default_value in default_parameter_dict.items()
+        }
+
+    def __call__(self, user_prompt: str, system_prompt: str):
+        return self.execute_model_call(user_prompt, system_prompt)
 
     @abstractmethod
     async def async_execute_model_call(user_prompt: str, system_prompt: str):
-        """Execute the model call and returns
+        """Execute the model call and returns a coroutine.
 
         >>> m = LanguageModel.example(test_model = True)
+        >>> async def test(): return await m.async_execute_model_call("Hello, model!", "You are a helpful agent.")
+        >>> asyncio.run(test())
+        {'message': '{"answer": "Hello world"}'}
+
         >>> m.execute_model_call("Hello, model!", "You are a helpful agent.")
         {'message': '{"answer": "Hello world"}'}
 
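Reviewer note: the rewritten _overide_default_parameters is a single dict comprehension: a passed value wins, otherwise the default fills in. One consequence worth noting is that keys present only in the passed dict are dropped, since iteration is over the defaults. Standalone:

defaults = {"temperature": 0.9, "max_tokens": 1000}
passed = {"temperature": 0.5, "unknown_knob": 1}  # hypothetical input

merged = {name: passed.get(name, default) for name, default in defaults.items()}
assert merged == {"temperature": 0.5, "max_tokens": 1000}  # unknown_knob is ignored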
@@ -274,11 +313,38 @@ class LanguageModel(
 
         What is returned by the API is model-specific and often includes meta-data that we do not need.
         For example, here is the results from a call to GPT-4:
-        To actually
+        To actually track the response, we need to grab
         data["choices[0]"]["message"]["content"].
         """
         raise NotImplementedError
 
+    async def _async_prepare_response(
+        self, model_call_outcome: IntendedModelCallOutcome, cache: "Cache"
+    ) -> dict:
+        """Prepare the response for return."""
+
+        model_response = {
+            "cache_used": model_call_outcome.cache_used,
+            "cache_key": model_call_outcome.cache_key,
+            "usage": model_call_outcome.response.get("usage", {}),
+            "raw_model_response": model_call_outcome.response,
+        }
+
+        answer_portion = self.parse_response(model_call_outcome.response)
+        try:
+            answer_dict = json.loads(answer_portion)
+        except json.JSONDecodeError as e:
+            # TODO: Turn into logs to generate issues
+            answer_dict, success = await repair(
+                bad_json=answer_portion, error_message=str(e), cache=cache
+            )
+            if not success:
+                raise Exception(
+                    f"""Even the repair failed. The error was: {e}. The response was: {answer_portion}."""
+                )
+
+        return {**model_response, **answer_dict}
+
     async def async_get_raw_response(
         self,
         user_prompt: str,
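Reviewer note: _async_prepare_response is parse-then-repair — strict json.loads first, and only on a JSONDecodeError does it await the repair pass (the repair helper is imported elsewhere in the module and not shown in this diff). The same control flow with a hypothetical repair stand-in:

import asyncio
import json

async def repair(bad_json: str, error_message: str) -> tuple[dict, bool]:
    # Hypothetical stand-in for edsl's repair step.
    return {"answer": bad_json.strip()}, True

async def prepare(answer_portion: str) -> dict:
    try:
        return json.loads(answer_portion)
    except json.JSONDecodeError as e:
        answer_dict, success = await repair(bad_json=answer_portion, error_message=str(e))
        if not success:
            raise Exception(f"Even the repair failed. The error was: {e}.")
        return answer_dict

print(asyncio.run(prepare('{"answer": "yes"}')))  # {'answer': 'yes'}
print(asyncio.run(prepare("not json at all")))    # {'answer': 'not json at all'}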
@@ -286,7 +352,28 @@
         cache: "Cache",
         iteration: int = 0,
         encoded_image=None,
-    ) ->
+    ) -> IntendedModelCallOutcome:
+        import warnings
+
+        warnings.warn(
+            "This method is deprecated. Use async_get_intended_model_call_outcome."
+        )
+        return await self._async_get_intended_model_call_outcome(
+            user_prompt=user_prompt,
+            system_prompt=system_prompt,
+            cache=cache,
+            iteration=iteration,
+            encoded_image=encoded_image,
+        )
+
+    async def _async_get_intended_model_call_outcome(
+        self,
+        user_prompt: str,
+        system_prompt: str,
+        cache: "Cache",
+        iteration: int = 0,
+        encoded_image=None,
+    ) -> IntendedModelCallOutcome:
         """Handle caching of responses.
 
         :param user_prompt: The user's prompt.
@@ -304,52 +391,49 @@
 
         >>> from edsl import Cache
         >>> m = LanguageModel.example(test_model = True)
-        >>> m.
-        ({'message': '{"answer": "Hello world"}'}, False, '24ff6ac2bc2f1729f817f261e0792577')
+        >>> m._get_intended_model_call_outcome(user_prompt = "Hello", system_prompt = "hello", cache = Cache())
+        IntendedModelCallOutcome(response = {'message': '{"answer": "Hello world"}'}, cache_used = False, cache_key = '24ff6ac2bc2f1729f817f261e0792577')
         """
-
+
+        if encoded_image:
+            # the image has is appended to the user_prompt for hash-lookup purposes
+            image_hash = hashlib.md5(encoded_image.encode()).hexdigest()
 
         cache_call_params = {
             "model": str(self.model),
             "parameters": self.parameters,
             "system_prompt": system_prompt,
-            "user_prompt": user_prompt,
+            "user_prompt": user_prompt + "" if not encoded_image else f" {image_hash}",
             "iteration": iteration,
         }
-
-        if encoded_image:
-            import hashlib
-
-            image_hash = hashlib.md5(encoded_image.encode()).hexdigest()
-            cache_call_params["user_prompt"] = f"{user_prompt} {image_hash}"
-
         cached_response, cache_key = cache.fetch(**cache_call_params)
-
+
+        if cache_used := cached_response is not None:
             response = json.loads(cached_response)
-            cache_used = True
         else:
-            remote_call = hasattr(self, "remote") and self.remote
             f = (
                 self.remote_async_execute_model_call
-                if
+                if hasattr(self, "remote") and self.remote
                 else self.async_execute_model_call
             )
-            params = {
-
-
+            params = {
+                "user_prompt": user_prompt,
+                "system_prompt": system_prompt,
+                **({"encoded_image": encoded_image} if encoded_image else {}),
+            }
             response = await f(**params)
             new_cache_key = cache.store(
-
-
-
-
-
-
-
-            assert new_cache_key == cache_key
-            cache_used = False
+                **cache_call_params, response=response
+            )  # store the response in the cache
+            assert new_cache_key == cache_key  # should be the same
+
+        return IntendedModelCallOutcome(
+            response=response, cache_used=cache_used, cache_key=cache_key
+        )
 
-
+    _get_intended_model_call_outcome = sync_wrapper(
+        _async_get_intended_model_call_outcome
+    )
 
     get_raw_response = sync_wrapper(async_get_raw_response)
 
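Reviewer note: the caching rewrite hangs on one walrus expression — cache_used := cached_response is not None — which both tests for a hit and records which branch ran, so the method can return a single IntendedModelCallOutcome either way. The same fetch-or-call-then-store shape, with a plain dict standing in for the real Cache:

import json

_store: dict = {}  # hypothetical stand-in for edsl's Cache

def call_model() -> dict:
    return {"message": '{"answer": "Hello world"}'}

cache_key = "24ff6ac2bc2f1729f817f261e0792577"
cached_response = _store.get(cache_key)

if cache_used := cached_response is not None:
    response = json.loads(cached_response)  # cache hit: reuse the stored JSON
else:
    response = call_model()                 # cache miss: call and store
    _store[cache_key] = json.dumps(response)

print(response, cache_used)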
@@ -370,7 +454,7 @@
         self,
         user_prompt: str,
         system_prompt: str,
-        cache: Cache,
+        cache: "Cache",
         iteration: int = 1,
         encoded_image=None,
     ) -> dict:
@@ -388,36 +472,10 @@
             "system_prompt": system_prompt,
             "iteration": iteration,
             "cache": cache,
+            **({"encoded_image": encoded_image} if encoded_image else {}),
         }
-
-
-
-        raw_response, cache_used, cache_key = await self.async_get_raw_response(
-            **params
-        )
-        response = self.parse_response(raw_response)
-
-        try:
-            dict_response = json.loads(response)
-        except json.JSONDecodeError as e:
-            # TODO: Turn into logs to generate issues
-            dict_response, success = await repair(
-                bad_json=response, error_message=str(e), cache=cache
-            )
-            if not success:
-                raise Exception(
-                    f"""Even the repair failed. The error was: {e}. The response was: {response}."""
-                )
-
-        dict_response.update(
-            {
-                "cache_used": cache_used,
-                "cache_key": cache_key,
-                "usage": raw_response.get("usage", {}),
-                "raw_model_response": raw_response,
-            }
-        )
-        return dict_response
+        model_call_outcome = await self._async_get_intended_model_call_outcome(**params)
+        return await self._async_prepare_response(model_call_outcome, cache=cache)
 
     get_response = sync_wrapper(async_get_response)
 
@@ -494,7 +552,12 @@
         return table
 
     @classmethod
-    def example(
+    def example(
+        cls,
+        test_model: bool = False,
+        canned_response: str = "Hello world",
+        throw_exception: bool = False,
+    ):
         """Return a default instance of the class.
 
         >>> from edsl.language_models import LanguageModel
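Reviewer note: the widened example() signature makes canned and failing models constructible in one line, which is what TaskHistory.example() leans on upstream via Jobs.example(throw_exception_probability=1, test_model=True). A sketch, assuming an edsl install; canned_response sets the test model's fixed reply, and throw_exception presumably makes it raise instead:

from edsl.language_models import LanguageModel

m = LanguageModel.example(test_model=True, canned_response="Hello world")
print(m.execute_model_call("Hello, model!", "You are a helpful agent."))
# {'message': '{"answer": "Hello world"}'}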
@@ -538,8 +601,3 @@ if __name__ == "__main__":
     import doctest
 
     doctest.testmod(optionflags=doctest.ELLIPSIS)
-
-    # from edsl.language_models import LanguageModel
-
-    # from edsl.language_models import LanguageModel
-    # print(LanguageModel.example())
edsl/language_models/ModelList.py
CHANGED
@@ -86,8 +86,14 @@ class ModelList(Base, UserList):
         pass
 
     @classmethod
-    def example(
-
+    def example(cls, randomize: bool = False) -> "ModelList":
+        """
+        Returns an example ModelList instance.
+
+        :param randomize: If True, uses Model's randomize method.
+        """
+
+        return cls([Model.example(randomize) for _ in range(3)])
 
 
 if __name__ == "__main__":
edsl/language_models/registry.py
CHANGED
@@ -1,4 +1,5 @@
 import textwrap
+from random import random
 
 
 def get_model_class(model_name, registry=None):
@@ -35,6 +36,10 @@ class Model(metaclass=Meta):
         from edsl.inference_services.registry import default
 
         registry = registry or default
+
+        if isinstance(model_name, int):
+            model_name = cls.available(name_only=True)[model_name]
+
         factory = registry.create_model_factory(model_name)
         return factory(*args, **kwargs)
 
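Reviewer note: Model(0) now resolves an integer to a position in Model.available(name_only=True), a convenience for notebooks. The branch in isolation, with a hypothetical listing:

available = ["gpt-4o", "gemini-pro"]  # hypothetical Model.available(name_only=True)

model_name = 1
if isinstance(model_name, int):
    model_name = available[model_name]
assert model_name == "gemini-pro"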
@@ -92,6 +97,17 @@ class Model(metaclass=Meta):
             print("OK!")
             print("\n")
 
+    @classmethod
+    def example(cls, randomize: bool = False) -> "Model":
+        """
+        Returns an example Model instance.
+
+        :param randomize: If True, the temperature is set to a random decimal between 0 and 1.
+        """
+        temperature = 0.5 if not randomize else round(random(), 2)
+        model_name = cls.default_model
+        return cls(model_name, temperature=temperature)
+
 
 if __name__ == "__main__":
     import doctest
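Reviewer note: Model.example() and the ModelList.example() shown earlier compose — the list version just builds three example models, with randomize=True drawing each temperature from round(random(), 2). A usage sketch, assuming an edsl install and these import paths:

from edsl.language_models.registry import Model
from edsl.language_models.ModelList import ModelList

m = Model.example()                        # default_model at temperature 0.5
models = ModelList.example(randomize=True)
print(len(models))                         # 3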
edsl/questions/QuestionFunctional.py
CHANGED
@@ -6,11 +6,12 @@ from edsl.questions.QuestionBase import QuestionBase
 from edsl.utilities.restricted_python import create_restricted_function
 from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
 
+
 class QuestionFunctional(QuestionBase):
     """A special type of question that is *not* answered by an LLM.
-
+
     >>> from edsl import Scenario, Agent
-
+
     # Create an instance of QuestionFunctional with the new function
     >>> question = QuestionFunctional.example()
 
@@ -21,7 +22,7 @@ class QuestionFunctional(QuestionBase):
     >>> results = question.by(scenario).by(agent).run()
     >>> results.select("answer.*").to_list()[0] == 150
     True
-
+
     # Serialize the question to a dictionary
 
     >>> from edsl.questions.QuestionBase import QuestionBase
@@ -105,8 +106,6 @@ class QuestionFunctional(QuestionBase):
             "requires_loop": self.requires_loop,
             "function_name": self.function_name,
         }
-
-
 
     @classmethod
     def example(cls):
@@ -141,7 +140,9 @@ def main():
     results = question.by(scenario).by(agent).run()
    assert results.select("answer.*").to_list()[0] == 150
 
+
 if __name__ == "__main__":
-    #main()
+    # main()
     import doctest
-
+
+    doctest.testmod(optionflags=doctest.ELLIPSIS)
edsl/questions/QuestionMultipleChoice.py
CHANGED
@@ -11,7 +11,7 @@ from edsl.questions.descriptors import QuestionOptionsDescriptor
 
 class QuestionMultipleChoice(QuestionBase):
     """This question prompts the agent to select one option from a list of options.
-
+
     https://docs.expectedparrot.com/en/latest/questions.html#questionmultiplechoice-class
 
     """
@@ -51,7 +51,7 @@ class QuestionMultipleChoice(QuestionBase):
         self, answer: dict[str, Union[str, int]]
     ) -> dict[str, Union[str, int]]:
         """Validate the answer.
-
+
         >>> q = QuestionMultipleChoice.example()
         >>> q._validate_answer({"answer": 0, "comment": "I like custard"})
         {'answer': 0, 'comment': 'I like custard'}
@@ -67,19 +67,17 @@ class QuestionMultipleChoice(QuestionBase):
         return answer
 
     def _translate_answer_code_to_answer(
-        self,
-        answer_code: int,
-        scenario: Optional["Scenario"] = None
+        self, answer_code: int, scenario: Optional["Scenario"] = None
     ):
         """Translate the answer code to the actual answer.
 
-        It is used to translate the answer code to the actual answer.
+        It is used to translate the answer code to the actual answer.
         The question options might be templates, so they need to be rendered with the scenario.
-
+
         >>> q = QuestionMultipleChoice.example()
         >>> q._translate_answer_code_to_answer(0, {})
         'Good'
-
+
         >>> q = QuestionMultipleChoice(question_name="how_feeling", question_text="How are you?", question_options=["{{emotion[0]}}", "emotion[1]"])
         >>> q._translate_answer_code_to_answer(0, {"emotion": ["Happy", "Sad"]})
         'Happy'
@@ -92,16 +90,21 @@ class QuestionMultipleChoice(QuestionBase):
         if isinstance(self.question_options, str):
             # If dynamic options are provided like {{ options }}, render them with the scenario
             from jinja2 import Environment, meta
+
             env = Environment()
             parsed_content = env.parse(self.question_options)
-            question_option_key = list(meta.find_undeclared_variables(parsed_content))[
+            question_option_key = list(meta.find_undeclared_variables(parsed_content))[
+                0
+            ]
+            # breakpoint()
             translated_options = scenario.get(question_option_key)
         else:
             translated_options = [
-                Template(str(option)).render(scenario)
+                Template(str(option)).render(scenario)
+                for option in self.question_options
             ]
-            #print("Translated options:", translated_options)
-            #breakpoint()
+            # print("Translated options:", translated_options)
+            # breakpoint()
         return translated_options[int(answer_code)]
 
     def _simulate_answer(
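Reviewer note: the else branch treats every option as a Jinja template rendered against the scenario, which is how {{emotion[0]}} in the doctest becomes 'Happy' while a plain string passes through unchanged. Standalone:

from jinja2 import Template

scenario = {"emotion": ["Happy", "Sad"]}
question_options = ["{{emotion[0]}}", "emotion[1]"]  # second option is a literal, as in the doctest

translated = [Template(str(option)).render(scenario) for option in question_options]
print(translated)  # ['Happy', 'emotion[1]']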
edsl/questions/QuestionNumerical.py
CHANGED
@@ -26,7 +26,6 @@ class QuestionNumerical(QuestionBase):
 
     :param question_name: The name of the question.
     :param question_text: The text of the question.
-    :param instructions: Instructions for the question. If not provided, the default instructions are used. To view them, run `QuestionNumerical.default_instructions`.
     :param min_value: The minimum value of the answer.
     :param max_value: The maximum value of the answer.
     """