edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +107 -30
- edsl/BaseDiff.py +260 -0
- edsl/__init__.py +25 -21
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +103 -46
- edsl/agents/AgentList.py +97 -13
- edsl/agents/Invigilator.py +23 -10
- edsl/agents/InvigilatorBase.py +19 -14
- edsl/agents/PromptConstructionMixin.py +342 -100
- edsl/agents/descriptors.py +5 -2
- edsl/base/Base.py +289 -0
- edsl/config.py +2 -1
- edsl/conjure/AgentConstructionMixin.py +152 -0
- edsl/conjure/Conjure.py +56 -0
- edsl/conjure/InputData.py +659 -0
- edsl/conjure/InputDataCSV.py +48 -0
- edsl/conjure/InputDataMixinQuestionStats.py +182 -0
- edsl/conjure/InputDataPyRead.py +91 -0
- edsl/conjure/InputDataSPSS.py +8 -0
- edsl/conjure/InputDataStata.py +8 -0
- edsl/conjure/QuestionOptionMixin.py +76 -0
- edsl/conjure/QuestionTypeMixin.py +23 -0
- edsl/conjure/RawQuestion.py +65 -0
- edsl/conjure/SurveyResponses.py +7 -0
- edsl/conjure/__init__.py +9 -4
- edsl/conjure/examples/placeholder.txt +0 -0
- edsl/conjure/naming_utilities.py +263 -0
- edsl/conjure/utilities.py +165 -28
- edsl/conversation/Conversation.py +238 -0
- edsl/conversation/car_buying.py +58 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/coop.py +337 -121
- edsl/coop/utils.py +56 -70
- edsl/data/Cache.py +74 -22
- edsl/data/CacheHandler.py +10 -9
- edsl/data/SQLiteDict.py +11 -3
- edsl/inference_services/AnthropicService.py +1 -0
- edsl/inference_services/DeepInfraService.py +20 -13
- edsl/inference_services/GoogleService.py +7 -1
- edsl/inference_services/InferenceServicesCollection.py +33 -7
- edsl/inference_services/OpenAIService.py +17 -10
- edsl/inference_services/models_available_cache.py +69 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/Answers.py +15 -1
- edsl/jobs/Jobs.py +322 -73
- edsl/jobs/buckets/BucketCollection.py +9 -3
- edsl/jobs/buckets/ModelBuckets.py +4 -2
- edsl/jobs/buckets/TokenBucket.py +1 -2
- edsl/jobs/interviews/Interview.py +7 -10
- edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
- edsl/jobs/interviews/retry_management.py +4 -4
- edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
- edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
- edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
- edsl/jobs/tasks/TaskHistory.py +4 -3
- edsl/language_models/LanguageModel.py +42 -55
- edsl/language_models/ModelList.py +96 -0
- edsl/language_models/registry.py +14 -0
- edsl/language_models/repair.py +97 -25
- edsl/notebooks/Notebook.py +157 -32
- edsl/prompts/Prompt.py +31 -19
- edsl/questions/QuestionBase.py +145 -23
- edsl/questions/QuestionBudget.py +5 -6
- edsl/questions/QuestionCheckBox.py +7 -3
- edsl/questions/QuestionExtract.py +5 -3
- edsl/questions/QuestionFreeText.py +3 -3
- edsl/questions/QuestionFunctional.py +0 -3
- edsl/questions/QuestionList.py +3 -4
- edsl/questions/QuestionMultipleChoice.py +16 -8
- edsl/questions/QuestionNumerical.py +4 -3
- edsl/questions/QuestionRank.py +5 -3
- edsl/questions/__init__.py +4 -3
- edsl/questions/descriptors.py +9 -4
- edsl/questions/question_registry.py +27 -31
- edsl/questions/settings.py +1 -1
- edsl/results/Dataset.py +31 -0
- edsl/results/DatasetExportMixin.py +493 -0
- edsl/results/Result.py +42 -82
- edsl/results/Results.py +178 -66
- edsl/results/ResultsDBMixin.py +10 -9
- edsl/results/ResultsExportMixin.py +23 -507
- edsl/results/ResultsGGMixin.py +3 -3
- edsl/results/ResultsToolsMixin.py +9 -9
- edsl/scenarios/FileStore.py +140 -0
- edsl/scenarios/Scenario.py +59 -6
- edsl/scenarios/ScenarioList.py +138 -52
- edsl/scenarios/ScenarioListExportMixin.py +32 -0
- edsl/scenarios/ScenarioListPdfMixin.py +2 -1
- edsl/scenarios/__init__.py +1 -0
- edsl/study/ObjectEntry.py +173 -0
- edsl/study/ProofOfWork.py +113 -0
- edsl/study/SnapShot.py +73 -0
- edsl/study/Study.py +498 -0
- edsl/study/__init__.py +4 -0
- edsl/surveys/MemoryPlan.py +11 -4
- edsl/surveys/Survey.py +124 -37
- edsl/surveys/SurveyExportMixin.py +25 -5
- edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
- edsl/tools/plotting.py +4 -2
- edsl/utilities/__init__.py +21 -20
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/gcp_bucket/simple_example.py +9 -0
- edsl/utilities/interface.py +90 -73
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/utilities.py +59 -6
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
- edsl-0.1.29.dist-info/RECORD +203 -0
- edsl/conjure/RawResponseColumn.py +0 -327
- edsl/conjure/SurveyBuilder.py +0 -308
- edsl/conjure/SurveyBuilderCSV.py +0 -78
- edsl/conjure/SurveyBuilderSPSS.py +0 -118
- edsl/data/RemoteDict.py +0 -103
- edsl-0.1.27.dev2.dist-info/RECORD +0 -172
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
edsl/jobs/Jobs.py
CHANGED
@@ -1,24 +1,15 @@
|
|
1
1
|
# """The Jobs class is a collection of agents, scenarios and models and one survey."""
|
2
2
|
from __future__ import annotations
|
3
|
-
import
|
3
|
+
import warnings
|
4
4
|
from itertools import product
|
5
5
|
from typing import Optional, Union, Sequence, Generator
|
6
|
-
|
7
|
-
from edsl.agents import Agent
|
6
|
+
|
8
7
|
from edsl.Base import Base
|
9
|
-
from edsl.data.Cache import Cache
|
10
|
-
from edsl.data.CacheHandler import CacheHandler
|
11
|
-
from edsl.results.Dataset import Dataset
|
12
8
|
|
13
|
-
from edsl.exceptions.jobs import MissingRemoteInferenceError
|
14
9
|
from edsl.exceptions import MissingAPIKeyError
|
15
10
|
from edsl.jobs.buckets.BucketCollection import BucketCollection
|
16
11
|
from edsl.jobs.interviews.Interview import Interview
|
17
|
-
from edsl.
|
18
|
-
from edsl.results import Results
|
19
|
-
from edsl.scenarios import Scenario
|
20
|
-
from edsl.surveys import Survey
|
21
|
-
|
12
|
+
from edsl.jobs.runners.JobsRunnerAsyncio import JobsRunnerAsyncio
|
22
13
|
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
23
14
|
|
24
15
|
|
@@ -31,10 +22,10 @@ class Jobs(Base):
|
|
31
22
|
|
32
23
|
def __init__(
|
33
24
|
self,
|
34
|
-
survey: Survey,
|
35
|
-
agents: Optional[list[Agent]] = None,
|
36
|
-
models: Optional[list[LanguageModel]] = None,
|
37
|
-
scenarios: Optional[list[Scenario]] = None,
|
25
|
+
survey: "Survey",
|
26
|
+
agents: Optional[list["Agent"]] = None,
|
27
|
+
models: Optional[list["LanguageModel"]] = None,
|
28
|
+
scenarios: Optional[list["Scenario"]] = None,
|
38
29
|
):
|
39
30
|
"""Initialize a Jobs instance.
|
40
31
|
|
@@ -44,18 +35,67 @@ class Jobs(Base):
|
|
44
35
|
:param scenarios: a list of scenarios
|
45
36
|
"""
|
46
37
|
self.survey = survey
|
47
|
-
self.agents = agents
|
48
|
-
self.
|
49
|
-
self.
|
38
|
+
self.agents: "AgentList" = agents
|
39
|
+
self.scenarios: "ScenarioList" = scenarios
|
40
|
+
self.models = models
|
41
|
+
|
50
42
|
self.__bucket_collection = None
|
51
43
|
|
44
|
+
@property
|
45
|
+
def models(self):
|
46
|
+
return self._models
|
47
|
+
|
48
|
+
@models.setter
|
49
|
+
def models(self, value):
|
50
|
+
from edsl import ModelList
|
51
|
+
|
52
|
+
if value:
|
53
|
+
if not isinstance(value, ModelList):
|
54
|
+
self._models = ModelList(value)
|
55
|
+
else:
|
56
|
+
self._models = value
|
57
|
+
else:
|
58
|
+
self._models = ModelList([])
|
59
|
+
|
60
|
+
@property
|
61
|
+
def agents(self):
|
62
|
+
return self._agents
|
63
|
+
|
64
|
+
@agents.setter
|
65
|
+
def agents(self, value):
|
66
|
+
from edsl import AgentList
|
67
|
+
|
68
|
+
if value:
|
69
|
+
if not isinstance(value, AgentList):
|
70
|
+
self._agents = AgentList(value)
|
71
|
+
else:
|
72
|
+
self._agents = value
|
73
|
+
else:
|
74
|
+
self._agents = AgentList([])
|
75
|
+
|
76
|
+
@property
|
77
|
+
def scenarios(self):
|
78
|
+
return self._scenarios
|
79
|
+
|
80
|
+
@scenarios.setter
|
81
|
+
def scenarios(self, value):
|
82
|
+
from edsl import ScenarioList
|
83
|
+
|
84
|
+
if value:
|
85
|
+
if not isinstance(value, ScenarioList):
|
86
|
+
self._scenarios = ScenarioList(value)
|
87
|
+
else:
|
88
|
+
self._scenarios = value
|
89
|
+
else:
|
90
|
+
self._scenarios = ScenarioList([])
|
91
|
+
|
52
92
|
def by(
|
53
93
|
self,
|
54
94
|
*args: Union[
|
55
|
-
Agent,
|
56
|
-
Scenario,
|
57
|
-
LanguageModel,
|
58
|
-
Sequence[Union[Agent, Scenario, LanguageModel]],
|
95
|
+
"Agent",
|
96
|
+
"Scenario",
|
97
|
+
"LanguageModel",
|
98
|
+
Sequence[Union["Agent", "Scenario", "LanguageModel"]],
|
59
99
|
],
|
60
100
|
) -> Jobs:
|
61
101
|
"""
|
@@ -68,10 +108,10 @@ class Jobs(Base):
|
|
68
108
|
>>> q = QuestionFreeText(question_name="name", question_text="What is your name?")
|
69
109
|
>>> j = Jobs(survey = Survey(questions=[q]))
|
70
110
|
>>> j
|
71
|
-
Jobs(survey=Survey(...), agents=[], models=[], scenarios=[])
|
111
|
+
Jobs(survey=Survey(...), agents=AgentList([]), models=ModelList([]), scenarios=ScenarioList([]))
|
72
112
|
>>> from edsl import Agent; a = Agent(traits = {"status": "Sad"})
|
73
113
|
>>> j.by(a).agents
|
74
|
-
[Agent(traits = {'status': 'Sad'})]
|
114
|
+
AgentList([Agent(traits = {'status': 'Sad'})])
|
75
115
|
|
76
116
|
:param args: objects or a sequence (list, tuple, ...) of objects of the same type
|
77
117
|
|
@@ -95,13 +135,13 @@ class Jobs(Base):
|
|
95
135
|
setattr(self, objects_key, new_objects) # update the job
|
96
136
|
return self
|
97
137
|
|
98
|
-
def prompts(self) -> Dataset:
|
138
|
+
def prompts(self) -> "Dataset":
|
99
139
|
"""Return a Dataset of prompts that will be used.
|
100
140
|
|
101
141
|
|
102
142
|
>>> from edsl.jobs import Jobs
|
103
143
|
>>> Jobs.example().prompts()
|
104
|
-
Dataset(
|
144
|
+
Dataset(...)
|
105
145
|
"""
|
106
146
|
|
107
147
|
interviews = self.interviews()
|
@@ -111,6 +151,7 @@ class Jobs(Base):
|
|
111
151
|
user_prompts = []
|
112
152
|
system_prompts = []
|
113
153
|
scenario_indices = []
|
154
|
+
from edsl.results.Dataset import Dataset
|
114
155
|
|
115
156
|
for interview_index, interview in enumerate(interviews):
|
116
157
|
invigilators = list(interview._build_invigilators(debug=False))
|
@@ -131,6 +172,20 @@ class Jobs(Base):
|
|
131
172
|
]
|
132
173
|
)
|
133
174
|
|
175
|
+
@staticmethod
|
176
|
+
def _get_container_class(object):
|
177
|
+
from edsl.agents.AgentList import AgentList
|
178
|
+
from edsl.agents.Agent import Agent
|
179
|
+
from edsl.scenarios.Scenario import Scenario
|
180
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
181
|
+
|
182
|
+
if isinstance(object, Agent):
|
183
|
+
return AgentList
|
184
|
+
elif isinstance(object, Scenario):
|
185
|
+
return ScenarioList
|
186
|
+
else:
|
187
|
+
return list
|
188
|
+
|
134
189
|
@staticmethod
|
135
190
|
def _turn_args_to_list(args):
|
136
191
|
"""Return a list of the first argument if it is a sequence, otherwise returns a list of all the arguments."""
|
@@ -149,19 +204,25 @@ class Jobs(Base):
|
|
149
204
|
return len(args) == 1 and isinstance(args[0], Sequence)
|
150
205
|
|
151
206
|
if did_user_pass_a_sequence(args):
|
152
|
-
|
207
|
+
container_class = Jobs._get_container_class(args[0][0])
|
208
|
+
return container_class(args[0])
|
153
209
|
else:
|
154
|
-
|
210
|
+
container_class = Jobs._get_container_class(args[0])
|
211
|
+
return container_class(args)
|
155
212
|
|
156
213
|
def _get_current_objects_of_this_type(
|
157
214
|
self, object: Union[Agent, Scenario, LanguageModel]
|
158
215
|
) -> tuple[list, str]:
|
216
|
+
from edsl.agents.Agent import Agent
|
217
|
+
from edsl.scenarios.Scenario import Scenario
|
218
|
+
from edsl.language_models.LanguageModel import LanguageModel
|
219
|
+
|
159
220
|
"""Return the current objects of the same type as the first argument.
|
160
221
|
|
161
222
|
>>> from edsl.jobs import Jobs
|
162
223
|
>>> j = Jobs.example()
|
163
224
|
>>> j._get_current_objects_of_this_type(j.agents[0])
|
164
|
-
([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'})], 'agents')
|
225
|
+
(AgentList([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'})]), 'agents')
|
165
226
|
"""
|
166
227
|
class_to_key = {
|
167
228
|
Agent: "agents",
|
@@ -181,6 +242,20 @@ class Jobs(Base):
|
|
181
242
|
current_objects = getattr(self, key, None)
|
182
243
|
return current_objects, key
|
183
244
|
|
245
|
+
@staticmethod
|
246
|
+
def _get_empty_container_object(object):
|
247
|
+
from edsl import AgentList
|
248
|
+
from edsl import Agent
|
249
|
+
from edsl import Scenario
|
250
|
+
from edsl import ScenarioList
|
251
|
+
|
252
|
+
if isinstance(object, Agent):
|
253
|
+
return AgentList([])
|
254
|
+
elif isinstance(object, Scenario):
|
255
|
+
return ScenarioList([])
|
256
|
+
else:
|
257
|
+
return []
|
258
|
+
|
184
259
|
@staticmethod
|
185
260
|
def _merge_objects(passed_objects, current_objects) -> list:
|
186
261
|
"""
|
@@ -192,7 +267,7 @@ class Jobs(Base):
|
|
192
267
|
>>> Jobs(survey = [])._merge_objects([1,2,3], [4,5,6])
|
193
268
|
[5, 6, 7, 6, 7, 8, 7, 8, 9]
|
194
269
|
"""
|
195
|
-
new_objects = []
|
270
|
+
new_objects = Jobs._get_empty_container_object(passed_objects[0])
|
196
271
|
for current_object in current_objects:
|
197
272
|
for new_object in passed_objects:
|
198
273
|
new_objects.append(current_object + new_object)
|
@@ -237,12 +312,12 @@ class Jobs(Base):
|
|
237
312
|
with us filling in defaults.
|
238
313
|
"""
|
239
314
|
# if no agents, models, or scenarios are set, set them to defaults
|
315
|
+
from edsl.agents.Agent import Agent
|
316
|
+
from edsl.language_models.registry import Model
|
317
|
+
from edsl.scenarios.Scenario import Scenario
|
318
|
+
|
240
319
|
self.agents = self.agents or [Agent()]
|
241
320
|
self.models = self.models or [Model()]
|
242
|
-
# if remote, set all the models to remote
|
243
|
-
if hasattr(self, "remote") and self.remote:
|
244
|
-
for model in self.models:
|
245
|
-
model.remote = True
|
246
321
|
self.scenarios = self.scenarios or [Scenario()]
|
247
322
|
for agent, scenario, model in product(self.agents, self.scenarios, self.models):
|
248
323
|
yield Interview(
|
@@ -256,6 +331,7 @@ class Jobs(Base):
|
|
256
331
|
These buckets are used to track API calls and token usage.
|
257
332
|
|
258
333
|
>>> from edsl.jobs import Jobs
|
334
|
+
>>> from edsl import Model
|
259
335
|
>>> j = Jobs.example().by(Model(temperature = 1), Model(temperature = 0.5))
|
260
336
|
>>> bc = j.create_bucket_collection()
|
261
337
|
>>> bc
|
@@ -284,6 +360,57 @@ class Jobs(Base):
|
|
284
360
|
)
|
285
361
|
return links
|
286
362
|
|
363
|
+
def __hash__(self):
|
364
|
+
"""Allow the model to be used as a key in a dictionary."""
|
365
|
+
from edsl.utilities.utilities import dict_hash
|
366
|
+
|
367
|
+
return dict_hash(self.to_dict())
|
368
|
+
|
369
|
+
def _output(self, message) -> None:
|
370
|
+
"""Check if a Job is verbose. If so, print the message."""
|
371
|
+
if self.verbose:
|
372
|
+
print(message)
|
373
|
+
|
374
|
+
def _check_parameters(self, strict=False, warn=False) -> None:
|
375
|
+
"""Check if the parameters in the survey and scenarios are consistent.
|
376
|
+
|
377
|
+
>>> from edsl import QuestionFreeText
|
378
|
+
>>> from edsl import Survey
|
379
|
+
>>> from edsl import Scenario
|
380
|
+
>>> q = QuestionFreeText(question_text = "{{poo}}", question_name = "ugly_question")
|
381
|
+
>>> j = Jobs(survey = Survey(questions=[q]))
|
382
|
+
>>> with warnings.catch_warnings(record=True) as w:
|
383
|
+
... j._check_parameters(warn = True)
|
384
|
+
... assert len(w) == 1
|
385
|
+
... assert issubclass(w[-1].category, UserWarning)
|
386
|
+
... assert "The following parameters are in the survey but not in the scenarios" in str(w[-1].message)
|
387
|
+
|
388
|
+
>>> q = QuestionFreeText(question_text = "{{poo}}", question_name = "ugly_question")
|
389
|
+
>>> s = Scenario({'plop': "A", 'poo': "B"})
|
390
|
+
>>> j = Jobs(survey = Survey(questions=[q])).by(s)
|
391
|
+
>>> j._check_parameters(strict = True)
|
392
|
+
Traceback (most recent call last):
|
393
|
+
...
|
394
|
+
ValueError: The following parameters are in the scenarios but not in the survey: {'plop'}
|
395
|
+
"""
|
396
|
+
survey_parameters: set = self.survey.parameters
|
397
|
+
scenario_parameters: set = self.scenarios.parameters
|
398
|
+
|
399
|
+
msg1, msg2 = None, None
|
400
|
+
|
401
|
+
if in_survey_but_not_in_scenarios := survey_parameters - scenario_parameters:
|
402
|
+
msg1 = f"The following parameters are in the survey but not in the scenarios: {in_survey_but_not_in_scenarios}"
|
403
|
+
if in_scenarios_but_not_in_survey := scenario_parameters - survey_parameters:
|
404
|
+
msg2 = f"The following parameters are in the scenarios but not in the survey: {in_scenarios_but_not_in_survey}"
|
405
|
+
|
406
|
+
if msg1 or msg2:
|
407
|
+
message = "\n".join(filter(None, [msg1, msg2]))
|
408
|
+
if strict:
|
409
|
+
raise ValueError(message)
|
410
|
+
else:
|
411
|
+
if warn:
|
412
|
+
warnings.warn(message)
|
413
|
+
|
287
414
|
def run(
|
288
415
|
self,
|
289
416
|
n: int = 1,
|
@@ -291,41 +418,81 @@ class Jobs(Base):
|
|
291
418
|
progress_bar: bool = False,
|
292
419
|
stop_on_exception: bool = False,
|
293
420
|
cache: Union[Cache, bool] = None,
|
294
|
-
remote: bool = (
|
295
|
-
False if os.getenv("DEFAULT_RUN_MODE", "local") == "local" else True
|
296
|
-
),
|
297
421
|
check_api_keys: bool = False,
|
298
422
|
sidecar_model: Optional[LanguageModel] = None,
|
299
423
|
batch_mode: Optional[bool] = None,
|
300
|
-
|
424
|
+
verbose: bool = False,
|
425
|
+
print_exceptions=True,
|
426
|
+
remote_cache_description: Optional[str] = None,
|
427
|
+
remote_inference_description: Optional[str] = None,
|
301
428
|
) -> Results:
|
302
429
|
"""
|
303
430
|
Runs the Job: conducts Interviews and returns their results.
|
304
431
|
|
305
432
|
:param n: how many times to run each interview
|
306
433
|
:param debug: prints debug messages
|
307
|
-
:param verbose: prints messages
|
308
434
|
:param progress_bar: shows a progress bar
|
309
435
|
:param stop_on_exception: stops the job if an exception is raised
|
310
436
|
:param cache: a cache object to store results
|
311
|
-
:param remote: run the job remotely
|
312
437
|
:param check_api_keys: check if the API keys are valid
|
313
|
-
:batch_mode: run the job in batch mode i.e., no expecation of interaction with the user
|
314
|
-
|
438
|
+
:param batch_mode: run the job in batch mode i.e., no expecation of interaction with the user
|
439
|
+
:param verbose: prints messages
|
440
|
+
:param remote_cache_description: specifies a description for this group of entries in the remote cache
|
441
|
+
:param remote_inference_description: specifies a description for the remote inference job
|
315
442
|
"""
|
443
|
+
from edsl.coop.coop import Coop
|
444
|
+
|
445
|
+
self._check_parameters()
|
446
|
+
|
316
447
|
if batch_mode is not None:
|
317
448
|
raise NotImplementedError(
|
318
449
|
"Batch mode is deprecated. Please update your code to not include 'batch_mode' in the 'run' method."
|
319
450
|
)
|
320
451
|
|
321
|
-
self.
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
452
|
+
self.verbose = verbose
|
453
|
+
|
454
|
+
try:
|
455
|
+
coop = Coop()
|
456
|
+
user_edsl_settings = coop.edsl_settings
|
457
|
+
remote_cache = user_edsl_settings["remote_caching"]
|
458
|
+
remote_inference = user_edsl_settings["remote_inference"]
|
459
|
+
except Exception:
|
460
|
+
remote_cache = False
|
461
|
+
remote_inference = False
|
462
|
+
|
463
|
+
if remote_inference:
|
464
|
+
self._output("Remote inference activated. Sending job to server...")
|
465
|
+
if remote_cache:
|
466
|
+
self._output(
|
467
|
+
"Remote caching activated. The remote cache will be used for this job."
|
468
|
+
)
|
327
469
|
|
328
|
-
|
470
|
+
remote_job_data = coop.remote_inference_create(
|
471
|
+
self,
|
472
|
+
description=remote_inference_description,
|
473
|
+
status="queued",
|
474
|
+
)
|
475
|
+
self._output("Job sent!")
|
476
|
+
# Create mock results object to store job data
|
477
|
+
results = Results(
|
478
|
+
survey=Survey(),
|
479
|
+
data=[
|
480
|
+
Result(
|
481
|
+
agent=Agent.example(),
|
482
|
+
scenario=Scenario.example(),
|
483
|
+
model=Model(),
|
484
|
+
iteration=1,
|
485
|
+
answer={"info": "Remote job details"},
|
486
|
+
)
|
487
|
+
],
|
488
|
+
)
|
489
|
+
results.add_columns_from_dict([remote_job_data])
|
490
|
+
if self.verbose:
|
491
|
+
results.select(["info", "uuid", "status", "version"]).print(
|
492
|
+
format="rich"
|
493
|
+
)
|
494
|
+
return results
|
495
|
+
else:
|
329
496
|
if check_api_keys:
|
330
497
|
for model in self.models + [Model()]:
|
331
498
|
if not model.has_valid_api_key():
|
@@ -336,30 +503,111 @@ class Jobs(Base):
|
|
336
503
|
|
337
504
|
# handle cache
|
338
505
|
if cache is None:
|
506
|
+
from edsl.data.CacheHandler import CacheHandler
|
507
|
+
|
339
508
|
cache = CacheHandler().get_cache()
|
340
509
|
if cache is False:
|
510
|
+
from edsl.data.Cache import Cache
|
511
|
+
|
341
512
|
cache = Cache()
|
342
513
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
514
|
+
if not remote_cache:
|
515
|
+
results = self._run_local(
|
516
|
+
n=n,
|
517
|
+
debug=debug,
|
518
|
+
progress_bar=progress_bar,
|
519
|
+
cache=cache,
|
520
|
+
stop_on_exception=stop_on_exception,
|
521
|
+
sidecar_model=sidecar_model,
|
522
|
+
print_exceptions=print_exceptions,
|
523
|
+
)
|
524
|
+
|
525
|
+
results.cache = cache.new_entries_cache()
|
526
|
+
|
527
|
+
self._output(f"There are {len(cache.keys()):,} entries in the local cache.")
|
528
|
+
else:
|
529
|
+
cache_difference = coop.remote_cache_get_diff(cache.keys())
|
530
|
+
|
531
|
+
client_missing_cacheentries = cache_difference.get(
|
532
|
+
"client_missing_cacheentries", []
|
533
|
+
)
|
534
|
+
|
535
|
+
missing_entry_count = len(client_missing_cacheentries)
|
536
|
+
if missing_entry_count > 0:
|
537
|
+
self._output(
|
538
|
+
f"Updating local cache with {missing_entry_count:,} new "
|
539
|
+
f"{'entry' if missing_entry_count == 1 else 'entries'} from remote..."
|
540
|
+
)
|
541
|
+
cache.add_from_dict(
|
542
|
+
{entry.key: entry for entry in client_missing_cacheentries}
|
543
|
+
)
|
544
|
+
self._output("Local cache updated!")
|
545
|
+
else:
|
546
|
+
self._output("No new entries to add to local cache.")
|
547
|
+
|
548
|
+
server_missing_cacheentry_keys = cache_difference.get(
|
549
|
+
"server_missing_cacheentry_keys", []
|
550
|
+
)
|
551
|
+
server_missing_cacheentries = [
|
552
|
+
entry
|
553
|
+
for key in server_missing_cacheentry_keys
|
554
|
+
if (entry := cache.data.get(key)) is not None
|
555
|
+
]
|
556
|
+
old_entry_keys = [key for key in cache.keys()]
|
557
|
+
|
558
|
+
self._output("Running job...")
|
559
|
+
results = self._run_local(
|
560
|
+
n=n,
|
561
|
+
debug=debug,
|
562
|
+
progress_bar=progress_bar,
|
563
|
+
cache=cache,
|
564
|
+
stop_on_exception=stop_on_exception,
|
565
|
+
sidecar_model=sidecar_model,
|
566
|
+
print_exceptions=print_exceptions,
|
567
|
+
)
|
568
|
+
self._output("Job completed!")
|
569
|
+
|
570
|
+
new_cache_entries = list(
|
571
|
+
[entry for entry in cache.values() if entry.key not in old_entry_keys]
|
572
|
+
)
|
573
|
+
server_missing_cacheentries.extend(new_cache_entries)
|
574
|
+
|
575
|
+
new_entry_count = len(server_missing_cacheentries)
|
576
|
+
if new_entry_count > 0:
|
577
|
+
self._output(
|
578
|
+
f"Updating remote cache with {new_entry_count:,} new "
|
579
|
+
f"{'entry' if new_entry_count == 1 else 'entries'}..."
|
580
|
+
)
|
581
|
+
coop.remote_cache_create_many(
|
582
|
+
server_missing_cacheentries,
|
583
|
+
visibility="private",
|
584
|
+
description=remote_cache_description,
|
585
|
+
)
|
586
|
+
self._output("Remote cache updated!")
|
587
|
+
else:
|
588
|
+
self._output("No new entries to add to remote cache.")
|
589
|
+
|
590
|
+
results.cache = cache.new_entries_cache()
|
591
|
+
|
592
|
+
self._output(f"There are {len(cache.keys()):,} entries in the local cache.")
|
353
593
|
|
354
594
|
return results
|
355
595
|
|
356
596
|
def _run_local(self, *args, **kwargs):
|
357
597
|
"""Run the job locally."""
|
358
|
-
from edsl.jobs.runners.JobsRunnerAsyncio import JobsRunnerAsyncio
|
359
598
|
|
360
599
|
results = JobsRunnerAsyncio(self).run(*args, **kwargs)
|
361
600
|
return results
|
362
601
|
|
602
|
+
async def run_async(self, cache=None, **kwargs):
|
603
|
+
"""Run the job asynchronously."""
|
604
|
+
results = await JobsRunnerAsyncio(self).run_async(cache=cache, **kwargs)
|
605
|
+
return results
|
606
|
+
|
607
|
+
def all_question_parameters(self):
|
608
|
+
"""Return all the fields in the questions in the survey."""
|
609
|
+
return set.union(*[question.parameters for question in self.survey.questions])
|
610
|
+
|
363
611
|
#######################
|
364
612
|
# Dunder methods
|
365
613
|
#######################
|
@@ -412,6 +660,11 @@ class Jobs(Base):
|
|
412
660
|
@remove_edsl_version
|
413
661
|
def from_dict(cls, data: dict) -> Jobs:
|
414
662
|
"""Creates a Jobs instance from a dictionary."""
|
663
|
+
from edsl import Survey
|
664
|
+
from edsl.agents.Agent import Agent
|
665
|
+
from edsl.language_models.LanguageModel import LanguageModel
|
666
|
+
from edsl.scenarios.Scenario import Scenario
|
667
|
+
|
415
668
|
return cls(
|
416
669
|
survey=Survey.from_dict(data["survey"]),
|
417
670
|
agents=[Agent.from_dict(agent) for agent in data["agents"]],
|
@@ -438,7 +691,8 @@ class Jobs(Base):
|
|
438
691
|
"""
|
439
692
|
import random
|
440
693
|
from edsl.questions import QuestionMultipleChoice
|
441
|
-
from edsl import Agent
|
694
|
+
from edsl.agents.Agent import Agent
|
695
|
+
from edsl.scenarios.Scenario import Scenario
|
442
696
|
|
443
697
|
# (status, question, period)
|
444
698
|
agent_answers = {
|
@@ -477,11 +731,14 @@ class Jobs(Base):
|
|
477
731
|
question_options=["Good", "Great", "OK", "Terrible"],
|
478
732
|
question_name="how_feeling_yesterday",
|
479
733
|
)
|
734
|
+
from edsl import Survey, ScenarioList
|
735
|
+
|
480
736
|
base_survey = Survey(questions=[q1, q2])
|
481
737
|
|
482
|
-
|
483
|
-
Scenario({"period": "morning"}), Scenario({"period": "afternoon"})
|
484
|
-
)
|
738
|
+
scenario_list = ScenarioList(
|
739
|
+
[Scenario({"period": "morning"}), Scenario({"period": "afternoon"})]
|
740
|
+
)
|
741
|
+
job = base_survey.by(scenario_list).by(joy_agent, sad_agent)
|
485
742
|
|
486
743
|
return job
|
487
744
|
|
@@ -516,11 +773,3 @@ if __name__ == "__main__":
|
|
516
773
|
import doctest
|
517
774
|
|
518
775
|
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
519
|
-
|
520
|
-
# from edsl.jobs import Jobs
|
521
|
-
|
522
|
-
# job = Jobs.example()
|
523
|
-
# len(job) == 8
|
524
|
-
# results, info = job.run(debug=True)
|
525
|
-
# len(results) == 8
|
526
|
-
# results
|
@@ -10,8 +10,9 @@ class BucketCollection(UserDict):
|
|
10
10
|
Models themselves are hashable, so this works.
|
11
11
|
"""
|
12
12
|
|
13
|
-
def __init__(self):
|
13
|
+
def __init__(self, infinity_buckets=False):
|
14
14
|
super().__init__()
|
15
|
+
self.infinity_buckets = infinity_buckets
|
15
16
|
|
16
17
|
def __repr__(self):
|
17
18
|
return f"BucketCollection({self.data})"
|
@@ -21,8 +22,13 @@ class BucketCollection(UserDict):
|
|
21
22
|
|
22
23
|
This will create the token and request buckets for the model."""
|
23
24
|
# compute the TPS and RPS from the model
|
24
|
-
|
25
|
-
|
25
|
+
if not self.infinity_buckets:
|
26
|
+
TPS = model.TPM / 60.0
|
27
|
+
RPS = model.RPM / 60.0
|
28
|
+
else:
|
29
|
+
TPS = float("inf")
|
30
|
+
RPS = float("inf")
|
31
|
+
|
26
32
|
# create the buckets
|
27
33
|
requests_bucket = TokenBucket(
|
28
34
|
bucket_name=model.model,
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from edsl.jobs.buckets.TokenBucket import TokenBucket
|
1
|
+
# from edsl.jobs.buckets.TokenBucket import TokenBucket
|
2
2
|
|
3
3
|
|
4
4
|
class ModelBuckets:
|
@@ -8,7 +8,7 @@ class ModelBuckets:
|
|
8
8
|
A request is one call to the service. The number of tokens required for a request depends on parameters.
|
9
9
|
"""
|
10
10
|
|
11
|
-
def __init__(self, requests_bucket: TokenBucket, tokens_bucket: TokenBucket):
|
11
|
+
def __init__(self, requests_bucket: "TokenBucket", tokens_bucket: "TokenBucket"):
|
12
12
|
"""Initialize the model buckets.
|
13
13
|
|
14
14
|
The requests bucket captures requests per unit of time.
|
@@ -28,6 +28,8 @@ class ModelBuckets:
|
|
28
28
|
@classmethod
|
29
29
|
def infinity_bucket(cls, model_name: str = "not_specified") -> "ModelBuckets":
|
30
30
|
"""Create a bucket with infinite capacity and refill rate."""
|
31
|
+
from edsl.jobs.buckets.TokenBucket import TokenBucket
|
32
|
+
|
31
33
|
return cls(
|
32
34
|
requests_bucket=TokenBucket(
|
33
35
|
bucket_name=model_name,
|
edsl/jobs/buckets/TokenBucket.py
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
from typing import Union, List, Any
|
2
2
|
import asyncio
|
3
3
|
import time
|
4
|
-
from collections import UserDict
|
5
|
-
from matplotlib import pyplot as plt
|
6
4
|
|
7
5
|
|
8
6
|
class TokenBucket:
|
@@ -114,6 +112,7 @@ class TokenBucket:
|
|
114
112
|
times, tokens = zip(*self.get_log())
|
115
113
|
start_time = times[0]
|
116
114
|
times = [t - start_time for t in times] # Normalize time to start from 0
|
115
|
+
from matplotlib import pyplot as plt
|
117
116
|
|
118
117
|
plt.figure(figsize=(10, 6))
|
119
118
|
plt.plot(times, tokens, label="Tokens Available")
|