edsl-0.1.27.dev2-py3-none-any.whl → edsl-0.1.29-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. edsl/Base.py +107 -30
  2. edsl/BaseDiff.py +260 -0
  3. edsl/__init__.py +25 -21
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +103 -46
  6. edsl/agents/AgentList.py +97 -13
  7. edsl/agents/Invigilator.py +23 -10
  8. edsl/agents/InvigilatorBase.py +19 -14
  9. edsl/agents/PromptConstructionMixin.py +342 -100
  10. edsl/agents/descriptors.py +5 -2
  11. edsl/base/Base.py +289 -0
  12. edsl/config.py +2 -1
  13. edsl/conjure/AgentConstructionMixin.py +152 -0
  14. edsl/conjure/Conjure.py +56 -0
  15. edsl/conjure/InputData.py +659 -0
  16. edsl/conjure/InputDataCSV.py +48 -0
  17. edsl/conjure/InputDataMixinQuestionStats.py +182 -0
  18. edsl/conjure/InputDataPyRead.py +91 -0
  19. edsl/conjure/InputDataSPSS.py +8 -0
  20. edsl/conjure/InputDataStata.py +8 -0
  21. edsl/conjure/QuestionOptionMixin.py +76 -0
  22. edsl/conjure/QuestionTypeMixin.py +23 -0
  23. edsl/conjure/RawQuestion.py +65 -0
  24. edsl/conjure/SurveyResponses.py +7 -0
  25. edsl/conjure/__init__.py +9 -4
  26. edsl/conjure/examples/placeholder.txt +0 -0
  27. edsl/conjure/naming_utilities.py +263 -0
  28. edsl/conjure/utilities.py +165 -28
  29. edsl/conversation/Conversation.py +238 -0
  30. edsl/conversation/car_buying.py +58 -0
  31. edsl/conversation/mug_negotiation.py +81 -0
  32. edsl/conversation/next_speaker_utilities.py +93 -0
  33. edsl/coop/coop.py +337 -121
  34. edsl/coop/utils.py +56 -70
  35. edsl/data/Cache.py +74 -22
  36. edsl/data/CacheHandler.py +10 -9
  37. edsl/data/SQLiteDict.py +11 -3
  38. edsl/inference_services/AnthropicService.py +1 -0
  39. edsl/inference_services/DeepInfraService.py +20 -13
  40. edsl/inference_services/GoogleService.py +7 -1
  41. edsl/inference_services/InferenceServicesCollection.py +33 -7
  42. edsl/inference_services/OpenAIService.py +17 -10
  43. edsl/inference_services/models_available_cache.py +69 -0
  44. edsl/inference_services/rate_limits_cache.py +25 -0
  45. edsl/inference_services/write_available.py +10 -0
  46. edsl/jobs/Answers.py +15 -1
  47. edsl/jobs/Jobs.py +322 -73
  48. edsl/jobs/buckets/BucketCollection.py +9 -3
  49. edsl/jobs/buckets/ModelBuckets.py +4 -2
  50. edsl/jobs/buckets/TokenBucket.py +1 -2
  51. edsl/jobs/interviews/Interview.py +7 -10
  52. edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
  53. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
  54. edsl/jobs/interviews/retry_management.py +4 -4
  55. edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
  56. edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
  57. edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
  58. edsl/jobs/tasks/TaskHistory.py +4 -3
  59. edsl/language_models/LanguageModel.py +42 -55
  60. edsl/language_models/ModelList.py +96 -0
  61. edsl/language_models/registry.py +14 -0
  62. edsl/language_models/repair.py +97 -25
  63. edsl/notebooks/Notebook.py +157 -32
  64. edsl/prompts/Prompt.py +31 -19
  65. edsl/questions/QuestionBase.py +145 -23
  66. edsl/questions/QuestionBudget.py +5 -6
  67. edsl/questions/QuestionCheckBox.py +7 -3
  68. edsl/questions/QuestionExtract.py +5 -3
  69. edsl/questions/QuestionFreeText.py +3 -3
  70. edsl/questions/QuestionFunctional.py +0 -3
  71. edsl/questions/QuestionList.py +3 -4
  72. edsl/questions/QuestionMultipleChoice.py +16 -8
  73. edsl/questions/QuestionNumerical.py +4 -3
  74. edsl/questions/QuestionRank.py +5 -3
  75. edsl/questions/__init__.py +4 -3
  76. edsl/questions/descriptors.py +9 -4
  77. edsl/questions/question_registry.py +27 -31
  78. edsl/questions/settings.py +1 -1
  79. edsl/results/Dataset.py +31 -0
  80. edsl/results/DatasetExportMixin.py +493 -0
  81. edsl/results/Result.py +42 -82
  82. edsl/results/Results.py +178 -66
  83. edsl/results/ResultsDBMixin.py +10 -9
  84. edsl/results/ResultsExportMixin.py +23 -507
  85. edsl/results/ResultsGGMixin.py +3 -3
  86. edsl/results/ResultsToolsMixin.py +9 -9
  87. edsl/scenarios/FileStore.py +140 -0
  88. edsl/scenarios/Scenario.py +59 -6
  89. edsl/scenarios/ScenarioList.py +138 -52
  90. edsl/scenarios/ScenarioListExportMixin.py +32 -0
  91. edsl/scenarios/ScenarioListPdfMixin.py +2 -1
  92. edsl/scenarios/__init__.py +1 -0
  93. edsl/study/ObjectEntry.py +173 -0
  94. edsl/study/ProofOfWork.py +113 -0
  95. edsl/study/SnapShot.py +73 -0
  96. edsl/study/Study.py +498 -0
  97. edsl/study/__init__.py +4 -0
  98. edsl/surveys/MemoryPlan.py +11 -4
  99. edsl/surveys/Survey.py +124 -37
  100. edsl/surveys/SurveyExportMixin.py +25 -5
  101. edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
  102. edsl/tools/plotting.py +4 -2
  103. edsl/utilities/__init__.py +21 -20
  104. edsl/utilities/gcp_bucket/__init__.py +0 -0
  105. edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
  106. edsl/utilities/gcp_bucket/simple_example.py +9 -0
  107. edsl/utilities/interface.py +90 -73
  108. edsl/utilities/repair_functions.py +28 -0
  109. edsl/utilities/utilities.py +59 -6
  110. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
  111. edsl-0.1.29.dist-info/RECORD +203 -0
  112. edsl/conjure/RawResponseColumn.py +0 -327
  113. edsl/conjure/SurveyBuilder.py +0 -308
  114. edsl/conjure/SurveyBuilderCSV.py +0 -78
  115. edsl/conjure/SurveyBuilderSPSS.py +0 -118
  116. edsl/data/RemoteDict.py +0 -103
  117. edsl-0.1.27.dev2.dist-info/RECORD +0 -172
  118. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
  119. {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
edsl/jobs/Jobs.py CHANGED
@@ -1,24 +1,15 @@
1
1
  # """The Jobs class is a collection of agents, scenarios and models and one survey."""
2
2
  from __future__ import annotations
3
- import os
3
+ import warnings
4
4
  from itertools import product
5
5
  from typing import Optional, Union, Sequence, Generator
6
- from edsl import Model
7
- from edsl.agents import Agent
6
+
8
7
  from edsl.Base import Base
9
- from edsl.data.Cache import Cache
10
- from edsl.data.CacheHandler import CacheHandler
11
- from edsl.results.Dataset import Dataset
12
8
 
13
- from edsl.exceptions.jobs import MissingRemoteInferenceError
14
9
  from edsl.exceptions import MissingAPIKeyError
15
10
  from edsl.jobs.buckets.BucketCollection import BucketCollection
16
11
  from edsl.jobs.interviews.Interview import Interview
17
- from edsl.language_models import LanguageModel
18
- from edsl.results import Results
19
- from edsl.scenarios import Scenario
20
- from edsl.surveys import Survey
21
-
12
+ from edsl.jobs.runners.JobsRunnerAsyncio import JobsRunnerAsyncio
22
13
  from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
23
14
 
24
15
 
@@ -31,10 +22,10 @@ class Jobs(Base):
31
22
 
32
23
  def __init__(
33
24
  self,
34
- survey: Survey,
35
- agents: Optional[list[Agent]] = None,
36
- models: Optional[list[LanguageModel]] = None,
37
- scenarios: Optional[list[Scenario]] = None,
25
+ survey: "Survey",
26
+ agents: Optional[list["Agent"]] = None,
27
+ models: Optional[list["LanguageModel"]] = None,
28
+ scenarios: Optional[list["Scenario"]] = None,
38
29
  ):
39
30
  """Initialize a Jobs instance.
40
31
 
@@ -44,18 +35,67 @@ class Jobs(Base):
44
35
  :param scenarios: a list of scenarios
45
36
  """
46
37
  self.survey = survey
47
- self.agents = agents or []
48
- self.models = models or []
49
- self.scenarios = scenarios or []
38
+ self.agents: "AgentList" = agents
39
+ self.scenarios: "ScenarioList" = scenarios
40
+ self.models = models
41
+
50
42
  self.__bucket_collection = None
51
43
 
44
+ @property
45
+ def models(self):
46
+ return self._models
47
+
48
+ @models.setter
49
+ def models(self, value):
50
+ from edsl import ModelList
51
+
52
+ if value:
53
+ if not isinstance(value, ModelList):
54
+ self._models = ModelList(value)
55
+ else:
56
+ self._models = value
57
+ else:
58
+ self._models = ModelList([])
59
+
60
+ @property
61
+ def agents(self):
62
+ return self._agents
63
+
64
+ @agents.setter
65
+ def agents(self, value):
66
+ from edsl import AgentList
67
+
68
+ if value:
69
+ if not isinstance(value, AgentList):
70
+ self._agents = AgentList(value)
71
+ else:
72
+ self._agents = value
73
+ else:
74
+ self._agents = AgentList([])
75
+
76
+ @property
77
+ def scenarios(self):
78
+ return self._scenarios
79
+
80
+ @scenarios.setter
81
+ def scenarios(self, value):
82
+ from edsl import ScenarioList
83
+
84
+ if value:
85
+ if not isinstance(value, ScenarioList):
86
+ self._scenarios = ScenarioList(value)
87
+ else:
88
+ self._scenarios = value
89
+ else:
90
+ self._scenarios = ScenarioList([])
91
+
52
92
  def by(
53
93
  self,
54
94
  *args: Union[
55
- Agent,
56
- Scenario,
57
- LanguageModel,
58
- Sequence[Union[Agent, Scenario, LanguageModel]],
95
+ "Agent",
96
+ "Scenario",
97
+ "LanguageModel",
98
+ Sequence[Union["Agent", "Scenario", "LanguageModel"]],
59
99
  ],
60
100
  ) -> Jobs:
61
101
  """
@@ -68,10 +108,10 @@ class Jobs(Base):
68
108
  >>> q = QuestionFreeText(question_name="name", question_text="What is your name?")
69
109
  >>> j = Jobs(survey = Survey(questions=[q]))
70
110
  >>> j
71
- Jobs(survey=Survey(...), agents=[], models=[], scenarios=[])
111
+ Jobs(survey=Survey(...), agents=AgentList([]), models=ModelList([]), scenarios=ScenarioList([]))
72
112
  >>> from edsl import Agent; a = Agent(traits = {"status": "Sad"})
73
113
  >>> j.by(a).agents
74
- [Agent(traits = {'status': 'Sad'})]
114
+ AgentList([Agent(traits = {'status': 'Sad'})])
75
115
 
76
116
  :param args: objects or a sequence (list, tuple, ...) of objects of the same type
77
117
 
@@ -95,13 +135,13 @@ class Jobs(Base):
95
135
  setattr(self, objects_key, new_objects) # update the job
96
136
  return self
97
137
 
98
- def prompts(self) -> Dataset:
138
+ def prompts(self) -> "Dataset":
99
139
  """Return a Dataset of prompts that will be used.
100
140
 
101
141
 
102
142
  >>> from edsl.jobs import Jobs
103
143
  >>> Jobs.example().prompts()
104
- Dataset([{'interview_index': [0, 0, 1, 1, 2, 2, 3, 3]}, {'question_index': ['how_feeling', 'how_feeling_yesterday', 'how_feeling', 'how_feeling_yesterday', 'how_feeling', 'how_feeling_yesterday', 'how_feeling', 'how_feeling_yesterday']}, {'user_prompt': [Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA')]}, {'scenario_index': [Scenario({'period': 'morning'}), Scenario({'period': 'morning'}), Scenario({'period': 'afternoon'}), Scenario({'period': 'afternoon'}), Scenario({'period': 'morning'}), Scenario({'period': 'morning'}), Scenario({'period': 'afternoon'}), Scenario({'period': 'afternoon'})]}, {'system_prompt': [Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA'), Prompt(text='NA')]}])
144
+ Dataset(...)
105
145
  """
106
146
 
107
147
  interviews = self.interviews()
@@ -111,6 +151,7 @@ class Jobs(Base):
111
151
  user_prompts = []
112
152
  system_prompts = []
113
153
  scenario_indices = []
154
+ from edsl.results.Dataset import Dataset
114
155
 
115
156
  for interview_index, interview in enumerate(interviews):
116
157
  invigilators = list(interview._build_invigilators(debug=False))
@@ -131,6 +172,20 @@ class Jobs(Base):
131
172
  ]
132
173
  )
133
174
 
175
+ @staticmethod
176
+ def _get_container_class(object):
177
+ from edsl.agents.AgentList import AgentList
178
+ from edsl.agents.Agent import Agent
179
+ from edsl.scenarios.Scenario import Scenario
180
+ from edsl.scenarios.ScenarioList import ScenarioList
181
+
182
+ if isinstance(object, Agent):
183
+ return AgentList
184
+ elif isinstance(object, Scenario):
185
+ return ScenarioList
186
+ else:
187
+ return list
188
+
134
189
  @staticmethod
135
190
  def _turn_args_to_list(args):
136
191
  """Return a list of the first argument if it is a sequence, otherwise returns a list of all the arguments."""
@@ -149,19 +204,25 @@ class Jobs(Base):
149
204
  return len(args) == 1 and isinstance(args[0], Sequence)
150
205
 
151
206
  if did_user_pass_a_sequence(args):
152
- return list(args[0])
207
+ container_class = Jobs._get_container_class(args[0][0])
208
+ return container_class(args[0])
153
209
  else:
154
- return list(args)
210
+ container_class = Jobs._get_container_class(args[0])
211
+ return container_class(args)
155
212
 
156
213
  def _get_current_objects_of_this_type(
157
214
  self, object: Union[Agent, Scenario, LanguageModel]
158
215
  ) -> tuple[list, str]:
216
+ from edsl.agents.Agent import Agent
217
+ from edsl.scenarios.Scenario import Scenario
218
+ from edsl.language_models.LanguageModel import LanguageModel
219
+
159
220
  """Return the current objects of the same type as the first argument.
160
221
 
161
222
  >>> from edsl.jobs import Jobs
162
223
  >>> j = Jobs.example()
163
224
  >>> j._get_current_objects_of_this_type(j.agents[0])
164
- ([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'})], 'agents')
225
+ (AgentList([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'})]), 'agents')
165
226
  """
166
227
  class_to_key = {
167
228
  Agent: "agents",
@@ -181,6 +242,20 @@ class Jobs(Base):
181
242
  current_objects = getattr(self, key, None)
182
243
  return current_objects, key
183
244
 
245
+ @staticmethod
246
+ def _get_empty_container_object(object):
247
+ from edsl import AgentList
248
+ from edsl import Agent
249
+ from edsl import Scenario
250
+ from edsl import ScenarioList
251
+
252
+ if isinstance(object, Agent):
253
+ return AgentList([])
254
+ elif isinstance(object, Scenario):
255
+ return ScenarioList([])
256
+ else:
257
+ return []
258
+
184
259
  @staticmethod
185
260
  def _merge_objects(passed_objects, current_objects) -> list:
186
261
  """
@@ -192,7 +267,7 @@ class Jobs(Base):
192
267
  >>> Jobs(survey = [])._merge_objects([1,2,3], [4,5,6])
193
268
  [5, 6, 7, 6, 7, 8, 7, 8, 9]
194
269
  """
195
- new_objects = []
270
+ new_objects = Jobs._get_empty_container_object(passed_objects[0])
196
271
  for current_object in current_objects:
197
272
  for new_object in passed_objects:
198
273
  new_objects.append(current_object + new_object)
@@ -237,12 +312,12 @@ class Jobs(Base):
237
312
  with us filling in defaults.
238
313
  """
239
314
  # if no agents, models, or scenarios are set, set them to defaults
315
+ from edsl.agents.Agent import Agent
316
+ from edsl.language_models.registry import Model
317
+ from edsl.scenarios.Scenario import Scenario
318
+
240
319
  self.agents = self.agents or [Agent()]
241
320
  self.models = self.models or [Model()]
242
- # if remote, set all the models to remote
243
- if hasattr(self, "remote") and self.remote:
244
- for model in self.models:
245
- model.remote = True
246
321
  self.scenarios = self.scenarios or [Scenario()]
247
322
  for agent, scenario, model in product(self.agents, self.scenarios, self.models):
248
323
  yield Interview(
@@ -256,6 +331,7 @@ class Jobs(Base):
256
331
  These buckets are used to track API calls and token usage.
257
332
 
258
333
  >>> from edsl.jobs import Jobs
334
+ >>> from edsl import Model
259
335
  >>> j = Jobs.example().by(Model(temperature = 1), Model(temperature = 0.5))
260
336
  >>> bc = j.create_bucket_collection()
261
337
  >>> bc
@@ -284,6 +360,57 @@ class Jobs(Base):
284
360
  )
285
361
  return links
286
362
 
363
+ def __hash__(self):
364
+ """Allow the model to be used as a key in a dictionary."""
365
+ from edsl.utilities.utilities import dict_hash
366
+
367
+ return dict_hash(self.to_dict())
368
+
369
+ def _output(self, message) -> None:
370
+ """Check if a Job is verbose. If so, print the message."""
371
+ if self.verbose:
372
+ print(message)
373
+
374
+ def _check_parameters(self, strict=False, warn=False) -> None:
375
+ """Check if the parameters in the survey and scenarios are consistent.
376
+
377
+ >>> from edsl import QuestionFreeText
378
+ >>> from edsl import Survey
379
+ >>> from edsl import Scenario
380
+ >>> q = QuestionFreeText(question_text = "{{poo}}", question_name = "ugly_question")
381
+ >>> j = Jobs(survey = Survey(questions=[q]))
382
+ >>> with warnings.catch_warnings(record=True) as w:
383
+ ... j._check_parameters(warn = True)
384
+ ... assert len(w) == 1
385
+ ... assert issubclass(w[-1].category, UserWarning)
386
+ ... assert "The following parameters are in the survey but not in the scenarios" in str(w[-1].message)
387
+
388
+ >>> q = QuestionFreeText(question_text = "{{poo}}", question_name = "ugly_question")
389
+ >>> s = Scenario({'plop': "A", 'poo': "B"})
390
+ >>> j = Jobs(survey = Survey(questions=[q])).by(s)
391
+ >>> j._check_parameters(strict = True)
392
+ Traceback (most recent call last):
393
+ ...
394
+ ValueError: The following parameters are in the scenarios but not in the survey: {'plop'}
395
+ """
396
+ survey_parameters: set = self.survey.parameters
397
+ scenario_parameters: set = self.scenarios.parameters
398
+
399
+ msg1, msg2 = None, None
400
+
401
+ if in_survey_but_not_in_scenarios := survey_parameters - scenario_parameters:
402
+ msg1 = f"The following parameters are in the survey but not in the scenarios: {in_survey_but_not_in_scenarios}"
403
+ if in_scenarios_but_not_in_survey := scenario_parameters - survey_parameters:
404
+ msg2 = f"The following parameters are in the scenarios but not in the survey: {in_scenarios_but_not_in_survey}"
405
+
406
+ if msg1 or msg2:
407
+ message = "\n".join(filter(None, [msg1, msg2]))
408
+ if strict:
409
+ raise ValueError(message)
410
+ else:
411
+ if warn:
412
+ warnings.warn(message)
413
+
287
414
  def run(
288
415
  self,
289
416
  n: int = 1,
@@ -291,41 +418,81 @@ class Jobs(Base):
291
418
  progress_bar: bool = False,
292
419
  stop_on_exception: bool = False,
293
420
  cache: Union[Cache, bool] = None,
294
- remote: bool = (
295
- False if os.getenv("DEFAULT_RUN_MODE", "local") == "local" else True
296
- ),
297
421
  check_api_keys: bool = False,
298
422
  sidecar_model: Optional[LanguageModel] = None,
299
423
  batch_mode: Optional[bool] = None,
300
- print_exceptions=False,
424
+ verbose: bool = False,
425
+ print_exceptions=True,
426
+ remote_cache_description: Optional[str] = None,
427
+ remote_inference_description: Optional[str] = None,
301
428
  ) -> Results:
302
429
  """
303
430
  Runs the Job: conducts Interviews and returns their results.
304
431
 
305
432
  :param n: how many times to run each interview
306
433
  :param debug: prints debug messages
307
- :param verbose: prints messages
308
434
  :param progress_bar: shows a progress bar
309
435
  :param stop_on_exception: stops the job if an exception is raised
310
436
  :param cache: a cache object to store results
311
- :param remote: run the job remotely
312
437
  :param check_api_keys: check if the API keys are valid
313
- :batch_mode: run the job in batch mode i.e., no expecation of interaction with the user
314
-
438
+ :param batch_mode: run the job in batch mode i.e., no expecation of interaction with the user
439
+ :param verbose: prints messages
440
+ :param remote_cache_description: specifies a description for this group of entries in the remote cache
441
+ :param remote_inference_description: specifies a description for the remote inference job
315
442
  """
443
+ from edsl.coop.coop import Coop
444
+
445
+ self._check_parameters()
446
+
316
447
  if batch_mode is not None:
317
448
  raise NotImplementedError(
318
449
  "Batch mode is deprecated. Please update your code to not include 'batch_mode' in the 'run' method."
319
450
  )
320
451
 
321
- self.remote = remote
322
-
323
- if self.remote:
324
- ## TODO: This should be a coop check
325
- if os.getenv("EXPECTED_PARROT_API_KEY", None) is None:
326
- raise MissingRemoteInferenceError()
452
+ self.verbose = verbose
453
+
454
+ try:
455
+ coop = Coop()
456
+ user_edsl_settings = coop.edsl_settings
457
+ remote_cache = user_edsl_settings["remote_caching"]
458
+ remote_inference = user_edsl_settings["remote_inference"]
459
+ except Exception:
460
+ remote_cache = False
461
+ remote_inference = False
462
+
463
+ if remote_inference:
464
+ self._output("Remote inference activated. Sending job to server...")
465
+ if remote_cache:
466
+ self._output(
467
+ "Remote caching activated. The remote cache will be used for this job."
468
+ )
327
469
 
328
- if not self.remote:
470
+ remote_job_data = coop.remote_inference_create(
471
+ self,
472
+ description=remote_inference_description,
473
+ status="queued",
474
+ )
475
+ self._output("Job sent!")
476
+ # Create mock results object to store job data
477
+ results = Results(
478
+ survey=Survey(),
479
+ data=[
480
+ Result(
481
+ agent=Agent.example(),
482
+ scenario=Scenario.example(),
483
+ model=Model(),
484
+ iteration=1,
485
+ answer={"info": "Remote job details"},
486
+ )
487
+ ],
488
+ )
489
+ results.add_columns_from_dict([remote_job_data])
490
+ if self.verbose:
491
+ results.select(["info", "uuid", "status", "version"]).print(
492
+ format="rich"
493
+ )
494
+ return results
495
+ else:
329
496
  if check_api_keys:
330
497
  for model in self.models + [Model()]:
331
498
  if not model.has_valid_api_key():
@@ -336,30 +503,111 @@ class Jobs(Base):
336
503
 
337
504
  # handle cache
338
505
  if cache is None:
506
+ from edsl.data.CacheHandler import CacheHandler
507
+
339
508
  cache = CacheHandler().get_cache()
340
509
  if cache is False:
510
+ from edsl.data.Cache import Cache
511
+
341
512
  cache = Cache()
342
513
 
343
- results = self._run_local(
344
- n=n,
345
- debug=debug,
346
- progress_bar=progress_bar,
347
- cache=cache,
348
- stop_on_exception=stop_on_exception,
349
- sidecar_model=sidecar_model,
350
- print_exceptions=print_exceptions,
351
- )
352
- results.cache = cache.new_entries_cache()
514
+ if not remote_cache:
515
+ results = self._run_local(
516
+ n=n,
517
+ debug=debug,
518
+ progress_bar=progress_bar,
519
+ cache=cache,
520
+ stop_on_exception=stop_on_exception,
521
+ sidecar_model=sidecar_model,
522
+ print_exceptions=print_exceptions,
523
+ )
524
+
525
+ results.cache = cache.new_entries_cache()
526
+
527
+ self._output(f"There are {len(cache.keys()):,} entries in the local cache.")
528
+ else:
529
+ cache_difference = coop.remote_cache_get_diff(cache.keys())
530
+
531
+ client_missing_cacheentries = cache_difference.get(
532
+ "client_missing_cacheentries", []
533
+ )
534
+
535
+ missing_entry_count = len(client_missing_cacheentries)
536
+ if missing_entry_count > 0:
537
+ self._output(
538
+ f"Updating local cache with {missing_entry_count:,} new "
539
+ f"{'entry' if missing_entry_count == 1 else 'entries'} from remote..."
540
+ )
541
+ cache.add_from_dict(
542
+ {entry.key: entry for entry in client_missing_cacheentries}
543
+ )
544
+ self._output("Local cache updated!")
545
+ else:
546
+ self._output("No new entries to add to local cache.")
547
+
548
+ server_missing_cacheentry_keys = cache_difference.get(
549
+ "server_missing_cacheentry_keys", []
550
+ )
551
+ server_missing_cacheentries = [
552
+ entry
553
+ for key in server_missing_cacheentry_keys
554
+ if (entry := cache.data.get(key)) is not None
555
+ ]
556
+ old_entry_keys = [key for key in cache.keys()]
557
+
558
+ self._output("Running job...")
559
+ results = self._run_local(
560
+ n=n,
561
+ debug=debug,
562
+ progress_bar=progress_bar,
563
+ cache=cache,
564
+ stop_on_exception=stop_on_exception,
565
+ sidecar_model=sidecar_model,
566
+ print_exceptions=print_exceptions,
567
+ )
568
+ self._output("Job completed!")
569
+
570
+ new_cache_entries = list(
571
+ [entry for entry in cache.values() if entry.key not in old_entry_keys]
572
+ )
573
+ server_missing_cacheentries.extend(new_cache_entries)
574
+
575
+ new_entry_count = len(server_missing_cacheentries)
576
+ if new_entry_count > 0:
577
+ self._output(
578
+ f"Updating remote cache with {new_entry_count:,} new "
579
+ f"{'entry' if new_entry_count == 1 else 'entries'}..."
580
+ )
581
+ coop.remote_cache_create_many(
582
+ server_missing_cacheentries,
583
+ visibility="private",
584
+ description=remote_cache_description,
585
+ )
586
+ self._output("Remote cache updated!")
587
+ else:
588
+ self._output("No new entries to add to remote cache.")
589
+
590
+ results.cache = cache.new_entries_cache()
591
+
592
+ self._output(f"There are {len(cache.keys()):,} entries in the local cache.")
353
593
 
354
594
  return results
355
595
 
356
596
  def _run_local(self, *args, **kwargs):
357
597
  """Run the job locally."""
358
- from edsl.jobs.runners.JobsRunnerAsyncio import JobsRunnerAsyncio
359
598
 
360
599
  results = JobsRunnerAsyncio(self).run(*args, **kwargs)
361
600
  return results
362
601
 
602
+ async def run_async(self, cache=None, **kwargs):
603
+ """Run the job asynchronously."""
604
+ results = await JobsRunnerAsyncio(self).run_async(cache=cache, **kwargs)
605
+ return results
606
+
607
+ def all_question_parameters(self):
608
+ """Return all the fields in the questions in the survey."""
609
+ return set.union(*[question.parameters for question in self.survey.questions])
610
+
363
611
  #######################
364
612
  # Dunder methods
365
613
  #######################
@@ -412,6 +660,11 @@ class Jobs(Base):
412
660
  @remove_edsl_version
413
661
  def from_dict(cls, data: dict) -> Jobs:
414
662
  """Creates a Jobs instance from a dictionary."""
663
+ from edsl import Survey
664
+ from edsl.agents.Agent import Agent
665
+ from edsl.language_models.LanguageModel import LanguageModel
666
+ from edsl.scenarios.Scenario import Scenario
667
+
415
668
  return cls(
416
669
  survey=Survey.from_dict(data["survey"]),
417
670
  agents=[Agent.from_dict(agent) for agent in data["agents"]],
@@ -438,7 +691,8 @@ class Jobs(Base):
438
691
  """
439
692
  import random
440
693
  from edsl.questions import QuestionMultipleChoice
441
- from edsl import Agent
694
+ from edsl.agents.Agent import Agent
695
+ from edsl.scenarios.Scenario import Scenario
442
696
 
443
697
  # (status, question, period)
444
698
  agent_answers = {
@@ -477,11 +731,14 @@ class Jobs(Base):
477
731
  question_options=["Good", "Great", "OK", "Terrible"],
478
732
  question_name="how_feeling_yesterday",
479
733
  )
734
+ from edsl import Survey, ScenarioList
735
+
480
736
  base_survey = Survey(questions=[q1, q2])
481
737
 
482
- job = base_survey.by(
483
- Scenario({"period": "morning"}), Scenario({"period": "afternoon"})
484
- ).by(joy_agent, sad_agent)
738
+ scenario_list = ScenarioList(
739
+ [Scenario({"period": "morning"}), Scenario({"period": "afternoon"})]
740
+ )
741
+ job = base_survey.by(scenario_list).by(joy_agent, sad_agent)
485
742
 
486
743
  return job
487
744
 
@@ -516,11 +773,3 @@ if __name__ == "__main__":
516
773
  import doctest
517
774
 
518
775
  doctest.testmod(optionflags=doctest.ELLIPSIS)
519
-
520
- # from edsl.jobs import Jobs
521
-
522
- # job = Jobs.example()
523
- # len(job) == 8
524
- # results, info = job.run(debug=True)
525
- # len(results) == 8
526
- # results
edsl/jobs/buckets/BucketCollection.py CHANGED
@@ -10,8 +10,9 @@ class BucketCollection(UserDict):
10
10
  Models themselves are hashable, so this works.
11
11
  """
12
12
 
13
- def __init__(self):
13
+ def __init__(self, infinity_buckets=False):
14
14
  super().__init__()
15
+ self.infinity_buckets = infinity_buckets
15
16
 
16
17
  def __repr__(self):
17
18
  return f"BucketCollection({self.data})"
@@ -21,8 +22,13 @@ class BucketCollection(UserDict):
21
22
 
22
23
  This will create the token and request buckets for the model."""
23
24
  # compute the TPS and RPS from the model
24
- TPS = model.TPM / 60.0
25
- RPS = model.RPM / 60.0
25
+ if not self.infinity_buckets:
26
+ TPS = model.TPM / 60.0
27
+ RPS = model.RPM / 60.0
28
+ else:
29
+ TPS = float("inf")
30
+ RPS = float("inf")
31
+
26
32
  # create the buckets
27
33
  requests_bucket = TokenBucket(
28
34
  bucket_name=model.model,
edsl/jobs/buckets/ModelBuckets.py CHANGED
@@ -1,4 +1,4 @@
1
- from edsl.jobs.buckets.TokenBucket import TokenBucket
1
+ # from edsl.jobs.buckets.TokenBucket import TokenBucket
2
2
 
3
3
 
4
4
  class ModelBuckets:
@@ -8,7 +8,7 @@ class ModelBuckets:
8
8
  A request is one call to the service. The number of tokens required for a request depends on parameters.
9
9
  """
10
10
 
11
- def __init__(self, requests_bucket: TokenBucket, tokens_bucket: TokenBucket):
11
+ def __init__(self, requests_bucket: "TokenBucket", tokens_bucket: "TokenBucket"):
12
12
  """Initialize the model buckets.
13
13
 
14
14
  The requests bucket captures requests per unit of time.
@@ -28,6 +28,8 @@ class ModelBuckets:
28
28
  @classmethod
29
29
  def infinity_bucket(cls, model_name: str = "not_specified") -> "ModelBuckets":
30
30
  """Create a bucket with infinite capacity and refill rate."""
31
+ from edsl.jobs.buckets.TokenBucket import TokenBucket
32
+
31
33
  return cls(
32
34
  requests_bucket=TokenBucket(
33
35
  bucket_name=model_name,
edsl/jobs/buckets/TokenBucket.py CHANGED
@@ -1,8 +1,6 @@
1
1
  from typing import Union, List, Any
2
2
  import asyncio
3
3
  import time
4
- from collections import UserDict
5
- from matplotlib import pyplot as plt
6
4
 
7
5
 
8
6
  class TokenBucket:
@@ -114,6 +112,7 @@ class TokenBucket:
114
112
  times, tokens = zip(*self.get_log())
115
113
  start_time = times[0]
116
114
  times = [t - start_time for t in times] # Normalize time to start from 0
115
+ from matplotlib import pyplot as plt
117
116
 
118
117
  plt.figure(figsize=(10, 6))
119
118
  plt.plot(times, tokens, label="Tokens Available")