edsl 0.1.39__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. edsl/Base.py +0 -28
  2. edsl/__init__.py +1 -1
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +17 -9
  5. edsl/agents/Invigilator.py +14 -13
  6. edsl/agents/InvigilatorBase.py +1 -4
  7. edsl/agents/PromptConstructor.py +22 -42
  8. edsl/agents/QuestionInstructionPromptBuilder.py +1 -1
  9. edsl/auto/AutoStudy.py +5 -18
  10. edsl/auto/StageBase.py +40 -53
  11. edsl/auto/StageQuestions.py +1 -2
  12. edsl/auto/utilities.py +6 -0
  13. edsl/coop/coop.py +5 -21
  14. edsl/data/Cache.py +18 -29
  15. edsl/data/CacheHandler.py +2 -0
  16. edsl/data/RemoteCacheSync.py +46 -154
  17. edsl/enums.py +0 -7
  18. edsl/inference_services/AnthropicService.py +16 -38
  19. edsl/inference_services/AvailableModelFetcher.py +1 -7
  20. edsl/inference_services/GoogleService.py +1 -5
  21. edsl/inference_services/InferenceServicesCollection.py +2 -18
  22. edsl/inference_services/OpenAIService.py +31 -46
  23. edsl/inference_services/TestService.py +3 -1
  24. edsl/inference_services/TogetherAIService.py +3 -5
  25. edsl/inference_services/data_structures.py +2 -74
  26. edsl/jobs/AnswerQuestionFunctionConstructor.py +113 -148
  27. edsl/jobs/FetchInvigilator.py +3 -10
  28. edsl/jobs/InterviewsConstructor.py +4 -6
  29. edsl/jobs/Jobs.py +233 -299
  30. edsl/jobs/JobsChecks.py +2 -2
  31. edsl/jobs/JobsPrompts.py +1 -1
  32. edsl/jobs/JobsRemoteInferenceHandler.py +136 -160
  33. edsl/jobs/interviews/Interview.py +42 -80
  34. edsl/jobs/runners/JobsRunnerAsyncio.py +358 -88
  35. edsl/jobs/runners/JobsRunnerStatus.py +165 -133
  36. edsl/jobs/tasks/TaskHistory.py +3 -24
  37. edsl/language_models/LanguageModel.py +4 -59
  38. edsl/language_models/ModelList.py +8 -19
  39. edsl/language_models/__init__.py +1 -1
  40. edsl/language_models/registry.py +180 -0
  41. edsl/language_models/repair.py +1 -1
  42. edsl/questions/QuestionBase.py +26 -35
  43. edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +49 -52
  44. edsl/questions/QuestionBasePromptsMixin.py +1 -1
  45. edsl/questions/QuestionBudget.py +1 -1
  46. edsl/questions/QuestionCheckBox.py +2 -2
  47. edsl/questions/QuestionExtract.py +7 -5
  48. edsl/questions/QuestionFreeText.py +1 -1
  49. edsl/questions/QuestionList.py +15 -9
  50. edsl/questions/QuestionMatrix.py +1 -1
  51. edsl/questions/QuestionMultipleChoice.py +1 -1
  52. edsl/questions/QuestionNumerical.py +1 -1
  53. edsl/questions/QuestionRank.py +1 -1
  54. edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +18 -6
  55. edsl/questions/{response_validator_factory.py → ResponseValidatorFactory.py} +1 -7
  56. edsl/questions/SimpleAskMixin.py +1 -1
  57. edsl/questions/__init__.py +1 -1
  58. edsl/results/DatasetExportMixin.py +119 -60
  59. edsl/results/Result.py +3 -109
  60. edsl/results/Results.py +39 -50
  61. edsl/scenarios/FileStore.py +0 -32
  62. edsl/scenarios/ScenarioList.py +7 -35
  63. edsl/scenarios/handlers/csv.py +0 -11
  64. edsl/surveys/Survey.py +20 -71
  65. {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +1 -1
  66. {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/RECORD +78 -84
  67. {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +1 -1
  68. edsl/jobs/async_interview_runner.py +0 -138
  69. edsl/jobs/check_survey_scenario_compatibility.py +0 -85
  70. edsl/jobs/data_structures.py +0 -120
  71. edsl/jobs/results_exceptions_handler.py +0 -98
  72. edsl/language_models/model.py +0 -256
  73. edsl/questions/data_structures.py +0 -20
  74. edsl/results/file_exports.py +0 -252
  75. /edsl/agents/{question_option_processor.py → QuestionOptionProcessor.py} +0 -0
  76. /edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +0 -0
  77. /edsl/questions/{loop_processor.py → LoopProcessor.py} +0 -0
  78. /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
  79. /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
  80. /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
  81. /edsl/results/{results_selector.py → Selector.py} +0 -0
  82. /edsl/scenarios/{directory_scanner.py → DirectoryScanner.py} +0 -0
  83. /edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +0 -0
  84. /edsl/scenarios/{scenario_selector.py → ScenarioSelector.py} +0 -0
  85. {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
@@ -327,38 +327,6 @@ class FileStore(Scenario):
327
327
 
328
328
  return ConstructDownloadLink(self).create_link(custom_filename, style)
329
329
 
330
- def to_pandas(self):
331
- """
332
- Convert the file content to a pandas DataFrame if supported by the file handler.
333
-
334
- Returns:
335
- pandas.DataFrame: The data from the file as a DataFrame
336
-
337
- Raises:
338
- AttributeError: If the file type's handler doesn't support pandas conversion
339
- """
340
- handler = FileMethods.get_handler(self.suffix)
341
- if handler and hasattr(handler, "to_pandas"):
342
- return handler(self.path).to_pandas()
343
- raise AttributeError(
344
- f"Converting {self.suffix} files to pandas DataFrame is not supported"
345
- )
346
-
347
- def __getattr__(self, name):
348
- """
349
- Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
350
- """
351
- if self.suffix == "csv":
352
- # Get the pandas DataFrame
353
- df = self.to_pandas()
354
- # Check if the requested attribute exists in the DataFrame
355
- if hasattr(df, name):
356
- return getattr(df, name)
357
- # If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
358
- raise AttributeError(
359
- f"'{self.__class__.__name__}' object has no attribute '{name}'"
360
- )
361
-
362
330
 
363
331
  class CSVFileStore(FileStore):
364
332
  @classmethod
@@ -45,7 +45,7 @@ from edsl.utilities.naming_utilities import sanitize_string
45
45
  from edsl.utilities.is_valid_variable_name import is_valid_variable_name
46
46
  from edsl.exceptions.scenarios import ScenarioError
47
47
 
48
- from edsl.scenarios.directory_scanner import DirectoryScanner
48
+ from edsl.scenarios.DirectoryScanner import DirectoryScanner
49
49
 
50
50
 
51
51
  class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
@@ -661,7 +661,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
661
661
  >>> s.select('a')
662
662
  ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
663
663
  """
664
- from edsl.scenarios.scenario_selector import ScenarioSelector
664
+ from edsl.scenarios.ScenarioSelector import ScenarioSelector
665
665
 
666
666
  return ScenarioSelector(self).select(*fields)
667
667
 
@@ -840,25 +840,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
840
840
  ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
841
841
  """
842
842
  sl = self.duplicate()
843
- if len(values) != len(sl):
844
- raise ScenarioError(
845
- f"Length of values ({len(values)}) does not match length of ScenarioList ({len(sl)})"
846
- )
847
843
  for i, value in enumerate(values):
848
844
  sl[i][name] = value
849
845
  return sl
850
846
 
851
- @classmethod
852
- def create_empty_scenario_list(cls, n: int) -> ScenarioList:
853
- """Create an empty ScenarioList with n scenarios.
854
-
855
- Example:
856
-
857
- >>> ScenarioList.create_empty_scenario_list(3)
858
- ScenarioList([Scenario({}), Scenario({}), Scenario({})])
859
- """
860
- return ScenarioList([Scenario({}) for _ in range(n)])
861
-
862
847
  def add_value(self, name: str, value: Any) -> ScenarioList:
863
848
  """Add a value to all scenarios in a ScenarioList.
864
849
 
@@ -1237,7 +1222,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1237
1222
  >>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
1238
1223
  True
1239
1224
  """
1240
- from edsl.scenarios.scenario_join import ScenarioJoin
1225
+ from edsl.scenarios.ScenarioJoin import ScenarioJoin
1241
1226
 
1242
1227
  sj = ScenarioJoin(self, other)
1243
1228
  return sj.left_join(by)
@@ -1259,7 +1244,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1259
1244
  else:
1260
1245
  data = self
1261
1246
  d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
1262
-
1263
1247
  if add_edsl_version:
1264
1248
  from edsl import __version__
1265
1249
 
@@ -1312,22 +1296,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1312
1296
 
1313
1297
  @classmethod
1314
1298
  def from_nested_dict(cls, data: dict) -> ScenarioList:
1315
- """Create a `ScenarioList` from a nested dictionary.
1316
-
1317
- >>> data = {"headline": ["Armistice Signed, War Over: Celebrations Erupt Across City"], "date": ["1918-11-11"], "author": ["Jane Smith"]}
1318
- >>> ScenarioList.from_nested_dict(data)
1319
- ScenarioList([Scenario({'headline': 'Armistice Signed, War Over: Celebrations Erupt Across City', 'date': '1918-11-11', 'author': 'Jane Smith'})])
1320
-
1321
- """
1322
- length_of_first_list = len(next(iter(data.values())))
1323
- s = ScenarioList.create_empty_scenario_list(n=length_of_first_list)
1324
-
1325
- if any(len(v) != length_of_first_list for v in data.values()):
1326
- raise ValueError(
1327
- "All lists in the dictionary must be of the same length.",
1328
- )
1329
- for key, list_of_values in data.items():
1330
- s = s.add_list(key, list_of_values)
1299
+ """Create a `ScenarioList` from a nested dictionary."""
1300
+ s = ScenarioList()
1301
+ for key, value in data.items():
1302
+ s.add_list(key, value)
1331
1303
  return s
1332
1304
 
1333
1305
  def code(self) -> str:
@@ -36,14 +36,3 @@ class CsvMethods(FileMethods):
36
36
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as f:
37
37
  df.to_csv(f.name, index=False)
38
38
  return f.name
39
-
40
- def to_pandas(self):
41
- """
42
- Convert the CSV file to a pandas DataFrame.
43
-
44
- Returns:
45
- pandas.DataFrame: The data from the CSV as a DataFrame
46
- """
47
- import pandas as pd
48
-
49
- return pd.read_csv(self.path)
edsl/surveys/Survey.py CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  from __future__ import annotations
4
4
  import re
5
- import random
6
5
 
7
6
  from typing import (
8
7
  Any,
@@ -116,7 +115,6 @@ class Survey(SurveyExportMixin, Base):
116
115
  rule_collection: Optional["RuleCollection"] = None,
117
116
  question_groups: Optional["QuestionGroupType"] = None,
118
117
  name: Optional[str] = None,
119
- questions_to_randomize: Optional[List[str]] = None,
120
118
  ):
121
119
  """Create a new survey.
122
120
 
@@ -165,33 +163,6 @@ class Survey(SurveyExportMixin, Base):
165
163
 
166
164
  warnings.warn("name parameter to a survey is deprecated.")
167
165
 
168
- if questions_to_randomize is not None:
169
- self.questions_to_randomize = questions_to_randomize
170
- else:
171
- self.questions_to_randomize = []
172
-
173
- self._seed = None
174
-
175
- def draw(self) -> "Survey":
176
- """Return a new survey with a randomly selected permutation of the options."""
177
- if self._seed is None: # only set once
178
- self._seed = hash(self)
179
- random.seed(self._seed)
180
-
181
- if len(self.questions_to_randomize) == 0:
182
- return self
183
-
184
- new_questions = []
185
- for question in self.questions:
186
- if question.question_name in self.questions_to_randomize:
187
- new_questions.append(question.draw())
188
- else:
189
- new_questions.append(question.duplicate())
190
-
191
- d = self.to_dict()
192
- d["questions"] = [q.to_dict() for q in new_questions]
193
- return Survey.from_dict(d)
194
-
195
166
  def _process_raw_questions(self, questions: Optional[List["QuestionType"]]) -> list:
196
167
  """Process the raw questions passed to the survey."""
197
168
  handler = InstructionHandler(self)
@@ -345,9 +316,7 @@ class Survey(SurveyExportMixin, Base):
345
316
  >>> s.to_dict(add_edsl_version = False).keys()
346
317
  dict_keys(['questions', 'memory_plan', 'rule_collection', 'question_groups'])
347
318
  """
348
- from edsl import __version__
349
-
350
- d = {
319
+ return {
351
320
  "questions": [
352
321
  q.to_dict(add_edsl_version=add_edsl_version)
353
322
  for q in self._recombined_questions_and_instructions()
@@ -358,13 +327,6 @@ class Survey(SurveyExportMixin, Base):
358
327
  ),
359
328
  "question_groups": self.question_groups,
360
329
  }
361
- if self.questions_to_randomize != []:
362
- d["questions_to_randomize"] = self.questions_to_randomize
363
-
364
- if add_edsl_version:
365
- d["edsl_version"] = __version__
366
- d["edsl_class_name"] = "Survey"
367
- return d
368
330
 
369
331
  @classmethod
370
332
  @remove_edsl_version
@@ -408,16 +370,11 @@ class Survey(SurveyExportMixin, Base):
408
370
  get_class(q_dict).from_dict(q_dict) for q_dict in data["questions"]
409
371
  ]
410
372
  memory_plan = MemoryPlan.from_dict(data["memory_plan"])
411
- if "questions_to_randomize" in data:
412
- questions_to_randomize = data["questions_to_randomize"]
413
- else:
414
- questions_to_randomize = None
415
373
  survey = cls(
416
374
  questions=questions,
417
375
  memory_plan=memory_plan,
418
376
  rule_collection=RuleCollection.from_dict(data["rule_collection"]),
419
377
  question_groups=data["question_groups"],
420
- questions_to_randomize=questions_to_randomize,
421
378
  )
422
379
  return survey
423
380
 
@@ -913,7 +870,6 @@ class Survey(SurveyExportMixin, Base):
913
870
  agent: Optional["Agent"] = None,
914
871
  cache: Optional["Cache"] = None,
915
872
  disable_remote_inference: bool = False,
916
- disable_remote_cache: bool = False,
917
873
  **kwargs,
918
874
  ):
919
875
  """Run the survey with default model, taking the required survey as arguments.
@@ -923,7 +879,7 @@ class Survey(SurveyExportMixin, Base):
923
879
  >>> def f(scenario, agent_traits): return "yes" if scenario["period"] == "morning" else "no"
924
880
  >>> q = QuestionFunctional(question_name = "q0", func = f)
925
881
  >>> s = Survey([q])
926
- >>> async def test_run_async(): result = await s.run_async(period="morning", disable_remote_inference = True, disable_remote_cache=True); print(result.select("answer.q0").first())
882
+ >>> async def test_run_async(): result = await s.run_async(period="morning", disable_remote_inference = True); print(result.select("answer.q0").first())
927
883
  >>> asyncio.run(test_run_async())
928
884
  yes
929
885
  >>> import asyncio
@@ -931,23 +887,20 @@ class Survey(SurveyExportMixin, Base):
931
887
  >>> def f(scenario, agent_traits): return "yes" if scenario["period"] == "morning" else "no"
932
888
  >>> q = QuestionFunctional(question_name = "q0", func = f)
933
889
  >>> s = Survey([q])
934
- >>> async def test_run_async(): result = await s.run_async(period="evening", disable_remote_inference = True, disable_remote_cache = True); print(result.select("answer.q0").first())
935
- >>> results = asyncio.run(test_run_async())
890
+ >>> async def test_run_async(): result = await s.run_async(period="evening", disable_remote_inference = True); print(result.select("answer.q0").first())
891
+ >>> asyncio.run(test_run_async())
936
892
  no
937
893
  """
938
894
  # TODO: temp fix by creating a cache
939
895
  if cache is None:
940
896
  from edsl.data import Cache
897
+
941
898
  c = Cache()
942
899
  else:
943
900
  c = cache
944
-
945
-
946
-
947
- jobs: "Jobs" = self.get_job(model=model, agent=agent, **kwargs).using(c)
901
+ jobs: "Jobs" = self.get_job(model=model, agent=agent, **kwargs)
948
902
  return await jobs.run_async(
949
- disable_remote_inference=disable_remote_inference,
950
- disable_remote_cache=disable_remote_cache,
903
+ cache=c, disable_remote_inference=disable_remote_inference
951
904
  )
952
905
 
953
906
  def run(self, *args, **kwargs) -> "Results":
@@ -965,12 +918,6 @@ class Survey(SurveyExportMixin, Base):
965
918
 
966
919
  return Jobs(survey=self).run(*args, **kwargs)
967
920
 
968
- def using(self, obj: Union["Cache", "KeyLookup", "BucketCollection"]) -> "Jobs":
969
- """Turn the survey into a Job and appends the arguments to the Job."""
970
- from edsl.jobs.Jobs import Jobs
971
-
972
- return Jobs(survey=self).using(obj)
973
-
974
921
  def duplicate(self):
975
922
  """Duplicate the survey.
976
923
 
@@ -1139,7 +1086,7 @@ class Survey(SurveyExportMixin, Base):
1139
1086
  # questions_string = ", ".join([repr(q) for q in self._questions])
1140
1087
  questions_string = ", ".join([repr(q) for q in self.raw_passed_questions or []])
1141
1088
  # question_names_string = ", ".join([repr(name) for name in self.question_names])
1142
- return f"Survey(questions=[{questions_string}], memory_plan={self.memory_plan}, rule_collection={self.rule_collection}, question_groups={self.question_groups}, questions_to_randomize={self.questions_to_randomize})"
1089
+ return f"Survey(questions=[{questions_string}], memory_plan={self.memory_plan}, rule_collection={self.rule_collection}, question_groups={self.question_groups})"
1143
1090
 
1144
1091
  def _summary(self) -> dict:
1145
1092
  return {
@@ -1225,7 +1172,7 @@ class Survey(SurveyExportMixin, Base):
1225
1172
 
1226
1173
  def get_job(self, model=None, agent=None, **kwargs):
1227
1174
  if model is None:
1228
- from edsl.language_models.model import Model
1175
+ from edsl.language_models.registry import Model
1229
1176
 
1230
1177
  model = Model()
1231
1178
 
@@ -1246,24 +1193,26 @@ def main():
1246
1193
 
1247
1194
  def example_survey():
1248
1195
  """Return an example survey."""
1249
- from edsl import QuestionMultipleChoice, QuestionList, QuestionNumerical, Survey
1196
+ from edsl.questions.QuestionMultipleChoice import QuestionMultipleChoice
1197
+ from edsl.surveys.Survey import Survey
1250
1198
 
1251
1199
  q0 = QuestionMultipleChoice(
1200
+ question_text="Do you like school?",
1201
+ question_options=["yes", "no"],
1252
1202
  question_name="q0",
1253
- question_text="What is the capital of France?",
1254
- question_options=["London", "Paris", "Rome", "Boston", "I don't know"]
1255
1203
  )
1256
- q1 = QuestionList(
1204
+ q1 = QuestionMultipleChoice(
1205
+ question_text="Why not?",
1206
+ question_options=["killer bees in cafeteria", "other"],
1257
1207
  question_name="q1",
1258
- question_text="Name some cities in France.",
1259
- max_list_items = 5
1260
1208
  )
1261
- q2 = QuestionNumerical(
1209
+ q2 = QuestionMultipleChoice(
1210
+ question_text="Why?",
1211
+ question_options=["**lack*** of killer bees in cafeteria", "other"],
1262
1212
  question_name="q2",
1263
- question_text="What is the population of {{ q0.answer }}?"
1264
1213
  )
1265
1214
  s = Survey(questions=[q0, q1, q2])
1266
- s = s.add_rule(q0, "q0 == 'Paris'", q2)
1215
+ s = s.add_rule(q0, "q0 == 'yes'", q2)
1267
1216
  return s
1268
1217
 
1269
1218
  s = example_survey()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: edsl
3
- Version: 0.1.39
3
+ Version: 0.1.39.dev2
4
4
  Summary: Create and analyze LLM-based surveys
5
5
  Home-page: https://www.expectedparrot.com/
6
6
  License: MIT