edsl 0.1.39__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +0 -28
- edsl/__init__.py +1 -1
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +17 -9
- edsl/agents/Invigilator.py +14 -13
- edsl/agents/InvigilatorBase.py +1 -4
- edsl/agents/PromptConstructor.py +22 -42
- edsl/agents/QuestionInstructionPromptBuilder.py +1 -1
- edsl/auto/AutoStudy.py +5 -18
- edsl/auto/StageBase.py +40 -53
- edsl/auto/StageQuestions.py +1 -2
- edsl/auto/utilities.py +6 -0
- edsl/coop/coop.py +5 -21
- edsl/data/Cache.py +18 -29
- edsl/data/CacheHandler.py +2 -0
- edsl/data/RemoteCacheSync.py +46 -154
- edsl/enums.py +0 -7
- edsl/inference_services/AnthropicService.py +16 -38
- edsl/inference_services/AvailableModelFetcher.py +1 -7
- edsl/inference_services/GoogleService.py +1 -5
- edsl/inference_services/InferenceServicesCollection.py +2 -18
- edsl/inference_services/OpenAIService.py +31 -46
- edsl/inference_services/TestService.py +3 -1
- edsl/inference_services/TogetherAIService.py +3 -5
- edsl/inference_services/data_structures.py +2 -74
- edsl/jobs/AnswerQuestionFunctionConstructor.py +113 -148
- edsl/jobs/FetchInvigilator.py +3 -10
- edsl/jobs/InterviewsConstructor.py +4 -6
- edsl/jobs/Jobs.py +233 -299
- edsl/jobs/JobsChecks.py +2 -2
- edsl/jobs/JobsPrompts.py +1 -1
- edsl/jobs/JobsRemoteInferenceHandler.py +136 -160
- edsl/jobs/interviews/Interview.py +42 -80
- edsl/jobs/runners/JobsRunnerAsyncio.py +358 -88
- edsl/jobs/runners/JobsRunnerStatus.py +165 -133
- edsl/jobs/tasks/TaskHistory.py +3 -24
- edsl/language_models/LanguageModel.py +4 -59
- edsl/language_models/ModelList.py +8 -19
- edsl/language_models/__init__.py +1 -1
- edsl/language_models/registry.py +180 -0
- edsl/language_models/repair.py +1 -1
- edsl/questions/QuestionBase.py +26 -35
- edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +49 -52
- edsl/questions/QuestionBasePromptsMixin.py +1 -1
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +2 -2
- edsl/questions/QuestionExtract.py +7 -5
- edsl/questions/QuestionFreeText.py +1 -1
- edsl/questions/QuestionList.py +15 -9
- edsl/questions/QuestionMatrix.py +1 -1
- edsl/questions/QuestionMultipleChoice.py +1 -1
- edsl/questions/QuestionNumerical.py +1 -1
- edsl/questions/QuestionRank.py +1 -1
- edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +18 -6
- edsl/questions/{response_validator_factory.py → ResponseValidatorFactory.py} +1 -7
- edsl/questions/SimpleAskMixin.py +1 -1
- edsl/questions/__init__.py +1 -1
- edsl/results/DatasetExportMixin.py +119 -60
- edsl/results/Result.py +3 -109
- edsl/results/Results.py +39 -50
- edsl/scenarios/FileStore.py +0 -32
- edsl/scenarios/ScenarioList.py +7 -35
- edsl/scenarios/handlers/csv.py +0 -11
- edsl/surveys/Survey.py +20 -71
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +1 -1
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/RECORD +78 -84
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +1 -1
- edsl/jobs/async_interview_runner.py +0 -138
- edsl/jobs/check_survey_scenario_compatibility.py +0 -85
- edsl/jobs/data_structures.py +0 -120
- edsl/jobs/results_exceptions_handler.py +0 -98
- edsl/language_models/model.py +0 -256
- edsl/questions/data_structures.py +0 -20
- edsl/results/file_exports.py +0 -252
- /edsl/agents/{question_option_processor.py → QuestionOptionProcessor.py} +0 -0
- /edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +0 -0
- /edsl/questions/{loop_processor.py → LoopProcessor.py} +0 -0
- /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
- /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
- /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
- /edsl/results/{results_selector.py → Selector.py} +0 -0
- /edsl/scenarios/{directory_scanner.py → DirectoryScanner.py} +0 -0
- /edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +0 -0
- /edsl/scenarios/{scenario_selector.py → ScenarioSelector.py} +0 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
edsl/scenarios/FileStore.py
CHANGED
@@ -327,38 +327,6 @@ class FileStore(Scenario):
|
|
327
327
|
|
328
328
|
return ConstructDownloadLink(self).create_link(custom_filename, style)
|
329
329
|
|
330
|
-
def to_pandas(self):
|
331
|
-
"""
|
332
|
-
Convert the file content to a pandas DataFrame if supported by the file handler.
|
333
|
-
|
334
|
-
Returns:
|
335
|
-
pandas.DataFrame: The data from the file as a DataFrame
|
336
|
-
|
337
|
-
Raises:
|
338
|
-
AttributeError: If the file type's handler doesn't support pandas conversion
|
339
|
-
"""
|
340
|
-
handler = FileMethods.get_handler(self.suffix)
|
341
|
-
if handler and hasattr(handler, "to_pandas"):
|
342
|
-
return handler(self.path).to_pandas()
|
343
|
-
raise AttributeError(
|
344
|
-
f"Converting {self.suffix} files to pandas DataFrame is not supported"
|
345
|
-
)
|
346
|
-
|
347
|
-
def __getattr__(self, name):
|
348
|
-
"""
|
349
|
-
Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
|
350
|
-
"""
|
351
|
-
if self.suffix == "csv":
|
352
|
-
# Get the pandas DataFrame
|
353
|
-
df = self.to_pandas()
|
354
|
-
# Check if the requested attribute exists in the DataFrame
|
355
|
-
if hasattr(df, name):
|
356
|
-
return getattr(df, name)
|
357
|
-
# If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
|
358
|
-
raise AttributeError(
|
359
|
-
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
360
|
-
)
|
361
|
-
|
362
330
|
|
363
331
|
class CSVFileStore(FileStore):
|
364
332
|
@classmethod
|
edsl/scenarios/ScenarioList.py
CHANGED
@@ -45,7 +45,7 @@ from edsl.utilities.naming_utilities import sanitize_string
|
|
45
45
|
from edsl.utilities.is_valid_variable_name import is_valid_variable_name
|
46
46
|
from edsl.exceptions.scenarios import ScenarioError
|
47
47
|
|
48
|
-
from edsl.scenarios.directory_scanner import DirectoryScanner
|
48
|
+
from edsl.scenarios.DirectoryScanner import DirectoryScanner
|
49
49
|
|
50
50
|
|
51
51
|
class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
|
@@ -661,7 +661,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
661
661
|
>>> s.select('a')
|
662
662
|
ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
|
663
663
|
"""
|
664
|
-
from edsl.scenarios.scenario_selector import ScenarioSelector
|
664
|
+
from edsl.scenarios.ScenarioSelector import ScenarioSelector
|
665
665
|
|
666
666
|
return ScenarioSelector(self).select(*fields)
|
667
667
|
|
@@ -840,25 +840,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
840
840
|
ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
|
841
841
|
"""
|
842
842
|
sl = self.duplicate()
|
843
|
-
if len(values) != len(sl):
|
844
|
-
raise ScenarioError(
|
845
|
-
f"Length of values ({len(values)}) does not match length of ScenarioList ({len(sl)})"
|
846
|
-
)
|
847
843
|
for i, value in enumerate(values):
|
848
844
|
sl[i][name] = value
|
849
845
|
return sl
|
850
846
|
|
851
|
-
@classmethod
|
852
|
-
def create_empty_scenario_list(cls, n: int) -> ScenarioList:
|
853
|
-
"""Create an empty ScenarioList with n scenarios.
|
854
|
-
|
855
|
-
Example:
|
856
|
-
|
857
|
-
>>> ScenarioList.create_empty_scenario_list(3)
|
858
|
-
ScenarioList([Scenario({}), Scenario({}), Scenario({})])
|
859
|
-
"""
|
860
|
-
return ScenarioList([Scenario({}) for _ in range(n)])
|
861
|
-
|
862
847
|
def add_value(self, name: str, value: Any) -> ScenarioList:
|
863
848
|
"""Add a value to all scenarios in a ScenarioList.
|
864
849
|
|
@@ -1237,7 +1222,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1237
1222
|
>>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
|
1238
1223
|
True
|
1239
1224
|
"""
|
1240
|
-
from edsl.scenarios.scenario_join import ScenarioJoin
|
1225
|
+
from edsl.scenarios.ScenarioJoin import ScenarioJoin
|
1241
1226
|
|
1242
1227
|
sj = ScenarioJoin(self, other)
|
1243
1228
|
return sj.left_join(by)
|
@@ -1259,7 +1244,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1259
1244
|
else:
|
1260
1245
|
data = self
|
1261
1246
|
d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
|
1262
|
-
|
1263
1247
|
if add_edsl_version:
|
1264
1248
|
from edsl import __version__
|
1265
1249
|
|
@@ -1312,22 +1296,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1312
1296
|
|
1313
1297
|
@classmethod
|
1314
1298
|
def from_nested_dict(cls, data: dict) -> ScenarioList:
|
1315
|
-
"""Create a `ScenarioList` from a nested dictionary.
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
ScenarioList([Scenario({'headline': 'Armistice Signed, War Over: Celebrations Erupt Across City', 'date': '1918-11-11', 'author': 'Jane Smith'})])
|
1320
|
-
|
1321
|
-
"""
|
1322
|
-
length_of_first_list = len(next(iter(data.values())))
|
1323
|
-
s = ScenarioList.create_empty_scenario_list(n=length_of_first_list)
|
1324
|
-
|
1325
|
-
if any(len(v) != length_of_first_list for v in data.values()):
|
1326
|
-
raise ValueError(
|
1327
|
-
"All lists in the dictionary must be of the same length.",
|
1328
|
-
)
|
1329
|
-
for key, list_of_values in data.items():
|
1330
|
-
s = s.add_list(key, list_of_values)
|
1299
|
+
"""Create a `ScenarioList` from a nested dictionary."""
|
1300
|
+
s = ScenarioList()
|
1301
|
+
for key, value in data.items():
|
1302
|
+
s.add_list(key, value)
|
1331
1303
|
return s
|
1332
1304
|
|
1333
1305
|
def code(self) -> str:
|
edsl/scenarios/handlers/csv.py
CHANGED
@@ -36,14 +36,3 @@ class CsvMethods(FileMethods):
|
|
36
36
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as f:
|
37
37
|
df.to_csv(f.name, index=False)
|
38
38
|
return f.name
|
39
|
-
|
40
|
-
def to_pandas(self):
|
41
|
-
"""
|
42
|
-
Convert the CSV file to a pandas DataFrame.
|
43
|
-
|
44
|
-
Returns:
|
45
|
-
pandas.DataFrame: The data from the CSV as a DataFrame
|
46
|
-
"""
|
47
|
-
import pandas as pd
|
48
|
-
|
49
|
-
return pd.read_csv(self.path)
|
edsl/surveys/Survey.py
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
import re
|
5
|
-
import random
|
6
5
|
|
7
6
|
from typing import (
|
8
7
|
Any,
|
@@ -116,7 +115,6 @@ class Survey(SurveyExportMixin, Base):
|
|
116
115
|
rule_collection: Optional["RuleCollection"] = None,
|
117
116
|
question_groups: Optional["QuestionGroupType"] = None,
|
118
117
|
name: Optional[str] = None,
|
119
|
-
questions_to_randomize: Optional[List[str]] = None,
|
120
118
|
):
|
121
119
|
"""Create a new survey.
|
122
120
|
|
@@ -165,33 +163,6 @@ class Survey(SurveyExportMixin, Base):
|
|
165
163
|
|
166
164
|
warnings.warn("name parameter to a survey is deprecated.")
|
167
165
|
|
168
|
-
if questions_to_randomize is not None:
|
169
|
-
self.questions_to_randomize = questions_to_randomize
|
170
|
-
else:
|
171
|
-
self.questions_to_randomize = []
|
172
|
-
|
173
|
-
self._seed = None
|
174
|
-
|
175
|
-
def draw(self) -> "Survey":
|
176
|
-
"""Return a new survey with a randomly selected permutation of the options."""
|
177
|
-
if self._seed is None: # only set once
|
178
|
-
self._seed = hash(self)
|
179
|
-
random.seed(self._seed)
|
180
|
-
|
181
|
-
if len(self.questions_to_randomize) == 0:
|
182
|
-
return self
|
183
|
-
|
184
|
-
new_questions = []
|
185
|
-
for question in self.questions:
|
186
|
-
if question.question_name in self.questions_to_randomize:
|
187
|
-
new_questions.append(question.draw())
|
188
|
-
else:
|
189
|
-
new_questions.append(question.duplicate())
|
190
|
-
|
191
|
-
d = self.to_dict()
|
192
|
-
d["questions"] = [q.to_dict() for q in new_questions]
|
193
|
-
return Survey.from_dict(d)
|
194
|
-
|
195
166
|
def _process_raw_questions(self, questions: Optional[List["QuestionType"]]) -> list:
|
196
167
|
"""Process the raw questions passed to the survey."""
|
197
168
|
handler = InstructionHandler(self)
|
@@ -345,9 +316,7 @@ class Survey(SurveyExportMixin, Base):
|
|
345
316
|
>>> s.to_dict(add_edsl_version = False).keys()
|
346
317
|
dict_keys(['questions', 'memory_plan', 'rule_collection', 'question_groups'])
|
347
318
|
"""
|
348
|
-
|
349
|
-
|
350
|
-
d = {
|
319
|
+
return {
|
351
320
|
"questions": [
|
352
321
|
q.to_dict(add_edsl_version=add_edsl_version)
|
353
322
|
for q in self._recombined_questions_and_instructions()
|
@@ -358,13 +327,6 @@ class Survey(SurveyExportMixin, Base):
|
|
358
327
|
),
|
359
328
|
"question_groups": self.question_groups,
|
360
329
|
}
|
361
|
-
if self.questions_to_randomize != []:
|
362
|
-
d["questions_to_randomize"] = self.questions_to_randomize
|
363
|
-
|
364
|
-
if add_edsl_version:
|
365
|
-
d["edsl_version"] = __version__
|
366
|
-
d["edsl_class_name"] = "Survey"
|
367
|
-
return d
|
368
330
|
|
369
331
|
@classmethod
|
370
332
|
@remove_edsl_version
|
@@ -408,16 +370,11 @@ class Survey(SurveyExportMixin, Base):
|
|
408
370
|
get_class(q_dict).from_dict(q_dict) for q_dict in data["questions"]
|
409
371
|
]
|
410
372
|
memory_plan = MemoryPlan.from_dict(data["memory_plan"])
|
411
|
-
if "questions_to_randomize" in data:
|
412
|
-
questions_to_randomize = data["questions_to_randomize"]
|
413
|
-
else:
|
414
|
-
questions_to_randomize = None
|
415
373
|
survey = cls(
|
416
374
|
questions=questions,
|
417
375
|
memory_plan=memory_plan,
|
418
376
|
rule_collection=RuleCollection.from_dict(data["rule_collection"]),
|
419
377
|
question_groups=data["question_groups"],
|
420
|
-
questions_to_randomize=questions_to_randomize,
|
421
378
|
)
|
422
379
|
return survey
|
423
380
|
|
@@ -913,7 +870,6 @@ class Survey(SurveyExportMixin, Base):
|
|
913
870
|
agent: Optional["Agent"] = None,
|
914
871
|
cache: Optional["Cache"] = None,
|
915
872
|
disable_remote_inference: bool = False,
|
916
|
-
disable_remote_cache: bool = False,
|
917
873
|
**kwargs,
|
918
874
|
):
|
919
875
|
"""Run the survey with default model, taking the required survey as arguments.
|
@@ -923,7 +879,7 @@ class Survey(SurveyExportMixin, Base):
|
|
923
879
|
>>> def f(scenario, agent_traits): return "yes" if scenario["period"] == "morning" else "no"
|
924
880
|
>>> q = QuestionFunctional(question_name = "q0", func = f)
|
925
881
|
>>> s = Survey([q])
|
926
|
-
>>> async def test_run_async(): result = await s.run_async(period="morning", disable_remote_inference = True
|
882
|
+
>>> async def test_run_async(): result = await s.run_async(period="morning", disable_remote_inference = True); print(result.select("answer.q0").first())
|
927
883
|
>>> asyncio.run(test_run_async())
|
928
884
|
yes
|
929
885
|
>>> import asyncio
|
@@ -931,23 +887,20 @@ class Survey(SurveyExportMixin, Base):
|
|
931
887
|
>>> def f(scenario, agent_traits): return "yes" if scenario["period"] == "morning" else "no"
|
932
888
|
>>> q = QuestionFunctional(question_name = "q0", func = f)
|
933
889
|
>>> s = Survey([q])
|
934
|
-
>>> async def test_run_async(): result = await s.run_async(period="evening", disable_remote_inference = True
|
935
|
-
>>>
|
890
|
+
>>> async def test_run_async(): result = await s.run_async(period="evening", disable_remote_inference = True); print(result.select("answer.q0").first())
|
891
|
+
>>> asyncio.run(test_run_async())
|
936
892
|
no
|
937
893
|
"""
|
938
894
|
# TODO: temp fix by creating a cache
|
939
895
|
if cache is None:
|
940
896
|
from edsl.data import Cache
|
897
|
+
|
941
898
|
c = Cache()
|
942
899
|
else:
|
943
900
|
c = cache
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
jobs: "Jobs" = self.get_job(model=model, agent=agent, **kwargs).using(c)
|
901
|
+
jobs: "Jobs" = self.get_job(model=model, agent=agent, **kwargs)
|
948
902
|
return await jobs.run_async(
|
949
|
-
disable_remote_inference=disable_remote_inference,
|
950
|
-
disable_remote_cache=disable_remote_cache,
|
903
|
+
cache=c, disable_remote_inference=disable_remote_inference
|
951
904
|
)
|
952
905
|
|
953
906
|
def run(self, *args, **kwargs) -> "Results":
|
@@ -965,12 +918,6 @@ class Survey(SurveyExportMixin, Base):
|
|
965
918
|
|
966
919
|
return Jobs(survey=self).run(*args, **kwargs)
|
967
920
|
|
968
|
-
def using(self, obj: Union["Cache", "KeyLookup", "BucketCollection"]) -> "Jobs":
|
969
|
-
"""Turn the survey into a Job and appends the arguments to the Job."""
|
970
|
-
from edsl.jobs.Jobs import Jobs
|
971
|
-
|
972
|
-
return Jobs(survey=self).using(obj)
|
973
|
-
|
974
921
|
def duplicate(self):
|
975
922
|
"""Duplicate the survey.
|
976
923
|
|
@@ -1139,7 +1086,7 @@ class Survey(SurveyExportMixin, Base):
|
|
1139
1086
|
# questions_string = ", ".join([repr(q) for q in self._questions])
|
1140
1087
|
questions_string = ", ".join([repr(q) for q in self.raw_passed_questions or []])
|
1141
1088
|
# question_names_string = ", ".join([repr(name) for name in self.question_names])
|
1142
|
-
return f"Survey(questions=[{questions_string}], memory_plan={self.memory_plan}, rule_collection={self.rule_collection}, question_groups={self.question_groups}
|
1089
|
+
return f"Survey(questions=[{questions_string}], memory_plan={self.memory_plan}, rule_collection={self.rule_collection}, question_groups={self.question_groups})"
|
1143
1090
|
|
1144
1091
|
def _summary(self) -> dict:
|
1145
1092
|
return {
|
@@ -1225,7 +1172,7 @@ class Survey(SurveyExportMixin, Base):
|
|
1225
1172
|
|
1226
1173
|
def get_job(self, model=None, agent=None, **kwargs):
|
1227
1174
|
if model is None:
|
1228
|
-
from edsl.language_models.model import Model
|
1175
|
+
from edsl.language_models.registry import Model
|
1229
1176
|
|
1230
1177
|
model = Model()
|
1231
1178
|
|
@@ -1246,24 +1193,26 @@ def main():
|
|
1246
1193
|
|
1247
1194
|
def example_survey():
|
1248
1195
|
"""Return an example survey."""
|
1249
|
-
from edsl import QuestionMultipleChoice
|
1196
|
+
from edsl.questions.QuestionMultipleChoice import QuestionMultipleChoice
|
1197
|
+
from edsl.surveys.Survey import Survey
|
1250
1198
|
|
1251
1199
|
q0 = QuestionMultipleChoice(
|
1200
|
+
question_text="Do you like school?",
|
1201
|
+
question_options=["yes", "no"],
|
1252
1202
|
question_name="q0",
|
1253
|
-
question_text="What is the capital of France?",
|
1254
|
-
question_options=["London", "Paris", "Rome", "Boston", "I don't know"]
|
1255
1203
|
)
|
1256
|
-
q1 =
|
1204
|
+
q1 = QuestionMultipleChoice(
|
1205
|
+
question_text="Why not?",
|
1206
|
+
question_options=["killer bees in cafeteria", "other"],
|
1257
1207
|
question_name="q1",
|
1258
|
-
question_text="Name some cities in France.",
|
1259
|
-
max_list_items = 5
|
1260
1208
|
)
|
1261
|
-
q2 =
|
1209
|
+
q2 = QuestionMultipleChoice(
|
1210
|
+
question_text="Why?",
|
1211
|
+
question_options=["**lack*** of killer bees in cafeteria", "other"],
|
1262
1212
|
question_name="q2",
|
1263
|
-
question_text="What is the population of {{ q0.answer }}?"
|
1264
1213
|
)
|
1265
1214
|
s = Survey(questions=[q0, q1, q2])
|
1266
|
-
s = s.add_rule(q0, "q0 == '
|
1215
|
+
s = s.add_rule(q0, "q0 == 'yes'", q2)
|
1267
1216
|
return s
|
1268
1217
|
|
1269
1218
|
s = example_survey()
|