edsl 0.1.27.dev2__py3-none-any.whl → 0.1.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +99 -22
- edsl/BaseDiff.py +260 -0
- edsl/__init__.py +4 -0
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +26 -5
- edsl/agents/AgentList.py +62 -7
- edsl/agents/Invigilator.py +4 -9
- edsl/agents/InvigilatorBase.py +5 -5
- edsl/agents/descriptors.py +3 -1
- edsl/conjure/AgentConstructionMixin.py +152 -0
- edsl/conjure/Conjure.py +56 -0
- edsl/conjure/InputData.py +628 -0
- edsl/conjure/InputDataCSV.py +48 -0
- edsl/conjure/InputDataMixinQuestionStats.py +182 -0
- edsl/conjure/InputDataPyRead.py +91 -0
- edsl/conjure/InputDataSPSS.py +8 -0
- edsl/conjure/InputDataStata.py +8 -0
- edsl/conjure/QuestionOptionMixin.py +76 -0
- edsl/conjure/QuestionTypeMixin.py +23 -0
- edsl/conjure/RawQuestion.py +65 -0
- edsl/conjure/SurveyResponses.py +7 -0
- edsl/conjure/__init__.py +9 -4
- edsl/conjure/examples/placeholder.txt +0 -0
- edsl/conjure/naming_utilities.py +263 -0
- edsl/conjure/utilities.py +165 -28
- edsl/conversation/Conversation.py +238 -0
- edsl/conversation/car_buying.py +58 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/coop.py +191 -12
- edsl/coop/utils.py +20 -2
- edsl/data/Cache.py +55 -17
- edsl/data/CacheHandler.py +10 -9
- edsl/inference_services/AnthropicService.py +1 -0
- edsl/inference_services/DeepInfraService.py +20 -13
- edsl/inference_services/GoogleService.py +7 -1
- edsl/inference_services/InferenceServicesCollection.py +33 -7
- edsl/inference_services/OpenAIService.py +17 -10
- edsl/inference_services/models_available_cache.py +69 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/Jobs.py +240 -36
- edsl/jobs/buckets/BucketCollection.py +9 -3
- edsl/jobs/interviews/Interview.py +4 -1
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +24 -10
- edsl/jobs/interviews/retry_management.py +4 -4
- edsl/jobs/runners/JobsRunnerAsyncio.py +87 -45
- edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
- edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
- edsl/language_models/LanguageModel.py +37 -44
- edsl/language_models/ModelList.py +96 -0
- edsl/language_models/registry.py +14 -0
- edsl/language_models/repair.py +95 -24
- edsl/notebooks/Notebook.py +119 -31
- edsl/questions/QuestionBase.py +109 -12
- edsl/questions/descriptors.py +5 -2
- edsl/questions/question_registry.py +7 -0
- edsl/results/Result.py +20 -8
- edsl/results/Results.py +85 -11
- edsl/results/ResultsDBMixin.py +3 -6
- edsl/results/ResultsExportMixin.py +47 -16
- edsl/results/ResultsToolsMixin.py +5 -5
- edsl/scenarios/Scenario.py +59 -5
- edsl/scenarios/ScenarioList.py +97 -40
- edsl/study/ObjectEntry.py +97 -0
- edsl/study/ProofOfWork.py +110 -0
- edsl/study/SnapShot.py +77 -0
- edsl/study/Study.py +491 -0
- edsl/study/__init__.py +2 -0
- edsl/surveys/Survey.py +79 -31
- edsl/surveys/SurveyExportMixin.py +21 -3
- edsl/utilities/__init__.py +1 -0
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/gcp_bucket/simple_example.py +9 -0
- edsl/utilities/interface.py +24 -28
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/utilities.py +57 -2
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/METADATA +43 -17
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/RECORD +83 -55
- edsl-0.1.28.dist-info/entry_points.txt +3 -0
- edsl/conjure/RawResponseColumn.py +0 -327
- edsl/conjure/SurveyBuilder.py +0 -308
- edsl/conjure/SurveyBuilderCSV.py +0 -78
- edsl/conjure/SurveyBuilderSPSS.py +0 -118
- edsl/data/RemoteDict.py +0 -103
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/LICENSE +0 -0
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/WHEEL +0 -0
edsl/surveys/Survey.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
import re
|
5
|
+
|
5
6
|
from typing import Any, Generator, Optional, Union, List, Literal, Callable
|
6
7
|
|
7
8
|
from rich import print
|
@@ -88,7 +89,7 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
88
89
|
|
89
90
|
>>> s = Survey.example()
|
90
91
|
>>> s.get_question("q0")
|
91
|
-
Question('multiple_choice', question_name =
|
92
|
+
Question('multiple_choice', question_name = \"""q0\""", question_text = \"""Do you like school?\""", question_options = ['yes', 'no'])
|
92
93
|
"""
|
93
94
|
if question_name not in self.question_name_to_index:
|
94
95
|
raise KeyError(f"Question name {question_name} not found in survey.")
|
@@ -101,6 +102,16 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
101
102
|
# warnings.warn("survey.get_question is deprecated. Use subscript operator instead.")
|
102
103
|
return self.get(question_name)
|
103
104
|
|
105
|
+
def __hash__(self) -> int:
    """Return a hash of the survey.

    The value is produced by ``dict_hash`` applied to the dictionary
    returned by ``self._to_dict()``.
    """
    # Imported here rather than at module level, as in the original code.
    from edsl.utilities.utilities import dict_hash

    return dict_hash(self._to_dict())
|
110
|
+
|
111
|
+
@property
def parameters(self):
    """Return the union of the ``parameters`` of every question in the survey.

    :return: A new set holding each element found in any question's
        ``parameters``; an empty set when the survey has no questions.
    """
    # Calling the unbound ``set.union`` with no arguments raises TypeError,
    # which is what happened for a survey without questions. Starting from an
    # empty set gives the same result for non-empty surveys and ``set()``
    # for empty ones.
    return set().union(*(q.parameters for q in self.questions))
|
114
|
+
|
104
115
|
@property
|
105
116
|
def question_names(self) -> list[str]:
|
106
117
|
"""Return a list of question names in the survey.
|
@@ -528,7 +539,7 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
528
539
|
###################
|
529
540
|
# FORWARD METHODS
|
530
541
|
###################
|
531
|
-
def by(self, *args: Union[Agent, Scenario, LanguageModel]) -> Jobs:
|
542
|
+
def by(self, *args: Union["Agent", "Scenario", "LanguageModel"]) -> "Jobs":
|
532
543
|
"""Add Agents, Scenarios, and LanguageModels to a survey and returns a runnable Jobs object.
|
533
544
|
|
534
545
|
:param args: The Agents, Scenarios, and LanguageModels to add to the survey.
|
@@ -551,9 +562,7 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
551
562
|
|
552
563
|
>>> from edsl import QuestionFreeText
|
553
564
|
>>> s = Survey([QuestionFreeText.example()])
|
554
|
-
>>> results = s.run(debug = True)
|
555
|
-
>>> results
|
556
|
-
Results(...)
|
565
|
+
>>> results = s.run(debug = True, cache = False)
|
557
566
|
>>> results.select('answer.*').print(format = "rich")
|
558
567
|
┏━━━━━━━━━━━━━━┓
|
559
568
|
┃ answer ┃
|
@@ -640,18 +649,17 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
640
649
|
|
641
650
|
>>> i = s.gen_path_through_survey()
|
642
651
|
>>> next(i)
|
643
|
-
Question('multiple_choice', question_name =
|
652
|
+
Question('multiple_choice', question_name = \"""q0\""", question_text = \"""Do you like school?\""", question_options = ['yes', 'no'])
|
644
653
|
>>> i.send({"q0": "yes"})
|
645
|
-
Question('multiple_choice', question_name =
|
654
|
+
Question('multiple_choice', question_name = \"""q2\""", question_text = \"""Why?\""", question_options = ['**lack*** of killer bees in cafeteria', 'other'])
|
646
655
|
|
647
656
|
And here is the path through the survey if the answer to q0 is 'no':
|
648
657
|
|
649
658
|
>>> i2 = s.gen_path_through_survey()
|
650
659
|
>>> next(i2)
|
651
|
-
Question('multiple_choice', question_name =
|
660
|
+
Question('multiple_choice', question_name = \"""q0\""", question_text = \"""Do you like school?\""", question_options = ['yes', 'no'])
|
652
661
|
>>> i2.send({"q0": "no"})
|
653
|
-
Question('multiple_choice', question_name =
|
654
|
-
|
662
|
+
Question('multiple_choice', question_name = \"""q1\""", question_text = \"""Why not?\""", question_options = ['killer bees in cafeteria', 'other'])
|
655
663
|
"""
|
656
664
|
question = self._first_question()
|
657
665
|
while not question == EndOfSurvey:
|
@@ -761,7 +769,7 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
761
769
|
|
762
770
|
>>> s = Survey.example()
|
763
771
|
>>> s[0]
|
764
|
-
Question('multiple_choice', question_name =
|
772
|
+
Question('multiple_choice', question_name = \"""q0\""", question_text = \"""Do you like school?\""", question_options = ['yes', 'no'])
|
765
773
|
|
766
774
|
"""
|
767
775
|
if isinstance(index, int):
|
@@ -802,22 +810,33 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
802
810
|
###################
|
803
811
|
# SERIALIZATION METHODS
|
804
812
|
###################
|
805
|
-
|
806
|
-
def
|
813
|
+
|
814
|
+
def _to_dict(self) -> dict[str, Any]:
|
807
815
|
"""Serialize the Survey object to a dictionary.
|
808
816
|
|
809
817
|
>>> s = Survey.example()
|
810
|
-
>>> s.
|
811
|
-
dict_keys(['questions', 'memory_plan', 'rule_collection', 'question_groups'
|
818
|
+
>>> s._to_dict().keys()
|
819
|
+
dict_keys(['questions', 'memory_plan', 'rule_collection', 'question_groups'])
|
812
820
|
|
813
821
|
"""
|
814
822
|
return {
|
815
|
-
"questions": [q.
|
823
|
+
"questions": [q._to_dict() for q in self._questions],
|
816
824
|
"memory_plan": self.memory_plan.to_dict(),
|
817
825
|
"rule_collection": self.rule_collection.to_dict(),
|
818
826
|
"question_groups": self.question_groups,
|
819
827
|
}
|
820
828
|
|
829
|
+
@add_edsl_version
def to_dict(self) -> dict[str, Any]:
    """Return the survey as a dictionary via the ``add_edsl_version`` wrapper.

    The body only delegates to ``_to_dict``; the doctest below shows the
    two extra keys present in the decorated result.

    >>> s = Survey.example()
    >>> s.to_dict().keys()
    dict_keys(['questions', 'memory_plan', 'rule_collection', 'question_groups', 'edsl_version', 'edsl_class_name'])

    """
    serialized = self._to_dict()
    return serialized
|
839
|
+
|
821
840
|
@classmethod
|
822
841
|
@remove_edsl_version
|
823
842
|
def from_dict(cls, data: dict) -> Survey:
|
@@ -891,7 +910,7 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
891
910
|
"""Print the survey in a rich format.
|
892
911
|
|
893
912
|
>>> t = Survey.example().rich_print()
|
894
|
-
>>> print(t)
|
913
|
+
>>> print(t) # doctest: +SKIP
|
895
914
|
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
896
915
|
┃ Questions ┃
|
897
916
|
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
|
@@ -938,7 +957,7 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
938
957
|
:param filename: The name of the file to save the CSV to.
|
939
958
|
|
940
959
|
>>> s = Survey.example()
|
941
|
-
>>> s.to_csv()
|
960
|
+
>>> s.to_csv() # doctest: +SKIP
|
942
961
|
index question_name question_text question_options question_type
|
943
962
|
0 0 q0 Do you like school? [yes, no] multiple_choice
|
944
963
|
1 1 q1 Why not? [killer bees in cafeteria, other] multiple_choice
|
@@ -1003,30 +1022,59 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
1003
1022
|
s = s.add_rule(q0, "q0 == 'yes'", q2)
|
1004
1023
|
return s
|
1005
1024
|
|
1006
|
-
def
|
1025
|
+
def get_job(self, model=None, agent=None, **kwargs):
    """Combine this survey with a scenario, an agent and a model.

    :param model: The model to attach. When ``None``, ``Model()`` is created.
    :param agent: The agent to attach. When ``None``, ``Agent()`` is created.
    :param kwargs: Wrapped as a whole in a single ``Scenario``.
    :return: The result of ``self.by(scenario).by(agent).by(model)``.
    """
    # Test against None instead of truthiness, so that an object supplied by
    # the caller is never silently swapped for the default just because it
    # evaluates as falsy.
    if model is None:
        from edsl import Model

        model = Model()

    from edsl.scenarios.Scenario import Scenario

    scenario = Scenario(kwargs)

    if agent is None:
        from edsl import Agent

        agent = Agent()

    return self.by(scenario).by(agent).by(model)
|
1041
|
+
|
1042
|
+
def __call__(self, model=None, agent=None, cache=None, **kwargs):
|
1007
1043
|
"""Run the survey with default model, taking the required survey as arguments.
|
1008
1044
|
|
1009
1045
|
>>> from edsl.questions import QuestionFunctional
|
1010
1046
|
>>> def f(scenario, agent_traits): return "yes" if scenario["period"] == "morning" else "no"
|
1011
1047
|
>>> q = QuestionFunctional(question_name = "q0", func = f)
|
1012
1048
|
>>> s = Survey([q])
|
1013
|
-
>>> s(period = "morning").select("answer.q0").first()
|
1049
|
+
>>> s(period = "morning", cache = False).select("answer.q0").first()
|
1014
1050
|
'yes'
|
1015
|
-
>>> s(period = "evening").select("answer.q0").first()
|
1051
|
+
>>> s(period = "evening", cache = False).select("answer.q0").first()
|
1016
1052
|
'no'
|
1017
1053
|
"""
|
1018
|
-
|
1019
|
-
|
1020
|
-
del kwargs["model"]
|
1021
|
-
else:
|
1022
|
-
from edsl import Model
|
1054
|
+
job = self.get_job(model, agent, **kwargs)
|
1055
|
+
return job.run(cache=cache)
|
1023
1056
|
|
1024
|
-
|
1025
|
-
|
1057
|
+
async def run_async(self, model=None, agent=None, cache=None, **kwargs):
    """Build a job with ``get_job`` and await its ``run_async``.

    ``model``, ``agent`` and any extra keyword arguments are forwarded to
    ``get_job``. When ``cache`` is ``None`` a new ``Cache()`` is created and
    used instead.

    >>> from edsl.questions import QuestionFunctional
    >>> def f(scenario, agent_traits): return "yes" if scenario["period"] == "morning" else "no"
    >>> q = QuestionFunctional(question_name = "q0", func = f)
    >>> s = Survey([q])
    >>> s(period = "morning").select("answer.q0").first()
    'yes'
    >>> s(period = "evening").select("answer.q0").first()
    'no'
    """
    # TODO: temp fix by creating a cache
    if cache is not None:
        active_cache = cache
    else:
        from edsl.data import Cache

        active_cache = Cache()

    job = self.get_job(model, agent, **kwargs)
    return await job.run_async(cache=active_cache)
|
1030
1078
|
|
1031
1079
|
|
1032
1080
|
def main():
|
@@ -1066,4 +1114,4 @@ def main():
|
|
1066
1114
|
if __name__ == "__main__":
    import doctest

    # NOTE(review): doctest.SKIP passed as a module-wide option flag applies
    # to every example, so this call appears to run no doctests at all.
    # Confirm that is intended, given that individual examples in this file
    # already carry their own "# doctest: +SKIP" directive.
    doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.SKIP)
|
@@ -13,6 +13,20 @@ class SurveyExportMixin:
|
|
13
13
|
|
14
14
|
return SurveyCSS.default_style().generate_css()
|
15
15
|
|
16
|
+
def get_description(self) -> str:
|
17
|
+
"""Return the description of the survey."""
|
18
|
+
from edsl import QuestionFreeText
|
19
|
+
|
20
|
+
question_texts = "\n".join([q.question_text for q in self._questions])
|
21
|
+
q = QuestionFreeText(
|
22
|
+
question_name="description",
|
23
|
+
question_text=f"""A survey was conducted with the following questions:
|
24
|
+
{question_texts}
|
25
|
+
Please write a description of the survey.
|
26
|
+
""",
|
27
|
+
)
|
28
|
+
return q.run().select("description").first()
|
29
|
+
|
16
30
|
def docx(self, filename=None) -> Union["Document", None]:
|
17
31
|
"""Generate a docx document for the survey."""
|
18
32
|
doc = Document()
|
@@ -60,10 +74,14 @@ class SurveyExportMixin:
|
|
60
74
|
:param filename: The name of the file to save the code to.
|
61
75
|
:param survey_var_name: The name of the survey variable.
|
62
76
|
|
77
|
+
>>> from edsl.surveys import Survey
|
63
78
|
>>> survey = Survey.example()
|
64
|
-
>>> survey.code()
|
65
|
-
|
66
|
-
|
79
|
+
>>> print(survey.code())
|
80
|
+
from edsl.surveys.Survey import Survey
|
81
|
+
...
|
82
|
+
...
|
83
|
+
survey = Survey(questions=[q0, q1, q2])
|
84
|
+
...
|
67
85
|
"""
|
68
86
|
header_lines = ["from edsl.surveys.Survey import Survey"]
|
69
87
|
header_lines.append("from edsl import Question")
|
edsl/utilities/__init__.py
CHANGED
File without changes
|
@@ -0,0 +1,96 @@
|
|
1
|
+
import requests
|
2
|
+
|
3
|
+
|
4
|
+
class CloudStorageManager:
    """Client for a bucket server that issues signed URLs for file storage.

    Upload, delete and list requests send ``secret_token`` in the
    ``Authorization`` header; download requests do not.
    """

    def __init__(self, secret_token=None):
        """Store the server URL and the optional secret token.

        :param secret_token: Value sent as the ``Authorization`` header.
            Required for uploads.
        """
        self.api_url = "https://bucket-server-tte53lsfxq-uc.a.run.app"
        self.secret_token = secret_token

    def get_signed_url(self, file_name, operation="upload"):
        """Get a signed URL for uploading or downloading a file.

        :param file_name: Name of the file the URL is requested for.
        :param operation: Inserted into the endpoint path
            (``/generate-{operation}-signed-url``); ``"upload"`` additionally
            requires and sends the secret token.
        :return: The ``signed_url`` field of the server's JSON response.
        :raises ValueError: If ``operation`` is ``"upload"`` and no secret
            token was set.
        :raises Exception: If the server does not answer with status 200.
        """
        if operation == "upload":
            if self.secret_token is None:
                # The original ``raise "..."`` raised a bare string, which in
                # Python 3 fails with an unrelated TypeError. Raise a real
                # exception that carries the intended message.
                raise ValueError("Set secret_token for upload permissions")
            headers = {
                "Authorization": self.secret_token,
                "Content-Type": "application/json",
            }
        else:
            headers = {
                "Content-Type": "application/json",
            }
        data = {"file_name": file_name}
        endpoint = f"{self.api_url}/generate-{operation}-signed-url"
        response = requests.post(endpoint, json=data, headers=headers)

        if response.status_code != 200:
            raise Exception(
                f"Failed to get signed URL: {response.status_code} {response.text}"
            )
        return response.json().get("signed_url")

    def upload_file(self, file_path, upload_file_name):
        """Upload a local file through a signed upload URL.

        :param file_path: Path of the local file to read.
        :param upload_file_name: Name the signed URL is requested for.
        :raises ValueError: If no secret token was set.
        :raises Exception: If either request does not return status 200.
        """
        signed_url = self.get_signed_url(upload_file_name, operation="upload")

        with open(file_path, "rb") as file:
            upload_response = requests.put(
                signed_url,
                data=file,
                headers={"Content-Type": "application/octet-stream"},
            )

        if upload_response.status_code != 200:
            raise Exception(
                f"Failed to upload file: {upload_response.status_code} {upload_response.text}"
            )
        print("File uploaded successfully")

    def download_file(self, file_name, save_name):
        """Download a file through a signed download URL.

        :param file_name: Name the signed URL is requested for.
        :param save_name: Local path the content is written to.
        :raises Exception: If either request does not return status 200.
        """
        signed_url = self.get_signed_url(file_name, operation="download")

        # A streamed response holds its connection until it is consumed or
        # closed; the ``with`` block releases it on every path, including the
        # error branch.
        with requests.get(signed_url, stream=True) as download_response:
            if download_response.status_code != 200:
                raise Exception(
                    f"Failed to download file: {download_response.status_code} {download_response.text}"
                )
            with open(save_name, "wb") as file:
                for chunk in download_response.iter_content(chunk_size=8192):
                    file.write(chunk)
        print("File downloaded successfully")

    def delete_file(self, file_name):
        """Delete a file from the cloud storage.

        :param file_name: Sent to ``/delete-file`` as the ``file_name`` query
            parameter.
        :raises Exception: If the server does not answer with status 200.
        """
        headers = {
            "Authorization": self.secret_token,
            "Content-Type": "application/json",
        }
        data = {"file_name": file_name}
        endpoint = f"{self.api_url}/delete-file"
        response = requests.delete(endpoint, params=data, headers=headers)

        if response.status_code != 200:
            raise Exception(
                f"Failed to delete file: {response.status_code} {response.text}"
            )
        print("File deleted successfully")

    def list_files(self):
        """Return the server's file listing, with a ``url`` added per entry.

        :return: The decoded JSON response. Each item of its ``"data"`` list
            gains a ``"url"`` key built from the item's ``"shaKey"``.
        :raises Exception: If the server does not answer with status 200.
        """
        url = self.api_url + "/list_files"
        headers = {
            "Authorization": self.secret_token,
            "Content-Type": "application/json",
        }
        res = requests.get(url, headers=headers)
        # Report a failed request the same way the other methods do, instead
        # of failing later while decoding or indexing an error body.
        if res.status_code != 200:
            raise Exception(f"Failed to list files: {res.status_code} {res.text}")
        data = res.json()
        for x in data["data"]:
            x["url"] = self.api_url + "/file/" + x["shaKey"]

        return data
|
edsl/utilities/interface.py
CHANGED
@@ -331,15 +331,15 @@ def create_latex_table_from_data(data, filename=None, split_at_dot=True):
|
|
331
331
|
|
332
332
|
>>> data = [{"a": [1, 2, 3], "b": [4, 5, 6]}]
|
333
333
|
>>> print(create_latex_table_from_data(data))
|
334
|
-
|
335
|
-
|
336
|
-
a & b
|
337
|
-
|
338
|
-
1 & 4
|
339
|
-
2 & 5
|
340
|
-
3 & 6
|
341
|
-
|
342
|
-
|
334
|
+
\\begin{tabular}{|c|c|}
|
335
|
+
\\hline
|
336
|
+
a & b \\\\
|
337
|
+
\\hline
|
338
|
+
1 & 4 \\\\
|
339
|
+
2 & 5 \\\\
|
340
|
+
3 & 6 \\\\
|
341
|
+
\\hline
|
342
|
+
\\end{tabular}
|
343
343
|
"""
|
344
344
|
|
345
345
|
def escape_latex(s):
|
@@ -379,7 +379,7 @@ def create_latex_table_from_data(data, filename=None, split_at_dot=True):
|
|
379
379
|
num_rows = len(next(iter(data[0].values())))
|
380
380
|
|
381
381
|
# Debugging: Print the keys of the dictionaries
|
382
|
-
print("Keys in data[0]:", list(data[0].keys()))
|
382
|
+
# print("Keys in data[0]:", list(data[0].keys()))
|
383
383
|
|
384
384
|
# Add the data rows
|
385
385
|
for i in range(num_rows):
|
@@ -410,9 +410,7 @@ def create_latex_table_from_data(data, filename=None, split_at_dot=True):
|
|
410
410
|
return latex_table_str
|
411
411
|
|
412
412
|
|
413
|
-
def print_list_of_dicts_as_html_table(
|
414
|
-
data, filename=None, interactive=True, notebook=False
|
415
|
-
):
|
413
|
+
def print_list_of_dicts_as_html_table(data, interactive=True):
|
416
414
|
"""Print a list of dictionaries as an HTML table.
|
417
415
|
|
418
416
|
:param data: The list of dictionaries to print.
|
@@ -459,20 +457,7 @@ def print_list_of_dicts_as_html_table(
|
|
459
457
|
# Close the table
|
460
458
|
html_table += "</tbody>\n"
|
461
459
|
html_table += "</table>"
|
462
|
-
|
463
|
-
html = gen_html_sandwich(html_table, interactive=interactive)
|
464
|
-
|
465
|
-
# Output or save to file
|
466
|
-
if filename:
|
467
|
-
with open(filename, "w") as f:
|
468
|
-
f.write(html)
|
469
|
-
else:
|
470
|
-
# view_html(html)
|
471
|
-
if notebook:
|
472
|
-
# ipython_diplay(HTML(html))
|
473
|
-
return html
|
474
|
-
else:
|
475
|
-
print(html)
|
460
|
+
return gen_html_sandwich(html_table, interactive=interactive)
|
476
461
|
|
477
462
|
|
478
463
|
def print_list_of_dicts_as_markdown_table(data, filename=None):
|
@@ -486,7 +471,11 @@ def print_list_of_dicts_as_markdown_table(data, filename=None):
|
|
486
471
|
return
|
487
472
|
|
488
473
|
# Gather all unique headers
|
489
|
-
headers = list({key for d in data for key in d.keys()})
|
474
|
+
# headers = list({key for d in data for key in d.keys()})
|
475
|
+
headers = []
|
476
|
+
for column in data:
|
477
|
+
headers.append(list(column.keys())[0])
|
478
|
+
|
490
479
|
markdown_table = "| " + " | ".join(headers) + " |\n"
|
491
480
|
markdown_table += "|-" + "-|-".join(["" for _ in headers]) + "-|\n"
|
492
481
|
|
@@ -527,6 +516,13 @@ def print_tally_with_rich(data, filename=None):
|
|
527
516
|
Example:
|
528
517
|
>>> data = {'a':12, 'b':14, 'c':9}
|
529
518
|
>>> print_tally_with_rich(data)
|
519
|
+
┏━━━━━━━┳━━━━━━━┓
|
520
|
+
┃ Value ┃ Count ┃
|
521
|
+
┡━━━━━━━╇━━━━━━━┩
|
522
|
+
│ a │ 12 │
|
523
|
+
│ b │ 14 │
|
524
|
+
│ c │ 9 │
|
525
|
+
└───────┴───────┘
|
530
526
|
"""
|
531
527
|
# Initialize a console object
|
532
528
|
console = Console(record=True)
|
@@ -0,0 +1,28 @@
|
|
1
|
+
import json
|
2
|
+
from edsl.utilities.utilities import valid_json
|
3
|
+
|
4
|
+
|
5
|
+
def extract_json_from_string(s):
    """Parse the text between the first ``{`` and the last ``}`` of ``s`` as JSON.

    :param s: String that may contain a JSON object surrounded by other text.
    :return: The value produced by ``json.loads`` for that span.
    :raises ValueError: If no ``{`` ... ``}`` span exists, or if the span is
        not valid JSON.
    """
    opening = s.find("{")
    closing = s.rfind("}")

    # Both braces must exist and the opening one must come first.
    braces_found = opening != -1 and closing != -1 and opening < closing
    if not braces_found:
        raise ValueError("No JSON object found in string")

    # The slice is inclusive of the closing brace.
    candidate = s[opening : closing + 1]
    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        raise ValueError("Invalid JSON string")
    return parsed


if __name__ == "__main__":
    text = (
        'Sure - here is some JSON { "key": "value", "number": 123, "array": [1, 2, 3] }'
    )
    extracted_json = extract_json_from_string(text)
    d = extracted_json
|
edsl/utilities/utilities.py
CHANGED
@@ -19,6 +19,55 @@ from pygments.lexers import JsonLexer
|
|
19
19
|
from pygments.formatters import HtmlFormatter
|
20
20
|
from IPython.display import HTML
|
21
21
|
|
22
|
+
from functools import wraps
|
23
|
+
import types
|
24
|
+
import time
|
25
|
+
|
26
|
+
|
27
|
+
def time_it(func):
    """Decorate ``func`` so that every call prints how long it took.

    The printed label is ``<name>.<function name>``, where ``<name>`` is the
    class name of the first positional argument when one is given (the
    instance, for a method) and ``func.__module__`` otherwise.

    :param func: The callable to wrap.
    :return: A wrapper that returns whatever ``func`` returns.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is the clock intended for measuring intervals;
        # time.time() follows the wall clock, which can be adjusted while
        # the call is running.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        class_name = args[0].__class__.__name__ if args else func.__module__
        print(
            f"Function {class_name}.{func.__name__} took {execution_time:.4f} seconds to execute"
        )
        return result

    return wrapper
|
41
|
+
|
42
|
+
|
43
|
+
def time_all_functions(module_or_class):
    """Wrap every plain function attribute of ``module_or_class`` with ``time_it``.

    Only attributes that are instances of ``types.FunctionType`` are
    replaced; everything else is left as it is.

    :param module_or_class: Object whose ``vars()`` namespace is scanned and
        whose attributes are rebound in place.
    """
    plain_functions = {
        attr_name: attr
        for attr_name, attr in vars(module_or_class).items()
        if isinstance(attr, types.FunctionType)
    }
    for attr_name, attr in plain_functions.items():
        setattr(module_or_class, attr_name, time_it(attr))
|
47
|
+
|
48
|
+
|
49
|
+
def dict_hash(data: dict):
    """Return an integer hash computed from the JSON form of ``data``.

    ``data`` is dumped to JSON with sorted keys, the MD5 hex digest of that
    text is read as a base-16 integer, and ``hash()`` of that integer is
    returned.

    :param data: A JSON-serializable dictionary.
    """
    canonical_json = json.dumps(data, sort_keys=True)
    md5_hex = hashlib.md5(canonical_json.encode()).hexdigest()
    return hash(int(md5_hex, 16))
|
53
|
+
|
54
|
+
|
55
|
+
import re
|
56
|
+
import json
|
57
|
+
|
58
|
+
|
59
|
+
def extract_json_from_string(text):
    """Return the first JSON object embedded in ``text``, or ``None``.

    The previous pattern ``\\{.*?\\}`` stopped at the first ``}``, so nested
    objects (and objects with ``}`` inside a string) were cut short and
    rejected, and it could not match across line breaks. The JSON decoder is
    now asked to parse a complete object starting at each ``{`` in turn.

    :param text: String that may contain a JSON object among other text.
    :return: The decoded object for the first ``{`` position that parses,
        or ``None`` when no position yields valid JSON.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value beginning exactly at ``start``
            # and ignores whatever follows it.
            json_object, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return json_object
    return None
|
70
|
+
|
22
71
|
|
23
72
|
def clean_json(bad_json_str):
|
24
73
|
"""
|
@@ -32,6 +81,7 @@ def clean_json(bad_json_str):
|
|
32
81
|
("\t", "\\t"),
|
33
82
|
("\b", "\\b"),
|
34
83
|
("\f", "\\f"),
|
84
|
+
("[/INST]", "removed_inst"),
|
35
85
|
]
|
36
86
|
|
37
87
|
s = bad_json_str
|
@@ -233,9 +283,14 @@ def valid_json(json_string):
|
|
233
283
|
return False
|
234
284
|
|
235
285
|
|
236
|
-
def is_valid_variable_name(name):
|
286
|
+
def is_valid_variable_name(name, allow_name=True):
    """Check if a string is a valid variable name.

    :param name: The candidate string.
    :param allow_name: When false, the literal string ``"name"`` is also
        rejected.
    :return: ``True`` if ``name`` is an identifier, is not a Python keyword
        and passes the ``allow_name`` rule; otherwise ``False``.
    """
    # Rule shared by both modes: an identifier that is not a keyword.
    if not name.isidentifier() or keyword.iskeyword(name):
        return False
    if allow_name:
        return True
    return name != "name"
|
239
294
|
|
240
295
|
|
241
296
|
def create_valid_var_name(s, transform_func: Callable = lambda x: x.lower()) -> str:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: edsl
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.28
|
4
4
|
Summary: Create and analyze LLM-based surveys
|
5
5
|
Home-page: https://www.expectedparrot.com/
|
6
6
|
License: MIT
|
@@ -30,6 +30,7 @@ Requires-Dist: pandas (>=2.1.4,<3.0.0)
|
|
30
30
|
Requires-Dist: pydot (>=2.0.0,<3.0.0)
|
31
31
|
Requires-Dist: pygments (>=2.17.2,<3.0.0)
|
32
32
|
Requires-Dist: pymupdf (>=1.24.4,<2.0.0)
|
33
|
+
Requires-Dist: pyreadstat (>=1.2.7,<2.0.0)
|
33
34
|
Requires-Dist: python-docx (>=1.1.0,<2.0.0)
|
34
35
|
Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
|
35
36
|
Requires-Dist: restrictedpython (>=7.1,<8.0)
|
@@ -45,28 +46,53 @@ Description-Content-Type: text/markdown
|
|
45
46
|
<img src="https://github.com/expectedparrot/edsl/blob/main/static/logo.png?raw=true" alt="edsl.png" width="100"/>
|
46
47
|
</p>
|
47
48
|
|
48
|
-
The Expected Parrot Domain-Specific Language (EDSL) package lets you conduct computational social science and market research with AI. Use it to design surveys and experiments, simulate responses with large language models, and perform data labeling and other research tasks.
|
49
|
+
The Expected Parrot Domain-Specific Language (EDSL) package lets you conduct computational social science and market research with AI. Use it to design surveys and experiments, simulate responses with large language models, and perform data labeling and other research tasks. Results are formatted as specified datasets and come with built-in methods for analyzing, visualizing, and sharing.
|
49
50
|
|
50
51
|
## 🔗 Links
|
51
|
-
- PyPI
|
52
|
-
- Documentation
|
53
|
-
- Getting started
|
54
|
-
- Discord
|
52
|
+
- [PyPI](https://pypi.org/project/edsl/)
|
53
|
+
- [Documentation](https://docs.expectedparrot.com)
|
54
|
+
- [Getting started](https://docs.expectedparrot.com/en/latest/starter_tutorial.html)
|
55
|
+
- [Discord](https://discord.com/invite/mxAYkjfy9m)
|
56
|
+
- [Twitter](https://x.com/ExpectedParrot)
|
57
|
+
- [LinkedIn](https://www.linkedin.com/company/expectedparrot/)
|
58
|
+
- [Blog](https://blog.expectedparrot.com)
|
55
59
|
|
60
|
+
## 💡 Contributions, feature requests & bugs
|
61
|
+
Interested in contributing? Want us to add a new feature? Found a bug for us to squash?
|
62
|
+
Please send us an email at [info@expectedparrot.com](mailto:info@expectedparrot.com) or message us at our [Discord channel](https://discord.com/invite/mxAYkjfy9m).
|
56
63
|
|
57
|
-
##
|
58
|
-
|
64
|
+
## 💻 Requirements
|
65
|
+
* EDSL is compatible with Python 3.9 - 3.12.
|
66
|
+
* API keys for large language models that you want to use, stored in a `.env` file.
|
67
|
+
See instructions on [storing API keys](https://docs.expectedparrot.com/en/latest/api_keys.html).
|
59
68
|
|
69
|
+
## 🌎 Hello, World!
|
70
|
+
A quick example:
|
60
71
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
pip install edsl
|
65
|
-
```
|
72
|
+
```python
|
73
|
+
# Import a question type
|
74
|
+
from edsl.questions import QuestionMultipleChoice
|
66
75
|
|
67
|
-
|
68
|
-
|
76
|
+
# Construct a question using the question type template
|
77
|
+
q = QuestionMultipleChoice(
|
78
|
+
question_name="example_question",
|
79
|
+
question_text="How do you feel today?",
|
80
|
+
question_options=["Bad", "OK", "Good"]
|
81
|
+
)
|
69
82
|
|
70
|
-
|
71
|
-
|
83
|
+
# Run it with the default language model
|
84
|
+
results = q.run()
|
72
85
|
|
86
|
+
# Inspect the results in a dataset
|
87
|
+
results.select("example_question").print()
|
88
|
+
```
|
89
|
+
|
90
|
+
Output:
|
91
|
+
```python
|
92
|
+
┏━━━━━━━━━━━━━━━━━━━┓
|
93
|
+
┃ answer ┃
|
94
|
+
┃ .example_question ┃
|
95
|
+
┡━━━━━━━━━━━━━━━━━━━┩
|
96
|
+
│ Good │
|
97
|
+
└───────────────────┘
|
98
|
+
```
|