edsl 0.1.27.dev2__py3-none-any.whl → 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. edsl/Base.py +99 -22
  2. edsl/BaseDiff.py +260 -0
  3. edsl/__init__.py +4 -0
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +26 -5
  6. edsl/agents/AgentList.py +62 -7
  7. edsl/agents/Invigilator.py +4 -9
  8. edsl/agents/InvigilatorBase.py +5 -5
  9. edsl/agents/descriptors.py +3 -1
  10. edsl/conjure/AgentConstructionMixin.py +152 -0
  11. edsl/conjure/Conjure.py +56 -0
  12. edsl/conjure/InputData.py +628 -0
  13. edsl/conjure/InputDataCSV.py +48 -0
  14. edsl/conjure/InputDataMixinQuestionStats.py +182 -0
  15. edsl/conjure/InputDataPyRead.py +91 -0
  16. edsl/conjure/InputDataSPSS.py +8 -0
  17. edsl/conjure/InputDataStata.py +8 -0
  18. edsl/conjure/QuestionOptionMixin.py +76 -0
  19. edsl/conjure/QuestionTypeMixin.py +23 -0
  20. edsl/conjure/RawQuestion.py +65 -0
  21. edsl/conjure/SurveyResponses.py +7 -0
  22. edsl/conjure/__init__.py +9 -4
  23. edsl/conjure/examples/placeholder.txt +0 -0
  24. edsl/conjure/naming_utilities.py +263 -0
  25. edsl/conjure/utilities.py +165 -28
  26. edsl/conversation/Conversation.py +238 -0
  27. edsl/conversation/car_buying.py +58 -0
  28. edsl/conversation/mug_negotiation.py +81 -0
  29. edsl/conversation/next_speaker_utilities.py +93 -0
  30. edsl/coop/coop.py +191 -12
  31. edsl/coop/utils.py +20 -2
  32. edsl/data/Cache.py +55 -17
  33. edsl/data/CacheHandler.py +10 -9
  34. edsl/inference_services/AnthropicService.py +1 -0
  35. edsl/inference_services/DeepInfraService.py +20 -13
  36. edsl/inference_services/GoogleService.py +7 -1
  37. edsl/inference_services/InferenceServicesCollection.py +33 -7
  38. edsl/inference_services/OpenAIService.py +17 -10
  39. edsl/inference_services/models_available_cache.py +69 -0
  40. edsl/inference_services/rate_limits_cache.py +25 -0
  41. edsl/inference_services/write_available.py +10 -0
  42. edsl/jobs/Jobs.py +240 -36
  43. edsl/jobs/buckets/BucketCollection.py +9 -3
  44. edsl/jobs/interviews/Interview.py +4 -1
  45. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +24 -10
  46. edsl/jobs/interviews/retry_management.py +4 -4
  47. edsl/jobs/runners/JobsRunnerAsyncio.py +87 -45
  48. edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
  49. edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
  50. edsl/language_models/LanguageModel.py +37 -44
  51. edsl/language_models/ModelList.py +96 -0
  52. edsl/language_models/registry.py +14 -0
  53. edsl/language_models/repair.py +95 -24
  54. edsl/notebooks/Notebook.py +119 -31
  55. edsl/questions/QuestionBase.py +109 -12
  56. edsl/questions/descriptors.py +5 -2
  57. edsl/questions/question_registry.py +7 -0
  58. edsl/results/Result.py +20 -8
  59. edsl/results/Results.py +85 -11
  60. edsl/results/ResultsDBMixin.py +3 -6
  61. edsl/results/ResultsExportMixin.py +47 -16
  62. edsl/results/ResultsToolsMixin.py +5 -5
  63. edsl/scenarios/Scenario.py +59 -5
  64. edsl/scenarios/ScenarioList.py +97 -40
  65. edsl/study/ObjectEntry.py +97 -0
  66. edsl/study/ProofOfWork.py +110 -0
  67. edsl/study/SnapShot.py +77 -0
  68. edsl/study/Study.py +491 -0
  69. edsl/study/__init__.py +2 -0
  70. edsl/surveys/Survey.py +79 -31
  71. edsl/surveys/SurveyExportMixin.py +21 -3
  72. edsl/utilities/__init__.py +1 -0
  73. edsl/utilities/gcp_bucket/__init__.py +0 -0
  74. edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
  75. edsl/utilities/gcp_bucket/simple_example.py +9 -0
  76. edsl/utilities/interface.py +24 -28
  77. edsl/utilities/repair_functions.py +28 -0
  78. edsl/utilities/utilities.py +57 -2
  79. {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/METADATA +43 -17
  80. {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/RECORD +83 -55
  81. edsl-0.1.28.dist-info/entry_points.txt +3 -0
  82. edsl/conjure/RawResponseColumn.py +0 -327
  83. edsl/conjure/SurveyBuilder.py +0 -308
  84. edsl/conjure/SurveyBuilderCSV.py +0 -78
  85. edsl/conjure/SurveyBuilderSPSS.py +0 -118
  86. edsl/data/RemoteDict.py +0 -103
  87. {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/LICENSE +0 -0
  88. {edsl-0.1.27.dev2.dist-info → edsl-0.1.28.dist-info}/WHEEL +0 -0
@@ -0,0 +1,263 @@
1
+ import re
2
+ import keyword
3
+
4
# English stop words that sanitize_string drops from candidate names.
# Contraction fragments ("don", "t", "ll", ...) are listed on their own because
# sanitize_string splits on non-word characters, which includes apostrophes.
stop_words = set(
    """
    into mustn't there you'll don have at if on some
    with in can mightn off few not d hadn shan't
    t re where s won't mustn wasn't didn't has same
    too will you've all haven't isn't over of about its
    being it her should himself wasn out theirs aren that
    our shouldn't you'd such above my the any been as
    very herself o weren until their shouldn up wouldn couldn't
    hasn't no than hadn't had you here yourself yourselves during
    ain once aren't what so hers that'll other ours yours
    nor him doesn doesn't he them for ll isn this
    or who only itself they between against under me now
    mightn't those needn't these when before his she's having be
    don't haven won while both didn by ourselves m your
    then myself we it's should've through why from and hasn
    more how ve most because did y i an but
    whom below further am which just ma you're couldn do
    shan own again are weren't down is were each needn
    themselves she after does wouldn't to a was doing
    """.split()
)
185
+
186
+
187
def sanitize_string(input_string, max_length=35):
    """Return a sanitized version of the input string that can be used as a variable name.

    Runs of non-word characters are treated as separators, words found in the
    module-level ``stop_words`` set are dropped (case-insensitively), and the
    remaining words are joined with underscores and lower-cased. When the
    joined text is longer than ``max_length`` it is cut at ``max_length``
    characters and the trailing, possibly partial, word is discarded. A result
    that is a Python keyword gets a ``_modified`` suffix; the suffix is added
    after truncation, so such a result may exceed ``max_length``.

    :param input_string: The text to turn into a name.
    :param max_length: Maximum length of the joined words (before any keyword suffix).
    :return: The lower-cased, underscore-separated name.

    >>> candidate_names = ["How are you doing this morning, Dave? What is your favorite kind of coffee?", "class", "def", "here_is_some_text"]
    >>> [sanitize_string(name) for name in candidate_names]
    ['morning_dave_favorite_kind_coffee', 'class_modified', 'def_modified', 'here_is_some_text']
    >>> sanitize_string("Class")
    'class_modified'
    """
    # Replace special characters with spaces and split into words
    words = re.sub(r"\W+", " ", input_string).split()

    # Remove stopwords
    important_words = [word for word in words if word.lower() not in stop_words]

    # Join words with underscores
    sanitized_string = "_".join(important_words)

    # Ensure the length is at most max_length characters
    if len(sanitized_string) > max_length:
        pieces = sanitized_string[:max_length].split("_")
        if len(pieces) == 1:
            # A single long word: keep the truncated word itself.
            sanitized_string = pieces[0]
        else:
            # The cut may have landed mid-word, so drop the last piece.
            sanitized_string = "_".join(pieces[:-1])

    # Remove leading and trailing underscores
    sanitized_string = sanitized_string.strip("_")

    result = sanitized_string.lower()

    # The returned name is lower-cased, so the keyword check must also look at
    # the lower-cased form ("Class" would otherwise come back as "class").
    # The original-case check is kept so inputs such as "None" still receive
    # the suffix exactly as before.
    if keyword.iskeyword(sanitized_string) or keyword.iskeyword(result):
        result += "_modified"

    return result
240
+
241
+
242
+ # Example usage
243
+ # input_string = "This is a sample variable-name@123 for testing"
244
+ # sanitized_string = sanitize_string(input_string)
245
+ # print(sanitized_string) # Output: sample_variable_name_123_testing
246
+
247
+ # if __name__ == "__main__":
248
+ # candidate_names = [
249
+ # "How are you doing this morning, Dave? What is your favorite kind of coffee?",
250
+ # "class",
251
+ # "def",
252
+ # "here_is_some_text",
253
+ # ]
254
+ # for name in candidate_names:
255
+ # print(f"Original: {name}")
256
+ # print(f"Sanitized: {sanitize_string(name)}")
257
+ # print()
258
+
259
if __name__ == "__main__":
    # Run this module's doctests when it is executed as a script.
    import doctest

    # NOTE(review): InputDataABC is not referenced in this block; presumably a
    # leftover or imported for its side effects -- confirm before removing.
    from edsl.conjure.InputData import InputDataABC

    doctest.testmod(optionflags=doctest.ELLIPSIS)
edsl/conjure/utilities.py CHANGED
@@ -1,9 +1,59 @@
1
+ import requests
1
2
  import subprocess
2
3
  from io import StringIO
3
-
4
+ import os
4
5
  import pandas as pd
5
6
 
6
7
 
8
class ValidFilename:
    """A descriptor that only accepts strings naming an existing path.

    Assigning a non-string, or a string for which ``os.path.exists`` is false,
    raises ``ValueError``. An accepted value is stored in the owning instance's
    ``__dict__`` under the attribute's own name; reading the attribute before
    any assignment yields ``None``.

    >>> class Holder:
    ...     path = ValidFilename()
    >>> isinstance(Holder.path, ValidFilename)
    True
    >>> Holder().path is None
    True
    >>> Holder().path = 1
    Traceback (most recent call last):
    ...
    ValueError: The filename must be a string, not int
    """

    def __set_name__(self, owner, name):
        # Remember the attribute name so values can be stored per instance.
        self.name = name

    def __get__(self, instance, owner=None):
        # Class-level access (``Owner.attr``) passes ``instance=None``; return
        # the descriptor itself instead of failing on ``None.__dict__``.
        if instance is None:
            return self
        return instance.__dict__.get(self.name, None)

    def __set__(self, instance, value):
        if not isinstance(value, str):
            raise ValueError(
                f"The filename must be a string, not {type(value).__name__}"
            )

        if not os.path.exists(value):
            raise ValueError(f"The file '{value}' does not exist.")

        instance.__dict__[self.name] = value
32
+
33
+
34
class DummyClassToTestDescriptor:
    """Minimal holder class whose doctests exercise the ``ValidFilename`` descriptor.

    >>> d = DummyClassToTestDescriptor(1)
    Traceback (most recent call last):
    ...
    ValueError: The filename must be a string, not int

    >>> d = DummyClassToTestDescriptor("hello")
    Traceback (most recent call last):
    ...
    ValueError: The file 'hello' does not exist.
    """

    filename = ValidFilename()

    def __init__(self, filename):
        # The assignment is routed through the class-level descriptor.
        self.filename = filename

    def __repr__(self):
        return "DummyClassToTestDescriptor({})".format(self.filename)
55
+
56
+
7
57
  class Missing:
8
58
  def __repr__(self):
9
59
  return "Missing()"
@@ -16,6 +66,18 @@ class Missing:
16
66
 
17
67
 
18
68
  def convert_value(x):
69
+ """Takes a string and tries to convert it.
70
+
71
+ >>> convert_value('1')
72
+ 1
73
+ >>> convert_value('1.2')
74
+ 1.2
75
+ >>> convert_value("how are you?")
76
+ 'how are you?'
77
+ >>> convert_value("")
78
+ 'missing'
79
+
80
+ """
19
81
  try:
20
82
  float_val = float(x)
21
83
  if float_val.is_integer():
@@ -29,36 +91,111 @@ def convert_value(x):
29
91
  return str(x)
30
92
 
31
93
 
32
- class RCodeSnippet:
33
- def __init__(self, r_code):
34
- self.r_code = r_code
94
+ # class RCodeSnippet:
95
+ # def __init__(self, r_code):
96
+ # self.r_code = r_code
35
97
 
36
- def __call__(self, data_file_name):
37
- return self.run_R_stdin(self.r_code, data_file_name)
98
+ # def __call__(self, data_file_name):
99
+ # return self.run_R_stdin(self.r_code, data_file_name)
38
100
 
39
- def __add__(self, other):
40
- return RCodeSnippet(self.r_code + other.r_code)
101
+ # def __add__(self, other):
102
+ # return RCodeSnippet(self.r_code + other.r_code)
41
103
 
42
- def write_to_file(self, filename) -> None:
43
- """Writes the R code to a file; useful for debugging."""
44
- if filename.endswith(".R") or filename.endswith(".r"):
45
- pass
46
- else:
47
- filename += ".R"
104
+ # def write_to_file(self, filename) -> None:
105
+ # """Writes the R code to a file; useful for debugging."""
106
+ # if filename.endswith(".R") or filename.endswith(".r"):
107
+ # pass
108
+ # else:
109
+ # filename += ".R"
48
110
 
49
- with open(filename, "w") as f:
50
- f.write(self.r_code)
111
+ # with open(filename, "w") as f:
112
+ # f.write(self.r_code)
51
113
 
52
- @staticmethod
53
- def run_R_stdin(r_code, data_file_name, transform_func=lambda x: pd.read_csv(x)):
54
- """Runs an R script and returns the stdout as a string."""
55
- cmd = ["Rscript", "-e", r_code, data_file_name]
56
- process = subprocess.Popen(
57
- cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
114
+ # @staticmethod
115
+ # def run_R_stdin(r_code, data_file_name, transform_func=lambda x: pd.read_csv(x)):
116
+ # """Runs an R script and returns the stdout as a string."""
117
+ # cmd = ["Rscript", "-e", r_code, data_file_name]
118
+ # process = subprocess.Popen(
119
+ # cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
120
+ # )
121
+ # stdout, stderr = process.communicate()
122
+ # if stderr != "":
123
+ # print("Warning: stderr is not empty.")
124
+ # print(f"Problem running: {r_code}")
125
+ # raise Exception(stderr)
126
+ # return transform_func(StringIO(stdout))
127
+
128
+
129
def infer_question_type(question_text, responses, sample_size=15):
    """Ask a multiple-choice question to guess the type of a survey question.

    Builds a ``QuestionMultipleChoice`` whose options are the candidate
    question types, runs it through ``q.to_survey()(...)`` and returns the
    first value selected for ``infer_question_type``.

    :param question_text: Text of the survey question whose type is unknown.
    :param responses: Sliceable collection of observed responses; only the
        first ``sample_size`` are passed along.
    :param sample_size: Number of leading responses to include in the prompt.
    :return: Whatever ``.select("infer_question_type").first()`` yields --
        presumably one of the option strings below; confirm against Results.
    """
    from edsl.questions import QuestionMultipleChoice

    q = QuestionMultipleChoice(
        question_text="""We have a survey question and we are trying to infer its type.
        The question text is: '{{question_text}}'.
        The first {{ sample_size }} responses are: '{{responses}}'.
        There are {{ total }} responses in total.
        If a response is a comma-separated list, it is likely a checkbox question.
        """,
        question_name="infer_question_type",
        question_options=[
            "budget",
            "checkbox",
            "extract",
            "free_text",
            "likert_five",
            "linear_scale",
            "list",
            "multiple_choice",
            "numerical",
            "rank",
            "top_k",
            "yes_no",
        ],
    )
    # NOTE(review): the keyword arguments are presumably used to fill the
    # {{ ... }} placeholders above -- confirm against Survey.__call__. Each
    # placeholder needs a matching keyword: the original passed "sample_zize"
    # (typo) and never supplied "total".
    response = (
        q.to_survey()(
            question_text=question_text,
            sample_size=sample_size,
            responses=responses[:sample_size],
            total=len(responses),
        )
        .select("infer_question_type")
        .first()
    )
    return response
165
+
166
+
167
def download_file(url, filename, timeout=30):
    """
    Downloads a file from a given URL and saves it to the specified filename.

    Parameters:
    url (str): The URL of the file to download.
    filename (str): The name of the file to save the downloaded content.
    timeout (float | tuple | None): Seconds to wait for the server, passed to
        ``requests.get``. Without a timeout a stalled server would block the
        call indefinitely; pass ``None`` to wait forever as before.

    Returns:
    str | None: The path to the saved file, or ``None`` when the server
        answered with a status code other than 200.
    """
    # A browser-like User-Agent is sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Sending the GET request
    response = requests.get(url, headers=headers, timeout=timeout)

    # Anything other than 200 is reported and signalled with None.
    if response.status_code != 200:
        print(f"Failed to download file: {response.status_code}")
        return None

    # Writing the content to the specified file
    with open(filename, "wb") as file:
        file.write(response.content)
    print(f"File downloaded successfully and saved to {filename}")
    return filename
195
+
196
+
197
# Run this module's doctests when it is executed as a script.
if __name__ == "__main__":
    import doctest

    doctest.testmod(optionflags=doctest.ELLIPSIS)
+ doctest.testmod(optionflags=doctest.ELLIPSIS)