edsl 0.1.36.dev6__py3-none-any.whl → 0.1.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. edsl/Base.py +303 -303
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +48 -47
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +855 -804
  7. edsl/agents/AgentList.py +350 -337
  8. edsl/agents/Invigilator.py +222 -222
  9. edsl/agents/InvigilatorBase.py +284 -294
  10. edsl/agents/PromptConstructor.py +353 -312
  11. edsl/agents/__init__.py +3 -3
  12. edsl/agents/descriptors.py +99 -86
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +289 -289
  26. edsl/config.py +149 -149
  27. edsl/conjure/AgentConstructionMixin.py +160 -152
  28. edsl/conjure/Conjure.py +62 -62
  29. edsl/conjure/InputData.py +659 -659
  30. edsl/conjure/InputDataCSV.py +48 -48
  31. edsl/conjure/InputDataMixinQuestionStats.py +182 -182
  32. edsl/conjure/InputDataPyRead.py +91 -91
  33. edsl/conjure/InputDataSPSS.py +8 -8
  34. edsl/conjure/InputDataStata.py +8 -8
  35. edsl/conjure/QuestionOptionMixin.py +76 -76
  36. edsl/conjure/QuestionTypeMixin.py +23 -23
  37. edsl/conjure/RawQuestion.py +65 -65
  38. edsl/conjure/SurveyResponses.py +7 -7
  39. edsl/conjure/__init__.py +9 -9
  40. edsl/conjure/naming_utilities.py +263 -263
  41. edsl/conjure/utilities.py +201 -201
  42. edsl/conversation/Conversation.py +290 -238
  43. edsl/conversation/car_buying.py +58 -58
  44. edsl/conversation/chips.py +95 -0
  45. edsl/conversation/mug_negotiation.py +81 -81
  46. edsl/conversation/next_speaker_utilities.py +93 -93
  47. edsl/coop/PriceFetcher.py +54 -54
  48. edsl/coop/__init__.py +2 -2
  49. edsl/coop/coop.py +958 -849
  50. edsl/coop/utils.py +131 -131
  51. edsl/data/Cache.py +527 -527
  52. edsl/data/CacheEntry.py +228 -228
  53. edsl/data/CacheHandler.py +149 -149
  54. edsl/data/RemoteCacheSync.py +97 -84
  55. edsl/data/SQLiteDict.py +292 -292
  56. edsl/data/__init__.py +4 -4
  57. edsl/data/orm.py +10 -10
  58. edsl/data_transfer_models.py +73 -73
  59. edsl/enums.py +173 -173
  60. edsl/exceptions/BaseException.py +21 -0
  61. edsl/exceptions/__init__.py +54 -50
  62. edsl/exceptions/agents.py +38 -40
  63. edsl/exceptions/configuration.py +16 -16
  64. edsl/exceptions/coop.py +10 -10
  65. edsl/exceptions/data.py +14 -14
  66. edsl/exceptions/general.py +34 -34
  67. edsl/exceptions/jobs.py +33 -33
  68. edsl/exceptions/language_models.py +63 -63
  69. edsl/exceptions/prompts.py +15 -15
  70. edsl/exceptions/questions.py +91 -91
  71. edsl/exceptions/results.py +29 -26
  72. edsl/exceptions/scenarios.py +22 -0
  73. edsl/exceptions/surveys.py +37 -34
  74. edsl/inference_services/AnthropicService.py +87 -87
  75. edsl/inference_services/AwsBedrock.py +120 -115
  76. edsl/inference_services/AzureAI.py +217 -217
  77. edsl/inference_services/DeepInfraService.py +18 -18
  78. edsl/inference_services/GoogleService.py +156 -156
  79. edsl/inference_services/GroqService.py +20 -20
  80. edsl/inference_services/InferenceServiceABC.py +147 -147
  81. edsl/inference_services/InferenceServicesCollection.py +97 -72
  82. edsl/inference_services/MistralAIService.py +123 -123
  83. edsl/inference_services/OllamaService.py +18 -18
  84. edsl/inference_services/OpenAIService.py +224 -224
  85. edsl/inference_services/TestService.py +89 -89
  86. edsl/inference_services/TogetherAIService.py +170 -170
  87. edsl/inference_services/models_available_cache.py +118 -118
  88. edsl/inference_services/rate_limits_cache.py +25 -25
  89. edsl/inference_services/registry.py +39 -39
  90. edsl/inference_services/write_available.py +10 -10
  91. edsl/jobs/Answers.py +56 -56
  92. edsl/jobs/Jobs.py +1347 -1112
  93. edsl/jobs/__init__.py +1 -1
  94. edsl/jobs/buckets/BucketCollection.py +63 -63
  95. edsl/jobs/buckets/ModelBuckets.py +65 -65
  96. edsl/jobs/buckets/TokenBucket.py +248 -248
  97. edsl/jobs/interviews/Interview.py +661 -651
  98. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  99. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -182
  100. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  101. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  102. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  103. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  104. edsl/jobs/interviews/ReportErrors.py +66 -66
  105. edsl/jobs/interviews/interview_status_enum.py +9 -9
  106. edsl/jobs/runners/JobsRunnerAsyncio.py +338 -337
  107. edsl/jobs/runners/JobsRunnerStatus.py +332 -332
  108. edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
  109. edsl/jobs/tasks/TaskCreators.py +64 -64
  110. edsl/jobs/tasks/TaskHistory.py +442 -441
  111. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  112. edsl/jobs/tasks/task_status_enum.py +163 -163
  113. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  114. edsl/jobs/tokens/TokenUsage.py +34 -34
  115. edsl/language_models/KeyLookup.py +30 -0
  116. edsl/language_models/LanguageModel.py +706 -718
  117. edsl/language_models/ModelList.py +102 -102
  118. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  119. edsl/language_models/__init__.py +3 -2
  120. edsl/language_models/fake_openai_call.py +15 -15
  121. edsl/language_models/fake_openai_service.py +61 -61
  122. edsl/language_models/registry.py +137 -137
  123. edsl/language_models/repair.py +156 -156
  124. edsl/language_models/unused/ReplicateBase.py +83 -83
  125. edsl/language_models/utilities.py +64 -64
  126. edsl/notebooks/Notebook.py +259 -259
  127. edsl/notebooks/__init__.py +1 -1
  128. edsl/prompts/Prompt.py +357 -358
  129. edsl/prompts/__init__.py +2 -2
  130. edsl/questions/AnswerValidatorMixin.py +289 -289
  131. edsl/questions/QuestionBase.py +656 -616
  132. edsl/questions/QuestionBaseGenMixin.py +161 -161
  133. edsl/questions/QuestionBasePromptsMixin.py +234 -266
  134. edsl/questions/QuestionBudget.py +227 -227
  135. edsl/questions/QuestionCheckBox.py +359 -359
  136. edsl/questions/QuestionExtract.py +183 -183
  137. edsl/questions/QuestionFreeText.py +114 -113
  138. edsl/questions/QuestionFunctional.py +159 -159
  139. edsl/questions/QuestionList.py +231 -231
  140. edsl/questions/QuestionMultipleChoice.py +286 -286
  141. edsl/questions/QuestionNumerical.py +153 -153
  142. edsl/questions/QuestionRank.py +324 -324
  143. edsl/questions/Quick.py +41 -41
  144. edsl/questions/RegisterQuestionsMeta.py +71 -71
  145. edsl/questions/ResponseValidatorABC.py +174 -174
  146. edsl/questions/SimpleAskMixin.py +73 -73
  147. edsl/questions/__init__.py +26 -26
  148. edsl/questions/compose_questions.py +98 -98
  149. edsl/questions/decorators.py +21 -21
  150. edsl/questions/derived/QuestionLikertFive.py +76 -76
  151. edsl/questions/derived/QuestionLinearScale.py +87 -87
  152. edsl/questions/derived/QuestionTopK.py +91 -91
  153. edsl/questions/derived/QuestionYesNo.py +82 -82
  154. edsl/questions/descriptors.py +413 -418
  155. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  156. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  157. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  158. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  159. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  160. edsl/questions/prompt_templates/question_list.jinja +17 -17
  161. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  162. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  163. edsl/questions/question_registry.py +147 -147
  164. edsl/questions/settings.py +12 -12
  165. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  166. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  167. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  168. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  169. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  170. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  171. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  172. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  173. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  174. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  175. edsl/questions/templates/list/question_presentation.jinja +5 -5
  176. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  177. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  178. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  179. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  180. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  181. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  182. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  183. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  184. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  185. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  186. edsl/results/Dataset.py +293 -293
  187. edsl/results/DatasetExportMixin.py +717 -693
  188. edsl/results/DatasetTree.py +145 -145
  189. edsl/results/Result.py +450 -433
  190. edsl/results/Results.py +1071 -1158
  191. edsl/results/ResultsDBMixin.py +238 -238
  192. edsl/results/ResultsExportMixin.py +43 -43
  193. edsl/results/ResultsFetchMixin.py +33 -33
  194. edsl/results/ResultsGGMixin.py +121 -121
  195. edsl/results/ResultsToolsMixin.py +98 -98
  196. edsl/results/Selector.py +135 -118
  197. edsl/results/__init__.py +2 -2
  198. edsl/results/tree_explore.py +115 -115
  199. edsl/scenarios/FileStore.py +458 -443
  200. edsl/scenarios/Scenario.py +546 -507
  201. edsl/scenarios/ScenarioHtmlMixin.py +64 -59
  202. edsl/scenarios/ScenarioList.py +1112 -1101
  203. edsl/scenarios/ScenarioListExportMixin.py +52 -52
  204. edsl/scenarios/ScenarioListPdfMixin.py +261 -261
  205. edsl/scenarios/__init__.py +4 -2
  206. edsl/shared.py +1 -1
  207. edsl/study/ObjectEntry.py +173 -173
  208. edsl/study/ProofOfWork.py +113 -113
  209. edsl/study/SnapShot.py +80 -80
  210. edsl/study/Study.py +528 -528
  211. edsl/study/__init__.py +4 -4
  212. edsl/surveys/DAG.py +148 -148
  213. edsl/surveys/Memory.py +31 -31
  214. edsl/surveys/MemoryPlan.py +244 -244
  215. edsl/surveys/Rule.py +330 -324
  216. edsl/surveys/RuleCollection.py +387 -387
  217. edsl/surveys/Survey.py +1795 -1772
  218. edsl/surveys/SurveyCSS.py +261 -261
  219. edsl/surveys/SurveyExportMixin.py +259 -259
  220. edsl/surveys/SurveyFlowVisualizationMixin.py +121 -121
  221. edsl/surveys/SurveyQualtricsImport.py +284 -284
  222. edsl/surveys/__init__.py +3 -3
  223. edsl/surveys/base.py +53 -53
  224. edsl/surveys/descriptors.py +56 -56
  225. edsl/surveys/instructions/ChangeInstruction.py +47 -47
  226. edsl/surveys/instructions/Instruction.py +51 -51
  227. edsl/surveys/instructions/InstructionCollection.py +77 -77
  228. edsl/templates/error_reporting/base.html +23 -23
  229. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  230. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  231. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  232. edsl/templates/error_reporting/interview_details.html +115 -115
  233. edsl/templates/error_reporting/interviews.html +9 -9
  234. edsl/templates/error_reporting/overview.html +4 -4
  235. edsl/templates/error_reporting/performance_plot.html +1 -1
  236. edsl/templates/error_reporting/report.css +73 -73
  237. edsl/templates/error_reporting/report.html +117 -117
  238. edsl/templates/error_reporting/report.js +25 -25
  239. edsl/tools/__init__.py +1 -1
  240. edsl/tools/clusters.py +192 -192
  241. edsl/tools/embeddings.py +27 -27
  242. edsl/tools/embeddings_plotting.py +118 -118
  243. edsl/tools/plotting.py +112 -112
  244. edsl/tools/summarize.py +18 -18
  245. edsl/utilities/SystemInfo.py +28 -28
  246. edsl/utilities/__init__.py +22 -22
  247. edsl/utilities/ast_utilities.py +25 -25
  248. edsl/utilities/data/Registry.py +6 -6
  249. edsl/utilities/data/__init__.py +1 -1
  250. edsl/utilities/data/scooter_results.json +1 -1
  251. edsl/utilities/decorators.py +77 -77
  252. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  253. edsl/utilities/interface.py +627 -627
  254. edsl/utilities/repair_functions.py +28 -28
  255. edsl/utilities/restricted_python.py +70 -70
  256. edsl/utilities/utilities.py +409 -391
  257. {edsl-0.1.36.dev6.dist-info → edsl-0.1.37.dist-info}/LICENSE +21 -21
  258. {edsl-0.1.36.dev6.dist-info → edsl-0.1.37.dist-info}/METADATA +1 -1
  259. edsl-0.1.37.dist-info/RECORD +283 -0
  260. edsl-0.1.36.dev6.dist-info/RECORD +0 -279
  261. {edsl-0.1.36.dev6.dist-info → edsl-0.1.37.dist-info}/WHEEL +0 -0
@@ -1,391 +1,409 @@
1
- """Utility functions for working with strings, dictionaries, and files."""
2
-
3
- from functools import wraps
4
- import types
5
- import time
6
-
7
- import hashlib
8
- import json
9
- import keyword
10
- import os
11
- import random
12
- import re
13
- import string
14
- import tempfile
15
- import gzip
16
- import webbrowser
17
- import json
18
-
19
- from html import escape
20
- from typing import Callable, Union
21
-
22
-
23
- class CustomEncoder(json.JSONEncoder):
24
- def default(self, obj):
25
- try:
26
- return json.JSONEncoder.default(self, obj)
27
- except TypeError:
28
- return str(obj)
29
-
30
-
31
- def time_it(func):
32
- @wraps(func)
33
- def wrapper(*args, **kwargs):
34
- start_time = time.time()
35
- result = func(*args, **kwargs)
36
- end_time = time.time()
37
- execution_time = end_time - start_time
38
- class_name = args[0].__class__.__name__ if args else func.__module__
39
- print(
40
- f"Function {class_name}.{func.__name__} took {execution_time:.4f} seconds to execute"
41
- )
42
- return result
43
-
44
- return wrapper
45
-
46
-
47
- def time_all_functions(module_or_class):
48
- for name, obj in vars(module_or_class).items():
49
- if isinstance(obj, types.FunctionType):
50
- setattr(module_or_class, name, time_it(obj))
51
-
52
-
53
- def dict_hash(data: dict):
54
- return hash(
55
- int(hashlib.md5(json.dumps(data, sort_keys=True).encode()).hexdigest(), 16)
56
- )
57
-
58
-
59
- def extract_json_from_string(text):
60
- pattern = re.compile(r"\{.*?\}")
61
- match = pattern.search(text)
62
- if match:
63
- json_data = match.group(0)
64
- try:
65
- json_object = json.loads(json_data)
66
- return json_object
67
- except json.JSONDecodeError:
68
- return None
69
- return None
70
-
71
-
72
- def fix_partial_correct_response(text: str) -> dict:
73
- # Find the start position of the key "answer"
74
- answer_key_start = text.find('"answer"')
75
-
76
- if answer_key_start == -1:
77
- return {"error": "No 'answer' key found in the text"}
78
-
79
- # Define regex to find the complete JSON object starting with "answer"
80
- json_pattern = r'(\{[^\{\}]*"answer"[^\{\}]*\})'
81
- match = re.search(json_pattern, text)
82
-
83
- if not match:
84
- return {"error": "No valid JSON object found"}
85
-
86
- # Extract the matched JSON object
87
- json_object = match.group(0)
88
-
89
- # Find the start and stop positions of the JSON object in the original text
90
- start_pos = text.find(json_object)
91
- stop_pos = start_pos + len(json_object)
92
-
93
- # Parse the JSON object to validate it
94
- try:
95
- parsed_json = json.loads(json_object)
96
- except json.JSONDecodeError:
97
- return {"error": "Failed to parse JSON object"}
98
-
99
- # Return the result as a dictionary with positions
100
- return {"start": start_pos, "stop": stop_pos, "extracted_json": json_object}
101
-
102
-
103
- def clean_json(bad_json_str):
104
- """
105
- Clean JSON string by replacing single quotes with double quotes
106
-
107
- """
108
- replacements = [
109
- ("\\", "\\\\"),
110
- ("\n", "\\n"),
111
- ("\r", "\\r"),
112
- ("\t", "\\t"),
113
- ("\b", "\\b"),
114
- ("\f", "\\f"),
115
- ("[/INST]", "removed_inst"),
116
- ]
117
-
118
- s = bad_json_str
119
- for old, new in replacements:
120
- s = s.replace(old, new)
121
- return s
122
-
123
-
124
- def data_to_html(data, replace_new_lines=False):
125
- if "edsl_version" in data:
126
- _ = data.pop("edsl_version")
127
- if "edsl_class_name" in data:
128
- _ = data.pop("edsl_class_name")
129
-
130
- from pygments import highlight
131
- from pygments.lexers import JsonLexer
132
- from pygments.formatters import HtmlFormatter
133
- from IPython.display import HTML
134
-
135
- json_str = json.dumps(data, indent=4, cls=CustomEncoder)
136
- formatted_json = highlight(
137
- json_str,
138
- JsonLexer(),
139
- HtmlFormatter(style="default", full=False, noclasses=False),
140
- )
141
- if replace_new_lines:
142
- formatted_json = formatted_json.replace("\\n", "<br>")
143
-
144
- return HTML(formatted_json).data
145
-
146
-
147
- def is_gzipped(file_path):
148
- """Check if a file is gzipped."""
149
- try:
150
- with gzip.open(file_path, "rb") as file:
151
- file.read(1) # Try reading a small amount of data
152
- return True
153
- except OSError:
154
- return False
155
-
156
-
157
- def hash_value(value: Union[str, int]) -> str:
158
- """Hash a string or integer value using SHA-256."""
159
- if isinstance(value, str):
160
- value_bytes = value.encode("utf-8")
161
- elif isinstance(value, int):
162
- value_bytes = str(value).encode("utf-8")
163
- else:
164
- raise ValueError("Hashing supported only for strings or integers.")
165
- hash_obj = hashlib.sha256(value_bytes)
166
- return hash_obj.hexdigest()
167
-
168
-
169
- def repair_json(json_string: str) -> str:
170
- """Attempt to repair a JSON string that is not valid JSON."""
171
- json_string = json_string.replace("\n", "\\n").replace("\r", "\\r")
172
- json_string = json_string.replace("'", "\\'")
173
- json_string = json_string.replace("'", '"')
174
- json_string = re.sub(r",\s*}", "}", json_string)
175
- json_string = re.sub(r",\s*]", "]", json_string)
176
- json_string = re.sub(r"(?<={|,)\s*([a-zA-Z0-9_]+)\s*:", r'"\1":', json_string)
177
- return json_string
178
-
179
-
180
- def dict_to_html(d):
181
- """Convert a dictionary to an HTML table."""
182
- # Start the HTML table
183
- html_table = f'<table border="1">\n<tr><th>{escape("Key")}</th><th>{escape("Value")}</th></tr>\n'
184
-
185
- # Add rows to the HTML table
186
- for key, value in d.items():
187
- html_table += (
188
- f"<tr><td>{escape(str(key))}</td><td>{escape(str(value))}</td></tr>\n"
189
- )
190
-
191
- # Close the HTML table
192
- html_table += "</table>"
193
- return html_table
194
-
195
-
196
- def is_notebook() -> bool:
197
- """Check if the code is running in a Jupyter notebook."""
198
- try:
199
- shell = get_ipython().__class__.__name__
200
- if shell == "ZMQInteractiveShell":
201
- return True # Jupyter notebook or qtconsole
202
- elif shell == "TerminalInteractiveShell":
203
- return False # Terminal running IPython
204
- else:
205
- return False # Other type (e.g., IDLE, PyCharm, etc.)
206
- except NameError:
207
- return False # Probably standard Python interpreter
208
-
209
-
210
- class HTMLSnippet(str):
211
- """Create an object with html content (`value`).
212
-
213
- `view` method allows you to view the html content in a web browser.
214
- """
215
-
216
- def __init__(self, value):
217
- """Initialize the HTMLSnippet object."""
218
- super().__init__()
219
- self.value = value
220
-
221
- def view(self):
222
- """View the HTML content in a web browser."""
223
- html_content = self.value
224
-
225
- # create a tempfile to write the HTML content
226
- with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html") as f:
227
- f.write(html_content)
228
-
229
- # open the HTML tempfile in the default web browser
230
- webbrowser.open(f"file://{os.path.realpath(f.name)}")
231
-
232
-
233
- def random_string() -> str:
234
- """Generate a random string of fixed length."""
235
- return "".join(random.choice(string.ascii_letters) for i in range(10))
236
-
237
-
238
- def shortname_proposal(question, max_length=None):
239
- """Take a question text and generate a slug."""
240
- question = question.lower()
241
- tokens = question.split()
242
- stopwords = set(
243
- [
244
- "is",
245
- "your",
246
- "who",
247
- "the",
248
- "a",
249
- "an",
250
- "of",
251
- "could",
252
- "you",
253
- "what",
254
- "when",
255
- "where",
256
- "why",
257
- "in",
258
- "and",
259
- "to",
260
- "how",
261
- "are",
262
- "what",
263
- ]
264
- )
265
- filtered_tokens = [
266
- token.strip(string.punctuation) for token in tokens if token not in stopwords
267
- ]
268
- heading = "_".join(filtered_tokens)
269
- # Limit length if needed
270
- if max_length and len(heading) > max_length:
271
- heading = heading[:max_length]
272
- while heading.endswith("_"): # trim any trailing _ characters
273
- heading = heading[:-1]
274
- return heading
275
-
276
-
277
- def text_to_shortname(long_text, forbidden_names=[]):
278
- """Create a slug for the question."""
279
- proposed_name = shortname_proposal(long_text)
280
- counter = 1
281
- # make sure the name is unique
282
- while proposed_name in forbidden_names:
283
- proposed_name += f"_{counter}"
284
- counter += 1
285
- return proposed_name
286
-
287
-
288
- def merge_dicts(dict_list):
289
- """Merge a list of dictionaries into a single dictionary."""
290
- result = {}
291
- all_keys = set()
292
- for d in dict_list:
293
- all_keys.update(d.keys())
294
- for key in all_keys:
295
- result[key] = [d.get(key, None) for d in dict_list]
296
- return result
297
-
298
-
299
- def extract_json_from_string(s):
300
- """Extract a JSON string from a string."""
301
- # Find the first occurrence of '{'
302
- start_idx = s.find("{")
303
- # Find the last occurrence of '}'
304
- end_idx = s.rfind("}")
305
- # If both '{' and '}' are found in the string
306
- if start_idx != -1 and end_idx != -1 and start_idx < end_idx:
307
- # Extract the substring from start_idx to end_idx (inclusive)
308
- json_str = s[start_idx : end_idx + 1]
309
- return json_str
310
- else:
311
- raise ValueError("No JSON object found in string")
312
-
313
-
314
- def valid_json(json_string):
315
- """Check if a string is valid JSON."""
316
- try:
317
- _ = json.loads(json_string)
318
- return True
319
- except json.JSONDecodeError:
320
- return False
321
-
322
-
323
- def is_valid_variable_name(name, allow_name=True):
324
- """Check if a string is a valid variable name."""
325
- if allow_name:
326
- return name.isidentifier() and not keyword.iskeyword(name)
327
- else:
328
- return (
329
- name.isidentifier() and not keyword.iskeyword(name) and not name == "name"
330
- )
331
-
332
-
333
- def create_valid_var_name(s, transform_func: Callable = lambda x: x.lower()) -> str:
334
- """Create a valid variable name from a string."""
335
- if transform_func is None:
336
- transform_func = lambda x: x
337
-
338
- # Ensure the string is not empty
339
- if not s:
340
- raise ValueError("Input string cannot be empty.")
341
-
342
- if is_valid_variable_name(s):
343
- return transform_func(s)
344
-
345
- # Remove leading numbers if they exist since variable names can't start with a number
346
- s = re.sub("^[0-9]+", "", s)
347
-
348
- # Replace invalid characters (anything not a letter, number, or underscore) with an underscore
349
- s = re.sub("[^0-9a-zA-Z_]", "_", s)
350
-
351
- # Check if the first character is a number; if so, prepend an underscore
352
- if re.match("^[0-9]", s):
353
- s = "_" + s
354
-
355
- if s in keyword.kwlist:
356
- s += "_"
357
-
358
- # Ensure the string is not empty after the transformations
359
- if not s:
360
- raise ValueError(
361
- "Input string does not contain valid characters for a variable name."
362
- )
363
-
364
- return transform_func(s)
365
-
366
-
367
- def shorten_string(s, max_length, placeholder="..."):
368
- """Shorten a string to a maximum length by removing characters from the middle."""
369
- if len(s) <= max_length:
370
- return s
371
-
372
- # Length to be removed
373
- remove_length = len(s) - max_length + len(placeholder)
374
-
375
- # Find the indices to start and end removal
376
- start_remove = (len(s) - remove_length) // 2
377
- end_remove = start_remove + remove_length
378
-
379
- # Adjust start and end to break at spaces (if possible)
380
- start_space = s.rfind(" ", 0, start_remove)
381
- end_space = s.find(" ", end_remove)
382
-
383
- if start_space != -1 and end_space != -1:
384
- start_remove = start_space
385
- end_remove = end_space
386
- elif start_space != -1:
387
- start_remove = start_space
388
- elif end_space != -1:
389
- end_remove = end_space
390
-
391
- return s[:start_remove] + placeholder + s[end_remove:]
1
+ """Utility functions for working with strings, dictionaries, and files."""
2
+
3
+ from functools import wraps
4
+ import types
5
+ import time
6
+
7
+ import hashlib
8
+ import json
9
+ import keyword
10
+ import os
11
+ import random
12
+ import re
13
+ import string
14
+ import tempfile
15
+ import gzip
16
+ import webbrowser
17
+ import json
18
+
19
+ from html import escape
20
+ from typing import Callable, Union
21
+
22
+
23
+ class CustomEncoder(json.JSONEncoder):
24
+ def default(self, obj):
25
+ try:
26
+ return json.JSONEncoder.default(self, obj)
27
+ except TypeError:
28
+ return str(obj)
29
+
30
+
31
+ def time_it(func):
32
+ @wraps(func)
33
+ def wrapper(*args, **kwargs):
34
+ start_time = time.time()
35
+ result = func(*args, **kwargs)
36
+ end_time = time.time()
37
+ execution_time = end_time - start_time
38
+ class_name = args[0].__class__.__name__ if args else func.__module__
39
+ print(
40
+ f"Function {class_name}.{func.__name__} took {execution_time:.4f} seconds to execute"
41
+ )
42
+ return result
43
+
44
+ return wrapper
45
+
46
+
47
+ def time_all_functions(module_or_class):
48
+ for name, obj in vars(module_or_class).items():
49
+ if isinstance(obj, types.FunctionType):
50
+ setattr(module_or_class, name, time_it(obj))
51
+
52
+
53
+ def dict_hash(data: dict):
54
+ return hash(
55
+ int(hashlib.md5(json.dumps(data, sort_keys=True).encode()).hexdigest(), 16)
56
+ )
57
+
58
+
59
+ def extract_json_from_string(text):
60
+ pattern = re.compile(r"\{.*?\}")
61
+ match = pattern.search(text)
62
+ if match:
63
+ json_data = match.group(0)
64
+ try:
65
+ json_object = json.loads(json_data)
66
+ return json_object
67
+ except json.JSONDecodeError:
68
+ return None
69
+ return None
70
+
71
+
72
+ def fix_partial_correct_response(text: str) -> dict:
73
+ # Find the start position of the key "answer"
74
+ answer_key_start = text.find('"answer"')
75
+
76
+ if answer_key_start == -1:
77
+ return {"error": "No 'answer' key found in the text"}
78
+
79
+ # Define regex to find the complete JSON object starting with "answer"
80
+ json_pattern = r'(\{[^\{\}]*"answer"[^\{\}]*\})'
81
+ match = re.search(json_pattern, text)
82
+
83
+ if not match:
84
+ return {"error": "No valid JSON object found"}
85
+
86
+ # Extract the matched JSON object
87
+ json_object = match.group(0)
88
+
89
+ # Find the start and stop positions of the JSON object in the original text
90
+ start_pos = text.find(json_object)
91
+ stop_pos = start_pos + len(json_object)
92
+
93
+ # Parse the JSON object to validate it
94
+ try:
95
+ parsed_json = json.loads(json_object)
96
+ except json.JSONDecodeError:
97
+ return {"error": "Failed to parse JSON object"}
98
+
99
+ # Return the result as a dictionary with positions
100
+ return {"start": start_pos, "stop": stop_pos, "extracted_json": json_object}
101
+
102
+
103
+ def clean_json(bad_json_str):
104
+ """
105
+ Clean JSON string by replacing single quotes with double quotes
106
+
107
+ """
108
+ replacements = [
109
+ ("\\", "\\\\"),
110
+ ("\n", "\\n"),
111
+ ("\r", "\\r"),
112
+ ("\t", "\\t"),
113
+ ("\b", "\\b"),
114
+ ("\f", "\\f"),
115
+ ("[/INST]", "removed_inst"),
116
+ ]
117
+
118
+ s = bad_json_str
119
+ for old, new in replacements:
120
+ s = s.replace(old, new)
121
+ return s
122
+
123
+
124
+ def data_to_html(data, replace_new_lines=False):
125
+ if "edsl_version" in data:
126
+ _ = data.pop("edsl_version")
127
+ if "edsl_class_name" in data:
128
+ _ = data.pop("edsl_class_name")
129
+
130
+ from pygments import highlight
131
+ from pygments.lexers import JsonLexer
132
+ from pygments.formatters import HtmlFormatter
133
+ from IPython.display import HTML
134
+
135
+ json_str = json.dumps(data, indent=4, cls=CustomEncoder)
136
+ formatted_json = highlight(
137
+ json_str,
138
+ JsonLexer(),
139
+ HtmlFormatter(style="default", full=False, noclasses=False),
140
+ )
141
+ if replace_new_lines:
142
+ formatted_json = formatted_json.replace("\\n", "<br>")
143
+
144
+ return HTML(formatted_json).data
145
+
146
+
147
+ def is_gzipped(file_path):
148
+ """Check if a file is gzipped."""
149
+ try:
150
+ with gzip.open(file_path, "rb") as file:
151
+ file.read(1) # Try reading a small amount of data
152
+ return True
153
+ except OSError:
154
+ return False
155
+
156
+
157
+ def hash_value(value: Union[str, int]) -> str:
158
+ """Hash a string or integer value using SHA-256."""
159
+ if isinstance(value, str):
160
+ value_bytes = value.encode("utf-8")
161
+ elif isinstance(value, int):
162
+ value_bytes = str(value).encode("utf-8")
163
+ else:
164
+ raise ValueError("Hashing supported only for strings or integers.")
165
+ hash_obj = hashlib.sha256(value_bytes)
166
+ return hash_obj.hexdigest()
167
+
168
+
169
+ def repair_json(json_string: str) -> str:
170
+ """Attempt to repair a JSON string that is not valid JSON."""
171
+ json_string = json_string.replace("\n", "\\n").replace("\r", "\\r")
172
+ json_string = json_string.replace("'", "\\'")
173
+ json_string = json_string.replace("'", '"')
174
+ json_string = re.sub(r",\s*}", "}", json_string)
175
+ json_string = re.sub(r",\s*]", "]", json_string)
176
+ json_string = re.sub(r"(?<={|,)\s*([a-zA-Z0-9_]+)\s*:", r'"\1":', json_string)
177
+ return json_string
178
+
179
+
180
+ def dict_to_html(d):
181
+ """Convert a dictionary to an HTML table."""
182
+ # Start the HTML table
183
+ html_table = f'<table border="1">\n<tr><th>{escape("Key")}</th><th>{escape("Value")}</th></tr>\n'
184
+
185
+ # Add rows to the HTML table
186
+ for key, value in d.items():
187
+ html_table += (
188
+ f"<tr><td>{escape(str(key))}</td><td>{escape(str(value))}</td></tr>\n"
189
+ )
190
+
191
+ # Close the HTML table
192
+ html_table += "</table>"
193
+ return html_table
194
+
195
+
196
+ def is_notebook() -> bool:
197
+ """Check if the code is running in a Jupyter notebook."""
198
+ try:
199
+ shell = get_ipython().__class__.__name__
200
+ if shell == "ZMQInteractiveShell":
201
+ return True # Jupyter notebook or qtconsole
202
+ elif shell == "TerminalInteractiveShell":
203
+ return False # Terminal running IPython
204
+ else:
205
+ return False # Other type (e.g., IDLE, PyCharm, etc.)
206
+ except NameError:
207
+ return False # Probably standard Python interpreter
208
+
209
+
210
class HTMLSnippet(str):
    """A string of HTML that can also be opened in a web browser.

    The instance is itself a ``str``; the object passed to the constructor
    is additionally kept on ``value`` and is what ``view`` writes out.
    """

    def __init__(self, value):
        """Store ``value`` as the HTML content to display."""
        super().__init__()
        self.value = value

    def view(self):
        """Write the HTML to a temporary ``.html`` file and open it in the default browser.

        The file is created with ``delete=False`` and is not removed here.
        """
        from pathlib import Path

        # Write as UTF-8 explicitly so non-ASCII markup does not depend on
        # the platform's locale encoding.
        with tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".html", encoding="utf-8"
        ) as handle:
            handle.write(self.value)

        # Path.as_uri() builds a well-formed file URI (percent-encoded and
        # correct for Windows drive paths), unlike plain "file://" + path.
        webbrowser.open(Path(os.path.realpath(handle.name)).as_uri())
231
+
232
+
233
def random_string() -> str:
    """Return 10 ASCII letters, each drawn independently with ``random.choice``."""
    alphabet = string.ascii_letters
    picked = [random.choice(alphabet) for _ in range(10)]
    return "".join(picked)
236
+
237
+
238
def shortname_proposal(question, max_length=None):
    """Build an underscore-separated slug from a question text.

    The text is lower-cased and split on whitespace. Tokens that exactly
    match a stopword are dropped; the remaining tokens have leading and
    trailing punctuation stripped and are joined with ``_``. The stopword
    test runs before the punctuation strip, so a token such as ``"you?"``
    is kept (as ``"you"``).

    :param question: the question text.
    :param max_length: optional cap on the slug length; when the slug is
        longer it is cut to this length and trailing underscores left by
        the cut are removed. A falsy value disables the cap.
    :return: the slug.
    """
    ignored_words = {
        "is",
        "your",
        "who",
        "the",
        "a",
        "an",
        "of",
        "could",
        "you",
        "what",
        "when",
        "where",
        "why",
        "in",
        "and",
        "to",
        "how",
        "are",
    }

    kept_words = []
    for word in question.lower().split():
        if word in ignored_words:
            continue
        kept_words.append(word.strip(string.punctuation))
    slug = "_".join(kept_words)

    # No cap requested, or the slug already fits
    if not max_length or len(slug) <= max_length:
        return slug

    # Cut to size, then drop any underscores left dangling at the end
    return slug[:max_length].rstrip("_")
275
+
276
+
277
def text_to_shortname(long_text, forbidden_names=[]):
    """Derive a slug for ``long_text`` that is not in ``forbidden_names``.

    Starts from ``shortname_proposal(long_text)``. While the candidate is
    forbidden, ``_<counter>`` is appended to the current candidate and the
    counter is incremented, so suffixes accumulate (``x``, ``x_1``,
    ``x_1_2``, ...).

    :param long_text: text to turn into a slug.
    :param forbidden_names: names that must not be returned; only read.
    :return: the first candidate not found in ``forbidden_names``.
    """
    candidate = shortname_proposal(long_text)
    suffix = 1
    while candidate in forbidden_names:
        candidate = f"{candidate}_{suffix}"
        suffix += 1
    return candidate
286
+
287
+
288
def merge_dicts(dict_list):
    """Combine a list of dictionaries into one dictionary of lists.

    Each key seen in any input maps to a list with one entry per input
    dictionary, in input order; ``None`` fills the slot of a dictionary
    that lacks the key.

    :param dict_list: list of dictionaries.
    :return: dictionary mapping each key to its list of values.
    """
    seen_keys = set()
    for mapping in dict_list:
        seen_keys.update(mapping.keys())

    return {
        key: [mapping.get(key, None) for mapping in dict_list]
        for key in seen_keys
    }
297
+
298
+
299
def extract_json_from_string(s):
    """Return the substring from the first ``{`` to the last ``}``, inclusive.

    The slice is not parsed or validated.

    :param s: text that may contain a JSON object.
    :return: the extracted substring.
    :raises ValueError: if either brace is missing, or the first ``{`` does
        not come before the last ``}``.
    """
    opening = s.find("{")
    closing = s.rfind("}")

    # Both braces must exist, and in the right order
    if opening == -1 or closing == -1 or opening >= closing:
        raise ValueError("No JSON object found in string")

    return s[opening : closing + 1]
312
+
313
+
314
def valid_json(json_string):
    """Return ``True`` if ``json.loads`` parses ``json_string``.

    Returns ``False`` when parsing raises ``json.JSONDecodeError``; other
    exceptions are not caught.
    """
    try:
        json.loads(json_string)
    except json.JSONDecodeError:
        return False
    return True
321
+
322
+
323
def is_valid_variable_name(name, allow_name=True):
    """Tell whether ``name`` is an identifier that is not a Python keyword.

    :param name: candidate string.
    :param allow_name: when falsy, the literal string ``"name"`` is also
        rejected.
    :return: ``True`` if the name is acceptable, ``False`` otherwise.
    """
    if not name.isidentifier() or keyword.iskeyword(name):
        return False
    if allow_name:
        return True
    return name != "name"
331
+
332
+
333
def create_valid_var_name(s, transform_func: Callable = lambda x: x.lower()) -> str:
    """Turn ``s`` into a string usable as a variable name.

    If ``s`` already passes ``is_valid_variable_name`` it is only passed
    through ``transform_func``. Otherwise leading digits are removed, every
    character outside ``[0-9a-zA-Z_]`` becomes ``_``, and a name found in
    ``keyword.kwlist`` gets a trailing ``_``; ``transform_func`` is applied
    last.

    :param s: source string.
    :param transform_func: final transformation (lower-casing by default);
        ``None`` means no transformation.
    :return: the transformed name.
    :raises ValueError: if ``s`` is empty, or nothing is left after cleaning.
    """
    finalize = transform_func if transform_func is not None else (lambda x: x)

    # Ensure the string is not empty
    if not s:
        raise ValueError("Input string cannot be empty.")

    # Already a usable name: only the transformation is applied
    if is_valid_variable_name(s):
        return finalize(s)

    # Variable names can't start with a number, so drop leading digits
    candidate = re.sub("^[0-9]+", "", s)

    # Anything that is not a letter, number, or underscore becomes an underscore
    candidate = re.sub("[^0-9a-zA-Z_]", "_", candidate)

    # If the first character is still a number, prepend an underscore
    if re.match("^[0-9]", candidate):
        candidate = "_" + candidate

    # Avoid colliding with a Python keyword
    if candidate in keyword.kwlist:
        candidate += "_"

    # Ensure the string is not empty after the transformations
    if not candidate:
        raise ValueError(
            "Input string does not contain valid characters for a variable name."
        )

    return finalize(candidate)
365
+
366
+
367
def shorten_string(s, max_length, placeholder="..."):
    """Shorten ``s`` by replacing a span around its middle with ``placeholder``.

    Strings no longer than ``max_length`` are returned unchanged. Otherwise
    a span is chosen so the result is ``max_length`` long. Each edge of the
    span is then moved outwards to a space when one exists: the last space
    before the span start, and the first space at or after the span end.

    :param s: string to shorten.
    :param max_length: length above which shortening happens.
    :param placeholder: text inserted where characters were removed.
    :return: the original or shortened string.
    """
    total = len(s)
    if total <= max_length:
        return s

    # Number of characters to cut so that the placeholder still fits
    cut_length = total - max_length + len(placeholder)

    # Centre the cut in the string
    cut_from = (total - cut_length) // 2
    cut_to = cut_from + cut_length

    # Prefer breaking at spaces; each edge is adjusted independently
    space_before = s.rfind(" ", 0, cut_from)
    space_after = s.find(" ", cut_to)
    if space_before != -1:
        cut_from = space_before
    if space_after != -1:
        cut_to = space_after

    return s[:cut_from] + placeholder + s[cut_to:]
392
+
393
+
394
def write_api_key_to_env(api_key: str) -> None:
    """
    Store the user's Expected Parrot key in the ``.env`` file.

    The file is looked up relative to the current directory and is created
    if it does not exist. The key is written under the name
    ``EXPECTED_PARROT_API_KEY`` after conversion with ``str``.
    """
    from pathlib import Path
    from dotenv import set_key

    dotenv_location = ".env"

    # Make sure the file exists before the key is written
    Path(dotenv_location).touch(exist_ok=True)

    set_key(dotenv_location, "EXPECTED_PARROT_API_KEY", str(api_key))