edsl 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +135 -219
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +138 -89
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +47 -56
  23. edsl/coop/PriceFetcher.py +58 -0
  24. edsl/coop/coop.py +50 -7
  25. edsl/data/Cache.py +35 -1
  26. edsl/data_transfer_models.py +73 -38
  27. edsl/enums.py +4 -0
  28. edsl/exceptions/language_models.py +25 -1
  29. edsl/exceptions/questions.py +62 -5
  30. edsl/exceptions/results.py +4 -0
  31. edsl/inference_services/AnthropicService.py +13 -11
  32. edsl/inference_services/AwsBedrock.py +19 -17
  33. edsl/inference_services/AzureAI.py +37 -20
  34. edsl/inference_services/GoogleService.py +16 -12
  35. edsl/inference_services/GroqService.py +2 -0
  36. edsl/inference_services/InferenceServiceABC.py +58 -3
  37. edsl/inference_services/MistralAIService.py +120 -0
  38. edsl/inference_services/OpenAIService.py +48 -54
  39. edsl/inference_services/TestService.py +80 -0
  40. edsl/inference_services/TogetherAIService.py +170 -0
  41. edsl/inference_services/models_available_cache.py +0 -6
  42. edsl/inference_services/registry.py +6 -0
  43. edsl/jobs/Answers.py +10 -12
  44. edsl/jobs/FailedQuestion.py +78 -0
  45. edsl/jobs/Jobs.py +37 -22
  46. edsl/jobs/buckets/BucketCollection.py +24 -15
  47. edsl/jobs/buckets/TokenBucket.py +93 -14
  48. edsl/jobs/interviews/Interview.py +366 -78
  49. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +14 -68
  50. edsl/jobs/interviews/InterviewExceptionEntry.py +85 -19
  51. edsl/jobs/runners/JobsRunnerAsyncio.py +146 -175
  52. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  53. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  54. edsl/jobs/tasks/TaskHistory.py +148 -213
  55. edsl/language_models/LanguageModel.py +261 -156
  56. edsl/language_models/ModelList.py +2 -2
  57. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  58. edsl/language_models/fake_openai_call.py +15 -0
  59. edsl/language_models/fake_openai_service.py +61 -0
  60. edsl/language_models/registry.py +23 -6
  61. edsl/language_models/repair.py +0 -19
  62. edsl/language_models/utilities.py +61 -0
  63. edsl/notebooks/Notebook.py +20 -2
  64. edsl/prompts/Prompt.py +52 -2
  65. edsl/questions/AnswerValidatorMixin.py +23 -26
  66. edsl/questions/QuestionBase.py +330 -249
  67. edsl/questions/QuestionBaseGenMixin.py +133 -0
  68. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  69. edsl/questions/QuestionBudget.py +99 -41
  70. edsl/questions/QuestionCheckBox.py +227 -35
  71. edsl/questions/QuestionExtract.py +98 -27
  72. edsl/questions/QuestionFreeText.py +52 -29
  73. edsl/questions/QuestionFunctional.py +7 -0
  74. edsl/questions/QuestionList.py +141 -22
  75. edsl/questions/QuestionMultipleChoice.py +159 -65
  76. edsl/questions/QuestionNumerical.py +88 -46
  77. edsl/questions/QuestionRank.py +182 -24
  78. edsl/questions/Quick.py +41 -0
  79. edsl/questions/RegisterQuestionsMeta.py +31 -12
  80. edsl/questions/ResponseValidatorABC.py +170 -0
  81. edsl/questions/__init__.py +3 -4
  82. edsl/questions/decorators.py +21 -0
  83. edsl/questions/derived/QuestionLikertFive.py +10 -5
  84. edsl/questions/derived/QuestionLinearScale.py +15 -2
  85. edsl/questions/derived/QuestionTopK.py +10 -1
  86. edsl/questions/derived/QuestionYesNo.py +24 -3
  87. edsl/questions/descriptors.py +43 -7
  88. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  89. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  90. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  91. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  92. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  93. edsl/questions/prompt_templates/question_list.jinja +17 -0
  94. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  95. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  96. edsl/questions/question_registry.py +6 -2
  97. edsl/questions/templates/__init__.py +0 -0
  98. edsl/questions/templates/budget/__init__.py +0 -0
  99. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  100. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  101. edsl/questions/templates/checkbox/__init__.py +0 -0
  102. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  103. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  104. edsl/questions/templates/extract/__init__.py +0 -0
  105. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  106. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  107. edsl/questions/templates/free_text/__init__.py +0 -0
  108. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  109. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  110. edsl/questions/templates/likert_five/__init__.py +0 -0
  111. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  112. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  113. edsl/questions/templates/linear_scale/__init__.py +0 -0
  114. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  115. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  116. edsl/questions/templates/list/__init__.py +0 -0
  117. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  118. edsl/questions/templates/list/question_presentation.jinja +5 -0
  119. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  120. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  121. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  122. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  123. edsl/questions/templates/numerical/__init__.py +0 -0
  124. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  125. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  126. edsl/questions/templates/rank/__init__.py +0 -0
  127. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  128. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  129. edsl/questions/templates/top_k/__init__.py +0 -0
  130. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  131. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  132. edsl/questions/templates/yes_no/__init__.py +0 -0
  133. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  134. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  135. edsl/results/Dataset.py +20 -0
  136. edsl/results/DatasetExportMixin.py +46 -48
  137. edsl/results/DatasetTree.py +145 -0
  138. edsl/results/Result.py +32 -5
  139. edsl/results/Results.py +135 -46
  140. edsl/results/ResultsDBMixin.py +3 -3
  141. edsl/results/Selector.py +118 -0
  142. edsl/results/tree_explore.py +115 -0
  143. edsl/scenarios/FileStore.py +71 -10
  144. edsl/scenarios/Scenario.py +96 -25
  145. edsl/scenarios/ScenarioImageMixin.py +2 -2
  146. edsl/scenarios/ScenarioList.py +361 -39
  147. edsl/scenarios/ScenarioListExportMixin.py +9 -0
  148. edsl/scenarios/ScenarioListPdfMixin.py +150 -4
  149. edsl/study/SnapShot.py +8 -1
  150. edsl/study/Study.py +32 -0
  151. edsl/surveys/Rule.py +10 -1
  152. edsl/surveys/RuleCollection.py +21 -5
  153. edsl/surveys/Survey.py +637 -311
  154. edsl/surveys/SurveyExportMixin.py +71 -9
  155. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  156. edsl/surveys/SurveyQualtricsImport.py +75 -4
  157. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  158. edsl/surveys/instructions/Instruction.py +34 -0
  159. edsl/surveys/instructions/InstructionCollection.py +77 -0
  160. edsl/surveys/instructions/__init__.py +0 -0
  161. edsl/templates/error_reporting/base.html +24 -0
  162. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  163. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  164. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  165. edsl/templates/error_reporting/interview_details.html +116 -0
  166. edsl/templates/error_reporting/interviews.html +10 -0
  167. edsl/templates/error_reporting/overview.html +5 -0
  168. edsl/templates/error_reporting/performance_plot.html +2 -0
  169. edsl/templates/error_reporting/report.css +74 -0
  170. edsl/templates/error_reporting/report.html +118 -0
  171. edsl/templates/error_reporting/report.js +25 -0
  172. edsl/utilities/utilities.py +9 -1
  173. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/METADATA +5 -2
  174. edsl-0.1.33.dist-info/RECORD +295 -0
  175. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
  176. edsl/jobs/interviews/retry_management.py +0 -37
  177. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -333
  178. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  179. edsl-0.1.32.dist-info/RECORD +0 -209
  180. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  181. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -0,0 +1,145 @@
1
+ from typing import Dict, List, Any, Optional
2
+ from docx import Document
3
+ from docx.shared import Inches, Pt
4
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
5
+ from docx.enum.style import WD_STYLE_TYPE
6
+
7
+
8
class TreeNode:
    """One node of a :class:`Tree`.

    ``key`` is the name of the data column the node was created from and
    ``value`` is the cell value it represents; both are ``None`` for the
    root node created by :meth:`Tree.construct_tree`.  ``children`` maps a
    child's value to its ``TreeNode``.
    """

    def __init__(self, key=None, value=None):
        self.key = key
        self.value = value
        self.children = {}

    def __repr__(self) -> str:
        return (
            f"TreeNode(key={self.key!r}, value={self.value!r}, "
            f"children={len(self.children)})"
        )


class Tree:
    """Nest column-oriented data into a tree, one level per column.

    ``data`` is iterated and each entry is merged into a single dict with
    ``dict.update``; the result (``self.data``) maps a column name to its
    list of values.  ``self.root`` stays ``None`` until
    :meth:`construct_tree` is called.
    """

    # ``to_docx`` only guarantees that "Heading 1" .. "Heading 9" exist.
    _MAX_HEADING_LEVEL = 9

    def __init__(self, data: "Dataset"):
        merged = {}
        for entry in data:
            merged.update(entry)
        self.data = merged
        self.root = None

    @staticmethod
    def _unique(values) -> list:
        """Return the distinct items of ``values`` in first-seen order."""
        try:
            return list(dict.fromkeys(values))
        except TypeError:
            # Unhashable cells (e.g. lists): fall back to equality checks.
            distinct = []
            for item in values:
                if item not in distinct:
                    distinct.append(item)
            return distinct

    def unique_values_by_keys(self) -> dict:
        """Map every column name to the list of its distinct values.

        Values are listed in first-seen order, so the result is
        deterministic across runs.
        """
        return {key: self._unique(values) for key, values in self.data.items()}

    def construct_tree(self, node_order: Optional[List[str]] = None):
        """Build the tree and store its root in ``self.root``.

        :param node_order: column names from the top level down to the leaf
            level.  When omitted, columns are ordered by their number of
            distinct values, largest first.
        :raises ValueError: if ``node_order`` names a column that is not in
            the data.

        With no columns to nest (empty data or an empty ``node_order``) the
        tree consists of a bare root node.
        """
        if node_order is None:
            unique_values = self.unique_values_by_keys()
            # Sort keys by number of unique values
            node_order = sorted(
                unique_values, key=lambda k: len(unique_values[k]), reverse=True
            )
        else:
            invalid_keys = set(node_order) - set(self.data.keys())
            if invalid_keys:
                raise ValueError(f"Invalid keys in node_order: {invalid_keys}")

        self.root = TreeNode()
        if not node_order:
            return

        # The number of rows is taken from the first column in node_order.
        for i in range(len(self.data[node_order[0]])):
            current = self.root
            for level in node_order[:-1]:
                value = self.data[level][i]
                if value not in current.children:
                    current.children[value] = TreeNode(key=level, value=value)
                current = current.children[value]

            # The last column only adds a leaf; nothing descends below it.
            leaf_key = node_order[-1]
            leaf_value = self.data[leaf_key][i]
            if leaf_value not in current.children:
                current.children[leaf_value] = TreeNode(key=leaf_key, value=leaf_value)

    def print_tree(
        self, node: Optional[TreeNode] = None, level: int = 0, print_keys: bool = False
    ):
        """Print the tree depth-first, indenting each node by its depth.

        :param node: subtree to print; defaults to the root.
        :param level: depth of ``node``, used for the indentation.
        :param print_keys: when true, prefix each value with its column name.

        Prints a notice and returns if the tree has not been constructed.
        """
        if node is None:
            node = self.root
        if node is None:
            print("Tree has not been constructed yet.")
            return

        if node.value is not None:
            if print_keys and node.key is not None:
                print(" " * level + f"{node.key}: {node.value}")
            else:
                print(" " * level + str(node.value))
        for child in node.children.values():
            self.print_tree(child, level + 1, print_keys)

    def to_docx(self, filename: str):
        """Write the tree to ``filename`` as a Word document.

        Nodes that have children are written with heading styles and leaf
        nodes with the "Body Text" style.

        :raises ValueError: if :meth:`construct_tree` has not been called.
        """
        if self.root is None:
            raise ValueError(
                "Tree has not been constructed yet; call construct_tree() first."
            )

        doc = Document()

        # Create styles for headings
        for i in range(1, self._MAX_HEADING_LEVEL + 1):
            style_name = f"Heading {i}"
            if style_name not in doc.styles:
                doc.styles.add_style(style_name, WD_STYLE_TYPE.PARAGRAPH)

        # Get or create the 'Body Text' style
        if "Body Text" not in doc.styles:
            body_style = doc.styles.add_style("Body Text", WD_STYLE_TYPE.PARAGRAPH)
        else:
            body_style = doc.styles["Body Text"]

        body_style.font.size = Pt(11)

        self._add_to_docx(doc, self.root, 0)
        doc.save(filename)

    def _add_to_docx(self, doc, node: TreeNode, level: int):
        """Recursively append ``node`` and its descendants to ``doc``.

        ``level`` is the depth of ``node`` (the root is 0).
        """
        if node.value is not None:
            if level == 0:
                doc.add_heading(str(node.value), level=level + 1)
            elif node.children:  # If the node has children, it's not the last level
                para = doc.add_paragraph(str(node.value))
                # Deeper levels reuse the deepest heading style that to_docx
                # ensured, instead of asking for a style that may not exist.
                heading_level = min(level + 1, self._MAX_HEADING_LEVEL)
                para.style = f"Heading {heading_level}"
            else:  # If the node has no children, it's the last level (body text)
                para = doc.add_paragraph(str(node.value))
                para.style = "Body Text"

        # Children are processed even when the node itself has no value
        # (the root), so this loop stays outside the block above.
        for child in node.children.values():
            self._add_to_docx(doc, child, level + 1)
108
+
109
+
110
+ # Example usage (commented out)
111
+ """
112
+ from edsl.results.Dataset import Dataset
113
+
114
+ data = Dataset(
115
+ [
116
+ {"continent": ["North America", "Asia", "Europe", "North America", "Asia"]},
117
+ {"country": ["US", "China", "France", "Canada", "Japan"]},
118
+ {"city": ["New York", "Beijing", "Paris", "Toronto", "Tokyo"]},
119
+ {"population": [8419000, 21540000, 2161000, 2930000, 13960000]},
120
+ ]
121
+ )
122
+
123
+ tree = Tree(data)
124
+
125
+ try:
126
+ tree.construct_tree(["continent", "country", "city", "population"])
127
+ print("Tree without key names:")
128
+ tree.print_tree()
129
+ print("\nTree with key names:")
130
+ tree.print_tree(print_keys=True)
131
+ except ValueError as e:
132
+ print(f"Error: {e}")
133
+
134
+ # Demonstrating validation
135
+ try:
136
+ tree.construct_tree(["continent", "country", "invalid_key"])
137
+ except ValueError as e:
138
+ print(f"\nValidation Error: {e}")
139
+
140
+ tree = Tree(data)
141
+ tree.construct_tree(["continent", "country", "city", "population"])
142
+ tree.print_tree(print_keys=True)
143
+ tree.to_docx("tree_structure.docx")
144
+ print("DocX file 'tree_structure.docx' has been created.")
145
+ """
edsl/results/Result.py CHANGED
@@ -53,8 +53,8 @@ class Result(Base, UserDict):
53
53
 
54
54
  >>> import warnings
55
55
  >>> warnings.simplefilter("ignore", UserWarning)
56
- >>> Result.example().answer
57
- {'how_feeling': 'OK', 'how_feeling_comment': 'This is a real survey response from a human.', 'how_feeling_yesterday': 'Great', 'how_feeling_yesterday_comment': 'This is a real survey response from a human.'}
56
+ >>> Result.example().answer == {'how_feeling_yesterday': 'Great', 'how_feeling': 'OK'}
57
+ True
58
58
 
59
59
  Its main data is an Agent, a Scenario, a Model, an Iteration, and an Answer.
60
60
  These are stored both in the UserDict and as attributes.
@@ -73,6 +73,8 @@ class Result(Base, UserDict):
73
73
  raw_model_response=None,
74
74
  survey: Optional["Survey"] = None,
75
75
  question_to_attributes: Optional[dict] = None,
76
+ generated_tokens: Optional[dict] = None,
77
+ comments_dict: Optional[dict] = None,
76
78
  ):
77
79
  """Initialize a Result object.
78
80
 
@@ -113,6 +115,7 @@ class Result(Base, UserDict):
113
115
  "prompt": prompt or {},
114
116
  "raw_model_response": raw_model_response or {},
115
117
  "question_to_attributes": question_to_attributes,
118
+ "generated_tokens": generated_tokens or {},
116
119
  }
117
120
  super().__init__(**data)
118
121
  # but also store the data as attributes
@@ -125,6 +128,8 @@ class Result(Base, UserDict):
125
128
  self.raw_model_response = raw_model_response or {}
126
129
  self.survey = survey
127
130
  self.question_to_attributes = question_to_attributes
131
+ self.generated_tokens = generated_tokens
132
+ self.comments_dict = comments_dict or {}
128
133
 
129
134
  self._combined_dict = None
130
135
  self._problem_keys = None
@@ -140,7 +145,7 @@ class Result(Base, UserDict):
140
145
  else:
141
146
  agent_name = self.agent.name
142
147
 
143
- comments_dict = {k: v for k, v in self.answer.items() if k.endswith("_comment")}
148
+ # comments_dict = {k: v for k, v in self.answer.items() if k.endswith("_comment")}
144
149
  question_text_dict = {}
145
150
  question_options_dict = {}
146
151
  question_type_dict = {}
@@ -167,11 +172,12 @@ class Result(Base, UserDict):
167
172
  "answer": self.answer,
168
173
  "prompt": self.prompt,
169
174
  "raw_model_response": self.raw_model_response,
170
- # "iteration": {"iteration": self.iteration},
175
+ "iteration": {"iteration": self.iteration},
171
176
  "question_text": question_text_dict,
172
177
  "question_options": question_options_dict,
173
178
  "question_type": question_type_dict,
174
- "comment": comments_dict,
179
+ "comment": self.comments_dict,
180
+ "generated_tokens": self.generated_tokens,
175
181
  }
176
182
 
177
183
  def check_expression(self, expression) -> None:
@@ -260,6 +266,26 @@ class Result(Base, UserDict):
260
266
  for key, value in subdict.items():
261
267
  yield (index, data_type, key, str(value))
262
268
 
269
def leaves(self):
    """Return one flat record per answered question of this result.

    Each record is a dict with the keys ``question`` (``"(<name>): <text>"``),
    ``answer``, ``comment``, ``scenario``, ``agent``, ``model`` (the last
    three as ``repr`` strings) and ``iteration``.

    Entries of ``self.answer`` whose name ends in ``_comment`` are not
    questions and are skipped.  The comment for a question is looked up under
    ``<name>_comment`` in ``self.comments_dict`` first, then in
    ``self.answer``, and defaults to ``""``.  A missing
    ``question_to_attributes`` (it is optional) or a missing question text
    yields an empty text instead of an error.
    """
    attributes = self.question_to_attributes or {}
    comments = getattr(self, "comments_dict", None) or {}
    scenario = repr(self.scenario)
    agent = repr(self.agent)
    model = repr(self.model)

    rows = []
    for question_name, answer in self.answer.items():
        if question_name.endswith("_comment"):
            continue
        comment_key = question_name + "_comment"
        question_text = attributes.get(question_name, {}).get("question_text", "")
        rows.append(
            {
                "question": f"({question_name}): " + str(question_text),
                "answer": answer,
                "comment": comments.get(
                    comment_key, self.answer.get(comment_key, "")
                ),
                "scenario": scenario,
                "agent": agent,
                "model": model,
                "iteration": self.iteration,
            }
        )
    return rows
288
+
263
289
  ###############
264
290
  # Useful
265
291
  ###############
@@ -341,6 +367,7 @@ class Result(Base, UserDict):
341
367
  "raw_model_response", {"raw_model_response": "No raw model response"}
342
368
  ),
343
369
  question_to_attributes=json_dict.get("question_to_attributes", None),
370
+ generated_tokens=json_dict.get("generated_tokens", {}),
344
371
  )
345
372
  return result
346
373
 
edsl/results/Results.py CHANGED
@@ -17,6 +17,7 @@ from edsl.exceptions.results import (
17
17
  ResultsInvalidNameError,
18
18
  ResultsMutateError,
19
19
  ResultsFilterError,
20
+ ResultsDeserializationError,
20
21
  )
21
22
 
22
23
  from edsl.results.ResultsExportMixin import ResultsExportMixin
@@ -77,6 +78,7 @@ class Results(UserList, Mixins, Base):
77
78
  "question_options",
78
79
  "question_type",
79
80
  "comment",
81
+ "generated_tokens",
80
82
  ]
81
83
 
82
84
  def __init__(
@@ -108,6 +110,81 @@ class Results(UserList, Mixins, Base):
108
110
  if hasattr(self, "_add_output_functions"):
109
111
  self._add_output_functions()
110
112
 
113
def leaves(self):
    """Return the leaf records of every result, concatenated in order.

    Iterates over ``self`` and chains together the lists returned by each
    item's ``leaves()`` method.
    """
    return [leaf for result in self for leaf in result.leaves()]
118
+
119
def tree(
    self,
    fold_attributes: Optional[List[str]] = None,
    drop: Optional[List[str]] = None,
    open_file=True,
) -> Optional[str]:
    """Render the results as a foldable HTML tree.

    :param fold_attributes: attributes to fold on; each must be one of
        ``model``, ``scenario``, ``agent``, ``answer``, ``question`` or
        ``iteration``.
    :param drop: passed through to the generator's ``tree`` call; defaults
        to an empty list.
    :param open_file: outside a notebook, open the generated file in the
        web browser when true; otherwise return the HTML text.
    :returns: the HTML text when not in a notebook and ``open_file`` is
        false; ``None`` otherwise.
    :raises ValueError: if ``fold_attributes`` contains an unknown name.

    In a notebook the HTML is displayed inline in an iframe.  Elsewhere it
    is always written to a temporary ``.html`` file whose path is printed.
    """
    valid_attributes = [
        "model",
        "scenario",
        "agent",
        "answer",
        "question",
        "iteration",
    ]
    if drop is None:
        drop = []
    if fold_attributes is None:
        fold_attributes = []

    # Validate before importing anything so bad input fails fast.
    for attribute in fold_attributes:
        if attribute not in valid_attributes:
            raise ValueError(
                f"Invalid fold attribute: {attribute}; must be in {valid_attributes}"
            )

    from edsl.results.tree_explore import FoldableHTMLTableGenerator
    from edsl.utilities.utilities import is_notebook

    generator = FoldableHTMLTableGenerator(self.leaves())
    folded = generator.tree(fold_attributes=fold_attributes, drop=drop)
    html_content = generator.generate_html(folded, fold_attributes)

    if is_notebook():
        # IPython is only needed (and only imported) on the notebook path.
        import html
        from IPython.display import display, HTML

        height = 1000
        width = 1000
        # The document is embedded in an attribute, so it must be escaped.
        escaped_output = html.escape(html_content)
        iframe = f"""
        <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
        """
        display(HTML(iframe))
        return None

    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
        f.write(html_content.encode())
    # The file is closed (and therefore flushed) once the ``with`` exits,
    # so the browser can read it immediately.
    print(f"HTML file has been generated: {f.name}")

    if not open_file:
        return html_content

    import webbrowser
    from pathlib import Path

    # as_uri() yields a well-formed file:// URL on every platform.
    webbrowser.open(Path(f.name).resolve().as_uri())
    return None
187
+
111
188
  def code(self):
112
189
  raise NotImplementedError
113
190
 
@@ -168,7 +245,9 @@ class Results(UserList, Mixins, Base):
168
245
  )
169
246
 
170
247
  def __repr__(self) -> str:
171
- return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
248
+ import reprlib
249
+
250
+ return f"Results(data = {reprlib.repr(self.data)}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
172
251
 
173
252
  def _repr_html_(self) -> str:
174
253
  from IPython.display import HTML
@@ -290,8 +369,7 @@ class Results(UserList, Mixins, Base):
290
369
  ),
291
370
  )
292
371
  except Exception as e:
293
- print(e)
294
- # breakpoint()
372
+ raise ResultsDeserializationError(f"Error in Results.from_dict: {e}")
295
373
  return results
296
374
 
297
375
  ######################
@@ -395,7 +473,7 @@ class Results(UserList, Mixins, Base):
395
473
 
396
474
  >>> r = Results.example()
397
475
  >>> r.models[0]
398
- Model(model_name = 'gpt-4-1106-preview', temperature = 0.5, max_tokens = 1000, top_p = 1, frequency_penalty = 0, presence_penalty = 0, logprobs = False, top_logprobs = 3)
476
+ Model(model_name = ...)
399
477
  """
400
478
  return [r.model for r in self.data]
401
479
 
@@ -477,39 +555,6 @@ class Results(UserList, Mixins, Base):
477
555
  )
478
556
  return sorted(list(all_keys))
479
557
 
480
- def _parse_column(self, column: str) -> tuple[str, str]:
481
- """
482
- Parses a column name into a tuple containing a data type and a key.
483
-
484
- >>> r = Results.example()
485
- >>> r._parse_column("answer.how_feeling")
486
- ('answer', 'how_feeling')
487
-
488
- The standard way a column is specified is with a dot-separated string, e.g. _parse_column("agent.status")
489
- But you can also specify a single key, e.g. "status", in which case it will look up the data type.
490
- """
491
- if "." in column:
492
- data_type, key = column.split(".")
493
- else:
494
- try:
495
- data_type, key = self._key_to_data_type[column], column
496
- except KeyError:
497
- import difflib
498
-
499
- close_matches = difflib.get_close_matches(
500
- column, self._key_to_data_type.keys()
501
- )
502
- if close_matches:
503
- suggestions = ", ".join(close_matches)
504
- raise ResultsColumnNotFoundError(
505
- f"Column '{column}' not found in data. Did you mean: {suggestions}?"
506
- )
507
- else:
508
- raise ResultsColumnNotFoundError(
509
- f"Column {column} not found in data"
510
- )
511
- return data_type, key
512
-
513
558
  def first(self) -> "Result":
514
559
  """Return the first observation in the results.
515
560
 
@@ -632,9 +677,11 @@ class Results(UserList, Mixins, Base):
632
677
  """
633
678
  if functions_dict is None:
634
679
  functions_dict = {}
635
- return EvalWithCompoundTypes(
680
+ evaluator = EvalWithCompoundTypes(
636
681
  names=result.combined_dict, functions=functions_dict
637
682
  )
683
+ evaluator.functions.update(int=int, float=float)
684
+ return evaluator
638
685
 
639
686
  def mutate(
640
687
  self, new_var_string: str, functions_dict: Optional[dict] = None
@@ -721,8 +768,8 @@ class Results(UserList, Mixins, Base):
721
768
 
722
769
  def sample(
723
770
  self,
724
- n: int = None,
725
- frac: float = None,
771
+ n: Optional[int] = None,
772
+ frac: Optional[float] = None,
726
773
  with_replacement: bool = True,
727
774
  seed: Optional[str] = "edsl",
728
775
  ) -> Results:
@@ -771,13 +818,17 @@ class Results(UserList, Mixins, Base):
771
818
  Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
772
819
 
773
820
  >>> results.select('how_feeling', 'model', 'how_feeling')
774
- Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'model.model': ['gpt-4-1106-preview', 'gpt-4-1106-preview', 'gpt-4-1106-preview', 'gpt-4-1106-preview']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
821
+ Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'model.model': ['...', '...', '...', '...']}, {'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
822
+
823
+ >>> from edsl import Results; r = Results.example(); r.select('answer.how_feeling_y')
824
+ Dataset([{'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
775
825
  """
776
826
 
777
- if len(self) == 0:
778
- raise Exception("No data to select from---the Results object is empty.")
827
+ # if len(self) == 0:
828
+ # raise Exception("No data to select from---the Results object is empty.")
779
829
 
780
830
  if not columns or columns == ("*",) or columns == (None,):
831
+ # if the user passes nothing, then we'll return all the columns
781
832
  columns = ("*.*",)
782
833
 
783
834
  if isinstance(columns[0], list):
@@ -801,6 +852,16 @@ class Results(UserList, Mixins, Base):
801
852
  # iterate through the passed columns
802
853
  for column in columns:
803
854
  # a user could pass 'result.how_feeling' or just 'how_feeling'
855
+ matches = self._matching_columns(column)
856
+ if len(matches) > 1:
857
+ raise Exception(
858
+ f"Column '{column}' is ambiguous. Did you mean one of {matches}?"
859
+ )
860
+ if len(matches) == 0 and ".*" not in column:
861
+ raise Exception(f"Column '{column}' not found in data.")
862
+ if len(matches) == 1:
863
+ column = matches[0]
864
+
804
865
  parsed_data_type, parsed_key = self._parse_column(column)
805
866
  data_types = get_data_types_to_return(parsed_data_type)
806
867
  found_once = False # we need to track this to make sure we found the key at least once
@@ -843,6 +904,21 @@ class Results(UserList, Mixins, Base):
843
904
 
844
905
  return Dataset(sorted_new_data)
845
906
 
907
def select(self, *columns: Union[str, list[str]]) -> "Dataset":
    """Select columns from the results by delegating to ``Selector``.

    :param columns: column names, or a single list of column names; they
        are passed unchanged to ``Selector.select``.
    :returns: whatever ``Selector.select`` returns for ``columns``.
    :raises Exception: if the Results object is empty.
    """
    if len(self) == 0:
        raise Exception("No data to select from---the Results object is empty.")

    from edsl.results.Selector import Selector

    selector = Selector(
        known_data_types=self.known_data_types,
        data_type_to_keys=self._data_type_to_keys,
        key_to_data_type=self._key_to_data_type,
        fetch_list_func=self._fetch_list,
        columns=self.columns,
    )
    return selector.select(*columns)
921
+
846
922
  def sort_by(self, *columns: str, reverse: bool = False) -> Results:
847
923
  import warnings
848
924
 
@@ -851,6 +927,11 @@ class Results(UserList, Mixins, Base):
851
927
  )
852
928
  return self.order_by(*columns, reverse=reverse)
853
929
 
930
+ def _parse_column(self, column: str) -> tuple[str, str]:
931
+ if "." in column:
932
+ return column.split(".")
933
+ return self._key_to_data_type[column], column
934
+
854
935
  def order_by(self, *columns: str, reverse: bool = False) -> Results:
855
936
  """Sort the results by one or more columns.
856
937
 
@@ -948,7 +1029,9 @@ class Results(UserList, Mixins, Base):
948
1029
  def has_single_equals(string):
949
1030
  if "!=" in string:
950
1031
  return False
951
- if "=" in string and not "==" in string:
1032
+ if "=" in string and not (
1033
+ "==" in string or "<=" in string or ">=" in string
1034
+ ):
952
1035
  return True
953
1036
 
954
1037
  if has_single_equals(expression):
@@ -989,7 +1072,7 @@ class Results(UserList, Mixins, Base):
989
1072
  return Results(survey=self.survey, data=new_data, created_columns=None)
990
1073
 
991
1074
  @classmethod
992
- def example(cls, debug: bool = False, randomize: bool = False) -> Results:
1075
+ def example(cls, randomize: bool = False) -> Results:
993
1076
  """Return an example `Results` object.
994
1077
 
995
1078
  Example usage:
@@ -1003,7 +1086,13 @@ class Results(UserList, Mixins, Base):
1003
1086
 
1004
1087
  c = Cache()
1005
1088
  job = Jobs.example(randomize=randomize)
1006
- results = job.run(cache=c, debug=debug)
1089
+ results = job.run(
1090
+ cache=c,
1091
+ stop_on_exception=True,
1092
+ skip_retry=True,
1093
+ raise_validation_errors=True,
1094
+ disable_remote_inference=True,
1095
+ )
1007
1096
  return results
1008
1097
 
1009
1098
  def rich_print(self):
@@ -136,9 +136,9 @@ class ResultsDBMixin:
136
136
 
137
137
  >>> from edsl.results import Results
138
138
  >>> r = Results.example()
139
- >>> d = r.sql("select data_type, key, value from self where data_type = 'answer' limit 3", shape="long")
140
- >>> list(d['value'])
141
- ['OK', 'This is a real survey response from a human.', 'Great']
139
+ >>> d = r.sql("select data_type, key, value from self where data_type = 'answer' order by value limit 3", shape="long")
140
+ >>> sorted(list(d['value']))
141
+ ['Good', 'Great', 'Great']
142
142
 
143
143
  We can also return the data in wide format.
144
144
  Note the use of single quotes to escape the column names, as required by sql.
@@ -0,0 +1,118 @@
1
+ from typing import Union, List, Dict, Any
2
+ from collections import defaultdict
3
+ from edsl.results.Dataset import Dataset
4
+
5
+
6
class Selector:
    """Resolve column specifications and fetch the matching data.

    A column is given either as ``"data_type.key"`` or as a bare ``key``;
    either form may be a unique prefix of a known column, and ``*`` acts as
    a wildcard for the data type and/or the key.  The actual values are
    obtained through ``fetch_list_func(data_type, key)``.
    """

    def __init__(
        self,
        known_data_types: List[str],
        data_type_to_keys: Dict[str, List[str]],
        key_to_data_type: Dict[str, str],
        fetch_list_func,
        columns: List[str],
    ):
        self.known_data_types = known_data_types
        self._data_type_to_keys = data_type_to_keys
        self._key_to_data_type = key_to_data_type
        self._fetch_list = fetch_list_func
        self.columns = columns
        # "data_type.key" names in the order they were requested; rebuilt by
        # every call to _get_columns_to_fetch.
        self.items_in_order: List[str] = []

    def select(self, *columns: Union[str, List[str]]) -> "Dataset":
        """Return a ``Dataset`` holding the requested columns in request order."""
        columns = self._normalize_columns(columns)
        to_fetch = self._get_columns_to_fetch(columns)
        new_data = self._fetch_data(to_fetch)
        return Dataset(new_data)

    def _normalize_columns(self, columns: Union[str, List[str]]) -> tuple:
        """Turn the raw ``*columns`` arguments into a tuple of column names.

        Nothing, ``"*"`` or ``None`` means "everything" (``"*.*"``); a single
        list argument is unpacked.
        """
        if not columns or columns == ("*",) or columns == (None,):
            return ("*.*",)
        if isinstance(columns[0], list):
            return tuple(columns[0])
        return columns

    def _get_columns_to_fetch(self, columns: tuple) -> Dict[str, List[str]]:
        """Resolve ``columns`` into a ``{data_type: [keys]}`` mapping.

        Also resets and fills ``self.items_in_order`` with the resolved
        ``"data_type.key"`` names in request order.

        :raises ValueError: for ambiguous or unknown columns.
        """
        to_fetch = defaultdict(list)
        self.items_in_order = []

        for column in columns:
            matches = self._find_matching_columns(column)
            self._validate_matches(column, matches)

            if len(matches) == 1:
                # A unique (possibly prefix) match replaces what was typed.
                column = matches[0]

            data_type, key = self._parse_column(column)
            self._process_column(data_type, key, to_fetch)

        return to_fetch

    def _find_matching_columns(self, partial_name: str) -> list[str]:
        """Return the known names that start with ``partial_name``.

        Dotted names are compared against full column names, bare names
        against the key part only.  An exact match wins over longer names
        sharing the prefix.  Each name is listed once, so a key that appears
        under several data types is not reported as ambiguous.
        """
        if "." in partial_name:
            search_in_list = self.columns
        else:
            search_in_list = [s.split(".", 1)[-1] for s in self.columns]

        matches = list(
            dict.fromkeys(s for s in search_in_list if s.startswith(partial_name))
        )
        return [partial_name] if partial_name in matches else matches

    def _validate_matches(self, column: str, matches: List[str]):
        """Raise ``ValueError`` if ``column`` is ambiguous or unknown.

        Wildcard columns (containing ``".*"``) are allowed to match nothing
        here; they are expanded later.
        """
        if len(matches) > 1:
            raise ValueError(
                f"Column '{column}' is ambiguous. Did you mean one of {matches}?"
            )
        if len(matches) == 0 and ".*" not in column:
            raise ValueError(f"Column '{column}' not found in data.")

    def _parse_column(self, column: str) -> tuple[str, str]:
        """Split a column name into ``(data_type, key)``.

        A dotted name is split at its first dot; a bare key has its data
        type looked up in ``key_to_data_type``.

        :raises KeyError: if a bare key is unknown.
        """
        if "." in column:
            data_type, key = column.split(".", 1)
            return data_type, key
        try:
            return self._key_to_data_type[column], column
        except KeyError:
            self._raise_key_error(column)

    def _raise_key_error(self, column: str):
        """Raise ``KeyError`` for ``column``, suggesting close matches if any."""
        import difflib

        close_matches = difflib.get_close_matches(column, self._key_to_data_type.keys())
        if close_matches:
            suggestions = ", ".join(close_matches)
            raise KeyError(
                f"Column '{column}' not found in data. Did you mean: {suggestions}?"
            )
        else:
            raise KeyError(f"Column {column} not found in data")

    def _process_column(self, data_type: str, key: str, to_fetch: Dict[str, List[str]]):
        """Record every ``(data_type, key)`` pair matched by one column.

        ``"*"`` for either part matches everything.  Matches are appended to
        ``to_fetch`` and to ``self.items_in_order``.

        :raises ValueError: if nothing matched.
        """
        data_types = self._get_data_types_to_return(data_type)
        found_once = False

        for dt in data_types:
            relevant_keys = self._data_type_to_keys[dt]
            for k in relevant_keys:
                if k == key or key == "*":
                    found_once = True
                    to_fetch[dt].append(k)
                    self.items_in_order.append(f"{dt}.{k}")

        if not found_once:
            raise ValueError(f"Key {key} not found in data.")

    def _get_data_types_to_return(self, parsed_data_type: str) -> List[str]:
        """Expand a data-type specifier into the list of data types to scan.

        :raises ValueError: if the data type is neither ``"*"`` nor known.
        """
        if parsed_data_type == "*":
            return self.known_data_types
        if parsed_data_type not in self.known_data_types:
            raise ValueError(
                f"Data type {parsed_data_type} not found in data. Did you mean one of {self.known_data_types}"
            )
        return [parsed_data_type]

    def _fetch_data(self, to_fetch: Dict[str, List[str]]) -> List[Dict[str, Any]]:
        """Fetch the columns in ``to_fetch`` and return them in request order.

        Each distinct column is fetched once.  The result has exactly one
        ``{"data_type.key": values}`` entry per item in
        ``self.items_in_order``, so a column requested twice appears twice
        (both entries share the same fetched values).
        """
        fetched = {}
        for data_type, keys in to_fetch.items():
            for key in keys:
                name = f"{data_type}.{key}"
                if name not in fetched:
                    fetched[name] = self._fetch_list(data_type, key)

        return [
            {name: fetched[name]} for name in self.items_in_order if name in fetched
        ]