edsl 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +135 -219
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +138 -89
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +47 -56
  23. edsl/coop/PriceFetcher.py +58 -0
  24. edsl/coop/coop.py +50 -7
  25. edsl/data/Cache.py +35 -1
  26. edsl/data_transfer_models.py +73 -38
  27. edsl/enums.py +4 -0
  28. edsl/exceptions/language_models.py +25 -1
  29. edsl/exceptions/questions.py +62 -5
  30. edsl/exceptions/results.py +4 -0
  31. edsl/inference_services/AnthropicService.py +13 -11
  32. edsl/inference_services/AwsBedrock.py +19 -17
  33. edsl/inference_services/AzureAI.py +37 -20
  34. edsl/inference_services/GoogleService.py +16 -12
  35. edsl/inference_services/GroqService.py +2 -0
  36. edsl/inference_services/InferenceServiceABC.py +58 -3
  37. edsl/inference_services/MistralAIService.py +120 -0
  38. edsl/inference_services/OpenAIService.py +48 -54
  39. edsl/inference_services/TestService.py +80 -0
  40. edsl/inference_services/TogetherAIService.py +170 -0
  41. edsl/inference_services/models_available_cache.py +0 -6
  42. edsl/inference_services/registry.py +6 -0
  43. edsl/jobs/Answers.py +10 -12
  44. edsl/jobs/FailedQuestion.py +78 -0
  45. edsl/jobs/Jobs.py +37 -22
  46. edsl/jobs/buckets/BucketCollection.py +24 -15
  47. edsl/jobs/buckets/TokenBucket.py +93 -14
  48. edsl/jobs/interviews/Interview.py +366 -78
  49. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +14 -68
  50. edsl/jobs/interviews/InterviewExceptionEntry.py +85 -19
  51. edsl/jobs/runners/JobsRunnerAsyncio.py +146 -175
  52. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  53. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  54. edsl/jobs/tasks/TaskHistory.py +148 -213
  55. edsl/language_models/LanguageModel.py +261 -156
  56. edsl/language_models/ModelList.py +2 -2
  57. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  58. edsl/language_models/fake_openai_call.py +15 -0
  59. edsl/language_models/fake_openai_service.py +61 -0
  60. edsl/language_models/registry.py +23 -6
  61. edsl/language_models/repair.py +0 -19
  62. edsl/language_models/utilities.py +61 -0
  63. edsl/notebooks/Notebook.py +20 -2
  64. edsl/prompts/Prompt.py +52 -2
  65. edsl/questions/AnswerValidatorMixin.py +23 -26
  66. edsl/questions/QuestionBase.py +330 -249
  67. edsl/questions/QuestionBaseGenMixin.py +133 -0
  68. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  69. edsl/questions/QuestionBudget.py +99 -41
  70. edsl/questions/QuestionCheckBox.py +227 -35
  71. edsl/questions/QuestionExtract.py +98 -27
  72. edsl/questions/QuestionFreeText.py +52 -29
  73. edsl/questions/QuestionFunctional.py +7 -0
  74. edsl/questions/QuestionList.py +141 -22
  75. edsl/questions/QuestionMultipleChoice.py +159 -65
  76. edsl/questions/QuestionNumerical.py +88 -46
  77. edsl/questions/QuestionRank.py +182 -24
  78. edsl/questions/Quick.py +41 -0
  79. edsl/questions/RegisterQuestionsMeta.py +31 -12
  80. edsl/questions/ResponseValidatorABC.py +170 -0
  81. edsl/questions/__init__.py +3 -4
  82. edsl/questions/decorators.py +21 -0
  83. edsl/questions/derived/QuestionLikertFive.py +10 -5
  84. edsl/questions/derived/QuestionLinearScale.py +15 -2
  85. edsl/questions/derived/QuestionTopK.py +10 -1
  86. edsl/questions/derived/QuestionYesNo.py +24 -3
  87. edsl/questions/descriptors.py +43 -7
  88. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  89. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  90. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  91. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  92. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  93. edsl/questions/prompt_templates/question_list.jinja +17 -0
  94. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  95. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  96. edsl/questions/question_registry.py +6 -2
  97. edsl/questions/templates/__init__.py +0 -0
  98. edsl/questions/templates/budget/__init__.py +0 -0
  99. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  100. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  101. edsl/questions/templates/checkbox/__init__.py +0 -0
  102. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  103. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  104. edsl/questions/templates/extract/__init__.py +0 -0
  105. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  106. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  107. edsl/questions/templates/free_text/__init__.py +0 -0
  108. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  109. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  110. edsl/questions/templates/likert_five/__init__.py +0 -0
  111. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  112. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  113. edsl/questions/templates/linear_scale/__init__.py +0 -0
  114. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  115. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  116. edsl/questions/templates/list/__init__.py +0 -0
  117. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  118. edsl/questions/templates/list/question_presentation.jinja +5 -0
  119. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  120. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  121. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  122. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  123. edsl/questions/templates/numerical/__init__.py +0 -0
  124. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  125. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  126. edsl/questions/templates/rank/__init__.py +0 -0
  127. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  128. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  129. edsl/questions/templates/top_k/__init__.py +0 -0
  130. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  131. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  132. edsl/questions/templates/yes_no/__init__.py +0 -0
  133. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  134. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  135. edsl/results/Dataset.py +20 -0
  136. edsl/results/DatasetExportMixin.py +46 -48
  137. edsl/results/DatasetTree.py +145 -0
  138. edsl/results/Result.py +32 -5
  139. edsl/results/Results.py +135 -46
  140. edsl/results/ResultsDBMixin.py +3 -3
  141. edsl/results/Selector.py +118 -0
  142. edsl/results/tree_explore.py +115 -0
  143. edsl/scenarios/FileStore.py +71 -10
  144. edsl/scenarios/Scenario.py +96 -25
  145. edsl/scenarios/ScenarioImageMixin.py +2 -2
  146. edsl/scenarios/ScenarioList.py +361 -39
  147. edsl/scenarios/ScenarioListExportMixin.py +9 -0
  148. edsl/scenarios/ScenarioListPdfMixin.py +150 -4
  149. edsl/study/SnapShot.py +8 -1
  150. edsl/study/Study.py +32 -0
  151. edsl/surveys/Rule.py +10 -1
  152. edsl/surveys/RuleCollection.py +21 -5
  153. edsl/surveys/Survey.py +637 -311
  154. edsl/surveys/SurveyExportMixin.py +71 -9
  155. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  156. edsl/surveys/SurveyQualtricsImport.py +75 -4
  157. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  158. edsl/surveys/instructions/Instruction.py +34 -0
  159. edsl/surveys/instructions/InstructionCollection.py +77 -0
  160. edsl/surveys/instructions/__init__.py +0 -0
  161. edsl/templates/error_reporting/base.html +24 -0
  162. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  163. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  164. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  165. edsl/templates/error_reporting/interview_details.html +116 -0
  166. edsl/templates/error_reporting/interviews.html +10 -0
  167. edsl/templates/error_reporting/overview.html +5 -0
  168. edsl/templates/error_reporting/performance_plot.html +2 -0
  169. edsl/templates/error_reporting/report.css +74 -0
  170. edsl/templates/error_reporting/report.html +118 -0
  171. edsl/templates/error_reporting/report.js +25 -0
  172. edsl/utilities/utilities.py +9 -1
  173. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/METADATA +5 -2
  174. edsl-0.1.33.dist-info/RECORD +295 -0
  175. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
  176. edsl/jobs/interviews/retry_management.py +0 -37
  177. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -333
  178. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  179. edsl-0.1.32.dist-info/RECORD +0 -209
  180. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  181. {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -0,0 +1,37 @@
1
+ You are being asked a question that requires a numerical response
2
+ in the form of an integer or decimal (e.g., -12, 0, 1, 2, 3.45, ...).
3
+
4
+ Your response must be in the following format:
5
+
6
+ {% if include_comment %}
7
+ {"answer": "<your numerical answer here>", "comment": "<your explanation here>"}
8
+ {% else %}
9
+ {"answer": "<your numerical answer here>"}
10
+ {% endif %}
11
+
12
+ You must only include an integer or decimal in the quoted "answer" part of your response.
13
+
14
+ Here is an example of a valid response:
15
+ {% if include_comment %}
16
+ {"answer": "100", "comment": "This is my explanation..."}
17
+ {% else %}
18
+ {"answer": "100"}
19
+ {% endif %}
20
+
21
+ Here is an example of a response that is invalid because the "answer" includes words:
22
+ {"answer": "I don't know.", ...}
23
+
24
+ If your response is equivalent to zero, your formatted response should look like this:
25
+ {% if include_comment %}
26
+ {"answer": "0", "comment": "This is my explanation..."}
27
+ {% else %}
28
+ {"answer": "0"}
29
+ {% endif %}
30
+
31
+ You are being asked the following question: {{question_text}}
32
+ {% if min_value is not none %}
33
+ Minimum answer value: {{min_value}}
34
+ {% endif %}
35
+ {% if max_value is not none %}
36
+ Maximum answer value: {{max_value}}
37
+ {% endif %}
@@ -100,12 +100,16 @@ class Question(metaclass=Meta):
100
100
 
101
101
  >>> from edsl import Question
102
102
  >>> Question.available()
103
- ['budget', 'checkbox', 'extract', 'free_text', 'functional', 'likert_five', 'linear_scale', 'list', 'multiple_choice', 'numerical', 'rank', 'top_k', 'yes_no']
103
+ ['checkbox', 'extract', 'free_text', 'functional', 'likert_five', 'linear_scale', 'list', 'multiple_choice', 'numerical', 'rank', 'top_k', 'yes_no']
104
104
  """
105
+ exclude = ["budget"]
105
106
  if show_class_names:
106
107
  return RegisterQuestionsMeta.question_types_to_classes()
107
108
  else:
108
- return sorted(set(RegisterQuestionsMeta.question_types_to_classes().keys()))
109
+ question_list = sorted(
110
+ set(RegisterQuestionsMeta.question_types_to_classes().keys())
111
+ )
112
+ return [q for q in question_list if q not in exclude]
109
113
 
110
114
 
111
115
  def get_question_class(question_type):
File without changes
File without changes
@@ -0,0 +1,7 @@
1
+ Return only a comma-separated list of the values in the same order as the options, with 0s included, on one line, in square brackets.
2
+
3
+ Example: if there are 4 options, the response should be "[25,25,25,25]" to allocate 25 to each option.
4
+
5
+ {% if include_comment %}
6
+ After the answer, you can put a comment explaining your choice on the next line.
7
+ {% endif %}
@@ -0,0 +1,7 @@
1
+ {{question_text}}
2
+ The options are
3
+ {% for option in question_options %}
4
+ {{ loop.index0 }}: {{option}}
5
+ {% endfor %}
6
+ Allocate your budget of {{budget_sum}} among the options.
7
+
File without changes
@@ -0,0 +1,10 @@
1
+ {# Answering Instructions #}
2
+ {% if use_code %}
3
+ Please respond only with a comma-separated list of the code of the options that apply, with square brackets. E.g., [0, 1, 3]
4
+ {% else %}
5
+ Please respond only with a comma-separated list of the options that apply, with square brackets. E.g., ['Good', 'Bad', 'Ugly']
6
+ {% endif %}
7
+ {% if include_comment %}
8
+ After the answer, you can put a comment explaining your choice on the next line.
9
+ {% endif %}
10
+
@@ -0,0 +1,22 @@
1
+ {{question_text}}
2
+ {% if use_code %}
3
+ {% for option in question_options %}
4
+ {{ loop.index0 }}: {{option}}
5
+ {% endfor %}
6
+ {% else %}
7
+ {% for option in question_options %}
8
+ {{ option }}
9
+ {% endfor %}
10
+ {% endif %}
11
+
12
+ {# Restrictions #}
13
+ {% if min_selections != None and max_selections != None and min_selections == max_selections %}
14
+ You must select exactly {{min_selections}} options.
15
+ {% elif min_selections != None and max_selections != None %}
16
+ Minimum number of options that must be selected: {{min_selections}}.
17
+ Maximum number of options that may be selected: {{max_selections}}.
18
+ {% elif min_selections != None %}
19
+ Minimum number of options that must be selected: {{min_selections}}.
20
+ {% elif max_selections != None %}
21
+ Maximum number of options that may be selected: {{max_selections}}.
22
+ {% endif %}
File without changes
@@ -0,0 +1,7 @@
1
+ An ANSWER should be formatted like this:
2
+
3
+ {{ answer_template }}
4
+
5
+ It should have the same keys but values extracted from the input.
6
+ If the value of a key is not present in the input, fill with "null".
7
+ Put any comments in the next line after the answer.
@@ -0,0 +1 @@
1
+ {{question_text}}
File without changes
@@ -0,0 +1 @@
1
+ {{question_text}}
File without changes
@@ -0,0 +1,10 @@
1
+ {# Answering Instructions #}
2
+ {% if use_code %}
3
+ Respond only with the code corresponding to one of the options.
4
+ {% else %}
5
+ Respond only with a string corresponding to one of the options.
6
+ {% endif %}
7
+ {% if include_comment %}
8
+ After the answer, you can put a comment explaining why you chose that option on the next line.
9
+ {% endif %}
10
+
@@ -0,0 +1,12 @@
1
+ {# Question Presentation #}
2
+ {{question_text}}
3
+ {% if use_code %}
4
+ {%- for option in question_options %}
5
+ {{ loop.index0 }}: {{option}}
6
+ {% endfor %}
7
+ {% else %}
8
+ {% for option in question_options %}
9
+ {{option}}
10
+ {% endfor %}
11
+ {% endif %}
12
+ Only 1 option may be selected.
File without changes
@@ -0,0 +1,5 @@
1
+ {# Answering Instructions #}
2
+ Respond only with the code corresponding to one of the options. E.g., "1" or "5" by itself.
3
+ {% if include_comment %}
4
+ After the answer, you can put a comment explaining why you chose that option on the next line.
5
+ {% endif %}
@@ -0,0 +1,5 @@
1
+ {{question_text}}
2
+ {% for option in question_options %}
3
+ {{option}} : {{ option_labels.get(option, "") }}
4
+ {% endfor %}
5
+ Only 1 option may be selected.
File without changes
@@ -0,0 +1,4 @@
1
+ Return your answers on one line, in a comma-separated list of your responses, with square brackets and each answer in quotes E.g., ["A", "B", "C"]
2
+ {% if include_comment %}
3
+ After the answers, you can put a comment explaining your choice on the next line.
4
+ {% endif %}
@@ -0,0 +1,5 @@
1
+ {{question_text}}
2
+
3
+ {% if max_list_items is not none %}
4
+ The list must not contain more than {{ max_list_items }} items.
5
+ {% endif %}
File without changes
@@ -0,0 +1,9 @@
1
+ {# Answering Instructions #}
2
+ {% if use_code %}
3
+ Respond only with the code corresponding to one of the options.
4
+ {% else %}
5
+ Respond only with a string corresponding to one of the options.
6
+ {% endif %}
7
+ {% if include_comment %}
8
+ After the answer, you can put a comment explaining why you chose that option on the next line.
9
+ {% endif %}
File without changes
@@ -0,0 +1,12 @@
1
+ {# Question Presentation #}
2
+ {{question_text}}
3
+ {% if use_code %}
4
+ {%- for option in question_options %}
5
+ {{ loop.index0 }}: {{option}}
6
+ {% endfor %}
7
+ {% else %}
8
+ {% for option in question_options %}
9
+ {{option}}
10
+ {% endfor %}
11
+ {% endif %}
12
+ Only 1 option may be selected.
File without changes
@@ -0,0 +1,8 @@
1
+ This question requires a numerical response in the form of an integer or decimal (e.g., -12, 0, 1, 2, 3.45, ...).
2
+ Respond with just your number on a single line.
3
+ If your response is equivalent to zero, report '0'
4
+ If you cannot determine the answer, report 'None'
5
+
6
+ {% if include_comment %}
7
+ After the answer, put a comment explaining your choice on the next line.
8
+ {% endif %}
@@ -0,0 +1,7 @@
1
+ {{question_text}}
2
+ {% if min_value is not none %}
3
+ Minimum answer value: {{min_value}}
4
+ {% endif %}
5
+ {% if max_value is not none %}
6
+ Maximum answer value: {{max_value}}
7
+ {% endif %}
File without changes
@@ -0,0 +1,11 @@
1
+ {# Answering Instructions #}
2
+ {% if use_code %}
3
+ Please respond only with a comma-separated list of the code of the ranked options, with square brackets. E.g., [0, 1, 3]
4
+ {% else %}
5
+ Please respond only with a comma-separated list of the ranked options, with square brackets. E.g., ['Good', 'Bad', 'Ugly']
6
+ {% endif %}
7
+ {% if include_comment %}
8
+ After the answer, you can put a comment explaining your choice on the next line.
9
+ {% endif %}
10
+
11
+
@@ -0,0 +1,15 @@
1
+ {{question_text}}
2
+ {% if use_code %}
3
+ The options are
4
+ {% for option in question_options %}
5
+ {{ loop.index0 }}: {{option}}
6
+ {% endfor %}
7
+ {% else %}
8
+ The options are:
9
+ {% for option in question_options %}
10
+ {{option}}
11
+ {% endfor %}
12
+ {% endif %}
13
+ {% if num_selections %}
14
+ You can include up to {{num_selections}} options in your answer.
15
+ {% endif %}
File without changes
@@ -0,0 +1,8 @@
1
+ {# Answering Instructions #}
2
+ Please respond with valid JSON, formatted like so:
3
+ {% if include_comment %}
4
+ {"answer": [<put comma-separated list here>], "comment": "<put explanation here>"}
5
+ {% else %}
6
+ {"answer": [<put comma-separated list here>]}
7
+ {% endif %}
8
+
@@ -0,0 +1,22 @@
1
+ {{question_text}}
2
+ {% if use_code %}
3
+ {% for option in question_options %}
4
+ {{ loop.index0 }}: {{option}}
5
+ {% endfor %}
6
+ {% else %}
7
+ {% for option in question_options %}
8
+ {{ option }}
9
+ {% endfor %}
10
+ {% endif %}
11
+
12
+ {# Restrictions #}
13
+ {% if min_selections != None and max_selections != None and min_selections == max_selections %}
14
+ You must select exactly {{min_selections}} options.
15
+ {% elif min_selections != None and max_selections != None %}
16
+ Minimum number of options that must be selected: {{min_selections}}.
17
+ Maximum number of options that may be selected: {{max_selections}}.
18
+ {% elif min_selections != None %}
19
+ Minimum number of options that must be selected: {{min_selections}}.
20
+ {% elif max_selections != None %}
21
+ Maximum number of options that may be selected: {{max_selections}}.
22
+ {% endif %}
File without changes
@@ -0,0 +1,6 @@
1
+ {# Answering Instructions #}
2
+ Please respond with just your answer.
3
+
4
+ {% if include_comment %}
5
+ After the answer, you can put a comment explaining your response.
6
+ {% endif %}
@@ -0,0 +1,12 @@
1
+ {# Question Presentation #}
2
+ {{question_text}}
3
+ {% if use_code %}
4
+ {%- for option in question_options %}
5
+ {{ loop.index0 }}: {{option}}
6
+ {% endfor %}
7
+ {% else %}
8
+ {% for option in question_options %}
9
+ {{option}}
10
+ {% endfor %}
11
+ {% endif %}
12
+ Only 1 option may be selected.
edsl/results/Dataset.py CHANGED
@@ -8,6 +8,7 @@ from typing import Any, Union, Optional
8
8
  import numpy as np
9
9
 
10
10
  from edsl.results.ResultsExportMixin import ResultsExportMixin
11
+ from edsl.results.DatasetTree import Tree
11
12
 
12
13
 
13
14
  class Dataset(UserList, ResultsExportMixin):
@@ -30,6 +31,15 @@ class Dataset(UserList, ResultsExportMixin):
30
31
  _, values = list(self.data[0].items())[0]
31
32
  return len(values)
32
33
 
34
+ def keys(self):
35
+ """Return the first key of each observation in the dataset.
36
+
37
+ >>> d = Dataset([{'a.b':[1,2,3,4]}])
38
+ >>> d.keys()
39
+ ['a.b']
40
+ """
41
+ return [list(o.keys())[0] for o in self]
42
+
33
43
  def __repr__(self) -> str:
34
44
  """Return a string representation of the dataset."""
35
45
  return f"Dataset({self.data})"
@@ -245,6 +255,16 @@ class Dataset(UserList, ResultsExportMixin):
245
255
 
246
256
  return Dataset(new_data)
247
257
 
258
+ @property
259
+ def tree(self):
260
+ """Return a tree representation of the dataset.
261
+
262
+ >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[4,3,2,1]}])
263
+ >>> d.tree.print_tree()
264
+ Tree has not been constructed yet.
265
+ """
266
+ return Tree(self)
267
+
248
268
  @classmethod
249
269
  def example(self):
250
270
  """Return an example dataset.
@@ -4,6 +4,7 @@ import base64
4
4
  import csv
5
5
  import io
6
6
  import html
7
+ from typing import Optional
7
8
 
8
9
  from typing import Literal, Optional, Union, List
9
10
 
@@ -41,7 +42,7 @@ class DatasetExportMixin:
41
42
  >>> Results.example().relevant_columns(data_type = "flimflam")
42
43
  Traceback (most recent call last):
43
44
  ...
44
- ValueError: No columns found for data type: flimflam. Available data types are: ['agent', 'answer', 'comment', 'model', 'prompt', 'question_options', 'question_text', 'question_type', 'raw_model_response', 'scenario'].
45
+ ValueError: No columns found for data type: flimflam. Available data types are: ...
45
46
  """
46
47
  columns = [list(x.keys())[0] for x in self]
47
48
  if remove_prefix:
@@ -156,12 +157,13 @@ class DatasetExportMixin:
156
157
  iframe_height: int = 200,
157
158
  iframe_width: int = 600,
158
159
  web=False,
159
- ) -> None:
160
+ return_string: bool = False,
161
+ ) -> Union[None, str, "Results"]:
160
162
  """Print the results in a pretty format.
161
163
 
162
164
  :param pretty_labels: A dictionary of pretty labels for the columns.
163
165
  :param filename: The filename to save the results to.
164
- :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
166
+ :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
165
167
  :param interactive: Whether to print the results interactively in a Jupyter notebook.
166
168
  :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
167
169
  :param max_rows: The maximum number of rows to print.
@@ -170,6 +172,9 @@ class DatasetExportMixin:
170
172
  :param iframe_height: The height of the iframe.
171
173
  :param iframe_width: The width of the iframe.
172
174
  :param web: Whether to display the table in a web browser.
175
+ :param return_string: Whether to return the output as a string instead of printing.
176
+
177
+ :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
173
178
 
174
179
  Example: Print in rich format at the terminal
175
180
 
@@ -253,11 +258,14 @@ class DatasetExportMixin:
253
258
 
254
259
  >>> r.select('how_feeling').print(format='latex')
255
260
  \\begin{tabular}{l}
256
- \\toprule
257
261
  ...
262
+ \\end{tabular}
263
+ <BLANKLINE>
258
264
  """
259
265
  from IPython.display import HTML, display
260
266
  from edsl.utilities.utilities import is_notebook
267
+ import io
268
+ import sys
261
269
 
262
270
  def _determine_format(format):
263
271
  if format is None:
@@ -266,7 +274,9 @@ class DatasetExportMixin:
266
274
  else:
267
275
  format = "rich"
268
276
  if format not in ["rich", "html", "markdown", "latex"]:
269
- raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
277
+ raise ValueError(
278
+ "format must be one of 'rich', 'html', 'markdown', or 'latex'."
279
+ )
270
280
 
271
281
  return format
272
282
 
@@ -285,21 +295,24 @@ class DatasetExportMixin:
285
295
 
286
296
  new_data = list(_create_data())
287
297
 
298
+ # Capture output if return_string is True
299
+ if return_string:
300
+ old_stdout = sys.stdout
301
+ sys.stdout = io.StringIO()
302
+
303
+ output = None
304
+
288
305
  if format == "rich":
289
306
  from edsl.utilities.interface import print_dataset_with_rich
290
307
 
291
- print_dataset_with_rich(
308
+ output = print_dataset_with_rich(
292
309
  new_data, filename=filename, split_at_dot=split_at_dot
293
310
  )
294
- return self if tee else None
295
-
296
- if format == "markdown":
311
+ elif format == "markdown":
297
312
  from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
298
313
 
299
- print_list_of_dicts_as_markdown_table(new_data, filename=filename)
300
- return self if tee else None
301
-
302
- if format == "latex":
314
+ output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
315
+ elif format == "latex":
303
316
  df = self.to_pandas()
304
317
  df.columns = [col.replace("_", " ") for col in df.columns]
305
318
  latex_string = df.to_latex(index=False)
@@ -309,23 +322,14 @@ class DatasetExportMixin:
309
322
  f.write(latex_string)
310
323
  else:
311
324
  print(latex_string)
312
-
313
- return self if tee else None
314
-
315
- if format == "html":
325
+ output = latex_string
326
+ elif format == "html":
316
327
  from edsl.utilities.interface import print_list_of_dicts_as_html_table
317
328
 
318
329
  html_source = print_list_of_dicts_as_html_table(
319
330
  new_data, interactive=interactive
320
331
  )
321
332
 
322
- # if download_link:
323
- # from IPython.display import HTML, display
324
- # csv_file = output.getvalue()
325
- # b64 = base64.b64encode(csv_file.encode()).decode()
326
- # download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
327
- # #display(HTML(download_link))
328
-
329
333
  if iframe:
330
334
  iframe = f""""
331
335
  <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
@@ -338,7 +342,18 @@ class DatasetExportMixin:
338
342
 
339
343
  view_html(html_source)
340
344
 
341
- return self if tee else None
345
+ output = html_source
346
+
347
+ # Restore stdout and get captured output if return_string is True
348
+ if return_string:
349
+ captured_output = sys.stdout.getvalue()
350
+ sys.stdout = old_stdout
351
+ return captured_output or output
352
+
353
+ if tee:
354
+ return self
355
+
356
+ return None
342
357
 
343
358
  def to_csv(
344
359
  self,
@@ -457,7 +472,11 @@ class DatasetExportMixin:
457
472
  from edsl import ScenarioList, Scenario
458
473
 
459
474
  list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
460
- return ScenarioList([Scenario(d) for d in list_of_dicts])
475
+ scenarios = []
476
+ for d in list_of_dicts:
477
+ scenarios.append(Scenario(d))
478
+ return ScenarioList(scenarios)
479
+ # return ScenarioList([Scenario(d) for d in list_of_dicts])
461
480
 
462
481
  def to_agent_list(self, remove_prefix: bool = True):
463
482
  """Convert the results to a list of dictionaries, one per agent.
@@ -501,7 +520,7 @@ class DatasetExportMixin:
501
520
 
502
521
  return list_of_dicts
503
522
 
504
- def to_list(self, flatten=False, remove_none=False) -> list[list]:
523
+ def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
505
524
  """Convert the results to a list of lists.
506
525
 
507
526
  :param flatten: Whether to flatten the list of lists.
@@ -596,27 +615,6 @@ class DatasetExportMixin:
596
615
  if return_link:
597
616
  return filename
598
617
 
599
- def to_docx(self, filename: Optional[str] = None, separator: str = "\n"):
600
- """Export the results to a Word document.
601
-
602
- :param filename: The filename to save the Word document to.
603
-
604
-
605
- """
606
- from docx import Document
607
-
608
- doc = Document()
609
- for entry in self:
610
- key, values = list(entry.items())[0]
611
- doc.add_paragraph(key)
612
- line = separator.join(values)
613
- doc.add_paragraph(line)
614
-
615
- if filename is not None:
616
- doc.save(filename)
617
- else:
618
- return doc
619
-
620
618
  def tally(
621
619
  self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
622
620
  ) -> Union[dict, "Dataset"]: