edsl 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +136 -221
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +154 -85
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +48 -47
  23. edsl/conjure/Conjure.py +6 -0
  24. edsl/coop/PriceFetcher.py +58 -0
  25. edsl/coop/coop.py +50 -7
  26. edsl/data/Cache.py +35 -1
  27. edsl/data/CacheHandler.py +3 -4
  28. edsl/data_transfer_models.py +73 -38
  29. edsl/enums.py +8 -0
  30. edsl/exceptions/general.py +10 -8
  31. edsl/exceptions/language_models.py +25 -1
  32. edsl/exceptions/questions.py +62 -5
  33. edsl/exceptions/results.py +4 -0
  34. edsl/inference_services/AnthropicService.py +13 -11
  35. edsl/inference_services/AwsBedrock.py +112 -0
  36. edsl/inference_services/AzureAI.py +214 -0
  37. edsl/inference_services/DeepInfraService.py +4 -3
  38. edsl/inference_services/GoogleService.py +16 -12
  39. edsl/inference_services/GroqService.py +5 -4
  40. edsl/inference_services/InferenceServiceABC.py +58 -3
  41. edsl/inference_services/InferenceServicesCollection.py +13 -8
  42. edsl/inference_services/MistralAIService.py +120 -0
  43. edsl/inference_services/OllamaService.py +18 -0
  44. edsl/inference_services/OpenAIService.py +55 -56
  45. edsl/inference_services/TestService.py +80 -0
  46. edsl/inference_services/TogetherAIService.py +170 -0
  47. edsl/inference_services/models_available_cache.py +25 -0
  48. edsl/inference_services/registry.py +19 -1
  49. edsl/jobs/Answers.py +10 -12
  50. edsl/jobs/FailedQuestion.py +78 -0
  51. edsl/jobs/Jobs.py +137 -41
  52. edsl/jobs/buckets/BucketCollection.py +24 -15
  53. edsl/jobs/buckets/TokenBucket.py +105 -18
  54. edsl/jobs/interviews/Interview.py +393 -83
  55. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +22 -18
  56. edsl/jobs/interviews/InterviewExceptionEntry.py +167 -0
  57. edsl/jobs/runners/JobsRunnerAsyncio.py +152 -160
  58. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  59. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  60. edsl/jobs/tasks/TaskCreators.py +1 -1
  61. edsl/jobs/tasks/TaskHistory.py +205 -126
  62. edsl/language_models/LanguageModel.py +297 -177
  63. edsl/language_models/ModelList.py +2 -2
  64. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  65. edsl/language_models/fake_openai_call.py +15 -0
  66. edsl/language_models/fake_openai_service.py +61 -0
  67. edsl/language_models/registry.py +25 -8
  68. edsl/language_models/repair.py +0 -19
  69. edsl/language_models/utilities.py +61 -0
  70. edsl/notebooks/Notebook.py +20 -2
  71. edsl/prompts/Prompt.py +52 -2
  72. edsl/questions/AnswerValidatorMixin.py +23 -26
  73. edsl/questions/QuestionBase.py +330 -249
  74. edsl/questions/QuestionBaseGenMixin.py +133 -0
  75. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  76. edsl/questions/QuestionBudget.py +99 -42
  77. edsl/questions/QuestionCheckBox.py +227 -36
  78. edsl/questions/QuestionExtract.py +98 -28
  79. edsl/questions/QuestionFreeText.py +47 -31
  80. edsl/questions/QuestionFunctional.py +7 -0
  81. edsl/questions/QuestionList.py +141 -23
  82. edsl/questions/QuestionMultipleChoice.py +159 -66
  83. edsl/questions/QuestionNumerical.py +88 -47
  84. edsl/questions/QuestionRank.py +182 -25
  85. edsl/questions/Quick.py +41 -0
  86. edsl/questions/RegisterQuestionsMeta.py +31 -12
  87. edsl/questions/ResponseValidatorABC.py +170 -0
  88. edsl/questions/__init__.py +3 -4
  89. edsl/questions/decorators.py +21 -0
  90. edsl/questions/derived/QuestionLikertFive.py +10 -5
  91. edsl/questions/derived/QuestionLinearScale.py +15 -2
  92. edsl/questions/derived/QuestionTopK.py +10 -1
  93. edsl/questions/derived/QuestionYesNo.py +24 -3
  94. edsl/questions/descriptors.py +43 -7
  95. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  96. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  97. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  98. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  99. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  100. edsl/questions/prompt_templates/question_list.jinja +17 -0
  101. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  102. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  103. edsl/questions/question_registry.py +6 -2
  104. edsl/questions/templates/__init__.py +0 -0
  105. edsl/questions/templates/budget/__init__.py +0 -0
  106. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  107. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  108. edsl/questions/templates/checkbox/__init__.py +0 -0
  109. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  110. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  111. edsl/questions/templates/extract/__init__.py +0 -0
  112. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  113. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  114. edsl/questions/templates/free_text/__init__.py +0 -0
  115. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  116. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  117. edsl/questions/templates/likert_five/__init__.py +0 -0
  118. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  119. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  120. edsl/questions/templates/linear_scale/__init__.py +0 -0
  121. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  122. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  123. edsl/questions/templates/list/__init__.py +0 -0
  124. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  125. edsl/questions/templates/list/question_presentation.jinja +5 -0
  126. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  127. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  128. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  129. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  130. edsl/questions/templates/numerical/__init__.py +0 -0
  131. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  132. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  133. edsl/questions/templates/rank/__init__.py +0 -0
  134. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  135. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  136. edsl/questions/templates/top_k/__init__.py +0 -0
  137. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  138. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  139. edsl/questions/templates/yes_no/__init__.py +0 -0
  140. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  141. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  142. edsl/results/Dataset.py +20 -0
  143. edsl/results/DatasetExportMixin.py +58 -30
  144. edsl/results/DatasetTree.py +145 -0
  145. edsl/results/Result.py +32 -5
  146. edsl/results/Results.py +135 -46
  147. edsl/results/ResultsDBMixin.py +3 -3
  148. edsl/results/Selector.py +118 -0
  149. edsl/results/tree_explore.py +115 -0
  150. edsl/scenarios/FileStore.py +71 -10
  151. edsl/scenarios/Scenario.py +109 -24
  152. edsl/scenarios/ScenarioImageMixin.py +2 -2
  153. edsl/scenarios/ScenarioList.py +546 -21
  154. edsl/scenarios/ScenarioListExportMixin.py +24 -4
  155. edsl/scenarios/ScenarioListPdfMixin.py +153 -4
  156. edsl/study/SnapShot.py +8 -1
  157. edsl/study/Study.py +32 -0
  158. edsl/surveys/Rule.py +15 -3
  159. edsl/surveys/RuleCollection.py +21 -5
  160. edsl/surveys/Survey.py +707 -298
  161. edsl/surveys/SurveyExportMixin.py +71 -9
  162. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  163. edsl/surveys/SurveyQualtricsImport.py +284 -0
  164. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  165. edsl/surveys/instructions/Instruction.py +34 -0
  166. edsl/surveys/instructions/InstructionCollection.py +77 -0
  167. edsl/surveys/instructions/__init__.py +0 -0
  168. edsl/templates/error_reporting/base.html +24 -0
  169. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  170. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  171. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  172. edsl/templates/error_reporting/interview_details.html +116 -0
  173. edsl/templates/error_reporting/interviews.html +10 -0
  174. edsl/templates/error_reporting/overview.html +5 -0
  175. edsl/templates/error_reporting/performance_plot.html +2 -0
  176. edsl/templates/error_reporting/report.css +74 -0
  177. edsl/templates/error_reporting/report.html +118 -0
  178. edsl/templates/error_reporting/report.js +25 -0
  179. edsl/utilities/utilities.py +40 -1
  180. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/METADATA +8 -2
  181. edsl-0.1.33.dist-info/RECORD +295 -0
  182. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -271
  183. edsl/jobs/interviews/retry_management.py +0 -37
  184. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -303
  185. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  186. edsl-0.1.31.dev4.dist-info/RECORD +0 -204
  187. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  188. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -0,0 +1,37 @@
1
+ You are being asked a question that requires a numerical response
2
+ in the form of an integer or decimal (e.g., -12, 0, 1, 2, 3.45, ...).
3
+
4
+ Your response must be in the following format:
5
+
6
+ {% if include_comment %}
7
+ {"answer": "<your numerical answer here>", "comment": "<your explanation here>"}
8
+ {% else %}
9
+ {"answer": "<your numerical answer here>"}
10
+ {% endif %}
11
+
12
+ You must only include an integer or decimal in the quoted "answer" part of your response.
13
+
14
+ Here is an example of a valid response:
15
+ {% if include_comment %}
16
+ {"answer": "100", "comment": "This is my explanation..."}
17
+ {% else %}
18
+ {"answer": "100"}
19
+ {% endif %}
20
+
21
+ Here is an example of a response that is invalid because the "answer" includes words:
22
+ {"answer": "I don't know.", ...}
23
+
24
+ If your response is equivalent to zero, your formatted response should look like this:
25
+ {% if include_comment %}
26
+ {"answer": "0", "comment": "This is my explanation..."}
27
+ {% else %}
28
+ {"answer": "0"}
29
+ {% endif %}
30
+
31
+ You are being asked the following question: {{question_text}}
32
+ {% if min_value is not none %}
33
+ Minimum answer value: {{min_value}}
34
+ {% endif %}
35
+ {% if max_value is not none %}
36
+ Maximum answer value: {{max_value}}
37
+ {% endif %}
@@ -100,12 +100,16 @@ class Question(metaclass=Meta):
100
100
 
101
101
  >>> from edsl import Question
102
102
  >>> Question.available()
103
- ['budget', 'checkbox', 'extract', 'free_text', 'functional', 'likert_five', 'linear_scale', 'list', 'multiple_choice', 'numerical', 'rank', 'top_k', 'yes_no']
103
+ ['checkbox', 'extract', 'free_text', 'functional', 'likert_five', 'linear_scale', 'list', 'multiple_choice', 'numerical', 'rank', 'top_k', 'yes_no']
104
104
  """
105
+ exclude = ["budget"]
105
106
  if show_class_names:
106
107
  return RegisterQuestionsMeta.question_types_to_classes()
107
108
  else:
108
- return sorted(set(RegisterQuestionsMeta.question_types_to_classes().keys()))
109
+ question_list = sorted(
110
+ set(RegisterQuestionsMeta.question_types_to_classes().keys())
111
+ )
112
+ return [q for q in question_list if q not in exclude]
109
113
 
110
114
 
111
115
  def get_question_class(question_type):
File without changes
File without changes
@@ -0,0 +1,7 @@
1
+ Return only a comma-separated list of the values in the same order as the options, with 0s included, on one line, in square brackets.
2
+
3
+ Example: if there are 4 options, the response should be "[25,25,25,25]" to allocate 25 to each option.
4
+
5
+ {% if include_comment %}
6
+ After the answer, you can put a comment explaining your choice on the next line.
7
+ {% endif %}
@@ -0,0 +1,7 @@
1
+ {{question_text}}
2
+ The options are
3
+ {% for option in question_options %}
4
+ {{ loop.index0 }}: {{option}}
5
+ {% endfor %}
6
+ Allocate your budget of {{budget_sum}} among the options.
7
+
File without changes
@@ -0,0 +1,10 @@
1
+ {# Answering Instructions #}
2
+ {% if use_code %}
3
+ Please respond only with a comma-separated list of the code of the options that apply, with square brackets. E.g., [0, 1, 3]
4
+ {% else %}
5
+ Please respond only with a comma-separated list of the options that apply, with square brackets. E.g., ['Good', 'Bad', 'Ugly']
6
+ {% endif %}
7
+ {% if include_comment %}
8
+ After the answer, you can put a comment explaining your choice on the next line.
9
+ {% endif %}
10
+
@@ -0,0 +1,22 @@
1
+ {{question_text}}
2
+ {% if use_code %}
3
+ {% for option in question_options %}
4
+ {{ loop.index0 }}: {{option}}
5
+ {% endfor %}
6
+ {% else %}
7
+ {% for option in question_options %}
8
+ {{ option }}
9
+ {% endfor %}
10
+ {% endif %}
11
+
12
+ {# Restrictions #}
13
+ {% if min_selections != None and max_selections != None and min_selections == max_selections %}
14
+ You must select exactly {{min_selections}} options.
15
+ {% elif min_selections != None and max_selections != None %}
16
+ Minimum number of options that must be selected: {{min_selections}}.
17
+ Maximum number of options that must be selected: {{max_selections}}.
18
+ {% elif min_selections != None %}
19
+ Minimum number of options that must be selected: {{min_selections}}.
20
+ {% elif max_selections != None %}
21
+ Maximum number of options that must be selected: {{max_selections}}.
22
+ {% endif %}
File without changes
@@ -0,0 +1,7 @@
1
+ An ANSWER should be formatted like this:
2
+
3
+ {{ answer_template }}
4
+
5
+ It should have the same keys but values extracted from the input.
6
+ If the value of a key is not present in the input, fill with "null".
7
+ Put any comments in the next line after the answer.
@@ -0,0 +1 @@
1
+ {{question_text}}
File without changes
@@ -0,0 +1 @@
1
+ {{question_text}}
File without changes
@@ -0,0 +1,10 @@
1
+ {# Answering Instructions #}
2
+ {% if use_code %}
3
+ Respond only with the code corresponding to one of the options.
4
+ {% else %}
5
+ Respond only with a string corresponding to one of the options.
6
+ {% endif %}
7
+ {% if include_comment %}
8
+ After the answer, you can put a comment explaining why you chose that option on the next line.
9
+ {% endif %}
10
+
@@ -0,0 +1,12 @@
1
+ {# Question Presentation #}
2
+ {{question_text}}
3
+ {% if use_code %}
4
+ {%- for option in question_options %}
5
+ {{ loop.index0 }}: {{option}}
6
+ {% endfor %}
7
+ {% else %}
8
+ {% for option in question_options %}
9
+ {{option}}
10
+ {% endfor %}
11
+ {% endif %}
12
+ Only 1 option may be selected.
File without changes
@@ -0,0 +1,5 @@
1
+ {# Answering Instructions #}
2
+ Respond only with the code corresponding to one of the options. E.g., "1" or "5" by itself.
3
+ {% if include_comment %}
4
+ After the answer, you can put a comment explaining why you chose that option on the next line.
5
+ {% endif %}
@@ -0,0 +1,5 @@
1
+ {{question_text}}
2
+ {% for option in question_options %}
3
+ {{option}} : {{ option_labels.get(option, "") }}
4
+ {% endfor %}
5
+ Only 1 option may be selected.
File without changes
@@ -0,0 +1,4 @@
1
+ Return your answers on one line, in a comma-separated list of your responses, with square brackets and each answer in quotes E.g., ["A", "B", "C"]
2
+ {% if include_comment %}
3
+ After the answers, you can put a comment explaining your choice on the next line.
4
+ {% endif %}
@@ -0,0 +1,5 @@
1
+ {{question_text}}
2
+
3
+ {% if max_list_items is not none %}
4
+ The list must not contain more than {{ max_list_items }} items.
5
+ {% endif %}
File without changes
@@ -0,0 +1,9 @@
1
+ {# Answering Instructions #}
2
+ {% if use_code %}
3
+ Respond only with the code corresponding to one of the options.
4
+ {% else %}
5
+ Respond only with a string corresponding to one of the options.
6
+ {% endif %}
7
+ {% if include_comment %}
8
+ After the answer, you can put a comment explaining why you chose that option on the next line.
9
+ {% endif %}
File without changes
@@ -0,0 +1,12 @@
1
+ {# Question Presentation #}
2
+ {{question_text}}
3
+ {% if use_code %}
4
+ {%- for option in question_options %}
5
+ {{ loop.index0 }}: {{option}}
6
+ {% endfor %}
7
+ {% else %}
8
+ {% for option in question_options %}
9
+ {{option}}
10
+ {% endfor %}
11
+ {% endif %}
12
+ Only 1 option may be selected.
File without changes
@@ -0,0 +1,8 @@
1
+ This question requires a numerical response in the form of an integer or decimal (e.g., -12, 0, 1, 2, 3.45, ...).
2
+ Respond with just your number on a single line.
3
+ If your response is equivalent to zero, report '0'
4
+ If you cannot determine the answer, report 'None'
5
+
6
+ {% if include_comment %}
7
+ After the answer, put a comment explaining your choice on the next line.
8
+ {% endif %}
@@ -0,0 +1,7 @@
1
+ {{question_text}}
2
+ {% if min_value is not none %}
3
+ Minimum answer value: {{min_value}}
4
+ {% endif %}
5
+ {% if max_value is not none %}
6
+ Maximum answer value: {{max_value}}
7
+ {% endif %}
File without changes
@@ -0,0 +1,11 @@
1
+ {# Answering Instructions #}
2
+ {% if use_code %}
3
+ Please respond only with a comma-separated list of the code of the ranked options, with square brackets. E.g., [0, 1, 3]
4
+ {% else %}
5
+ Please respond only with a comma-separated list of the ranked options, with square brackets. E.g., ['Good', 'Bad', 'Ugly']
6
+ {% endif %}
7
+ {% if include_comment %}
8
+ After the answer, you can put a comment explaining your choice on the next line.
9
+ {% endif %}
10
+
11
+
@@ -0,0 +1,15 @@
1
+ {{question_text}}
2
+ {% if use_code %}
3
+ The options are
4
+ {% for option in question_options %}
5
+ {{ loop.index0 }}: {{option}}
6
+ {% endfor %}
7
+ {% else %}
8
+ The options are:
9
+ {% for option in question_options %}
10
+ {{option}}
11
+ {% endfor %}
12
+ {% endif %}
13
+ {% if num_selections %}
14
+ You can include up to {{num_selections}} options in your answer.
15
+ {% endif %}
File without changes
@@ -0,0 +1,8 @@
1
+ {# Answering Instructions #}
2
+ Please respond with valid JSON, formatted like so:
3
+ {% if include_comment %}
4
+ {"answer": [<put comma-separated list here>], "comment": "<put explanation here>"}
5
+ {% else %}
6
+ {"answer": [<put comma-separated list here>]}
7
+ {% endif %}
8
+
@@ -0,0 +1,22 @@
1
+ {{question_text}}
2
+ {% if use_code %}
3
+ {% for option in question_options %}
4
+ {{ loop.index0 }}: {{option}}
5
+ {% endfor %}
6
+ {% else %}
7
+ {% for option in question_options %}
8
+ {{ option }}
9
+ {% endfor %}
10
+ {% endif %}
11
+
12
+ {# Restrictions #}
13
+ {% if min_selections != None and max_selections != None and min_selections == max_selections %}
14
+ You must select exactly {{min_selections}} options.
15
+ {% elif min_selections != None and max_selections != None %}
16
+ Minimum number of options that must be selected: {{min_selections}}.
17
+ Maximum number of options that must be selected: {{max_selections}}.
18
+ {% elif min_selections != None %}
19
+ Minimum number of options that must be selected: {{min_selections}}.
20
+ {% elif max_selections != None %}
21
+ Maximum number of options that must be selected: {{max_selections}}.
22
+ {% endif %}
File without changes
@@ -0,0 +1,6 @@
1
+ {# Answering Instructions #}
2
+ Please respond with just your answer.
3
+
4
+ {% if include_comment %}
5
+ After the answer, you can put a comment explaining your response.
6
+ {% endif %}
@@ -0,0 +1,12 @@
1
+ {# Question Presentation #}
2
+ {{question_text}}
3
+ {% if use_code %}
4
+ {%- for option in question_options %}
5
+ {{ loop.index0 }}: {{option}}
6
+ {% endfor %}
7
+ {% else %}
8
+ {% for option in question_options %}
9
+ {{option}}
10
+ {% endfor %}
11
+ {% endif %}
12
+ Only 1 option may be selected.
edsl/results/Dataset.py CHANGED
@@ -8,6 +8,7 @@ from typing import Any, Union, Optional
8
8
  import numpy as np
9
9
 
10
10
  from edsl.results.ResultsExportMixin import ResultsExportMixin
11
+ from edsl.results.DatasetTree import Tree
11
12
 
12
13
 
13
14
  class Dataset(UserList, ResultsExportMixin):
@@ -30,6 +31,15 @@ class Dataset(UserList, ResultsExportMixin):
30
31
  _, values = list(self.data[0].items())[0]
31
32
  return len(values)
32
33
 
34
+ def keys(self):
35
+ """Return the first key of each entry in the dataset.
36
+
37
+ >>> d = Dataset([{'a.b':[1,2,3,4]}])
38
+ >>> d.keys()
39
+ ['a.b']
40
+ """
41
+ return [list(o.keys())[0] for o in self]
42
+
33
43
  def __repr__(self) -> str:
34
44
  """Return a string representation of the dataset."""
35
45
  return f"Dataset({self.data})"
@@ -245,6 +255,16 @@ class Dataset(UserList, ResultsExportMixin):
245
255
 
246
256
  return Dataset(new_data)
247
257
 
258
+ @property
259
+ def tree(self):
260
+ """Return a tree representation of the dataset.
261
+
262
+ >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[4,3,2,1]}])
263
+ >>> d.tree.print_tree()
264
+ Tree has not been constructed yet.
265
+ """
266
+ return Tree(self)
267
+
248
268
  @classmethod
249
269
  def example(self):
250
270
  """Return an example dataset.
@@ -4,6 +4,7 @@ import base64
4
4
  import csv
5
5
  import io
6
6
  import html
7
+ from typing import Optional
7
8
 
8
9
  from typing import Literal, Optional, Union, List
9
10
 
@@ -27,6 +28,10 @@ class DatasetExportMixin:
27
28
  >>> d.relevant_columns(remove_prefix=True)
28
29
  ['b']
29
30
 
31
+ >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
32
+ >>> d.relevant_columns()
33
+ ['a', 'b']
34
+
30
35
  >>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
31
36
  ['answer.how_feeling', 'answer.how_feeling_yesterday']
32
37
 
@@ -37,7 +42,7 @@ class DatasetExportMixin:
37
42
  >>> Results.example().relevant_columns(data_type = "flimflam")
38
43
  Traceback (most recent call last):
39
44
  ...
40
- ValueError: No columns found for data type: flimflam. Available data types are: ['agent', 'answer', 'comment', 'model', 'prompt', 'question_options', 'question_text', 'question_type', 'raw_model_response', 'scenario'].
45
+ ValueError: No columns found for data type: flimflam. Available data types are: ...
41
46
  """
42
47
  columns = [list(x.keys())[0] for x in self]
43
48
  if remove_prefix:
@@ -152,12 +157,13 @@ class DatasetExportMixin:
152
157
  iframe_height: int = 200,
153
158
  iframe_width: int = 600,
154
159
  web=False,
155
- ) -> None:
160
+ return_string: bool = False,
161
+ ) -> Union[None, str, "Results"]:
156
162
  """Print the results in a pretty format.
157
163
 
158
164
  :param pretty_labels: A dictionary of pretty labels for the columns.
159
165
  :param filename: The filename to save the results to.
160
- :param format: The format to print the results in. Options are 'rich', 'html', or 'markdown'.
166
+ :param format: The format to print the results in. Options are 'rich', 'html', 'markdown', or 'latex'.
161
167
  :param interactive: Whether to print the results interactively in a Jupyter notebook.
162
168
  :param split_at_dot: Whether to split the column names at the last dot w/ a newline.
163
169
  :param max_rows: The maximum number of rows to print.
@@ -166,6 +172,9 @@ class DatasetExportMixin:
166
172
  :param iframe_height: The height of the iframe.
167
173
  :param iframe_width: The width of the iframe.
168
174
  :param web: Whether to display the table in a web browser.
175
+ :param return_string: Whether to return the output as a string instead of printing.
176
+
177
+ :return: None if tee is False and return_string is False, the dataset if tee is True, or a string if return_string is True.
169
178
 
170
179
  Example: Print in rich format at the terminal
171
180
 
@@ -249,11 +258,14 @@ class DatasetExportMixin:
249
258
 
250
259
  >>> r.select('how_feeling').print(format='latex')
251
260
  \\begin{tabular}{l}
252
- \\toprule
253
261
  ...
262
+ \\end{tabular}
263
+ <BLANKLINE>
254
264
  """
255
265
  from IPython.display import HTML, display
256
266
  from edsl.utilities.utilities import is_notebook
267
+ import io
268
+ import sys
257
269
 
258
270
  def _determine_format(format):
259
271
  if format is None:
@@ -262,7 +274,9 @@ class DatasetExportMixin:
262
274
  else:
263
275
  format = "rich"
264
276
  if format not in ["rich", "html", "markdown", "latex"]:
265
- raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
277
+ raise ValueError(
278
+ "format must be one of 'rich', 'html', 'markdown', or 'latex'."
279
+ )
266
280
 
267
281
  return format
268
282
 
@@ -281,21 +295,24 @@ class DatasetExportMixin:
281
295
 
282
296
  new_data = list(_create_data())
283
297
 
298
+ # Capture output if return_string is True
299
+ if return_string:
300
+ old_stdout = sys.stdout
301
+ sys.stdout = io.StringIO()
302
+
303
+ output = None
304
+
284
305
  if format == "rich":
285
306
  from edsl.utilities.interface import print_dataset_with_rich
286
307
 
287
- print_dataset_with_rich(
308
+ output = print_dataset_with_rich(
288
309
  new_data, filename=filename, split_at_dot=split_at_dot
289
310
  )
290
- return self if tee else None
291
-
292
- if format == "markdown":
311
+ elif format == "markdown":
293
312
  from edsl.utilities.interface import print_list_of_dicts_as_markdown_table
294
313
 
295
- print_list_of_dicts_as_markdown_table(new_data, filename=filename)
296
- return self if tee else None
297
-
298
- if format == "latex":
314
+ output = print_list_of_dicts_as_markdown_table(new_data, filename=filename)
315
+ elif format == "latex":
299
316
  df = self.to_pandas()
300
317
  df.columns = [col.replace("_", " ") for col in df.columns]
301
318
  latex_string = df.to_latex(index=False)
@@ -305,23 +322,14 @@ class DatasetExportMixin:
305
322
  f.write(latex_string)
306
323
  else:
307
324
  print(latex_string)
308
-
309
- return self if tee else None
310
-
311
- if format == "html":
325
+ output = latex_string
326
+ elif format == "html":
312
327
  from edsl.utilities.interface import print_list_of_dicts_as_html_table
313
328
 
314
329
  html_source = print_list_of_dicts_as_html_table(
315
330
  new_data, interactive=interactive
316
331
  )
317
332
 
318
- # if download_link:
319
- # from IPython.display import HTML, display
320
- # csv_file = output.getvalue()
321
- # b64 = base64.b64encode(csv_file.encode()).decode()
322
- # download_link = f'<a href="data:file/csv;base64,{b64}" download="my_data.csv">Download CSV file</a>'
323
- # #display(HTML(download_link))
324
-
325
333
  if iframe:
326
334
  iframe = f""""
327
335
  <iframe srcdoc="{ html.escape(html_source) }" style="width: {iframe_width}px; height: {iframe_height}px;"></iframe>
@@ -334,7 +342,18 @@ class DatasetExportMixin:
334
342
 
335
343
  view_html(html_source)
336
344
 
337
- return self if tee else None
345
+ output = html_source
346
+
347
+ # Restore stdout and get captured output if return_string is True
348
+ if return_string:
349
+ captured_output = sys.stdout.getvalue()
350
+ sys.stdout = old_stdout
351
+ return captured_output or output
352
+
353
+ if tee:
354
+ return self
355
+
356
+ return None
338
357
 
339
358
  def to_csv(
340
359
  self,
@@ -453,7 +472,11 @@ class DatasetExportMixin:
453
472
  from edsl import ScenarioList, Scenario
454
473
 
455
474
  list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
456
- return ScenarioList([Scenario(d) for d in list_of_dicts])
475
+ scenarios = []
476
+ for d in list_of_dicts:
477
+ scenarios.append(Scenario(d))
478
+ return ScenarioList(scenarios)
479
+ # return ScenarioList([Scenario(d) for d in list_of_dicts])
457
480
 
458
481
  def to_agent_list(self, remove_prefix: bool = True):
459
482
  """Convert the results to a list of dictionaries, one per agent.
@@ -497,7 +520,7 @@ class DatasetExportMixin:
497
520
 
498
521
  return list_of_dicts
499
522
 
500
- def to_list(self, flatten=False, remove_none=False) -> list[list]:
523
+ def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
501
524
  """Convert the results to a list of lists.
502
525
 
503
526
  :param flatten: Whether to flatten the list of lists.
@@ -593,7 +616,7 @@ class DatasetExportMixin:
593
616
  return filename
594
617
 
595
618
  def tally(
596
- self, *fields: Optional[str], top_n: Optional[int] = None, output="dict"
619
+ self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
597
620
  ) -> Union[dict, "Dataset"]:
598
621
  """Tally the values of a field or perform a cross-tab of multiple fields.
599
622
 
@@ -601,9 +624,11 @@ class DatasetExportMixin:
601
624
 
602
625
  >>> from edsl.results import Results
603
626
  >>> r = Results.example()
604
- >>> r.select('how_feeling').tally('answer.how_feeling')
627
+ >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
605
628
  {'OK': 2, 'Great': 1, 'Terrible': 1}
606
- >>> r.select('how_feeling', 'period').tally('how_feeling', 'period')
629
+ >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset")
630
+ Dataset([{'value': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
631
+ >>> r.select('how_feeling', 'period').tally('how_feeling', 'period', output = "dict")
607
632
  {('OK', 'morning'): 1, ('Great', 'afternoon'): 1, ('Terrible', 'morning'): 1, ('OK', 'afternoon'): 1}
608
633
  """
609
634
  from collections import Counter
@@ -615,6 +640,8 @@ class DatasetExportMixin:
615
640
  column.split(".")[-1] for column in self.relevant_columns()
616
641
  ]
617
642
 
643
+ # breakpoint()
644
+
618
645
  if not all(
619
646
  f in self.relevant_columns() or f in relevant_columns_without_prefix
620
647
  for f in fields
@@ -641,6 +668,7 @@ class DatasetExportMixin:
641
668
  from edsl.results.Dataset import Dataset
642
669
 
643
670
  if output == "dict":
671
+ # why did I do this?
644
672
  warnings.warn(
645
673
  textwrap.dedent(
646
674
  """\