edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. edsl/Base.py +413 -332
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +57 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +1071 -867
  7. edsl/agents/AgentList.py +551 -413
  8. edsl/agents/Invigilator.py +284 -233
  9. edsl/agents/InvigilatorBase.py +257 -270
  10. edsl/agents/PromptConstructor.py +272 -354
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/__init__.py +2 -3
  14. edsl/agents/descriptors.py +99 -99
  15. edsl/agents/prompt_helpers.py +129 -129
  16. edsl/agents/question_option_processor.py +172 -0
  17. edsl/auto/AutoStudy.py +130 -117
  18. edsl/auto/StageBase.py +243 -230
  19. edsl/auto/StageGenerateSurvey.py +178 -178
  20. edsl/auto/StageLabelQuestions.py +125 -125
  21. edsl/auto/StagePersona.py +61 -61
  22. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  23. edsl/auto/StagePersonaDimensionValues.py +74 -74
  24. edsl/auto/StagePersonaDimensions.py +69 -69
  25. edsl/auto/StageQuestions.py +74 -73
  26. edsl/auto/SurveyCreatorPipeline.py +21 -21
  27. edsl/auto/utilities.py +218 -224
  28. edsl/base/Base.py +279 -279
  29. edsl/config.py +177 -157
  30. edsl/conversation/Conversation.py +290 -290
  31. edsl/conversation/car_buying.py +59 -58
  32. edsl/conversation/chips.py +95 -95
  33. edsl/conversation/mug_negotiation.py +81 -81
  34. edsl/conversation/next_speaker_utilities.py +93 -93
  35. edsl/coop/CoopFunctionsMixin.py +15 -0
  36. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  37. edsl/coop/PriceFetcher.py +54 -54
  38. edsl/coop/__init__.py +2 -2
  39. edsl/coop/coop.py +1106 -1028
  40. edsl/coop/utils.py +131 -131
  41. edsl/data/Cache.py +573 -555
  42. edsl/data/CacheEntry.py +230 -233
  43. edsl/data/CacheHandler.py +168 -149
  44. edsl/data/RemoteCacheSync.py +186 -78
  45. edsl/data/SQLiteDict.py +292 -292
  46. edsl/data/__init__.py +5 -4
  47. edsl/data/hack.py +10 -0
  48. edsl/data/orm.py +10 -10
  49. edsl/data_transfer_models.py +74 -73
  50. edsl/enums.py +202 -175
  51. edsl/exceptions/BaseException.py +21 -21
  52. edsl/exceptions/__init__.py +54 -54
  53. edsl/exceptions/agents.py +54 -42
  54. edsl/exceptions/cache.py +5 -5
  55. edsl/exceptions/configuration.py +16 -16
  56. edsl/exceptions/coop.py +10 -10
  57. edsl/exceptions/data.py +14 -14
  58. edsl/exceptions/general.py +34 -34
  59. edsl/exceptions/inference_services.py +5 -0
  60. edsl/exceptions/jobs.py +33 -33
  61. edsl/exceptions/language_models.py +63 -63
  62. edsl/exceptions/prompts.py +15 -15
  63. edsl/exceptions/questions.py +109 -91
  64. edsl/exceptions/results.py +29 -29
  65. edsl/exceptions/scenarios.py +29 -22
  66. edsl/exceptions/surveys.py +37 -37
  67. edsl/inference_services/AnthropicService.py +106 -87
  68. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  69. edsl/inference_services/AvailableModelFetcher.py +215 -0
  70. edsl/inference_services/AwsBedrock.py +118 -120
  71. edsl/inference_services/AzureAI.py +215 -217
  72. edsl/inference_services/DeepInfraService.py +18 -18
  73. edsl/inference_services/GoogleService.py +143 -148
  74. edsl/inference_services/GroqService.py +20 -20
  75. edsl/inference_services/InferenceServiceABC.py +80 -147
  76. edsl/inference_services/InferenceServicesCollection.py +138 -97
  77. edsl/inference_services/MistralAIService.py +120 -123
  78. edsl/inference_services/OllamaService.py +18 -18
  79. edsl/inference_services/OpenAIService.py +236 -224
  80. edsl/inference_services/PerplexityService.py +160 -163
  81. edsl/inference_services/ServiceAvailability.py +135 -0
  82. edsl/inference_services/TestService.py +90 -89
  83. edsl/inference_services/TogetherAIService.py +172 -170
  84. edsl/inference_services/data_structures.py +134 -0
  85. edsl/inference_services/models_available_cache.py +118 -118
  86. edsl/inference_services/rate_limits_cache.py +25 -25
  87. edsl/inference_services/registry.py +41 -41
  88. edsl/inference_services/write_available.py +10 -10
  89. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  90. edsl/jobs/Answers.py +43 -56
  91. edsl/jobs/FetchInvigilator.py +47 -0
  92. edsl/jobs/InterviewTaskManager.py +98 -0
  93. edsl/jobs/InterviewsConstructor.py +50 -0
  94. edsl/jobs/Jobs.py +823 -898
  95. edsl/jobs/JobsChecks.py +172 -147
  96. edsl/jobs/JobsComponentConstructor.py +189 -0
  97. edsl/jobs/JobsPrompts.py +270 -268
  98. edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
  99. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  100. edsl/jobs/RequestTokenEstimator.py +30 -0
  101. edsl/jobs/__init__.py +1 -1
  102. edsl/jobs/async_interview_runner.py +138 -0
  103. edsl/jobs/buckets/BucketCollection.py +104 -63
  104. edsl/jobs/buckets/ModelBuckets.py +65 -65
  105. edsl/jobs/buckets/TokenBucket.py +283 -251
  106. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  107. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  108. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  109. edsl/jobs/data_structures.py +120 -0
  110. edsl/jobs/decorators.py +35 -0
  111. edsl/jobs/interviews/Interview.py +396 -661
  112. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  113. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  114. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  115. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  116. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  117. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  118. edsl/jobs/interviews/ReportErrors.py +66 -66
  119. edsl/jobs/interviews/interview_status_enum.py +9 -9
  120. edsl/jobs/jobs_status_enums.py +9 -0
  121. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  122. edsl/jobs/results_exceptions_handler.py +98 -0
  123. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
  124. edsl/jobs/runners/JobsRunnerStatus.py +297 -330
  125. edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
  126. edsl/jobs/tasks/TaskCreators.py +64 -64
  127. edsl/jobs/tasks/TaskHistory.py +470 -450
  128. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  129. edsl/jobs/tasks/task_status_enum.py +161 -163
  130. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  131. edsl/jobs/tokens/TokenUsage.py +34 -34
  132. edsl/language_models/ComputeCost.py +63 -0
  133. edsl/language_models/LanguageModel.py +626 -668
  134. edsl/language_models/ModelList.py +164 -155
  135. edsl/language_models/PriceManager.py +127 -0
  136. edsl/language_models/RawResponseHandler.py +106 -0
  137. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  138. edsl/language_models/ServiceDataSources.py +0 -0
  139. edsl/language_models/__init__.py +2 -3
  140. edsl/language_models/fake_openai_call.py +15 -15
  141. edsl/language_models/fake_openai_service.py +61 -61
  142. edsl/language_models/key_management/KeyLookup.py +63 -0
  143. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  144. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  145. edsl/language_models/key_management/__init__.py +0 -0
  146. edsl/language_models/key_management/models.py +131 -0
  147. edsl/language_models/model.py +256 -0
  148. edsl/language_models/repair.py +156 -156
  149. edsl/language_models/utilities.py +65 -64
  150. edsl/notebooks/Notebook.py +263 -258
  151. edsl/notebooks/NotebookToLaTeX.py +142 -0
  152. edsl/notebooks/__init__.py +1 -1
  153. edsl/prompts/Prompt.py +352 -362
  154. edsl/prompts/__init__.py +2 -2
  155. edsl/questions/ExceptionExplainer.py +77 -0
  156. edsl/questions/HTMLQuestion.py +103 -0
  157. edsl/questions/QuestionBase.py +518 -664
  158. edsl/questions/QuestionBasePromptsMixin.py +221 -217
  159. edsl/questions/QuestionBudget.py +227 -227
  160. edsl/questions/QuestionCheckBox.py +359 -359
  161. edsl/questions/QuestionExtract.py +180 -182
  162. edsl/questions/QuestionFreeText.py +113 -114
  163. edsl/questions/QuestionFunctional.py +166 -166
  164. edsl/questions/QuestionList.py +223 -231
  165. edsl/questions/QuestionMatrix.py +265 -0
  166. edsl/questions/QuestionMultipleChoice.py +330 -286
  167. edsl/questions/QuestionNumerical.py +151 -153
  168. edsl/questions/QuestionRank.py +314 -324
  169. edsl/questions/Quick.py +41 -41
  170. edsl/questions/SimpleAskMixin.py +74 -73
  171. edsl/questions/__init__.py +27 -26
  172. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
  173. edsl/questions/compose_questions.py +98 -98
  174. edsl/questions/data_structures.py +20 -0
  175. edsl/questions/decorators.py +21 -21
  176. edsl/questions/derived/QuestionLikertFive.py +76 -76
  177. edsl/questions/derived/QuestionLinearScale.py +90 -87
  178. edsl/questions/derived/QuestionTopK.py +93 -93
  179. edsl/questions/derived/QuestionYesNo.py +82 -82
  180. edsl/questions/descriptors.py +427 -413
  181. edsl/questions/loop_processor.py +149 -0
  182. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  183. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  184. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  185. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  186. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  187. edsl/questions/prompt_templates/question_list.jinja +17 -17
  188. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  189. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  190. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
  191. edsl/questions/question_registry.py +177 -177
  192. edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
  193. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
  194. edsl/questions/response_validator_factory.py +34 -0
  195. edsl/questions/settings.py +12 -12
  196. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  197. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  198. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  199. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  200. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  201. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  202. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  203. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  204. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  205. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  206. edsl/questions/templates/list/question_presentation.jinja +5 -5
  207. edsl/questions/templates/matrix/__init__.py +1 -0
  208. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  209. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  210. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  211. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  212. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  213. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  214. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  215. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  216. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  217. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  218. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  219. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  220. edsl/results/CSSParameterizer.py +108 -108
  221. edsl/results/Dataset.py +587 -424
  222. edsl/results/DatasetExportMixin.py +594 -731
  223. edsl/results/DatasetTree.py +295 -275
  224. edsl/results/MarkdownToDocx.py +122 -0
  225. edsl/results/MarkdownToPDF.py +111 -0
  226. edsl/results/Result.py +557 -465
  227. edsl/results/Results.py +1183 -1165
  228. edsl/results/ResultsExportMixin.py +45 -43
  229. edsl/results/ResultsGGMixin.py +121 -121
  230. edsl/results/TableDisplay.py +125 -198
  231. edsl/results/TextEditor.py +50 -0
  232. edsl/results/__init__.py +2 -2
  233. edsl/results/file_exports.py +252 -0
  234. edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
  235. edsl/results/{Selector.py → results_selector.py} +145 -135
  236. edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
  237. edsl/results/smart_objects.py +96 -0
  238. edsl/results/table_data_class.py +12 -0
  239. edsl/results/table_display.css +77 -77
  240. edsl/results/table_renderers.py +118 -0
  241. edsl/results/tree_explore.py +115 -115
  242. edsl/scenarios/ConstructDownloadLink.py +109 -0
  243. edsl/scenarios/DocumentChunker.py +102 -0
  244. edsl/scenarios/DocxScenario.py +16 -0
  245. edsl/scenarios/FileStore.py +511 -632
  246. edsl/scenarios/PdfExtractor.py +40 -0
  247. edsl/scenarios/Scenario.py +498 -601
  248. edsl/scenarios/ScenarioHtmlMixin.py +65 -64
  249. edsl/scenarios/ScenarioList.py +1458 -1287
  250. edsl/scenarios/ScenarioListExportMixin.py +45 -52
  251. edsl/scenarios/ScenarioListPdfMixin.py +239 -261
  252. edsl/scenarios/__init__.py +3 -4
  253. edsl/scenarios/directory_scanner.py +96 -0
  254. edsl/scenarios/file_methods.py +85 -0
  255. edsl/scenarios/handlers/__init__.py +13 -0
  256. edsl/scenarios/handlers/csv.py +38 -0
  257. edsl/scenarios/handlers/docx.py +76 -0
  258. edsl/scenarios/handlers/html.py +37 -0
  259. edsl/scenarios/handlers/json.py +111 -0
  260. edsl/scenarios/handlers/latex.py +5 -0
  261. edsl/scenarios/handlers/md.py +51 -0
  262. edsl/scenarios/handlers/pdf.py +68 -0
  263. edsl/scenarios/handlers/png.py +39 -0
  264. edsl/scenarios/handlers/pptx.py +105 -0
  265. edsl/scenarios/handlers/py.py +294 -0
  266. edsl/scenarios/handlers/sql.py +313 -0
  267. edsl/scenarios/handlers/sqlite.py +149 -0
  268. edsl/scenarios/handlers/txt.py +33 -0
  269. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
  270. edsl/scenarios/scenario_selector.py +156 -0
  271. edsl/shared.py +1 -1
  272. edsl/study/ObjectEntry.py +173 -173
  273. edsl/study/ProofOfWork.py +113 -113
  274. edsl/study/SnapShot.py +80 -80
  275. edsl/study/Study.py +521 -528
  276. edsl/study/__init__.py +4 -4
  277. edsl/surveys/ConstructDAG.py +92 -0
  278. edsl/surveys/DAG.py +148 -148
  279. edsl/surveys/EditSurvey.py +221 -0
  280. edsl/surveys/InstructionHandler.py +100 -0
  281. edsl/surveys/Memory.py +31 -31
  282. edsl/surveys/MemoryManagement.py +72 -0
  283. edsl/surveys/MemoryPlan.py +244 -244
  284. edsl/surveys/Rule.py +327 -326
  285. edsl/surveys/RuleCollection.py +385 -387
  286. edsl/surveys/RuleManager.py +172 -0
  287. edsl/surveys/Simulator.py +75 -0
  288. edsl/surveys/Survey.py +1280 -1801
  289. edsl/surveys/SurveyCSS.py +273 -261
  290. edsl/surveys/SurveyExportMixin.py +259 -259
  291. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
  292. edsl/surveys/SurveyQualtricsImport.py +284 -284
  293. edsl/surveys/SurveyToApp.py +141 -0
  294. edsl/surveys/__init__.py +5 -3
  295. edsl/surveys/base.py +53 -53
  296. edsl/surveys/descriptors.py +60 -56
  297. edsl/surveys/instructions/ChangeInstruction.py +48 -49
  298. edsl/surveys/instructions/Instruction.py +56 -65
  299. edsl/surveys/instructions/InstructionCollection.py +82 -77
  300. edsl/templates/error_reporting/base.html +23 -23
  301. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  302. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  303. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  304. edsl/templates/error_reporting/interview_details.html +115 -115
  305. edsl/templates/error_reporting/interviews.html +19 -19
  306. edsl/templates/error_reporting/overview.html +4 -4
  307. edsl/templates/error_reporting/performance_plot.html +1 -1
  308. edsl/templates/error_reporting/report.css +73 -73
  309. edsl/templates/error_reporting/report.html +117 -117
  310. edsl/templates/error_reporting/report.js +25 -25
  311. edsl/test_h +1 -0
  312. edsl/tools/__init__.py +1 -1
  313. edsl/tools/clusters.py +192 -192
  314. edsl/tools/embeddings.py +27 -27
  315. edsl/tools/embeddings_plotting.py +118 -118
  316. edsl/tools/plotting.py +112 -112
  317. edsl/tools/summarize.py +18 -18
  318. edsl/utilities/PrettyList.py +56 -0
  319. edsl/utilities/SystemInfo.py +28 -28
  320. edsl/utilities/__init__.py +22 -22
  321. edsl/utilities/ast_utilities.py +25 -25
  322. edsl/utilities/data/Registry.py +6 -6
  323. edsl/utilities/data/__init__.py +1 -1
  324. edsl/utilities/data/scooter_results.json +1 -1
  325. edsl/utilities/decorators.py +77 -77
  326. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  327. edsl/utilities/gcp_bucket/example.py +50 -0
  328. edsl/utilities/interface.py +627 -627
  329. edsl/utilities/is_notebook.py +18 -0
  330. edsl/utilities/is_valid_variable_name.py +11 -0
  331. edsl/utilities/naming_utilities.py +263 -263
  332. edsl/utilities/remove_edsl_version.py +24 -0
  333. edsl/utilities/repair_functions.py +28 -28
  334. edsl/utilities/restricted_python.py +70 -70
  335. edsl/utilities/utilities.py +436 -424
  336. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/LICENSE +21 -21
  337. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/METADATA +13 -11
  338. edsl-0.1.39.dev4.dist-info/RECORD +361 -0
  339. edsl/language_models/KeyLookup.py +0 -30
  340. edsl/language_models/registry.py +0 -190
  341. edsl/language_models/unused/ReplicateBase.py +0 -83
  342. edsl/results/ResultsDBMixin.py +0 -238
  343. edsl-0.1.39.dev3.dist-info/RECORD +0 -277
  344. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/WHEEL +0 -0
@@ -1,1287 +1,1458 @@
1
- """A list of Scenarios to be used in a survey."""
2
-
3
- from __future__ import annotations
4
- from typing import Any, Optional, Union, List, Callable
5
- import csv
6
- import random
7
- from collections import UserList, Counter
8
- from collections.abc import Iterable
9
- import urllib.parse
10
- import urllib.request
11
- from io import StringIO
12
- from collections import defaultdict
13
- import inspect
14
-
15
- from simpleeval import EvalWithCompoundTypes
16
-
17
- from edsl.Base import Base
18
- from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
19
- from edsl.scenarios.Scenario import Scenario
20
- from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
21
- from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
22
-
23
- from edsl.utilities.naming_utilities import sanitize_string
24
- from edsl.utilities.utilities import is_valid_variable_name
25
-
26
-
27
- class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
28
- pass
29
-
30
-
31
- class ScenarioList(Base, UserList, ScenarioListMixin):
32
- """Class for creating a list of scenarios to be used in a survey."""
33
-
34
- __documentation__ = (
35
- "https://docs.expectedparrot.com/en/latest/scenarios.html#scenariolist"
36
- )
37
-
38
- def __init__(self, data: Optional[list] = None, codebook: Optional[dict] = None):
39
- """Initialize the ScenarioList class."""
40
- if data is not None:
41
- super().__init__(data)
42
- else:
43
- super().__init__([])
44
- self.codebook = codebook or {}
45
-
46
- def unique(self) -> ScenarioList:
47
- """Return a list of unique scenarios.
48
-
49
- >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'a': 1}), Scenario({'a': 2})])
50
- >>> s.unique()
51
- ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
52
- """
53
- return ScenarioList(list(set(self)))
54
-
55
- @property
56
- def has_jinja_braces(self) -> bool:
57
- """Check if the ScenarioList has Jinja braces."""
58
- return any([scenario.has_jinja_braces for scenario in self])
59
-
60
- def convert_jinja_braces(self) -> ScenarioList:
61
- """Convert Jinja braces to Python braces."""
62
- return ScenarioList([scenario.convert_jinja_braces() for scenario in self])
63
-
64
- def give_valid_names(self) -> ScenarioList:
65
- """Give valid names to the scenario keys.
66
-
67
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
68
- >>> s.give_valid_names()
69
- ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
70
- >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
71
- >>> s.give_valid_names()
72
- ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
73
- """
74
- codebook = {}
75
- new_scenaerios = []
76
- for scenario in self:
77
- new_scenario = {}
78
- for key in scenario:
79
- if not is_valid_variable_name(key):
80
- if key in codebook:
81
- new_key = codebook[key]
82
- else:
83
- new_key = sanitize_string(key)
84
- if not is_valid_variable_name(new_key):
85
- new_key = f"var_{len(codebook)}"
86
- codebook[key] = new_key
87
- new_scenario[new_key] = scenario[key]
88
- else:
89
- new_scenario[key] = scenario[key]
90
- new_scenaerios.append(Scenario(new_scenario))
91
- return ScenarioList(new_scenaerios, codebook)
92
-
93
- def unpivot(self, id_vars=None, value_vars=None):
94
- """
95
- Unpivot the ScenarioList, allowing for id variables to be specified.
96
-
97
- Parameters:
98
- id_vars (list): Fields to use as identifier variables (kept in each entry)
99
- value_vars (list): Fields to unpivot. If None, all fields not in id_vars will be used.
100
-
101
- Example:
102
- >>> s = ScenarioList([
103
- ... Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
104
- ... Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
105
- ... ])
106
- >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
107
- ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
108
- """
109
- if id_vars is None:
110
- id_vars = []
111
- if value_vars is None:
112
- value_vars = [field for field in self[0].keys() if field not in id_vars]
113
-
114
- new_scenarios = []
115
- for scenario in self:
116
- for var in value_vars:
117
- new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
118
- new_scenario["variable"] = var
119
- new_scenario["value"] = scenario[var]
120
- new_scenarios.append(Scenario(new_scenario))
121
-
122
- return ScenarioList(new_scenarios)
123
-
124
- def pivot(self, id_vars, var_name="variable", value_name="value"):
125
- """
126
- Pivot the ScenarioList from long to wide format.
127
-
128
- Parameters:
129
- id_vars (list): Fields to use as identifier variables
130
- var_name (str): Name of the variable column (default: 'variable')
131
- value_name (str): Name of the value column (default: 'value')
132
-
133
- Example:
134
- >>> s = ScenarioList([
135
- ... Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
136
- ... Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
137
- ... Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
138
- ... Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
139
- ... ])
140
- >>> s.pivot(id_vars=['id', 'year'])
141
- ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
142
- """
143
- pivoted_dict = {}
144
-
145
- for scenario in self:
146
- # Create a tuple of id values to use as a key
147
- id_key = tuple(scenario[id_var] for id_var in id_vars)
148
-
149
- # If this combination of id values hasn't been seen before, initialize it
150
- if id_key not in pivoted_dict:
151
- pivoted_dict[id_key] = {id_var: scenario[id_var] for id_var in id_vars}
152
-
153
- # Add the variable-value pair to the dict
154
- variable = scenario[var_name]
155
- value = scenario[value_name]
156
- pivoted_dict[id_key][variable] = value
157
-
158
- # Convert the dict of dicts to a list of Scenarios
159
- pivoted_scenarios = [
160
- Scenario(dict(zip(id_vars, id_key), **values))
161
- for id_key, values in pivoted_dict.items()
162
- ]
163
-
164
- return ScenarioList(pivoted_scenarios)
165
-
166
- def group_by(self, id_vars, variables, func):
167
- """
168
- Group the ScenarioList by id_vars and apply a function to the specified variables.
169
-
170
- Parameters:
171
- id_vars (list): Fields to use as identifier variables for grouping
172
- variables (list): Fields to pass to the aggregation function
173
- func (callable): Function to apply to the grouped variables.
174
- Should accept lists of values for each variable.
175
-
176
- Returns:
177
- ScenarioList: A new ScenarioList with the grouped and aggregated results
178
-
179
- Example:
180
- >>> def avg_sum(a, b):
181
- ... return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
182
- >>> s = ScenarioList([
183
- ... Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
184
- ... Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
185
- ... Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
186
- ... Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
187
- ... ])
188
- >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
189
- ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
190
- """
191
- # Check if the function is compatible with the specified variables
192
- func_params = inspect.signature(func).parameters
193
- if len(func_params) != len(variables):
194
- raise ValueError(
195
- f"Function {func.__name__} expects {len(func_params)} arguments, but {len(variables)} variables were provided"
196
- )
197
-
198
- # Group the scenarios
199
- grouped = defaultdict(lambda: defaultdict(list))
200
- for scenario in self:
201
- key = tuple(scenario[id_var] for id_var in id_vars)
202
- for var in variables:
203
- grouped[key][var].append(scenario[var])
204
-
205
- # Apply the function to each group
206
- result = []
207
- for key, group in grouped.items():
208
- try:
209
- aggregated = func(*[group[var] for var in variables])
210
- except Exception as e:
211
- raise ValueError(f"Error applying function to group {key}: {str(e)}")
212
-
213
- if not isinstance(aggregated, dict):
214
- raise ValueError(f"Function {func.__name__} must return a dictionary")
215
-
216
- new_scenario = dict(zip(id_vars, key))
217
- new_scenario.update(aggregated)
218
- result.append(Scenario(new_scenario))
219
-
220
- return ScenarioList(result)
221
-
222
- @property
223
- def parameters(self) -> set:
224
- """Return the set of parameters in the ScenarioList
225
-
226
- Example:
227
-
228
- >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'b': 2})])
229
- >>> s.parameters == {'a', 'b'}
230
- True
231
- """
232
- if len(self) == 0:
233
- return set()
234
-
235
- return set.union(*[set(s.keys()) for s in self])
236
-
237
- def __hash__(self) -> int:
238
- """Return the hash of the ScenarioList.
239
-
240
- >>> s = ScenarioList.example()
241
- >>> hash(s)
242
- 1262252885757976162
243
- """
244
- from edsl.utilities.utilities import dict_hash
245
-
246
- return dict_hash(self.to_dict(sort=True, add_edsl_version=False))
247
-
248
- def __eq__(self, other: Any) -> bool:
249
- return hash(self) == hash(other)
250
-
251
- def __repr__(self):
252
- return f"ScenarioList({self.data})"
253
-
254
- def __mul__(self, other: ScenarioList) -> ScenarioList:
255
- """Takes the cross product of two ScenarioLists.
256
-
257
- >>> s1 = ScenarioList.from_list("a", [1, 2])
258
- >>> s2 = ScenarioList.from_list("b", [3, 4])
259
- >>> s1 * s2
260
- ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
261
- """
262
- from itertools import product
263
-
264
- new_sl = []
265
- for s1, s2 in list(product(self, other)):
266
- new_sl.append(s1 + s2)
267
- return ScenarioList(new_sl)
268
-
269
- def times(self, other: ScenarioList) -> ScenarioList:
270
- """Takes the cross product of two ScenarioLists.
271
-
272
- Example:
273
-
274
- >>> s1 = ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
275
- >>> s2 = ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
276
- >>> s1.times(s2)
277
- ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
278
- """
279
- return self.__mul__(other)
280
-
281
- def shuffle(self, seed: Optional[str] = "edsl") -> ScenarioList:
282
- """Shuffle the ScenarioList.
283
-
284
- >>> s = ScenarioList.from_list("a", [1,2,3,4])
285
- >>> s.shuffle()
286
- ScenarioList([Scenario({'a': 3}), Scenario({'a': 4}), Scenario({'a': 1}), Scenario({'a': 2})])
287
- """
288
- random.seed(seed)
289
- random.shuffle(self.data)
290
- return self
291
-
292
- def _repr_html_(self):
293
- """Return an HTML representation of the AgentList."""
294
- # return (
295
- # str(self.summary(format="html")) + "<br>" + str(self.table(tablefmt="html"))
296
- # )
297
- footer = f"<a href={self.__documentation__}>(docs)</a>"
298
- return str(self.summary(format="html")) + footer
299
-
300
- # def _repr_html_(self) -> str:
301
- # from edsl.utilities.utilities import data_to_html
302
-
303
- # data = self.to_dict()
304
- # _ = data.pop("edsl_version")
305
- # _ = data.pop("edsl_class_name")
306
- # for s in data["scenarios"]:
307
- # _ = s.pop("edsl_version")
308
- # _ = s.pop("edsl_class_name")
309
- # for scenario in data["scenarios"]:
310
- # for key, value in scenario.items():
311
- # if hasattr(value, "to_dict"):
312
- # data[key] = value.to_dict()
313
- # return data_to_html(data)
314
-
315
- # def tally(self, field) -> dict:
316
- # """Return a tally of the values in the field.
317
-
318
- # Example:
319
-
320
- # >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
321
- # >>> s.tally('b')
322
- # {1: 1, 2: 1}
323
- # """
324
- # return dict(Counter([scenario[field] for scenario in self]))
325
-
326
- def sample(self, n: int, seed: Optional[str] = None) -> ScenarioList:
327
- """Return a random sample from the ScenarioList
328
-
329
- >>> s = ScenarioList.from_list("a", [1,2,3,4,5,6])
330
- >>> s.sample(3, seed = "edsl")
331
- ScenarioList([Scenario({'a': 2}), Scenario({'a': 1}), Scenario({'a': 3})])
332
- """
333
- if seed:
334
- random.seed(seed)
335
-
336
- return ScenarioList(random.sample(self.data, n))
337
-
338
- def expand(self, expand_field: str, number_field=False) -> ScenarioList:
339
- """Expand the ScenarioList by a field.
340
-
341
- Example:
342
-
343
- >>> s = ScenarioList( [ Scenario({'a':1, 'b':[1,2]}) ] )
344
- >>> s.expand('b')
345
- ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
346
- """
347
- new_scenarios = []
348
- for scenario in self:
349
- values = scenario[expand_field]
350
- if not isinstance(values, Iterable) or isinstance(values, str):
351
- values = [values]
352
- for index, value in enumerate(values):
353
- new_scenario = scenario.copy()
354
- new_scenario[expand_field] = value
355
- if number_field:
356
- new_scenario[expand_field + "_number"] = index + 1
357
- new_scenarios.append(new_scenario)
358
- return ScenarioList(new_scenarios)
359
-
360
def concatenate(self, fields: List[str], separator: str = ";") -> "ScenarioList":
    """Merge several fields into one string-valued field.

    The listed fields that are present in a scenario are removed from it,
    converted with ``str`` and joined with ``separator``; the result is
    stored under ``concat_<field1>_<field2>_...``.

    Args:
        fields (List[str]): List of field names to concatenate.
        separator (str, optional): Separator to use between field values. Defaults to ";".

    Returns:
        ScenarioList: A new ScenarioList with concatenated fields.

    Example:
        >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
        >>> s.concatenate(['a', 'b', 'c'])
        ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
    """
    result = []
    for source in self:
        merged = source.copy()
        pieces = []
        for name in fields:
            if name not in merged:
                continue
            pieces.append(str(merged[name]))
            del merged[name]

        target = f"concat_{'_'.join(fields)}"
        merged[target] = separator.join(pieces)
        result.append(merged)

    return ScenarioList(result)
389
-
390
def unpack_dict(
    self, field: str, prefix: Optional[str] = None, drop_field: bool = False
) -> ScenarioList:
    """Promote the entries of a dictionary-valued field to top-level fields.

    Each key of ``scenario[field]`` becomes a field of the new scenario,
    prefixed with ``prefix`` when one is given. The dictionary field itself
    is removed only when ``drop_field`` is true.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
    >>> s.unpack_dict('b')
    ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
    """
    unpacked = []
    for original in self:
        widened = original.copy()
        for inner_key, inner_value in original[field].items():
            target = prefix + inner_key if prefix else inner_key
            widened[target] = inner_value
        if drop_field:
            widened.pop(field)
        unpacked.append(widened)
    return ScenarioList(unpacked)
413
-
414
def transform(
    self, field: str, func: Callable, new_name: Optional[str] = None
) -> ScenarioList:
    """Apply ``func`` to ``field`` in every scenario.

    The result is written to ``new_name`` when one is given, otherwise it
    overwrites ``field``. The original scenarios are copied, not modified.
    """
    transformed = []
    for original in self:
        updated = original.copy()
        target = new_name or field
        updated[target] = func(original[field])
        transformed.append(updated)
    return ScenarioList(transformed)
424
-
425
def mutate(
    self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
) -> ScenarioList:
    """
    Return a new ScenarioList in which every scenario has one extra variable.

    ``new_var_string`` has the form ``"<name> = <expression>"``. The
    expression is evaluated once per scenario, with the scenario's fields
    available as names and ``functions_dict`` available as functions.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.mutate("c = a + b")
    ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 1, 'b': 1, 'c': 2})])

    """
    if "=" not in new_var_string:
        raise Exception(
            f"Mutate requires an '=' in the string, but '{new_var_string}' doesn't have one."
        )
    # Split on the first '=' only, so the expression may itself contain '='.
    left_side, expression = new_var_string.split("=", 1)
    var_name = left_side.strip()
    from edsl.utilities.utilities import is_valid_variable_name

    if not is_valid_variable_name(var_name):
        raise Exception(f"{var_name} is not a valid variable name.")

    available_functions = functions_dict or {}

    try:
        mutated = []
        for scenario in self:
            evaluator = EvalWithCompoundTypes(
                names=scenario, functions=available_functions
            )
            result = evaluator.eval(expression)
            updated = scenario.copy()
            updated[var_name] = result
            mutated.append(updated)
    except Exception as e:
        raise Exception(f"Error in mutate. Exception:{e}")

    return ScenarioList(mutated)
468
-
469
def order_by(self, *fields: str, reverse: bool = False) -> ScenarioList:
    """Return the scenarios sorted by the given fields, compared in order.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.order_by('b', 'a')
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    """
    ordered = sorted(
        self,
        key=lambda scenario: tuple(scenario[name] for name in fields),
        reverse=reverse,
    )
    return ScenarioList(ordered)
483
-
484
def filter(self, expression: str) -> ScenarioList:
    """
    Return the scenarios for which ``expression`` evaluates to a truthy value.

    The expression is evaluated once per scenario, with the scenario's
    fields available as names.

    :param expression: Expression to evaluate against each scenario.
    :raises Exception: If evaluating the expression fails for any scenario;
        the original error is attached as the cause.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.filter("b == 2")
    ScenarioList([Scenario({'a': 1, 'b': 2})])
    """
    try:
        kept = [
            scenario
            for scenario in self.data
            if EvalWithCompoundTypes(names=scenario).eval(expression)
        ]
    except Exception as e:
        # Report the failure through the exception only (no stdout output),
        # and keep the underlying error reachable via __cause__.
        raise Exception(f"Error in filter. Exception:{e}") from e

    return ScenarioList(kept)
513
-
514
def from_urls(
    self, urls: list[str], field_name: Optional[str] = "text"
) -> ScenarioList:
    """Build a ScenarioList with one scenario per URL.

    Each scenario is created with ``Scenario.from_url(url, field_name)``.

    :param urls: A list of URLs.
    :param field_name: The name of the field to store the text from the URLs.
    """
    scenarios = []
    for url in urls:
        scenarios.append(Scenario.from_url(url, field_name))
    return ScenarioList(scenarios)
525
-
526
def select(self, *fields) -> ScenarioList:
    """
    Return scenarios restricted to the referenced fields.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.select('a')
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
    """
    # One field or many, the per-scenario call always receives a list.
    wanted = list(fields)
    narrowed = []
    for scenario in self.data:
        narrowed.append(scenario.select(wanted))
    return ScenarioList(narrowed)
544
-
545
def drop(self, *fields) -> ScenarioList:
    """Return scenarios with the given fields removed.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.drop('a')
    ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
    """
    remaining = []
    for scenario in self.data:
        # ``fields`` is forwarded as the tuple of positional arguments.
        remaining.append(scenario.drop(fields))
    return ScenarioList(remaining)
555
-
556
def keep(self, *fields) -> ScenarioList:
    """Return scenarios containing only the given fields.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.keep('a')
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
    """
    retained = []
    for scenario in self.data:
        # ``fields`` is forwarded as the tuple of positional arguments.
        retained.append(scenario.keep(fields))
    return ScenarioList(retained)
566
-
567
@classmethod
def from_list(
    cls, name: str, values: list, func: Optional[Callable] = None
) -> ScenarioList:
    """Build a ScenarioList with one single-field scenario per value.

    When ``func`` is given it is applied to each value before it is stored;
    otherwise values are stored unchanged.

    Example:

    >>> ScenarioList.from_list('name', ['Alice', 'Bob'])
    ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    """
    convert = func if func else (lambda x: x)
    scenarios = []
    for value in values:
        scenarios.append(Scenario({name: convert(value)}))
    return cls(scenarios)
581
-
582
def table(self, *fields, tablefmt=None, pretty_labels=None) -> str:
    """Return the ScenarioList as a table.

    The work is delegated to the ``table`` method of ``self.to_dataset()``.

    :param fields: Fields forwarded to the dataset's ``table`` method.
    :param tablefmt: Optional format name; must be one of
        ``tabulate.tabulate_formats`` when given.
    :param pretty_labels: Forwarded unchanged to the dataset's ``table`` method.
    :raises ValueError: If ``tablefmt`` is not a known tabulate format.
    """

    from tabulate import tabulate_formats

    if tablefmt is not None and tablefmt not in tabulate_formats:
        # Build one message string; passing two arguments to ValueError
        # would make the message render as a tuple.
        raise ValueError(
            f"Invalid table format: {tablefmt}. "
            f"Valid formats are: {tabulate_formats}"
        )
    return self.to_dataset().table(
        *fields, tablefmt=tablefmt, pretty_labels=pretty_labels
    )
595
-
596
def tree(self, node_list: Optional[List[str]] = None) -> str:
    """Return the ScenarioList as a tree, as produced by its dataset form."""
    dataset = self.to_dataset()
    return dataset.tree(node_list)
599
-
600
- def _summary(self):
601
- d = {
602
- "EDSL Class name": "ScenarioList",
603
- "# Scenarios": len(self),
604
- "Scenario Keys": list(self.parameters),
605
- }
606
- return d
607
-
608
def reorder_keys(self, new_order):
    """Rebuild every scenario with its keys in the order given by ``new_order``.

    Only the keys listed in ``new_order`` are carried over.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 3, 'b': 4})])
    >>> s.reorder_keys(['b', 'a'])
    ScenarioList([Scenario({'b': 2, 'a': 1}), Scenario({'b': 4, 'a': 3})])
    """
    reordered = [
        Scenario({key: scenario[key] for key in new_order}) for scenario in self
    ]
    return ScenarioList(reordered)
622
-
623
def to_dataset(self) -> "Dataset":
    """Convert to a Dataset holding one column per key of the first scenario.

    >>> s = ScenarioList.from_list("a", [1,2,3])
    >>> s.to_dataset()
    Dataset([{'a': [1, 2, 3]}])
    >>> s = ScenarioList.from_list("a", [1,2,3]).add_list("b", [4,5,6])
    >>> s.to_dataset()
    Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
    """
    from edsl.results.Dataset import Dataset

    columns = []
    # The key set is taken from the first scenario only.
    for key in self[0].keys():
        column = [scenario[key] for scenario in self.data]
        columns.append({key: column})
    return Dataset(columns)
637
-
638
def unpack(
    self, field: str, new_names: Optional[List[str]] = None, keep_original=True
) -> ScenarioList:
    """Spread the elements of a sequence-valued field over several fields.

    Without ``new_names`` the targets are ``<field>_0``, ``<field>_1``, ...,
    one per element of the first scenario's value. When exactly one target
    name is in play, the whole value is stored under it, not indexed.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': [2, True]}), Scenario({'a': 3, 'b': [3, False]})])
    >>> s.unpack('b')
    ScenarioList([Scenario({'a': 1, 'b': [2, True], 'b_0': 2, 'b_1': True}), Scenario({'a': 3, 'b': [3, False], 'b_0': 3, 'b_1': False})])
    >>> s.unpack('b', new_names=['c', 'd'], keep_original=False)
    ScenarioList([Scenario({'a': 1, 'c': 2, 'd': True}), Scenario({'a': 3, 'c': 3, 'd': False})])

    """
    new_names = new_names or [f"{field}_{i}" for i in range(len(self[0][field]))]
    unpacked = []
    for original in self:
        updated = original.copy()
        if len(new_names) != 1:
            for position, target in enumerate(new_names):
                updated[target] = original[field][position]
        else:
            updated[new_names[0]] = original[field]

        if not keep_original:
            del updated[field]
        unpacked.append(updated)
    return ScenarioList(unpacked)
666
-
667
def add_list(self, name, values) -> ScenarioList:
    """Attach ``values`` position by position under ``name``, in place.

    Values beyond the current length are appended as new single-field
    scenarios. The list itself is modified and returned.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.add_list('age', [30, 25])
    ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    """
    for position, item in enumerate(values):
        if position >= len(self):
            self.append(Scenario({name: item}))
            continue
        self[position][name] = item
    return self
682
-
683
def add_value(self, name: str, value: Any) -> ScenarioList:
    """Set ``name`` to the same ``value`` in every scenario, in place.

    The list itself is modified and returned.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.add_value('age', 30)
    ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 30})])
    """
    for entry in self:
        entry[name] = value
    return self
695
-
696
def rename(self, replacement_dict: dict) -> ScenarioList:
    """Return a new ScenarioList whose scenarios have their fields renamed.

    Each scenario's own ``rename`` is called with ``replacement_dict``.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    >>> s.rename({'name': 'first_name', 'age': 'years'})
    ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])

    """
    renamed = ScenarioList([])
    for scenario in self:
        renamed.append(scenario.rename(replacement_dict))
    return renamed
712
-
713
@classmethod
def from_sqlite(cls, filepath: str, table: str):
    """Create a ScenarioList from all rows of a table in a SQLite database.

    Each row becomes one scenario keyed by the table's column names.

    :param filepath: Path to the SQLite database file.
    :param table: Name of the table to read. It is interpolated into the
        query text, so it must come from a trusted source.
    """
    import sqlite3
    from contextlib import closing

    # The connection's own context manager only commits or rolls back; it
    # does not close the connection. closing() releases it on exit.
    with closing(sqlite3.connect(filepath)) as conn:
        cursor = conn.cursor()
        # NOTE(security): identifiers cannot be bound as SQL parameters, so
        # the table name is inserted directly. Never pass untrusted input.
        cursor.execute(f"SELECT * FROM {table}")
        columns = [description[0] for description in cursor.description]
        data = cursor.fetchall()
    return cls([Scenario(dict(zip(columns, row))) for row in data])
723
-
724
@classmethod
def from_latex(cls, tex_file_path: str):
    """Create one scenario per non-blank line of a LaTeX file.

    Every scenario has four fields: ``line_no`` (1-based position in the
    file, counting blank lines), ``text`` (the stripped line), and
    ``line_before`` / ``line_after`` (the neighbouring non-blank lines, or
    ``None`` at either end).

    :param tex_file_path: Path of the file to read.
    :return: An instance of ``cls``, so subclasses get their own type.
    """
    with open(tex_file_path, "r") as file:
        lines = file.readlines()

    non_blank_lines = [
        (i, line.strip()) for i, line in enumerate(lines) if line.strip()
    ]
    last_index = len(non_blank_lines) - 1

    scenarios = []
    for index, (line_no, text) in enumerate(non_blank_lines):
        scenarios.append(
            Scenario(
                {
                    "line_no": line_no + 1,  # Using 1-based index for line numbers
                    "text": text,
                    "line_before": (
                        non_blank_lines[index - 1][1] if index > 0 else None
                    ),
                    "line_after": (
                        non_blank_lines[index + 1][1]
                        if index < last_index
                        else None
                    ),
                }
            )
        )

    return cls(scenarios)
748
-
749
@classmethod
def from_google_doc(cls, url: str) -> ScenarioList:
    """Create a ScenarioList from a Google Doc.

    The document is downloaded in Word (.docx) format, written to a
    temporary file, and that file is handed to the ``from_docx`` class
    method. The temporary file is created with ``delete=False`` and is not
    removed by this method.

    Args:
        url (str): The URL to the Google Doc; it must contain ``/edit``.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the URL does not contain ``/edit``.

    """
    import tempfile
    import requests
    from docx import Document

    if "/edit" not in url:
        raise ValueError("Invalid Google Doc URL format.")
    # The document id sits between "/d/" and "/edit" in the URL.
    doc_id = url.split("/d/")[1].split("/edit")[0]

    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"

    response = requests.get(export_url)
    response.raise_for_status()  # Stop here on an unsuccessful download

    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    return cls.from_docx(temp_filename)
785
-
786
@classmethod
def from_pandas(cls, df) -> ScenarioList:
    """Create a ScenarioList with one scenario per row of a pandas DataFrame.

    Example:

    >>> import pandas as pd
    >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25], 'location': ['New York', 'Los Angeles']})
    >>> ScenarioList.from_pandas(df)
    ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
    """
    records = df.to_dict(orient="records")
    return cls([Scenario(record) for record in records])
798
-
799
@classmethod
def from_wikipedia(cls, url: str, table_index: int = 0):
    """
    Build a ScenarioList from one table of a Wikipedia page.

    The URL is first requested to check that it is reachable, then the
    page's tables are read with ``pandas.read_html`` and the selected one
    is converted with ``from_pandas``.

    Parameters:
        url (str): The URL of the Wikipedia page.
        table_index (int): The index of the table to extract (default is 0).

    Returns:
        The ScenarioList built from the selected table. If anything goes
        wrong (request failure, parsing failure, index out of range, or any
        other error) a message is printed and ``None`` is returned.
    """
    import pandas as pd
    import requests
    from requests.exceptions import RequestException

    try:
        # Check if the URL is reachable
        response = requests.get(url)
        response.raise_for_status()  # Raises HTTPError for bad responses

        # Extract tables from the Wikipedia page
        tables = pd.read_html(url)

        # Ensure the requested table index is within the range of available tables
        if table_index < 0 or table_index >= len(tables):
            raise IndexError(
                f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
            )

        return cls.from_pandas(tables[table_index])

    except RequestException as e:
        print(f"Error fetching the URL: {e}")
    except ValueError as e:
        print(f"Error parsing tables: {e}")
    except IndexError as e:
        print(e)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

    # Every error path ends up here after printing its message.
    return None
851
-
852
- # Return an empty DataFrame in case of an error
853
- # return cls.from_pandas(pd.DataFrame())
854
-
855
def to_key_value(self, field: str, value=None) -> Union[dict, set]:
    """Collect the values of ``field``, optionally paired with another field.

    With ``value`` left as ``None`` the result is the set of ``field``
    values; otherwise it is a dict mapping each ``field`` value to the
    scenario's ``value`` field.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.to_key_value('name') == {'Alice', 'Bob'}
    True
    """
    if value is not None:
        return {scenario[field]: scenario[value] for scenario in self}
    return {scenario[field] for scenario in self}
868
-
869
@classmethod
def from_excel(
    cls, filename: str, sheet_name: Optional[str] = None
) -> ScenarioList:
    """Create a ScenarioList from an Excel file, one scenario per row.

    When no ``sheet_name`` is given and the workbook has exactly one sheet,
    that sheet is used. When it has several, their names are printed and a
    ``ValueError`` asks for an explicit choice.

    Example:

    >>> import tempfile
    >>> import os
    >>> import pandas as pd
    >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
    ...     df1 = pd.DataFrame({
    ...         'name': ['Alice', 'Bob'],
    ...         'age': [30, 25],
    ...         'location': ['New York', 'Los Angeles']
    ...     })
    ...     df2 = pd.DataFrame({
    ...         'name': ['Charlie', 'David'],
    ...         'age': [35, 40],
    ...         'location': ['Chicago', 'Boston']
    ...     })
    ...     with pd.ExcelWriter(f.name) as writer:
    ...         df1.to_excel(writer, sheet_name='Sheet1', index=False)
    ...         df2.to_excel(writer, sheet_name='Sheet2', index=False)
    ...     temp_filename = f.name
    >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
    >>> len(scenario_list)
    2
    >>> scenario_list[0]['name']
    'Alice'
    >>> scenario_list = ScenarioList.from_excel(temp_filename)  # Should raise an error and list sheets
    Traceback (most recent call last):
    ...
    ValueError: Please provide a sheet name to load data from.
    """
    from edsl.scenarios.Scenario import Scenario
    import pandas as pd

    # Reading with sheet_name=None yields every sheet, keyed by its name.
    all_sheets = pd.read_excel(filename, sheet_name=None)

    if sheet_name is None:
        available = list(all_sheets.keys())
        if len(available) > 1:
            print("The Excel file contains multiple sheets:")
            for name in available:
                print(f"- {name}")
            raise ValueError("Please provide a sheet name to load data from.")
        # Exactly one sheet: use it without asking.
        sheet_name = available[0]

    df = pd.read_excel(filename, sheet_name=sheet_name)

    observations = [Scenario(row.to_dict()) for _, row in df.iterrows()]

    return cls(observations)
933
-
934
@classmethod
def from_google_sheet(cls, url: str, sheet_name: Optional[str] = None) -> ScenarioList:
    """Create a ScenarioList from a Google Sheet.

    The sheet is downloaded as an Excel file, written to a temporary file,
    read with the ``from_excel`` class method, and the temporary file is
    removed afterwards.

    Args:
        url (str): The URL to the Google Sheet; it must contain ``/edit``.
        sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
            the same as from_excel regarding multiple sheets.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the URL does not contain ``/edit``.

    """
    import os
    import tempfile
    from contextlib import suppress

    import requests

    if "/edit" not in url:
        raise ValueError("Invalid Google Sheet URL format.")
    # The sheet id sits between "/d/" and "/edit" in the URL.
    sheet_id = url.split("/d/")[1].split("/edit")[0]

    export_url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
    )

    # Download the Google Sheet as an Excel file
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so the file can be reopened by name once it is closed.
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    try:
        return cls.from_excel(temp_filename, sheet_name=sheet_name)
    finally:
        # Best-effort cleanup: a failed removal must not mask the result
        # or the original error.
        with suppress(OSError):
            os.remove(temp_filename)
974
-
975
@classmethod
def from_delimited_file(
    cls, source: Union[str, urllib.parse.ParseResult], delimiter: str = ","
) -> ScenarioList:
    """Create a ScenarioList from a delimited file (CSV/TSV) or URL.

    The first row is used as the header; every following row becomes one
    scenario keyed by the header names. An empty source yields an empty
    list.

    Args:
        source: A string representing either a local file path or a URL to a delimited file,
                or a urllib.parse.ParseResult object for a URL.
        delimiter: The delimiter used in the file. Defaults to ',' for CSV files.
                   Use '\\t' for TSV files.

    Returns:
        ScenarioList: A ScenarioList object containing the data from the file.

    Example:
        # For CSV files

        >>> with open('data.csv', 'w') as f:
        ...     _ = f.write('name,age\\nAlice,30\\nBob,25\\n')
        >>> scenario_list = ScenarioList.from_delimited_file('data.csv')

        # For TSV files
        >>> with open('data.tsv', 'w') as f:
        ...     _ = f.write('name\\tage\\nAlice\\t30\\nBob\\t25\\n')
        >>> scenario_list = ScenarioList.from_delimited_file('data.tsv', delimiter='\\t')

    """
    from edsl.scenarios.Scenario import Scenario

    def is_url(candidate):
        # A usable URL needs both a scheme and a network location.
        try:
            result = urllib.parse.urlparse(candidate)
            return all([result.scheme, result.netloc])
        except ValueError:
            return False

    # Reduce both URL forms to one string so the download code exists once.
    if isinstance(source, urllib.parse.ParseResult):
        url = source.geturl()
    elif isinstance(source, str) and is_url(source):
        url = source
    else:
        url = None

    if url is not None:
        with urllib.request.urlopen(url) as response:
            file_obj = StringIO(response.read().decode("utf-8"))
    else:
        # The csv module requires newline="" so that newlines embedded in
        # quoted fields are passed through untouched.
        file_obj = open(source, "r", newline="")

    with file_obj:
        reader = csv.reader(file_obj, delimiter=delimiter)
        header = next(reader, None)
        if header is None:
            # Empty input: return an empty list instead of leaking StopIteration.
            return cls([])
        observations = [Scenario(dict(zip(header, row))) for row in reader]

    return cls(observations)
1031
-
1032
- # Convenience methods for specific file types
1033
@classmethod
def from_csv(cls, source: Union[str, urllib.parse.ParseResult]) -> ScenarioList:
    """Create a ScenarioList from a comma-delimited file or URL."""
    comma = ","
    return cls.from_delimited_file(source, delimiter=comma)
1037
-
1038
def left_join(self, other: ScenarioList, by: Union[str, list[str]]) -> ScenarioList:
    """Left-join this list with ``other`` on the given key(s).

    The work is delegated to ``ScenarioJoin(self, other).left_join(by)``.

    Args:
        other: The ScenarioList to join with
        by: String or list of strings representing the key(s) to join on. Cannot be empty.

    >>> s1 = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    >>> s2 = ScenarioList([Scenario({'name': 'Alice', 'location': 'New York'}), Scenario({'name': 'Charlie', 'location': 'Los Angeles'})])
    >>> s3 = s1.left_join(s2, 'name')
    >>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
    True
    """
    from edsl.scenarios.ScenarioJoin import ScenarioJoin

    return ScenarioJoin(self, other).left_join(by)
1055
- # # Validate join keys
1056
- # if not by:
1057
- # raise ValueError(
1058
- # "Join keys cannot be empty. Please specify at least one key to join on."
1059
- # )
1060
-
1061
- # # Convert single string to list for consistent handling
1062
- # by_keys = [by] if isinstance(by, str) else by
1063
-
1064
- # # Verify all join keys exist in both ScenarioLists
1065
- # left_keys = set(next(iter(self)).keys()) if self else set()
1066
- # right_keys = set(next(iter(other)).keys()) if other else set()
1067
-
1068
- # missing_left = set(by_keys) - left_keys
1069
- # missing_right = set(by_keys) - right_keys
1070
- # if missing_left or missing_right:
1071
- # missing = missing_left | missing_right
1072
- # raise ValueError(f"Join key(s) {missing} not found in both ScenarioLists")
1073
-
1074
- # # Create lookup dictionary from the other ScenarioList
1075
- # def get_key_tuple(scenario: Scenario, keys: list[str]) -> tuple:
1076
- # return tuple(scenario[k] for k in keys)
1077
-
1078
- # other_dict = {get_key_tuple(scenario, by_keys): scenario for scenario in other}
1079
-
1080
- # # Collect all possible keys (like SQL combining all columns)
1081
- # all_keys = set()
1082
- # for scenario in self:
1083
- # all_keys.update(scenario.keys())
1084
- # for scenario in other:
1085
- # all_keys.update(scenario.keys())
1086
-
1087
- # new_scenarios = []
1088
- # for scenario in self:
1089
- # new_scenario = {
1090
- # key: None for key in all_keys
1091
- # } # Start with nulls (like SQL)
1092
- # new_scenario.update(scenario) # Add all left values
1093
-
1094
- # key_tuple = get_key_tuple(scenario, by_keys)
1095
- # if matching_scenario := other_dict.get(key_tuple):
1096
- # # Check for overlapping keys with different values
1097
- # overlapping_keys = set(scenario.keys()) & set(matching_scenario.keys())
1098
- # for key in overlapping_keys:
1099
- # if key not in by_keys and scenario[key] != matching_scenario[key]:
1100
- # join_conditions = [f"{k}='{scenario[k]}'" for k in by_keys]
1101
- # print(
1102
- # f"Warning: Conflicting values for key '{key}' where {' AND '.join(join_conditions)}. "
1103
- # f"Keeping left value: {scenario[key]} (discarding: {matching_scenario[key]})"
1104
- # )
1105
-
1106
- # # Only update with non-overlapping keys from matching scenario
1107
- # new_keys = set(matching_scenario.keys()) - set(scenario.keys())
1108
- # new_scenario.update({k: matching_scenario[k] for k in new_keys})
1109
-
1110
- # new_scenarios.append(Scenario(new_scenario))
1111
-
1112
- # return ScenarioList(new_scenarios)
1113
-
1114
@classmethod
def from_tsv(cls, source: Union[str, urllib.parse.ParseResult]) -> ScenarioList:
    """Create a ScenarioList from a tab-delimited file or URL."""
    tab = "\t"
    return cls.from_delimited_file(source, delimiter=tab)
1118
-
1119
def to_dict(self, sort=False, add_edsl_version=True) -> dict:
    """Serialize the list to a plain dictionary.

    With ``sort`` the scenarios are ordered by their hash first. With
    ``add_edsl_version`` the package version and the class name are added
    to the result, and the flag is also passed on to each scenario.

    >>> s = ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood-fired pizza'})])
    >>> s.to_dict()
    {'scenarios': [{'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}, {'food': 'wood-fired pizza', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}], 'edsl_version': '...', 'edsl_class_name': 'ScenarioList'}

    """
    ordered = sorted(self, key=hash) if sort else self
    serialized = {
        "scenarios": [
            item.to_dict(add_edsl_version=add_edsl_version) for item in ordered
        ]
    }
    if not add_edsl_version:
        return serialized

    from edsl import __version__

    serialized["edsl_version"] = __version__
    serialized["edsl_class_name"] = self.__class__.__name__
    return serialized
1137
-
1138
@classmethod
def gen(cls, scenario_dicts_list: List[dict]) -> ScenarioList:
    """Create a `ScenarioList` by wrapping each dictionary in a `Scenario`.

    Example:

    >>> ScenarioList.gen([{'name': 'Alice'}, {'name': 'Bob'}])
    ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])

    """
    from edsl.scenarios.Scenario import Scenario

    scenarios = []
    for entry in scenario_dicts_list:
        scenarios.append(Scenario(entry))
    return cls(scenarios)
1151
-
1152
@classmethod
@remove_edsl_version
def from_dict(cls, data) -> ScenarioList:
    """Create a `ScenarioList` from the ``"scenarios"`` entry of a dictionary."""
    from edsl.scenarios.Scenario import Scenario

    scenarios = [Scenario.from_dict(entry) for entry in data["scenarios"]]
    return cls(scenarios)
1159
-
1160
@classmethod
def from_nested_dict(cls, data: dict) -> ScenarioList:
    """Create a `ScenarioList` from a dictionary of field name to list of values.

    Each key is added with ``add_list``, so the i-th value of every list
    ends up in the i-th scenario.

    :param data: Mapping from field name to the values for that field.
    :return: An instance of ``cls``, so subclasses get their own type.
    """
    result = cls()
    for key, value in data.items():
        result.add_list(key, value)
    return result
1169
-
1170
def code(self) -> List[str]:
    """Return Python source lines that rebuild this ScenarioList.

    The result is a list of strings, not a single string. The first entry
    holds the two import lines joined by a newline. It is followed by one
    ``scenario_<i> = <repr>`` assignment per scenario, and a final line
    collecting them into a variable named ``scenarios``.
    """
    header_lines = [
        "from edsl.scenarios.Scenario import Scenario",
        "from edsl.scenarios.ScenarioList import ScenarioList",
    ]
    lines = ["\n".join(header_lines)]
    names = []
    for index, scenario in enumerate(self):
        variable = f"scenario_{index}"
        lines.append(f"{variable} = " + repr(scenario))
        names.append(variable)
    lines.append(f"scenarios = ScenarioList([{', '.join(names)}])")
    return lines
1184
-
1185
@classmethod
def example(cls, randomize: bool = False) -> ScenarioList:
    """
    Return an example ScenarioList made of two example Scenarios.

    :params randomize: Passed to ``Scenario.example`` for each of the two scenarios.
    """
    pair = [Scenario.example(randomize) for _ in range(2)]
    return cls(pair)
1193
-
1194
def rich_print(self) -> Any:
    """Build and return a rich ``Table`` describing this ScenarioList.

    The table is titled "ScenarioList" and has one row per scenario: its
    index and the result of the scenario's own ``rich_print``. Nothing is
    printed here; the caller decides how to render the returned table.
    """
    from rich.table import Table

    table = Table(title="ScenarioList")
    table.add_column("Index", style="bold")
    table.add_column("Scenario")
    for position, scenario in enumerate(self):
        table.add_row(str(position), scenario.rich_print())
    return table
1204
-
1205
def __getitem__(self, key: Union[int, slice]) -> Any:
    """Return the item, slice, or serialized entry selected by ``key``.

    An integer returns the scenario at that position, a slice returns a new
    ScenarioList, and any other key is looked up in
    ``self.to_dict(add_edsl_version=False)``.

    Example:
    >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
    >>> s[0]
    Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})

    >>> s[:1]
    ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])

    """
    if isinstance(key, int):
        return super().__getitem__(key)
    if isinstance(key, slice):
        return ScenarioList(super().__getitem__(key))
    return self.to_dict(add_edsl_version=False)[key]
1223
-
1224
def to_agent_list(self):
    """Convert the ScenarioList to an AgentList, one agent per scenario.

    The scenario's fields become the agent's traits. A ``name`` field is
    special: it is used as the agent's name and is also kept as a trait
    under ``agent_name``, with underscores appended until the key does not
    collide with an existing trait. A warning reports the key that was used.

    Example:

    >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
    >>> s.to_agent_list()
    AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
    """
    from edsl.agents.AgentList import AgentList
    from edsl.agents.Agent import Agent
    import warnings

    agents = []
    for scenario in self:
        traits = scenario.copy().data
        if "name" not in traits:
            agents.append(Agent(traits=traits))
            continue

        name = traits.pop("name")
        proposed_agent_name = "agent_name"
        # Loop while the key IS taken. Looping while it is absent can never
        # terminate, because appending "_" does not make the key appear.
        while proposed_agent_name in traits:
            proposed_agent_name += "_"
        warnings.warn(
            f"The 'name' field is reserved for the agent's name---putting this value in {proposed_agent_name}"
        )
        traits[proposed_agent_name] = name
        agents.append(Agent(traits=traits, name=name))

    return AgentList(agents)
1254
-
1255
def chunk(
    self,
    field,
    num_words: Optional[int] = None,
    num_lines: Optional[int] = None,
    include_original=False,
    hash_original=False,
) -> "ScenarioList":
    """Split every scenario on ``field`` and collect all resulting pieces.

    The arguments are forwarded unchanged to each scenario's own ``chunk``.

    Example:

    >>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
    >>> s.chunk('text', num_words=3)
    ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
    """
    pieces = [
        piece
        for scenario in self
        for piece in scenario.chunk(
            field,
            num_words=num_words,
            num_lines=num_lines,
            include_original=include_original,
            hash_original=hash_original,
        )
    ]
    return ScenarioList(pieces)
1282
-
1283
-
1284
if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
1
+ """A list of Scenarios to be used in a survey."""
2
+
3
+ from __future__ import annotations
4
+ from typing import (
5
+ Any,
6
+ Optional,
7
+ Union,
8
+ List,
9
+ Callable,
10
+ Literal,
11
+ TYPE_CHECKING,
12
+ )
13
+
14
+ try:
15
+ from typing import TypeAlias
16
+ except ImportError:
17
+ from typing_extensions import TypeAlias
18
+
19
+ import csv
20
+ import random
21
+ from io import StringIO
22
+ import inspect
23
+ from collections import UserList, defaultdict
24
+ from collections.abc import Iterable
25
+
26
+ if TYPE_CHECKING:
27
+ from urllib.parse import ParseResult
28
+ from edsl.results.Dataset import Dataset
29
+ from edsl.jobs.Jobs import Jobs
30
+ from edsl.surveys.Survey import Survey
31
+ from edsl.questions.QuestionBase import QuestionBase
32
+
33
+
34
+ from simpleeval import EvalWithCompoundTypes, NameNotDefined # type: ignore
35
+
36
+ from tabulate import tabulate_formats
37
+
38
+ from edsl.Base import Base
39
+ from edsl.utilities.remove_edsl_version import remove_edsl_version
40
+
41
+ from edsl.scenarios.Scenario import Scenario
42
+ from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
43
+ from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
44
+ from edsl.utilities.naming_utilities import sanitize_string
45
+ from edsl.utilities.is_valid_variable_name import is_valid_variable_name
46
+ from edsl.exceptions.scenarios import ScenarioError
47
+
48
+ from edsl.scenarios.directory_scanner import DirectoryScanner
49
+
50
+
51
class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
    """Combine the PDF and export mixins into a single base for ScenarioList."""
53
+
54
+
55
+ if TYPE_CHECKING:
56
+ from edsl.results.Dataset import Dataset
57
+
58
# Names of the table layouts that may be passed as ``tablefmt`` to
# ScenarioList.table(); that method additionally validates the value against
# tabulate's own ``tabulate_formats`` list at call time.
TableFormat: TypeAlias = Literal[
    "plain",
    "simple",
    "github",
    "grid",
    "fancy_grid",
    "pipe",
    "orgtbl",
    "rst",
    "mediawiki",
    "html",
    "latex",
    "latex_raw",
    "latex_booktabs",
    "tsv",
]
74
+
75
+
76
+ class ScenarioList(Base, UserList, ScenarioListMixin):
77
+ """Class for creating a list of scenarios to be used in a survey."""
78
+
79
+ __documentation__ = (
80
+ "https://docs.expectedparrot.com/en/latest/scenarios.html#scenariolist"
81
+ )
82
+
83
def __init__(
    self, data: Optional[list] = None, codebook: Optional[dict[str, str]] = None
):
    """Set up the list contents and the codebook.

    :param data: Initial scenarios; an empty list is used when omitted.
    :param codebook: Optional mapping stored on ``self.codebook``; an empty
        dict is used when omitted or empty.
    """
    initial = [] if data is None else data
    super().__init__(initial)
    self.codebook = codebook or {}
92
+
93
def unique(self) -> ScenarioList:
    """Return a new ScenarioList with duplicate scenarios removed.

    Scenarios are compared through their hash/equality, and the first
    occurrence of each is kept in its original position, so the result order is
    stable rather than depending on set iteration order.

    >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'a': 1}), Scenario({'a': 2})])
    >>> s.unique()
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
    """
    # dict preserves insertion order, so this de-duplicates without reordering.
    return ScenarioList(list(dict.fromkeys(self)))
101
+
102
@property
def has_jinja_braces(self) -> bool:
    """Report whether at least one scenario flags itself as containing Jinja braces."""
    flags = [scenario.has_jinja_braces for scenario in self]
    return any(flags)
106
+
107
def _convert_jinja_braces(self) -> ScenarioList:
    """Return a new ScenarioList built from each scenario's own ``_convert_jinja_braces()``."""
    converted = []
    for scenario in self:
        converted.append(scenario._convert_jinja_braces())
    return ScenarioList(converted)
110
+
111
def give_valid_names(self, existing_codebook: dict = None) -> ScenarioList:
    """Rename scenario keys that are not valid variable names.

    Keys that are already valid are kept. Otherwise the name comes from the
    codebook when the key is listed there; failing that, the key is sanitized,
    falling back to ``var_<n>`` (``n`` = current codebook size) when the
    sanitized text is still not valid. Every newly derived name is recorded in
    the codebook attached to the returned list.

    Args:
        existing_codebook (dict, optional): Existing mapping of original keys to valid names.
            Defaults to None. It is copied, not modified.

    Returns:
        ScenarioList: A new ScenarioList with valid variable names and updated codebook.

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.give_valid_names()
    ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.give_valid_names()
    ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.give_valid_names({'are you there John?': 'custom_name'})
    ScenarioList([Scenario({'custom_name': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    """
    mapping = existing_codebook.copy() if existing_codebook else {}
    renamed = []

    for scenario in self:
        entries = {}
        for original_key in scenario:
            if is_valid_variable_name(original_key):
                target = original_key
            elif original_key in mapping:
                target = mapping[original_key]
            else:
                target = sanitize_string(original_key)
                if not is_valid_variable_name(target):
                    target = f"var_{len(mapping)}"
                mapping[original_key] = target
            entries[target] = scenario[original_key]
        renamed.append(Scenario(entries))

    return ScenarioList(renamed, mapping)
153
+
154
def unpivot(
    self,
    id_vars: Optional[List[str]] = None,
    value_vars: Optional[List[str]] = None,
) -> ScenarioList:
    """
    Unpivot the ScenarioList, allowing for id variables to be specified.

    Every scenario yields one output scenario per value variable, holding the
    id fields plus ``'variable'`` (the field name) and ``'value'`` (its value).
    An empty ScenarioList yields an empty ScenarioList.

    Parameters:
        id_vars (list): Fields to use as identifier variables (kept in each entry)
        value_vars (list): Fields to unpivot. If None, all fields of the first
            scenario that are not in id_vars will be used.

    Example:
        >>> s = ScenarioList([
        ...     Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
        ...     Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
        ... ])
        >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
        ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
    """
    if id_vars is None:
        id_vars = []
    if value_vars is None:
        # The first scenario defines the default columns; guard the lookup so
        # an empty list does not raise IndexError.
        if len(self) == 0:
            value_vars = []
        else:
            value_vars = [field for field in self[0].keys() if field not in id_vars]

    new_scenarios = []
    for scenario in self:
        for var in value_vars:
            new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
            new_scenario["variable"] = var
            new_scenario["value"] = scenario[var]
            new_scenarios.append(Scenario(new_scenario))

    return ScenarioList(new_scenarios)
188
+
189
def sem_filter(self, language_predicate: str) -> ScenarioList:
    """Keep only the scenarios for which a yes/no question is answered 'Yes'.

    The predicate is used as the text of a ``QuestionYesNo`` that is run over a
    copy of this list; the answers are attached as a temporary ``criteria``
    field, used to filter, and then dropped again.

    :param language_predicate: The language predicate to use.

    Inspired by:
    @misc{patel2024semanticoperators,
        title={Semantic Operators: A Declarative Model for Rich, AI-based Analytics Over Text Data},
        author={Liana Patel and Siddharth Jha and Parth Asawa and Melissa Pan and Carlos Guestrin and Matei Zaharia},
        year={2024},
        eprint={2407.11418},
        archivePrefix={arXiv},
        primaryClass={cs.DB},
        url={https://arxiv.org/abs/2407.11418},
        }
    """
    from edsl import QuestionYesNo

    candidates = self.duplicate()
    question = QuestionYesNo(
        question_text=language_predicate, question_name="binary_outcome"
    )
    results = question.by(candidates).run(verbose=False)
    answers = results.select("binary_outcome").to_list()
    labelled = candidates.add_list("criteria", answers)
    kept = labelled.filter("criteria == 'Yes'")
    return kept.drop("criteria")
216
+
217
def pivot(
    self,
    id_vars: List[str] = None,
    var_name="variable",
    value_name="value",
) -> ScenarioList:
    """
    Pivot the ScenarioList from long to wide format.

    Scenarios sharing the same values for ``id_vars`` are merged into one
    scenario in which each ``var_name`` value becomes a field holding the
    matching ``value_name`` value. Groups appear in first-seen order.

    Parameters:
        id_vars (list): Fields to use as identifier variables. When omitted,
            no identifier fields are used and all scenarios merge into one.
        var_name (str): Name of the variable column (default: 'variable')
        value_name (str): Name of the value column (default: 'value')

    Example:
        >>> s = ScenarioList([
        ...     Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
        ...     Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
        ...     Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
        ...     Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
        ... ])
        >>> s.pivot(id_vars=['id', 'year'])
        ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
    """
    # The declared default is None, which cannot be iterated; treat it as
    # "no identifier columns".
    id_vars = [] if id_vars is None else list(id_vars)

    pivoted_dict = {}
    for scenario in self:
        # The tuple of id values identifies the output row.
        id_key = tuple(scenario[id_var] for id_var in id_vars)
        row = pivoted_dict.setdefault(
            id_key, {id_var: scenario[id_var] for id_var in id_vars}
        )
        row[scenario[var_name]] = scenario[value_name]

    # Each row already starts with the id fields, so it can be used directly;
    # this also avoids ``**`` unpacking, which rejects non-string field names.
    return ScenarioList([Scenario(row) for row in pivoted_dict.values()])
263
+
264
def group_by(
    self, id_vars: List[str], variables: List[str], func: Callable
) -> ScenarioList:
    """
    Aggregate scenarios that share the same ``id_vars`` values.

    For each group, the values of every field in ``variables`` are gathered
    into lists and passed positionally to ``func``, which must return a dict;
    that dict is merged with the group's id fields to form one output scenario.

    :param id_vars: Fields to use as identifier variables
    :param variables: Fields to group and aggregate
    :param func: Function to apply to the grouped variables

    Returns:
        ScenarioList: A new ScenarioList with the grouped and aggregated results

    Raises:
        ScenarioError: If ``func`` takes a different number of parameters than
            there are variables, fails on a group, or does not return a dict.

    Example:
        >>> def avg_sum(a, b):
        ...     return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
        >>> s = ScenarioList([
        ...     Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
        ...     Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
        ...     Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
        ...     Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
        ... ])
        >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
        ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
    """
    # The aggregator receives one positional argument per variable.
    expected = len(inspect.signature(func).parameters)
    if expected != len(variables):
        raise ScenarioError(
            f"Function {func.__name__} expects {expected} arguments, but {len(variables)} variables were provided"
        )

    # group key -> variable name -> collected values
    buckets = defaultdict(lambda: defaultdict(list))
    for scenario in self:
        group_key = tuple(scenario[id_var] for id_var in id_vars)
        for var in variables:
            buckets[group_key][var].append(scenario[var])

    aggregated_rows = []
    for group_key, columns in buckets.items():
        try:
            summary = func(*[columns[var] for var in variables])
        except Exception as e:
            raise ScenarioError(
                f"Error applying function to group {group_key}: {str(e)}"
            )

        if not isinstance(summary, dict):
            raise ScenarioError(f"Function {func.__name__} must return a dictionary")

        row = dict(zip(id_vars, group_key))
        row.update(summary)
        aggregated_rows.append(Scenario(row))

    return ScenarioList(aggregated_rows)
321
+
322
@property
def parameters(self) -> set:
    """Collect every key that appears in any scenario of the list.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'b': 2})])
    >>> s.parameters == {'a', 'b'}
    True
    """
    names = set()
    for scenario in self:
        names |= set(scenario.keys())
    return names
336
+
337
def __hash__(self) -> int:
    """Hash the sorted dictionary form of the list (without the edsl version).

    >>> s = ScenarioList.example()
    >>> hash(s)
    1262252885757976162
    """
    from edsl.utilities.utilities import dict_hash

    serialized = self.to_dict(sort=True, add_edsl_version=False)
    return dict_hash(serialized)
347
+
348
def __eq__(self, other: Any) -> bool:
    """Compare by hash: two objects are equal when their hashes match.

    An ``other`` that cannot be hashed (for example a plain ``list``) compares
    unequal instead of raising ``TypeError``.
    """
    try:
        other_hash = hash(other)
    except TypeError:
        # Unhashable objects can never match a hash-based comparison.
        return False
    return hash(self) == other_hash
350
+
351
def __repr__(self):
    """Render as ``ScenarioList(<underlying list>)``."""
    contents = self.data
    return f"ScenarioList({contents})"
353
+
354
def __mul__(self, other: ScenarioList) -> ScenarioList:
    """Build the cross product: each scenario here added (``+``) to each scenario in ``other``.

    >>> s1 = ScenarioList.from_list("a", [1, 2])
    >>> s2 = ScenarioList.from_list("b", [3, 4])
    >>> s1 * s2
    ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
    """
    from itertools import product

    pairs = list(product(self, other))
    return ScenarioList([left + right for left, right in pairs])
368
+
369
def times(self, other: ScenarioList) -> ScenarioList:
    """Named alias for ``__mul__``: the cross product of two ScenarioLists.

    Example:

    >>> s1 = ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
    >>> s2 = ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
    >>> s1.times(s2)
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
    """
    cross_product = self.__mul__(other)
    return cross_product
380
+
381
def shuffle(self, seed: Optional[str] = None) -> ScenarioList:
    """Return a shuffled copy of the ScenarioList; the original is left untouched.

    :param seed: When truthy, seeds the module-level ``random`` generator first.

    >>> s = ScenarioList.from_list("a", [1,2,3,4])
    >>> s.shuffle(seed = "1234")
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 4}), Scenario({'a': 3}), Scenario({'a': 2})])
    """
    shuffled = self.duplicate()
    if seed:
        random.seed(seed)
    # Shuffle the copy's underlying list in place.
    random.shuffle(shuffled.data)
    return shuffled
393
+
394
def sample(self, n: int, seed: Optional[str] = None) -> ScenarioList:
    """Draw ``n`` scenarios at random (without replacement) from a copy of the list.

    :param n: Number of scenarios to draw.
    :param seed: When truthy, seeds the module-level ``random`` generator first.

    >>> s = ScenarioList.from_list("a", [1,2,3,4,5,6])
    >>> s.sample(3, seed = "edsl")
    ScenarioList([Scenario({'a': 2}), Scenario({'a': 1}), Scenario({'a': 3})])
    """
    if seed:
        random.seed(seed)

    pool = self.duplicate().data
    drawn = random.sample(pool, n)
    return ScenarioList(drawn)
406
+
407
def expand(self, expand_field: str, number_field: bool = False) -> ScenarioList:
    """Produce one scenario per element of an iterable field.

    A value that is not iterable, or that is a string, is treated as a single
    element.

    :param expand_field: The field to expand.
    :param number_field: Whether to add ``<expand_field>_number`` holding the
        1-based position of the value.

    Example:

    >>> s = ScenarioList( [ Scenario({'a':1, 'b':[1,2]}) ] )
    >>> s.expand('b')
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.expand('b', number_field=True)
    ScenarioList([Scenario({'a': 1, 'b': 1, 'b_number': 1}), Scenario({'a': 1, 'b': 2, 'b_number': 2})])
    """
    expanded = []
    for scenario in self:
        raw = scenario[expand_field]
        if isinstance(raw, Iterable) and not isinstance(raw, str):
            items = raw
        else:
            items = [raw]
        for position, item in enumerate(items, start=1):
            clone = scenario.copy()
            clone[expand_field] = item
            if number_field:
                clone[expand_field + "_number"] = position
            expanded.append(clone)
    return ScenarioList(expanded)
433
+
434
def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
    """Merge several fields into one string field named ``concat_<f1>_<f2>...``.

    The listed fields that are present are removed from each scenario and
    their ``str()`` values are joined with ``separator``; missing fields are
    skipped.

    :param fields: The fields to concatenate.
    :param separator: The separator to use.

    Returns:
        ScenarioList: A new ScenarioList with concatenated fields.

    Example:
        >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
        >>> s.concatenate(['a', 'b', 'c'])
        ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
    """
    merged = []
    for scenario in self:
        row = scenario.copy()
        parts = []
        for field in fields:
            if field not in row:
                continue
            parts.append(str(row[field]))
            del row[field]

        combined_name = f"concat_{'_'.join(fields)}"
        row[combined_name] = separator.join(parts)
        merged.append(row)

    return ScenarioList(merged)
462
+
463
def unpack_dict(
    self, field: str, prefix: Optional[str] = None, drop_field: bool = False
) -> ScenarioList:
    """Promote the entries of a dictionary-valued field to top-level fields.

    :param field: The field to unpack.
    :param prefix: An optional prefix to add to the new fields.
    :param drop_field: Whether to drop the original field.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
    >>> s.unpack_dict('b')
    ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
    >>> s.unpack_dict('b', prefix='new_')
    ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'new_c': 2, 'new_d': 3})])
    """
    unpacked = []
    for scenario in self:
        row = scenario.copy()
        for key, value in scenario[field].items():
            target = prefix + key if prefix else key
            row[target] = value
        if drop_field:
            row.pop(field)
        unpacked.append(row)
    return ScenarioList(unpacked)
492
+
493
def transform(
    self, field: str, func: Callable, new_name: Optional[str] = None
) -> ScenarioList:
    """Apply ``func`` to one field of every scenario.

    :param field: The field to transform.
    :param func: The function to apply to the field.
    :param new_name: Where to store the result; when omitted the original
        field is overwritten.

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.transform('b', lambda x: x + 1)
    ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 2})])

    """
    target = new_name or field
    transformed = []
    for scenario in self:
        row = scenario.copy()
        row[target] = func(scenario[field])
        transformed.append(row)
    return ScenarioList(transformed)
513
+
514
def mutate(
    self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
) -> ScenarioList:
    """
    Return a new ScenarioList with a computed variable added to every scenario.

    The string has the form ``"<name> = <expression>"``. The expression is
    evaluated per scenario with the scenario's fields available as names.

    :param new_var_string: A string with the new variable assignment.
    :param functions_dict: A dictionary of functions to use in the assignment.
    :raises ScenarioError: If there is no ``=``, the name is not a valid
        variable name, or evaluating the expression fails.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.mutate("c = a + b")
    ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 1, 'b': 1, 'c': 2})])

    """
    if "=" not in new_var_string:
        raise ScenarioError(
            f"Mutate requires an '=' in the string, but '{new_var_string}' doesn't have one."
        )
    # Split on the first '=' only, so the expression may contain '=' itself.
    target_text, _, expression = new_var_string.partition("=")
    var_name = target_text.strip()
    from edsl.utilities.utilities import is_valid_variable_name

    if not is_valid_variable_name(var_name):
        raise ScenarioError(f"{var_name} is not a valid variable name.")

    available_functions = functions_dict or {}

    try:
        new_data = []
        for old_scenario in self:
            # A fresh evaluator per scenario: its fields are the visible names.
            evaluator = EvalWithCompoundTypes(
                names=old_scenario, functions=available_functions
            )
            value = evaluator.eval(expression)
            updated = old_scenario.copy()
            updated[var_name] = value
            new_data.append(updated)
    except Exception as e:
        raise ScenarioError(f"Error in mutate. Exception:{e}")

    return ScenarioList(new_data)
560
+
561
def order_by(self, *fields: str, reverse: bool = False) -> ScenarioList:
    """Sort the scenarios by the given fields, compared in the order listed.

    :param fields: The fields to order by.
    :param reverse: Whether to reverse the order.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.order_by('b', 'a')
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    """
    ordered = sorted(
        self,
        key=lambda scenario: tuple(scenario[field] for field in fields),
        reverse=reverse,
    )
    return ScenarioList(ordered)
577
+
578
def duplicate(self) -> ScenarioList:
    """Return a new ScenarioList holding a ``copy()`` of every scenario.

    >>> sl = ScenarioList.example()
    >>> sl_copy = sl.duplicate()
    >>> sl == sl_copy
    True
    >>> sl is sl_copy
    False
    """
    copies = []
    for scenario in self:
        copies.append(scenario.copy())
    return ScenarioList(copies)
589
+
590
def filter(self, expression: str) -> ScenarioList:
    """
    Filter a list of scenarios based on an expression.

    The expression is evaluated once per scenario with the scenario's fields
    available as names; scenarios for which it is truthy are kept. Filtering
    an empty ScenarioList returns an empty ScenarioList. A warning is issued
    when the scenarios do not all share the first scenario's keys.

    :param expression: The expression to filter by.
    :raises ScenarioError: If the expression refers to an unknown field or
        fails to evaluate.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.filter("b == 2")
    ScenarioList([Scenario({'a': 1, 'b': 2})])
    """
    sl = self.duplicate()
    if len(sl) == 0:
        # Nothing to evaluate; also avoids indexing the first element below.
        return sl

    base_keys = set(sl[0].keys())
    keys = set()
    for scenario in sl:
        keys.update(scenario.keys())
    if keys != base_keys:
        import warnings

        warnings.warn(
            "Ragged ScenarioList detected (different keys for different scenario entries). This may cause unexpected behavior."
        )

    def create_evaluator(scenario: Scenario):
        """Create an evaluator whose names are the fields of the given scenario."""
        return EvalWithCompoundTypes(names=scenario)

    try:
        # Evaluate the expression against every scenario, keeping the matches.
        new_data = []
        for scenario in sl:
            if create_evaluator(scenario).eval(expression):
                new_data.append(scenario)
    except NameNotDefined as e:
        available_fields = ", ".join(self.data[0].keys() if self.data else [])
        raise ScenarioError(
            f"Error in filter: '{e}'\n"
            f"The expression '{expression}' refers to a field that does not exist.\n"
            f"Scenario: {scenario}\n"
            f"Available fields: {available_fields}\n"
            "Check your filter expression or consult the documentation: "
            "https://docs.expectedparrot.com/en/latest/scenarios.html#module-edsl.scenarios.Scenario"
        ) from None
    except Exception as e:
        # Chain explicitly so the original failure stays attached as the cause.
        raise ScenarioError(f"Error in filter. Exception:{e}") from e

    return ScenarioList(new_data)
640
+
641
@classmethod
def from_urls(
    cls, urls: list[str], field_name: Optional[str] = "text"
) -> ScenarioList:
    """Create a ScenarioList from a list of URLs.

    Each URL is turned into one scenario via ``Scenario.from_url``. This is an
    alternate constructor, so it can be called on the class directly; calling
    it on an instance keeps working as before.

    :param urls: A list of URLs.
    :param field_name: The name of the field to store the text from the URLs.

    """
    return cls([Scenario.from_url(url, field_name) for url in urls])
651
+
652
def select(self, *fields: str) -> ScenarioList:
    """
    Return scenarios restricted to the referenced fields.

    The work is delegated to ``ScenarioSelector``.

    :param fields: The fields to select.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.select('a')
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
    """
    from edsl.scenarios.scenario_selector import ScenarioSelector

    selector = ScenarioSelector(self)
    return selector.select(*fields)
667
+
668
def drop(self, *fields: str) -> ScenarioList:
    """Return a new ScenarioList with the given fields removed from every scenario.

    :param fields: The fields to drop; passed as a tuple to each scenario's ``drop``.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.drop('a')
    ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
    """
    trimmed = []
    for scenario in self.duplicate():
        trimmed.append(scenario.drop(fields))
    return ScenarioList(trimmed)
679
+
680
def keep(self, *fields: str) -> ScenarioList:
    """Return a new ScenarioList whose scenarios retain only the given fields.

    :param fields: The fields to keep; passed as a tuple to each scenario's ``keep``.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.keep('a')
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
    """
    retained = []
    for scenario in self.duplicate():
        retained.append(scenario.keep(fields))
    return ScenarioList(retained)
693
+
694
@classmethod
def from_list(
    cls, name: str, values: list, func: Optional[Callable] = None
) -> ScenarioList:
    """Build a ScenarioList with one single-field scenario per value.

    :param name: The name of the field.
    :param values: The list of values.
    :param func: An optional function applied to each value before storing it.

    Example:

    >>> ScenarioList.from_list('name', ['Alice', 'Bob'])
    ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    """
    # Without a transformer, values are stored unchanged.
    convert = func if func else (lambda x: x)
    scenarios = []
    for value in values:
        scenarios.append(Scenario({name: convert(value)}))
    return cls(scenarios)
712
+
713
def table(
    self,
    *fields: str,
    tablefmt: Optional[TableFormat] = None,
    pretty_labels: Optional[dict[str, str]] = None,
) -> str:
    """Return the ScenarioList as a table.

    The list is converted with ``to_dataset()`` and the arguments are
    forwarded to that dataset's ``table`` method.

    :param fields: Forwarded positionally to the dataset's ``table``.
    :param tablefmt: Optional table format; must be one of tabulate's formats.
    :param pretty_labels: Forwarded to the dataset's ``table``.
    :raises ValueError: If ``tablefmt`` is given but is not a known format.
    """

    from tabulate import tabulate_formats

    if tablefmt is not None and tablefmt not in tabulate_formats:
        # One message string (the stray comma used to make this a 2-tuple).
        raise ValueError(
            f"Invalid table format: {tablefmt}. "
            f"Valid formats are: {tabulate_formats}"
        )
    return self.to_dataset().table(
        *fields, tablefmt=tablefmt, pretty_labels=pretty_labels
    )
731
+
732
def tree(self, node_list: Optional[List[str]] = None) -> str:
    """Convert to a Dataset and return that dataset's tree view.

    :param node_list: The list of nodes to include in the tree.
    """
    dataset = self.to_dataset()
    return dataset.tree(node_list)
738
+
739
def _summary(self) -> dict:
    """Summarize the list as its scenario count and the keys it uses.

    >>> ScenarioList.example()._summary()
    {'scenarios': 2, 'keys': ['persona']}
    """
    return {"scenarios": len(self), "keys": list(self.parameters)}
750
+
751
def reorder_keys(self, new_order: List[str]) -> ScenarioList:
    """Rebuild every scenario with its keys in the given order.

    ``new_order`` must contain exactly the keys reported by ``parameters``;
    otherwise an ``AssertionError`` is raised.

    :param new_order: The new order of the keys.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 3, 'b': 4})])
    >>> s.reorder_keys(['b', 'a'])
    ScenarioList([Scenario({'b': 2, 'a': 1}), Scenario({'b': 4, 'a': 3})])
    >>> s.reorder_keys(['a', 'b', 'c'])
    Traceback (most recent call last):
    ...
    AssertionError
    """
    assert set(new_order) == set(self.parameters)

    reordered = [
        Scenario({key: scenario[key] for key in new_order}) for scenario in self
    ]
    return ScenarioList(reordered)
773
+
774
def to_dataset(self) -> "Dataset":
    """
    Convert the ScenarioList to a Dataset.

    The dataset has one column per key, holding that key's value for every
    scenario (``None`` where a scenario lacks the key). Columns follow the
    order in which keys are first seen, so ragged lists give a stable column
    order; an empty ScenarioList produces a dataset with no columns.

    >>> s = ScenarioList.from_list("a", [1,2,3])
    >>> s.to_dataset()
    Dataset([{'a': [1, 2, 3]}])
    >>> s = ScenarioList.from_list("a", [1,2,3]).add_list("b", [4,5,6])
    >>> s.to_dataset()
    Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
    """
    from edsl.results.Dataset import Dataset

    # Ordered union of all keys (dict keeps insertion order, unlike set).
    keys = list(dict.fromkeys(key for scenario in self for key in scenario.keys()))
    data = [
        {key: [scenario.get(key, None) for scenario in self.data]} for key in keys
    ]
    return Dataset(data)
796
+
797
def unpack(
    self, field: str, new_names: Optional[List[str]] = None, keep_original=True
) -> ScenarioList:
    """Spread the elements of a sequence-valued field across several fields.

    :param field: The field to unpack.
    :param new_names: Names for the new fields; defaults to ``<field>_<i>``
        for each position of the first scenario's value. With exactly one
        name, the whole value is stored under it.
    :param keep_original: Whether to keep the original field.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': [2, True]}), Scenario({'a': 3, 'b': [3, False]})])
    >>> s.unpack('b')
    ScenarioList([Scenario({'a': 1, 'b': [2, True], 'b_0': 2, 'b_1': True}), Scenario({'a': 3, 'b': [3, False], 'b_0': 3, 'b_1': False})])
    >>> s.unpack('b', new_names=['c', 'd'], keep_original=False)
    ScenarioList([Scenario({'a': 1, 'c': 2, 'd': True}), Scenario({'a': 3, 'c': 3, 'd': False})])

    """
    new_names = new_names or [f"{field}_{i}" for i in range(len(self[0][field]))]
    single_target = len(new_names) == 1

    unpacked = []
    for scenario in self:
        row = scenario.copy()
        if single_target:
            row[new_names[0]] = scenario[field]
        else:
            for position, target in enumerate(new_names):
                row[target] = scenario[field][position]

        if not keep_original:
            del row[field]
        unpacked.append(row)
    return ScenarioList(unpacked)
825
+
826
@classmethod
def from_list_of_tuples(cls, *names: str, values: List[tuple]) -> ScenarioList:
    """Create a ScenarioList from field names and a list of value tuples.

    The i-th name is paired with the i-th element of every tuple, so each
    tuple becomes one scenario.

    :param names: The field names, in tuple-position order (at least one).
    :param values: One tuple per scenario, with an element for every name.
    """
    # The first column creates the list; the rest are added column by column.
    sl = cls.from_list(names[0], [value[0] for value in values])
    for position, name in enumerate(names[1:], start=1):
        sl = sl.add_list(name, [value[position] for value in values])
    return sl
832
+
833
def add_list(self, name: str, values: List[Any]) -> ScenarioList:
    """Return a copy in which the i-th scenario gains field ``name`` set to ``values[i]``.

    :param name: The field to add.
    :param values: One value per scenario.
    :raises ScenarioError: If ``values`` and the list differ in length.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.add_list('age', [30, 25])
    ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    """
    sl = self.duplicate()
    if len(values) != len(sl):
        raise ScenarioError(
            f"Length of values ({len(values)}) does not match length of ScenarioList ({len(sl)})"
        )
    for position in range(len(values)):
        sl[position][name] = values[position]
    return sl
850
+
851
@classmethod
def create_empty_scenario_list(cls, n: int) -> ScenarioList:
    """Build a ScenarioList of ``n`` scenarios that have no fields.

    Example:

    >>> ScenarioList.create_empty_scenario_list(3)
    ScenarioList([Scenario({}), Scenario({}), Scenario({})])
    """
    blanks = []
    for _ in range(n):
        blanks.append(Scenario({}))
    return ScenarioList(blanks)
861
+
862
def add_value(self, name: str, value: Any) -> ScenarioList:
    """Return a copy in which every scenario has field ``name`` set to ``value``.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.add_value('age', 30)
    ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 30})])
    """
    updated = self.duplicate()
    for row in updated:
        row[name] = value
    return updated
875
+
876
def rename(self, replacement_dict: dict) -> ScenarioList:
    """Return a new ScenarioList whose scenarios have their fields renamed.

    Each scenario's own ``rename`` is called with ``replacement_dict`` and the
    results are collected, in order, into a fresh list.

    :param replacement_dict: A dictionary with the old names as keys and the new names as values.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    >>> s.rename({'name': 'first_name', 'age': 'years'})
    ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])

    """
    renamed = ScenarioList([])
    for scenario in self:
        renamed.append(scenario.rename(replacement_dict))
    return renamed
894
+
895
+ ## NEEDS TO BE FIXED
896
+ # def new_column_names(self, new_names: List[str]) -> ScenarioList:
897
+ # """Rename the fields in the scenarios.
898
+
899
+ # Example:
900
+
901
+ # >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
902
+ # >>> s.new_column_names(['first_name', 'years'])
903
+ # ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
904
+
905
+ # """
906
+ # new_list = ScenarioList([])
907
+ # for obj in self:
908
+ # new_obj = obj.new_column_names(new_names)
909
+ # new_list.append(new_obj)
910
+ # return new_list
911
+
912
+ @classmethod
913
def from_sqlite(cls, filepath: str, table: str):
    """Create a ScenarioList from every row of one table in a SQLite database.

    Each row becomes a Scenario keyed by the table's column names.

    :param filepath: Path to the SQLite database file.
    :param table: Name of the table to read.
    """
    import sqlite3
    from contextlib import closing

    # A sqlite3 connection used as a context manager only commits or rolls
    # back; it does not close. closing() is what releases the file handle.
    with closing(sqlite3.connect(filepath)) as conn:
        # NOTE: identifiers cannot be bound as SQL parameters, so `table` is
        # interpolated into the statement text; pass trusted names only.
        cursor = conn.execute(f"SELECT * FROM {table}")
        columns = [description[0] for description in cursor.description]
        data = cursor.fetchall()
    return cls([Scenario(dict(zip(columns, row))) for row in data])
923
+
924
+ @classmethod
925
def from_latex(cls, tex_file_path: str):
    """Create a ScenarioList with one scenario per non-blank line of a text file.

    Every scenario has four fields: ``line_no`` (1-based position in the file,
    blank lines counted), ``text`` (the stripped line), and ``line_before`` /
    ``line_after`` (the neighbouring non-blank lines, ``None`` at the ends).

    :param tex_file_path: Path of the file to read.
    """
    with open(tex_file_path, "r") as file:
        lines = file.readlines()

    # (1-based line number, stripped text) for every line that has content.
    non_blank_lines = [
        (i + 1, line.strip()) for i, line in enumerate(lines) if line.strip()
    ]
    texts = [text for _, text in non_blank_lines]
    # Shift the text column by one in each direction to pair neighbours.
    previous_texts = [None] + texts[:-1]
    next_texts = texts[1:] + [None]

    return cls(
        [
            Scenario(
                {
                    "line_no": line_no,
                    "text": text,
                    "line_before": before,
                    "line_after": after,
                }
            )
            for (line_no, text), before, after in zip(
                non_blank_lines, previous_texts, next_texts
            )
        ]
    )
948
+
949
+ @classmethod
950
def from_google_doc(cls, url: str) -> ScenarioList:
    """Create a ScenarioList from a Google Doc.

    This method downloads the Google Doc as a Word file (.docx), saves it to a temporary file,
    reads it using the from_docx class method, and then removes the temporary file.

    Args:
        url (str): The URL to the Google Doc; it must contain ``/d/<id>/edit``.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the document id cannot be located in ``url``.

    """
    # Validate before importing or downloading anything, so a malformed URL
    # always fails with ValueError (a missing "/d/" used to raise IndexError).
    if "/edit" not in url or "/d/" not in url:
        raise ValueError("Invalid Google Doc URL format.")
    doc_id = url.split("/d/")[1].split("/edit")[0]

    import os
    import tempfile
    import requests
    from docx import Document  # NOTE(review): unused here; presumably an early dependency check -- confirm

    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"

    # Download the Google Doc as a Word file (.docx)
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so the file can be reopened by path; removed explicitly below.
    temp_file = tempfile.NamedTemporaryFile(suffix=".docx", delete=False)
    try:
        with temp_file:
            temp_file.write(response.content)
        return cls.from_docx(temp_file.name)
    finally:
        os.remove(temp_file.name)
985
+
986
+ @classmethod
987
def from_pandas(cls, df) -> ScenarioList:
    """Turn each row of a pandas DataFrame into a Scenario.

    Rows are taken from ``df.to_dict(orient="records")``, so column names
    become the scenario keys.

    Example:

    >>> import pandas as pd
    >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25], 'location': ['New York', 'Los Angeles']})
    >>> ScenarioList.from_pandas(df)
    ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
    """
    records = df.to_dict(orient="records")
    scenarios = []
    for record in records:
        scenarios.append(Scenario(record))
    return cls(scenarios)
998
+
999
+ @classmethod
1000
def from_wikipedia(cls, url: str, table_index: int = 0):
    """Build a ScenarioList from one HTML table on a Wikipedia page.

    Parameters:
        url (str): The URL of the Wikipedia page.
        table_index (int): The index of the table to extract (default is 0).

    Returns:
        The result of ``cls.from_pandas`` for the selected table, or ``None``
        when fetching, parsing or index validation fails. In that case the
        error is printed instead of being raised.

    Example usage::

        url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
        scenarios = ScenarioList.from_wikipedia(url, 0)
    """
    from io import StringIO

    import pandas as pd
    import requests
    from requests.exceptions import RequestException

    try:
        # Fetch the page once and fail on HTTP errors.
        response = requests.get(url)
        response.raise_for_status()  # Raises HTTPError for bad responses

        # Parse the body that was already downloaded rather than letting
        # pandas fetch the same URL a second time.
        tables = pd.read_html(StringIO(response.text))

        # Ensure the requested table index is within the range of available tables
        if table_index >= len(tables) or table_index < 0:
            raise IndexError(
                f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
            )

        return cls.from_pandas(tables[table_index])

    except RequestException as e:
        print(f"Error fetching the URL: {e}")
    except ValueError as e:
        print(f"Error parsing tables: {e}")
    except IndexError as e:
        print(e)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

    # Best-effort contract: every failure above is reported and ends here.
    return None
1054
+
1055
def to_key_value(self, field: str, value=None) -> Union[dict, set]:
    """Collect one field as a set, or two fields as a key-to-value mapping.

    :param field: The field to extract values from.
    :param value: An optional field to use as the value in the key-value pair.
    :return: A set of ``scenario[field]`` when ``value`` is None, otherwise a
        dict mapping ``scenario[field]`` to ``scenario[value]``.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.to_key_value('name') == {'Alice', 'Bob'}
    True
    """
    if value is not None:
        mapping = {}
        for scenario in self:
            mapping[scenario[field]] = scenario[value]
        return mapping
    return set(scenario[field] for scenario in self)
1071
+
1072
+ @classmethod
1073
def from_excel(
    cls, filename: str, sheet_name: Optional[str] = None
) -> ScenarioList:
    """Create a ScenarioList from an Excel file.

    If the Excel file contains multiple sheets and no sheet_name is provided,
    the method will print the available sheets and require the user to specify one.

    :param filename: Path of the workbook to read.
    :param sheet_name: Sheet to load; optional when the workbook has a single sheet.
    :raises ValueError: If the workbook has several sheets and ``sheet_name`` is None.

    Example:

    >>> import tempfile
    >>> import os
    >>> import pandas as pd
    >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
    ...     df1 = pd.DataFrame({
    ...         'name': ['Alice', 'Bob'],
    ...         'age': [30, 25],
    ...         'location': ['New York', 'Los Angeles']
    ...     })
    ...     df2 = pd.DataFrame({
    ...         'name': ['Charlie', 'David'],
    ...         'age': [35, 40],
    ...         'location': ['Chicago', 'Boston']
    ...     })
    ...     with pd.ExcelWriter(f.name) as writer:
    ...         df1.to_excel(writer, sheet_name='Sheet1', index=False)
    ...         df2.to_excel(writer, sheet_name='Sheet2', index=False)
    ...     temp_filename = f.name
    >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
    >>> len(scenario_list)
    2
    >>> scenario_list[0]['name']
    'Alice'
    >>> scenario_list = ScenarioList.from_excel(temp_filename)  # Should raise an error and list sheets
    Traceback (most recent call last):
    ...
    ValueError: Please provide a sheet name to load data from.
    """
    from edsl.scenarios.Scenario import Scenario
    import pandas as pd

    # Open the workbook once: sheet names are available without parsing any
    # sheet, and only the sheet that is actually wanted gets parsed.
    with pd.ExcelFile(filename) as workbook:
        if sheet_name is None:
            available = workbook.sheet_names
            if len(available) > 1:
                print("The Excel file contains multiple sheets:")
                for name in available:
                    print(f"- {name}")
                raise ValueError("Please provide a sheet name to load data from.")
            # If there is only one sheet, use it
            sheet_name = available[0]

        df = workbook.parse(sheet_name=sheet_name)

    observations = [Scenario(row.to_dict()) for _, row in df.iterrows()]
    return cls(observations)
1136
+
1137
+ @classmethod
1138
def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
    """Create a ScenarioList from a Google Sheet.

    This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
    reads it using the from_excel class method, and then removes the temporary file.

    Args:
        url (str): The URL to the Google Sheet; it must contain ``/d/<id>/edit``.
        sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
                                    the same as from_excel regarding multiple sheets.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the sheet id cannot be located in ``url``.

    """
    # Validate before importing or downloading anything, so a malformed URL
    # always fails with ValueError (a missing "/d/" used to raise IndexError).
    if "/edit" not in url or "/d/" not in url:
        raise ValueError("Invalid Google Sheet URL format.")
    sheet_id = url.split("/d/")[1].split("/edit")[0]

    import os
    import tempfile
    import requests

    export_url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
    )

    # Download the Google Sheet as an Excel file
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so the file can be reopened by path; removed explicitly below.
    temp_file = tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False)
    try:
        with temp_file:
            temp_file.write(response.content)
        return cls.from_excel(temp_file.name, sheet_name=sheet_name)
    finally:
        os.remove(temp_file.name)
1177
+
1178
+ @classmethod
1179
def from_delimited_file(
    cls, source: Union[str, "ParseResult"], delimiter: str = ","
) -> ScenarioList:
    """Create a ScenarioList from a delimited file (CSV/TSV) or URL.

    The first row is used as the header; every later row becomes a Scenario
    keyed by that header. An input with no rows yields an empty list.

    :param source: A local path, a URL string, or a ``urllib.parse.ParseResult``.
    :param delimiter: Field separator passed to ``csv.reader``.
    """
    import csv
    from io import StringIO

    import requests
    from edsl.scenarios.Scenario import Scenario
    from urllib.parse import urlparse
    from urllib.parse import ParseResult

    headers = {
        "Accept": "text/csv,application/csv,text/plain",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    }

    def is_url(source):
        try:
            result = urlparse(source)
            return all([result.scheme, result.netloc])
        except ValueError:
            return False

    if isinstance(source, ParseResult):
        url = source.geturl()
    elif isinstance(source, str) and is_url(source):
        url = source
    else:
        url = None

    # Acquire the stream before entering the managed block: if the download
    # or open() fails there is nothing to close, and the original error
    # propagates instead of an UnboundLocalError from a cleanup clause.
    if url is not None:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        file_obj = StringIO(response.text)
    else:
        # newline="" lets the csv module handle line endings itself.
        file_obj = open(source, "r", newline="")

    with file_obj:
        reader = csv.reader(file_obj, delimiter=delimiter)
        header = next(reader, None)
        if header is None:
            # Completely empty input: no header, hence no scenarios.
            observations = []
        else:
            observations = [Scenario(dict(zip(header, row))) for row in reader]

    return cls(observations)
1220
+
1221
+ # Convenience methods for specific file types
1222
+ @classmethod
1223
def from_csv(cls, source: Union[str, "ParseResult"]) -> ScenarioList:
    """Create a ScenarioList from a CSV file or URL.

    Delegates to ``from_delimited_file`` with a comma as the delimiter.

    :param source: Forwarded unchanged to ``from_delimited_file``.
    """
    return cls.from_delimited_file(source, delimiter=",")
1226
+
1227
def left_join(self, other: ScenarioList, by: Union[str, list[str]]) -> ScenarioList:
    """Left-join this ScenarioList with another one on the given key(s).

    The work is delegated to ``ScenarioJoin(self, other).left_join(by)``.

    Args:
        other: The ScenarioList to join with
        by: String or list of strings representing the key(s) to join on. Cannot be empty.

    >>> s1 = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    >>> s2 = ScenarioList([Scenario({'name': 'Alice', 'location': 'New York'}), Scenario({'name': 'Charlie', 'location': 'Los Angeles'})])
    >>> s3 = s1.left_join(s2, 'name')
    >>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
    True
    """
    from edsl.scenarios.scenario_join import ScenarioJoin

    return ScenarioJoin(self, other).left_join(by)
1244
+
1245
+ @classmethod
1246
def from_tsv(cls, source: Union[str, "ParseResult"]) -> ScenarioList:
    """Create a ScenarioList from a TSV file or URL.

    Delegates to ``from_delimited_file`` with a tab as the delimiter.

    :param source: Forwarded unchanged to ``from_delimited_file``.
    """
    return cls.from_delimited_file(source, delimiter="\t")
1249
+
1250
def to_dict(self, sort: bool = False, add_edsl_version: bool = True) -> dict:
    """Serialize the list to a plain dictionary.

    :param sort: If True, scenarios are emitted in ascending order of ``hash(scenario)``.
    :param add_edsl_version: Forwarded to each scenario's ``to_dict``; when True the
        ``edsl_version`` and ``edsl_class_name`` keys are also added at the top level.

    >>> s = ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood-fired pizza'})])
    >>> s.to_dict()
    {'scenarios': [{'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}, {'food': 'wood-fired pizza', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}], 'edsl_version': '...', 'edsl_class_name': 'ScenarioList'}

    """
    ordered = sorted(self, key=hash) if sort else self
    serialized = [
        scenario.to_dict(add_edsl_version=add_edsl_version) for scenario in ordered
    ]
    payload = {"scenarios": serialized}
    if not add_edsl_version:
        return payload

    from edsl import __version__

    payload["edsl_version"] = __version__
    payload["edsl_class_name"] = self.__class__.__name__
    return payload
1269
+
1270
def to(self, survey: Union["Survey", "QuestionBase"]) -> "Jobs":
    """Combine this ScenarioList with a survey (or a single question) via ``by``.

    A bare question is first wrapped in a one-question ``Survey``.

    :param survey: The Survey object to use for the Jobs object.

    Example:
    >>> from edsl import Survey
    >>> from edsl.jobs.Jobs import Jobs
    >>> from edsl import ScenarioList
    >>> isinstance(ScenarioList.example().to(Survey.example()), Jobs)
    True
    """
    from edsl.surveys.Survey import Survey
    from edsl.questions.QuestionBase import QuestionBase
    from edsl.jobs.Jobs import Jobs

    if not isinstance(survey, QuestionBase):
        return survey.by(self)
    return Survey([survey]).by(self)
1290
+
1291
+ @classmethod
1292
def gen(cls, scenario_dicts_list: List[dict]) -> ScenarioList:
    """Wrap each dictionary in a ``Scenario`` and collect them into a `ScenarioList`.

    :param scenario_dicts_list: The dictionaries to convert, in order.

    Example:

    >>> ScenarioList.gen([{'name': 'Alice'}, {'name': 'Bob'}])
    ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])

    """
    from edsl.scenarios.Scenario import Scenario

    scenarios = []
    for entry in scenario_dicts_list:
        scenarios.append(Scenario(entry))
    return cls(scenarios)
1304
+
1305
+ @classmethod
1306
+ @remove_edsl_version
1307
def from_dict(cls, data) -> ScenarioList:
    """Rebuild a `ScenarioList` from a dictionary with a ``"scenarios"`` entry.

    Every element of ``data["scenarios"]`` is passed to ``Scenario.from_dict``.
    """
    from edsl.scenarios.Scenario import Scenario

    restored = []
    for serialized in data["scenarios"]:
        restored.append(Scenario.from_dict(serialized))
    return cls(restored)
1312
+
1313
+ @classmethod
1314
def from_nested_dict(cls, data: dict) -> ScenarioList:
    """Create a `ScenarioList` from a dictionary of equal-length lists.

    Each key becomes a field and position ``i`` of every list goes into
    scenario ``i``. An empty dictionary yields an empty list.

    :param data: Mapping of field name to list of values.
    :raises ValueError: If the lists do not all have the same length.

    >>> data = {"headline": ["Armistice Signed, War Over: Celebrations Erupt Across City"], "date": ["1918-11-11"], "author": ["Jane Smith"]}
    >>> ScenarioList.from_nested_dict(data)
    ScenarioList([Scenario({'headline': 'Armistice Signed, War Over: Celebrations Erupt Across City', 'date': '1918-11-11', 'author': 'Jane Smith'})])

    """
    # No columns: return early instead of leaking StopIteration from next().
    if not data:
        return cls([])

    # Validate before allocating anything.
    length_of_first_list = len(next(iter(data.values())))
    if any(len(v) != length_of_first_list for v in data.values()):
        raise ValueError(
            "All lists in the dictionary must be of the same length.",
        )

    s = ScenarioList.create_empty_scenario_list(n=length_of_first_list)
    for key, list_of_values in data.items():
        s = s.add_list(key, list_of_values)
    return s
1332
+
1333
def code(self) -> List[str]:
    """Return Python source, as a list of snippets, that rebuilds this ScenarioList.

    The first element holds the two import lines joined by a newline. It is
    followed by one ``scenario_<i> = <repr>`` assignment per scenario, then a
    final line that collects those names into ``scenarios``.
    """
    preamble = "\n".join(
        [
            "from edsl.scenarios.Scenario import Scenario",
            "from edsl.scenarios.ScenarioList import ScenarioList",
        ]
    )
    names = []
    assignments = []
    for index, scenario in enumerate(self):
        names.append(f"scenario_{index}")
        assignments.append(f"scenario_{index} = " + repr(scenario))
    return [
        preamble,
        *assignments,
        f"scenarios = ScenarioList([{', '.join(names)}])",
    ]
1346
+
1347
+ @classmethod
1348
def example(cls, randomize: bool = False) -> ScenarioList:
    """
    Build a two-element example ScenarioList from ``Scenario.example``.

    :params randomize: Passed through to each ``Scenario.example`` call.
    """
    first = Scenario.example(randomize)
    second = Scenario.example(randomize)
    return cls([first, second])
1355
+
1356
+ # def rich_print(self) -> None:
1357
+ # """Display an object as a table."""
1358
+ # from rich.table import Table
1359
+
1360
+ # table = Table(title="ScenarioList")
1361
+ # table.add_column("Index", style="bold")
1362
+ # table.add_column("Scenario")
1363
+ # for i, s in enumerate(self):
1364
+ # table.add_row(str(i), s.rich_print())
1365
+ # return table
1366
+
1367
def __getitem__(self, key: Union[int, slice]) -> Any:
    """Return the item at the given index.

    Dispatches on the type of ``key``: a slice gives a new ``ScenarioList``,
    an int gives the element at that position, and any other key is looked
    up in ``self.to_dict(add_edsl_version=False)``.

    Example:
    >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
    >>> s[0]
    Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})

    >>> s[:1]
    ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])

    """
    if isinstance(key, slice):
        # Re-wrap the parent's slice result so slicing keeps the list type.
        return ScenarioList(super().__getitem__(key))
    elif isinstance(key, int):
        return super().__getitem__(key)
    else:
        # Non-positional key: index into the serialized dictionary form.
        return self.to_dict(add_edsl_version=False)[key]
1385
+
1386
def to_agent_list(self):
    """Convert the ScenarioList to an AgentList.

    Each scenario's data becomes the traits of one ``Agent``. Two keys get
    special treatment:

    * ``name`` is used as the agent's name. Its value is also kept as a trait
      under ``agent_name`` (with underscores appended until the key is free),
      and a warning reports the key that was used.
    * ``agent_parameters`` is removed from the traits. Its ``instruction``
      entry is passed to the agent, and its ``name`` entry, when present,
      takes precedence over the scenario's ``name`` field.

    Example:

    >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
    >>> s.to_agent_list()
    AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
    """
    from edsl.agents.AgentList import AgentList
    from edsl.agents.Agent import Agent
    import warnings

    agents = []
    for scenario in self:
        new_scenario = scenario.copy().data
        # Only pass the keyword arguments that actually apply, so a plain
        # scenario still results in Agent(traits=...).
        agent_kwargs = {}

        if "name" in new_scenario:
            name = new_scenario.pop("name")
            proposed_agent_name = "agent_name"
            # Append underscores while the key is taken. The test used to be
            # inverted, which never terminated when the key was free.
            while proposed_agent_name in new_scenario:
                proposed_agent_name += "_"
            warnings.warn(
                f"The 'name' field is reserved for the agent's name---putting this value in {proposed_agent_name}"
            )
            new_scenario[proposed_agent_name] = name
            agent_kwargs["name"] = name

        if "agent_parameters" in new_scenario:
            agent_parameters = new_scenario.pop("agent_parameters")
            agent_kwargs["instruction"] = agent_parameters.get("instruction", None)
            parameter_name = agent_parameters.get("name", None)
            # Do not let a missing parameter name erase the name taken from
            # the scenario's own 'name' field.
            if parameter_name is not None or "name" not in agent_kwargs:
                agent_kwargs["name"] = parameter_name

        # Build the agent once, after all special keys have been handled.
        # The previous if/else overwrote the named agent with an unnamed one.
        agents.append(Agent(traits=new_scenario, **agent_kwargs))

    return AgentList(agents)
1425
+
1426
def chunk(
    self,
    field,
    num_words: Optional[int] = None,
    num_lines: Optional[int] = None,
    include_original=False,
    hash_original=False,
) -> "ScenarioList":
    """Replace every scenario with the pieces returned by its own ``chunk``.

    All arguments are forwarded unchanged to ``Scenario.chunk``; the pieces
    are concatenated in scenario order.

    Example:

    >>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
    >>> s.chunk('text', num_words=3)
    ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
    """
    options = dict(
        num_words=num_words,
        num_lines=num_lines,
        include_original=include_original,
        hash_original=hash_original,
    )
    pieces = []
    for scenario in self:
        pieces.extend(scenario.chunk(field, **options))
    return ScenarioList(pieces)
1453
+
1454
+
1455
# When executed as a script, run this module's doctests. ELLIPSIS lets "..."
# in expected output match arbitrary text.
if __name__ == "__main__":
    import doctest

    doctest.testmod(optionflags=doctest.ELLIPSIS)