edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +430 -113
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/dataset/dataset_operations_mixin.py +1492 -0
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +321 -155
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +10 -16
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +420 -216
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/file_store.py +755 -0
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +20 -21
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
  230. edsl-0.1.48.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -426
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/auto/AutoStudy.py +0 -130
  236. edsl/auto/StageBase.py +0 -243
  237. edsl/auto/StageGenerateSurvey.py +0 -178
  238. edsl/auto/StageLabelQuestions.py +0 -125
  239. edsl/auto/StagePersona.py +0 -61
  240. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  241. edsl/auto/StagePersonaDimensionValues.py +0 -74
  242. edsl/auto/StagePersonaDimensions.py +0 -69
  243. edsl/auto/StageQuestions.py +0 -74
  244. edsl/auto/SurveyCreatorPipeline.py +0 -21
  245. edsl/auto/utilities.py +0 -218
  246. edsl/base/Base.py +0 -279
  247. edsl/coop/PriceFetcher.py +0 -54
  248. edsl/data/Cache.py +0 -580
  249. edsl/data/CacheEntry.py +0 -230
  250. edsl/data/SQLiteDict.py +0 -292
  251. edsl/data/__init__.py +0 -5
  252. edsl/data/orm.py +0 -10
  253. edsl/exceptions/cache.py +0 -5
  254. edsl/exceptions/coop.py +0 -14
  255. edsl/exceptions/data.py +0 -14
  256. edsl/exceptions/scenarios.py +0 -29
  257. edsl/jobs/Answers.py +0 -43
  258. edsl/jobs/JobsPrompts.py +0 -354
  259. edsl/jobs/buckets/BucketCollection.py +0 -134
  260. edsl/jobs/buckets/ModelBuckets.py +0 -65
  261. edsl/jobs/buckets/TokenBucket.py +0 -283
  262. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  263. edsl/jobs/interviews/Interview.py +0 -395
  264. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  265. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  266. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  267. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  268. edsl/jobs/tasks/TaskCreators.py +0 -64
  269. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  270. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  271. edsl/language_models/LanguageModel.py +0 -635
  272. edsl/language_models/ServiceDataSources.py +0 -0
  273. edsl/language_models/key_management/KeyLookup.py +0 -63
  274. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  275. edsl/language_models/key_management/models.py +0 -137
  276. edsl/questions/QuestionBase.py +0 -539
  277. edsl/questions/QuestionFreeText.py +0 -130
  278. edsl/questions/derived/QuestionLikertFive.py +0 -76
  279. edsl/results/DatasetExportMixin.py +0 -911
  280. edsl/results/ResultsExportMixin.py +0 -45
  281. edsl/results/TextEditor.py +0 -50
  282. edsl/results/results_fetch_mixin.py +0 -33
  283. edsl/results/results_tools_mixin.py +0 -98
  284. edsl/scenarios/DocumentChunker.py +0 -104
  285. edsl/scenarios/FileStore.py +0 -564
  286. edsl/scenarios/Scenario.py +0 -548
  287. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  288. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  289. edsl/scenarios/handlers/latex.py +0 -5
  290. edsl/shared.py +0 -1
  291. edsl/surveys/Survey.py +0 -1306
  292. edsl/surveys/SurveyQualtricsImport.py +0 -284
  293. edsl/surveys/SurveyToApp.py +0 -141
  294. edsl/surveys/instructions/__init__.py +0 -0
  295. edsl/tools/__init__.py +0 -1
  296. edsl/tools/clusters.py +0 -192
  297. edsl/tools/embeddings.py +0 -27
  298. edsl/tools/embeddings_plotting.py +0 -118
  299. edsl/tools/plotting.py +0 -112
  300. edsl/tools/summarize.py +0 -18
  301. edsl/utilities/data/Registry.py +0 -6
  302. edsl/utilities/data/__init__.py +0 -1
  303. edsl/utilities/data/scooter_results.json +0 -1
  304. edsl-0.1.46.dist-info/RECORD +0 -366
  305. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  306. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  307. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  308. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  309. /edsl/{results → dataset/display}/table_display.css +0 -0
  310. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  311. /edsl/{results → dataset}/tree_explore.py +0 -0
  312. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  313. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  314. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  315. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  316. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  317. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  318. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  319. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  320. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  321. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  322. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  323. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  324. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  325. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  326. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  327. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
  328. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -1,4 +1,21 @@
1
- """A list of Scenarios to be used in a survey."""
1
+ """
2
+ ScenarioList provides a collection of Scenario objects with advanced operations.
3
+
4
+ The ScenarioList module extends the functionality of a simple list of Scenario objects,
5
+ providing powerful operations for data manipulation, filtering, transformation, and analysis.
6
+ It serves as a bridge between individual Scenarios and higher-level EDSL components like
7
+ Surveys and Jobs.
8
+
9
+ Key features include:
10
+ - Collection operations (filtering, sorting, sampling, and iteration)
11
+ - Data manipulation (transformation, joining, grouping, pivoting)
12
+ - Format conversion (to/from pandas, CSV, Excel, etc.)
13
+ - Advanced selection and retrieval mechanisms
14
+ - Integration with other EDSL components
15
+
16
+ ScenarioList is a core component in the EDSL framework for creating, managing, and
17
+ manipulating collections of Scenarios for experiments, surveys, and data processing tasks.
18
+ """
2
19
 
3
20
  from __future__ import annotations
4
21
  from typing import (
@@ -10,12 +27,7 @@ from typing import (
10
27
  Literal,
11
28
  TYPE_CHECKING,
12
29
  )
13
-
14
- try:
15
- from typing import TypeAlias
16
- except ImportError:
17
- from typing_extensions import TypeAlias
18
-
30
+ import warnings
19
31
  import csv
20
32
  import random
21
33
  from io import StringIO
@@ -23,37 +35,34 @@ import inspect
23
35
  from collections import UserList, defaultdict
24
36
  from collections.abc import Iterable
25
37
 
26
- if TYPE_CHECKING:
27
- from urllib.parse import ParseResult
28
- from edsl.results.Dataset import Dataset
29
- from edsl.jobs.Jobs import Jobs
30
- from edsl.surveys.Survey import Survey
31
- from edsl.questions.QuestionBase import QuestionBase
32
-
33
-
34
38
  from simpleeval import EvalWithCompoundTypes, NameNotDefined # type: ignore
35
-
36
39
  from tabulate import tabulate_formats
37
40
 
38
- from edsl.Base import Base
39
- from edsl.utilities.remove_edsl_version import remove_edsl_version
41
+ try:
42
+ from typing import TypeAlias
43
+ except ImportError:
44
+ from typing_extensions import TypeAlias
40
45
 
41
- from edsl.scenarios.Scenario import Scenario
42
- from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
43
- from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
44
- from edsl.utilities.naming_utilities import sanitize_string
45
- from edsl.utilities.is_valid_variable_name import is_valid_variable_name
46
- from edsl.exceptions.scenarios import ScenarioError
46
+ if TYPE_CHECKING:
47
+ from urllib.parse import ParseResult
48
+ from ..dataset import Dataset
49
+ from ..jobs import Jobs
50
+ from ..surveys import Survey
51
+ from ..questions import QuestionBase
47
52
 
48
- from edsl.scenarios.directory_scanner import DirectoryScanner
49
53
 
54
+ from ..base import Base
55
+ from ..utilities import remove_edsl_version, sanitize_string, is_valid_variable_name, dict_hash
56
+ from ..dataset import ScenarioListOperationsMixin
50
57
 
51
- class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
52
- pass
58
+ from .exceptions import ScenarioError
59
+ from .scenario import Scenario
60
+ from .directory_scanner import DirectoryScanner
61
+ from .scenario_list_pdf_tools import PdfTools
53
62
 
54
63
 
55
64
  if TYPE_CHECKING:
56
- from edsl.results.Dataset import Dataset
65
+ from ..dataset import Dataset
57
66
 
58
67
  TableFormat: TypeAlias = Literal[
59
68
  "plain",
@@ -72,9 +81,42 @@ TableFormat: TypeAlias = Literal[
72
81
  "tsv",
73
82
  ]
74
83
 
75
-
76
- class ScenarioList(Base, UserList, ScenarioListMixin):
77
- """Class for creating a list of scenarios to be used in a survey."""
84
+ class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
85
+ """
86
+ A collection of Scenario objects with advanced operations for manipulation and analysis.
87
+
88
+ ScenarioList extends Python's UserList to provide specialized functionality for
89
+ working with collections of Scenario objects. It inherits from Base to integrate
90
+ with EDSL's object model and from ScenarioListOperationsMixin to provide
91
+ powerful data manipulation capabilities.
92
+
93
+ The class provides methods for filtering, sorting, joining, transforming, and
94
+ analyzing collections of Scenarios. It's designed to work seamlessly with other
95
+ EDSL components like Surveys, Jobs, and Questions.
96
+
97
+ Attributes:
98
+ data (list): The underlying list of Scenario objects.
99
+ codebook (dict): Optional metadata describing the fields in the scenarios.
100
+
101
+ Examples:
102
+ Create a ScenarioList from Scenario objects:
103
+ >>> from edsl.scenarios import Scenario, ScenarioList
104
+ >>> s1 = Scenario({"product": "apple", "price": 1.99})
105
+ >>> s2 = Scenario({"product": "banana", "price": 0.99})
106
+ >>> sl = ScenarioList([s1, s2])
107
+
108
+ Filter scenarios based on a condition:
109
+ >>> cheap_fruits = sl.filter("price < 1.50")
110
+ >>> len(cheap_fruits)
111
+ 1
112
+ >>> cheap_fruits[0]["product"]
113
+ 'banana'
114
+
115
+ Add a new column based on existing data:
116
+ >>> sl_with_tax = sl.mutate("tax = price * 0.08")
117
+ >>> sl_with_tax[0]["tax"]
118
+ 0.1592
119
+ """
78
120
 
79
121
  __documentation__ = (
80
122
  "https://docs.expectedparrot.com/en/latest/scenarios.html#scenariolist"
@@ -83,7 +125,24 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
83
125
  def __init__(
84
126
  self, data: Optional[list] = None, codebook: Optional[dict[str, str]] = None
85
127
  ):
86
- """Initialize the ScenarioList class."""
128
+ """
129
+ Initialize a new ScenarioList with optional data and codebook.
130
+
131
+ Args:
132
+ data: A list of Scenario objects. If None, an empty list is used.
133
+ codebook: A dictionary mapping field names to descriptions or metadata.
134
+ Used for documentation and to provide context for fields.
135
+
136
+ Examples:
137
+ >>> sl = ScenarioList() # Empty list
138
+ >>> s1 = Scenario({"product": "apple"})
139
+ >>> s2 = Scenario({"product": "banana"})
140
+ >>> sl = ScenarioList([s1, s2]) # With data
141
+
142
+ >>> # With a codebook
143
+ >>> codebook = {"product": "Fruit name", "price": "Price in USD"}
144
+ >>> sl = ScenarioList([s1, s2], codebook=codebook)
145
+ """
87
146
  if data is not None:
88
147
  super().__init__(data)
89
148
  else:
@@ -91,21 +150,85 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
91
150
  self.codebook = codebook or {}
92
151
 
93
152
  def unique(self) -> ScenarioList:
94
- """Return a list of unique scenarios.
95
-
96
- >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'a': 1}), Scenario({'a': 2})])
97
- >>> s.unique()
98
- ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
153
+ """
154
+ Return a new ScenarioList containing only unique Scenario objects.
155
+
156
+ This method removes duplicate Scenario objects based on their hash values,
157
+ which are determined by their content. Two Scenarios with identical key-value
158
+ pairs will have the same hash and be considered duplicates.
159
+
160
+ Returns:
161
+ A new ScenarioList containing only unique Scenario objects.
162
+
163
+ Examples:
164
+ >>> from edsl.scenarios import Scenario, ScenarioList
165
+ >>> s1 = Scenario({"a": 1})
166
+ >>> s2 = Scenario({"a": 1}) # Same content as s1
167
+ >>> s3 = Scenario({"a": 2})
168
+ >>> sl = ScenarioList([s1, s2, s3])
169
+ >>> unique_sl = sl.unique()
170
+ >>> len(unique_sl)
171
+ 2
172
+ >>> unique_sl
173
+ ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
174
+
175
+ Notes:
176
+ - The order of scenarios in the result is not guaranteed due to the use of sets
177
+ - Uniqueness is determined by the Scenario's __hash__ method
178
+ - The original ScenarioList is not modified
99
179
  """
100
180
  return ScenarioList(list(set(self)))
101
181
 
102
182
  @property
103
183
  def has_jinja_braces(self) -> bool:
104
- """Check if the ScenarioList has Jinja braces."""
184
+ """
185
+ Check if any Scenario in the list contains values with Jinja template braces.
186
+
187
+ This property checks all Scenarios in the list to determine if any contain
188
+ string values with Jinja template syntax ({{ and }}). This is important for
189
+ rendering templates and avoiding conflicts with other templating systems.
190
+
191
+ Returns:
192
+ True if any Scenario contains values with Jinja braces, False otherwise.
193
+
194
+ Examples:
195
+ >>> from edsl.scenarios import Scenario, ScenarioList
196
+ >>> s1 = Scenario({"text": "Plain text"})
197
+ >>> s2 = Scenario({"text": "Template with {{variable}}"})
198
+ >>> sl1 = ScenarioList([s1])
199
+ >>> sl1.has_jinja_braces
200
+ False
201
+ >>> sl2 = ScenarioList([s1, s2])
202
+ >>> sl2.has_jinja_braces
203
+ True
204
+ """
105
205
  return any([scenario.has_jinja_braces for scenario in self])
106
206
 
107
207
  def _convert_jinja_braces(self) -> ScenarioList:
108
- """Convert Jinja braces to Python braces."""
208
+ """
209
+ Convert Jinja braces to alternative symbols in all Scenarios in the list.
210
+
211
+ This method creates a new ScenarioList where all Jinja template braces
212
+ ({{ and }}) in string values are converted to alternative symbols (<< and >>).
213
+ This is useful when you need to prevent template processing or avoid conflicts
214
+ with other templating systems.
215
+
216
+ Returns:
217
+ A new ScenarioList with converted braces in all Scenarios.
218
+
219
+ Examples:
220
+ >>> from edsl.scenarios import Scenario, ScenarioList
221
+ >>> s = Scenario({"text": "Template with {{variable}}"})
222
+ >>> sl = ScenarioList([s])
223
+ >>> converted = sl._convert_jinja_braces()
224
+ >>> converted[0]["text"]
225
+ 'Template with <<variable>>'
226
+
227
+ Notes:
228
+ - The original ScenarioList is not modified
229
+ - This is primarily intended for internal use
230
+ - The default replacement symbols are << and >>
231
+ """
109
232
  return ScenarioList([scenario._convert_jinja_braces() for scenario in self])
110
233
 
111
234
  def give_valid_names(self, existing_codebook: dict = None) -> ScenarioList:
@@ -202,7 +325,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
202
325
  url={https://arxiv.org/abs/2407.11418},
203
326
  }
204
327
  """
205
- from edsl import QuestionYesNo
328
+ from ..questions import QuestionYesNo
206
329
 
207
330
  new_scenario_list = self.duplicate()
208
331
  q = QuestionYesNo(
@@ -341,8 +464,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
341
464
  >>> hash(s)
342
465
  1262252885757976162
343
466
  """
344
- from edsl.utilities.utilities import dict_hash
345
-
346
467
  return dict_hash(self.to_dict(sort=True, add_edsl_version=False))
347
468
 
348
469
  def __eq__(self, other: Any) -> bool:
@@ -360,7 +481,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
360
481
  ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
361
482
  """
362
483
  from itertools import product
363
- from edsl import Scenario
484
+ from .scenario import Scenario
364
485
  if isinstance(other, Scenario):
365
486
  other = ScenarioList([other])
366
487
  elif not isinstance(other, ScenarioList):
@@ -436,12 +557,14 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
436
557
  new_scenarios.append(new_scenario)
437
558
  return ScenarioList(new_scenarios)
438
559
 
439
- def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";") -> ScenarioList:
560
+ def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";", new_field_name: Optional[str] = None) -> ScenarioList:
440
561
  """Private method to handle concatenation logic for different output types.
441
562
 
442
563
  :param fields: The fields to concatenate.
443
564
  :param output_type: The type of output ("string", "list", or "set").
444
565
  :param separator: The separator to use for string concatenation.
566
+ :param new_field_name: Optional custom name for the concatenated field.
567
+ If None, defaults to "concat_field1_field2_..."
445
568
 
446
569
  Returns:
447
570
  ScenarioList: A new ScenarioList with concatenated fields.
@@ -461,17 +584,17 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
461
584
  values.append(new_scenario[field])
462
585
  del new_scenario[field]
463
586
 
464
- new_field_name = f"concat_{'_'.join(fields)}"
587
+ field_name = new_field_name if new_field_name is not None else f"concat_{'_'.join(fields)}"
465
588
 
466
589
  if output_type == "string":
467
590
  # Convert all values to strings and join with separator
468
- new_scenario[new_field_name] = separator.join(str(v) for v in values)
591
+ new_scenario[field_name] = separator.join(str(v) for v in values)
469
592
  elif output_type == "list":
470
593
  # Keep as a list
471
- new_scenario[new_field_name] = values
594
+ new_scenario[field_name] = values
472
595
  elif output_type == "set":
473
596
  # Convert to a set (removes duplicates)
474
- new_scenario[new_field_name] = set(values)
597
+ new_scenario[field_name] = set(values)
475
598
  else:
476
599
  raise ValueError(f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'.")
477
600
 
@@ -479,11 +602,12 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
479
602
 
480
603
  return ScenarioList(new_scenarios)
481
604
 
482
- def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
605
+ def concatenate(self, fields: List[str], separator: str = ";", new_field_name: Optional[str] = None) -> ScenarioList:
483
606
  """Concatenate specified fields into a single string field.
484
607
 
485
608
  :param fields: The fields to concatenate.
486
609
  :param separator: The separator to use.
610
+ :param new_field_name: Optional custom name for the concatenated field.
487
611
 
488
612
  Returns:
489
613
  ScenarioList: A new ScenarioList with concatenated fields.
@@ -492,13 +616,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
492
616
  >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
493
617
  >>> s.concatenate(['a', 'b', 'c'])
494
618
  ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
619
+ >>> s.concatenate(['a', 'b', 'c'], new_field_name='combined')
620
+ ScenarioList([Scenario({'combined': '1;2;3'}), Scenario({'combined': '4;5;6'})])
495
621
  """
496
- return self._concatenate(fields, output_type="string", separator=separator)
622
+ return self._concatenate(fields, output_type="string", separator=separator, new_field_name=new_field_name)
497
623
 
498
- def concatenate_to_list(self, fields: List[str]) -> ScenarioList:
624
+ def concatenate_to_list(self, fields: List[str], new_field_name: Optional[str] = None) -> ScenarioList:
499
625
  """Concatenate specified fields into a single list field.
500
626
 
501
627
  :param fields: The fields to concatenate.
628
+ :param new_field_name: Optional custom name for the concatenated field.
502
629
 
503
630
  Returns:
504
631
  ScenarioList: A new ScenarioList with fields concatenated into a list.
@@ -507,13 +634,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
507
634
  >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
508
635
  >>> s.concatenate_to_list(['a', 'b', 'c'])
509
636
  ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
637
+ >>> s.concatenate_to_list(['a', 'b', 'c'], new_field_name='values')
638
+ ScenarioList([Scenario({'values': [1, 2, 3]}), Scenario({'values': [4, 5, 6]})])
510
639
  """
511
- return self._concatenate(fields, output_type="list")
640
+ return self._concatenate(fields, output_type="list", new_field_name=new_field_name)
512
641
 
513
- def concatenate_to_set(self, fields: List[str]) -> ScenarioList:
642
+ def concatenate_to_set(self, fields: List[str], new_field_name: Optional[str] = None) -> ScenarioList:
514
643
  """Concatenate specified fields into a single set field.
515
644
 
516
645
  :param fields: The fields to concatenate.
646
+ :param new_field_name: Optional custom name for the concatenated field.
517
647
 
518
648
  Returns:
519
649
  ScenarioList: A new ScenarioList with fields concatenated into a set.
@@ -522,11 +652,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
522
652
  >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
523
653
  >>> s.concatenate_to_set(['a', 'b', 'c'])
524
654
  ScenarioList([Scenario({'concat_a_b_c': {1, 2, 3}}), Scenario({'concat_a_b_c': {4, 5, 6}})])
525
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 1, 'c': 3})])
526
- >>> s.concatenate_to_set(['a', 'b', 'c'])
527
- ScenarioList([Scenario({'concat_a_b_c': {1, 3}})])
655
+ >>> s.concatenate_to_set(['a', 'b', 'c'], new_field_name='unique_values')
656
+ ScenarioList([Scenario({'unique_values': {1, 2, 3}}), Scenario({'unique_values': {4, 5, 6}})])
528
657
  """
529
- return self._concatenate(fields, output_type="set")
658
+ return self._concatenate(fields, output_type="set", new_field_name=new_field_name)
530
659
 
531
660
  def unpack_dict(
532
661
  self, field: str, prefix: Optional[str] = None, drop_field: bool = False
@@ -601,7 +730,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
601
730
  )
602
731
  raw_var_name, expression = new_var_string.split("=", 1)
603
732
  var_name = raw_var_name.strip()
604
- from edsl.utilities.utilities import is_valid_variable_name
605
733
 
606
734
  if not is_valid_variable_name(var_name):
607
735
  raise ScenarioError(f"{var_name} is not a valid variable name.")
@@ -729,7 +857,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
729
857
  >>> s.select('a')
730
858
  ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
731
859
  """
732
- from edsl.scenarios.scenario_selector import ScenarioSelector
860
+ from .scenario_selector import ScenarioSelector
733
861
 
734
862
  return ScenarioSelector(self).select(*fields)
735
863
 
@@ -850,7 +978,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
850
978
  >>> s.to_dataset()
851
979
  Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
852
980
  """
853
- from edsl.results.Dataset import Dataset
981
+ from ..dataset import Dataset
854
982
 
855
983
  keys = list(self[0].keys())
856
984
  for scenario in self:
@@ -959,6 +1087,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
959
1087
  new_list.append(new_obj)
960
1088
  return new_list
961
1089
 
1090
+
1091
+
962
1092
  def replace_names(self, new_names: list) -> ScenarioList:
963
1093
  """Replace the field names in the scenarios with a new list of names.
964
1094
 
@@ -1156,6 +1286,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1156
1286
 
1157
1287
  return scenario_list
1158
1288
 
1289
+ @classmethod
1159
1290
  def from_wikipedia(cls, url: str, table_index: int = 0):
1160
1291
  """
1161
1292
  Extracts a table from a Wikipedia page.
@@ -1230,13 +1361,19 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1230
1361
 
1231
1362
  @classmethod
1232
1363
  def from_excel(
1233
- cls, filename: str, sheet_name: Optional[str] = None
1364
+ cls, filename: str, sheet_name: Optional[str] = None, skip_rows: Optional[List[int]] = None, use_codebook: bool = False
1234
1365
  ) -> ScenarioList:
1235
1366
  """Create a ScenarioList from an Excel file.
1236
1367
 
1237
1368
  If the Excel file contains multiple sheets and no sheet_name is provided,
1238
1369
  the method will print the available sheets and require the user to specify one.
1239
1370
 
1371
+ Args:
1372
+ filename (str): Path to the Excel file
1373
+ sheet_name (Optional[str]): Name of the sheet to load. If None and multiple sheets exist,
1374
+ will raise an error listing available sheets.
1375
+ skip_rows (Optional[List[int]]): List of row indices to skip (0-based). If None, all rows are included.
1376
+
1240
1377
  Example:
1241
1378
 
1242
1379
  >>> import tempfile
@@ -1244,30 +1381,33 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1244
1381
  >>> import pandas as pd
1245
1382
  >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
1246
1383
  ... df1 = pd.DataFrame({
1247
- ... 'name': ['Alice', 'Bob'],
1248
- ... 'age': [30, 25],
1249
- ... 'location': ['New York', 'Los Angeles']
1384
+ ... 'name': ['Alice', 'Bob', 'Charlie'],
1385
+ ... 'age': [30, 25, 35],
1386
+ ... 'location': ['New York', 'Los Angeles', 'Chicago']
1250
1387
  ... })
1251
1388
  ... df2 = pd.DataFrame({
1252
- ... 'name': ['Charlie', 'David'],
1253
- ... 'age': [35, 40],
1254
- ... 'location': ['Chicago', 'Boston']
1389
+ ... 'name': ['David', 'Eve'],
1390
+ ... 'age': [40, 45],
1391
+ ... 'location': ['Boston', 'Seattle']
1255
1392
  ... })
1256
1393
  ... with pd.ExcelWriter(f.name) as writer:
1257
1394
  ... df1.to_excel(writer, sheet_name='Sheet1', index=False)
1258
1395
  ... df2.to_excel(writer, sheet_name='Sheet2', index=False)
1259
1396
  ... temp_filename = f.name
1397
+ >>> # Load all rows
1260
1398
  >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
1261
1399
  >>> len(scenario_list)
1400
+ 3
1401
+ >>> # Skip the second row (index 1)
1402
+ >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1', skip_rows=[1])
1403
+ >>> len(scenario_list)
1262
1404
  2
1263
1405
  >>> scenario_list[0]['name']
1264
1406
  'Alice'
1265
- >>> scenario_list = ScenarioList.from_excel(temp_filename) # Should raise an error and list sheets
1266
- Traceback (most recent call last):
1267
- ...
1268
- ValueError: Please provide a sheet name to load data from.
1407
+ >>> scenario_list[1]['name']
1408
+ 'Charlie'
1269
1409
  """
1270
- from edsl.scenarios.Scenario import Scenario
1410
+ from .scenario import Scenario
1271
1411
  import pandas as pd
1272
1412
 
1273
1413
  # Get all sheets
@@ -1287,11 +1427,28 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1287
1427
  # Load the specified or determined sheet
1288
1428
  df = pd.read_excel(filename, sheet_name=sheet_name)
1289
1429
 
1430
+ # Skip specified rows if any
1431
+ if skip_rows:
1432
+ df = df.drop(skip_rows)
1433
+ # Reset index to ensure continuous indexing
1434
+ df = df.reset_index(drop=True)
1435
+
1436
+ if use_codebook:
1437
+ codebook = {f"col_{i}": col for i, col in enumerate(df.columns)}
1438
+ koobedoc = {col:f"col_{i}" for i, col in enumerate(df.columns)}
1439
+
1290
1440
  observations = []
1291
1441
  for _, row in df.iterrows():
1292
- observations.append(Scenario(row.to_dict()))
1442
+ if use_codebook:
1443
+ observations.append(Scenario({koobedoc.get(k):v for k,v in row.to_dict().items()}))
1444
+ else:
1445
+ observations.append(Scenario(row.to_dict()))
1293
1446
 
1294
- return cls(observations)
1447
+
1448
+ if use_codebook:
1449
+ return cls(observations, codebook=codebook)
1450
+ else:
1451
+ return cls(observations)
1295
1452
 
1296
1453
  @classmethod
1297
1454
  def from_google_sheet(cls, url: str, sheet_name: str = None, column_names: Optional[List[str]]= None) -> ScenarioList:
@@ -1359,7 +1516,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1359
1516
  ) -> ScenarioList:
1360
1517
  """Create a ScenarioList from a delimited file (CSV/TSV) or URL."""
1361
1518
  import requests
1362
- from edsl.scenarios.Scenario import Scenario
1519
+ from .scenario import Scenario
1363
1520
  from urllib.parse import urlparse
1364
1521
  from urllib.parse import ParseResult
1365
1522
 
@@ -1415,7 +1572,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1415
1572
  >>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
1416
1573
  True
1417
1574
  """
1418
- from edsl.scenarios.scenario_join import ScenarioJoin
1575
+ from .scenario_join import ScenarioJoin
1419
1576
 
1420
1577
  sj = ScenarioJoin(self, other)
1421
1578
  return sj.left_join(by)
@@ -1439,7 +1596,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1439
1596
  d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
1440
1597
 
1441
1598
  if add_edsl_version:
1442
- from edsl import __version__
1599
+ from .. import __version__
1443
1600
 
1444
1601
  d["edsl_version"] = __version__
1445
1602
  d["edsl_class_name"] = self.__class__.__name__
@@ -1451,15 +1608,13 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1451
1608
  :param survey: The Survey object to use for the Jobs object.
1452
1609
 
1453
1610
  Example:
1454
- >>> from edsl import Survey
1455
- >>> from edsl.jobs.Jobs import Jobs
1456
- >>> from edsl import ScenarioList
1611
+ >>> from edsl import Survey, Jobs, ScenarioList
1457
1612
  >>> isinstance(ScenarioList.example().to(Survey.example()), Jobs)
1458
1613
  True
1459
1614
  """
1460
- from edsl.surveys.Survey import Survey
1461
- from edsl.questions.QuestionBase import QuestionBase
1462
- from edsl.jobs.Jobs import Jobs
1615
+ from ..surveys import Survey
1616
+ from ..questions import QuestionBase
1617
+ from ..jobs import Jobs
1463
1618
 
1464
1619
  if isinstance(survey, QuestionBase):
1465
1620
  return Survey([survey]).by(self)
@@ -1476,7 +1631,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1476
1631
  ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
1477
1632
 
1478
1633
  """
1479
- from edsl.scenarios.Scenario import Scenario
1634
+ from .scenario import Scenario
1480
1635
 
1481
1636
  return cls([Scenario(s) for s in scenario_dicts_list])
1482
1637
 
@@ -1484,7 +1639,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1484
1639
  @remove_edsl_version
1485
1640
  def from_dict(cls, data) -> ScenarioList:
1486
1641
  """Create a `ScenarioList` from a dictionary."""
1487
- from edsl.scenarios.Scenario import Scenario
1642
+ from .scenario import Scenario
1488
1643
 
1489
1644
  return cls([Scenario.from_dict(s) for s in data["scenarios"]])
1490
1645
 
@@ -1511,8 +1666,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1511
1666
  def code(self) -> str:
1512
1667
  """Create the Python code representation of a survey."""
1513
1668
  header_lines = [
1514
- "from edsl.scenarios.Scenario import Scenario",
1515
- "from edsl.scenarios.ScenarioList import ScenarioList",
1669
+ "from edsl.scenarios import Scenario",
1670
+ "from edsl.scenarios import ScenarioList",
1516
1671
  ]
1517
1672
  lines = ["\n".join(header_lines)]
1518
1673
  names = []
@@ -1531,17 +1686,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1531
1686
  """
1532
1687
  return cls([Scenario.example(randomize), Scenario.example(randomize)])
1533
1688
 
1534
- # def rich_print(self) -> None:
1535
- # """Display an object as a table."""
1536
- # from rich.table import Table
1537
-
1538
- # table = Table(title="ScenarioList")
1539
- # table.add_column("Index", style="bold")
1540
- # table.add_column("Scenario")
1541
- # for i, s in enumerate(self):
1542
- # table.add_row(str(i), s.rich_print())
1543
- # return table
1544
-
1545
1689
  def __getitem__(self, key: Union[int, slice]) -> Any:
1546
1690
  """Return the item at the given index.
1547
1691
 
@@ -1570,9 +1714,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1570
1714
  >>> s.to_agent_list()
1571
1715
  AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
1572
1716
  """
1573
- from edsl.agents.AgentList import AgentList
1574
- from edsl.agents.Agent import Agent
1575
- import warnings
1717
+ from ..agents import AgentList, Agent
1576
1718
 
1577
1719
  agents = []
1578
1720
  for scenario in self:
@@ -1629,12 +1771,14 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1629
1771
  new_scenarios.extend(replacement_scenarios)
1630
1772
  return ScenarioList(new_scenarios)
1631
1773
 
1632
- def collapse(self, field: str) -> ScenarioList:
1774
+ def collapse(self, field: str, separator: Optional[str] = None, add_count: bool = False) -> ScenarioList:
1633
1775
  """Collapse a ScenarioList by grouping on all fields except the specified one,
1634
1776
  collecting the values of the specified field into a list.
1635
1777
 
1636
1778
  Args:
1637
1779
  field: The field to collapse (whose values will be collected into lists)
1780
+ separator: Optional string to join the values with instead of keeping as a list
1781
+ add_count: If True, adds a field showing the number of collapsed rows
1638
1782
 
1639
1783
  Returns:
1640
1784
  ScenarioList: A new ScenarioList with the specified field collapsed into lists
@@ -1642,12 +1786,11 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1642
1786
  Example:
1643
1787
  >>> s = ScenarioList([
1644
1788
  ... Scenario({'category': 'fruit', 'color': 'red', 'item': 'apple'}),
1645
- ... Scenario({'category': 'fruit', 'color': 'yellow', 'item': 'banana'}),
1646
1789
  ... Scenario({'category': 'fruit', 'color': 'red', 'item': 'cherry'}),
1647
1790
  ... Scenario({'category': 'vegetable', 'color': 'green', 'item': 'spinach'})
1648
1791
  ... ])
1649
- >>> s.collapse('item')
1650
- ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry']}), Scenario({'category': 'fruit', 'color': 'yellow', 'item': ['banana']}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach']})])
1792
+ >>> s.collapse('item', add_count=True)
1793
+ ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry'], 'num_collapsed_rows': 2}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach'], 'num_collapsed_rows': 1})])
1651
1794
  """
1652
1795
  if not self:
1653
1796
  return ScenarioList([])
@@ -1667,11 +1810,186 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1667
1810
  result = []
1668
1811
  for key, values in grouped.items():
1669
1812
  new_scenario = dict(zip(id_vars, key))
1670
- new_scenario[field] = values
1813
+ if separator:
1814
+ new_scenario[field] = separator.join(values)
1815
+ else:
1816
+ new_scenario[field] = values
1817
+ if add_count:
1818
+ new_scenario['num_collapsed_rows'] = len(values)
1819
+ result.append(Scenario(new_scenario))
1820
+
1821
+ return ScenarioList(result)
1822
+
1823
+ def create_comparisons(
1824
+ self,
1825
+ bidirectional: bool = False,
1826
+ num_options: int = 2,
1827
+ option_prefix: str = "option_",
1828
+ use_alphabet: bool = False
1829
+ ) -> ScenarioList:
1830
+ """Create a new ScenarioList with comparisons between scenarios.
1831
+
1832
+ Each scenario in the result contains multiple original scenarios as dictionaries,
1833
+ allowing for side-by-side comparison.
1834
+
1835
+ Args:
1836
+ bidirectional (bool): If True, include both (A,B) and (B,A) comparisons.
1837
+ If False, only include (A,B) where A comes before B in the original list.
1838
+ num_options (int): Number of scenarios to include in each comparison.
1839
+ Default is 2 for pairwise comparisons.
1840
+ option_prefix (str): Prefix for the keys in the resulting scenarios.
1841
+ Default is "option_", resulting in keys like "option_1", "option_2", etc.
1842
+ Ignored if use_alphabet is True.
1843
+ use_alphabet (bool): If True, use letters as keys (A, B, C, etc.) instead of
1844
+ the option_prefix with numbers.
1845
+
1846
+ Returns:
1847
+ ScenarioList: A new ScenarioList where each scenario contains multiple original
1848
+ scenarios as dictionaries.
1849
+
1850
+ Example:
1851
+ >>> s = ScenarioList([
1852
+ ... Scenario({'id': 1, 'text': 'Option A'}),
1853
+ ... Scenario({'id': 2, 'text': 'Option B'}),
1854
+ ... Scenario({'id': 3, 'text': 'Option C'})
1855
+ ... ])
1856
+ >>> s.create_comparisons(use_alphabet=True)
1857
+ ScenarioList([Scenario({'A': {'id': 1, 'text': 'Option A'}, 'B': {'id': 2, 'text': 'Option B'}}), Scenario({'A': {'id': 1, 'text': 'Option A'}, 'B': {'id': 3, 'text': 'Option C'}}), Scenario({'A': {'id': 2, 'text': 'Option B'}, 'B': {'id': 3, 'text': 'Option C'}})])
1858
+ >>> s.create_comparisons(num_options=3, use_alphabet=True)
1859
+ ScenarioList([Scenario({'A': {'id': 1, 'text': 'Option A'}, 'B': {'id': 2, 'text': 'Option B'}, 'C': {'id': 3, 'text': 'Option C'}})])
1860
+ """
1861
+ from itertools import combinations, permutations
1862
+ import string
1863
+
1864
+ if num_options < 2:
1865
+ raise ValueError("num_options must be at least 2")
1866
+
1867
+ if num_options > len(self):
1868
+ raise ValueError(f"num_options ({num_options}) cannot exceed the number of scenarios ({len(self)})")
1869
+
1870
+ if use_alphabet and num_options > 26:
1871
+ raise ValueError("When using alphabet labels, num_options cannot exceed 26 (the number of letters in the English alphabet)")
1872
+
1873
+ # Convert each scenario to a dictionary
1874
+ scenario_dicts = [scenario.to_dict(add_edsl_version=False) for scenario in self]
1875
+
1876
+ # Generate combinations or permutations based on bidirectional flag
1877
+ if bidirectional:
1878
+ # For bidirectional, use permutations to get all ordered arrangements
1879
+ if num_options == 2:
1880
+ # For pairwise, we can use permutations with r=2
1881
+ scenario_groups = permutations(scenario_dicts, 2)
1882
+ else:
1883
+ # For more than 2 options with bidirectional=True,
1884
+ # we need all permutations of the specified size
1885
+ scenario_groups = permutations(scenario_dicts, num_options)
1886
+ else:
1887
+ # For unidirectional, use combinations to get unordered groups
1888
+ scenario_groups = combinations(scenario_dicts, num_options)
1889
+
1890
+ # Create new scenarios with the combinations
1891
+ result = []
1892
+ for group in scenario_groups:
1893
+ new_scenario = {}
1894
+ for i, scenario_dict in enumerate(group):
1895
+ if use_alphabet:
1896
+ # Use uppercase letters (A, B, C, etc.)
1897
+ key = string.ascii_uppercase[i]
1898
+ else:
1899
+ # Use the option prefix with numbers (option_1, option_2, etc.)
1900
+ key = f"{option_prefix}{i+1}"
1901
+ new_scenario[key] = scenario_dict
1671
1902
  result.append(Scenario(new_scenario))
1672
1903
 
1673
1904
  return ScenarioList(result)
1674
1905
 
1906
+ @classmethod
1907
+ def from_parquet(cls, filepath: str) -> ScenarioList:
1908
+ """Create a ScenarioList from a Parquet file.
1909
+
1910
+ Args:
1911
+ filepath (str): Path to the Parquet file
1912
+
1913
+ Returns:
1914
+ ScenarioList: A ScenarioList containing the data from the Parquet file
1915
+
1916
+ Example:
1917
+ >>> import pandas as pd
1918
+ >>> import tempfile
1919
+ >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25]})
1920
+ >>> # The following would create and read a parquet file if dependencies are installed:
1921
+ >>> # with tempfile.NamedTemporaryFile(suffix='.parquet', delete=False) as f:
1922
+ >>> # df.to_parquet(f.name)
1923
+ >>> # scenario_list = ScenarioList.from_parquet(f.name)
1924
+ >>> # Instead, we'll demonstrate the equivalent result:
1925
+ >>> scenario_list = ScenarioList.from_pandas(df)
1926
+ >>> len(scenario_list)
1927
+ 2
1928
+ >>> scenario_list[0]['name']
1929
+ 'Alice'
1930
+ """
1931
+ import pandas as pd
1932
+
1933
+ try:
1934
+ # Try to read the Parquet file with pandas
1935
+ df = pd.read_parquet(filepath)
1936
+ except ImportError as e:
1937
+ # Handle missing dependencies with a helpful error message
1938
+ if "pyarrow" in str(e) or "fastparquet" in str(e):
1939
+ raise ImportError(
1940
+ "Missing dependencies for Parquet support. Please install either pyarrow or fastparquet:\n"
1941
+ " pip install pyarrow\n"
1942
+ " or\n"
1943
+ " pip install fastparquet"
1944
+ ) from e
1945
+ else:
1946
+ raise
1947
+
1948
+ # Convert the DataFrame to a ScenarioList
1949
+ return cls.from_pandas(df)
1950
+
1951
+ def replace_values(self, replacements:dict) -> "ScenarioList":
1952
+ """
1953
+ Create new scenarios with values replaced according to the provided replacement dictionary.
1954
+
1955
+ Args:
1956
+ replacements (dict): Dictionary of values to replace {old_value: new_value}
1957
+
1958
+ Returns:
1959
+ ScenarioList: A new ScenarioList with replaced values
1960
+
1961
+ Examples:
1962
+ >>> scenarios = ScenarioList([
1963
+ ... Scenario({'a': 'nan', 'b': 1}),
1964
+ ... Scenario({'a': 2, 'b': 'nan'})
1965
+ ... ])
1966
+ >>> replaced = scenarios.replace_values({'nan': None})
1967
+ >>> print(replaced)
1968
+ ScenarioList([Scenario({'a': None, 'b': 1}), Scenario({'a': 2, 'b': None})])
1969
+ >>> # Original scenarios remain unchanged
1970
+ >>> print(scenarios)
1971
+ ScenarioList([Scenario({'a': 'nan', 'b': 1}), Scenario({'a': 2, 'b': 'nan'})])
1972
+ """
1973
+ new_scenarios = []
1974
+ for scenario in self:
1975
+ new_scenario = {}
1976
+ for key, value in scenario.items():
1977
+ if str(value) in replacements:
1978
+ new_scenario[key] = replacements[str(value)]
1979
+ else:
1980
+ new_scenario[key] = value
1981
+ new_scenarios.append(Scenario(new_scenario))
1982
+ return ScenarioList(new_scenarios)
1983
+
1984
+ @classmethod
1985
+ def from_pdf(cls, filename_or_url, collapse_pages=False):
1986
+ return PdfTools.from_pdf(filename_or_url, collapse_pages)
1987
+
1988
+ @classmethod
1989
+ def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
1990
+ return PdfTools.from_pdf_to_image(pdf_path, image_format)
1991
+
1992
+
1675
1993
 
1676
1994
  if __name__ == "__main__":
1677
1995
  import doctest