edsl 0.1.47__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +303 -67
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +313 -167
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +4 -9
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +365 -220
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/{FileStore.py → file_store.py} +275 -189
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +18 -19
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/METADATA +1 -1
  230. edsl-0.1.48.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -493
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/coop/PriceFetcher.py +0 -54
  236. edsl/data/Cache.py +0 -582
  237. edsl/data/CacheEntry.py +0 -238
  238. edsl/data/SQLiteDict.py +0 -292
  239. edsl/data/__init__.py +0 -5
  240. edsl/data/orm.py +0 -10
  241. edsl/exceptions/cache.py +0 -5
  242. edsl/exceptions/coop.py +0 -14
  243. edsl/exceptions/data.py +0 -14
  244. edsl/exceptions/scenarios.py +0 -29
  245. edsl/jobs/Answers.py +0 -43
  246. edsl/jobs/JobsPrompts.py +0 -354
  247. edsl/jobs/buckets/BucketCollection.py +0 -134
  248. edsl/jobs/buckets/ModelBuckets.py +0 -65
  249. edsl/jobs/buckets/TokenBucket.py +0 -283
  250. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  251. edsl/jobs/interviews/Interview.py +0 -395
  252. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  253. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  254. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  255. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  256. edsl/jobs/tasks/TaskCreators.py +0 -64
  257. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  258. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  259. edsl/language_models/LanguageModel.py +0 -635
  260. edsl/language_models/ServiceDataSources.py +0 -0
  261. edsl/language_models/key_management/KeyLookup.py +0 -63
  262. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  263. edsl/language_models/key_management/models.py +0 -137
  264. edsl/questions/QuestionBase.py +0 -544
  265. edsl/questions/QuestionFreeText.py +0 -130
  266. edsl/questions/derived/QuestionLikertFive.py +0 -76
  267. edsl/results/ResultsExportMixin.py +0 -45
  268. edsl/results/TextEditor.py +0 -50
  269. edsl/results/results_fetch_mixin.py +0 -33
  270. edsl/results/results_tools_mixin.py +0 -98
  271. edsl/scenarios/DocumentChunker.py +0 -104
  272. edsl/scenarios/Scenario.py +0 -548
  273. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  274. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  275. edsl/scenarios/handlers/latex.py +0 -5
  276. edsl/shared.py +0 -1
  277. edsl/surveys/Survey.py +0 -1301
  278. edsl/surveys/SurveyQualtricsImport.py +0 -284
  279. edsl/surveys/SurveyToApp.py +0 -141
  280. edsl/surveys/instructions/__init__.py +0 -0
  281. edsl/tools/__init__.py +0 -1
  282. edsl/tools/clusters.py +0 -192
  283. edsl/tools/embeddings.py +0 -27
  284. edsl/tools/embeddings_plotting.py +0 -118
  285. edsl/tools/plotting.py +0 -112
  286. edsl/tools/summarize.py +0 -18
  287. edsl/utilities/data/Registry.py +0 -6
  288. edsl/utilities/data/__init__.py +0 -1
  289. edsl/utilities/data/scooter_results.json +0 -1
  290. edsl-0.1.47.dist-info/RECORD +0 -354
  291. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  292. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  293. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  294. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  295. /edsl/{results → dataset/display}/table_display.css +0 -0
  296. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  297. /edsl/{results → dataset}/tree_explore.py +0 -0
  298. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  299. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  300. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  301. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  302. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  303. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  304. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  305. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  306. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  307. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  308. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  309. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  310. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  311. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  312. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  313. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
  314. {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -1,548 +0,0 @@
1
- """A Scenario is a dictionary with a key/value to parameterize a question."""
2
-
3
- from __future__ import annotations
4
- import copy
5
- import os
6
- import json
7
- from collections import UserDict
8
- from typing import Union, List, Optional, TYPE_CHECKING, Collection
9
- from uuid import uuid4
10
-
11
- from edsl.Base import Base
12
- from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
13
- from edsl.utilities.remove_edsl_version import remove_edsl_version
14
- from edsl.exceptions.scenarios import ScenarioError
15
-
16
- if TYPE_CHECKING:
17
- from edsl.scenarios.ScenarioList import ScenarioList
18
- from edsl.results.Dataset import Dataset
19
-
20
-
21
class DisplayJSON:
    """Wrap a dictionary so that its repr is a JSON dump with 4-space indentation."""

    def __init__(self, input_dict: dict):
        # Serialize eagerly; later changes to input_dict are not reflected.
        rendered = json.dumps(input_dict, indent=4)
        self.text = rendered

    def __repr__(self):
        # The stored JSON text doubles as the representation.
        return self.text
29
-
30
-
31
class DisplayYAML:
    """Wrap a dictionary so that its repr is the output of ``yaml.dump``."""

    def __init__(self, input_dict: dict):
        # yaml is imported here rather than at module import time.
        import yaml

        rendered = yaml.dump(input_dict)
        self.text = rendered

    def __repr__(self):
        # The stored YAML text doubles as the representation.
        return self.text
41
-
42
-
43
- class Scenario(Base, UserDict, ScenarioHtmlMixin):
44
- """A Scenario is a dictionary of keys/values that can be used to parameterize questions."""
45
-
46
- __documentation__ = "https://docs.expectedparrot.com/en/latest/scenarios.html"
47
-
48
def __init__(self, data: Optional[dict] = None, name: Optional[str] = None):
    """Initialize a new Scenario.

    :param data: A dictionary of keys/values for parameterizing questions.
        A non-``None`` value that is not a ``dict`` is converted with ``dict(data)``.
    :param name: The name of the scenario.
    :raises ScenarioError: If ``data`` cannot be converted to a dictionary.
    """
    if data is not None and not isinstance(data, dict):
        try:
            data = dict(data)
        except Exception as e:
            # Chain the original failure so the traceback shows its explicit cause.
            raise ScenarioError(
                f"You must pass in a dictionary to initialize a Scenario. You passed in {data}",
                "Exception message:" + str(e),
            ) from e

    super().__init__()
    # Assigned after super().__init__() so the caller's dict (not a copy) backs the scenario.
    self.data = data if data is not None else {}
    self.name = name
66
-
67
def __mul__(self, scenario_list_or_scenario: Union["ScenarioList", "Scenario"]) -> "ScenarioList":
    """Multiply this scenario with another Scenario or with a ScenarioList.

    :param scenario_list_or_scenario: The right-hand operand.
    :raises TypeError: If the operand is neither a ScenarioList nor a Scenario.

    For a ScenarioList operand the work is delegated to ``operand * self``;
    for a Scenario operand, ``self`` is wrapped in a one-element ScenarioList first.
    """
    from edsl.scenarios.ScenarioList import ScenarioList

    other = scenario_list_or_scenario
    if isinstance(other, ScenarioList):
        return other * self
    if isinstance(other, Scenario):
        return ScenarioList([self]) * other
    raise TypeError(f"Cannot multiply Scenario with {type(scenario_list_or_scenario)}")
75
-
76
def replicate(self, n: int) -> "ScenarioList":
    """Return a ScenarioList holding ``n`` deep copies of this scenario.

    :param n: The number of times to replicate the scenario.

    Example:
    >>> s = Scenario({"food": "wood chips"})
    >>> s.replicate(2)
    ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood chips'})])
    """
    from edsl.scenarios.ScenarioList import ScenarioList

    copies = []
    for _ in range(n):
        # Each entry is an independent deep copy, not a shared reference.
        copies.append(copy.deepcopy(self))
    return ScenarioList(copies)
89
-
90
@property
def has_jinja_braces(self) -> bool:
    """Report whether any string value contains both ``{{`` and ``}}``. This matters for rendering.

    >>> s = Scenario({"food": "I love {{wood chips}}"})
    >>> s.has_jinja_braces
    True
    """
    # Non-string values are ignored; a single matching string is enough.
    return any(
        isinstance(value, str) and "{{" in value and "}}" in value
        for _, value in self.items()
    )
103
-
104
def _convert_jinja_braces(
    self, replacement_left: str = "<<", replacement_right: str = ">>"
) -> Scenario:
    """Return a new Scenario whose string values have ``{{``/``}}`` swapped for other delimiters.

    :param replacement_left: Text substituted for every ``{{``.
    :param replacement_right: Text substituted for every ``}}``.

    >>> s = Scenario({"food": "I love {{wood chips}}"})
    >>> s._convert_jinja_braces()
    Scenario({'food': 'I love <<wood chips>>'})

    """
    converted = Scenario()
    for key, value in self.items():
        if not isinstance(value, str):
            # Non-string values are carried over untouched.
            converted[key] = value
            continue
        left_done = value.replace("{{", replacement_left)
        converted[key] = left_done.replace("}}", replacement_right)
    return converted
123
-
124
def __add__(self, other_scenario: Scenario) -> Scenario:
    """Merge two scenarios into a new one containing the union of their keys.

    If the other scenario is None, ``self`` is returned unchanged.
    On a key collision the value from ``other_scenario`` is kept.

    :param other_scenario: The other scenario to combine with.

    Example:

    >>> s1 = Scenario({"price": 100, "quantity": 2})
    >>> s2 = Scenario({"color": "red"})
    >>> s1 + s2
    Scenario({'price': 100, 'quantity': 2, 'color': 'red'})
    >>> (s1 + s2).__class__.__name__
    'Scenario'
    """
    if other_scenario is None:
        return self

    # Deep copies keep the result independent of both operands.
    left = copy.deepcopy(self.data)
    right = copy.deepcopy(other_scenario.data)
    return Scenario(left | right)
147
-
148
def rename(
    self,
    old_name_or_replacement_dict: Union[str, dict[str, str]],
    new_name: Optional[str] = None,
) -> Scenario:
    """Return a new Scenario with some keys renamed.

    :param old_name_or_replacement_dict: A dictionary of old keys to new keys *OR* a string of the old key.
    :param new_name: The new name of the key.

    Example:

    >>> s = Scenario({"food": "wood chips"})
    >>> s.rename({"food": "food_preference"})
    Scenario({'food_preference': 'wood chips'})

    >>> s = Scenario({"food": "wood chips"})
    >>> s.rename("food", "snack")
    Scenario({'snack': 'wood chips'})
    """
    is_single_pair = (
        isinstance(old_name_or_replacement_dict, str) and new_name is not None
    )
    # A (old, new) pair becomes a one-entry mapping; anything else is used as given.
    mapping = (
        {old_name_or_replacement_dict: new_name}
        if is_single_pair
        else old_name_or_replacement_dict
    )

    renamed = Scenario()
    for key, value in self.items():
        target = mapping[key] if key in mapping else key
        renamed[target] = value
    return renamed
180
-
181
def new_column_names(self, new_names: List[str]) -> Scenario:
    """Return a new Scenario whose keys are replaced, in order, by ``new_names``.

    :param new_names: The replacement keys, matched positionally to the current keys.

    If the number of names does not match the number of keys, a message is
    printed and the names are paired with values only up to the shorter length.

    >>> s = Scenario({"food": "wood chips"})
    >>> s.new_column_names(["food_preference"])
    Scenario({'food_preference': 'wood chips'})
    """
    # Explicit check instead of assert, so the message survives ``python -O``.
    if len(new_names) != len(self.keys()):
        print("The number of new names must match the number of keys.")

    new_scenario = Scenario()
    # A distinct loop name avoids rebinding the ``new_names`` parameter.
    for new_name, value in zip(new_names, self.values()):
        new_scenario[new_name] = value
    return new_scenario
197
-
198
def table(self, tablefmt: str = "grid") -> str:
    """Render the scenario as a table by way of its dataset form.

    :param tablefmt: Forwarded unchanged to the dataset's ``table`` method.
    """
    dataset = self.to_dataset()
    return dataset.table(tablefmt=tablefmt)
201
-
202
def json(self):
    """Return a DisplayJSON wrapper around the scenario's dictionary, without EDSL version keys."""
    plain = self.to_dict(add_edsl_version=False)
    return DisplayJSON(plain)
204
-
205
def yaml(self):
    """Return a DisplayYAML wrapper around the scenario's dictionary, without EDSL version keys."""
    # No local ``import yaml`` here: DisplayYAML performs that import itself.
    return DisplayYAML(self.to_dict(add_edsl_version=False))
209
-
210
def to_dict(self, add_edsl_version: bool = True) -> dict:
    """Serialize the scenario to a plain dictionary.

    :param add_edsl_version: When true, ``edsl_version`` and ``edsl_class_name``
        entries are added; the flag is also forwarded to any FileStore values.

    Example:

    >>> s = Scenario({"food": "wood chips"})
    >>> s.to_dict()
    {'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}

    >>> s.to_dict(add_edsl_version = False)
    {'food': 'wood chips'}

    """
    from edsl.scenarios.FileStore import FileStore

    # Work on a shallow copy so the scenario's own data is left alone.
    serialized = self.data.copy()
    file_keys = [k for k, v in serialized.items() if isinstance(v, FileStore)]
    for k in file_keys:
        serialized[k] = serialized[k].to_dict(add_edsl_version=add_edsl_version)

    if not add_edsl_version:
        return serialized

    from edsl import __version__

    serialized["edsl_version"] = __version__
    serialized["edsl_class_name"] = "Scenario"
    return serialized
236
-
237
def __hash__(self) -> int:
    """Hash the scenario from its dictionary form, excluding EDSL version keys.

    Example:

    >>> s = Scenario({"food": "wood chips"})
    >>> hash(s)
    1153210385458344214
    """
    from edsl.utilities.utilities import dict_hash

    payload = self.to_dict(add_edsl_version=False)
    return dict_hash(payload)
249
-
250
def __repr__(self):
    """Return ``Scenario(...)`` wrapped around the repr of the underlying data."""
    return f"Scenario({self.data!r})"
252
-
253
def to_dataset(self) -> "Dataset":
    """Build a two-column Dataset: one ``key`` column and one ``value`` column.

    >>> s = Scenario({"food": "wood chips"})
    >>> s.to_dataset()
    Dataset([{'key': ['food']}, {'value': ['wood chips']}])
    """
    from edsl.results.Dataset import Dataset

    key_column = {"key": list(self.keys())}
    value_column = {"value": list(self.values())}
    return Dataset([key_column, value_column])
265
-
266
def select(self, list_of_keys: Collection[str]) -> "Scenario":
    """Return a new Scenario containing only the requested keys.

    :param list_of_keys: The keys to select.

    Example:

    >>> s = Scenario({"food": "wood chips", "drink": "water"})
    >>> s.select(["food"])
    Scenario({'food': 'wood chips'})
    """
    subset = Scenario()
    # Keys are copied in the order given by the caller.
    for wanted in list_of_keys:
        subset[wanted] = self[wanted]
    return subset
281
-
282
def drop(self, list_of_keys: Collection[str]) -> "Scenario":
    """Return a new Scenario without the listed keys.

    :param list_of_keys: The keys to drop.

    Example:

    >>> s = Scenario({"food": "wood chips", "drink": "water"})
    >>> s.drop(["food"])
    Scenario({'drink': 'water'})
    """
    remaining = Scenario()
    for key in self.keys():
        if key in list_of_keys:
            # Listed keys are left out of the result.
            continue
        remaining[key] = self[key]
    return remaining
298
-
299
def keep(self, list_of_keys: List[str]) -> "Scenario":
    """Return a new Scenario with only the listed keys; an alias for ``select``.

    :param list_of_keys: The keys to keep.

    Example:

    >>> s = Scenario({"food": "wood chips", "drink": "water"})
    >>> s.keep(["food"])
    Scenario({'food': 'wood chips'})
    """
    kept = self.select(list_of_keys)
    return kept
312
-
313
@classmethod
def from_url(cls, url: str, field_name: Optional[str] = "text") -> "Scenario":
    """Create a scenario from the text fetched at a URL.

    :param url: The URL to create the scenario from.
    :param field_name: The field name to use for the text.

    The result has a ``"url"`` key holding ``url`` and a ``field_name`` key
    holding the response text.
    """
    import requests

    response = requests.get(url)
    payload = {"url": url, field_name: response.text}
    return cls(payload)
325
-
326
@classmethod
def from_file(cls, file_path: str, field_name: str) -> "Scenario":
    """Create a scenario holding a FileStore for the given file under ``field_name``.

    >>> import tempfile
    >>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
    ...     _ = f.write("This is a test.")
    ...     _ = f.flush()
    ...     s = Scenario.from_file(f.name, "file")
    >>> s
    Scenario({'file': FileStore(path='...', ...)})

    """
    from edsl.scenarios.FileStore import FileStore

    return cls({field_name: FileStore(file_path)})
343
-
344
@classmethod
def from_image(
    cls, image_path: str, image_name: Optional[str] = None
) -> "Scenario":
    """
    Create a scenario from an image file by delegating to ``from_file``.

    Args:
        image_path (str): Path to the image file.
        image_name (Optional[str]): Key to store the image under. Defaults to
            the file's base name up to its first ".".

    Returns:
        Scenario: A new Scenario instance with image information.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.

    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    key = image_name
    if key is None:
        base_name = os.path.basename(image_path)
        # Everything before the first dot, e.g. "a.tar.gz" -> "a".
        key = base_name.partition(".")[0]

    return cls.from_file(image_path, key)
365
-
366
@classmethod
def from_pdf(cls, pdf_path: str):
    """Create a Scenario from a PDF file.

    :param pdf_path: Path to the PDF file, handed to ``PdfExtractor``.
    :raises ImportError: If an import fails while extracting; re-raised with
        installation guidance and chained to the original error.
    """
    try:
        from edsl.scenarios.PdfExtractor import PdfExtractor

        extractor = PdfExtractor(pdf_path)
        return Scenario(extractor.get_pdf_dict())
    except ImportError as e:
        # Chain explicitly so the original import failure stays visible as the cause.
        raise ImportError(
            f"Could not extract text from PDF: {str(e)}. "
            "PDF extraction requires the PyMuPDF library. "
            "Install it with: pip install pymupdf"
        ) from e
379
-
380
@classmethod
def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
    """
    Convert each page of a PDF into an image and create key/value for it.

    :param pdf_path: Path to the PDF file.
    :param image_format: Format of the output images (default is 'jpeg').
    :return: A single scenario (an instance of ``cls``) holding every page.

    The scenario has a key "filepath" and one or more keys "page_{i}" for each page.
    """
    import tempfile
    from pdf2image import convert_from_path
    from edsl import FileStore  # imported once, not on every loop iteration

    with tempfile.TemporaryDirectory() as output_folder:
        # Convert PDF to images
        images = convert_from_path(pdf_path)

        scenario_dict = {"filepath": pdf_path}

        # Save each page as an image and record a FileStore for it
        for i, image in enumerate(images):
            image_path = os.path.join(output_folder, f"page_{i}.{image_format}")
            image.save(image_path, image_format.upper())
            scenario_dict[f"page_{i}"] = FileStore(image_path)

    # NOTE(review): the temporary directory is gone at this point; this presumably
    # relies on FileStore capturing the file contents at construction - confirm.
    return cls(scenario_dict)
412
-
413
@classmethod
def from_docx(cls, docx_path: str) -> "Scenario":
    """Create a scenario from the dictionary that ``DocxScenario`` builds for a docx file.

    :param docx_path: The path to the docx file.

    Example:

    >>> from docx import Document
    >>> doc = Document()
    >>> _ = doc.add_heading("EDSL Survey")
    >>> _ = doc.add_paragraph("This is a test.")
    >>> doc.save("test.docx")
    >>> s = Scenario.from_docx("test.docx")
    >>> s
    Scenario({'file_path': 'test.docx', 'text': 'EDSL Survey\\nThis is a test.'})
    >>> import os; os.remove("test.docx")
    """
    from edsl.scenarios.DocxScenario import DocxScenario

    docx_scenario = DocxScenario(docx_path)
    scenario_dict = docx_scenario.get_scenario_dict()
    return Scenario(scenario_dict)
434
-
435
- def chunk(
436
- self,
437
- field,
438
- num_words: Optional[int] = None,
439
- num_lines: Optional[int] = None,
440
- include_original=False,
441
- hash_original=False,
442
- ) -> "ScenarioList":
443
- """Split a field into chunks of a given size.
444
-
445
- :param field: The field to split.
446
- :param num_words: The number of words in each chunk.
447
- :param num_lines: The number of lines in each chunk.
448
- :param include_original: Whether to include the original field in the new scenarios.
449
- :param hash_original: Whether to hash the original field in the new scenarios.
450
-
451
- If you specify `include_original=True`, the original field will be included in the new scenarios with an "_original" suffix.
452
-
453
- Either `num_words` or `num_lines` must be specified, but not both.
454
-
455
- The `hash_original` parameter is useful if you do not want to store the original text, but still want a unique identifier for it.
456
-
457
- Example:
458
-
459
- >>> s = Scenario({"text": "This is a test.\\nThis is a test.\\n\\nThis is a test."})
460
- >>> s.chunk("text", num_lines = 1)
461
- ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': 'This is a test.', 'text_chunk': 1, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': '', 'text_chunk': 2, 'text_char_count': 0, 'text_word_count': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 3, 'text_char_count': 15, 'text_word_count': 4})])
462
-
463
- >>> s.chunk("text", num_words = 2)
464
- ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 1, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 2, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 3, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 4, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 5, 'text_char_count': 7, 'text_word_count': 2})])
465
-
466
- >>> s = Scenario({"text": "Hello World"})
467
- >>> s.chunk("text", num_words = 1, include_original = True)
468
- ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'})])
469
-
470
- >>> s = Scenario({"text": "Hello World"})
471
- >>> s.chunk("text", num_words = 1, include_original = True, hash_original = True)
472
- ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
473
-
474
- >>> s.chunk("text")
475
- Traceback (most recent call last):
476
- ...
477
- ValueError: You must specify either num_words or num_lines.
478
-
479
- >>> s.chunk("text", num_words = 1, num_lines = 1)
480
- Traceback (most recent call last):
481
- ...
482
- ValueError: You must specify either num_words or num_lines, but not both.
483
- """
484
- from edsl.scenarios.DocumentChunker import DocumentChunker
485
-
486
- return DocumentChunker(self).chunk(
487
- field, num_words, num_lines, include_original, hash_original
488
- )
489
-
490
- @classmethod
491
- @remove_edsl_version
492
- def from_dict(cls, d: dict) -> "Scenario":
493
- """Convert a dictionary to a scenario.
494
-
495
- Example:
496
-
497
- >>> Scenario.from_dict({"food": "wood chips"})
498
- Scenario({'food': 'wood chips'})
499
- """
500
- from edsl.scenarios.FileStore import FileStore
501
-
502
- for key, value in d.items():
503
- # TODO: we should check this better if its a FileStore + add remote security check against path traversal
504
- if (
505
- isinstance(value, dict) and "base64_string" in value and "path" in value
506
- ) or isinstance(value, FileStore):
507
- d[key] = FileStore.from_dict(value)
508
- return cls(d)
509
-
510
- def _table(self) -> tuple[dict, list]:
511
- """Prepare generic table data.
512
- >>> s = Scenario({"food": "wood chips"})
513
- >>> s._table()
514
- ([{'Attribute': 'data', 'Value': "{'food': 'wood chips'}"}, {'Attribute': 'name', 'Value': 'None'}], ['Attribute', 'Value'])
515
- """
516
- table_data = []
517
- for attr_name, attr_value in self.__dict__.items():
518
- table_data.append({"Attribute": attr_name, "Value": repr(attr_value)})
519
- column_names = ["Attribute", "Value"]
520
- return table_data, column_names
521
-
522
- @classmethod
523
- def example(cls, randomize: bool = False) -> Scenario:
524
- """
525
- Returns an example Scenario instance.
526
-
527
- :param randomize: If True, adds a random string to the value of the example key.
528
- """
529
- addition = "" if not randomize else str(uuid4())
530
- return cls(
531
- {
532
- "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
533
- }
534
- )
535
-
536
- def code(self) -> List[str]:
537
- """Return the code for the scenario."""
538
- lines = []
539
- lines.append("from edsl.scenario import Scenario")
540
- lines.append(f"s = Scenario({self.data})")
541
- # return f"Scenario({self.data})"
542
- return lines
543
-
544
-
545
- if __name__ == "__main__":
546
- import doctest
547
-
548
- doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -1,65 +0,0 @@
1
- from typing import Optional
2
-
3
-
4
- class ScenarioHtmlMixin:
5
- @classmethod
6
- def from_html(cls, url: str, field_name: Optional[str] = None) -> "Scenario":
7
- """Create a scenario from HTML content.
8
-
9
- :param html: The HTML content.
10
- :param field_name: The name of the field containing the HTML content.
11
-
12
-
13
- """
14
- html = cls.fetch_html(url)
15
- text = cls.extract_text(html)
16
- if not field_name:
17
- field_name = "text"
18
- return cls({"url": url, "html": html, field_name: text})
19
-
20
- def fetch_html(url):
21
- # Define the user-agent to mimic a browser
22
- import requests
23
- from requests.adapters import HTTPAdapter
24
- from requests.packages.urllib3.util.retry import Retry
25
-
26
- headers = {
27
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
28
- }
29
-
30
- # Create a session to manage cookies and retries
31
- session = requests.Session()
32
- retries = Retry(
33
- total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]
34
- )
35
- session.mount("http://", HTTPAdapter(max_retries=retries))
36
- session.mount("https://", HTTPAdapter(max_retries=retries))
37
-
38
- try:
39
- # Make the request
40
- response = session.get(url, headers=headers, timeout=10)
41
- response.raise_for_status() # Raise an exception for HTTP errors
42
- return response.text
43
- except requests.exceptions.RequestException as e:
44
- print(f"An error occurred: {e}")
45
- return None
46
-
47
- def extract_text(html):
48
- # Extract text from HTML using BeautifulSoup
49
- from bs4 import BeautifulSoup
50
-
51
- soup = BeautifulSoup(html, "html.parser")
52
- text = soup.get_text()
53
- return text
54
-
55
-
56
- if __name__ == "__main__":
57
- # Usage example
58
- url = "https://example.com"
59
- html = ScenarioHtmlMixin.fetch_html(url)
60
- if html:
61
- print("Successfully fetched the HTML content.")
62
- else:
63
- print("Failed to fetch the HTML content.")
64
-
65
- print(html)
@@ -1,45 +0,0 @@
1
- """Mixin class for exporting results."""
2
-
3
- from functools import wraps
4
- from edsl.results.DatasetExportMixin import DatasetExportMixin
5
-
6
-
7
- def to_dataset(func):
8
- """Convert the object to a Dataset object before calling the function."""
9
-
10
- @wraps(func)
11
- def wrapper(self, *args, **kwargs):
12
- """Return the function with the Results object converted to a Dataset object."""
13
- if self.__class__.__name__ == "ScenarioList":
14
- return func(self.to_dataset(), *args, **kwargs)
15
- else:
16
- raise Exception(
17
- f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
18
- )
19
-
20
- return wrapper
21
-
22
-
23
- def decorate_methods_from_mixin(cls, mixin_cls):
24
- for attr_name, attr_value in mixin_cls.__dict__.items():
25
- if callable(attr_value) and not attr_name.startswith("__"):
26
- setattr(cls, attr_name, to_dataset(attr_value))
27
- return cls
28
-
29
-
30
- # @decorate_all_methods
31
- class ScenarioListExportMixin(DatasetExportMixin):
32
- """Mixin class for exporting Results objects."""
33
-
34
- def __init_subclass__(cls, **kwargs):
35
- super().__init_subclass__(**kwargs)
36
- decorate_methods_from_mixin(cls, DatasetExportMixin)
37
-
38
- def to_docx(self, filename: str):
39
- """Export the ScenarioList to a .docx file."""
40
- dataset = self.to_dataset()
41
- from edsl.results.DatasetTree import Tree
42
-
43
- tree = Tree(dataset)
44
- tree.construct_tree()
45
- tree.to_docx(filename)
@@ -1,5 +0,0 @@
1
- from edsl.scenarios.handlers.txt import TxtMethods
2
-
3
-
4
- class LaTeXMethods(TxtMethods):
5
- pass
edsl/shared.py DELETED
@@ -1 +0,0 @@
1
- shared_globals = {}