edsl 0.1.14__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (407) hide show
  1. edsl/Base.py +348 -38
  2. edsl/BaseDiff.py +260 -0
  3. edsl/TemplateLoader.py +24 -0
  4. edsl/__init__.py +46 -10
  5. edsl/__version__.py +1 -0
  6. edsl/agents/Agent.py +842 -144
  7. edsl/agents/AgentList.py +521 -25
  8. edsl/agents/Invigilator.py +250 -374
  9. edsl/agents/InvigilatorBase.py +257 -0
  10. edsl/agents/PromptConstructor.py +272 -0
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/descriptors.py +43 -13
  14. edsl/agents/prompt_helpers.py +129 -0
  15. edsl/agents/question_option_processor.py +172 -0
  16. edsl/auto/AutoStudy.py +130 -0
  17. edsl/auto/StageBase.py +243 -0
  18. edsl/auto/StageGenerateSurvey.py +178 -0
  19. edsl/auto/StageLabelQuestions.py +125 -0
  20. edsl/auto/StagePersona.py +61 -0
  21. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  22. edsl/auto/StagePersonaDimensionValues.py +74 -0
  23. edsl/auto/StagePersonaDimensions.py +69 -0
  24. edsl/auto/StageQuestions.py +74 -0
  25. edsl/auto/SurveyCreatorPipeline.py +21 -0
  26. edsl/auto/utilities.py +218 -0
  27. edsl/base/Base.py +279 -0
  28. edsl/config.py +121 -104
  29. edsl/conversation/Conversation.py +290 -0
  30. edsl/conversation/car_buying.py +59 -0
  31. edsl/conversation/chips.py +95 -0
  32. edsl/conversation/mug_negotiation.py +81 -0
  33. edsl/conversation/next_speaker_utilities.py +93 -0
  34. edsl/coop/CoopFunctionsMixin.py +15 -0
  35. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  36. edsl/coop/PriceFetcher.py +54 -0
  37. edsl/coop/__init__.py +1 -0
  38. edsl/coop/coop.py +1029 -134
  39. edsl/coop/utils.py +131 -0
  40. edsl/data/Cache.py +560 -89
  41. edsl/data/CacheEntry.py +230 -0
  42. edsl/data/CacheHandler.py +168 -0
  43. edsl/data/RemoteCacheSync.py +186 -0
  44. edsl/data/SQLiteDict.py +292 -0
  45. edsl/data/__init__.py +5 -3
  46. edsl/data/orm.py +6 -33
  47. edsl/data_transfer_models.py +74 -27
  48. edsl/enums.py +165 -8
  49. edsl/exceptions/BaseException.py +21 -0
  50. edsl/exceptions/__init__.py +52 -46
  51. edsl/exceptions/agents.py +33 -15
  52. edsl/exceptions/cache.py +5 -0
  53. edsl/exceptions/coop.py +8 -0
  54. edsl/exceptions/general.py +34 -0
  55. edsl/exceptions/inference_services.py +5 -0
  56. edsl/exceptions/jobs.py +15 -0
  57. edsl/exceptions/language_models.py +46 -1
  58. edsl/exceptions/questions.py +80 -5
  59. edsl/exceptions/results.py +16 -5
  60. edsl/exceptions/scenarios.py +29 -0
  61. edsl/exceptions/surveys.py +13 -10
  62. edsl/inference_services/AnthropicService.py +106 -0
  63. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  64. edsl/inference_services/AvailableModelFetcher.py +215 -0
  65. edsl/inference_services/AwsBedrock.py +118 -0
  66. edsl/inference_services/AzureAI.py +215 -0
  67. edsl/inference_services/DeepInfraService.py +18 -0
  68. edsl/inference_services/GoogleService.py +143 -0
  69. edsl/inference_services/GroqService.py +20 -0
  70. edsl/inference_services/InferenceServiceABC.py +80 -0
  71. edsl/inference_services/InferenceServicesCollection.py +138 -0
  72. edsl/inference_services/MistralAIService.py +120 -0
  73. edsl/inference_services/OllamaService.py +18 -0
  74. edsl/inference_services/OpenAIService.py +236 -0
  75. edsl/inference_services/PerplexityService.py +160 -0
  76. edsl/inference_services/ServiceAvailability.py +135 -0
  77. edsl/inference_services/TestService.py +90 -0
  78. edsl/inference_services/TogetherAIService.py +172 -0
  79. edsl/inference_services/data_structures.py +134 -0
  80. edsl/inference_services/models_available_cache.py +118 -0
  81. edsl/inference_services/rate_limits_cache.py +25 -0
  82. edsl/inference_services/registry.py +41 -0
  83. edsl/inference_services/write_available.py +10 -0
  84. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  85. edsl/jobs/Answers.py +21 -20
  86. edsl/jobs/FetchInvigilator.py +47 -0
  87. edsl/jobs/InterviewTaskManager.py +98 -0
  88. edsl/jobs/InterviewsConstructor.py +50 -0
  89. edsl/jobs/Jobs.py +684 -204
  90. edsl/jobs/JobsChecks.py +172 -0
  91. edsl/jobs/JobsComponentConstructor.py +189 -0
  92. edsl/jobs/JobsPrompts.py +270 -0
  93. edsl/jobs/JobsRemoteInferenceHandler.py +311 -0
  94. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  95. edsl/jobs/RequestTokenEstimator.py +30 -0
  96. edsl/jobs/async_interview_runner.py +138 -0
  97. edsl/jobs/buckets/BucketCollection.py +104 -0
  98. edsl/jobs/buckets/ModelBuckets.py +65 -0
  99. edsl/jobs/buckets/TokenBucket.py +283 -0
  100. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  101. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  102. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  103. edsl/jobs/data_structures.py +120 -0
  104. edsl/jobs/decorators.py +35 -0
  105. edsl/jobs/interviews/Interview.py +392 -0
  106. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -0
  107. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -0
  108. edsl/jobs/interviews/InterviewStatistic.py +63 -0
  109. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -0
  110. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -0
  111. edsl/jobs/interviews/InterviewStatusLog.py +92 -0
  112. edsl/jobs/interviews/ReportErrors.py +66 -0
  113. edsl/jobs/interviews/interview_status_enum.py +9 -0
  114. edsl/jobs/jobs_status_enums.py +9 -0
  115. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  116. edsl/jobs/results_exceptions_handler.py +98 -0
  117. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -110
  118. edsl/jobs/runners/JobsRunnerStatus.py +298 -0
  119. edsl/jobs/tasks/QuestionTaskCreator.py +244 -0
  120. edsl/jobs/tasks/TaskCreators.py +64 -0
  121. edsl/jobs/tasks/TaskHistory.py +470 -0
  122. edsl/jobs/tasks/TaskStatusLog.py +23 -0
  123. edsl/jobs/tasks/task_status_enum.py +161 -0
  124. edsl/jobs/tokens/InterviewTokenUsage.py +27 -0
  125. edsl/jobs/tokens/TokenUsage.py +34 -0
  126. edsl/language_models/ComputeCost.py +63 -0
  127. edsl/language_models/LanguageModel.py +507 -386
  128. edsl/language_models/ModelList.py +164 -0
  129. edsl/language_models/PriceManager.py +127 -0
  130. edsl/language_models/RawResponseHandler.py +106 -0
  131. edsl/language_models/RegisterLanguageModelsMeta.py +184 -0
  132. edsl/language_models/__init__.py +1 -8
  133. edsl/language_models/fake_openai_call.py +15 -0
  134. edsl/language_models/fake_openai_service.py +61 -0
  135. edsl/language_models/key_management/KeyLookup.py +63 -0
  136. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  137. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  138. edsl/language_models/key_management/__init__.py +0 -0
  139. edsl/language_models/key_management/models.py +131 -0
  140. edsl/language_models/model.py +256 -0
  141. edsl/language_models/repair.py +109 -41
  142. edsl/language_models/utilities.py +65 -0
  143. edsl/notebooks/Notebook.py +263 -0
  144. edsl/notebooks/NotebookToLaTeX.py +142 -0
  145. edsl/notebooks/__init__.py +1 -0
  146. edsl/prompts/Prompt.py +222 -93
  147. edsl/prompts/__init__.py +1 -1
  148. edsl/questions/ExceptionExplainer.py +77 -0
  149. edsl/questions/HTMLQuestion.py +103 -0
  150. edsl/questions/QuestionBase.py +518 -0
  151. edsl/questions/QuestionBasePromptsMixin.py +221 -0
  152. edsl/questions/QuestionBudget.py +164 -67
  153. edsl/questions/QuestionCheckBox.py +281 -62
  154. edsl/questions/QuestionDict.py +343 -0
  155. edsl/questions/QuestionExtract.py +136 -50
  156. edsl/questions/QuestionFreeText.py +79 -55
  157. edsl/questions/QuestionFunctional.py +138 -41
  158. edsl/questions/QuestionList.py +184 -57
  159. edsl/questions/QuestionMatrix.py +265 -0
  160. edsl/questions/QuestionMultipleChoice.py +293 -69
  161. edsl/questions/QuestionNumerical.py +109 -56
  162. edsl/questions/QuestionRank.py +244 -49
  163. edsl/questions/Quick.py +41 -0
  164. edsl/questions/SimpleAskMixin.py +74 -0
  165. edsl/questions/__init__.py +9 -6
  166. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +153 -38
  167. edsl/questions/compose_questions.py +13 -7
  168. edsl/questions/data_structures.py +20 -0
  169. edsl/questions/decorators.py +21 -0
  170. edsl/questions/derived/QuestionLikertFive.py +28 -26
  171. edsl/questions/derived/QuestionLinearScale.py +41 -28
  172. edsl/questions/derived/QuestionTopK.py +34 -26
  173. edsl/questions/derived/QuestionYesNo.py +40 -27
  174. edsl/questions/descriptors.py +228 -74
  175. edsl/questions/loop_processor.py +149 -0
  176. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  177. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  178. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  179. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  180. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  181. edsl/questions/prompt_templates/question_list.jinja +17 -0
  182. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  183. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  184. edsl/questions/question_base_gen_mixin.py +168 -0
  185. edsl/questions/question_registry.py +130 -46
  186. edsl/questions/register_questions_meta.py +71 -0
  187. edsl/questions/response_validator_abc.py +188 -0
  188. edsl/questions/response_validator_factory.py +34 -0
  189. edsl/questions/settings.py +5 -2
  190. edsl/questions/templates/__init__.py +0 -0
  191. edsl/questions/templates/budget/__init__.py +0 -0
  192. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  193. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  194. edsl/questions/templates/checkbox/__init__.py +0 -0
  195. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  196. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  197. edsl/questions/templates/dict/__init__.py +0 -0
  198. edsl/questions/templates/dict/answering_instructions.jinja +21 -0
  199. edsl/questions/templates/dict/question_presentation.jinja +1 -0
  200. edsl/questions/templates/extract/__init__.py +0 -0
  201. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  202. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  203. edsl/questions/templates/free_text/__init__.py +0 -0
  204. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  205. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  206. edsl/questions/templates/likert_five/__init__.py +0 -0
  207. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  208. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  209. edsl/questions/templates/linear_scale/__init__.py +0 -0
  210. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  211. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  212. edsl/questions/templates/list/__init__.py +0 -0
  213. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  214. edsl/questions/templates/list/question_presentation.jinja +5 -0
  215. edsl/questions/templates/matrix/__init__.py +1 -0
  216. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  217. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  218. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  219. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  220. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  221. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  222. edsl/questions/templates/numerical/__init__.py +0 -0
  223. edsl/questions/templates/numerical/answering_instructions.jinja +7 -0
  224. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  225. edsl/questions/templates/rank/__init__.py +0 -0
  226. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  227. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  228. edsl/questions/templates/top_k/__init__.py +0 -0
  229. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  230. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  231. edsl/questions/templates/yes_no/__init__.py +0 -0
  232. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  233. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  234. edsl/results/CSSParameterizer.py +108 -0
  235. edsl/results/Dataset.py +550 -19
  236. edsl/results/DatasetExportMixin.py +594 -0
  237. edsl/results/DatasetTree.py +295 -0
  238. edsl/results/MarkdownToDocx.py +122 -0
  239. edsl/results/MarkdownToPDF.py +111 -0
  240. edsl/results/Result.py +477 -173
  241. edsl/results/Results.py +987 -269
  242. edsl/results/ResultsExportMixin.py +28 -125
  243. edsl/results/ResultsGGMixin.py +83 -15
  244. edsl/results/TableDisplay.py +125 -0
  245. edsl/results/TextEditor.py +50 -0
  246. edsl/results/__init__.py +1 -1
  247. edsl/results/file_exports.py +252 -0
  248. edsl/results/results_fetch_mixin.py +33 -0
  249. edsl/results/results_selector.py +145 -0
  250. edsl/results/results_tools_mixin.py +98 -0
  251. edsl/results/smart_objects.py +96 -0
  252. edsl/results/table_data_class.py +12 -0
  253. edsl/results/table_display.css +78 -0
  254. edsl/results/table_renderers.py +118 -0
  255. edsl/results/tree_explore.py +115 -0
  256. edsl/scenarios/ConstructDownloadLink.py +109 -0
  257. edsl/scenarios/DocumentChunker.py +102 -0
  258. edsl/scenarios/DocxScenario.py +16 -0
  259. edsl/scenarios/FileStore.py +543 -0
  260. edsl/scenarios/PdfExtractor.py +40 -0
  261. edsl/scenarios/Scenario.py +431 -62
  262. edsl/scenarios/ScenarioHtmlMixin.py +65 -0
  263. edsl/scenarios/ScenarioList.py +1415 -45
  264. edsl/scenarios/ScenarioListExportMixin.py +45 -0
  265. edsl/scenarios/ScenarioListPdfMixin.py +239 -0
  266. edsl/scenarios/__init__.py +2 -0
  267. edsl/scenarios/directory_scanner.py +96 -0
  268. edsl/scenarios/file_methods.py +85 -0
  269. edsl/scenarios/handlers/__init__.py +13 -0
  270. edsl/scenarios/handlers/csv.py +49 -0
  271. edsl/scenarios/handlers/docx.py +76 -0
  272. edsl/scenarios/handlers/html.py +37 -0
  273. edsl/scenarios/handlers/json.py +111 -0
  274. edsl/scenarios/handlers/latex.py +5 -0
  275. edsl/scenarios/handlers/md.py +51 -0
  276. edsl/scenarios/handlers/pdf.py +68 -0
  277. edsl/scenarios/handlers/png.py +39 -0
  278. edsl/scenarios/handlers/pptx.py +105 -0
  279. edsl/scenarios/handlers/py.py +294 -0
  280. edsl/scenarios/handlers/sql.py +313 -0
  281. edsl/scenarios/handlers/sqlite.py +149 -0
  282. edsl/scenarios/handlers/txt.py +33 -0
  283. edsl/scenarios/scenario_join.py +131 -0
  284. edsl/scenarios/scenario_selector.py +156 -0
  285. edsl/shared.py +1 -0
  286. edsl/study/ObjectEntry.py +173 -0
  287. edsl/study/ProofOfWork.py +113 -0
  288. edsl/study/SnapShot.py +80 -0
  289. edsl/study/Study.py +521 -0
  290. edsl/study/__init__.py +4 -0
  291. edsl/surveys/ConstructDAG.py +92 -0
  292. edsl/surveys/DAG.py +92 -11
  293. edsl/surveys/EditSurvey.py +221 -0
  294. edsl/surveys/InstructionHandler.py +100 -0
  295. edsl/surveys/Memory.py +9 -4
  296. edsl/surveys/MemoryManagement.py +72 -0
  297. edsl/surveys/MemoryPlan.py +156 -35
  298. edsl/surveys/Rule.py +221 -74
  299. edsl/surveys/RuleCollection.py +241 -61
  300. edsl/surveys/RuleManager.py +172 -0
  301. edsl/surveys/Simulator.py +75 -0
  302. edsl/surveys/Survey.py +1079 -339
  303. edsl/surveys/SurveyCSS.py +273 -0
  304. edsl/surveys/SurveyExportMixin.py +235 -40
  305. edsl/surveys/SurveyFlowVisualization.py +181 -0
  306. edsl/surveys/SurveyQualtricsImport.py +284 -0
  307. edsl/surveys/SurveyToApp.py +141 -0
  308. edsl/surveys/__init__.py +4 -2
  309. edsl/surveys/base.py +19 -3
  310. edsl/surveys/descriptors.py +17 -6
  311. edsl/surveys/instructions/ChangeInstruction.py +48 -0
  312. edsl/surveys/instructions/Instruction.py +56 -0
  313. edsl/surveys/instructions/InstructionCollection.py +82 -0
  314. edsl/surveys/instructions/__init__.py +0 -0
  315. edsl/templates/error_reporting/base.html +24 -0
  316. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  317. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  318. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  319. edsl/templates/error_reporting/interview_details.html +116 -0
  320. edsl/templates/error_reporting/interviews.html +19 -0
  321. edsl/templates/error_reporting/overview.html +5 -0
  322. edsl/templates/error_reporting/performance_plot.html +2 -0
  323. edsl/templates/error_reporting/report.css +74 -0
  324. edsl/templates/error_reporting/report.html +118 -0
  325. edsl/templates/error_reporting/report.js +25 -0
  326. edsl/tools/__init__.py +1 -0
  327. edsl/tools/clusters.py +192 -0
  328. edsl/tools/embeddings.py +27 -0
  329. edsl/tools/embeddings_plotting.py +118 -0
  330. edsl/tools/plotting.py +112 -0
  331. edsl/tools/summarize.py +18 -0
  332. edsl/utilities/PrettyList.py +56 -0
  333. edsl/utilities/SystemInfo.py +5 -0
  334. edsl/utilities/__init__.py +21 -20
  335. edsl/utilities/ast_utilities.py +3 -0
  336. edsl/utilities/data/Registry.py +2 -0
  337. edsl/utilities/decorators.py +41 -0
  338. edsl/utilities/gcp_bucket/__init__.py +0 -0
  339. edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
  340. edsl/utilities/interface.py +310 -60
  341. edsl/utilities/is_notebook.py +18 -0
  342. edsl/utilities/is_valid_variable_name.py +11 -0
  343. edsl/utilities/naming_utilities.py +263 -0
  344. edsl/utilities/remove_edsl_version.py +24 -0
  345. edsl/utilities/repair_functions.py +28 -0
  346. edsl/utilities/restricted_python.py +70 -0
  347. edsl/utilities/utilities.py +203 -13
  348. edsl-0.1.40.dist-info/METADATA +111 -0
  349. edsl-0.1.40.dist-info/RECORD +362 -0
  350. {edsl-0.1.14.dist-info → edsl-0.1.40.dist-info}/WHEEL +1 -1
  351. edsl/agents/AgentListExportMixin.py +0 -24
  352. edsl/coop/old.py +0 -31
  353. edsl/data/Database.py +0 -141
  354. edsl/data/crud.py +0 -121
  355. edsl/jobs/Interview.py +0 -417
  356. edsl/jobs/JobsRunner.py +0 -63
  357. edsl/jobs/JobsRunnerStatusMixin.py +0 -115
  358. edsl/jobs/base.py +0 -47
  359. edsl/jobs/buckets.py +0 -166
  360. edsl/jobs/runners/JobsRunnerDryRun.py +0 -19
  361. edsl/jobs/runners/JobsRunnerStreaming.py +0 -54
  362. edsl/jobs/task_management.py +0 -218
  363. edsl/jobs/token_tracking.py +0 -78
  364. edsl/language_models/DeepInfra.py +0 -69
  365. edsl/language_models/OpenAI.py +0 -98
  366. edsl/language_models/model_interfaces/GeminiPro.py +0 -66
  367. edsl/language_models/model_interfaces/LanguageModelOpenAIFour.py +0 -8
  368. edsl/language_models/model_interfaces/LanguageModelOpenAIThreeFiveTurbo.py +0 -8
  369. edsl/language_models/model_interfaces/LlamaTwo13B.py +0 -21
  370. edsl/language_models/model_interfaces/LlamaTwo70B.py +0 -21
  371. edsl/language_models/model_interfaces/Mixtral8x7B.py +0 -24
  372. edsl/language_models/registry.py +0 -81
  373. edsl/language_models/schemas.py +0 -15
  374. edsl/language_models/unused/ReplicateBase.py +0 -83
  375. edsl/prompts/QuestionInstructionsBase.py +0 -6
  376. edsl/prompts/library/agent_instructions.py +0 -29
  377. edsl/prompts/library/agent_persona.py +0 -17
  378. edsl/prompts/library/question_budget.py +0 -26
  379. edsl/prompts/library/question_checkbox.py +0 -32
  380. edsl/prompts/library/question_extract.py +0 -19
  381. edsl/prompts/library/question_freetext.py +0 -14
  382. edsl/prompts/library/question_linear_scale.py +0 -20
  383. edsl/prompts/library/question_list.py +0 -22
  384. edsl/prompts/library/question_multiple_choice.py +0 -44
  385. edsl/prompts/library/question_numerical.py +0 -31
  386. edsl/prompts/library/question_rank.py +0 -21
  387. edsl/prompts/prompt_config.py +0 -33
  388. edsl/prompts/registry.py +0 -185
  389. edsl/questions/Question.py +0 -240
  390. edsl/report/InputOutputDataTypes.py +0 -134
  391. edsl/report/RegressionMixin.py +0 -28
  392. edsl/report/ReportOutputs.py +0 -1228
  393. edsl/report/ResultsFetchMixin.py +0 -106
  394. edsl/report/ResultsOutputMixin.py +0 -14
  395. edsl/report/demo.ipynb +0 -645
  396. edsl/results/ResultsDBMixin.py +0 -184
  397. edsl/surveys/SurveyFlowVisualizationMixin.py +0 -92
  398. edsl/trackers/Tracker.py +0 -91
  399. edsl/trackers/TrackerAPI.py +0 -196
  400. edsl/trackers/TrackerTasks.py +0 -70
  401. edsl/utilities/pastebin.py +0 -141
  402. edsl-0.1.14.dist-info/METADATA +0 -69
  403. edsl-0.1.14.dist-info/RECORD +0 -141
  404. /edsl/{language_models/model_interfaces → inference_services}/__init__.py +0 -0
  405. /edsl/{report/__init__.py → jobs/runners/JobsRunnerStatusData.py} +0 -0
  406. /edsl/{trackers/__init__.py → language_models/ServiceDataSources.py} +0 -0
  407. {edsl-0.1.14.dist-info → edsl-0.1.40.dist-info}/LICENSE +0 -0
@@ -1,48 +1,166 @@
1
+ """A Scenario is a dictionary with a key/value to parameterize a question."""
2
+
3
+ from __future__ import annotations
1
4
  import copy
5
+ import os
6
+ import json
2
7
  from collections import UserDict
3
- from rich.table import Table
8
+ from typing import Union, List, Optional, TYPE_CHECKING, Collection
9
+ from uuid import uuid4
4
10
 
5
11
  from edsl.Base import Base
12
+ from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
13
+ from edsl.utilities.remove_edsl_version import remove_edsl_version
14
+ from edsl.exceptions.scenarios import ScenarioError
15
+
16
+ if TYPE_CHECKING:
17
+ from edsl.scenarios.ScenarioList import ScenarioList
18
+ from edsl.results.Dataset import Dataset
19
+
20
+
21
+ class DisplayJSON:
22
+ """Display a dictionary as JSON."""
23
+
24
+ def __init__(self, input_dict: dict):
25
+ self.text = json.dumps(input_dict, indent=4)
26
+
27
+ def __repr__(self):
28
+ return self.text
29
+
30
+
31
+ class DisplayYAML:
32
+ """Display a dictionary as YAML."""
33
+
34
+ def __init__(self, input_dict: dict):
35
+ import yaml
36
+
37
+ self.text = yaml.dump(input_dict)
38
+
39
+ def __repr__(self):
40
+ return self.text
41
+
42
+
43
+ class Scenario(Base, UserDict, ScenarioHtmlMixin):
44
+ """A Scenario is a dictionary of keys/values that can be used to parameterize questions."""
45
+
46
+ __documentation__ = "https://docs.expectedparrot.com/en/latest/scenarios.html"
47
+
48
+ def __init__(self, data: Optional[dict] = None, name: Optional[str] = None):
49
+ """Initialize a new Scenario.
50
+
51
+ :param data: A dictionary of keys/values for parameterizing questions.
52
+ :param name: The name of the scenario.
53
+ """
54
+ if not isinstance(data, dict) and data is not None:
55
+ try:
56
+ data = dict(data)
57
+ except Exception as e:
58
+ raise ScenarioError(
59
+ f"You must pass in a dictionary to initialize a Scenario. You passed in {data}",
60
+ "Exception message:" + str(e),
61
+ )
62
+
63
+ super().__init__()
64
+ self.data = data if data is not None else {}
65
+ self.name = name
66
+
67
+ def replicate(self, n: int) -> "ScenarioList":
68
+ """Replicate a scenario n times to return a ScenarioList.
69
+
70
+ :param n: The number of times to replicate the scenario.
71
+
72
+ Example:
73
+ >>> s = Scenario({"food": "wood chips"})
74
+ >>> s.replicate(2)
75
+ ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood chips'})])
76
+ """
77
+ from edsl.scenarios.ScenarioList import ScenarioList
78
+
79
+ return ScenarioList([copy.deepcopy(self) for _ in range(n)])
80
+
81
+ @property
82
+ def has_jinja_braces(self) -> bool:
83
+ """Return whether the scenario has jinja braces. This matters for rendering.
84
+
85
+ >>> s = Scenario({"food": "I love {{wood chips}}"})
86
+ >>> s.has_jinja_braces
87
+ True
88
+ """
89
+ for _, value in self.items():
90
+ if isinstance(value, str):
91
+ if "{{" in value and "}}" in value:
92
+ return True
93
+ return False
94
+
95
+ def _convert_jinja_braces(
96
+ self, replacement_left: str = "<<", replacement_right: str = ">>"
97
+ ) -> Scenario:
98
+ """Convert Jinja braces to some other character.
99
+
100
+ >>> s = Scenario({"food": "I love {{wood chips}}"})
101
+ >>> s._convert_jinja_braces()
102
+ Scenario({'food': 'I love <<wood chips>>'})
103
+
104
+ """
105
+ new_scenario = Scenario()
106
+ for key, value in self.items():
107
+ if isinstance(value, str):
108
+ new_scenario[key] = value.replace("{{", replacement_left).replace(
109
+ "}}", replacement_right
110
+ )
111
+ else:
112
+ new_scenario[key] = value
113
+ return new_scenario
114
+
115
+ def __add__(self, other_scenario: Scenario) -> Scenario:
116
+ """Combine two scenarios by taking the union of their keys
117
+
118
+ If the other scenario is None, then just return self.
6
119
 
120
+ :param other_scenario: The other scenario to combine with.
7
121
 
8
- class Scenario(Base, UserDict):
9
- """A Scenario is a dictionary of key/values that describe some situation."""
122
+ Example:
10
123
 
11
- def __add__(self, other_scenario):
12
- """Combine two scenarios. If the other scenario is None, then just return self.
13
124
  >>> s1 = Scenario({"price": 100, "quantity": 2})
14
125
  >>> s2 = Scenario({"color": "red"})
15
126
  >>> s1 + s2
16
- {'price': 100, 'quantity': 2, 'color': 'red'}
127
+ Scenario({'price': 100, 'quantity': 2, 'color': 'red'})
17
128
  >>> (s1 + s2).__class__.__name__
18
129
  'Scenario'
19
130
  """
20
131
  if other_scenario is None:
21
132
  return self
22
133
  else:
23
- new_scenario = Scenario()
24
- new_scenario.data = copy.deepcopy(self.data)
25
- new_scenario.update(copy.deepcopy(other_scenario))
26
- return Scenario(new_scenario)
134
+ data1 = copy.deepcopy(self.data)
135
+ data2 = copy.deepcopy(other_scenario.data)
136
+ s = Scenario(data1 | data2)
137
+ return s
27
138
 
28
- def to(self, question_or_survey) -> "Jobs":
29
- """Run a question/survey with this particular scenario.
30
- Useful if you want to reverse the typical chain of operations.
139
+ def rename(
140
+ self,
141
+ old_name_or_replacement_dict: Union[str, dict[str, str]],
142
+ new_name: Optional[str] = None,
143
+ ) -> Scenario:
144
+ """Rename the keys of a scenario.
31
145
 
32
- >>> from edsl.questions.QuestionMultipleChoice import QuestionMultipleChoice
33
- >>> s = Scenario({"food": "wood chips"})
34
- >>> q = QuestionMultipleChoice(question_text = "Do you enjoy the taste of {{food}}?", question_options = ["Yes", "No"], question_name = "food_preference")
35
- >>> _ = s.to(q)
36
- """
37
- return question_or_survey.by(self)
146
+ :param old_name_or_replacement_dict: A dictionary of old keys to new keys *OR* a string of the old key.
147
+ :param new_name: The new name of the key.
38
148
 
39
- def rename(self, replacement_dict: dict) -> "Scenario":
40
- """Rename the keys of a scenario. Useful for changing the names of keys.
149
+ Example:
41
150
 
42
151
  >>> s = Scenario({"food": "wood chips"})
43
152
  >>> s.rename({"food": "food_preference"})
44
- {'food_preference': 'wood chips'}
153
+ Scenario({'food_preference': 'wood chips'})
154
+
155
+ >>> s = Scenario({"food": "wood chips"})
156
+ >>> s.rename("food", "snack")
157
+ Scenario({'snack': 'wood chips'})
45
158
  """
159
+ if isinstance(old_name_or_replacement_dict, str) and new_name is not None:
160
+ replacement_dict = {old_name_or_replacement_dict: new_name}
161
+ else:
162
+ replacement_dict = old_name_or_replacement_dict
163
+
46
164
  new_scenario = Scenario()
47
165
  for key, value in self.items():
48
166
  if key in replacement_dict:
@@ -51,79 +169,330 @@ class Scenario(Base, UserDict):
51
169
  new_scenario[key] = value
52
170
  return new_scenario
53
171
 
54
- def make_question(self, question_class: type):
55
- """Make a question from this scenario. Note it takes a QuestionClass (not a question)
56
- as an input.
57
-
58
- >>> from edsl.questions.QuestionMultipleChoice import QuestionMultipleChoice
59
- >>> from edsl.agents.Agent import Agent
172
+ def new_column_names(self, new_names: List[str]) -> Scenario:
173
+ """Rename the keys of a scenario.
60
174
 
61
- >>> s = Scenario({"question_name": "feelings",
62
- ... "question_text": "How are you feeling?",
63
- ... "question_options": ["Very sad.", "Sad.", "Neutral.", "Happy.", "Very happy."]})
64
- >>> q = s.make_question(QuestionMultipleChoice)
65
- >>> q.by(Agent(traits = {'feeling': 'Very sad'})).run().select("feelings")
66
- [{'answer.feelings': ['Very sad.']}]
175
+ >>> s = Scenario({"food": "wood chips"})
176
+ >>> s.new_column_names(["food_preference"])
177
+ Scenario({'food_preference': 'wood chips'})
67
178
  """
68
- return question_class(**self)
179
+ try:
180
+ assert len(new_names) == len(self.keys())
181
+ except AssertionError:
182
+ print("The number of new names must match the number of keys.")
69
183
 
70
- def to_dict(self):
184
+ new_scenario = Scenario()
185
+ for new_names, value in zip(new_names, self.values()):
186
+ new_scenario[new_names] = value
187
+ return new_scenario
188
+
189
+ def table(self, tablefmt: str = "grid") -> str:
190
+ """Display a scenario as a table."""
191
+ return self.to_dataset().table(tablefmt=tablefmt)
192
+
193
+ def json(self):
194
+ return DisplayJSON(self.to_dict(add_edsl_version=False))
195
+
196
+ def yaml(self):
197
+ import yaml
198
+
199
+ return DisplayYAML(self.to_dict(add_edsl_version=False))
200
+
201
+ def to_dict(self, add_edsl_version: bool = True) -> dict:
71
202
  """Convert a scenario to a dictionary.
203
+
204
+ Example:
205
+
72
206
  >>> s = Scenario({"food": "wood chips"})
73
207
  >>> s.to_dict()
208
+ {'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}
209
+
210
+ >>> s.to_dict(add_edsl_version = False)
74
211
  {'food': 'wood chips'}
212
+
75
213
  """
76
- return self.data
214
+ from edsl.scenarios.FileStore import FileStore
215
+
216
+ d = self.data.copy()
217
+ for key, value in d.items():
218
+ if isinstance(value, FileStore):
219
+ d[key] = value.to_dict(add_edsl_version=add_edsl_version)
220
+ if add_edsl_version:
221
+ from edsl import __version__
222
+
223
+ d["edsl_version"] = __version__
224
+ d["edsl_class_name"] = "Scenario"
225
+
226
+ return d
227
+
228
+ def __hash__(self) -> int:
229
+ """Return a hash of the scenario.
230
+
231
+ Example:
232
+
233
+ >>> s = Scenario({"food": "wood chips"})
234
+ >>> hash(s)
235
+ 1153210385458344214
236
+ """
237
+ from edsl.utilities.utilities import dict_hash
238
+
239
+ return dict_hash(self.to_dict(add_edsl_version=False))
240
+
241
+ def __repr__(self):
242
+ return "Scenario(" + repr(self.data) + ")"
243
+
244
+ def to_dataset(self) -> "Dataset":
245
+ """Convert a scenario to a dataset.
246
+
247
+ >>> s = Scenario({"food": "wood chips"})
248
+ >>> s.to_dataset()
249
+ Dataset([{'key': ['food']}, {'value': ['wood chips']}])
250
+ """
251
+ from edsl.results.Dataset import Dataset
252
+
253
+ keys = list(self.keys())
254
+ values = list(self.values())
255
+ return Dataset([{"key": keys}, {"value": values}])
256
+
257
+ def select(self, list_of_keys: Collection[str]) -> "Scenario":
258
+ """Select a subset of keys from a scenario.
259
+
260
+ :param list_of_keys: The keys to select.
261
+
262
+ Example:
263
+
264
+ >>> s = Scenario({"food": "wood chips", "drink": "water"})
265
+ >>> s.select(["food"])
266
+ Scenario({'food': 'wood chips'})
267
+ """
268
+ new_scenario = Scenario()
269
+ for key in list_of_keys:
270
+ new_scenario[key] = self[key]
271
+ return new_scenario
272
+
273
def drop(self, list_of_keys: Collection[str]) -> "Scenario":
    """Return a new scenario without the given keys.

    :param list_of_keys: The keys to leave out of the new scenario.

    Example:

    >>> s = Scenario({"food": "wood chips", "drink": "water"})
    >>> s.drop(["food"])
    Scenario({'drink': 'water'})
    """
    remaining = Scenario()
    for name in self.keys():
        if name in list_of_keys:
            # Listed for removal: do not carry it over.
            continue
        remaining[name] = self[name]
    return remaining
289
+
290
def keep(self, list_of_keys: Collection[str]) -> "Scenario":
    """Keep a subset of keys from a scenario.

    This is an alias for :meth:`select`; the argument is forwarded unchanged,
    so any collection of keys that ``select`` accepts is accepted here.

    :param list_of_keys: The keys to keep.

    Example:

    >>> s = Scenario({"food": "wood chips", "drink": "water"})
    >>> s.keep(["food"])
    Scenario({'food': 'wood chips'})
    """
    return self.select(list_of_keys)
303
+
304
@classmethod
def from_url(
    cls,
    url: str,
    field_name: Optional[str] = "text",
    timeout: Optional[float] = 10.0,
) -> "Scenario":
    """Creates a scenario from a URL.

    The response body is stored as text under ``field_name`` and the URL
    itself is stored under ``"url"``.

    :param url: The URL to create the scenario from.
    :param field_name: The field name to use for the text; ``None`` falls back to ``"text"``.
    :param timeout: Seconds to wait for the server; ``None`` disables the timeout.

    """
    import requests

    # requests applies no timeout by default, so an unresponsive server would
    # otherwise block this call indefinitely.
    response = requests.get(url, timeout=timeout)

    # The annotation allows None; avoid creating a scenario with a None key.
    key = "text" if field_name is None else field_name
    return cls({"url": url, key: response.text})
316
+
317
@classmethod
def from_file(cls, file_path: str, field_name: str) -> "Scenario":
    """Create a scenario whose single entry is a ``FileStore`` for a file.

    :param file_path: Path handed to ``FileStore``.
    :param field_name: Key under which the ``FileStore`` is stored.

    >>> import tempfile
    >>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
    ...     _ = f.write("This is a test.")
    ...     _ = f.flush()
    ...     s = Scenario.from_file(f.name, "file")
    >>> s
    Scenario({'file': FileStore(path='...', ...)})

    """
    from edsl.scenarios.FileStore import FileStore

    return cls({field_name: FileStore(file_path)})
334
+
335
+ @classmethod
336
+ def from_image(
337
+ cls, image_path: str, image_name: Optional[str] = None
338
+ ) -> "Scenario":
339
+ """
340
+ Creates a scenario with a base64 encoding of an image.
341
+
342
+ Args:
343
+ image_path (str): Path to the image file.
344
+
345
+ Returns:
346
+ Scenario: A new Scenario instance with image information.
347
+
348
+ """
349
+ if not os.path.exists(image_path):
350
+ raise FileNotFoundError(f"Image file not found: {image_path}")
351
+
352
+ if image_name is None:
353
+ image_name = os.path.basename(image_path).split(".")[0]
354
+
355
+ return cls.from_file(image_path, image_name)
77
356
 
78
357
@classmethod
def from_pdf(cls, pdf_path: str):
    """Create an object from a PDF file via ``PdfExtractor``.

    :param pdf_path: Path to the PDF file, passed to ``PdfExtractor``
        together with this class.
    """
    from edsl.scenarios.PdfExtractor import PdfExtractor

    extractor = PdfExtractor(pdf_path, cls)
    return extractor.get_object()
362
+
363
@classmethod
def from_docx(cls, docx_path: str) -> "Scenario":
    """Creates a scenario from the text of a docx file.

    :param docx_path: The path to the docx file.

    Example:

    >>> from docx import Document
    >>> doc = Document()
    >>> _ = doc.add_heading("EDSL Survey")
    >>> _ = doc.add_paragraph("This is a test.")
    >>> doc.save("test.docx")
    >>> s = Scenario.from_docx("test.docx")
    >>> s
    Scenario({'file_path': 'test.docx', 'text': 'EDSL Survey\\nThis is a test.'})
    >>> import os; os.remove("test.docx")
    """
    from edsl.scenarios.DocxScenario import DocxScenario

    # Build through ``cls`` (like the other alternate constructors) so that a
    # subclass calling from_docx gets an instance of its own type.
    return cls(DocxScenario(docx_path).get_scenario_dict())
384
+
385
def chunk(
    self,
    field,
    num_words: Optional[int] = None,
    num_lines: Optional[int] = None,
    include_original=False,
    hash_original=False,
) -> "ScenarioList":
    """Split one field of the scenario into several smaller scenarios.

    The work is delegated to ``DocumentChunker``; this method only forwards
    its arguments.

    :param field: The field to split.
    :param num_words: The number of words in each chunk.
    :param num_lines: The number of lines in each chunk.
    :param include_original: Whether to include the original field in the new scenarios.
    :param hash_original: Whether to hash the original field in the new scenarios.

    Exactly one of `num_words` and `num_lines` must be given.

    With `include_original=True`, each new scenario also carries the original field under an "_original" suffix.

    `hash_original` is useful when the original text should not be stored but a unique identifier for it is still wanted.

    Example:

    >>> s = Scenario({"text": "This is a test.\\nThis is a test.\\n\\nThis is a test."})
    >>> s.chunk("text", num_lines = 1)
    ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 1}), Scenario({'text': '', 'text_chunk': 2}), Scenario({'text': 'This is a test.', 'text_chunk': 3})])

    >>> s.chunk("text", num_words = 2)
    ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0}), Scenario({'text': 'a test.', 'text_chunk': 1}), Scenario({'text': 'This is', 'text_chunk': 2}), Scenario({'text': 'a test.', 'text_chunk': 3}), Scenario({'text': 'This is', 'text_chunk': 4}), Scenario({'text': 'a test.', 'text_chunk': 5})])

    >>> s = Scenario({"text": "Hello World"})
    >>> s.chunk("text", num_words = 1, include_original = True)
    ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'Hello World'})])

    >>> s = Scenario({"text": "Hello World"})
    >>> s.chunk("text", num_words = 1, include_original = True, hash_original = True)
    ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])

    >>> s.chunk("text")
    Traceback (most recent call last):
    ...
    ValueError: You must specify either num_words or num_lines.

    >>> s.chunk("text", num_words = 1, num_lines = 1)
    Traceback (most recent call last):
    ...
    ValueError: You must specify either num_words or num_lines, but not both.
    """
    from edsl.scenarios.DocumentChunker import DocumentChunker

    chunker = DocumentChunker(self)
    return chunker.chunk(
        field, num_words, num_lines, include_original, hash_original
    )
439
+
440
@classmethod
@remove_edsl_version
def from_dict(cls, d: dict) -> "Scenario":
    """Convert a dictionary to a scenario.

    Values that are ``FileStore`` objects, or dictionaries carrying both a
    ``"base64_string"`` and a ``"path"`` key, are rebuilt with
    ``FileStore.from_dict``; every other value is used as-is. The input
    dictionary ``d`` is not modified.

    :param d: The dictionary to convert.

    Example:

    >>> Scenario.from_dict({"food": "wood chips"})
    Scenario({'food': 'wood chips'})
    """
    from edsl.scenarios.FileStore import FileStore

    # Collect into a fresh dict instead of overwriting entries of ``d``, so
    # the caller's dictionary is left untouched.
    restored = {}
    for key, value in d.items():
        # TODO: we should check this better if its a FileStore + add remote security check against path traversal
        if (
            isinstance(value, dict) and "base64_string" in value and "path" in value
        ) or isinstance(value, FileStore):
            value = FileStore.from_dict(value)
        restored[key] = value
    return cls(restored)
85
459
 
86
460
  def _table(self) -> tuple[dict, list]:
87
- """Prepare generic table data."""
461
+ """Prepare generic table data.
462
+ >>> s = Scenario({"food": "wood chips"})
463
+ >>> s._table()
464
+ ([{'Attribute': 'data', 'Value': "{'food': 'wood chips'}"}, {'Attribute': 'name', 'Value': 'None'}], ['Attribute', 'Value'])
465
+ """
88
466
  table_data = []
89
467
  for attr_name, attr_value in self.__dict__.items():
90
468
  table_data.append({"Attribute": attr_name, "Value": repr(attr_value)})
91
469
  column_names = ["Attribute", "Value"]
92
470
  return table_data, column_names
93
471
 
94
- def rich_print(self):
95
- """Displays an object as a rich table."""
96
- table_data, column_names = self._table()
97
- table = Table(title=f"{self.__class__.__name__} Attributes")
98
- for column in column_names:
99
- table.add_column(column, style="bold")
100
-
101
- for row in table_data:
102
- row_data = [row[column] for column in column_names]
103
- table.add_row(*row_data)
104
-
105
- return table
106
-
107
472
  @classmethod
108
- def example(cls):
109
- """Returns an example scenario.
110
- >>> Scenario.example()
111
- {'persona': 'A reseacher studying whether LLMs can be used to generate surveys.'}
473
+ def example(cls, randomize: bool = False) -> Scenario:
474
+ """
475
+ Returns an example Scenario instance.
476
+
477
+ :param randomize: If True, adds a random string to the value of the example key.
112
478
  """
479
+ addition = "" if not randomize else str(uuid4())
113
480
  return cls(
114
481
  {
115
- "persona": "A reseacher studying whether LLMs can be used to generate surveys."
482
+ "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
116
483
  }
117
484
  )
118
485
 
119
- def code(self):
120
- """Returns the code for the scenario."""
486
+ def code(self) -> List[str]:
487
+ """Return the code for the scenario."""
121
488
  lines = []
122
489
  lines.append("from edsl.scenario import Scenario")
123
- return f"Scenario({self.data})"
490
+ lines.append(f"s = Scenario({self.data})")
491
+ # return f"Scenario({self.data})"
492
+ return lines
124
493
 
125
494
 
126
495
if __name__ == "__main__":
    # Run this module's doctests when it is executed as a script.
    import doctest

    # ELLIPSIS lets "..." in expected output match any text; several examples
    # in this file rely on it (e.g. the FileStore repr and the version string).
    doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -0,0 +1,65 @@
1
+ from typing import Optional
2
+
3
+
4
class ScenarioHtmlMixin:
    """Mixin that adds a ``from_html`` alternate constructor.

    The host class must accept a single dictionary in its constructor.
    """

    @classmethod
    def from_html(cls, url: str, field_name: Optional[str] = None) -> "Scenario":
        """Create a scenario from the HTML content of a web page.

        The result holds the URL under ``"url"``, the raw markup under
        ``"html"`` and the extracted plain text under ``field_name``.

        :param url: The URL of the page to download.
        :param field_name: The name of the field receiving the extracted text;
            defaults to ``"text"`` when empty or ``None``.
        :raises ValueError: If the page could not be fetched.
        """
        html = cls.fetch_html(url)
        if html is None:
            # fetch_html signals failure with None; stop here with a clear
            # message instead of handing None to the HTML parser.
            raise ValueError(f"Could not fetch HTML content from {url!r}.")

        text = cls.extract_text(html)
        if not field_name:
            field_name = "text"
        return cls({"url": url, "html": html, field_name: text})

    @staticmethod
    def fetch_html(url):
        """Download ``url`` and return the response text, or ``None`` on failure.

        Requests are sent with a browser-like User-Agent, a 10 second timeout
        and up to 5 retries on 500/502/503/504 responses. Request errors are
        printed rather than raised.

        :param url: The URL to download.
        """
        import requests
        from requests.adapters import HTTPAdapter
        from requests.packages.urllib3.util.retry import Retry

        # Define the user-agent to mimic a browser
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

        retries = Retry(
            total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]
        )

        # The context manager closes the session (and its pooled connections)
        # whether the request succeeds or fails.
        with requests.Session() as session:
            session.mount("http://", HTTPAdapter(max_retries=retries))
            session.mount("https://", HTTPAdapter(max_retries=retries))

            try:
                response = session.get(url, headers=headers, timeout=10)
                response.raise_for_status()  # Raise an exception for HTTP errors
                return response.text
            except requests.exceptions.RequestException as e:
                print(f"An error occurred: {e}")
                return None

    @staticmethod
    def extract_text(html):
        """Return the text content of ``html`` as parsed by BeautifulSoup.

        :param html: The HTML markup to parse.
        """
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, "html.parser")
        return soup.get_text()
54
+
55
+
56
+ if __name__ == "__main__":
57
+ # Usage example
58
+ url = "https://example.com"
59
+ html = ScenarioHtmlMixin.fetch_html(url)
60
+ if html:
61
+ print("Successfully fetched the HTML content.")
62
+ else:
63
+ print("Failed to fetch the HTML content.")
64
+
65
+ print(html)