edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. edsl/Base.py +413 -332
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +57 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +1071 -867
  7. edsl/agents/AgentList.py +551 -413
  8. edsl/agents/Invigilator.py +284 -233
  9. edsl/agents/InvigilatorBase.py +257 -270
  10. edsl/agents/PromptConstructor.py +272 -354
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/__init__.py +2 -3
  14. edsl/agents/descriptors.py +99 -99
  15. edsl/agents/prompt_helpers.py +129 -129
  16. edsl/agents/question_option_processor.py +172 -0
  17. edsl/auto/AutoStudy.py +130 -117
  18. edsl/auto/StageBase.py +243 -230
  19. edsl/auto/StageGenerateSurvey.py +178 -178
  20. edsl/auto/StageLabelQuestions.py +125 -125
  21. edsl/auto/StagePersona.py +61 -61
  22. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  23. edsl/auto/StagePersonaDimensionValues.py +74 -74
  24. edsl/auto/StagePersonaDimensions.py +69 -69
  25. edsl/auto/StageQuestions.py +74 -73
  26. edsl/auto/SurveyCreatorPipeline.py +21 -21
  27. edsl/auto/utilities.py +218 -224
  28. edsl/base/Base.py +279 -279
  29. edsl/config.py +177 -157
  30. edsl/conversation/Conversation.py +290 -290
  31. edsl/conversation/car_buying.py +59 -58
  32. edsl/conversation/chips.py +95 -95
  33. edsl/conversation/mug_negotiation.py +81 -81
  34. edsl/conversation/next_speaker_utilities.py +93 -93
  35. edsl/coop/CoopFunctionsMixin.py +15 -0
  36. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  37. edsl/coop/PriceFetcher.py +54 -54
  38. edsl/coop/__init__.py +2 -2
  39. edsl/coop/coop.py +1106 -1028
  40. edsl/coop/utils.py +131 -131
  41. edsl/data/Cache.py +573 -555
  42. edsl/data/CacheEntry.py +230 -233
  43. edsl/data/CacheHandler.py +168 -149
  44. edsl/data/RemoteCacheSync.py +186 -78
  45. edsl/data/SQLiteDict.py +292 -292
  46. edsl/data/__init__.py +5 -4
  47. edsl/data/hack.py +10 -0
  48. edsl/data/orm.py +10 -10
  49. edsl/data_transfer_models.py +74 -73
  50. edsl/enums.py +202 -175
  51. edsl/exceptions/BaseException.py +21 -21
  52. edsl/exceptions/__init__.py +54 -54
  53. edsl/exceptions/agents.py +54 -42
  54. edsl/exceptions/cache.py +5 -5
  55. edsl/exceptions/configuration.py +16 -16
  56. edsl/exceptions/coop.py +10 -10
  57. edsl/exceptions/data.py +14 -14
  58. edsl/exceptions/general.py +34 -34
  59. edsl/exceptions/inference_services.py +5 -0
  60. edsl/exceptions/jobs.py +33 -33
  61. edsl/exceptions/language_models.py +63 -63
  62. edsl/exceptions/prompts.py +15 -15
  63. edsl/exceptions/questions.py +109 -91
  64. edsl/exceptions/results.py +29 -29
  65. edsl/exceptions/scenarios.py +29 -22
  66. edsl/exceptions/surveys.py +37 -37
  67. edsl/inference_services/AnthropicService.py +106 -87
  68. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  69. edsl/inference_services/AvailableModelFetcher.py +215 -0
  70. edsl/inference_services/AwsBedrock.py +118 -120
  71. edsl/inference_services/AzureAI.py +215 -217
  72. edsl/inference_services/DeepInfraService.py +18 -18
  73. edsl/inference_services/GoogleService.py +143 -148
  74. edsl/inference_services/GroqService.py +20 -20
  75. edsl/inference_services/InferenceServiceABC.py +80 -147
  76. edsl/inference_services/InferenceServicesCollection.py +138 -97
  77. edsl/inference_services/MistralAIService.py +120 -123
  78. edsl/inference_services/OllamaService.py +18 -18
  79. edsl/inference_services/OpenAIService.py +236 -224
  80. edsl/inference_services/PerplexityService.py +160 -163
  81. edsl/inference_services/ServiceAvailability.py +135 -0
  82. edsl/inference_services/TestService.py +90 -89
  83. edsl/inference_services/TogetherAIService.py +172 -170
  84. edsl/inference_services/data_structures.py +134 -0
  85. edsl/inference_services/models_available_cache.py +118 -118
  86. edsl/inference_services/rate_limits_cache.py +25 -25
  87. edsl/inference_services/registry.py +41 -41
  88. edsl/inference_services/write_available.py +10 -10
  89. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  90. edsl/jobs/Answers.py +43 -56
  91. edsl/jobs/FetchInvigilator.py +47 -0
  92. edsl/jobs/InterviewTaskManager.py +98 -0
  93. edsl/jobs/InterviewsConstructor.py +50 -0
  94. edsl/jobs/Jobs.py +823 -898
  95. edsl/jobs/JobsChecks.py +172 -147
  96. edsl/jobs/JobsComponentConstructor.py +189 -0
  97. edsl/jobs/JobsPrompts.py +270 -268
  98. edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
  99. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  100. edsl/jobs/RequestTokenEstimator.py +30 -0
  101. edsl/jobs/__init__.py +1 -1
  102. edsl/jobs/async_interview_runner.py +138 -0
  103. edsl/jobs/buckets/BucketCollection.py +104 -63
  104. edsl/jobs/buckets/ModelBuckets.py +65 -65
  105. edsl/jobs/buckets/TokenBucket.py +283 -251
  106. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  107. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  108. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  109. edsl/jobs/data_structures.py +120 -0
  110. edsl/jobs/decorators.py +35 -0
  111. edsl/jobs/interviews/Interview.py +396 -661
  112. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  113. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  114. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  115. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  116. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  117. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  118. edsl/jobs/interviews/ReportErrors.py +66 -66
  119. edsl/jobs/interviews/interview_status_enum.py +9 -9
  120. edsl/jobs/jobs_status_enums.py +9 -0
  121. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  122. edsl/jobs/results_exceptions_handler.py +98 -0
  123. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
  124. edsl/jobs/runners/JobsRunnerStatus.py +297 -330
  125. edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
  126. edsl/jobs/tasks/TaskCreators.py +64 -64
  127. edsl/jobs/tasks/TaskHistory.py +470 -450
  128. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  129. edsl/jobs/tasks/task_status_enum.py +161 -163
  130. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  131. edsl/jobs/tokens/TokenUsage.py +34 -34
  132. edsl/language_models/ComputeCost.py +63 -0
  133. edsl/language_models/LanguageModel.py +626 -668
  134. edsl/language_models/ModelList.py +164 -155
  135. edsl/language_models/PriceManager.py +127 -0
  136. edsl/language_models/RawResponseHandler.py +106 -0
  137. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  138. edsl/language_models/ServiceDataSources.py +0 -0
  139. edsl/language_models/__init__.py +2 -3
  140. edsl/language_models/fake_openai_call.py +15 -15
  141. edsl/language_models/fake_openai_service.py +61 -61
  142. edsl/language_models/key_management/KeyLookup.py +63 -0
  143. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  144. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  145. edsl/language_models/key_management/__init__.py +0 -0
  146. edsl/language_models/key_management/models.py +131 -0
  147. edsl/language_models/model.py +256 -0
  148. edsl/language_models/repair.py +156 -156
  149. edsl/language_models/utilities.py +65 -64
  150. edsl/notebooks/Notebook.py +263 -258
  151. edsl/notebooks/NotebookToLaTeX.py +142 -0
  152. edsl/notebooks/__init__.py +1 -1
  153. edsl/prompts/Prompt.py +352 -362
  154. edsl/prompts/__init__.py +2 -2
  155. edsl/questions/ExceptionExplainer.py +77 -0
  156. edsl/questions/HTMLQuestion.py +103 -0
  157. edsl/questions/QuestionBase.py +518 -664
  158. edsl/questions/QuestionBasePromptsMixin.py +221 -217
  159. edsl/questions/QuestionBudget.py +227 -227
  160. edsl/questions/QuestionCheckBox.py +359 -359
  161. edsl/questions/QuestionExtract.py +180 -182
  162. edsl/questions/QuestionFreeText.py +113 -114
  163. edsl/questions/QuestionFunctional.py +166 -166
  164. edsl/questions/QuestionList.py +223 -231
  165. edsl/questions/QuestionMatrix.py +265 -0
  166. edsl/questions/QuestionMultipleChoice.py +330 -286
  167. edsl/questions/QuestionNumerical.py +151 -153
  168. edsl/questions/QuestionRank.py +314 -324
  169. edsl/questions/Quick.py +41 -41
  170. edsl/questions/SimpleAskMixin.py +74 -73
  171. edsl/questions/__init__.py +27 -26
  172. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
  173. edsl/questions/compose_questions.py +98 -98
  174. edsl/questions/data_structures.py +20 -0
  175. edsl/questions/decorators.py +21 -21
  176. edsl/questions/derived/QuestionLikertFive.py +76 -76
  177. edsl/questions/derived/QuestionLinearScale.py +90 -87
  178. edsl/questions/derived/QuestionTopK.py +93 -93
  179. edsl/questions/derived/QuestionYesNo.py +82 -82
  180. edsl/questions/descriptors.py +427 -413
  181. edsl/questions/loop_processor.py +149 -0
  182. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  183. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  184. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  185. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  186. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  187. edsl/questions/prompt_templates/question_list.jinja +17 -17
  188. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  189. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  190. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
  191. edsl/questions/question_registry.py +177 -177
  192. edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
  193. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
  194. edsl/questions/response_validator_factory.py +34 -0
  195. edsl/questions/settings.py +12 -12
  196. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  197. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  198. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  199. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  200. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  201. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  202. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  203. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  204. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  205. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  206. edsl/questions/templates/list/question_presentation.jinja +5 -5
  207. edsl/questions/templates/matrix/__init__.py +1 -0
  208. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  209. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  210. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  211. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  212. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  213. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  214. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  215. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  216. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  217. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  218. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  219. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  220. edsl/results/CSSParameterizer.py +108 -108
  221. edsl/results/Dataset.py +587 -424
  222. edsl/results/DatasetExportMixin.py +594 -731
  223. edsl/results/DatasetTree.py +295 -275
  224. edsl/results/MarkdownToDocx.py +122 -0
  225. edsl/results/MarkdownToPDF.py +111 -0
  226. edsl/results/Result.py +557 -465
  227. edsl/results/Results.py +1183 -1165
  228. edsl/results/ResultsExportMixin.py +45 -43
  229. edsl/results/ResultsGGMixin.py +121 -121
  230. edsl/results/TableDisplay.py +125 -198
  231. edsl/results/TextEditor.py +50 -0
  232. edsl/results/__init__.py +2 -2
  233. edsl/results/file_exports.py +252 -0
  234. edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
  235. edsl/results/{Selector.py → results_selector.py} +145 -135
  236. edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
  237. edsl/results/smart_objects.py +96 -0
  238. edsl/results/table_data_class.py +12 -0
  239. edsl/results/table_display.css +77 -77
  240. edsl/results/table_renderers.py +118 -0
  241. edsl/results/tree_explore.py +115 -115
  242. edsl/scenarios/ConstructDownloadLink.py +109 -0
  243. edsl/scenarios/DocumentChunker.py +102 -0
  244. edsl/scenarios/DocxScenario.py +16 -0
  245. edsl/scenarios/FileStore.py +511 -632
  246. edsl/scenarios/PdfExtractor.py +40 -0
  247. edsl/scenarios/Scenario.py +498 -601
  248. edsl/scenarios/ScenarioHtmlMixin.py +65 -64
  249. edsl/scenarios/ScenarioList.py +1458 -1287
  250. edsl/scenarios/ScenarioListExportMixin.py +45 -52
  251. edsl/scenarios/ScenarioListPdfMixin.py +239 -261
  252. edsl/scenarios/__init__.py +3 -4
  253. edsl/scenarios/directory_scanner.py +96 -0
  254. edsl/scenarios/file_methods.py +85 -0
  255. edsl/scenarios/handlers/__init__.py +13 -0
  256. edsl/scenarios/handlers/csv.py +38 -0
  257. edsl/scenarios/handlers/docx.py +76 -0
  258. edsl/scenarios/handlers/html.py +37 -0
  259. edsl/scenarios/handlers/json.py +111 -0
  260. edsl/scenarios/handlers/latex.py +5 -0
  261. edsl/scenarios/handlers/md.py +51 -0
  262. edsl/scenarios/handlers/pdf.py +68 -0
  263. edsl/scenarios/handlers/png.py +39 -0
  264. edsl/scenarios/handlers/pptx.py +105 -0
  265. edsl/scenarios/handlers/py.py +294 -0
  266. edsl/scenarios/handlers/sql.py +313 -0
  267. edsl/scenarios/handlers/sqlite.py +149 -0
  268. edsl/scenarios/handlers/txt.py +33 -0
  269. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
  270. edsl/scenarios/scenario_selector.py +156 -0
  271. edsl/shared.py +1 -1
  272. edsl/study/ObjectEntry.py +173 -173
  273. edsl/study/ProofOfWork.py +113 -113
  274. edsl/study/SnapShot.py +80 -80
  275. edsl/study/Study.py +521 -528
  276. edsl/study/__init__.py +4 -4
  277. edsl/surveys/ConstructDAG.py +92 -0
  278. edsl/surveys/DAG.py +148 -148
  279. edsl/surveys/EditSurvey.py +221 -0
  280. edsl/surveys/InstructionHandler.py +100 -0
  281. edsl/surveys/Memory.py +31 -31
  282. edsl/surveys/MemoryManagement.py +72 -0
  283. edsl/surveys/MemoryPlan.py +244 -244
  284. edsl/surveys/Rule.py +327 -326
  285. edsl/surveys/RuleCollection.py +385 -387
  286. edsl/surveys/RuleManager.py +172 -0
  287. edsl/surveys/Simulator.py +75 -0
  288. edsl/surveys/Survey.py +1280 -1801
  289. edsl/surveys/SurveyCSS.py +273 -261
  290. edsl/surveys/SurveyExportMixin.py +259 -259
  291. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
  292. edsl/surveys/SurveyQualtricsImport.py +284 -284
  293. edsl/surveys/SurveyToApp.py +141 -0
  294. edsl/surveys/__init__.py +5 -3
  295. edsl/surveys/base.py +53 -53
  296. edsl/surveys/descriptors.py +60 -56
  297. edsl/surveys/instructions/ChangeInstruction.py +48 -49
  298. edsl/surveys/instructions/Instruction.py +56 -65
  299. edsl/surveys/instructions/InstructionCollection.py +82 -77
  300. edsl/templates/error_reporting/base.html +23 -23
  301. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  302. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  303. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  304. edsl/templates/error_reporting/interview_details.html +115 -115
  305. edsl/templates/error_reporting/interviews.html +19 -19
  306. edsl/templates/error_reporting/overview.html +4 -4
  307. edsl/templates/error_reporting/performance_plot.html +1 -1
  308. edsl/templates/error_reporting/report.css +73 -73
  309. edsl/templates/error_reporting/report.html +117 -117
  310. edsl/templates/error_reporting/report.js +25 -25
  311. edsl/test_h +1 -0
  312. edsl/tools/__init__.py +1 -1
  313. edsl/tools/clusters.py +192 -192
  314. edsl/tools/embeddings.py +27 -27
  315. edsl/tools/embeddings_plotting.py +118 -118
  316. edsl/tools/plotting.py +112 -112
  317. edsl/tools/summarize.py +18 -18
  318. edsl/utilities/PrettyList.py +56 -0
  319. edsl/utilities/SystemInfo.py +28 -28
  320. edsl/utilities/__init__.py +22 -22
  321. edsl/utilities/ast_utilities.py +25 -25
  322. edsl/utilities/data/Registry.py +6 -6
  323. edsl/utilities/data/__init__.py +1 -1
  324. edsl/utilities/data/scooter_results.json +1 -1
  325. edsl/utilities/decorators.py +77 -77
  326. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  327. edsl/utilities/gcp_bucket/example.py +50 -0
  328. edsl/utilities/interface.py +627 -627
  329. edsl/utilities/is_notebook.py +18 -0
  330. edsl/utilities/is_valid_variable_name.py +11 -0
  331. edsl/utilities/naming_utilities.py +263 -263
  332. edsl/utilities/remove_edsl_version.py +24 -0
  333. edsl/utilities/repair_functions.py +28 -28
  334. edsl/utilities/restricted_python.py +70 -70
  335. edsl/utilities/utilities.py +436 -424
  336. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/LICENSE +21 -21
  337. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/METADATA +13 -11
  338. edsl-0.1.39.dev4.dist-info/RECORD +361 -0
  339. edsl/language_models/KeyLookup.py +0 -30
  340. edsl/language_models/registry.py +0 -190
  341. edsl/language_models/unused/ReplicateBase.py +0 -83
  342. edsl/results/ResultsDBMixin.py +0 -238
  343. edsl-0.1.39.dev3.dist-info/RECORD +0 -277
  344. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/WHEEL +0 -0
@@ -1,52 +1,45 @@
1
- """Mixin class for exporting results."""
2
-
3
- from functools import wraps
4
- from edsl.results.DatasetExportMixin import DatasetExportMixin
5
-
6
-
7
- def to_dataset(func):
8
- """Convert the object to a Dataset object before calling the function."""
9
-
10
- @wraps(func)
11
- def wrapper(self, *args, **kwargs):
12
- """Return the function with the Results object converted to a Dataset object."""
13
- if self.__class__.__name__ == "ScenarioList":
14
- return func(self.to_dataset(), *args, **kwargs)
15
- else:
16
- raise Exception(
17
- f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
18
- )
19
-
20
- return wrapper
21
-
22
-
23
- def decorate_methods_from_mixin(cls, mixin_cls):
24
- for attr_name, attr_value in mixin_cls.__dict__.items():
25
- if callable(attr_value) and not attr_name.startswith("__"):
26
- setattr(cls, attr_name, to_dataset(attr_value))
27
- return cls
28
-
29
-
30
- # def decorate_all_methods(cls):
31
- # for attr_name, attr_value in cls.__dict__.items():
32
- # if callable(attr_value):
33
- # setattr(cls, attr_name, to_dataset(attr_value))
34
- # return cls
35
-
36
-
37
- # @decorate_all_methods
38
- class ScenarioListExportMixin(DatasetExportMixin):
39
- """Mixin class for exporting Results objects."""
40
-
41
- def __init_subclass__(cls, **kwargs):
42
- super().__init_subclass__(**kwargs)
43
- decorate_methods_from_mixin(cls, DatasetExportMixin)
44
-
45
- def to_docx(self, filename: str):
46
- """Export the ScenarioList to a .docx file."""
47
- dataset = self.to_dataset()
48
- from edsl.results.DatasetTree import Tree
49
-
50
- tree = Tree(dataset)
51
- tree.construct_tree()
52
- tree.to_docx(filename)
1
+ """Mixin class for exporting results."""
2
+
3
+ from functools import wraps
4
+ from edsl.results.DatasetExportMixin import DatasetExportMixin
5
+
6
+
7
+ def to_dataset(func):
8
+ """Convert the object to a Dataset object before calling the function."""
9
+
10
+ @wraps(func)
11
+ def wrapper(self, *args, **kwargs):
12
+ """Return the function with the Results object converted to a Dataset object."""
13
+ if self.__class__.__name__ == "ScenarioList":
14
+ return func(self.to_dataset(), *args, **kwargs)
15
+ else:
16
+ raise Exception(
17
+ f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
18
+ )
19
+
20
+ return wrapper
21
+
22
+
23
+ def decorate_methods_from_mixin(cls, mixin_cls):
24
+ for attr_name, attr_value in mixin_cls.__dict__.items():
25
+ if callable(attr_value) and not attr_name.startswith("__"):
26
+ setattr(cls, attr_name, to_dataset(attr_value))
27
+ return cls
28
+
29
+
30
+ # @decorate_all_methods
31
+ class ScenarioListExportMixin(DatasetExportMixin):
32
+ """Mixin class for exporting Results objects."""
33
+
34
+ def __init_subclass__(cls, **kwargs):
35
+ super().__init_subclass__(**kwargs)
36
+ decorate_methods_from_mixin(cls, DatasetExportMixin)
37
+
38
+ def to_docx(self, filename: str):
39
+ """Export the ScenarioList to a .docx file."""
40
+ dataset = self.to_dataset()
41
+ from edsl.results.DatasetTree import Tree
42
+
43
+ tree = Tree(dataset)
44
+ tree.construct_tree()
45
+ tree.to_docx(filename)
@@ -1,261 +1,239 @@
1
- import fitz # PyMuPDF
2
- import os
3
- import copy
4
- import subprocess
5
- import requests
6
- import tempfile
7
- import os
8
-
9
- # import urllib.parse as urlparse
10
- from urllib.parse import urlparse
11
-
12
- # from edsl import Scenario
13
-
14
- import requests
15
- import re
16
- import tempfile
17
- import os
18
- import atexit
19
- from urllib.parse import urlparse, parse_qs
20
-
21
-
22
- class GoogleDriveDownloader:
23
- _temp_dir = None
24
- _temp_file_path = None
25
-
26
- @classmethod
27
- def fetch_from_drive(cls, url, filename=None):
28
- # Extract file ID from the URL
29
- file_id = cls._extract_file_id(url)
30
- if not file_id:
31
- raise ValueError("Invalid Google Drive URL")
32
-
33
- # Construct the download URL
34
- download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
35
-
36
- # Send a GET request to the URL
37
- session = requests.Session()
38
- response = session.get(download_url, stream=True)
39
- response.raise_for_status()
40
-
41
- # Check for large file download prompt
42
- for key, value in response.cookies.items():
43
- if key.startswith("download_warning"):
44
- params = {"id": file_id, "confirm": value}
45
- response = session.get(download_url, params=params, stream=True)
46
- break
47
-
48
- # Create a temporary file to save the download
49
- if not filename:
50
- filename = "downloaded_file"
51
-
52
- if cls._temp_dir is None:
53
- cls._temp_dir = tempfile.TemporaryDirectory()
54
- atexit.register(cls._cleanup)
55
-
56
- cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
57
-
58
- # Write the content to the temporary file
59
- with open(cls._temp_file_path, "wb") as f:
60
- for chunk in response.iter_content(32768):
61
- if chunk:
62
- f.write(chunk)
63
-
64
- print(f"File saved to: {cls._temp_file_path}")
65
-
66
- return cls._temp_file_path
67
-
68
- @staticmethod
69
- def _extract_file_id(url):
70
- # Try to extract file ID from '/file/d/' format
71
- file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
72
- if file_id_match:
73
- return file_id_match.group(1)
74
-
75
- # If not found, try to extract from 'open?id=' format
76
- parsed_url = urlparse(url)
77
- query_params = parse_qs(parsed_url.query)
78
- if "id" in query_params:
79
- return query_params["id"][0]
80
-
81
- return None
82
-
83
- @classmethod
84
- def _cleanup(cls):
85
- if cls._temp_dir:
86
- cls._temp_dir.cleanup()
87
-
88
- @classmethod
89
- def get_temp_file_path(cls):
90
- return cls._temp_file_path
91
-
92
-
93
- def fetch_and_save_pdf(url, filename):
94
- # Send a GET request to the URL
95
- response = requests.get(url)
96
-
97
- # Check if the request was successful
98
- response.raise_for_status()
99
-
100
- # Create a temporary directory
101
- with tempfile.TemporaryDirectory() as temp_dir:
102
- # Construct the full path for the file
103
- temp_file_path = os.path.join(temp_dir, filename)
104
-
105
- # Write the content to the temporary file
106
- with open(temp_file_path, "wb") as file:
107
- file.write(response.content)
108
-
109
- print(f"PDF saved to: {temp_file_path}")
110
-
111
- # Here you can perform operations with the file
112
- # The file will be automatically deleted when you exit this block
113
-
114
- return temp_file_path
115
-
116
-
117
- # Example usage:
118
- # url = "https://example.com/sample.pdf"
119
- # fetch_and_save_pdf(url, "sample.pdf")
120
-
121
-
122
- class ScenarioListPdfMixin:
123
- @classmethod
124
- def from_pdf(cls, filename_or_url, collapse_pages=False):
125
- # Check if the input is a URL
126
- if cls.is_url(filename_or_url):
127
- # Check if it's a Google Drive URL
128
- if "drive.google.com" in filename_or_url:
129
- temp_filename = GoogleDriveDownloader.fetch_from_drive(
130
- filename_or_url, "temp_pdf.pdf"
131
- )
132
- else:
133
- # For other URLs, use the previous fetch_and_save_pdf function
134
- temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
135
-
136
- scenarios = list(cls.extract_text_from_pdf(temp_filename))
137
- else:
138
- # If it's not a URL, assume it's a local file path
139
- scenarios = list(cls.extract_text_from_pdf(filename_or_url))
140
- if not collapse_pages:
141
- return cls(scenarios)
142
- else:
143
- txt = ""
144
- for scenario in scenarios:
145
- txt += scenario["text"]
146
- from edsl.scenarios import Scenario
147
-
148
- base_scenario = copy.copy(scenarios[0])
149
- base_scenario["text"] = txt
150
- return base_scenario
151
-
152
- @staticmethod
153
- def is_url(string):
154
- try:
155
- result = urlparse(string)
156
- return all([result.scheme, result.netloc])
157
- except ValueError:
158
- return False
159
-
160
- @classmethod
161
- def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
162
- """
163
- Convert each page of a PDF into an image and create Scenario instances.
164
-
165
- :param pdf_path: Path to the PDF file.
166
- :param image_format: Format of the output images (default is 'jpeg').
167
- :return: ScenarioList instance containing the Scenario instances.
168
- """
169
- import tempfile
170
- from pdf2image import convert_from_path
171
- from edsl.scenarios import Scenario
172
-
173
- with tempfile.TemporaryDirectory() as output_folder:
174
- # Convert PDF to images
175
- images = convert_from_path(pdf_path)
176
-
177
- scenarios = []
178
-
179
- # Save each page as an image and create Scenario instances
180
- for i, image in enumerate(images):
181
- image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
182
- image.save(image_path, image_format.upper())
183
-
184
- scenario = Scenario._from_filepath_image(image_path)
185
- scenarios.append(scenario)
186
-
187
- # print(f"Saved {len(images)} pages as images in {output_folder}")
188
- return cls(scenarios)
189
-
190
- @staticmethod
191
- def extract_text_from_pdf(pdf_path):
192
- from edsl import Scenario
193
-
194
- # TODO: Add test case
195
- # Ensure the file exists
196
- if not os.path.exists(pdf_path):
197
- raise FileNotFoundError(f"The file {pdf_path} does not exist.")
198
-
199
- # Open the PDF file
200
- document = fitz.open(pdf_path)
201
-
202
- # Get the filename from the path
203
- filename = os.path.basename(pdf_path)
204
-
205
- # Iterate through each page and extract text
206
- for page_num in range(len(document)):
207
- page = document.load_page(page_num)
208
- text = page.get_text()
209
-
210
- # Create a dictionary for the current page
211
- page_info = {"filename": filename, "page": page_num + 1, "text": text}
212
- yield Scenario(page_info)
213
-
214
- def create_hello_world_pdf(pdf_path):
215
- # LaTeX content
216
- latex_content = r"""
217
- \documentclass{article}
218
- \title{Hello World}
219
- \author{John}
220
- \date{\today}
221
- \begin{document}
222
- \maketitle
223
- \section{Hello, World!}
224
- This is a simple hello world example created with LaTeX and Python.
225
- \end{document}
226
- """
227
-
228
- # Create a .tex file
229
- tex_filename = pdf_path + ".tex"
230
- with open(tex_filename, "w") as tex_file:
231
- tex_file.write(latex_content)
232
-
233
- # Compile the .tex file to PDF
234
- subprocess.run(["pdflatex", tex_filename], check=True)
235
-
236
- # Optionally, clean up auxiliary files generated by pdflatex
237
- aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
238
- for aux_file in aux_files:
239
- try:
240
- os.remove(aux_file)
241
- except FileNotFoundError:
242
- pass
243
-
244
-
245
- if __name__ == "__main__":
246
- pass
247
-
248
- # from edsl import ScenarioList
249
-
250
- # class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
251
- # pass
252
-
253
- # #ScenarioListNew.create_hello_world_pdf('hello_world')
254
- # #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
255
- # #print(scenarios)
256
-
257
- # from edsl import ScenarioList, QuestionFreeText
258
- # homo_silicus = ScenarioList.from_pdf('w31122.pdf')
259
- # q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
260
- # results = q.by(homo_silicus).run(progress_bar = True)
261
- # results.select('scenario.page', 'answer.key_point').order_by('page').print()
1
+ import os
2
+ import re
3
+ import copy
4
+ import atexit
5
+ import tempfile
6
+ import subprocess
7
+
8
+
9
+ class GoogleDriveDownloader:
10
+ _temp_dir = None
11
+ _temp_file_path = None
12
+
13
+ @classmethod
14
+ def fetch_from_drive(cls, url, filename=None):
15
+ import requests
16
+
17
+ # Extract file ID from the URL
18
+ file_id = cls._extract_file_id(url)
19
+ if not file_id:
20
+ raise ValueError("Invalid Google Drive URL")
21
+
22
+ # Construct the download URL
23
+ download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
24
+
25
+ # Send a GET request to the URL
26
+ session = requests.Session()
27
+ response = session.get(download_url, stream=True)
28
+ response.raise_for_status()
29
+
30
+ # Check for large file download prompt
31
+ for key, value in response.cookies.items():
32
+ if key.startswith("download_warning"):
33
+ params = {"id": file_id, "confirm": value}
34
+ response = session.get(download_url, params=params, stream=True)
35
+ break
36
+
37
+ # Create a temporary file to save the download
38
+ if not filename:
39
+ filename = "downloaded_file"
40
+
41
+ if cls._temp_dir is None:
42
+ cls._temp_dir = tempfile.TemporaryDirectory()
43
+ atexit.register(cls._cleanup)
44
+
45
+ cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
46
+
47
+ # Write the content to the temporary file
48
+ with open(cls._temp_file_path, "wb") as f:
49
+ for chunk in response.iter_content(32768):
50
+ if chunk:
51
+ f.write(chunk)
52
+
53
+ print(f"File saved to: {cls._temp_file_path}")
54
+
55
+ return cls._temp_file_path
56
+
57
+ @staticmethod
58
+ def _extract_file_id(url):
59
+ from urllib.parse import urlparse, parse_qs
60
+
61
+ # Try to extract file ID from '/file/d/' format
62
+ file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
63
+ if file_id_match:
64
+ return file_id_match.group(1)
65
+
66
+ # If not found, try to extract from 'open?id=' format
67
+ parsed_url = urlparse(url)
68
+ query_params = parse_qs(parsed_url.query)
69
+ if "id" in query_params:
70
+ return query_params["id"][0]
71
+
72
+ return None
73
+
74
+ @classmethod
75
+ def _cleanup(cls):
76
+ if cls._temp_dir:
77
+ cls._temp_dir.cleanup()
78
+
79
+ @classmethod
80
+ def get_temp_file_path(cls):
81
+ return cls._temp_file_path
82
+
83
+
84
def fetch_and_save_pdf(url, filename):
    """Download ``url`` and save the response body as ``filename`` in a temp directory.

    The directory is created with ``tempfile.mkdtemp`` so that it still
    exists after this function returns. A ``TemporaryDirectory`` context
    manager would delete the file before the caller could open the returned
    path. The directory is removed when the interpreter exits.

    :param url: URL to fetch with an HTTP GET request.
    :param filename: Name to give the saved file inside the temporary directory.
    :return: Full path to the saved file.
    :raises requests.HTTPError: If the response status indicates an error.
    """
    import atexit
    import shutil

    import requests

    response = requests.get(url)

    # Fail early on 4xx/5xx instead of saving an error page as a PDF.
    response.raise_for_status()

    # Persistent temp directory; cleaned up at interpreter exit.
    temp_dir = tempfile.mkdtemp()
    atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)

    temp_file_path = os.path.join(temp_dir, filename)
    with open(temp_file_path, "wb") as file:
        file.write(response.content)

    print(f"PDF saved to: {temp_file_path}")

    return temp_file_path
108
+
109
+
110
class ScenarioListPdfMixin:
    """Mixin adding PDF-based constructors to a scenario-list class.

    The constructors here call ``cls(scenarios)``, so the host class must be
    constructible from a list of ``Scenario`` objects.
    """

    @classmethod
    def from_pdf(cls, filename_or_url, collapse_pages=False):
        """Build scenarios from the text of a PDF given by path or URL.

        :param filename_or_url: Local file path, or a URL to download first.
            URLs containing ``drive.google.com`` are fetched through
            ``GoogleDriveDownloader``; other URLs through ``fetch_and_save_pdf``.
        :param collapse_pages: When false (default), return ``cls`` built from
            one scenario per page. When true, return a single scenario: a copy
            of the first page's scenario whose ``"text"`` is the concatenation
            of every page's text.
        :return: A ``cls`` instance, or a single scenario when collapsing.
        """
        if cls.is_url(filename_or_url):
            if "drive.google.com" in filename_or_url:
                pdf_path = GoogleDriveDownloader.fetch_from_drive(
                    filename_or_url, "temp_pdf.pdf"
                )
            else:
                pdf_path = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
        else:
            # Not a URL: treat the argument as a local file path.
            pdf_path = filename_or_url

        scenarios = list(cls.extract_text_from_pdf(pdf_path))
        if not collapse_pages:
            return cls(scenarios)

        # Collapse: keep the first page's other fields, replace its text
        # with the text of all pages joined in order.
        base_scenario = copy.copy(scenarios[0])
        base_scenario["text"] = "".join(scenario["text"] for scenario in scenarios)
        return base_scenario

    @staticmethod
    def is_url(string):
        """Return whether ``string`` parses as a URL with both a scheme and a host.

        Inputs that ``urlparse`` cannot handle (for example ``pathlib.Path``
        objects) are reported as not being URLs rather than raising.
        """
        from urllib.parse import urlparse

        try:
            result = urlparse(string)
        except (ValueError, AttributeError):
            return False
        return all([result.scheme, result.netloc])

    @classmethod
    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
        """
        Convert each page of a PDF into an image and create Scenario instances.

        :param pdf_path: Path to the PDF file.
        :param image_format: Format of the output images (default is 'jpeg').
        :return: ScenarioList instance containing the Scenario instances.
        """
        from pdf2image import convert_from_path
        from edsl.scenarios import Scenario

        with tempfile.TemporaryDirectory() as output_folder:
            images = convert_from_path(pdf_path)

            scenarios = []

            # Save each page as an image file, then build a Scenario from it
            # while the temporary folder still exists.
            for i, image in enumerate(images):
                image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
                image.save(image_path, image_format.upper())

                scenario = Scenario._from_filepath_image(image_path)
                scenarios.append(scenario)

            return cls(scenarios)

    @staticmethod
    def extract_text_from_pdf(pdf_path):
        """Yield one ``Scenario`` per page of the PDF at ``pdf_path``.

        Each scenario holds ``"filename"`` (base name of the path), ``"page"``
        (1-based page number) and ``"text"`` (the page's extracted text).
        This is a generator, so nothing runs until iteration starts. The
        document is closed when iteration finishes or the generator is closed.

        :param pdf_path: Path to a local PDF file.
        :raises FileNotFoundError: On first iteration, if the path does not exist.
        """
        # TODO: Add test case
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"The file {pdf_path} does not exist.")

        from edsl.scenarios.Scenario import Scenario
        import fitz  # PyMuPDF

        filename = os.path.basename(pdf_path)

        # The context manager releases the file handle held by PyMuPDF.
        with fitz.open(pdf_path) as document:
            for page_num in range(len(document)):
                page = document.load_page(page_num)
                text = page.get_text()

                page_info = {"filename": filename, "page": page_num + 1, "text": text}
                yield Scenario(page_info)

    @staticmethod
    def create_hello_world_pdf(pdf_path):
        """Write a small LaTeX document and compile it with ``pdflatex``.

        :param pdf_path: Base path without extension. The source is written to
            ``pdf_path + ".tex"``; pdflatex output is directed to the same
            directory, and the ``.aux`` and ``.log`` by-products are removed.
        :raises subprocess.CalledProcessError: If ``pdflatex`` exits non-zero.
        """
        # LaTeX content
        latex_content = r"""
\documentclass{article}
\title{Hello World}
\author{John}
\date{\today}
\begin{document}
\maketitle
\section{Hello, World!}
This is a simple hello world example created with LaTeX and Python.
\end{document}
"""

        tex_filename = pdf_path + ".tex"
        with open(tex_filename, "w") as tex_file:
            tex_file.write(latex_content)

        # pdflatex writes into the current directory by default; point it at
        # the .tex file's directory so output and cleanup paths line up.
        output_dir = os.path.dirname(tex_filename) or "."
        subprocess.run(
            ["pdflatex", f"-output-directory={output_dir}", tex_filename],
            check=True,
        )

        # Best-effort removal of auxiliary files generated by pdflatex.
        aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
        for aux_file in aux_files:
            try:
                os.remove(aux_file)
            except FileNotFoundError:
                pass
234
+
235
+
236
# Run this module's doctests when the file is executed directly as a script.
if __name__ == "__main__":
    import doctest

    doctest.testmod()
@@ -1,4 +1,3 @@
1
- from edsl.scenarios.Scenario import Scenario
2
- from edsl.scenarios.ScenarioList import ScenarioList
3
-
4
- # from edsl.scenarios.FileStore import FileStore
1
+ from edsl.scenarios.Scenario import Scenario
2
+ from edsl.scenarios.ScenarioList import ScenarioList
3
+ from edsl.scenarios.FileStore import FileStore