edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334) hide show
  1. edsl/Base.py +332 -385
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +49 -57
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +867 -1079
  7. edsl/agents/AgentList.py +413 -551
  8. edsl/agents/Invigilator.py +233 -285
  9. edsl/agents/InvigilatorBase.py +270 -254
  10. edsl/agents/PromptConstructor.py +354 -252
  11. edsl/agents/__init__.py +3 -2
  12. edsl/agents/descriptors.py +99 -99
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +279 -279
  26. edsl/config.py +157 -177
  27. edsl/conversation/Conversation.py +290 -290
  28. edsl/conversation/car_buying.py +58 -59
  29. edsl/conversation/chips.py +95 -95
  30. edsl/conversation/mug_negotiation.py +81 -81
  31. edsl/conversation/next_speaker_utilities.py +93 -93
  32. edsl/coop/PriceFetcher.py +54 -54
  33. edsl/coop/__init__.py +2 -2
  34. edsl/coop/coop.py +1028 -1090
  35. edsl/coop/utils.py +131 -131
  36. edsl/data/Cache.py +555 -562
  37. edsl/data/CacheEntry.py +233 -230
  38. edsl/data/CacheHandler.py +149 -170
  39. edsl/data/RemoteCacheSync.py +78 -78
  40. edsl/data/SQLiteDict.py +292 -292
  41. edsl/data/__init__.py +4 -5
  42. edsl/data/orm.py +10 -10
  43. edsl/data_transfer_models.py +73 -74
  44. edsl/enums.py +175 -195
  45. edsl/exceptions/BaseException.py +21 -21
  46. edsl/exceptions/__init__.py +54 -54
  47. edsl/exceptions/agents.py +42 -54
  48. edsl/exceptions/cache.py +5 -5
  49. edsl/exceptions/configuration.py +16 -16
  50. edsl/exceptions/coop.py +10 -10
  51. edsl/exceptions/data.py +14 -14
  52. edsl/exceptions/general.py +34 -34
  53. edsl/exceptions/jobs.py +33 -33
  54. edsl/exceptions/language_models.py +63 -63
  55. edsl/exceptions/prompts.py +15 -15
  56. edsl/exceptions/questions.py +91 -109
  57. edsl/exceptions/results.py +29 -29
  58. edsl/exceptions/scenarios.py +22 -29
  59. edsl/exceptions/surveys.py +37 -37
  60. edsl/inference_services/AnthropicService.py +87 -84
  61. edsl/inference_services/AwsBedrock.py +120 -118
  62. edsl/inference_services/AzureAI.py +217 -215
  63. edsl/inference_services/DeepInfraService.py +18 -18
  64. edsl/inference_services/GoogleService.py +148 -139
  65. edsl/inference_services/GroqService.py +20 -20
  66. edsl/inference_services/InferenceServiceABC.py +147 -80
  67. edsl/inference_services/InferenceServicesCollection.py +97 -122
  68. edsl/inference_services/MistralAIService.py +123 -120
  69. edsl/inference_services/OllamaService.py +18 -18
  70. edsl/inference_services/OpenAIService.py +224 -221
  71. edsl/inference_services/PerplexityService.py +163 -160
  72. edsl/inference_services/TestService.py +89 -92
  73. edsl/inference_services/TogetherAIService.py +170 -170
  74. edsl/inference_services/models_available_cache.py +118 -118
  75. edsl/inference_services/rate_limits_cache.py +25 -25
  76. edsl/inference_services/registry.py +41 -41
  77. edsl/inference_services/write_available.py +10 -10
  78. edsl/jobs/Answers.py +56 -43
  79. edsl/jobs/Jobs.py +898 -757
  80. edsl/jobs/JobsChecks.py +147 -172
  81. edsl/jobs/JobsPrompts.py +268 -270
  82. edsl/jobs/JobsRemoteInferenceHandler.py +239 -287
  83. edsl/jobs/__init__.py +1 -1
  84. edsl/jobs/buckets/BucketCollection.py +63 -104
  85. edsl/jobs/buckets/ModelBuckets.py +65 -65
  86. edsl/jobs/buckets/TokenBucket.py +251 -283
  87. edsl/jobs/interviews/Interview.py +661 -358
  88. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  89. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  90. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  91. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  92. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  93. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  94. edsl/jobs/interviews/ReportErrors.py +66 -66
  95. edsl/jobs/interviews/interview_status_enum.py +9 -9
  96. edsl/jobs/runners/JobsRunnerAsyncio.py +466 -421
  97. edsl/jobs/runners/JobsRunnerStatus.py +330 -330
  98. edsl/jobs/tasks/QuestionTaskCreator.py +242 -244
  99. edsl/jobs/tasks/TaskCreators.py +64 -64
  100. edsl/jobs/tasks/TaskHistory.py +450 -449
  101. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  102. edsl/jobs/tasks/task_status_enum.py +163 -161
  103. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  104. edsl/jobs/tokens/TokenUsage.py +34 -34
  105. edsl/language_models/KeyLookup.py +30 -0
  106. edsl/language_models/LanguageModel.py +668 -571
  107. edsl/language_models/ModelList.py +155 -153
  108. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  109. edsl/language_models/__init__.py +3 -2
  110. edsl/language_models/fake_openai_call.py +15 -15
  111. edsl/language_models/fake_openai_service.py +61 -61
  112. edsl/language_models/registry.py +190 -180
  113. edsl/language_models/repair.py +156 -156
  114. edsl/language_models/unused/ReplicateBase.py +83 -0
  115. edsl/language_models/utilities.py +64 -65
  116. edsl/notebooks/Notebook.py +258 -263
  117. edsl/notebooks/__init__.py +1 -1
  118. edsl/prompts/Prompt.py +362 -352
  119. edsl/prompts/__init__.py +2 -2
  120. edsl/questions/AnswerValidatorMixin.py +289 -334
  121. edsl/questions/QuestionBase.py +664 -509
  122. edsl/questions/QuestionBaseGenMixin.py +161 -165
  123. edsl/questions/QuestionBasePromptsMixin.py +217 -221
  124. edsl/questions/QuestionBudget.py +227 -227
  125. edsl/questions/QuestionCheckBox.py +359 -359
  126. edsl/questions/QuestionExtract.py +182 -182
  127. edsl/questions/QuestionFreeText.py +114 -113
  128. edsl/questions/QuestionFunctional.py +166 -166
  129. edsl/questions/QuestionList.py +231 -229
  130. edsl/questions/QuestionMultipleChoice.py +286 -330
  131. edsl/questions/QuestionNumerical.py +153 -151
  132. edsl/questions/QuestionRank.py +324 -314
  133. edsl/questions/Quick.py +41 -41
  134. edsl/questions/RegisterQuestionsMeta.py +71 -71
  135. edsl/questions/ResponseValidatorABC.py +174 -200
  136. edsl/questions/SimpleAskMixin.py +73 -74
  137. edsl/questions/__init__.py +26 -27
  138. edsl/questions/compose_questions.py +98 -98
  139. edsl/questions/decorators.py +21 -21
  140. edsl/questions/derived/QuestionLikertFive.py +76 -76
  141. edsl/questions/derived/QuestionLinearScale.py +87 -90
  142. edsl/questions/derived/QuestionTopK.py +93 -93
  143. edsl/questions/derived/QuestionYesNo.py +82 -82
  144. edsl/questions/descriptors.py +413 -427
  145. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  146. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  147. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  148. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  149. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  150. edsl/questions/prompt_templates/question_list.jinja +17 -17
  151. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  152. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  153. edsl/questions/question_registry.py +177 -177
  154. edsl/questions/settings.py +12 -12
  155. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  156. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  157. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  158. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  159. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  160. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  161. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  162. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  163. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  164. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  165. edsl/questions/templates/list/question_presentation.jinja +5 -5
  166. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  167. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  168. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  169. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  170. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  171. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  172. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  173. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  174. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  175. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  176. edsl/results/CSSParameterizer.py +108 -108
  177. edsl/results/Dataset.py +424 -587
  178. edsl/results/DatasetExportMixin.py +731 -653
  179. edsl/results/DatasetTree.py +275 -295
  180. edsl/results/Result.py +465 -451
  181. edsl/results/Results.py +1165 -1172
  182. edsl/results/ResultsDBMixin.py +238 -0
  183. edsl/results/ResultsExportMixin.py +43 -45
  184. edsl/results/ResultsFetchMixin.py +33 -33
  185. edsl/results/ResultsGGMixin.py +121 -121
  186. edsl/results/ResultsToolsMixin.py +98 -98
  187. edsl/results/Selector.py +135 -145
  188. edsl/results/TableDisplay.py +198 -125
  189. edsl/results/__init__.py +2 -2
  190. edsl/results/table_display.css +77 -77
  191. edsl/results/tree_explore.py +115 -115
  192. edsl/scenarios/FileStore.py +632 -511
  193. edsl/scenarios/Scenario.py +601 -498
  194. edsl/scenarios/ScenarioHtmlMixin.py +64 -65
  195. edsl/scenarios/ScenarioJoin.py +127 -131
  196. edsl/scenarios/ScenarioList.py +1287 -1430
  197. edsl/scenarios/ScenarioListExportMixin.py +52 -45
  198. edsl/scenarios/ScenarioListPdfMixin.py +261 -239
  199. edsl/scenarios/__init__.py +4 -3
  200. edsl/shared.py +1 -1
  201. edsl/study/ObjectEntry.py +173 -173
  202. edsl/study/ProofOfWork.py +113 -113
  203. edsl/study/SnapShot.py +80 -80
  204. edsl/study/Study.py +528 -521
  205. edsl/study/__init__.py +4 -4
  206. edsl/surveys/DAG.py +148 -148
  207. edsl/surveys/Memory.py +31 -31
  208. edsl/surveys/MemoryPlan.py +244 -244
  209. edsl/surveys/Rule.py +326 -327
  210. edsl/surveys/RuleCollection.py +387 -385
  211. edsl/surveys/Survey.py +1801 -1229
  212. edsl/surveys/SurveyCSS.py +261 -273
  213. edsl/surveys/SurveyExportMixin.py +259 -259
  214. edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +179 -181
  215. edsl/surveys/SurveyQualtricsImport.py +284 -284
  216. edsl/surveys/__init__.py +3 -5
  217. edsl/surveys/base.py +53 -53
  218. edsl/surveys/descriptors.py +56 -60
  219. edsl/surveys/instructions/ChangeInstruction.py +49 -48
  220. edsl/surveys/instructions/Instruction.py +65 -56
  221. edsl/surveys/instructions/InstructionCollection.py +77 -82
  222. edsl/templates/error_reporting/base.html +23 -23
  223. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  224. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  225. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  226. edsl/templates/error_reporting/interview_details.html +115 -115
  227. edsl/templates/error_reporting/interviews.html +19 -19
  228. edsl/templates/error_reporting/overview.html +4 -4
  229. edsl/templates/error_reporting/performance_plot.html +1 -1
  230. edsl/templates/error_reporting/report.css +73 -73
  231. edsl/templates/error_reporting/report.html +117 -117
  232. edsl/templates/error_reporting/report.js +25 -25
  233. edsl/tools/__init__.py +1 -1
  234. edsl/tools/clusters.py +192 -192
  235. edsl/tools/embeddings.py +27 -27
  236. edsl/tools/embeddings_plotting.py +118 -118
  237. edsl/tools/plotting.py +112 -112
  238. edsl/tools/summarize.py +18 -18
  239. edsl/utilities/SystemInfo.py +28 -28
  240. edsl/utilities/__init__.py +22 -22
  241. edsl/utilities/ast_utilities.py +25 -25
  242. edsl/utilities/data/Registry.py +6 -6
  243. edsl/utilities/data/__init__.py +1 -1
  244. edsl/utilities/data/scooter_results.json +1 -1
  245. edsl/utilities/decorators.py +77 -77
  246. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  247. edsl/utilities/interface.py +627 -627
  248. edsl/utilities/naming_utilities.py +263 -263
  249. edsl/utilities/repair_functions.py +28 -28
  250. edsl/utilities/restricted_python.py +70 -70
  251. edsl/utilities/utilities.py +424 -436
  252. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/LICENSE +21 -21
  253. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/METADATA +10 -12
  254. edsl-0.1.39.dev3.dist-info/RECORD +277 -0
  255. edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
  256. edsl/agents/QuestionOptionProcessor.py +0 -172
  257. edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
  258. edsl/coop/CoopFunctionsMixin.py +0 -15
  259. edsl/coop/ExpectedParrotKeyHandler.py +0 -125
  260. edsl/exceptions/inference_services.py +0 -5
  261. edsl/inference_services/AvailableModelCacheHandler.py +0 -184
  262. edsl/inference_services/AvailableModelFetcher.py +0 -209
  263. edsl/inference_services/ServiceAvailability.py +0 -135
  264. edsl/inference_services/data_structures.py +0 -62
  265. edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -188
  266. edsl/jobs/FetchInvigilator.py +0 -40
  267. edsl/jobs/InterviewTaskManager.py +0 -98
  268. edsl/jobs/InterviewsConstructor.py +0 -48
  269. edsl/jobs/JobsComponentConstructor.py +0 -189
  270. edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
  271. edsl/jobs/RequestTokenEstimator.py +0 -30
  272. edsl/jobs/buckets/TokenBucketAPI.py +0 -211
  273. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  274. edsl/jobs/decorators.py +0 -35
  275. edsl/jobs/jobs_status_enums.py +0 -9
  276. edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
  277. edsl/language_models/ComputeCost.py +0 -63
  278. edsl/language_models/PriceManager.py +0 -127
  279. edsl/language_models/RawResponseHandler.py +0 -106
  280. edsl/language_models/ServiceDataSources.py +0 -0
  281. edsl/language_models/key_management/KeyLookup.py +0 -63
  282. edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
  283. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  284. edsl/language_models/key_management/__init__.py +0 -0
  285. edsl/language_models/key_management/models.py +0 -131
  286. edsl/notebooks/NotebookToLaTeX.py +0 -142
  287. edsl/questions/ExceptionExplainer.py +0 -77
  288. edsl/questions/HTMLQuestion.py +0 -103
  289. edsl/questions/LoopProcessor.py +0 -149
  290. edsl/questions/QuestionMatrix.py +0 -265
  291. edsl/questions/ResponseValidatorFactory.py +0 -28
  292. edsl/questions/templates/matrix/__init__.py +0 -1
  293. edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
  294. edsl/questions/templates/matrix/question_presentation.jinja +0 -20
  295. edsl/results/MarkdownToDocx.py +0 -122
  296. edsl/results/MarkdownToPDF.py +0 -111
  297. edsl/results/TextEditor.py +0 -50
  298. edsl/results/smart_objects.py +0 -96
  299. edsl/results/table_data_class.py +0 -12
  300. edsl/results/table_renderers.py +0 -118
  301. edsl/scenarios/ConstructDownloadLink.py +0 -109
  302. edsl/scenarios/DirectoryScanner.py +0 -96
  303. edsl/scenarios/DocumentChunker.py +0 -102
  304. edsl/scenarios/DocxScenario.py +0 -16
  305. edsl/scenarios/PdfExtractor.py +0 -40
  306. edsl/scenarios/ScenarioSelector.py +0 -156
  307. edsl/scenarios/file_methods.py +0 -85
  308. edsl/scenarios/handlers/__init__.py +0 -13
  309. edsl/scenarios/handlers/csv.py +0 -38
  310. edsl/scenarios/handlers/docx.py +0 -76
  311. edsl/scenarios/handlers/html.py +0 -37
  312. edsl/scenarios/handlers/json.py +0 -111
  313. edsl/scenarios/handlers/latex.py +0 -5
  314. edsl/scenarios/handlers/md.py +0 -51
  315. edsl/scenarios/handlers/pdf.py +0 -68
  316. edsl/scenarios/handlers/png.py +0 -39
  317. edsl/scenarios/handlers/pptx.py +0 -105
  318. edsl/scenarios/handlers/py.py +0 -294
  319. edsl/scenarios/handlers/sql.py +0 -313
  320. edsl/scenarios/handlers/sqlite.py +0 -149
  321. edsl/scenarios/handlers/txt.py +0 -33
  322. edsl/surveys/ConstructDAG.py +0 -92
  323. edsl/surveys/EditSurvey.py +0 -221
  324. edsl/surveys/InstructionHandler.py +0 -100
  325. edsl/surveys/MemoryManagement.py +0 -72
  326. edsl/surveys/RuleManager.py +0 -172
  327. edsl/surveys/Simulator.py +0 -75
  328. edsl/surveys/SurveyToApp.py +0 -141
  329. edsl/utilities/PrettyList.py +0 -56
  330. edsl/utilities/is_notebook.py +0 -18
  331. edsl/utilities/is_valid_variable_name.py +0 -11
  332. edsl/utilities/remove_edsl_version.py +0 -24
  333. edsl-0.1.39.dev2.dist-info/RECORD +0 -352
  334. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/WHEEL +0 -0
@@ -1,45 +1,52 @@
1
- """Mixin class for exporting results."""
2
-
3
- from functools import wraps
4
- from edsl.results.DatasetExportMixin import DatasetExportMixin
5
-
6
-
7
- def to_dataset(func):
8
- """Convert the object to a Dataset object before calling the function."""
9
-
10
- @wraps(func)
11
- def wrapper(self, *args, **kwargs):
12
- """Return the function with the Results object converted to a Dataset object."""
13
- if self.__class__.__name__ == "ScenarioList":
14
- return func(self.to_dataset(), *args, **kwargs)
15
- else:
16
- raise Exception(
17
- f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
18
- )
19
-
20
- return wrapper
21
-
22
-
23
- def decorate_methods_from_mixin(cls, mixin_cls):
24
- for attr_name, attr_value in mixin_cls.__dict__.items():
25
- if callable(attr_value) and not attr_name.startswith("__"):
26
- setattr(cls, attr_name, to_dataset(attr_value))
27
- return cls
28
-
29
-
30
- # @decorate_all_methods
31
- class ScenarioListExportMixin(DatasetExportMixin):
32
- """Mixin class for exporting Results objects."""
33
-
34
- def __init_subclass__(cls, **kwargs):
35
- super().__init_subclass__(**kwargs)
36
- decorate_methods_from_mixin(cls, DatasetExportMixin)
37
-
38
- def to_docx(self, filename: str):
39
- """Export the ScenarioList to a .docx file."""
40
- dataset = self.to_dataset()
41
- from edsl.results.DatasetTree import Tree
42
-
43
- tree = Tree(dataset)
44
- tree.construct_tree()
45
- tree.to_docx(filename)
1
+ """Mixin class for exporting results."""
2
+
3
+ from functools import wraps
4
+ from edsl.results.DatasetExportMixin import DatasetExportMixin
5
+
6
+
7
+ def to_dataset(func):
8
+ """Convert the object to a Dataset object before calling the function."""
9
+
10
+ @wraps(func)
11
+ def wrapper(self, *args, **kwargs):
12
+ """Return the function with the Results object converted to a Dataset object."""
13
+ if self.__class__.__name__ == "ScenarioList":
14
+ return func(self.to_dataset(), *args, **kwargs)
15
+ else:
16
+ raise Exception(
17
+ f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
18
+ )
19
+
20
+ return wrapper
21
+
22
+
23
+ def decorate_methods_from_mixin(cls, mixin_cls):
24
+ for attr_name, attr_value in mixin_cls.__dict__.items():
25
+ if callable(attr_value) and not attr_name.startswith("__"):
26
+ setattr(cls, attr_name, to_dataset(attr_value))
27
+ return cls
28
+
29
+
30
+ # def decorate_all_methods(cls):
31
+ # for attr_name, attr_value in cls.__dict__.items():
32
+ # if callable(attr_value):
33
+ # setattr(cls, attr_name, to_dataset(attr_value))
34
+ # return cls
35
+
36
+
37
+ # @decorate_all_methods
38
+ class ScenarioListExportMixin(DatasetExportMixin):
39
+ """Mixin class for exporting Results objects."""
40
+
41
+ def __init_subclass__(cls, **kwargs):
42
+ super().__init_subclass__(**kwargs)
43
+ decorate_methods_from_mixin(cls, DatasetExportMixin)
44
+
45
+ def to_docx(self, filename: str):
46
+ """Export the ScenarioList to a .docx file."""
47
+ dataset = self.to_dataset()
48
+ from edsl.results.DatasetTree import Tree
49
+
50
+ tree = Tree(dataset)
51
+ tree.construct_tree()
52
+ tree.to_docx(filename)
@@ -1,239 +1,261 @@
1
- import os
2
- import re
3
- import copy
4
- import atexit
5
- import tempfile
6
- import subprocess
7
-
8
-
9
- class GoogleDriveDownloader:
10
- _temp_dir = None
11
- _temp_file_path = None
12
-
13
- @classmethod
14
- def fetch_from_drive(cls, url, filename=None):
15
- import requests
16
-
17
- # Extract file ID from the URL
18
- file_id = cls._extract_file_id(url)
19
- if not file_id:
20
- raise ValueError("Invalid Google Drive URL")
21
-
22
- # Construct the download URL
23
- download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
24
-
25
- # Send a GET request to the URL
26
- session = requests.Session()
27
- response = session.get(download_url, stream=True)
28
- response.raise_for_status()
29
-
30
- # Check for large file download prompt
31
- for key, value in response.cookies.items():
32
- if key.startswith("download_warning"):
33
- params = {"id": file_id, "confirm": value}
34
- response = session.get(download_url, params=params, stream=True)
35
- break
36
-
37
- # Create a temporary file to save the download
38
- if not filename:
39
- filename = "downloaded_file"
40
-
41
- if cls._temp_dir is None:
42
- cls._temp_dir = tempfile.TemporaryDirectory()
43
- atexit.register(cls._cleanup)
44
-
45
- cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
46
-
47
- # Write the content to the temporary file
48
- with open(cls._temp_file_path, "wb") as f:
49
- for chunk in response.iter_content(32768):
50
- if chunk:
51
- f.write(chunk)
52
-
53
- print(f"File saved to: {cls._temp_file_path}")
54
-
55
- return cls._temp_file_path
56
-
57
- @staticmethod
58
- def _extract_file_id(url):
59
- from urllib.parse import urlparse, parse_qs
60
-
61
- # Try to extract file ID from '/file/d/' format
62
- file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
63
- if file_id_match:
64
- return file_id_match.group(1)
65
-
66
- # If not found, try to extract from 'open?id=' format
67
- parsed_url = urlparse(url)
68
- query_params = parse_qs(parsed_url.query)
69
- if "id" in query_params:
70
- return query_params["id"][0]
71
-
72
- return None
73
-
74
- @classmethod
75
- def _cleanup(cls):
76
- if cls._temp_dir:
77
- cls._temp_dir.cleanup()
78
-
79
- @classmethod
80
- def get_temp_file_path(cls):
81
- return cls._temp_file_path
82
-
83
-
84
- def fetch_and_save_pdf(url, filename):
85
- # Send a GET request to the URL
86
- import requests
87
-
88
- response = requests.get(url)
89
-
90
- # Check if the request was successful
91
- response.raise_for_status()
92
-
93
- # Create a temporary directory
94
- with tempfile.TemporaryDirectory() as temp_dir:
95
- # Construct the full path for the file
96
- temp_file_path = os.path.join(temp_dir, filename)
97
-
98
- # Write the content to the temporary file
99
- with open(temp_file_path, "wb") as file:
100
- file.write(response.content)
101
-
102
- print(f"PDF saved to: {temp_file_path}")
103
-
104
- # Here you can perform operations with the file
105
- # The file will be automatically deleted when you exit this block
106
-
107
- return temp_file_path
108
-
109
-
110
- class ScenarioListPdfMixin:
111
- @classmethod
112
- def from_pdf(cls, filename_or_url, collapse_pages=False):
113
- # Check if the input is a URL
114
- if cls.is_url(filename_or_url):
115
- # Check if it's a Google Drive URL
116
- if "drive.google.com" in filename_or_url:
117
- temp_filename = GoogleDriveDownloader.fetch_from_drive(
118
- filename_or_url, "temp_pdf.pdf"
119
- )
120
- else:
121
- # For other URLs, use the previous fetch_and_save_pdf function
122
- temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
123
-
124
- scenarios = list(cls.extract_text_from_pdf(temp_filename))
125
- else:
126
- # If it's not a URL, assume it's a local file path
127
- scenarios = list(cls.extract_text_from_pdf(filename_or_url))
128
- if not collapse_pages:
129
- return cls(scenarios)
130
- else:
131
- txt = ""
132
- for scenario in scenarios:
133
- txt += scenario["text"]
134
- from edsl.scenarios import Scenario
135
-
136
- base_scenario = copy.copy(scenarios[0])
137
- base_scenario["text"] = txt
138
- return base_scenario
139
-
140
- @staticmethod
141
- def is_url(string):
142
- from urllib.parse import urlparse
143
-
144
- try:
145
- result = urlparse(string)
146
- return all([result.scheme, result.netloc])
147
- except ValueError:
148
- return False
149
-
150
- @classmethod
151
- def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
152
- """
153
- Convert each page of a PDF into an image and create Scenario instances.
154
-
155
- :param pdf_path: Path to the PDF file.
156
- :param image_format: Format of the output images (default is 'jpeg').
157
- :return: ScenarioList instance containing the Scenario instances.
158
- """
159
- import tempfile
160
- from pdf2image import convert_from_path
161
- from edsl.scenarios import Scenario
162
-
163
- with tempfile.TemporaryDirectory() as output_folder:
164
- # Convert PDF to images
165
- images = convert_from_path(pdf_path)
166
-
167
- scenarios = []
168
-
169
- # Save each page as an image and create Scenario instances
170
- for i, image in enumerate(images):
171
- image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
172
- image.save(image_path, image_format.upper())
173
-
174
- scenario = Scenario._from_filepath_image(image_path)
175
- scenarios.append(scenario)
176
-
177
- # print(f"Saved {len(images)} pages as images in {output_folder}")
178
- return cls(scenarios)
179
-
180
- @staticmethod
181
- def extract_text_from_pdf(pdf_path):
182
- from edsl.scenarios.Scenario import Scenario
183
- import fitz # PyMuPDF
184
-
185
- # TODO: Add test case
186
- # Ensure the file exists
187
- if not os.path.exists(pdf_path):
188
- raise FileNotFoundError(f"The file {pdf_path} does not exist.")
189
-
190
- # Open the PDF file
191
- document = fitz.open(pdf_path)
192
-
193
- # Get the filename from the path
194
- filename = os.path.basename(pdf_path)
195
-
196
- # Iterate through each page and extract text
197
- for page_num in range(len(document)):
198
- page = document.load_page(page_num)
199
- text = page.get_text()
200
-
201
- # Create a dictionary for the current page
202
- page_info = {"filename": filename, "page": page_num + 1, "text": text}
203
- yield Scenario(page_info)
204
-
205
- def create_hello_world_pdf(pdf_path):
206
- # LaTeX content
207
- latex_content = r"""
208
- \documentclass{article}
209
- \title{Hello World}
210
- \author{John}
211
- \date{\today}
212
- \begin{document}
213
- \maketitle
214
- \section{Hello, World!}
215
- This is a simple hello world example created with LaTeX and Python.
216
- \end{document}
217
- """
218
-
219
- # Create a .tex file
220
- tex_filename = pdf_path + ".tex"
221
- with open(tex_filename, "w") as tex_file:
222
- tex_file.write(latex_content)
223
-
224
- # Compile the .tex file to PDF
225
- subprocess.run(["pdflatex", tex_filename], check=True)
226
-
227
- # Optionally, clean up auxiliary files generated by pdflatex
228
- aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
229
- for aux_file in aux_files:
230
- try:
231
- os.remove(aux_file)
232
- except FileNotFoundError:
233
- pass
234
-
235
-
236
- if __name__ == "__main__":
237
- import doctest
238
-
239
- doctest.testmod()
1
+ import fitz # PyMuPDF
2
+ import os
3
+ import copy
4
+ import subprocess
5
+ import requests
6
+ import tempfile
7
+ import os
8
+
9
+ # import urllib.parse as urlparse
10
+ from urllib.parse import urlparse
11
+
12
+ # from edsl import Scenario
13
+
14
+ import requests
15
+ import re
16
+ import tempfile
17
+ import os
18
+ import atexit
19
+ from urllib.parse import urlparse, parse_qs
20
+
21
+
22
+ class GoogleDriveDownloader:
23
+ _temp_dir = None
24
+ _temp_file_path = None
25
+
26
+ @classmethod
27
+ def fetch_from_drive(cls, url, filename=None):
28
+ # Extract file ID from the URL
29
+ file_id = cls._extract_file_id(url)
30
+ if not file_id:
31
+ raise ValueError("Invalid Google Drive URL")
32
+
33
+ # Construct the download URL
34
+ download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
35
+
36
+ # Send a GET request to the URL
37
+ session = requests.Session()
38
+ response = session.get(download_url, stream=True)
39
+ response.raise_for_status()
40
+
41
+ # Check for large file download prompt
42
+ for key, value in response.cookies.items():
43
+ if key.startswith("download_warning"):
44
+ params = {"id": file_id, "confirm": value}
45
+ response = session.get(download_url, params=params, stream=True)
46
+ break
47
+
48
+ # Create a temporary file to save the download
49
+ if not filename:
50
+ filename = "downloaded_file"
51
+
52
+ if cls._temp_dir is None:
53
+ cls._temp_dir = tempfile.TemporaryDirectory()
54
+ atexit.register(cls._cleanup)
55
+
56
+ cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
57
+
58
+ # Write the content to the temporary file
59
+ with open(cls._temp_file_path, "wb") as f:
60
+ for chunk in response.iter_content(32768):
61
+ if chunk:
62
+ f.write(chunk)
63
+
64
+ print(f"File saved to: {cls._temp_file_path}")
65
+
66
+ return cls._temp_file_path
67
+
68
+ @staticmethod
69
+ def _extract_file_id(url):
70
+ # Try to extract file ID from '/file/d/' format
71
+ file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
72
+ if file_id_match:
73
+ return file_id_match.group(1)
74
+
75
+ # If not found, try to extract from 'open?id=' format
76
+ parsed_url = urlparse(url)
77
+ query_params = parse_qs(parsed_url.query)
78
+ if "id" in query_params:
79
+ return query_params["id"][0]
80
+
81
+ return None
82
+
83
+ @classmethod
84
+ def _cleanup(cls):
85
+ if cls._temp_dir:
86
+ cls._temp_dir.cleanup()
87
+
88
+ @classmethod
89
+ def get_temp_file_path(cls):
90
+ return cls._temp_file_path
91
+
92
+
93
+ def fetch_and_save_pdf(url, filename):
94
+ # Send a GET request to the URL
95
+ response = requests.get(url)
96
+
97
+ # Check if the request was successful
98
+ response.raise_for_status()
99
+
100
+ # Create a temporary directory
101
+ with tempfile.TemporaryDirectory() as temp_dir:
102
+ # Construct the full path for the file
103
+ temp_file_path = os.path.join(temp_dir, filename)
104
+
105
+ # Write the content to the temporary file
106
+ with open(temp_file_path, "wb") as file:
107
+ file.write(response.content)
108
+
109
+ print(f"PDF saved to: {temp_file_path}")
110
+
111
+ # Here you can perform operations with the file
112
+ # The file will be automatically deleted when you exit this block
113
+
114
+ return temp_file_path
115
+
116
+
117
+ # Example usage:
118
+ # url = "https://example.com/sample.pdf"
119
+ # fetch_and_save_pdf(url, "sample.pdf")
120
+
121
+
122
class ScenarioListPdfMixin:
    """Mixin that builds scenario collections from PDF files.

    The host class must be constructible from a list of scenarios, because
    the alternate constructors below finish with ``cls(scenarios)``.
    """

    @classmethod
    def from_pdf(cls, filename_or_url, collapse_pages=False):
        """Create scenarios from the pages of a PDF.

        :param filename_or_url: Local path, or a URL. URLs containing
            ``drive.google.com`` are fetched with ``GoogleDriveDownloader``;
            any other URL is fetched with ``fetch_and_save_pdf``.
        :param collapse_pages: When false (default) return ``cls`` built from
            one scenario per page. When true return a single scenario: a
            shallow copy of the first page's scenario whose ``"text"`` is the
            concatenation of the text of all pages.
        :raises IndexError: If ``collapse_pages`` is true and no pages were
            extracted.
        """
        if cls.is_url(filename_or_url):
            if "drive.google.com" in filename_or_url:
                pdf_path = GoogleDriveDownloader.fetch_from_drive(
                    filename_or_url, "temp_pdf.pdf"
                )
            else:
                pdf_path = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
        else:
            # Anything that is not a URL is treated as a local file path.
            pdf_path = filename_or_url

        scenarios = list(cls.extract_text_from_pdf(pdf_path))
        if not collapse_pages:
            return cls(scenarios)

        if not scenarios:
            raise IndexError(
                f"Cannot collapse pages: no pages were extracted from {pdf_path}."
            )

        base_scenario = copy.copy(scenarios[0])
        base_scenario["text"] = "".join(scenario["text"] for scenario in scenarios)
        return base_scenario

    @staticmethod
    def is_url(string):
        """Return True when ``string`` parses with both a scheme and a host.

        Non-string inputs (for example ``pathlib.Path`` objects) are never
        URLs and yield False instead of raising inside ``urlparse``.
        """
        if not isinstance(string, str):
            return False
        try:
            result = urlparse(string)
        except ValueError:
            return False
        return bool(result.scheme and result.netloc)

    @classmethod
    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
        """
        Convert each page of a PDF into an image and create Scenario instances.

        :param pdf_path: Path to the PDF file.
        :param image_format: Format of the output images (default is 'jpeg').
        :return: ``cls`` built from one Scenario per page image.
        """
        import tempfile
        from pdf2image import convert_from_path
        from edsl.scenarios import Scenario

        # The images only need to exist while the scenarios are being built,
        # so they are written into a directory that is removed afterwards.
        with tempfile.TemporaryDirectory() as output_folder:
            images = convert_from_path(pdf_path)

            scenarios = []
            for i, image in enumerate(images):
                image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
                image.save(image_path, image_format.upper())

                scenario = Scenario._from_filepath_image(image_path)
                scenarios.append(scenario)

            return cls(scenarios)

    @staticmethod
    def extract_text_from_pdf(pdf_path):
        """Yield one Scenario per page of the PDF at ``pdf_path``.

        Each scenario holds ``filename`` (base name of the path), ``page``
        (1-based page number) and ``text`` (the text extracted from the page).
        This is a generator, so nothing happens until it is iterated.

        :raises FileNotFoundError: If ``pdf_path`` does not exist.
        """
        # TODO: Add test case
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"The file {pdf_path} does not exist.")

        from edsl import Scenario

        document = fitz.open(pdf_path)
        try:
            filename = os.path.basename(pdf_path)

            for page_num in range(len(document)):
                page = document.load_page(page_num)
                text = page.get_text()

                page_info = {"filename": filename, "page": page_num + 1, "text": text}
                yield Scenario(page_info)
        finally:
            # Release the file handle even if the consumer stops iterating early.
            document.close()

    @staticmethod
    def create_hello_world_pdf(pdf_path):
        """Write a small LaTeX document and compile it with ``pdflatex``.

        The LaTeX source is written to ``pdf_path + ".tex"`` and ``pdflatex``
        is run on that file; afterwards ``pdf_path + ".aux"`` and
        ``pdf_path + ".log"`` are deleted if they exist.

        :param pdf_path: Output path without extension, e.g. ``"hello_world"``.
        :raises subprocess.CalledProcessError: If ``pdflatex`` exits with a
            non-zero status.
        """
        latex_content = r"""
\documentclass{article}
\title{Hello World}
\author{John}
\date{\today}
\begin{document}
\maketitle
\section{Hello, World!}
This is a simple hello world example created with LaTeX and Python.
\end{document}
"""

        tex_filename = pdf_path + ".tex"
        with open(tex_filename, "w") as tex_file:
            tex_file.write(latex_content)

        # NOTE(review): no output directory is passed to pdflatex, so the
        # cleanup below presumably only matches when ``pdf_path`` has no
        # directory component -- confirm.
        subprocess.run(["pdflatex", tex_filename], check=True)

        # Best-effort removal of the auxiliary files generated by pdflatex.
        for aux_file in (pdf_path + ext for ext in (".aux", ".log")):
            try:
                os.remove(aux_file)
            except FileNotFoundError:
                pass
243
+
244
+
245
+ if __name__ == "__main__":
246
+ pass
247
+
248
+ # from edsl import ScenarioList
249
+
250
+ # class ScenarioListNew(ScenarioList, ScenarioListPdfMixin):
251
+ # pass
252
+
253
+ # #ScenarioListNew.create_hello_world_pdf('hello_world')
254
+ # #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
255
+ # #print(scenarios)
256
+
257
+ # from edsl import ScenarioList, QuestionFreeText
258
+ # homo_silicus = ScenarioList.from_pdf('w31122.pdf')
259
+ # q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
260
+ # results = q.by(homo_silicus).run(progress_bar = True)
261
+ # results.select('scenario.page', 'answer.key_point').order_by('page').print()
@@ -1,3 +1,4 @@
1
- from edsl.scenarios.Scenario import Scenario
2
- from edsl.scenarios.ScenarioList import ScenarioList
3
- from edsl.scenarios.FileStore import FileStore
1
+ from edsl.scenarios.Scenario import Scenario
2
+ from edsl.scenarios.ScenarioList import ScenarioList
3
+
4
+ # from edsl.scenarios.FileStore import FileStore
edsl/shared.py CHANGED
@@ -1 +1 @@
1
- shared_globals = {}
1
+ shared_globals = {}