edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +413 -332
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +57 -49
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +1071 -867
- edsl/agents/AgentList.py +551 -413
- edsl/agents/Invigilator.py +284 -233
- edsl/agents/InvigilatorBase.py +257 -270
- edsl/agents/PromptConstructor.py +272 -354
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +2 -3
- edsl/agents/descriptors.py +99 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -117
- edsl/auto/StageBase.py +243 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +74 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +218 -224
- edsl/base/Base.py +279 -279
- edsl/config.py +177 -157
- edsl/conversation/Conversation.py +290 -290
- edsl/conversation/car_buying.py +59 -58
- edsl/conversation/chips.py +95 -95
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +1106 -1028
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +573 -555
- edsl/data/CacheEntry.py +230 -233
- edsl/data/CacheHandler.py +168 -149
- edsl/data/RemoteCacheSync.py +186 -78
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +5 -4
- edsl/data/hack.py +10 -0
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +74 -73
- edsl/enums.py +202 -175
- edsl/exceptions/BaseException.py +21 -21
- edsl/exceptions/__init__.py +54 -54
- edsl/exceptions/agents.py +54 -42
- edsl/exceptions/cache.py +5 -5
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +109 -91
- edsl/exceptions/results.py +29 -29
- edsl/exceptions/scenarios.py +29 -22
- edsl/exceptions/surveys.py +37 -37
- edsl/inference_services/AnthropicService.py +106 -87
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -120
- edsl/inference_services/AzureAI.py +215 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +143 -148
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +80 -147
- edsl/inference_services/InferenceServicesCollection.py +138 -97
- edsl/inference_services/MistralAIService.py +120 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +236 -224
- edsl/inference_services/PerplexityService.py +160 -163
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -89
- edsl/inference_services/TogetherAIService.py +172 -170
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +41 -41
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +43 -56
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +823 -898
- edsl/jobs/JobsChecks.py +172 -147
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -268
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +283 -251
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +396 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
- edsl/jobs/runners/JobsRunnerStatus.py +297 -330
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +470 -450
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +161 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +626 -668
- edsl/language_models/ModelList.py +164 -155
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +2 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +156 -156
- edsl/language_models/utilities.py +65 -64
- edsl/notebooks/Notebook.py +263 -258
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +352 -362
- edsl/prompts/__init__.py +2 -2
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -664
- edsl/questions/QuestionBasePromptsMixin.py +221 -217
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +180 -182
- edsl/questions/QuestionFreeText.py +113 -114
- edsl/questions/QuestionFunctional.py +166 -166
- edsl/questions/QuestionList.py +223 -231
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +330 -286
- edsl/questions/QuestionNumerical.py +151 -153
- edsl/questions/QuestionRank.py +314 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/SimpleAskMixin.py +74 -73
- edsl/questions/__init__.py +27 -26
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +90 -87
- edsl/questions/derived/QuestionTopK.py +93 -93
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +427 -413
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
- edsl/questions/question_registry.py +177 -177
- edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
- edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/CSSParameterizer.py +108 -108
- edsl/results/Dataset.py +587 -424
- edsl/results/DatasetExportMixin.py +594 -731
- edsl/results/DatasetTree.py +295 -275
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +557 -465
- edsl/results/Results.py +1183 -1165
- edsl/results/ResultsExportMixin.py +45 -43
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/TableDisplay.py +125 -198
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +2 -2
- edsl/results/file_exports.py +252 -0
- edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
- edsl/results/{Selector.py → results_selector.py} +145 -135
- edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +77 -77
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +511 -632
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +498 -601
- edsl/scenarios/ScenarioHtmlMixin.py +65 -64
- edsl/scenarios/ScenarioList.py +1458 -1287
- edsl/scenarios/ScenarioListExportMixin.py +45 -52
- edsl/scenarios/ScenarioListPdfMixin.py +239 -261
- edsl/scenarios/__init__.py +3 -4
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +38 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +521 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +327 -326
- edsl/surveys/RuleCollection.py +385 -387
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1280 -1801
- edsl/surveys/SurveyCSS.py +273 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +5 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +60 -56
- edsl/surveys/instructions/ChangeInstruction.py +48 -49
- edsl/surveys/instructions/Instruction.py +56 -65
- edsl/surveys/instructions/InstructionCollection.py +82 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +19 -19
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/test_h +1 -0
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/gcp_bucket/example.py +50 -0
- edsl/utilities/interface.py +627 -627
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -263
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +436 -424
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/LICENSE +21 -21
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/METADATA +13 -11
- edsl-0.1.39.dev4.dist-info/RECORD +361 -0
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/registry.py +0 -190
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.39.dev3.dist-info/RECORD +0 -277
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/WHEEL +0 -0
@@ -1,52 +1,45 @@
|
|
1
|
-
"""Mixin class for exporting results."""
|
2
|
-
|
3
|
-
from functools import wraps
|
4
|
-
from edsl.results.DatasetExportMixin import DatasetExportMixin
|
5
|
-
|
6
|
-
|
7
|
-
def to_dataset(func):
|
8
|
-
"""Convert the object to a Dataset object before calling the function."""
|
9
|
-
|
10
|
-
@wraps(func)
|
11
|
-
def wrapper(self, *args, **kwargs):
|
12
|
-
"""Return the function with the Results object converted to a Dataset object."""
|
13
|
-
if self.__class__.__name__ == "ScenarioList":
|
14
|
-
return func(self.to_dataset(), *args, **kwargs)
|
15
|
-
else:
|
16
|
-
raise Exception(
|
17
|
-
f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
|
18
|
-
)
|
19
|
-
|
20
|
-
return wrapper
|
21
|
-
|
22
|
-
|
23
|
-
def decorate_methods_from_mixin(cls, mixin_cls):
|
24
|
-
for attr_name, attr_value in mixin_cls.__dict__.items():
|
25
|
-
if callable(attr_value) and not attr_name.startswith("__"):
|
26
|
-
setattr(cls, attr_name, to_dataset(attr_value))
|
27
|
-
return cls
|
28
|
-
|
29
|
-
|
30
|
-
#
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
"""Export the ScenarioList to a .docx file."""
|
47
|
-
dataset = self.to_dataset()
|
48
|
-
from edsl.results.DatasetTree import Tree
|
49
|
-
|
50
|
-
tree = Tree(dataset)
|
51
|
-
tree.construct_tree()
|
52
|
-
tree.to_docx(filename)
|
1
|
+
"""Mixin class for exporting results."""
|
2
|
+
|
3
|
+
from functools import wraps
|
4
|
+
from edsl.results.DatasetExportMixin import DatasetExportMixin
|
5
|
+
|
6
|
+
|
7
|
+
def to_dataset(func):
|
8
|
+
"""Convert the object to a Dataset object before calling the function."""
|
9
|
+
|
10
|
+
@wraps(func)
|
11
|
+
def wrapper(self, *args, **kwargs):
|
12
|
+
"""Return the function with the Results object converted to a Dataset object."""
|
13
|
+
if self.__class__.__name__ == "ScenarioList":
|
14
|
+
return func(self.to_dataset(), *args, **kwargs)
|
15
|
+
else:
|
16
|
+
raise Exception(
|
17
|
+
f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
|
18
|
+
)
|
19
|
+
|
20
|
+
return wrapper
|
21
|
+
|
22
|
+
|
23
|
+
def decorate_methods_from_mixin(cls, mixin_cls):
|
24
|
+
for attr_name, attr_value in mixin_cls.__dict__.items():
|
25
|
+
if callable(attr_value) and not attr_name.startswith("__"):
|
26
|
+
setattr(cls, attr_name, to_dataset(attr_value))
|
27
|
+
return cls
|
28
|
+
|
29
|
+
|
30
|
+
# @decorate_all_methods
|
31
|
+
class ScenarioListExportMixin(DatasetExportMixin):
|
32
|
+
"""Mixin class for exporting Results objects."""
|
33
|
+
|
34
|
+
def __init_subclass__(cls, **kwargs):
|
35
|
+
super().__init_subclass__(**kwargs)
|
36
|
+
decorate_methods_from_mixin(cls, DatasetExportMixin)
|
37
|
+
|
38
|
+
def to_docx(self, filename: str):
|
39
|
+
"""Export the ScenarioList to a .docx file."""
|
40
|
+
dataset = self.to_dataset()
|
41
|
+
from edsl.results.DatasetTree import Tree
|
42
|
+
|
43
|
+
tree = Tree(dataset)
|
44
|
+
tree.construct_tree()
|
45
|
+
tree.to_docx(filename)
|
@@ -1,261 +1,239 @@
|
|
1
|
-
import
|
2
|
-
import
|
3
|
-
import copy
|
4
|
-
import
|
5
|
-
import
|
6
|
-
import
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
import
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
return
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
#
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
os.remove(aux_file)
|
241
|
-
except FileNotFoundError:
|
242
|
-
pass
|
243
|
-
|
244
|
-
|
245
|
-
if __name__ == "__main__":
|
246
|
-
pass
|
247
|
-
|
248
|
-
# from edsl import ScenarioList
|
249
|
-
|
250
|
-
# class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
|
251
|
-
# pass
|
252
|
-
|
253
|
-
# #ScenarioListNew.create_hello_world_pdf('hello_world')
|
254
|
-
# #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
|
255
|
-
# #print(scenarios)
|
256
|
-
|
257
|
-
# from edsl import ScenarioList, QuestionFreeText
|
258
|
-
# homo_silicus = ScenarioList.from_pdf('w31122.pdf')
|
259
|
-
# q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
|
260
|
-
# results = q.by(homo_silicus).run(progress_bar = True)
|
261
|
-
# results.select('scenario.page', 'answer.key_point').order_by('page').print()
|
1
|
+
import os
|
2
|
+
import re
|
3
|
+
import copy
|
4
|
+
import atexit
|
5
|
+
import tempfile
|
6
|
+
import subprocess
|
7
|
+
|
8
|
+
|
9
|
+
class GoogleDriveDownloader:
|
10
|
+
_temp_dir = None
|
11
|
+
_temp_file_path = None
|
12
|
+
|
13
|
+
@classmethod
|
14
|
+
def fetch_from_drive(cls, url, filename=None):
|
15
|
+
import requests
|
16
|
+
|
17
|
+
# Extract file ID from the URL
|
18
|
+
file_id = cls._extract_file_id(url)
|
19
|
+
if not file_id:
|
20
|
+
raise ValueError("Invalid Google Drive URL")
|
21
|
+
|
22
|
+
# Construct the download URL
|
23
|
+
download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
|
24
|
+
|
25
|
+
# Send a GET request to the URL
|
26
|
+
session = requests.Session()
|
27
|
+
response = session.get(download_url, stream=True)
|
28
|
+
response.raise_for_status()
|
29
|
+
|
30
|
+
# Check for large file download prompt
|
31
|
+
for key, value in response.cookies.items():
|
32
|
+
if key.startswith("download_warning"):
|
33
|
+
params = {"id": file_id, "confirm": value}
|
34
|
+
response = session.get(download_url, params=params, stream=True)
|
35
|
+
break
|
36
|
+
|
37
|
+
# Create a temporary file to save the download
|
38
|
+
if not filename:
|
39
|
+
filename = "downloaded_file"
|
40
|
+
|
41
|
+
if cls._temp_dir is None:
|
42
|
+
cls._temp_dir = tempfile.TemporaryDirectory()
|
43
|
+
atexit.register(cls._cleanup)
|
44
|
+
|
45
|
+
cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
|
46
|
+
|
47
|
+
# Write the content to the temporary file
|
48
|
+
with open(cls._temp_file_path, "wb") as f:
|
49
|
+
for chunk in response.iter_content(32768):
|
50
|
+
if chunk:
|
51
|
+
f.write(chunk)
|
52
|
+
|
53
|
+
print(f"File saved to: {cls._temp_file_path}")
|
54
|
+
|
55
|
+
return cls._temp_file_path
|
56
|
+
|
57
|
+
@staticmethod
|
58
|
+
def _extract_file_id(url):
|
59
|
+
from urllib.parse import urlparse, parse_qs
|
60
|
+
|
61
|
+
# Try to extract file ID from '/file/d/' format
|
62
|
+
file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
|
63
|
+
if file_id_match:
|
64
|
+
return file_id_match.group(1)
|
65
|
+
|
66
|
+
# If not found, try to extract from 'open?id=' format
|
67
|
+
parsed_url = urlparse(url)
|
68
|
+
query_params = parse_qs(parsed_url.query)
|
69
|
+
if "id" in query_params:
|
70
|
+
return query_params["id"][0]
|
71
|
+
|
72
|
+
return None
|
73
|
+
|
74
|
+
@classmethod
|
75
|
+
def _cleanup(cls):
|
76
|
+
if cls._temp_dir:
|
77
|
+
cls._temp_dir.cleanup()
|
78
|
+
|
79
|
+
@classmethod
|
80
|
+
def get_temp_file_path(cls):
|
81
|
+
return cls._temp_file_path
|
82
|
+
|
83
|
+
|
84
|
+
def fetch_and_save_pdf(url, filename):
|
85
|
+
# Send a GET request to the URL
|
86
|
+
import requests
|
87
|
+
|
88
|
+
response = requests.get(url)
|
89
|
+
|
90
|
+
# Check if the request was successful
|
91
|
+
response.raise_for_status()
|
92
|
+
|
93
|
+
# Create a temporary directory
|
94
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
95
|
+
# Construct the full path for the file
|
96
|
+
temp_file_path = os.path.join(temp_dir, filename)
|
97
|
+
|
98
|
+
# Write the content to the temporary file
|
99
|
+
with open(temp_file_path, "wb") as file:
|
100
|
+
file.write(response.content)
|
101
|
+
|
102
|
+
print(f"PDF saved to: {temp_file_path}")
|
103
|
+
|
104
|
+
# Here you can perform operations with the file
|
105
|
+
# The file will be automatically deleted when you exit this block
|
106
|
+
|
107
|
+
return temp_file_path
|
108
|
+
|
109
|
+
|
110
|
+
class ScenarioListPdfMixin:
|
111
|
+
@classmethod
|
112
|
+
def from_pdf(cls, filename_or_url, collapse_pages=False):
|
113
|
+
# Check if the input is a URL
|
114
|
+
if cls.is_url(filename_or_url):
|
115
|
+
# Check if it's a Google Drive URL
|
116
|
+
if "drive.google.com" in filename_or_url:
|
117
|
+
temp_filename = GoogleDriveDownloader.fetch_from_drive(
|
118
|
+
filename_or_url, "temp_pdf.pdf"
|
119
|
+
)
|
120
|
+
else:
|
121
|
+
# For other URLs, use the previous fetch_and_save_pdf function
|
122
|
+
temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
|
123
|
+
|
124
|
+
scenarios = list(cls.extract_text_from_pdf(temp_filename))
|
125
|
+
else:
|
126
|
+
# If it's not a URL, assume it's a local file path
|
127
|
+
scenarios = list(cls.extract_text_from_pdf(filename_or_url))
|
128
|
+
if not collapse_pages:
|
129
|
+
return cls(scenarios)
|
130
|
+
else:
|
131
|
+
txt = ""
|
132
|
+
for scenario in scenarios:
|
133
|
+
txt += scenario["text"]
|
134
|
+
from edsl.scenarios import Scenario
|
135
|
+
|
136
|
+
base_scenario = copy.copy(scenarios[0])
|
137
|
+
base_scenario["text"] = txt
|
138
|
+
return base_scenario
|
139
|
+
|
140
|
+
@staticmethod
|
141
|
+
def is_url(string):
|
142
|
+
from urllib.parse import urlparse
|
143
|
+
|
144
|
+
try:
|
145
|
+
result = urlparse(string)
|
146
|
+
return all([result.scheme, result.netloc])
|
147
|
+
except ValueError:
|
148
|
+
return False
|
149
|
+
|
150
|
+
@classmethod
|
151
|
+
def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
|
152
|
+
"""
|
153
|
+
Convert each page of a PDF into an image and create Scenario instances.
|
154
|
+
|
155
|
+
:param pdf_path: Path to the PDF file.
|
156
|
+
:param image_format: Format of the output images (default is 'jpeg').
|
157
|
+
:return: ScenarioList instance containing the Scenario instances.
|
158
|
+
"""
|
159
|
+
import tempfile
|
160
|
+
from pdf2image import convert_from_path
|
161
|
+
from edsl.scenarios import Scenario
|
162
|
+
|
163
|
+
with tempfile.TemporaryDirectory() as output_folder:
|
164
|
+
# Convert PDF to images
|
165
|
+
images = convert_from_path(pdf_path)
|
166
|
+
|
167
|
+
scenarios = []
|
168
|
+
|
169
|
+
# Save each page as an image and create Scenario instances
|
170
|
+
for i, image in enumerate(images):
|
171
|
+
image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
|
172
|
+
image.save(image_path, image_format.upper())
|
173
|
+
|
174
|
+
scenario = Scenario._from_filepath_image(image_path)
|
175
|
+
scenarios.append(scenario)
|
176
|
+
|
177
|
+
# print(f"Saved {len(images)} pages as images in {output_folder}")
|
178
|
+
return cls(scenarios)
|
179
|
+
|
180
|
+
@staticmethod
|
181
|
+
def extract_text_from_pdf(pdf_path):
|
182
|
+
from edsl.scenarios.Scenario import Scenario
|
183
|
+
import fitz # PyMuPDF
|
184
|
+
|
185
|
+
# TODO: Add test case
|
186
|
+
# Ensure the file exists
|
187
|
+
if not os.path.exists(pdf_path):
|
188
|
+
raise FileNotFoundError(f"The file {pdf_path} does not exist.")
|
189
|
+
|
190
|
+
# Open the PDF file
|
191
|
+
document = fitz.open(pdf_path)
|
192
|
+
|
193
|
+
# Get the filename from the path
|
194
|
+
filename = os.path.basename(pdf_path)
|
195
|
+
|
196
|
+
# Iterate through each page and extract text
|
197
|
+
for page_num in range(len(document)):
|
198
|
+
page = document.load_page(page_num)
|
199
|
+
text = page.get_text()
|
200
|
+
|
201
|
+
# Create a dictionary for the current page
|
202
|
+
page_info = {"filename": filename, "page": page_num + 1, "text": text}
|
203
|
+
yield Scenario(page_info)
|
204
|
+
|
205
|
+
def create_hello_world_pdf(pdf_path):
|
206
|
+
# LaTeX content
|
207
|
+
latex_content = r"""
|
208
|
+
\documentclass{article}
|
209
|
+
\title{Hello World}
|
210
|
+
\author{John}
|
211
|
+
\date{\today}
|
212
|
+
\begin{document}
|
213
|
+
\maketitle
|
214
|
+
\section{Hello, World!}
|
215
|
+
This is a simple hello world example created with LaTeX and Python.
|
216
|
+
\end{document}
|
217
|
+
"""
|
218
|
+
|
219
|
+
# Create a .tex file
|
220
|
+
tex_filename = pdf_path + ".tex"
|
221
|
+
with open(tex_filename, "w") as tex_file:
|
222
|
+
tex_file.write(latex_content)
|
223
|
+
|
224
|
+
# Compile the .tex file to PDF
|
225
|
+
subprocess.run(["pdflatex", tex_filename], check=True)
|
226
|
+
|
227
|
+
# Optionally, clean up auxiliary files generated by pdflatex
|
228
|
+
aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
|
229
|
+
for aux_file in aux_files:
|
230
|
+
try:
|
231
|
+
os.remove(aux_file)
|
232
|
+
except FileNotFoundError:
|
233
|
+
pass
|
234
|
+
|
235
|
+
|
236
|
+
if __name__ == "__main__":
|
237
|
+
import doctest
|
238
|
+
|
239
|
+
doctest.testmod()
|
edsl/scenarios/__init__.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
from edsl.scenarios.Scenario import Scenario
|
2
|
-
from edsl.scenarios.ScenarioList import ScenarioList
|
3
|
-
|
4
|
-
# from edsl.scenarios.FileStore import FileStore
|
1
|
+
from edsl.scenarios.Scenario import Scenario
|
2
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
3
|
+
from edsl.scenarios.FileStore import FileStore
|