edsl 0.1.14__py3-none-any.whl → 0.1.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +348 -38
- edsl/BaseDiff.py +260 -0
- edsl/TemplateLoader.py +24 -0
- edsl/__init__.py +46 -10
- edsl/__version__.py +1 -0
- edsl/agents/Agent.py +842 -144
- edsl/agents/AgentList.py +521 -25
- edsl/agents/Invigilator.py +250 -374
- edsl/agents/InvigilatorBase.py +257 -0
- edsl/agents/PromptConstructor.py +272 -0
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/descriptors.py +43 -13
- edsl/agents/prompt_helpers.py +129 -0
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -0
- edsl/auto/StageBase.py +243 -0
- edsl/auto/StageGenerateSurvey.py +178 -0
- edsl/auto/StageLabelQuestions.py +125 -0
- edsl/auto/StagePersona.py +61 -0
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
- edsl/auto/StagePersonaDimensionValues.py +74 -0
- edsl/auto/StagePersonaDimensions.py +69 -0
- edsl/auto/StageQuestions.py +74 -0
- edsl/auto/SurveyCreatorPipeline.py +21 -0
- edsl/auto/utilities.py +218 -0
- edsl/base/Base.py +279 -0
- edsl/config.py +121 -104
- edsl/conversation/Conversation.py +290 -0
- edsl/conversation/car_buying.py +59 -0
- edsl/conversation/chips.py +95 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -0
- edsl/coop/__init__.py +1 -0
- edsl/coop/coop.py +1029 -134
- edsl/coop/utils.py +131 -0
- edsl/data/Cache.py +560 -89
- edsl/data/CacheEntry.py +230 -0
- edsl/data/CacheHandler.py +168 -0
- edsl/data/RemoteCacheSync.py +186 -0
- edsl/data/SQLiteDict.py +292 -0
- edsl/data/__init__.py +5 -3
- edsl/data/orm.py +6 -33
- edsl/data_transfer_models.py +74 -27
- edsl/enums.py +165 -8
- edsl/exceptions/BaseException.py +21 -0
- edsl/exceptions/__init__.py +52 -46
- edsl/exceptions/agents.py +33 -15
- edsl/exceptions/cache.py +5 -0
- edsl/exceptions/coop.py +8 -0
- edsl/exceptions/general.py +34 -0
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +15 -0
- edsl/exceptions/language_models.py +46 -1
- edsl/exceptions/questions.py +80 -5
- edsl/exceptions/results.py +16 -5
- edsl/exceptions/scenarios.py +29 -0
- edsl/exceptions/surveys.py +13 -10
- edsl/inference_services/AnthropicService.py +106 -0
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -0
- edsl/inference_services/AzureAI.py +215 -0
- edsl/inference_services/DeepInfraService.py +18 -0
- edsl/inference_services/GoogleService.py +143 -0
- edsl/inference_services/GroqService.py +20 -0
- edsl/inference_services/InferenceServiceABC.py +80 -0
- edsl/inference_services/InferenceServicesCollection.py +138 -0
- edsl/inference_services/MistralAIService.py +120 -0
- edsl/inference_services/OllamaService.py +18 -0
- edsl/inference_services/OpenAIService.py +236 -0
- edsl/inference_services/PerplexityService.py +160 -0
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -0
- edsl/inference_services/TogetherAIService.py +172 -0
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/registry.py +41 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +21 -20
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +684 -204
- edsl/jobs/JobsChecks.py +172 -0
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -0
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -0
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -0
- edsl/jobs/buckets/ModelBuckets.py +65 -0
- edsl/jobs/buckets/TokenBucket.py +283 -0
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +392 -0
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -0
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -0
- edsl/jobs/interviews/InterviewStatistic.py +63 -0
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -0
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -0
- edsl/jobs/interviews/InterviewStatusLog.py +92 -0
- edsl/jobs/interviews/ReportErrors.py +66 -0
- edsl/jobs/interviews/interview_status_enum.py +9 -0
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -110
- edsl/jobs/runners/JobsRunnerStatus.py +298 -0
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -0
- edsl/jobs/tasks/TaskCreators.py +64 -0
- edsl/jobs/tasks/TaskHistory.py +470 -0
- edsl/jobs/tasks/TaskStatusLog.py +23 -0
- edsl/jobs/tasks/task_status_enum.py +161 -0
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -0
- edsl/jobs/tokens/TokenUsage.py +34 -0
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +507 -386
- edsl/language_models/ModelList.py +164 -0
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -0
- edsl/language_models/__init__.py +1 -8
- edsl/language_models/fake_openai_call.py +15 -0
- edsl/language_models/fake_openai_service.py +61 -0
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +109 -41
- edsl/language_models/utilities.py +65 -0
- edsl/notebooks/Notebook.py +263 -0
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -0
- edsl/prompts/Prompt.py +222 -93
- edsl/prompts/__init__.py +1 -1
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -0
- edsl/questions/QuestionBasePromptsMixin.py +221 -0
- edsl/questions/QuestionBudget.py +164 -67
- edsl/questions/QuestionCheckBox.py +281 -62
- edsl/questions/QuestionDict.py +343 -0
- edsl/questions/QuestionExtract.py +136 -50
- edsl/questions/QuestionFreeText.py +79 -55
- edsl/questions/QuestionFunctional.py +138 -41
- edsl/questions/QuestionList.py +184 -57
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +293 -69
- edsl/questions/QuestionNumerical.py +109 -56
- edsl/questions/QuestionRank.py +244 -49
- edsl/questions/Quick.py +41 -0
- edsl/questions/SimpleAskMixin.py +74 -0
- edsl/questions/__init__.py +9 -6
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +153 -38
- edsl/questions/compose_questions.py +13 -7
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -0
- edsl/questions/derived/QuestionLikertFive.py +28 -26
- edsl/questions/derived/QuestionLinearScale.py +41 -28
- edsl/questions/derived/QuestionTopK.py +34 -26
- edsl/questions/derived/QuestionYesNo.py +40 -27
- edsl/questions/descriptors.py +228 -74
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -0
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
- edsl/questions/prompt_templates/question_extract.jinja +11 -0
- edsl/questions/prompt_templates/question_free_text.jinja +3 -0
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
- edsl/questions/prompt_templates/question_list.jinja +17 -0
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
- edsl/questions/prompt_templates/question_numerical.jinja +37 -0
- edsl/questions/question_base_gen_mixin.py +168 -0
- edsl/questions/question_registry.py +130 -46
- edsl/questions/register_questions_meta.py +71 -0
- edsl/questions/response_validator_abc.py +188 -0
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +5 -2
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/budget/__init__.py +0 -0
- edsl/questions/templates/budget/answering_instructions.jinja +7 -0
- edsl/questions/templates/budget/question_presentation.jinja +7 -0
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
- edsl/questions/templates/dict/__init__.py +0 -0
- edsl/questions/templates/dict/answering_instructions.jinja +21 -0
- edsl/questions/templates/dict/question_presentation.jinja +1 -0
- edsl/questions/templates/extract/__init__.py +0 -0
- edsl/questions/templates/extract/answering_instructions.jinja +7 -0
- edsl/questions/templates/extract/question_presentation.jinja +1 -0
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +1 -0
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
- edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +4 -0
- edsl/questions/templates/list/question_presentation.jinja +5 -0
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +7 -0
- edsl/questions/templates/numerical/question_presentation.jinja +7 -0
- edsl/questions/templates/rank/__init__.py +0 -0
- edsl/questions/templates/rank/answering_instructions.jinja +11 -0
- edsl/questions/templates/rank/question_presentation.jinja +15 -0
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
- edsl/questions/templates/top_k/question_presentation.jinja +22 -0
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
- edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
- edsl/results/CSSParameterizer.py +108 -0
- edsl/results/Dataset.py +550 -19
- edsl/results/DatasetExportMixin.py +594 -0
- edsl/results/DatasetTree.py +295 -0
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +477 -173
- edsl/results/Results.py +987 -269
- edsl/results/ResultsExportMixin.py +28 -125
- edsl/results/ResultsGGMixin.py +83 -15
- edsl/results/TableDisplay.py +125 -0
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +1 -1
- edsl/results/file_exports.py +252 -0
- edsl/results/results_fetch_mixin.py +33 -0
- edsl/results/results_selector.py +145 -0
- edsl/results/results_tools_mixin.py +98 -0
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +78 -0
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -0
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +543 -0
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +431 -62
- edsl/scenarios/ScenarioHtmlMixin.py +65 -0
- edsl/scenarios/ScenarioList.py +1415 -45
- edsl/scenarios/ScenarioListExportMixin.py +45 -0
- edsl/scenarios/ScenarioListPdfMixin.py +239 -0
- edsl/scenarios/__init__.py +2 -0
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +49 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/scenario_join.py +131 -0
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -0
- edsl/study/ObjectEntry.py +173 -0
- edsl/study/ProofOfWork.py +113 -0
- edsl/study/SnapShot.py +80 -0
- edsl/study/Study.py +521 -0
- edsl/study/__init__.py +4 -0
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +92 -11
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +9 -4
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +156 -35
- edsl/surveys/Rule.py +221 -74
- edsl/surveys/RuleCollection.py +241 -61
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1079 -339
- edsl/surveys/SurveyCSS.py +273 -0
- edsl/surveys/SurveyExportMixin.py +235 -40
- edsl/surveys/SurveyFlowVisualization.py +181 -0
- edsl/surveys/SurveyQualtricsImport.py +284 -0
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +4 -2
- edsl/surveys/base.py +19 -3
- edsl/surveys/descriptors.py +17 -6
- edsl/surveys/instructions/ChangeInstruction.py +48 -0
- edsl/surveys/instructions/Instruction.py +56 -0
- edsl/surveys/instructions/InstructionCollection.py +82 -0
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +24 -0
- edsl/templates/error_reporting/exceptions_by_model.html +35 -0
- edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
- edsl/templates/error_reporting/exceptions_by_type.html +17 -0
- edsl/templates/error_reporting/interview_details.html +116 -0
- edsl/templates/error_reporting/interviews.html +19 -0
- edsl/templates/error_reporting/overview.html +5 -0
- edsl/templates/error_reporting/performance_plot.html +2 -0
- edsl/templates/error_reporting/report.css +74 -0
- edsl/templates/error_reporting/report.html +118 -0
- edsl/templates/error_reporting/report.js +25 -0
- edsl/tools/__init__.py +1 -0
- edsl/tools/clusters.py +192 -0
- edsl/tools/embeddings.py +27 -0
- edsl/tools/embeddings_plotting.py +118 -0
- edsl/tools/plotting.py +112 -0
- edsl/tools/summarize.py +18 -0
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +5 -0
- edsl/utilities/__init__.py +21 -20
- edsl/utilities/ast_utilities.py +3 -0
- edsl/utilities/data/Registry.py +2 -0
- edsl/utilities/decorators.py +41 -0
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/interface.py +310 -60
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -0
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/restricted_python.py +70 -0
- edsl/utilities/utilities.py +203 -13
- edsl-0.1.40.dist-info/METADATA +111 -0
- edsl-0.1.40.dist-info/RECORD +362 -0
- {edsl-0.1.14.dist-info → edsl-0.1.40.dist-info}/WHEEL +1 -1
- edsl/agents/AgentListExportMixin.py +0 -24
- edsl/coop/old.py +0 -31
- edsl/data/Database.py +0 -141
- edsl/data/crud.py +0 -121
- edsl/jobs/Interview.py +0 -417
- edsl/jobs/JobsRunner.py +0 -63
- edsl/jobs/JobsRunnerStatusMixin.py +0 -115
- edsl/jobs/base.py +0 -47
- edsl/jobs/buckets.py +0 -166
- edsl/jobs/runners/JobsRunnerDryRun.py +0 -19
- edsl/jobs/runners/JobsRunnerStreaming.py +0 -54
- edsl/jobs/task_management.py +0 -218
- edsl/jobs/token_tracking.py +0 -78
- edsl/language_models/DeepInfra.py +0 -69
- edsl/language_models/OpenAI.py +0 -98
- edsl/language_models/model_interfaces/GeminiPro.py +0 -66
- edsl/language_models/model_interfaces/LanguageModelOpenAIFour.py +0 -8
- edsl/language_models/model_interfaces/LanguageModelOpenAIThreeFiveTurbo.py +0 -8
- edsl/language_models/model_interfaces/LlamaTwo13B.py +0 -21
- edsl/language_models/model_interfaces/LlamaTwo70B.py +0 -21
- edsl/language_models/model_interfaces/Mixtral8x7B.py +0 -24
- edsl/language_models/registry.py +0 -81
- edsl/language_models/schemas.py +0 -15
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/prompts/QuestionInstructionsBase.py +0 -6
- edsl/prompts/library/agent_instructions.py +0 -29
- edsl/prompts/library/agent_persona.py +0 -17
- edsl/prompts/library/question_budget.py +0 -26
- edsl/prompts/library/question_checkbox.py +0 -32
- edsl/prompts/library/question_extract.py +0 -19
- edsl/prompts/library/question_freetext.py +0 -14
- edsl/prompts/library/question_linear_scale.py +0 -20
- edsl/prompts/library/question_list.py +0 -22
- edsl/prompts/library/question_multiple_choice.py +0 -44
- edsl/prompts/library/question_numerical.py +0 -31
- edsl/prompts/library/question_rank.py +0 -21
- edsl/prompts/prompt_config.py +0 -33
- edsl/prompts/registry.py +0 -185
- edsl/questions/Question.py +0 -240
- edsl/report/InputOutputDataTypes.py +0 -134
- edsl/report/RegressionMixin.py +0 -28
- edsl/report/ReportOutputs.py +0 -1228
- edsl/report/ResultsFetchMixin.py +0 -106
- edsl/report/ResultsOutputMixin.py +0 -14
- edsl/report/demo.ipynb +0 -645
- edsl/results/ResultsDBMixin.py +0 -184
- edsl/surveys/SurveyFlowVisualizationMixin.py +0 -92
- edsl/trackers/Tracker.py +0 -91
- edsl/trackers/TrackerAPI.py +0 -196
- edsl/trackers/TrackerTasks.py +0 -70
- edsl/utilities/pastebin.py +0 -141
- edsl-0.1.14.dist-info/METADATA +0 -69
- edsl-0.1.14.dist-info/RECORD +0 -141
- /edsl/{language_models/model_interfaces → inference_services}/__init__.py +0 -0
- /edsl/{report/__init__.py → jobs/runners/JobsRunnerStatusData.py} +0 -0
- /edsl/{trackers/__init__.py → language_models/ServiceDataSources.py} +0 -0
- {edsl-0.1.14.dist-info → edsl-0.1.40.dist-info}/LICENSE +0 -0
edsl/scenarios/ScenarioList.py
CHANGED
@@ -1,47 +1,1337 @@
|
|
1
|
+
"""A list of Scenarios to be used in a survey."""
|
2
|
+
|
1
3
|
from __future__ import annotations
|
2
|
-
from
|
3
|
-
|
4
|
+
from typing import (
|
5
|
+
Any,
|
6
|
+
Optional,
|
7
|
+
Union,
|
8
|
+
List,
|
9
|
+
Callable,
|
10
|
+
Literal,
|
11
|
+
TYPE_CHECKING,
|
12
|
+
)
|
4
13
|
|
5
|
-
|
14
|
+
try:
|
15
|
+
from typing import TypeAlias
|
16
|
+
except ImportError:
|
17
|
+
from typing_extensions import TypeAlias
|
18
|
+
|
19
|
+
import csv
|
20
|
+
import random
|
21
|
+
from io import StringIO
|
22
|
+
import inspect
|
23
|
+
from collections import UserList, defaultdict
|
24
|
+
from collections.abc import Iterable
|
25
|
+
|
26
|
+
if TYPE_CHECKING:
|
27
|
+
from urllib.parse import ParseResult
|
28
|
+
from edsl.results.Dataset import Dataset
|
29
|
+
from edsl.jobs.Jobs import Jobs
|
30
|
+
from edsl.surveys.Survey import Survey
|
31
|
+
from edsl.questions.QuestionBase import QuestionBase
|
32
|
+
|
33
|
+
|
34
|
+
from simpleeval import EvalWithCompoundTypes, NameNotDefined # type: ignore
|
35
|
+
|
36
|
+
from tabulate import tabulate_formats
|
6
37
|
|
7
|
-
from edsl.scenarios.Scenario import Scenario
|
8
38
|
from edsl.Base import Base
|
39
|
+
from edsl.utilities.remove_edsl_version import remove_edsl_version
|
40
|
+
|
41
|
+
from edsl.scenarios.Scenario import Scenario
|
42
|
+
from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
|
43
|
+
from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
|
44
|
+
from edsl.utilities.naming_utilities import sanitize_string
|
45
|
+
from edsl.utilities.is_valid_variable_name import is_valid_variable_name
|
46
|
+
from edsl.exceptions.scenarios import ScenarioError
|
47
|
+
|
48
|
+
from edsl.scenarios.directory_scanner import DirectoryScanner
|
9
49
|
|
10
50
|
|
11
|
-
class
|
12
|
-
|
51
|
+
# Combines ScenarioListPdfMixin and ScenarioListExportMixin into a single
# base class; it defines no members of its own.
class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
    pass
|
53
|
+
|
54
|
+
|
55
|
+
if TYPE_CHECKING:
|
56
|
+
from edsl.results.Dataset import Dataset
|
57
|
+
|
58
|
+
TableFormat: TypeAlias = Literal[
|
59
|
+
"plain",
|
60
|
+
"simple",
|
61
|
+
"github",
|
62
|
+
"grid",
|
63
|
+
"fancy_grid",
|
64
|
+
"pipe",
|
65
|
+
"orgtbl",
|
66
|
+
"rst",
|
67
|
+
"mediawiki",
|
68
|
+
"html",
|
69
|
+
"latex",
|
70
|
+
"latex_raw",
|
71
|
+
"latex_booktabs",
|
72
|
+
"tsv",
|
73
|
+
]
|
74
|
+
|
75
|
+
|
76
|
+
class ScenarioList(Base, UserList, ScenarioListMixin):
|
77
|
+
"""Class for creating a list of scenarios to be used in a survey."""
|
78
|
+
|
79
|
+
__documentation__ = (
|
80
|
+
"https://docs.expectedparrot.com/en/latest/scenarios.html#scenariolist"
|
81
|
+
)
|
82
|
+
|
83
|
+
def __init__(
    self, data: Optional[list] = None, codebook: Optional[dict[str, str]] = None
):
    """Set up the list contents and the attached codebook.

    :param data: Initial items for the list; an empty list is used when
        ``None`` is given.
    :param codebook: Mapping stored on the instance as ``codebook``; an
        empty dict is used when it is omitted or falsy.
    """
    initial_items = [] if data is None else data
    super().__init__(initial_items)
    self.codebook = codebook if codebook else {}
|
92
|
+
|
93
|
+
def unique(self) -> ScenarioList:
    """Return a new ScenarioList with duplicate scenarios removed.

    The first occurrence of each scenario is kept and the original
    ordering is preserved, so the result is deterministic.

    >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'a': 1}), Scenario({'a': 2})])
    >>> s.unique()
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
    """
    # dict.fromkeys de-duplicates using the same hash/equality rules as
    # set(), but keeps insertion order (a set has no defined order).
    return ScenarioList(list(dict.fromkeys(self)))
|
101
|
+
|
102
|
+
@property
def has_jinja_braces(self) -> bool:
    """Report whether at least one scenario's ``has_jinja_braces`` is truthy."""
    # Every scenario is inspected before the flags are reduced.
    per_scenario_flags = [item.has_jinja_braces for item in self]
    return any(per_scenario_flags)
|
106
|
+
|
107
|
+
def _convert_jinja_braces(self) -> ScenarioList:
    """Return a new ScenarioList built from each scenario's ``_convert_jinja_braces()`` result."""
    converted = []
    for item in self:
        converted.append(item._convert_jinja_braces())
    return ScenarioList(converted)
|
110
|
+
|
111
|
+
def give_valid_names(self, existing_codebook: dict = None) -> ScenarioList:
    """Rename scenario keys that are not valid variable names.

    Keys that already pass ``is_valid_variable_name`` are kept unchanged.
    Any other key is replaced by its codebook entry when one exists;
    otherwise it is run through ``sanitize_string`` and, if the result is
    still not a valid name, replaced by ``var_<n>`` where ``n`` is the
    current size of the codebook. Every newly derived name is recorded in
    the codebook.

    Args:
        existing_codebook (dict, optional): Existing mapping of original keys to valid names.
            A copy is used; the argument itself is not modified. Defaults to None.

    Returns:
        ScenarioList: A new ScenarioList with valid variable names and updated codebook.

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.give_valid_names()
    ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.give_valid_names()
    ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.give_valid_names({'are you there John?': 'custom_name'})
    ScenarioList([Scenario({'custom_name': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    """
    mapping = existing_codebook.copy() if existing_codebook else {}
    renamed = []

    for scenario in self:
        entry = {}
        for original_key in scenario:
            if is_valid_variable_name(original_key):
                target = original_key
            elif original_key in mapping:
                target = mapping[original_key]
            else:
                target = sanitize_string(original_key)
                if not is_valid_variable_name(target):
                    # Sanitizing did not help: fall back to a positional name.
                    target = f"var_{len(mapping)}"
                mapping[original_key] = target
            entry[target] = scenario[original_key]
        renamed.append(Scenario(entry))

    return ScenarioList(renamed, mapping)
|
153
|
+
|
154
|
+
def unpivot(
    self,
    id_vars: Optional[List[str]] = None,
    value_vars: Optional[List[str]] = None,
) -> ScenarioList:
    """
    Unpivot the ScenarioList, allowing for id variables to be specified.

    Each input scenario produces one output scenario per value variable,
    holding the id fields plus ``'variable'`` (the field name) and
    ``'value'`` (the field's value).

    Parameters:
        id_vars (list): Fields to use as identifier variables (kept in each entry)
        value_vars (list): Fields to unpivot. If None, all fields of the first
            scenario that are not in id_vars will be used.

    Returns:
        ScenarioList: The long-format list; empty when this list is empty.

    Example:
        >>> s = ScenarioList([
        ...     Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
        ...     Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
        ... ])
        >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
        ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
    """
    if id_vars is None:
        id_vars = []
    if value_vars is None:
        # The default is derived from the first scenario, so an empty list
        # has nothing to unpivot (and self[0] would raise IndexError).
        if len(self) == 0:
            return ScenarioList([])
        value_vars = [field for field in self[0].keys() if field not in id_vars]

    new_scenarios = []
    for scenario in self:
        for var in value_vars:
            new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
            new_scenario["variable"] = var
            new_scenario["value"] = scenario[var]
            new_scenarios.append(Scenario(new_scenario))

    return ScenarioList(new_scenarios)
|
188
|
+
|
189
|
+
def sem_filter(self, language_predicate: str) -> ScenarioList:
    """Keep only the scenarios for which a yes/no question is answered 'Yes'.

    A ``QuestionYesNo`` whose text is ``language_predicate`` is run against a
    duplicate of this list; the answers are attached as a temporary
    ``criteria`` field, used to filter, and then dropped.

    :param language_predicate: The language predicate to use.

    Inspired by:
    @misc{patel2024semanticoperators,
        title={Semantic Operators: A Declarative Model for Rich, AI-based Analytics Over Text Data},
        author={Liana Patel and Siddharth Jha and Parth Asawa and Melissa Pan and Carlos Guestrin and Matei Zaharia},
        year={2024},
        eprint={2407.11418},
        archivePrefix={arXiv},
        primaryClass={cs.DB},
        url={https://arxiv.org/abs/2407.11418},
        }
    """
    from edsl import QuestionYesNo

    working_copy = self.duplicate()
    question = QuestionYesNo(
        question_text=language_predicate, question_name="binary_outcome"
    )
    run_results = question.by(working_copy).run(verbose=False)
    outcomes = run_results.select("binary_outcome").to_list()

    annotated = working_copy.add_list("criteria", outcomes)
    accepted = annotated.filter("criteria == 'Yes'")
    return accepted.drop("criteria")
|
216
|
+
|
217
|
+
def pivot(
    self,
    id_vars: Optional[List[str]] = None,
    var_name="variable",
    value_name="value",
) -> ScenarioList:
    """
    Pivot the ScenarioList from long to wide format.

    Scenarios sharing the same id values are merged into a single scenario
    in which each ``var_name`` entry becomes a field holding the matching
    ``value_name`` entry. Groups appear in first-seen order.

    Parameters:
        id_vars (list): Fields to use as identifier variables. When omitted,
            no identifiers are used and all scenarios merge into one.
        var_name (str): Name of the variable column (default: 'variable')
        value_name (str): Name of the value column (default: 'value')

    Example:
        >>> s = ScenarioList([
        ...     Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
        ...     Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
        ...     Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
        ...     Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
        ... ])
        >>> s.pivot(id_vars=['id', 'year'])
        ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
    """
    # The declared default is None; treat it as "no identifier fields"
    # instead of failing when iterating over None.
    if id_vars is None:
        id_vars = []

    pivoted_dict = {}

    for scenario in self:
        # Create a tuple of id values to use as a key
        id_key = tuple(scenario[id_var] for id_var in id_vars)

        # If this combination of id values hasn't been seen before, initialize it
        if id_key not in pivoted_dict:
            pivoted_dict[id_key] = dict(zip(id_vars, id_key))

        # Add the variable-value pair to the dict
        pivoted_dict[id_key][scenario[var_name]] = scenario[value_name]

    # Each row already starts with its id fields, so it can be used directly.
    # (Expanding it with ** would require every variable name to be a str.)
    return ScenarioList([Scenario(row) for row in pivoted_dict.values()])
|
263
|
+
|
264
|
+
def group_by(
    self, id_vars: List[str], variables: List[str], func: Callable
) -> ScenarioList:
    """
    Group the ScenarioList by id_vars and apply a function to the specified variables.

    For every distinct combination of ``id_vars`` values, the values of each
    field in ``variables`` are collected into a list and the lists are passed
    positionally to ``func`` in the order given by ``variables``.

    :param id_vars: Fields to use as identifier variables
    :param variables: Fields to group and aggregate
    :param func: Function to apply to the grouped variables; it must accept
        exactly ``len(variables)`` parameters and return a dictionary

    Returns:
        ScenarioList: A new ScenarioList with the grouped and aggregated results

    Raises:
        ScenarioError: If the parameter count of ``func`` does not match
            ``variables``, if ``func`` raises, or if it does not return a dict.

    Example:
        >>> def avg_sum(a, b):
        ...     return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
        >>> s = ScenarioList([
        ...     Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
        ...     Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
        ...     Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
        ...     Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
        ... ])
        >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
        ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
    """
    # Not every callable has __name__ (e.g. functools.partial objects), so
    # fall back to repr() rather than failing while building a message.
    func_name = getattr(func, "__name__", repr(func))

    # Check if the function is compatible with the specified variables
    func_params = inspect.signature(func).parameters
    if len(func_params) != len(variables):
        raise ScenarioError(
            f"Function {func_name} expects {len(func_params)} arguments, but {len(variables)} variables were provided"
        )

    # Group the scenarios: id-value tuple -> variable name -> collected values
    grouped: dict[tuple, dict[str, list]] = defaultdict(lambda: defaultdict(list))
    for scenario in self:
        key = tuple(scenario[id_var] for id_var in id_vars)
        for var in variables:
            grouped[key][var].append(scenario[var])

    # Apply the function to each group
    result = []
    for key, group in grouped.items():
        try:
            aggregated = func(*[group[var] for var in variables])
        except Exception as e:
            # Chain the original exception so its traceback is not lost.
            raise ScenarioError(
                f"Error applying function to group {key}: {str(e)}"
            ) from e

        if not isinstance(aggregated, dict):
            raise ScenarioError(f"Function {func_name} must return a dictionary")

        new_scenario = dict(zip(id_vars, key))
        new_scenario.update(aggregated)
        result.append(Scenario(new_scenario))

    return ScenarioList(result)
|
321
|
+
|
322
|
+
@property
def parameters(self) -> set:
    """Collect every key that appears in at least one scenario of the list.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'b': 2})])
    >>> s.parameters == {'a', 'b'}
    True
    """
    # set.union needs at least one argument, so the empty list is handled first.
    if not len(self):
        return set()

    key_sets = [set(scenario.keys()) for scenario in self]
    return set.union(*key_sets)
|
336
|
+
|
337
|
+
def __hash__(self) -> int:
    """Hash the list through its sorted dictionary form, without the version tag.

    >>> s = ScenarioList.example()
    >>> hash(s)
    1262252885757976162
    """
    from edsl.utilities.utilities import dict_hash

    serialized = self.to_dict(sort=True, add_edsl_version=False)
    return dict_hash(serialized)
|
347
|
+
|
348
|
+
def __eq__(self, other: Any) -> bool:
    """Compare this object with ``other`` by hash value.

    :param other: The object to compare against.

    Returns ``False`` (instead of raising ``TypeError``) when ``other`` is
    unhashable, e.g. a plain ``list`` or ``dict``.
    """
    # Only the hash of `other` is guarded, so an error raised while hashing
    # `self` still propagates to the caller.
    try:
        other_hash = hash(other)
    except TypeError:
        return False
    return hash(self) == other_hash
|
350
|
+
|
351
|
+
def __repr__(self):
    """Return ``ScenarioList(<data>)`` where ``<data>`` is the formatted ``self.data``."""
    return "ScenarioList({})".format(self.data)
|
353
|
+
|
354
|
+
def __mul__(self, other: ScenarioList) -> ScenarioList:
    """Build the cross product of two ScenarioLists.

    Every scenario of ``self`` is added (``+``) to every scenario of ``other``.

    >>> s1 = ScenarioList.from_list("a", [1, 2])
    >>> s2 = ScenarioList.from_list("b", [3, 4])
    >>> s1 * s2
    ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
    """
    from itertools import product

    # Materialise all pairs first, then combine each pair.
    pairs = list(product(self, other))
    combined = [left + right for left, right in pairs]
    return ScenarioList(combined)
|
368
|
+
|
369
|
+
def times(self, other: ScenarioList) -> ScenarioList:
    """Named alias for the cross product; delegates to ``__mul__``.

    Example:

    >>> s1 = ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
    >>> s2 = ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
    >>> s1.times(s2)
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
    """
    cross_product = self.__mul__(other)
    return cross_product
|
380
|
+
|
381
|
+
def shuffle(self, seed: Optional[str] = None) -> ScenarioList:
    """Return a shuffled copy of the ScenarioList.

    :param seed: Optional seed. When given (anything other than ``None``), a
        private ``random.Random(seed)`` generator is used, so the result is
        reproducible and the global ``random`` state is left untouched.
        When omitted, the module-level generator is used.

    >>> s = ScenarioList.from_list("a", [1,2,3,4])
    >>> s.shuffle(seed = "1234")
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 4}), Scenario({'a': 3}), Scenario({'a': 2})])
    """
    sl = self.duplicate()
    # `is not None` (rather than truthiness) so that seeds such as 0 or ""
    # are honoured instead of being silently ignored.
    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(sl.data)
    return sl
|
393
|
+
|
394
|
+
def sample(self, n: int, seed: Optional[str] = None) -> ScenarioList:
    """Return a random sample of ``n`` scenarios from the ScenarioList.

    :param n: Number of scenarios to draw (without replacement, via ``random.sample``).
    :param seed: Optional seed. When given (anything other than ``None``), a
        private ``random.Random(seed)`` generator is used, so the result is
        reproducible and the global ``random`` state is left untouched.

    >>> s = ScenarioList.from_list("a", [1,2,3,4,5,6])
    >>> s.sample(3, seed = "edsl")
    ScenarioList([Scenario({'a': 2}), Scenario({'a': 1}), Scenario({'a': 3})])
    """
    # `is not None` (rather than truthiness) so that seeds such as 0 or ""
    # are honoured instead of being silently ignored.
    rng = random.Random(seed) if seed is not None else random

    sl = self.duplicate()
    return ScenarioList(rng.sample(sl.data, n))
|
406
|
+
|
407
|
+
def expand(self, expand_field: str, number_field: bool = False) -> ScenarioList:
    """Produce one scenario per element of an iterable-valued field.

    :param expand_field: The field whose value is split into separate scenarios.
    :param number_field: When true, also store the 1-based position of each
        element under ``<expand_field>_number``.

    Strings and non-iterable values are treated as a single element.

    Example:

    >>> s = ScenarioList( [ Scenario({'a':1, 'b':[1,2]}) ] )
    >>> s.expand('b')
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.expand('b', number_field=True)
    ScenarioList([Scenario({'a': 1, 'b': 1, 'b_number': 1}), Scenario({'a': 1, 'b': 2, 'b_number': 2})])
    """
    expanded = []
    for scenario in self:
        raw = scenario[expand_field]
        is_single = isinstance(raw, str) or not isinstance(raw, Iterable)
        elements = [raw] if is_single else raw
        for position, element in enumerate(elements, start=1):
            clone = scenario.copy()
            clone[expand_field] = element
            if number_field:
                clone[expand_field + "_number"] = position
            expanded.append(clone)
    return ScenarioList(expanded)
|
433
|
+
|
434
|
+
def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
    """Merge several fields into one string-valued field.

    :param fields: The fields to concatenate, in order.
    :param separator: The string placed between the stringified values.

    Fields present in a scenario are removed from its copy and their
    ``str()`` values are joined under the key ``concat_<f1>_<f2>_...``.
    Fields missing from a scenario are skipped.

    Returns:
        ScenarioList: A new ScenarioList with concatenated fields.

    Example:
    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
    >>> s.concatenate(['a', 'b', 'c'])
    ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
    """
    merged = []
    for scenario in self:
        updated = scenario.copy()
        pieces = []
        for name in fields:
            if name not in updated:
                continue
            pieces.append(str(updated[name]))
            del updated[name]

        updated[f"concat_{'_'.join(fields)}"] = separator.join(pieces)
        merged.append(updated)

    return ScenarioList(merged)
|
462
|
+
|
463
|
+
def unpack_dict(
    self, field: str, prefix: Optional[str] = None, drop_field: bool = False
) -> ScenarioList:
    """Promote the entries of a dictionary-valued field to top-level fields.

    :param field: The field holding the dictionary to unpack.
    :param prefix: An optional prefix prepended to each new field name.
    :param drop_field: Whether to remove the original field afterwards.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
    >>> s.unpack_dict('b')
    ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
    >>> s.unpack_dict('b', prefix='new_')
    ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'new_c': 2, 'new_d': 3})])
    """
    unpacked_scenarios = []
    for scenario in self:
        unpacked = scenario.copy()
        for inner_key, inner_value in scenario[field].items():
            target = prefix + inner_key if prefix else inner_key
            unpacked[target] = inner_value
        if drop_field:
            unpacked.pop(field)
        unpacked_scenarios.append(unpacked)
    return ScenarioList(unpacked_scenarios)
|
492
|
+
|
493
|
+
def transform(
    self, field: str, func: Callable, new_name: Optional[str] = None
) -> ScenarioList:
    """Apply a function to one field of every scenario.

    :param field: The field whose value is passed to ``func``.
    :param func: The function to apply to the field value.
    :param new_name: Where to store the result; defaults to overwriting ``field``.

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.transform('b', lambda x: x + 1)
    ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 2})])

    """
    target = new_name or field
    transformed = []
    for scenario in self:
        updated = scenario.copy()
        updated[target] = func(scenario[field])
        transformed.append(updated)
    return ScenarioList(transformed)
|
513
|
+
|
514
|
+
def mutate(
    self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
) -> ScenarioList:
    """
    Return a new ScenarioList in which every scenario gains a computed variable.

    :param new_var_string: An assignment of the form ``"name = expression"``.
        The expression is evaluated per scenario with the scenario's fields as names.
    :param functions_dict: Functions made available to the expression.

    Raises ``ScenarioError`` when the string has no ``=``, when the variable
    name is invalid, or when evaluating the expression fails.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.mutate("c = a + b")
    ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 1, 'b': 1, 'c': 2})])

    """
    if "=" not in new_var_string:
        raise ScenarioError(
            f"Mutate requires an '=' in the string, but '{new_var_string}' doesn't have one."
        )
    # Split on the first '=' only, so the expression may itself contain '='.
    name_part, _, expression = new_var_string.partition("=")
    var_name = name_part.strip()
    from edsl.utilities.utilities import is_valid_variable_name

    if not is_valid_variable_name(var_name):
        raise ScenarioError(f"{var_name} is not a valid variable name.")

    functions_dict = functions_dict or {}

    try:
        new_data = []
        for old_scenario in self:
            evaluator = EvalWithCompoundTypes(
                names=old_scenario, functions=functions_dict
            )
            value = evaluator.eval(expression)
            updated = old_scenario.copy()
            updated[var_name] = value
            new_data.append(updated)
    except Exception as e:
        raise ScenarioError(f"Error in mutate. Exception:{e}")

    return ScenarioList(new_data)
|
560
|
+
|
561
|
+
def order_by(self, *fields: str, reverse: bool = False) -> ScenarioList:
    """Sort the scenarios by the tuple of values of the given fields.

    :param fields: The fields to order by, in priority order.
    :param reverse: Whether to reverse the order.
    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.order_by('b', 'a')
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    """
    ordered = sorted(
        self,
        key=lambda scenario: tuple(scenario[name] for name in fields),
        reverse=reverse,
    )
    return ScenarioList(ordered)
|
577
|
+
|
578
|
+
def duplicate(self) -> ScenarioList:
    """Build a new ScenarioList from a ``copy()`` of each scenario.

    >>> sl = ScenarioList.example()
    >>> sl_copy = sl.duplicate()
    >>> sl == sl_copy
    True
    >>> sl is sl_copy
    False
    """
    copies = []
    for scenario in self:
        copies.append(scenario.copy())
    return ScenarioList(copies)
|
589
|
+
|
590
|
+
def filter(self, expression: str) -> ScenarioList:
    """
    Keep only the scenarios for which an expression evaluates truthy.

    :param expression: The expression to filter by; it is evaluated once per
        scenario with the scenario's fields available as names.

    An empty ScenarioList is returned unchanged (as a new, empty list) rather
    than raising. A warning is emitted when the scenarios do not all share
    the keys of the first scenario ("ragged" list).

    Raises ``ScenarioError`` when the expression refers to an unknown field
    or fails to evaluate for any other reason.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.filter("b == 2")
    ScenarioList([Scenario({'a': 1, 'b': 2})])
    >>> ScenarioList([]).filter("b == 2")
    ScenarioList([])
    """
    sl = self.duplicate()
    # Nothing to filter; also avoids indexing self[0] on an empty list below.
    if len(sl) == 0:
        return ScenarioList([])

    base_keys = set(self[0].keys())
    keys = set()
    for scenario in sl:
        keys.update(scenario.keys())
    if keys != base_keys:
        import warnings

        warnings.warn(
            "Ragged ScenarioList detected (different keys for different scenario entries). This may cause unexpected behavior."
        )

    new_data = []
    try:
        # Evaluate the expression against each scenario's own fields.
        for scenario in sl:
            if EvalWithCompoundTypes(names=scenario).eval(expression):
                new_data.append(scenario)
    except NameNotDefined as e:
        available_fields = ", ".join(self.data[0].keys() if self.data else [])
        raise ScenarioError(
            f"Error in filter: '{e}'\n"
            f"The expression '{expression}' refers to a field that does not exist.\n"
            f"Scenario: {scenario}\n"
            f"Available fields: {available_fields}\n"
            "Check your filter expression or consult the documentation: "
            "https://docs.expectedparrot.com/en/latest/scenarios.html#module-edsl.scenarios.Scenario"
        ) from None
    except Exception as e:
        # Keep the original exception attached as the explicit cause.
        raise ScenarioError(f"Error in filter. Exception:{e}") from e

    return ScenarioList(new_data)
|
640
|
+
|
641
|
+
@classmethod
def from_urls(
    cls, urls: list[str], field_name: Optional[str] = "text"
) -> ScenarioList:
    """Create a ScenarioList from a list of URLs.

    Declared as a classmethod because it builds a new list and reads no
    instance state; it can be called on the class or on an instance.

    :param urls: A list of URLs.
    :param field_name: The field name passed to ``Scenario.from_url`` for each URL.

    """
    return cls([Scenario.from_url(url, field_name) for url in urls])
|
651
|
+
|
652
|
+
def select(self, *fields: str) -> ScenarioList:
    """
    Select the referenced fields; the work is delegated to ``ScenarioSelector``.

    :param fields: The fields to select.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.select('a')
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
    """
    from edsl.scenarios.scenario_selector import ScenarioSelector

    selector = ScenarioSelector(self)
    return selector.select(*fields)
|
667
|
+
|
668
|
+
def drop(self, *fields: str) -> ScenarioList:
    """Return a new ScenarioList with the given fields dropped from each scenario.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.drop('a')
    ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
    """
    working_copy = self.duplicate()
    trimmed = []
    for scenario in working_copy:
        trimmed.append(scenario.drop(fields))
    return ScenarioList(trimmed)
|
679
|
+
|
680
|
+
def keep(self, *fields: str) -> ScenarioList:
    """Return a new ScenarioList whose scenarios retain only the given fields.

    :param fields: The fields to keep.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.keep('a')
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
    """
    working_copy = self.duplicate()
    retained = []
    for scenario in working_copy:
        retained.append(scenario.keep(fields))
    return ScenarioList(retained)
|
18
693
|
|
19
694
|
@classmethod
def from_list(
    cls, name: str, values: list, func: Optional[Callable] = None
) -> ScenarioList:
    """Build a ScenarioList with one single-field scenario per value.

    :param name: The name of the field.
    :param values: The list of values.
    :param func: An optional function applied to each value before storing it.

    Example:

    >>> ScenarioList.from_list('name', ['Alice', 'Bob'])
    ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    """
    if func:
        scenarios = [Scenario({name: func(value)}) for value in values]
    else:
        # No (or falsy) function: store the values unchanged.
        scenarios = [Scenario({name: value}) for value in values]
    return cls(scenarios)
|
712
|
+
|
713
|
+
def table(
    self,
    *fields: str,
    tablefmt: Optional[TableFormat] = None,
    pretty_labels: Optional[dict[str, str]] = None,
) -> str:
    """Return the ScenarioList as a table.

    The list is converted with ``to_dataset()`` and the call is forwarded to
    the dataset's ``table`` method.

    :param fields: The fields forwarded to the dataset's ``table`` method.
    :param tablefmt: A format name from ``tabulate.tabulate_formats``, or None.
    :param pretty_labels: Forwarded unchanged to the dataset's ``table`` method.

    Raises ``ValueError`` when ``tablefmt`` is not a known tabulate format.
    """

    from tabulate import tabulate_formats

    if tablefmt is not None and tablefmt not in tabulate_formats:
        # A single message string: passing two arguments to ValueError would
        # render the error as a tuple.
        raise ValueError(
            f"Invalid table format: {tablefmt}\n"
            f"Valid formats are: {tabulate_formats}"
        )
    return self.to_dataset().table(
        *fields, tablefmt=tablefmt, pretty_labels=pretty_labels
    )
|
731
|
+
|
732
|
+
def tree(self, node_list: Optional[List[str]] = None) -> str:
    """Render the ScenarioList as a tree via its dataset form.

    :param node_list: The list of nodes, forwarded to the dataset's ``tree`` method.
    """
    dataset = self.to_dataset()
    return dataset.tree(node_list)
|
738
|
+
|
739
|
+
def _summary(self) -> dict:
    """Summarise the list as its length and the list of its parameter keys.

    >>> ScenarioList.example()._summary()
    {'scenarios': 2, 'keys': ['persona']}
    """
    return {"scenarios": len(self), "keys": list(self.parameters)}
|
750
|
+
|
751
|
+
def reorder_keys(self, new_order: List[str]) -> ScenarioList:
    """Rebuild every scenario with its keys in the given order.

    :param new_order: The new order of the keys; it must contain exactly the
        keys in ``self.parameters`` (checked with an assertion).

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 3, 'b': 4})])
    >>> s.reorder_keys(['b', 'a'])
    ScenarioList([Scenario({'b': 2, 'a': 1}), Scenario({'b': 4, 'a': 3})])
    >>> s.reorder_keys(['a', 'b', 'c'])
    Traceback (most recent call last):
    ...
    AssertionError
    """
    assert set(new_order) == set(self.parameters)

    reordered = [
        Scenario({key: scenario[key] for key in new_order}) for scenario in self
    ]
    return ScenarioList(reordered)
|
773
|
+
|
774
|
+
def to_dataset(self) -> "Dataset":
    """
    Convert the ScenarioList to a Dataset.

    The dataset holds one ``{key: [values...]}`` entry per key. Keys appear in
    first-seen order across the scenarios, so the column order is
    deterministic even when scenarios have different keys; a scenario that
    lacks a key contributes ``None`` for it. An empty ScenarioList produces
    ``Dataset([])`` instead of raising ``IndexError``.

    >>> s = ScenarioList.from_list("a", [1,2,3])
    >>> s.to_dataset()
    Dataset([{'a': [1, 2, 3]}])
    >>> s = ScenarioList.from_list("a", [1,2,3]).add_list("b", [4,5,6])
    >>> s.to_dataset()
    Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
    """
    from edsl.results.Dataset import Dataset

    # A dict serves as an insertion-ordered set of keys.
    ordered_keys: dict = {}
    for scenario in self:
        for key in scenario.keys():
            ordered_keys.setdefault(key, None)

    data = [
        {key: [scenario.get(key, None) for scenario in self.data]}
        for key in ordered_keys
    ]
    return Dataset(data)
|
796
|
+
|
797
|
+
def unpack(
    self, field: str, new_names: Optional[List[str]] = None, keep_original=True
) -> ScenarioList:
    """Spread the elements of a sequence-valued field over several new fields.

    When ``new_names`` is not given, names ``<field>_0``, ``<field>_1``, ... are
    generated from the length of the field in the first scenario. With exactly
    one name, the whole field value is stored under that name.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': [2, True]}), Scenario({'a': 3, 'b': [3, False]})])
    >>> s.unpack('b')
    ScenarioList([Scenario({'a': 1, 'b': [2, True], 'b_0': 2, 'b_1': True}), Scenario({'a': 3, 'b': [3, False], 'b_0': 3, 'b_1': False})])
    >>> s.unpack('b', new_names=['c', 'd'], keep_original=False)
    ScenarioList([Scenario({'a': 1, 'c': 2, 'd': True}), Scenario({'a': 3, 'c': 3, 'd': False})])

    """
    if not new_names:
        new_names = [f"{field}_{i}" for i in range(len(self[0][field]))]

    single_target = len(new_names) == 1
    unpacked_scenarios = []
    for scenario in self:
        updated = scenario.copy()
        if single_target:
            updated[new_names[0]] = scenario[field]
        else:
            for position, target in enumerate(new_names):
                updated[target] = scenario[field][position]

        if not keep_original:
            del updated[field]
        unpacked_scenarios.append(updated)
    return ScenarioList(unpacked_scenarios)
|
825
|
+
|
826
|
+
@classmethod
def from_list_of_tuples(cls, *names: str, values: List[Tuple]) -> ScenarioList:
    """Build a ScenarioList from tuples, one field per position.

    The first name seeds the list via ``ScenarioList.from_list``; every
    further name is attached with ``add_list`` using the tuple element at the
    same position.
    """
    first_column = [row[0] for row in values]
    sl = ScenarioList.from_list(names[0], first_column)
    for position, name in enumerate(names[1:], start=1):
        column = [row[position] for row in values]
        sl = sl.add_list(name, column)
    return sl
|
832
|
+
|
833
|
+
def add_list(self, name: str, values: List[Any]) -> ScenarioList:
    """Return a copy in which the i-th scenario gains ``name`` set to ``values[i]``.

    Raises ``ScenarioError`` when ``values`` and the list differ in length.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.add_list('age', [30, 25])
    ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    """
    sl = self.duplicate()
    expected, received = len(sl), len(values)
    if received != expected:
        raise ScenarioError(
            f"Length of values ({len(values)}) does not match length of ScenarioList ({len(sl)})"
        )
    # Lengths match, so pairing positionally covers every scenario.
    for scenario, value in zip(sl, values):
        scenario[name] = value
    return sl
|
850
|
+
|
851
|
+
@classmethod
def create_empty_scenario_list(cls, n: int) -> ScenarioList:
    """Build a ScenarioList holding ``n`` scenarios that have no fields.

    Example:

    >>> ScenarioList.create_empty_scenario_list(3)
    ScenarioList([Scenario({}), Scenario({}), Scenario({})])
    """
    blanks = []
    for _ in range(n):
        blanks.append(Scenario({}))
    return ScenarioList(blanks)
|
861
|
+
|
862
|
+
def add_value(self, name: str, value: Any) -> ScenarioList:
    """Return a copy in which every scenario has ``name`` set to the same value.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.add_value('age', 30)
    ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 30})])
    """
    updated_list = self.duplicate()
    for entry in updated_list:
        entry[name] = value
    return updated_list
|
875
|
+
|
876
|
+
def rename(self, replacement_dict: dict) -> ScenarioList:
    """Return a new ScenarioList built from each scenario's own ``rename`` result.

    :param replacement_dict: A dictionary with the old names as keys and the new names as values.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    >>> s.rename({'name': 'first_name', 'age': 'years'})
    ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])

    """
    renamed = ScenarioList([])
    for scenario in self:
        renamed.append(scenario.rename(replacement_dict))
    return renamed
|
894
|
+
|
895
|
+
## NEEDS TO BE FIXED
|
896
|
+
# def new_column_names(self, new_names: List[str]) -> ScenarioList:
|
897
|
+
# """Rename the fields in the scenarios.
|
898
|
+
|
899
|
+
# Example:
|
900
|
+
|
901
|
+
# >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
|
902
|
+
# >>> s.new_column_names(['first_name', 'years'])
|
903
|
+
# ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
|
904
|
+
|
905
|
+
# """
|
906
|
+
# new_list = ScenarioList([])
|
907
|
+
# for obj in self:
|
908
|
+
# new_obj = obj.new_column_names(new_names)
|
909
|
+
# new_list.append(new_obj)
|
910
|
+
# return new_list
|
911
|
+
|
912
|
+
@classmethod
def from_sqlite(cls, filepath: str, table: str):
    """Create a ScenarioList from a SQLite database table.

    :param filepath: Path of the SQLite database file.
    :param table: Name of the table to read; every row becomes one Scenario
        keyed by the column names.

    The connection is closed before returning.
    """
    import sqlite3
    from contextlib import closing

    # sqlite3's own context manager only commits or rolls back; it does not
    # close the connection, so closing() is used to release it.
    with closing(sqlite3.connect(filepath)) as conn:
        cursor = conn.cursor()
        # NOTE(review): the table name is interpolated into the SQL text
        # (identifiers cannot be bound as parameters); only pass trusted names.
        cursor.execute(f"SELECT * FROM {table}")
        columns = [description[0] for description in cursor.description]
        data = cursor.fetchall()
    return cls([Scenario(dict(zip(columns, row))) for row in data])
|
923
|
+
|
924
|
+
@classmethod
def from_latex(cls, tex_file_path: str):
    """Create a ScenarioList with one scenario per non-blank line of a file.

    Each scenario has the fields ``line_no`` (1-based position in the file),
    ``text`` (the stripped line), and ``line_before`` / ``line_after`` (the
    neighbouring non-blank lines, or ``None`` at either end).

    :param tex_file_path: Path of the file to read.
    """
    with open(tex_file_path, "r") as file:
        lines = file.readlines()

    # Keep (0-based index, stripped text) for lines that are not blank.
    kept = []
    for line_index, raw in enumerate(lines):
        stripped = raw.strip()
        if stripped:
            kept.append((line_index, stripped))

    last_position = len(kept) - 1
    scenarios = []
    for position, (line_index, text) in enumerate(kept):
        previous_text = kept[position - 1][1] if position > 0 else None
        next_text = kept[position + 1][1] if position < last_position else None
        scenarios.append(
            Scenario(
                {
                    "line_no": line_index + 1,
                    "text": text,
                    "line_before": previous_text,
                    "line_after": next_text,
                }
            )
        )

    return ScenarioList(scenarios)
|
948
|
+
|
949
|
+
@classmethod
def from_google_doc(cls, url: str) -> ScenarioList:
    """Create a ScenarioList from a Google Doc.

    This method downloads the Google Doc as a Word file (.docx), saves it to a
    temporary file, reads it using the from_docx class method, and then
    removes the temporary file.

    Args:
        url (str): The URL to the Google Doc; it must contain ``/d/<id>/edit``.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the document id cannot be extracted from the URL.

    """
    import os
    import tempfile
    import requests
    # Not used directly in this method; presumably required by from_docx —
    # TODO confirm before removing.
    from docx import Document  # noqa: F401

    if "/edit" not in url or "/d/" not in url:
        raise ValueError("Invalid Google Doc URL format.")
    doc_id = url.split("/d/")[1].split("/edit")[0]

    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"

    # Download the Google Doc as a Word file (.docx)
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so the file can be reopened by name after this handle closes.
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    try:
        return cls.from_docx(temp_filename)
    finally:
        # Best-effort cleanup; a failed removal must not mask the result or
        # an error raised by from_docx.
        try:
            os.remove(temp_filename)
        except OSError:
            pass
|
985
|
+
|
986
|
+
@classmethod
def from_pandas(cls, df) -> ScenarioList:
    """Build a ScenarioList with one scenario per row of a pandas DataFrame.

    Example:

    >>> import pandas as pd
    >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25], 'location': ['New York', 'Los Angeles']})
    >>> ScenarioList.from_pandas(df)
    ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
    """
    records = df.to_dict(orient="records")
    scenarios = []
    for record in records:
        scenarios.append(Scenario(record))
    return cls(scenarios)
|
998
|
+
|
999
|
+
@classmethod
def from_wikipedia(cls, url: str, table_index: int = 0):
    """
    Create a ScenarioList from a table on a Wikipedia page.

    The page is downloaded once with ``requests`` and its HTML is parsed with
    ``pandas.read_html``; the selected table is converted with ``from_pandas``.

    Parameters:
        url (str): The URL of the Wikipedia page.
        table_index (int): The index of the table to extract (default is 0).

    Returns:
        ScenarioList: The scenarios built from the selected table, or
        ``None`` when fetching or parsing fails. Failures are reported with
        ``print`` and are not raised.

    Example usage::

        url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
        sl = ScenarioList.from_wikipedia(url, 0)
    """
    from io import StringIO

    import pandas as pd
    import requests
    from requests.exceptions import RequestException

    try:
        response = requests.get(url)
        response.raise_for_status()  # Raises HTTPError for bad responses

        # Parse the HTML that was already downloaded instead of fetching the
        # page a second time.
        tables = pd.read_html(StringIO(response.text))

        # Ensure the requested table index is within the range of available tables
        if table_index >= len(tables) or table_index < 0:
            raise IndexError(
                f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
            )

        return cls.from_pandas(tables[table_index])

    except RequestException as e:
        print(f"Error fetching the URL: {e}")
    except ValueError as e:
        print(f"Error parsing tables: {e}")
    except IndexError as e:
        print(e)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

    # Deliberate best-effort behaviour: failures above fall through to here.
    return None
|
1054
|
+
|
1055
|
+
def to_key_value(self, field: str, value=None) -> Union[dict, set]:
    """Return the values of a field as a set, or a field-to-field mapping.

    :param field: The field to extract values from.
    :param value: An optional second field; when given, the result is a dict
        mapping each scenario's ``field`` value to its ``value`` field.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.to_key_value('name') == {'Alice', 'Bob'}
    True
    """
    if value is not None:
        mapping = {}
        for scenario in self:
            mapping[scenario[field]] = scenario[value]
        return mapping

    distinct = set()
    for scenario in self:
        distinct.add(scenario[field])
    return distinct
|
1071
|
+
|
1072
|
+
@classmethod
def from_excel(
    cls, filename: str, sheet_name: Optional[str] = None
) -> ScenarioList:
    """Create a ScenarioList from one sheet of an Excel file.

    When no sheet_name is given and the workbook has a single sheet, that
    sheet is used. When it has several sheets, their names are printed and a
    ValueError asks the caller to choose one.

    Example:

    >>> import tempfile
    >>> import os
    >>> import pandas as pd
    >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
    ...     df1 = pd.DataFrame({
    ...         'name': ['Alice', 'Bob'],
    ...         'age': [30, 25],
    ...         'location': ['New York', 'Los Angeles']
    ...     })
    ...     df2 = pd.DataFrame({
    ...         'name': ['Charlie', 'David'],
    ...         'age': [35, 40],
    ...         'location': ['Chicago', 'Boston']
    ...     })
    ...     with pd.ExcelWriter(f.name) as writer:
    ...         df1.to_excel(writer, sheet_name='Sheet1', index=False)
    ...         df2.to_excel(writer, sheet_name='Sheet2', index=False)
    ...     temp_filename = f.name
    >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
    >>> len(scenario_list)
    2
    >>> scenario_list[0]['name']
    'Alice'
    >>> scenario_list = ScenarioList.from_excel(temp_filename)  # Should raise an error and list sheets
    Traceback (most recent call last):
    ...
    ValueError: Please provide a sheet name to load data from.
    """
    from edsl.scenarios.Scenario import Scenario
    import pandas as pd

    # sheet_name=None loads every sheet, keyed by sheet name.
    workbook = pd.read_excel(filename, sheet_name=None)

    if sheet_name is None:
        if len(workbook) > 1:
            print("The Excel file contains multiple sheets:")
            for name in workbook.keys():
                print(f"- {name}")
            raise ValueError("Please provide a sheet name to load data from.")
        # Exactly one sheet: use it.
        sheet_name = list(workbook.keys())[0]

    df = pd.read_excel(filename, sheet_name=sheet_name)

    observations = [Scenario(row.to_dict()) for _, row in df.iterrows()]
    return cls(observations)
|
1136
|
+
|
1137
|
+
@classmethod
|
1138
|
+
def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
    """Create a ScenarioList from a Google Sheet.

    The sheet is downloaded through its ``export?format=xlsx`` endpoint,
    written to a temporary ``.xlsx`` file, and loaded with ``from_excel``.
    The temporary file is removed once loading finishes or fails.

    Args:
        url (str): The URL to the Google Sheet. It must contain both a
            ``/d/<sheet id>`` segment and ``/edit``.
        sheet_name (str, optional): The name of the sheet to load. If None,
            the method behaves the same as from_excel regarding multiple sheets.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the sheet id cannot be extracted from ``url``.
        requests.HTTPError: If the export request returns an error status.
    """
    import os
    import tempfile

    # Validate before importing requests or touching the network, and report
    # every malformed URL the same way (a missing "/d/" used to surface as a
    # bare IndexError).
    if "/edit" not in url or "/d/" not in url:
        raise ValueError("Invalid Google Sheet URL format.")
    sheet_id = url.split("/d/")[1].split("/edit")[0]

    export_url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
    )

    import requests

    # Download the Google Sheet as an Excel file
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so the file can be reopened by name after it is closed;
    # that makes us responsible for removing it.
    temp_file = tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False)
    try:
        with temp_file:
            temp_file.write(response.content)
        return cls.from_excel(temp_file.name, sheet_name=sheet_name)
    finally:
        os.remove(temp_file.name)
|
1177
|
+
|
1178
|
+
@classmethod
|
1179
|
+
def from_delimited_file(
    cls, source: Union[str, "ParseResult"], delimiter: str = ","
) -> ScenarioList:
    """Create a ScenarioList from a delimited file (CSV/TSV) or URL.

    Args:
        source: A local file path, a URL string (anything ``urlparse`` gives
            both a scheme and a netloc for), or an already-parsed
            ``ParseResult``.
        delimiter: The field separator passed to ``csv.reader``.

    Returns:
        ScenarioList: One Scenario per data row, keyed by the header row.
        A source with no header row yields an empty ScenarioList.

    Raises:
        requests.HTTPError: If a remote source returns an error status.
        OSError: If a local source cannot be opened.
    """
    import requests
    from edsl.scenarios.Scenario import Scenario
    from urllib.parse import urlparse
    from urllib.parse import ParseResult

    headers = {
        "Accept": "text/csv,application/csv,text/plain",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    }

    def is_url(source):
        try:
            result = urlparse(source)
            return all([result.scheme, result.netloc])
        except ValueError:
            return False

    if isinstance(source, ParseResult):
        remote_url = source.geturl()
    elif isinstance(source, str) and is_url(source):
        remote_url = source
    else:
        remote_url = None

    # Acquire the stream outside any cleanup scope: if the download or open()
    # fails there is nothing to close, and the original error propagates
    # instead of an UnboundLocalError raised from a `finally` clause.
    if remote_url is not None:
        response = requests.get(remote_url, headers=headers)
        response.raise_for_status()
        file_obj = StringIO(response.text)
    else:
        # newline="" lets the csv module handle line endings itself, which is
        # what keeps newlines embedded in quoted fields intact.
        file_obj = open(source, "r", newline="")

    with file_obj:
        reader = csv.reader(file_obj, delimiter=delimiter)
        header = next(reader, None)
        if header is None:
            # Completely empty input: no header, therefore no rows.
            observations = []
        else:
            observations = [Scenario(dict(zip(header, row))) for row in reader]

    return cls(observations)
|
30
1220
|
|
31
|
-
|
32
|
-
|
1221
|
+
# Convenience methods for specific file types
|
1222
|
+
@classmethod
|
1223
|
+
def from_csv(cls, source: Union[str, "ParseResult"]) -> ScenarioList:
    """Create a ScenarioList from a comma-separated file or URL.

    Thin wrapper that forwards ``source`` to ``from_delimited_file`` with a
    comma as the delimiter.
    """
    comma = ","
    return cls.from_delimited_file(source, delimiter=comma)
|
1226
|
+
|
1227
|
+
def left_join(self, other: ScenarioList, by: Union[str, list[str]]) -> ScenarioList:
    """Left-join this ScenarioList with another one on the given key(s).

    The work is delegated to ``ScenarioJoin(self, other).left_join(by)``.

    Args:
        other: The ScenarioList to join with
        by: String or list of strings representing the key(s) to join on. Cannot be empty.

    >>> s1 = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    >>> s2 = ScenarioList([Scenario({'name': 'Alice', 'location': 'New York'}), Scenario({'name': 'Charlie', 'location': 'Los Angeles'})])
    >>> s3 = s1.left_join(s2, 'name')
    >>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
    True
    """
    from edsl.scenarios.scenario_join import ScenarioJoin

    return ScenarioJoin(self, other).left_join(by)
|
33
1244
|
|
34
1245
|
@classmethod
|
35
|
-
def
|
1246
|
+
def from_tsv(cls, source: Union[str, "ParseResult"]) -> ScenarioList:
    """Create a ScenarioList from a tab-separated file or URL.

    Thin wrapper that forwards ``source`` to ``from_delimited_file`` with a
    tab character as the delimiter.
    """
    tab = "\t"
    return cls.from_delimited_file(source, delimiter=tab)
|
1249
|
+
|
1250
|
+
def to_dict(self, sort: bool = False, add_edsl_version: bool = True) -> dict:
    """Serialize the list into a plain dictionary.

    :param sort: If True, the members are ordered by their hash before being serialized.
    :param add_edsl_version: If True, the 'edsl_version' and 'edsl_class_name' keys are added; the flag is also forwarded to each member's ``to_dict``.

    >>> s = ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood-fired pizza'})])
    >>> s.to_dict()
    {'scenarios': [{'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}, {'food': 'wood-fired pizza', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}], 'edsl_version': '...', 'edsl_class_name': 'ScenarioList'}

    """
    ordered = sorted(self, key=hash) if sort else self
    serialized = [
        member.to_dict(add_edsl_version=add_edsl_version) for member in ordered
    ]
    payload = {"scenarios": serialized}

    if not add_edsl_version:
        return payload

    from edsl import __version__

    payload["edsl_version"] = __version__
    payload["edsl_class_name"] = self.__class__.__name__
    return payload
|
1269
|
+
|
1270
|
+
def to(self, survey: Union["Survey", "QuestionBase"]) -> "Jobs":
    """Combine this ScenarioList with a survey (or a single question) via ``by``.

    :param survey: The Survey object to use for the Jobs object. A QuestionBase instance is first wrapped in a one-question Survey.

    Example:
    >>> from edsl import Survey
    >>> from edsl.jobs.Jobs import Jobs
    >>> from edsl import ScenarioList
    >>> isinstance(ScenarioList.example().to(Survey.example()), Jobs)
    True
    """
    from edsl.surveys.Survey import Survey
    from edsl.questions.QuestionBase import QuestionBase
    from edsl.jobs.Jobs import Jobs

    target = Survey([survey]) if isinstance(survey, QuestionBase) else survey
    return target.by(self)
|
1290
|
+
|
1291
|
+
@classmethod
|
1292
|
+
def gen(cls, scenario_dicts_list: List[dict]) -> ScenarioList:
    """Build a `ScenarioList` by wrapping each dictionary in a `Scenario`.

    Example:

    >>> ScenarioList.gen([{'name': 'Alice'}, {'name': 'Bob'}])
    ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])

    """
    from edsl.scenarios.Scenario import Scenario

    scenarios = list(map(Scenario, scenario_dicts_list))
    return cls(scenarios)
|
37
1304
|
|
38
1305
|
@classmethod
|
39
|
-
|
1306
|
+
@remove_edsl_version
|
1307
|
+
def from_dict(cls, data) -> ScenarioList:
    """Rebuild a `ScenarioList` from its dictionary form.

    Each entry under the 'scenarios' key is passed to ``Scenario.from_dict``.
    """
    from edsl.scenarios.Scenario import Scenario

    restored = list(map(Scenario.from_dict, data["scenarios"]))
    return cls(restored)
|
41
1312
|
|
42
|
-
|
43
|
-
|
44
|
-
"
|
1313
|
+
@classmethod
|
1314
|
+
def from_nested_dict(cls, data: dict) -> ScenarioList:
    """Create a `ScenarioList` from a dictionary of equal-length lists.

    The lists are validated first; then an empty ScenarioList with one entry
    per list element is created and each key is attached with ``add_list``.
    An empty dictionary produces a ScenarioList created with ``n=0``.

    :param data: Mapping from field name to a list of values.
    :raises ValueError: If the lists do not all have the same length.

    >>> data = {"headline": ["Armistice Signed, War Over: Celebrations Erupt Across City"], "date": ["1918-11-11"], "author": ["Jane Smith"]}
    >>> ScenarioList.from_nested_dict(data)
    ScenarioList([Scenario({'headline': 'Armistice Signed, War Over: Celebrations Erupt Across City', 'date': '1918-11-11', 'author': 'Jane Smith'})])

    """
    # Validate before allocating anything, and without next(iter(...)), which
    # raised StopIteration for an empty dictionary.
    lengths = {len(values) for values in data.values()}
    if len(lengths) > 1:
        raise ValueError(
            "All lists in the dictionary must be of the same length.",
        )
    common_length = lengths.pop() if lengths else 0

    s = ScenarioList.create_empty_scenario_list(n=common_length)
    for key, list_of_values in data.items():
        s = s.add_list(key, list_of_values)
    return s
|
1332
|
+
|
1333
|
+
def code(self) -> str:
|
1334
|
+
"""Create the Python code representation of a survey."""
|
45
1335
|
header_lines = [
|
46
1336
|
"from edsl.scenarios.Scenario import Scenario",
|
47
1337
|
"from edsl.scenarios.ScenarioList import ScenarioList",
|
@@ -55,34 +1345,114 @@ class ScenarioList(Base, UserList):
|
|
55
1345
|
return lines
|
56
1346
|
|
57
1347
|
@classmethod
|
58
|
-
def example(cls):
|
59
|
-
|
1348
|
+
def example(cls, randomize: bool = False) -> ScenarioList:
    """
    Return an example ScenarioList instance made of two example Scenarios.

    :param randomize: Forwarded to each ``Scenario.example`` call; if True, use Scenario's randomize method to randomize the values.
    """
    members = [Scenario.example(randomize) for _ in range(2)]
    return cls(members)
|
69
1355
|
|
1356
|
+
# def rich_print(self) -> None:
|
1357
|
+
# """Display an object as a table."""
|
1358
|
+
# from rich.table import Table
|
70
1359
|
|
71
|
-
|
72
|
-
|
73
|
-
|
1360
|
+
# table = Table(title="ScenarioList")
|
1361
|
+
# table.add_column("Index", style="bold")
|
1362
|
+
# table.add_column("Scenario")
|
1363
|
+
# for i, s in enumerate(self):
|
1364
|
+
# table.add_row(str(i), s.rich_print())
|
1365
|
+
# return table
|
74
1366
|
|
75
|
-
|
76
|
-
|
77
|
-
question_options=["Yes", "No"],
|
78
|
-
question_name="food_preference",
|
79
|
-
)
|
1367
|
+
def __getitem__(self, key: Union[int, slice]) -> Any:
    """Look up an element, a sub-list, or a key of the serialized form.

    An int returns the element at that position, a slice returns a new
    ScenarioList, and any other key is looked up in
    ``self.to_dict(add_edsl_version=False)``.

    Example:
    >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
    >>> s[0]
    Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})

    >>> s[:1]
    ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])

    """
    if isinstance(key, int):
        return super().__getitem__(key)
    if isinstance(key, slice):
        subset = super().__getitem__(key)
        return ScenarioList(subset)
    # Neither a position nor a slice: index the dictionary form instead.
    serialized = self.to_dict(add_edsl_version=False)
    return serialized[key]
|
1385
|
+
|
1386
|
+
def to_agent_list(self):
    """Convert the ScenarioList to an AgentList.

    Each scenario's data becomes the traits of one Agent. Two keys are
    treated specially:

    - 'name' is reserved for the agent's name. Its value is used as the
      agent name and is also kept as a trait under 'agent_name', with
      underscores appended until the key does not collide with an existing
      trait. A warning reports the key that was chosen.
    - 'agent_parameters' is removed from the traits; its 'instruction' and
      'name' entries, when present, are passed to the Agent. A 'name' given
      here takes precedence over the scenario's own 'name'.

    Example:

    >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
    >>> s.to_agent_list()
    AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
    """
    from edsl.agents.AgentList import AgentList
    from edsl.agents.Agent import Agent
    import warnings

    agents = []
    for scenario in self:
        new_scenario = scenario.copy().data
        name = None
        instruction = None

        if "name" in new_scenario:
            name = new_scenario.pop("name")
            proposed_agent_name = "agent_name"
            # Extend the key only while it collides with an existing trait.
            # The previous `not in` test was inverted and never terminated
            # unless 'agent_name' was already a trait.
            while proposed_agent_name in new_scenario:
                proposed_agent_name += "_"
            warnings.warn(
                f"The 'name' field is reserved for the agent's name---putting this value in {proposed_agent_name}"
            )
            new_scenario[proposed_agent_name] = name

        if "agent_parameters" in new_scenario:
            agent_parameters = new_scenario.pop("agent_parameters")
            instruction = agent_parameters.get("instruction", None)
            name = agent_parameters.get("name", name)

        # Build the agent once, after both special keys are handled, so a
        # name taken from the scenario is no longer discarded by a trailing
        # else-branch. Unset values are omitted so Agent's defaults apply.
        agent_kwargs = {"traits": new_scenario}
        if name is not None:
            agent_kwargs["name"] = name
        if instruction is not None:
            agent_kwargs["instruction"] = instruction

        agents.append(Agent(**agent_kwargs))

    return AgentList(agents)
|
1425
|
+
|
1426
|
+
def chunk(
    self,
    field,
    num_words: Optional[int] = None,
    num_lines: Optional[int] = None,
    include_original=False,
    hash_original=False,
) -> "ScenarioList":
    """Split every scenario on a field and collect the pieces in one list.

    All arguments are forwarded unchanged to each scenario's own ``chunk``
    method; the results are concatenated in the order of the scenarios.

    Example:

    >>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
    >>> s.chunk('text', num_words=3)
    ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
    """
    pieces = [
        piece
        for scenario in self
        for piece in scenario.chunk(
            field,
            num_words=num_words,
            num_lines=num_lines,
            include_original=include_original,
            hash_original=hash_original,
        )
    ]
    return ScenarioList(pieces)
|
1453
|
+
|
1454
|
+
|
1455
|
+
# Run this module's doctests when it is executed as a script.
if __name__ == "__main__":
    import doctest

    # ELLIPSIS lets "..." in expected output match any text, as used by the
    # '...' version placeholders in the to_dict doctest.
    doctest.testmod(optionflags=doctest.ELLIPSIS)
|