edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +430 -113
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/dataset/dataset_operations_mixin.py +1492 -0
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +321 -155
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +10 -16
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +420 -216
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/file_store.py +755 -0
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +20 -21
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -426
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/auto/AutoStudy.py +0 -130
- edsl/auto/StageBase.py +0 -243
- edsl/auto/StageGenerateSurvey.py +0 -178
- edsl/auto/StageLabelQuestions.py +0 -125
- edsl/auto/StagePersona.py +0 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
- edsl/auto/StagePersonaDimensionValues.py +0 -74
- edsl/auto/StagePersonaDimensions.py +0 -69
- edsl/auto/StageQuestions.py +0 -74
- edsl/auto/SurveyCreatorPipeline.py +0 -21
- edsl/auto/utilities.py +0 -218
- edsl/base/Base.py +0 -279
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -580
- edsl/data/CacheEntry.py +0 -230
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -539
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/DatasetExportMixin.py +0 -911
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/FileStore.py +0 -564
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1306
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.46.dist-info/RECORD +0 -366
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -1,4 +1,21 @@
|
|
1
|
-
"""
|
1
|
+
"""
|
2
|
+
ScenarioList provides a collection of Scenario objects with advanced operations.
|
3
|
+
|
4
|
+
The ScenarioList module extends the functionality of a simple list of Scenario objects,
|
5
|
+
providing powerful operations for data manipulation, filtering, transformation, and analysis.
|
6
|
+
It serves as a bridge between individual Scenarios and higher-level EDSL components like
|
7
|
+
Surveys and Jobs.
|
8
|
+
|
9
|
+
Key features include:
|
10
|
+
- Collection operations (filtering, sorting, sampling, and iteration)
|
11
|
+
- Data manipulation (transformation, joining, grouping, pivoting)
|
12
|
+
- Format conversion (to/from pandas, CSV, Excel, etc.)
|
13
|
+
- Advanced selection and retrieval mechanisms
|
14
|
+
- Integration with other EDSL components
|
15
|
+
|
16
|
+
ScenarioList is a core component in the EDSL framework for creating, managing, and
|
17
|
+
manipulating collections of Scenarios for experiments, surveys, and data processing tasks.
|
18
|
+
"""
|
2
19
|
|
3
20
|
from __future__ import annotations
|
4
21
|
from typing import (
|
@@ -10,12 +27,7 @@ from typing import (
|
|
10
27
|
Literal,
|
11
28
|
TYPE_CHECKING,
|
12
29
|
)
|
13
|
-
|
14
|
-
try:
|
15
|
-
from typing import TypeAlias
|
16
|
-
except ImportError:
|
17
|
-
from typing_extensions import TypeAlias
|
18
|
-
|
30
|
+
import warnings
|
19
31
|
import csv
|
20
32
|
import random
|
21
33
|
from io import StringIO
|
@@ -23,37 +35,34 @@ import inspect
|
|
23
35
|
from collections import UserList, defaultdict
|
24
36
|
from collections.abc import Iterable
|
25
37
|
|
26
|
-
if TYPE_CHECKING:
|
27
|
-
from urllib.parse import ParseResult
|
28
|
-
from edsl.results.Dataset import Dataset
|
29
|
-
from edsl.jobs.Jobs import Jobs
|
30
|
-
from edsl.surveys.Survey import Survey
|
31
|
-
from edsl.questions.QuestionBase import QuestionBase
|
32
|
-
|
33
|
-
|
34
38
|
from simpleeval import EvalWithCompoundTypes, NameNotDefined # type: ignore
|
35
|
-
|
36
39
|
from tabulate import tabulate_formats
|
37
40
|
|
38
|
-
|
39
|
-
from
|
41
|
+
try:
|
42
|
+
from typing import TypeAlias
|
43
|
+
except ImportError:
|
44
|
+
from typing_extensions import TypeAlias
|
40
45
|
|
41
|
-
|
42
|
-
from
|
43
|
-
from
|
44
|
-
from
|
45
|
-
from
|
46
|
-
from
|
46
|
+
if TYPE_CHECKING:
|
47
|
+
from urllib.parse import ParseResult
|
48
|
+
from ..dataset import Dataset
|
49
|
+
from ..jobs import Jobs
|
50
|
+
from ..surveys import Survey
|
51
|
+
from ..questions import QuestionBase
|
47
52
|
|
48
|
-
from edsl.scenarios.directory_scanner import DirectoryScanner
|
49
53
|
|
54
|
+
from ..base import Base
|
55
|
+
from ..utilities import remove_edsl_version, sanitize_string, is_valid_variable_name, dict_hash
|
56
|
+
from ..dataset import ScenarioListOperationsMixin
|
50
57
|
|
51
|
-
|
52
|
-
|
58
|
+
from .exceptions import ScenarioError
|
59
|
+
from .scenario import Scenario
|
60
|
+
from .directory_scanner import DirectoryScanner
|
61
|
+
from .scenario_list_pdf_tools import PdfTools
|
53
62
|
|
54
63
|
|
55
64
|
if TYPE_CHECKING:
|
56
|
-
from
|
65
|
+
from ..dataset import Dataset
|
57
66
|
|
58
67
|
TableFormat: TypeAlias = Literal[
|
59
68
|
"plain",
|
@@ -72,9 +81,42 @@ TableFormat: TypeAlias = Literal[
|
|
72
81
|
"tsv",
|
73
82
|
]
|
74
83
|
|
75
|
-
|
76
|
-
|
77
|
-
|
84
|
+
class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
|
85
|
+
"""
|
86
|
+
A collection of Scenario objects with advanced operations for manipulation and analysis.
|
87
|
+
|
88
|
+
ScenarioList extends Python's UserList to provide specialized functionality for
|
89
|
+
working with collections of Scenario objects. It inherits from Base to integrate
|
90
|
+
with EDSL's object model and from ScenarioListOperationsMixin to provide
|
91
|
+
powerful data manipulation capabilities.
|
92
|
+
|
93
|
+
The class provides methods for filtering, sorting, joining, transforming, and
|
94
|
+
analyzing collections of Scenarios. It's designed to work seamlessly with other
|
95
|
+
EDSL components like Surveys, Jobs, and Questions.
|
96
|
+
|
97
|
+
Attributes:
|
98
|
+
data (list): The underlying list of Scenario objects.
|
99
|
+
codebook (dict): Optional metadata describing the fields in the scenarios.
|
100
|
+
|
101
|
+
Examples:
|
102
|
+
Create a ScenarioList from Scenario objects:
|
103
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
104
|
+
>>> s1 = Scenario({"product": "apple", "price": 1.99})
|
105
|
+
>>> s2 = Scenario({"product": "banana", "price": 0.99})
|
106
|
+
>>> sl = ScenarioList([s1, s2])
|
107
|
+
|
108
|
+
Filter scenarios based on a condition:
|
109
|
+
>>> cheap_fruits = sl.filter("price < 1.50")
|
110
|
+
>>> len(cheap_fruits)
|
111
|
+
1
|
112
|
+
>>> cheap_fruits[0]["product"]
|
113
|
+
'banana'
|
114
|
+
|
115
|
+
Add a new column based on existing data:
|
116
|
+
>>> sl_with_tax = sl.mutate("tax = price * 0.08")
|
117
|
+
>>> sl_with_tax[0]["tax"]
|
118
|
+
0.1592
|
119
|
+
"""
|
78
120
|
|
79
121
|
__documentation__ = (
|
80
122
|
"https://docs.expectedparrot.com/en/latest/scenarios.html#scenariolist"
|
@@ -83,7 +125,24 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
83
125
|
def __init__(
|
84
126
|
self, data: Optional[list] = None, codebook: Optional[dict[str, str]] = None
|
85
127
|
):
|
86
|
-
"""
|
128
|
+
"""
|
129
|
+
Initialize a new ScenarioList with optional data and codebook.
|
130
|
+
|
131
|
+
Args:
|
132
|
+
data: A list of Scenario objects. If None, an empty list is used.
|
133
|
+
codebook: A dictionary mapping field names to descriptions or metadata.
|
134
|
+
Used for documentation and to provide context for fields.
|
135
|
+
|
136
|
+
Examples:
|
137
|
+
>>> sl = ScenarioList() # Empty list
|
138
|
+
>>> s1 = Scenario({"product": "apple"})
|
139
|
+
>>> s2 = Scenario({"product": "banana"})
|
140
|
+
>>> sl = ScenarioList([s1, s2]) # With data
|
141
|
+
|
142
|
+
>>> # With a codebook
|
143
|
+
>>> codebook = {"product": "Fruit name", "price": "Price in USD"}
|
144
|
+
>>> sl = ScenarioList([s1, s2], codebook=codebook)
|
145
|
+
"""
|
87
146
|
if data is not None:
|
88
147
|
super().__init__(data)
|
89
148
|
else:
|
@@ -91,21 +150,85 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
91
150
|
self.codebook = codebook or {}
|
92
151
|
|
93
152
|
def unique(self) -> ScenarioList:
|
94
|
-
"""
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
153
|
+
"""
|
154
|
+
Return a new ScenarioList containing only unique Scenario objects.
|
155
|
+
|
156
|
+
This method removes duplicate Scenario objects based on their hash values,
|
157
|
+
which are determined by their content. Two Scenarios with identical key-value
|
158
|
+
pairs will have the same hash and be considered duplicates.
|
159
|
+
|
160
|
+
Returns:
|
161
|
+
A new ScenarioList containing only unique Scenario objects.
|
162
|
+
|
163
|
+
Examples:
|
164
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
165
|
+
>>> s1 = Scenario({"a": 1})
|
166
|
+
>>> s2 = Scenario({"a": 1}) # Same content as s1
|
167
|
+
>>> s3 = Scenario({"a": 2})
|
168
|
+
>>> sl = ScenarioList([s1, s2, s3])
|
169
|
+
>>> unique_sl = sl.unique()
|
170
|
+
>>> len(unique_sl)
|
171
|
+
2
|
172
|
+
>>> unique_sl
|
173
|
+
ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
|
174
|
+
|
175
|
+
Notes:
|
176
|
+
- The order of scenarios in the result is not guaranteed due to the use of sets
|
177
|
+
- Uniqueness is determined by the Scenario's __hash__ method
|
178
|
+
- The original ScenarioList is not modified
|
99
179
|
"""
|
100
180
|
return ScenarioList(list(set(self)))
|
101
181
|
|
102
182
|
@property
|
103
183
|
def has_jinja_braces(self) -> bool:
|
104
|
-
"""
|
184
|
+
"""
|
185
|
+
Check if any Scenario in the list contains values with Jinja template braces.
|
186
|
+
|
187
|
+
This property checks all Scenarios in the list to determine if any contain
|
188
|
+
string values with Jinja template syntax ({{ and }}). This is important for
|
189
|
+
rendering templates and avoiding conflicts with other templating systems.
|
190
|
+
|
191
|
+
Returns:
|
192
|
+
True if any Scenario contains values with Jinja braces, False otherwise.
|
193
|
+
|
194
|
+
Examples:
|
195
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
196
|
+
>>> s1 = Scenario({"text": "Plain text"})
|
197
|
+
>>> s2 = Scenario({"text": "Template with {{variable}}"})
|
198
|
+
>>> sl1 = ScenarioList([s1])
|
199
|
+
>>> sl1.has_jinja_braces
|
200
|
+
False
|
201
|
+
>>> sl2 = ScenarioList([s1, s2])
|
202
|
+
>>> sl2.has_jinja_braces
|
203
|
+
True
|
204
|
+
"""
|
105
205
|
return any([scenario.has_jinja_braces for scenario in self])
|
106
206
|
|
107
207
|
def _convert_jinja_braces(self) -> ScenarioList:
|
108
|
-
"""
|
208
|
+
"""
|
209
|
+
Convert Jinja braces to alternative symbols in all Scenarios in the list.
|
210
|
+
|
211
|
+
This method creates a new ScenarioList where all Jinja template braces
|
212
|
+
({{ and }}) in string values are converted to alternative symbols (<< and >>).
|
213
|
+
This is useful when you need to prevent template processing or avoid conflicts
|
214
|
+
with other templating systems.
|
215
|
+
|
216
|
+
Returns:
|
217
|
+
A new ScenarioList with converted braces in all Scenarios.
|
218
|
+
|
219
|
+
Examples:
|
220
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
221
|
+
>>> s = Scenario({"text": "Template with {{variable}}"})
|
222
|
+
>>> sl = ScenarioList([s])
|
223
|
+
>>> converted = sl._convert_jinja_braces()
|
224
|
+
>>> converted[0]["text"]
|
225
|
+
'Template with <<variable>>'
|
226
|
+
|
227
|
+
Notes:
|
228
|
+
- The original ScenarioList is not modified
|
229
|
+
- This is primarily intended for internal use
|
230
|
+
- The default replacement symbols are << and >>
|
231
|
+
"""
|
109
232
|
return ScenarioList([scenario._convert_jinja_braces() for scenario in self])
|
110
233
|
|
111
234
|
def give_valid_names(self, existing_codebook: dict = None) -> ScenarioList:
|
@@ -202,7 +325,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
202
325
|
url={https://arxiv.org/abs/2407.11418},
|
203
326
|
}
|
204
327
|
"""
|
205
|
-
from
|
328
|
+
from ..questions import QuestionYesNo
|
206
329
|
|
207
330
|
new_scenario_list = self.duplicate()
|
208
331
|
q = QuestionYesNo(
|
@@ -341,8 +464,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
341
464
|
>>> hash(s)
|
342
465
|
1262252885757976162
|
343
466
|
"""
|
344
|
-
from edsl.utilities.utilities import dict_hash
|
345
|
-
|
346
467
|
return dict_hash(self.to_dict(sort=True, add_edsl_version=False))
|
347
468
|
|
348
469
|
def __eq__(self, other: Any) -> bool:
|
@@ -360,7 +481,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
360
481
|
ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
|
361
482
|
"""
|
362
483
|
from itertools import product
|
363
|
-
from
|
484
|
+
from .scenario import Scenario
|
364
485
|
if isinstance(other, Scenario):
|
365
486
|
other = ScenarioList([other])
|
366
487
|
elif not isinstance(other, ScenarioList):
|
@@ -436,12 +557,14 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
436
557
|
new_scenarios.append(new_scenario)
|
437
558
|
return ScenarioList(new_scenarios)
|
438
559
|
|
439
|
-
def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";") -> ScenarioList:
|
560
|
+
def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";", new_field_name: Optional[str] = None) -> ScenarioList:
|
440
561
|
"""Private method to handle concatenation logic for different output types.
|
441
562
|
|
442
563
|
:param fields: The fields to concatenate.
|
443
564
|
:param output_type: The type of output ("string", "list", or "set").
|
444
565
|
:param separator: The separator to use for string concatenation.
|
566
|
+
:param new_field_name: Optional custom name for the concatenated field.
|
567
|
+
If None, defaults to "concat_field1_field2_..."
|
445
568
|
|
446
569
|
Returns:
|
447
570
|
ScenarioList: A new ScenarioList with concatenated fields.
|
@@ -461,17 +584,17 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
461
584
|
values.append(new_scenario[field])
|
462
585
|
del new_scenario[field]
|
463
586
|
|
464
|
-
|
587
|
+
field_name = new_field_name if new_field_name is not None else f"concat_{'_'.join(fields)}"
|
465
588
|
|
466
589
|
if output_type == "string":
|
467
590
|
# Convert all values to strings and join with separator
|
468
|
-
new_scenario[
|
591
|
+
new_scenario[field_name] = separator.join(str(v) for v in values)
|
469
592
|
elif output_type == "list":
|
470
593
|
# Keep as a list
|
471
|
-
new_scenario[
|
594
|
+
new_scenario[field_name] = values
|
472
595
|
elif output_type == "set":
|
473
596
|
# Convert to a set (removes duplicates)
|
474
|
-
new_scenario[
|
597
|
+
new_scenario[field_name] = set(values)
|
475
598
|
else:
|
476
599
|
raise ValueError(f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'.")
|
477
600
|
|
@@ -479,11 +602,12 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
479
602
|
|
480
603
|
return ScenarioList(new_scenarios)
|
481
604
|
|
482
|
-
def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
|
605
|
+
def concatenate(self, fields: List[str], separator: str = ";", new_field_name: Optional[str] = None) -> ScenarioList:
|
483
606
|
"""Concatenate specified fields into a single string field.
|
484
607
|
|
485
608
|
:param fields: The fields to concatenate.
|
486
609
|
:param separator: The separator to use.
|
610
|
+
:param new_field_name: Optional custom name for the concatenated field.
|
487
611
|
|
488
612
|
Returns:
|
489
613
|
ScenarioList: A new ScenarioList with concatenated fields.
|
@@ -492,13 +616,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
492
616
|
>>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
|
493
617
|
>>> s.concatenate(['a', 'b', 'c'])
|
494
618
|
ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
|
619
|
+
>>> s.concatenate(['a', 'b', 'c'], new_field_name='combined')
|
620
|
+
ScenarioList([Scenario({'combined': '1;2;3'}), Scenario({'combined': '4;5;6'})])
|
495
621
|
"""
|
496
|
-
return self._concatenate(fields, output_type="string", separator=separator)
|
622
|
+
return self._concatenate(fields, output_type="string", separator=separator, new_field_name=new_field_name)
|
497
623
|
|
498
|
-
def concatenate_to_list(self, fields: List[str]) -> ScenarioList:
|
624
|
+
def concatenate_to_list(self, fields: List[str], new_field_name: Optional[str] = None) -> ScenarioList:
|
499
625
|
"""Concatenate specified fields into a single list field.
|
500
626
|
|
501
627
|
:param fields: The fields to concatenate.
|
628
|
+
:param new_field_name: Optional custom name for the concatenated field.
|
502
629
|
|
503
630
|
Returns:
|
504
631
|
ScenarioList: A new ScenarioList with fields concatenated into a list.
|
@@ -507,13 +634,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
507
634
|
>>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
|
508
635
|
>>> s.concatenate_to_list(['a', 'b', 'c'])
|
509
636
|
ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
|
637
|
+
>>> s.concatenate_to_list(['a', 'b', 'c'], new_field_name='values')
|
638
|
+
ScenarioList([Scenario({'values': [1, 2, 3]}), Scenario({'values': [4, 5, 6]})])
|
510
639
|
"""
|
511
|
-
return self._concatenate(fields, output_type="list")
|
640
|
+
return self._concatenate(fields, output_type="list", new_field_name=new_field_name)
|
512
641
|
|
513
|
-
def concatenate_to_set(self, fields: List[str]) -> ScenarioList:
|
642
|
+
def concatenate_to_set(self, fields: List[str], new_field_name: Optional[str] = None) -> ScenarioList:
|
514
643
|
"""Concatenate specified fields into a single set field.
|
515
644
|
|
516
645
|
:param fields: The fields to concatenate.
|
646
|
+
:param new_field_name: Optional custom name for the concatenated field.
|
517
647
|
|
518
648
|
Returns:
|
519
649
|
ScenarioList: A new ScenarioList with fields concatenated into a set.
|
@@ -522,11 +652,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
522
652
|
>>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
|
523
653
|
>>> s.concatenate_to_set(['a', 'b', 'c'])
|
524
654
|
ScenarioList([Scenario({'concat_a_b_c': {1, 2, 3}}), Scenario({'concat_a_b_c': {4, 5, 6}})])
|
525
|
-
>>> s
|
526
|
-
|
527
|
-
ScenarioList([Scenario({'concat_a_b_c': {1, 3}})])
|
655
|
+
>>> s.concatenate_to_set(['a', 'b', 'c'], new_field_name='unique_values')
|
656
|
+
ScenarioList([Scenario({'unique_values': {1, 2, 3}}), Scenario({'unique_values': {4, 5, 6}})])
|
528
657
|
"""
|
529
|
-
return self._concatenate(fields, output_type="set")
|
658
|
+
return self._concatenate(fields, output_type="set", new_field_name=new_field_name)
|
530
659
|
|
531
660
|
def unpack_dict(
|
532
661
|
self, field: str, prefix: Optional[str] = None, drop_field: bool = False
|
@@ -601,7 +730,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
601
730
|
)
|
602
731
|
raw_var_name, expression = new_var_string.split("=", 1)
|
603
732
|
var_name = raw_var_name.strip()
|
604
|
-
from edsl.utilities.utilities import is_valid_variable_name
|
605
733
|
|
606
734
|
if not is_valid_variable_name(var_name):
|
607
735
|
raise ScenarioError(f"{var_name} is not a valid variable name.")
|
@@ -729,7 +857,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
729
857
|
>>> s.select('a')
|
730
858
|
ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
|
731
859
|
"""
|
732
|
-
from
|
860
|
+
from .scenario_selector import ScenarioSelector
|
733
861
|
|
734
862
|
return ScenarioSelector(self).select(*fields)
|
735
863
|
|
@@ -850,7 +978,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
850
978
|
>>> s.to_dataset()
|
851
979
|
Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
|
852
980
|
"""
|
853
|
-
from
|
981
|
+
from ..dataset import Dataset
|
854
982
|
|
855
983
|
keys = list(self[0].keys())
|
856
984
|
for scenario in self:
|
@@ -959,6 +1087,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
959
1087
|
new_list.append(new_obj)
|
960
1088
|
return new_list
|
961
1089
|
|
1090
|
+
|
1091
|
+
|
962
1092
|
def replace_names(self, new_names: list) -> ScenarioList:
|
963
1093
|
"""Replace the field names in the scenarios with a new list of names.
|
964
1094
|
|
@@ -1156,6 +1286,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1156
1286
|
|
1157
1287
|
return scenario_list
|
1158
1288
|
|
1289
|
+
@classmethod
|
1159
1290
|
def from_wikipedia(cls, url: str, table_index: int = 0):
|
1160
1291
|
"""
|
1161
1292
|
Extracts a table from a Wikipedia page.
|
@@ -1230,13 +1361,19 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1230
1361
|
|
1231
1362
|
@classmethod
|
1232
1363
|
def from_excel(
|
1233
|
-
cls, filename: str, sheet_name: Optional[str] = None
|
1364
|
+
cls, filename: str, sheet_name: Optional[str] = None, skip_rows: Optional[List[int]] = None, use_codebook: bool = False
|
1234
1365
|
) -> ScenarioList:
|
1235
1366
|
"""Create a ScenarioList from an Excel file.
|
1236
1367
|
|
1237
1368
|
If the Excel file contains multiple sheets and no sheet_name is provided,
|
1238
1369
|
the method will print the available sheets and require the user to specify one.
|
1239
1370
|
|
1371
|
+
Args:
|
1372
|
+
filename (str): Path to the Excel file
|
1373
|
+
sheet_name (Optional[str]): Name of the sheet to load. If None and multiple sheets exist,
|
1374
|
+
will raise an error listing available sheets.
|
1375
|
+
skip_rows (Optional[List[int]]): List of row indices to skip (0-based). If None, all rows are included.
|
1376
|
+
|
1240
1377
|
Example:
|
1241
1378
|
|
1242
1379
|
>>> import tempfile
|
@@ -1244,30 +1381,33 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1244
1381
|
>>> import pandas as pd
|
1245
1382
|
>>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
|
1246
1383
|
... df1 = pd.DataFrame({
|
1247
|
-
... 'name': ['Alice', 'Bob'],
|
1248
|
-
... 'age': [30, 25],
|
1249
|
-
... 'location': ['New York', 'Los Angeles']
|
1384
|
+
... 'name': ['Alice', 'Bob', 'Charlie'],
|
1385
|
+
... 'age': [30, 25, 35],
|
1386
|
+
... 'location': ['New York', 'Los Angeles', 'Chicago']
|
1250
1387
|
... })
|
1251
1388
|
... df2 = pd.DataFrame({
|
1252
|
-
... 'name': ['
|
1253
|
-
... 'age': [
|
1254
|
-
... 'location': ['
|
1389
|
+
... 'name': ['David', 'Eve'],
|
1390
|
+
... 'age': [40, 45],
|
1391
|
+
... 'location': ['Boston', 'Seattle']
|
1255
1392
|
... })
|
1256
1393
|
... with pd.ExcelWriter(f.name) as writer:
|
1257
1394
|
... df1.to_excel(writer, sheet_name='Sheet1', index=False)
|
1258
1395
|
... df2.to_excel(writer, sheet_name='Sheet2', index=False)
|
1259
1396
|
... temp_filename = f.name
|
1397
|
+
>>> # Load all rows
|
1260
1398
|
>>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
|
1261
1399
|
>>> len(scenario_list)
|
1400
|
+
3
|
1401
|
+
>>> # Skip the second row (index 1)
|
1402
|
+
>>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1', skip_rows=[1])
|
1403
|
+
>>> len(scenario_list)
|
1262
1404
|
2
|
1263
1405
|
>>> scenario_list[0]['name']
|
1264
1406
|
'Alice'
|
1265
|
-
>>> scenario_list
|
1266
|
-
|
1267
|
-
...
|
1268
|
-
ValueError: Please provide a sheet name to load data from.
|
1407
|
+
>>> scenario_list[1]['name']
|
1408
|
+
'Charlie'
|
1269
1409
|
"""
|
1270
|
-
from
|
1410
|
+
from .scenario import Scenario
|
1271
1411
|
import pandas as pd
|
1272
1412
|
|
1273
1413
|
# Get all sheets
|
@@ -1287,11 +1427,28 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1287
1427
|
# Load the specified or determined sheet
|
1288
1428
|
df = pd.read_excel(filename, sheet_name=sheet_name)
|
1289
1429
|
|
1430
|
+
# Skip specified rows if any
|
1431
|
+
if skip_rows:
|
1432
|
+
df = df.drop(skip_rows)
|
1433
|
+
# Reset index to ensure continuous indexing
|
1434
|
+
df = df.reset_index(drop=True)
|
1435
|
+
|
1436
|
+
if use_codebook:
|
1437
|
+
codebook = {f"col_{i}": col for i, col in enumerate(df.columns)}
|
1438
|
+
koobedoc = {col:f"col_{i}" for i, col in enumerate(df.columns)}
|
1439
|
+
|
1290
1440
|
observations = []
|
1291
1441
|
for _, row in df.iterrows():
|
1292
|
-
|
1442
|
+
if use_codebook:
|
1443
|
+
observations.append(Scenario({koobedoc.get(k):v for k,v in row.to_dict().items()}))
|
1444
|
+
else:
|
1445
|
+
observations.append(Scenario(row.to_dict()))
|
1293
1446
|
|
1294
|
-
|
1447
|
+
|
1448
|
+
if use_codebook:
|
1449
|
+
return cls(observations, codebook=codebook)
|
1450
|
+
else:
|
1451
|
+
return cls(observations)
|
1295
1452
|
|
1296
1453
|
@classmethod
|
1297
1454
|
def from_google_sheet(cls, url: str, sheet_name: str = None, column_names: Optional[List[str]]= None) -> ScenarioList:
|
@@ -1359,7 +1516,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1359
1516
|
) -> ScenarioList:
|
1360
1517
|
"""Create a ScenarioList from a delimited file (CSV/TSV) or URL."""
|
1361
1518
|
import requests
|
1362
|
-
from
|
1519
|
+
from .scenario import Scenario
|
1363
1520
|
from urllib.parse import urlparse
|
1364
1521
|
from urllib.parse import ParseResult
|
1365
1522
|
|
@@ -1415,7 +1572,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1415
1572
|
>>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
|
1416
1573
|
True
|
1417
1574
|
"""
|
1418
|
-
from
|
1575
|
+
from .scenario_join import ScenarioJoin
|
1419
1576
|
|
1420
1577
|
sj = ScenarioJoin(self, other)
|
1421
1578
|
return sj.left_join(by)
|
@@ -1439,7 +1596,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1439
1596
|
d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
|
1440
1597
|
|
1441
1598
|
if add_edsl_version:
|
1442
|
-
from
|
1599
|
+
from .. import __version__
|
1443
1600
|
|
1444
1601
|
d["edsl_version"] = __version__
|
1445
1602
|
d["edsl_class_name"] = self.__class__.__name__
|
@@ -1451,15 +1608,13 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1451
1608
|
:param survey: The Survey object to use for the Jobs object.
|
1452
1609
|
|
1453
1610
|
Example:
|
1454
|
-
>>> from edsl import Survey
|
1455
|
-
>>> from edsl.jobs.Jobs import Jobs
|
1456
|
-
>>> from edsl import ScenarioList
|
1611
|
+
>>> from edsl import Survey, Jobs, ScenarioList
|
1457
1612
|
>>> isinstance(ScenarioList.example().to(Survey.example()), Jobs)
|
1458
1613
|
True
|
1459
1614
|
"""
|
1460
|
-
from
|
1461
|
-
from
|
1462
|
-
from
|
1615
|
+
from ..surveys import Survey
|
1616
|
+
from ..questions import QuestionBase
|
1617
|
+
from ..jobs import Jobs
|
1463
1618
|
|
1464
1619
|
if isinstance(survey, QuestionBase):
|
1465
1620
|
return Survey([survey]).by(self)
|
@@ -1476,7 +1631,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1476
1631
|
ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
|
1477
1632
|
|
1478
1633
|
"""
|
1479
|
-
from
|
1634
|
+
from .scenario import Scenario
|
1480
1635
|
|
1481
1636
|
return cls([Scenario(s) for s in scenario_dicts_list])
|
1482
1637
|
|
@@ -1484,7 +1639,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1484
1639
|
@remove_edsl_version
|
1485
1640
|
def from_dict(cls, data) -> ScenarioList:
|
1486
1641
|
"""Create a `ScenarioList` from a dictionary."""
|
1487
|
-
from
|
1642
|
+
from .scenario import Scenario
|
1488
1643
|
|
1489
1644
|
return cls([Scenario.from_dict(s) for s in data["scenarios"]])
|
1490
1645
|
|
@@ -1511,8 +1666,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1511
1666
|
def code(self) -> str:
|
1512
1667
|
"""Create the Python code representation of a survey."""
|
1513
1668
|
header_lines = [
|
1514
|
-
"from edsl.scenarios
|
1515
|
-
"from edsl.scenarios
|
1669
|
+
"from edsl.scenarios import Scenario",
|
1670
|
+
"from edsl.scenarios import ScenarioList",
|
1516
1671
|
]
|
1517
1672
|
lines = ["\n".join(header_lines)]
|
1518
1673
|
names = []
|
@@ -1531,17 +1686,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1531
1686
|
"""
|
1532
1687
|
return cls([Scenario.example(randomize), Scenario.example(randomize)])
|
1533
1688
|
|
1534
|
-
# def rich_print(self) -> None:
|
1535
|
-
# """Display an object as a table."""
|
1536
|
-
# from rich.table import Table
|
1537
|
-
|
1538
|
-
# table = Table(title="ScenarioList")
|
1539
|
-
# table.add_column("Index", style="bold")
|
1540
|
-
# table.add_column("Scenario")
|
1541
|
-
# for i, s in enumerate(self):
|
1542
|
-
# table.add_row(str(i), s.rich_print())
|
1543
|
-
# return table
|
1544
|
-
|
1545
1689
|
def __getitem__(self, key: Union[int, slice]) -> Any:
|
1546
1690
|
"""Return the item at the given index.
|
1547
1691
|
|
@@ -1570,9 +1714,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1570
1714
|
>>> s.to_agent_list()
|
1571
1715
|
AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
|
1572
1716
|
"""
|
1573
|
-
from
|
1574
|
-
from edsl.agents.Agent import Agent
|
1575
|
-
import warnings
|
1717
|
+
from ..agents import AgentList, Agent
|
1576
1718
|
|
1577
1719
|
agents = []
|
1578
1720
|
for scenario in self:
|
@@ -1629,12 +1771,14 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1629
1771
|
new_scenarios.extend(replacement_scenarios)
|
1630
1772
|
return ScenarioList(new_scenarios)
|
1631
1773
|
|
1632
|
-
def collapse(self, field: str) -> ScenarioList:
|
1774
|
+
def collapse(self, field: str, separator: Optional[str] = None, add_count: bool = False) -> ScenarioList:
|
1633
1775
|
"""Collapse a ScenarioList by grouping on all fields except the specified one,
|
1634
1776
|
collecting the values of the specified field into a list.
|
1635
1777
|
|
1636
1778
|
Args:
|
1637
1779
|
field: The field to collapse (whose values will be collected into lists)
|
1780
|
+
separator: Optional string to join the values with instead of keeping as a list
|
1781
|
+
add_count: If True, adds a field showing the number of collapsed rows
|
1638
1782
|
|
1639
1783
|
Returns:
|
1640
1784
|
ScenarioList: A new ScenarioList with the specified field collapsed into lists
|
@@ -1642,12 +1786,11 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1642
1786
|
Example:
|
1643
1787
|
>>> s = ScenarioList([
|
1644
1788
|
... Scenario({'category': 'fruit', 'color': 'red', 'item': 'apple'}),
|
1645
|
-
... Scenario({'category': 'fruit', 'color': 'yellow', 'item': 'banana'}),
|
1646
1789
|
... Scenario({'category': 'fruit', 'color': 'red', 'item': 'cherry'}),
|
1647
1790
|
... Scenario({'category': 'vegetable', 'color': 'green', 'item': 'spinach'})
|
1648
1791
|
... ])
|
1649
|
-
>>> s.collapse('item')
|
1650
|
-
ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry']
|
1792
|
+
>>> s.collapse('item', add_count=True)
|
1793
|
+
ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry'], 'num_collapsed_rows': 2}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach'], 'num_collapsed_rows': 1})])
|
1651
1794
|
"""
|
1652
1795
|
if not self:
|
1653
1796
|
return ScenarioList([])
|
@@ -1667,11 +1810,186 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1667
1810
|
result = []
|
1668
1811
|
for key, values in grouped.items():
|
1669
1812
|
new_scenario = dict(zip(id_vars, key))
|
1670
|
-
|
1813
|
+
if separator:
|
1814
|
+
new_scenario[field] = separator.join(values)
|
1815
|
+
else:
|
1816
|
+
new_scenario[field] = values
|
1817
|
+
if add_count:
|
1818
|
+
new_scenario['num_collapsed_rows'] = len(values)
|
1819
|
+
result.append(Scenario(new_scenario))
|
1820
|
+
|
1821
|
+
return ScenarioList(result)
|
1822
|
+
|
1823
|
+
def create_comparisons(
    self,
    bidirectional: bool = False,
    num_options: int = 2,
    option_prefix: str = "option_",
    use_alphabet: bool = False
) -> ScenarioList:
    """Bundle groups of scenarios into single scenarios for side-by-side comparison.

    Every scenario in this list is converted to a plain dictionary. Groups of
    ``num_options`` of those dictionaries are then packed into a new Scenario,
    one labelled key per group member.

    Args:
        bidirectional (bool): When True, every ordered arrangement is produced,
            so both (A, B) and (B, A) appear. When False, only groups whose
            members keep the order of the original list are produced.
        num_options (int): How many scenarios go into each comparison.
            Defaults to 2 (pairwise).
        option_prefix (str): Prefix for the numbered keys, giving "option_1",
            "option_2", ... by default. Not used when use_alphabet is True.
        use_alphabet (bool): When True, the keys are the uppercase letters
            A, B, C, ... instead of prefixed numbers.

    Returns:
        ScenarioList: One Scenario per group, mapping each label to the
            dictionary form of one of the original scenarios.

    Raises:
        ValueError: If num_options is below 2, is larger than the number of
            scenarios, or is larger than 26 while use_alphabet is True.

    Example:
        >>> s = ScenarioList([
        ...     Scenario({'id': 1, 'text': 'Option A'}),
        ...     Scenario({'id': 2, 'text': 'Option B'}),
        ...     Scenario({'id': 3, 'text': 'Option C'})
        ... ])
        >>> s.create_comparisons(use_alphabet=True)
        ScenarioList([Scenario({'A': {'id': 1, 'text': 'Option A'}, 'B': {'id': 2, 'text': 'Option B'}}), Scenario({'A': {'id': 1, 'text': 'Option A'}, 'B': {'id': 3, 'text': 'Option C'}}), Scenario({'A': {'id': 2, 'text': 'Option B'}, 'B': {'id': 3, 'text': 'Option C'}})])
        >>> s.create_comparisons(num_options=3, use_alphabet=True)
        ScenarioList([Scenario({'A': {'id': 1, 'text': 'Option A'}, 'B': {'id': 2, 'text': 'Option B'}, 'C': {'id': 3, 'text': 'Option C'}})])
    """
    from itertools import combinations, permutations
    import string

    # Reject impossible group sizes before doing any work.
    if num_options < 2:
        raise ValueError("num_options must be at least 2")
    if num_options > len(self):
        raise ValueError(f"num_options ({num_options}) cannot exceed the number of scenarios ({len(self)})")
    if use_alphabet and num_options > 26:
        raise ValueError("When using alphabet labels, num_options cannot exceed 26 (the number of letters in the English alphabet)")

    as_dicts = [member.to_dict(add_edsl_version=False) for member in self]

    # Every group holds exactly num_options members, so the labels are the
    # same for all groups and can be built once.
    if use_alphabet:
        labels = list(string.ascii_uppercase[:num_options])
    else:
        labels = [f"{option_prefix}{position}" for position in range(1, num_options + 1)]

    # Ordered arrangements when bidirectional, order-preserving selections otherwise.
    make_groups = permutations if bidirectional else combinations

    comparisons = [
        Scenario(dict(zip(labels, group)))
        for group in make_groups(as_dicts, num_options)
    ]
    return ScenarioList(comparisons)
|
1674
1905
|
|
1906
|
+
@classmethod
def from_parquet(cls, filepath: str) -> ScenarioList:
    """Load a Parquet file into a ScenarioList.

    The file is read into a pandas DataFrame with ``pd.read_parquet`` and that
    DataFrame is then passed to ``cls.from_pandas``.

    Args:
        filepath (str): Path to the Parquet file

    Returns:
        ScenarioList: The result of ``cls.from_pandas`` on the loaded DataFrame

    Raises:
        ImportError: If reading the file raises an ImportError. When its message
            mentions pyarrow or fastparquet, a new ImportError carrying install
            instructions is raised and chained to the original; any other
            ImportError propagates unchanged.

    Example:
        >>> import pandas as pd
        >>> import tempfile
        >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25]})
        >>> # The following would create and read a parquet file if dependencies are installed:
        >>> # with tempfile.NamedTemporaryFile(suffix='.parquet', delete=False) as f:
        >>> # df.to_parquet(f.name)
        >>> # scenario_list = ScenarioList.from_parquet(f.name)
        >>> # Instead, we'll demonstrate the equivalent result:
        >>> scenario_list = ScenarioList.from_pandas(df)
        >>> len(scenario_list)
        2
        >>> scenario_list[0]['name']
        'Alice'
    """
    import pandas as pd

    try:
        frame = pd.read_parquet(filepath)
    except ImportError as e:
        reason = str(e)
        if not any(engine in reason for engine in ("pyarrow", "fastparquet")):
            # Unrelated import failure: let the original error through untouched.
            raise
        # A Parquet engine is missing; replace the message with install guidance.
        raise ImportError(
            "Missing dependencies for Parquet support. Please install either pyarrow or fastparquet:\n"
            " pip install pyarrow\n"
            " or\n"
            " pip install fastparquet"
        ) from e

    return cls.from_pandas(frame)
|
1950
|
+
|
1951
|
+
def replace_values(self, replacements:dict) -> "ScenarioList":
    """
    Return a new ScenarioList in which matching values have been substituted.

    Each value is looked up in ``replacements`` by its string form
    (``str(value)``), so the keys of ``replacements`` must be strings for a
    match to occur. Values without a match are carried over as they are.
    The scenarios in this list are not modified.

    Args:
        replacements (dict): Dictionary of values to replace {old_value: new_value}

    Returns:
        ScenarioList: A new ScenarioList with replaced values

    Examples:
        >>> scenarios = ScenarioList([
        ...     Scenario({'a': 'nan', 'b': 1}),
        ...     Scenario({'a': 2, 'b': 'nan'})
        ... ])
        >>> replaced = scenarios.replace_values({'nan': None})
        >>> print(replaced)
        ScenarioList([Scenario({'a': None, 'b': 1}), Scenario({'a': 2, 'b': None})])
        >>> # Original scenarios remain unchanged
        >>> print(scenarios)
        ScenarioList([Scenario({'a': 'nan', 'b': 1}), Scenario({'a': 2, 'b': 'nan'})])
    """

    def _substitute(value):
        # Matching is done on the string form of the value, not the value itself.
        lookup = str(value)
        return replacements[lookup] if lookup in replacements else value

    rebuilt = [
        Scenario({key: _substitute(value) for key, value in scenario.items()})
        for scenario in self
    ]
    return ScenarioList(rebuilt)
|
1983
|
+
|
1984
|
+
@classmethod
def from_pdf(cls, filename_or_url, collapse_pages=False):
    """Hand a PDF source over to ``PdfTools.from_pdf`` and return its result.

    Both arguments are forwarded positionally and unchanged. ``cls`` is not
    used, so the returned object is whatever ``PdfTools.from_pdf`` produces
    (presumably a ScenarioList -- confirm against PdfTools).

    Args:
        filename_or_url: The PDF to load, forwarded as the first argument.
        collapse_pages: Forwarded as the second argument; defaults to False.
    """
    loaded = PdfTools.from_pdf(filename_or_url, collapse_pages)
    return loaded
|
1987
|
+
|
1988
|
+
@classmethod
def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
    """Hand a PDF path over to ``PdfTools.from_pdf_to_image`` and return its result.

    Both arguments are forwarded positionally and unchanged. ``cls`` is not
    used, so the returned object is whatever ``PdfTools.from_pdf_to_image``
    produces (its exact type is not visible here -- confirm against PdfTools).

    Args:
        pdf_path: The PDF to convert, forwarded as the first argument.
        image_format: Forwarded as the second argument; defaults to "jpeg".
    """
    converted = PdfTools.from_pdf_to_image(pdf_path, image_format)
    return converted
|
1991
|
+
|
1992
|
+
|
1675
1993
|
|
1676
1994
|
if __name__ == "__main__":
|
1677
1995
|
import doctest
|