edsl 0.1.47__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +303 -67
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +313 -167
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +4 -9
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +365 -220
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/{FileStore.py → file_store.py} +275 -189
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +18 -19
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/METADATA +1 -1
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -493
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -582
- edsl/data/CacheEntry.py +0 -238
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -544
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1301
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.47.dist-info/RECORD +0 -354
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -1,4 +1,21 @@
|
|
1
|
-
"""
|
1
|
+
"""
|
2
|
+
ScenarioList provides a collection of Scenario objects with advanced operations.
|
3
|
+
|
4
|
+
The ScenarioList module extends the functionality of a simple list of Scenario objects,
|
5
|
+
providing powerful operations for data manipulation, filtering, transformation, and analysis.
|
6
|
+
It serves as a bridge between individual Scenarios and higher-level EDSL components like
|
7
|
+
Surveys and Jobs.
|
8
|
+
|
9
|
+
Key features include:
|
10
|
+
- Collection operations (filtering, sorting, sampling, and iteration)
|
11
|
+
- Data manipulation (transformation, joining, grouping, pivoting)
|
12
|
+
- Format conversion (to/from pandas, CSV, Excel, etc.)
|
13
|
+
- Advanced selection and retrieval mechanisms
|
14
|
+
- Integration with other EDSL components
|
15
|
+
|
16
|
+
ScenarioList is a core component in the EDSL framework for creating, managing, and
|
17
|
+
manipulating collections of Scenarios for experiments, surveys, and data processing tasks.
|
18
|
+
"""
|
2
19
|
|
3
20
|
from __future__ import annotations
|
4
21
|
from typing import (
|
@@ -10,12 +27,7 @@ from typing import (
|
|
10
27
|
Literal,
|
11
28
|
TYPE_CHECKING,
|
12
29
|
)
|
13
|
-
|
14
|
-
try:
|
15
|
-
from typing import TypeAlias
|
16
|
-
except ImportError:
|
17
|
-
from typing_extensions import TypeAlias
|
18
|
-
|
30
|
+
import warnings
|
19
31
|
import csv
|
20
32
|
import random
|
21
33
|
from io import StringIO
|
@@ -23,37 +35,34 @@ import inspect
|
|
23
35
|
from collections import UserList, defaultdict
|
24
36
|
from collections.abc import Iterable
|
25
37
|
|
26
|
-
if TYPE_CHECKING:
|
27
|
-
from urllib.parse import ParseResult
|
28
|
-
from edsl.results.Dataset import Dataset
|
29
|
-
from edsl.jobs.Jobs import Jobs
|
30
|
-
from edsl.surveys.Survey import Survey
|
31
|
-
from edsl.questions.QuestionBase import QuestionBase
|
32
|
-
|
33
|
-
|
34
38
|
from simpleeval import EvalWithCompoundTypes, NameNotDefined # type: ignore
|
35
|
-
|
36
39
|
from tabulate import tabulate_formats
|
37
40
|
|
38
|
-
|
39
|
-
from
|
41
|
+
try:
|
42
|
+
from typing import TypeAlias
|
43
|
+
except ImportError:
|
44
|
+
from typing_extensions import TypeAlias
|
40
45
|
|
41
|
-
|
42
|
-
from
|
43
|
-
from
|
44
|
-
from
|
45
|
-
from
|
46
|
-
from
|
46
|
+
if TYPE_CHECKING:
|
47
|
+
from urllib.parse import ParseResult
|
48
|
+
from ..dataset import Dataset
|
49
|
+
from ..jobs import Jobs
|
50
|
+
from ..surveys import Survey
|
51
|
+
from ..questions import QuestionBase
|
47
52
|
|
48
|
-
from edsl.scenarios.directory_scanner import DirectoryScanner
|
49
53
|
|
54
|
+
from ..base import Base
|
55
|
+
from ..utilities import remove_edsl_version, sanitize_string, is_valid_variable_name, dict_hash
|
56
|
+
from ..dataset import ScenarioListOperationsMixin
|
50
57
|
|
51
|
-
|
52
|
-
|
58
|
+
from .exceptions import ScenarioError
|
59
|
+
from .scenario import Scenario
|
60
|
+
from .directory_scanner import DirectoryScanner
|
61
|
+
from .scenario_list_pdf_tools import PdfTools
|
53
62
|
|
54
63
|
|
55
64
|
if TYPE_CHECKING:
|
56
|
-
from
|
65
|
+
from ..dataset import Dataset
|
57
66
|
|
58
67
|
TableFormat: TypeAlias = Literal[
|
59
68
|
"plain",
|
@@ -72,9 +81,42 @@ TableFormat: TypeAlias = Literal[
|
|
72
81
|
"tsv",
|
73
82
|
]
|
74
83
|
|
75
|
-
|
76
|
-
|
77
|
-
|
84
|
+
class ScenarioList(Base, UserList, ScenarioListOperationsMixin):
|
85
|
+
"""
|
86
|
+
A collection of Scenario objects with advanced operations for manipulation and analysis.
|
87
|
+
|
88
|
+
ScenarioList extends Python's UserList to provide specialized functionality for
|
89
|
+
working with collections of Scenario objects. It inherits from Base to integrate
|
90
|
+
with EDSL's object model and from ScenarioListOperationsMixin to provide
|
91
|
+
powerful data manipulation capabilities.
|
92
|
+
|
93
|
+
The class provides methods for filtering, sorting, joining, transforming, and
|
94
|
+
analyzing collections of Scenarios. It's designed to work seamlessly with other
|
95
|
+
EDSL components like Surveys, Jobs, and Questions.
|
96
|
+
|
97
|
+
Attributes:
|
98
|
+
data (list): The underlying list of Scenario objects.
|
99
|
+
codebook (dict): Optional metadata describing the fields in the scenarios.
|
100
|
+
|
101
|
+
Examples:
|
102
|
+
Create a ScenarioList from Scenario objects:
|
103
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
104
|
+
>>> s1 = Scenario({"product": "apple", "price": 1.99})
|
105
|
+
>>> s2 = Scenario({"product": "banana", "price": 0.99})
|
106
|
+
>>> sl = ScenarioList([s1, s2])
|
107
|
+
|
108
|
+
Filter scenarios based on a condition:
|
109
|
+
>>> cheap_fruits = sl.filter("price < 1.50")
|
110
|
+
>>> len(cheap_fruits)
|
111
|
+
1
|
112
|
+
>>> cheap_fruits[0]["product"]
|
113
|
+
'banana'
|
114
|
+
|
115
|
+
Add a new column based on existing data:
|
116
|
+
>>> sl_with_tax = sl.mutate("tax = price * 0.08")
|
117
|
+
>>> sl_with_tax[0]["tax"]
|
118
|
+
0.1592
|
119
|
+
"""
|
78
120
|
|
79
121
|
__documentation__ = (
|
80
122
|
"https://docs.expectedparrot.com/en/latest/scenarios.html#scenariolist"
|
@@ -83,7 +125,24 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
83
125
|
def __init__(
|
84
126
|
self, data: Optional[list] = None, codebook: Optional[dict[str, str]] = None
|
85
127
|
):
|
86
|
-
"""
|
128
|
+
"""
|
129
|
+
Initialize a new ScenarioList with optional data and codebook.
|
130
|
+
|
131
|
+
Args:
|
132
|
+
data: A list of Scenario objects. If None, an empty list is used.
|
133
|
+
codebook: A dictionary mapping field names to descriptions or metadata.
|
134
|
+
Used for documentation and to provide context for fields.
|
135
|
+
|
136
|
+
Examples:
|
137
|
+
>>> sl = ScenarioList() # Empty list
|
138
|
+
>>> s1 = Scenario({"product": "apple"})
|
139
|
+
>>> s2 = Scenario({"product": "banana"})
|
140
|
+
>>> sl = ScenarioList([s1, s2]) # With data
|
141
|
+
|
142
|
+
>>> # With a codebook
|
143
|
+
>>> codebook = {"product": "Fruit name", "price": "Price in USD"}
|
144
|
+
>>> sl = ScenarioList([s1, s2], codebook=codebook)
|
145
|
+
"""
|
87
146
|
if data is not None:
|
88
147
|
super().__init__(data)
|
89
148
|
else:
|
@@ -91,21 +150,85 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
91
150
|
self.codebook = codebook or {}
|
92
151
|
|
93
152
|
def unique(self) -> ScenarioList:
|
94
|
-
"""
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
153
|
+
"""
|
154
|
+
Return a new ScenarioList containing only unique Scenario objects.
|
155
|
+
|
156
|
+
This method removes duplicate Scenario objects based on their hash values,
|
157
|
+
which are determined by their content. Two Scenarios with identical key-value
|
158
|
+
pairs will have the same hash and be considered duplicates.
|
159
|
+
|
160
|
+
Returns:
|
161
|
+
A new ScenarioList containing only unique Scenario objects.
|
162
|
+
|
163
|
+
Examples:
|
164
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
165
|
+
>>> s1 = Scenario({"a": 1})
|
166
|
+
>>> s2 = Scenario({"a": 1}) # Same content as s1
|
167
|
+
>>> s3 = Scenario({"a": 2})
|
168
|
+
>>> sl = ScenarioList([s1, s2, s3])
|
169
|
+
>>> unique_sl = sl.unique()
|
170
|
+
>>> len(unique_sl)
|
171
|
+
2
|
172
|
+
>>> unique_sl
|
173
|
+
ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
|
174
|
+
|
175
|
+
Notes:
|
176
|
+
- The order of scenarios in the result is not guaranteed due to the use of sets
|
177
|
+
- Uniqueness is determined by the Scenario's __hash__ method
|
178
|
+
- The original ScenarioList is not modified
|
99
179
|
"""
|
100
180
|
return ScenarioList(list(set(self)))
|
101
181
|
|
102
182
|
@property
|
103
183
|
def has_jinja_braces(self) -> bool:
|
104
|
-
"""
|
184
|
+
"""
|
185
|
+
Check if any Scenario in the list contains values with Jinja template braces.
|
186
|
+
|
187
|
+
This property checks all Scenarios in the list to determine if any contain
|
188
|
+
string values with Jinja template syntax ({{ and }}). This is important for
|
189
|
+
rendering templates and avoiding conflicts with other templating systems.
|
190
|
+
|
191
|
+
Returns:
|
192
|
+
True if any Scenario contains values with Jinja braces, False otherwise.
|
193
|
+
|
194
|
+
Examples:
|
195
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
196
|
+
>>> s1 = Scenario({"text": "Plain text"})
|
197
|
+
>>> s2 = Scenario({"text": "Template with {{variable}}"})
|
198
|
+
>>> sl1 = ScenarioList([s1])
|
199
|
+
>>> sl1.has_jinja_braces
|
200
|
+
False
|
201
|
+
>>> sl2 = ScenarioList([s1, s2])
|
202
|
+
>>> sl2.has_jinja_braces
|
203
|
+
True
|
204
|
+
"""
|
105
205
|
return any([scenario.has_jinja_braces for scenario in self])
|
106
206
|
|
107
207
|
def _convert_jinja_braces(self) -> ScenarioList:
|
108
|
-
"""
|
208
|
+
"""
|
209
|
+
Convert Jinja braces to alternative symbols in all Scenarios in the list.
|
210
|
+
|
211
|
+
This method creates a new ScenarioList where all Jinja template braces
|
212
|
+
({{ and }}) in string values are converted to alternative symbols (<< and >>).
|
213
|
+
This is useful when you need to prevent template processing or avoid conflicts
|
214
|
+
with other templating systems.
|
215
|
+
|
216
|
+
Returns:
|
217
|
+
A new ScenarioList with converted braces in all Scenarios.
|
218
|
+
|
219
|
+
Examples:
|
220
|
+
>>> from edsl.scenarios import Scenario, ScenarioList
|
221
|
+
>>> s = Scenario({"text": "Template with {{variable}}"})
|
222
|
+
>>> sl = ScenarioList([s])
|
223
|
+
>>> converted = sl._convert_jinja_braces()
|
224
|
+
>>> converted[0]["text"]
|
225
|
+
'Template with <<variable>>'
|
226
|
+
|
227
|
+
Notes:
|
228
|
+
- The original ScenarioList is not modified
|
229
|
+
- This is primarily intended for internal use
|
230
|
+
- The default replacement symbols are << and >>
|
231
|
+
"""
|
109
232
|
return ScenarioList([scenario._convert_jinja_braces() for scenario in self])
|
110
233
|
|
111
234
|
def give_valid_names(self, existing_codebook: dict = None) -> ScenarioList:
|
@@ -202,7 +325,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
202
325
|
url={https://arxiv.org/abs/2407.11418},
|
203
326
|
}
|
204
327
|
"""
|
205
|
-
from
|
328
|
+
from ..questions import QuestionYesNo
|
206
329
|
|
207
330
|
new_scenario_list = self.duplicate()
|
208
331
|
q = QuestionYesNo(
|
@@ -341,8 +464,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
341
464
|
>>> hash(s)
|
342
465
|
1262252885757976162
|
343
466
|
"""
|
344
|
-
from edsl.utilities.utilities import dict_hash
|
345
|
-
|
346
467
|
return dict_hash(self.to_dict(sort=True, add_edsl_version=False))
|
347
468
|
|
348
469
|
def __eq__(self, other: Any) -> bool:
|
@@ -360,7 +481,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
360
481
|
ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
|
361
482
|
"""
|
362
483
|
from itertools import product
|
363
|
-
from
|
484
|
+
from .scenario import Scenario
|
364
485
|
if isinstance(other, Scenario):
|
365
486
|
other = ScenarioList([other])
|
366
487
|
elif not isinstance(other, ScenarioList):
|
@@ -436,12 +557,14 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
436
557
|
new_scenarios.append(new_scenario)
|
437
558
|
return ScenarioList(new_scenarios)
|
438
559
|
|
439
|
-
def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";") -> ScenarioList:
|
560
|
+
def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";", new_field_name: Optional[str] = None) -> ScenarioList:
|
440
561
|
"""Private method to handle concatenation logic for different output types.
|
441
562
|
|
442
563
|
:param fields: The fields to concatenate.
|
443
564
|
:param output_type: The type of output ("string", "list", or "set").
|
444
565
|
:param separator: The separator to use for string concatenation.
|
566
|
+
:param new_field_name: Optional custom name for the concatenated field.
|
567
|
+
If None, defaults to "concat_field1_field2_..."
|
445
568
|
|
446
569
|
Returns:
|
447
570
|
ScenarioList: A new ScenarioList with concatenated fields.
|
@@ -461,17 +584,17 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
461
584
|
values.append(new_scenario[field])
|
462
585
|
del new_scenario[field]
|
463
586
|
|
464
|
-
|
587
|
+
field_name = new_field_name if new_field_name is not None else f"concat_{'_'.join(fields)}"
|
465
588
|
|
466
589
|
if output_type == "string":
|
467
590
|
# Convert all values to strings and join with separator
|
468
|
-
new_scenario[
|
591
|
+
new_scenario[field_name] = separator.join(str(v) for v in values)
|
469
592
|
elif output_type == "list":
|
470
593
|
# Keep as a list
|
471
|
-
new_scenario[
|
594
|
+
new_scenario[field_name] = values
|
472
595
|
elif output_type == "set":
|
473
596
|
# Convert to a set (removes duplicates)
|
474
|
-
new_scenario[
|
597
|
+
new_scenario[field_name] = set(values)
|
475
598
|
else:
|
476
599
|
raise ValueError(f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'.")
|
477
600
|
|
@@ -479,11 +602,12 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
479
602
|
|
480
603
|
return ScenarioList(new_scenarios)
|
481
604
|
|
482
|
-
def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
|
605
|
+
def concatenate(self, fields: List[str], separator: str = ";", new_field_name: Optional[str] = None) -> ScenarioList:
|
483
606
|
"""Concatenate specified fields into a single string field.
|
484
607
|
|
485
608
|
:param fields: The fields to concatenate.
|
486
609
|
:param separator: The separator to use.
|
610
|
+
:param new_field_name: Optional custom name for the concatenated field.
|
487
611
|
|
488
612
|
Returns:
|
489
613
|
ScenarioList: A new ScenarioList with concatenated fields.
|
@@ -492,13 +616,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
492
616
|
>>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
|
493
617
|
>>> s.concatenate(['a', 'b', 'c'])
|
494
618
|
ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
|
619
|
+
>>> s.concatenate(['a', 'b', 'c'], new_field_name='combined')
|
620
|
+
ScenarioList([Scenario({'combined': '1;2;3'}), Scenario({'combined': '4;5;6'})])
|
495
621
|
"""
|
496
|
-
return self._concatenate(fields, output_type="string", separator=separator)
|
622
|
+
return self._concatenate(fields, output_type="string", separator=separator, new_field_name=new_field_name)
|
497
623
|
|
498
|
-
def concatenate_to_list(self, fields: List[str]) -> ScenarioList:
|
624
|
+
def concatenate_to_list(self, fields: List[str], new_field_name: Optional[str] = None) -> ScenarioList:
|
499
625
|
"""Concatenate specified fields into a single list field.
|
500
626
|
|
501
627
|
:param fields: The fields to concatenate.
|
628
|
+
:param new_field_name: Optional custom name for the concatenated field.
|
502
629
|
|
503
630
|
Returns:
|
504
631
|
ScenarioList: A new ScenarioList with fields concatenated into a list.
|
@@ -507,13 +634,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
507
634
|
>>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
|
508
635
|
>>> s.concatenate_to_list(['a', 'b', 'c'])
|
509
636
|
ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
|
637
|
+
>>> s.concatenate_to_list(['a', 'b', 'c'], new_field_name='values')
|
638
|
+
ScenarioList([Scenario({'values': [1, 2, 3]}), Scenario({'values': [4, 5, 6]})])
|
510
639
|
"""
|
511
|
-
return self._concatenate(fields, output_type="list")
|
640
|
+
return self._concatenate(fields, output_type="list", new_field_name=new_field_name)
|
512
641
|
|
513
|
-
def concatenate_to_set(self, fields: List[str]) -> ScenarioList:
|
642
|
+
def concatenate_to_set(self, fields: List[str], new_field_name: Optional[str] = None) -> ScenarioList:
|
514
643
|
"""Concatenate specified fields into a single set field.
|
515
644
|
|
516
645
|
:param fields: The fields to concatenate.
|
646
|
+
:param new_field_name: Optional custom name for the concatenated field.
|
517
647
|
|
518
648
|
Returns:
|
519
649
|
ScenarioList: A new ScenarioList with fields concatenated into a set.
|
@@ -522,11 +652,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
522
652
|
>>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
|
523
653
|
>>> s.concatenate_to_set(['a', 'b', 'c'])
|
524
654
|
ScenarioList([Scenario({'concat_a_b_c': {1, 2, 3}}), Scenario({'concat_a_b_c': {4, 5, 6}})])
|
525
|
-
>>> s
|
526
|
-
|
527
|
-
ScenarioList([Scenario({'concat_a_b_c': {1, 3}})])
|
655
|
+
>>> s.concatenate_to_set(['a', 'b', 'c'], new_field_name='unique_values')
|
656
|
+
ScenarioList([Scenario({'unique_values': {1, 2, 3}}), Scenario({'unique_values': {4, 5, 6}})])
|
528
657
|
"""
|
529
|
-
return self._concatenate(fields, output_type="set")
|
658
|
+
return self._concatenate(fields, output_type="set", new_field_name=new_field_name)
|
530
659
|
|
531
660
|
def unpack_dict(
|
532
661
|
self, field: str, prefix: Optional[str] = None, drop_field: bool = False
|
@@ -601,7 +730,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
601
730
|
)
|
602
731
|
raw_var_name, expression = new_var_string.split("=", 1)
|
603
732
|
var_name = raw_var_name.strip()
|
604
|
-
from edsl.utilities.utilities import is_valid_variable_name
|
605
733
|
|
606
734
|
if not is_valid_variable_name(var_name):
|
607
735
|
raise ScenarioError(f"{var_name} is not a valid variable name.")
|
@@ -729,7 +857,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
729
857
|
>>> s.select('a')
|
730
858
|
ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
|
731
859
|
"""
|
732
|
-
from
|
860
|
+
from .scenario_selector import ScenarioSelector
|
733
861
|
|
734
862
|
return ScenarioSelector(self).select(*fields)
|
735
863
|
|
@@ -850,7 +978,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
850
978
|
>>> s.to_dataset()
|
851
979
|
Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
|
852
980
|
"""
|
853
|
-
from
|
981
|
+
from ..dataset import Dataset
|
854
982
|
|
855
983
|
keys = list(self[0].keys())
|
856
984
|
for scenario in self:
|
@@ -959,6 +1087,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
959
1087
|
new_list.append(new_obj)
|
960
1088
|
return new_list
|
961
1089
|
|
1090
|
+
|
1091
|
+
|
962
1092
|
def replace_names(self, new_names: list) -> ScenarioList:
|
963
1093
|
"""Replace the field names in the scenarios with a new list of names.
|
964
1094
|
|
@@ -1231,13 +1361,19 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1231
1361
|
|
1232
1362
|
@classmethod
|
1233
1363
|
def from_excel(
|
1234
|
-
cls, filename: str, sheet_name: Optional[str] = None
|
1364
|
+
cls, filename: str, sheet_name: Optional[str] = None, skip_rows: Optional[List[int]] = None, use_codebook: bool = False
|
1235
1365
|
) -> ScenarioList:
|
1236
1366
|
"""Create a ScenarioList from an Excel file.
|
1237
1367
|
|
1238
1368
|
If the Excel file contains multiple sheets and no sheet_name is provided,
|
1239
1369
|
the method will print the available sheets and require the user to specify one.
|
1240
1370
|
|
1371
|
+
Args:
|
1372
|
+
filename (str): Path to the Excel file
|
1373
|
+
sheet_name (Optional[str]): Name of the sheet to load. If None and multiple sheets exist,
|
1374
|
+
will raise an error listing available sheets.
|
1375
|
+
skip_rows (Optional[List[int]]): List of row indices to skip (0-based). If None, all rows are included.
|
1376
|
+
|
1241
1377
|
Example:
|
1242
1378
|
|
1243
1379
|
>>> import tempfile
|
@@ -1245,30 +1381,33 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1245
1381
|
>>> import pandas as pd
|
1246
1382
|
>>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
|
1247
1383
|
... df1 = pd.DataFrame({
|
1248
|
-
... 'name': ['Alice', 'Bob'],
|
1249
|
-
... 'age': [30, 25],
|
1250
|
-
... 'location': ['New York', 'Los Angeles']
|
1384
|
+
... 'name': ['Alice', 'Bob', 'Charlie'],
|
1385
|
+
... 'age': [30, 25, 35],
|
1386
|
+
... 'location': ['New York', 'Los Angeles', 'Chicago']
|
1251
1387
|
... })
|
1252
1388
|
... df2 = pd.DataFrame({
|
1253
|
-
... 'name': ['
|
1254
|
-
... 'age': [
|
1255
|
-
... 'location': ['
|
1389
|
+
... 'name': ['David', 'Eve'],
|
1390
|
+
... 'age': [40, 45],
|
1391
|
+
... 'location': ['Boston', 'Seattle']
|
1256
1392
|
... })
|
1257
1393
|
... with pd.ExcelWriter(f.name) as writer:
|
1258
1394
|
... df1.to_excel(writer, sheet_name='Sheet1', index=False)
|
1259
1395
|
... df2.to_excel(writer, sheet_name='Sheet2', index=False)
|
1260
1396
|
... temp_filename = f.name
|
1397
|
+
>>> # Load all rows
|
1261
1398
|
>>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
|
1262
1399
|
>>> len(scenario_list)
|
1400
|
+
3
|
1401
|
+
>>> # Skip the second row (index 1)
|
1402
|
+
>>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1', skip_rows=[1])
|
1403
|
+
>>> len(scenario_list)
|
1263
1404
|
2
|
1264
1405
|
>>> scenario_list[0]['name']
|
1265
1406
|
'Alice'
|
1266
|
-
>>> scenario_list
|
1267
|
-
|
1268
|
-
...
|
1269
|
-
ValueError: Please provide a sheet name to load data from.
|
1407
|
+
>>> scenario_list[1]['name']
|
1408
|
+
'Charlie'
|
1270
1409
|
"""
|
1271
|
-
from
|
1410
|
+
from .scenario import Scenario
|
1272
1411
|
import pandas as pd
|
1273
1412
|
|
1274
1413
|
# Get all sheets
|
@@ -1288,11 +1427,28 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1288
1427
|
# Load the specified or determined sheet
|
1289
1428
|
df = pd.read_excel(filename, sheet_name=sheet_name)
|
1290
1429
|
|
1430
|
+
# Skip specified rows if any
|
1431
|
+
if skip_rows:
|
1432
|
+
df = df.drop(skip_rows)
|
1433
|
+
# Reset index to ensure continuous indexing
|
1434
|
+
df = df.reset_index(drop=True)
|
1435
|
+
|
1436
|
+
if use_codebook:
|
1437
|
+
codebook = {f"col_{i}": col for i, col in enumerate(df.columns)}
|
1438
|
+
koobedoc = {col:f"col_{i}" for i, col in enumerate(df.columns)}
|
1439
|
+
|
1291
1440
|
observations = []
|
1292
1441
|
for _, row in df.iterrows():
|
1293
|
-
|
1442
|
+
if use_codebook:
|
1443
|
+
observations.append(Scenario({koobedoc.get(k):v for k,v in row.to_dict().items()}))
|
1444
|
+
else:
|
1445
|
+
observations.append(Scenario(row.to_dict()))
|
1294
1446
|
|
1295
|
-
|
1447
|
+
|
1448
|
+
if use_codebook:
|
1449
|
+
return cls(observations, codebook=codebook)
|
1450
|
+
else:
|
1451
|
+
return cls(observations)
|
1296
1452
|
|
1297
1453
|
@classmethod
|
1298
1454
|
def from_google_sheet(cls, url: str, sheet_name: str = None, column_names: Optional[List[str]]= None) -> ScenarioList:
|
@@ -1360,7 +1516,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1360
1516
|
) -> ScenarioList:
|
1361
1517
|
"""Create a ScenarioList from a delimited file (CSV/TSV) or URL."""
|
1362
1518
|
import requests
|
1363
|
-
from
|
1519
|
+
from .scenario import Scenario
|
1364
1520
|
from urllib.parse import urlparse
|
1365
1521
|
from urllib.parse import ParseResult
|
1366
1522
|
|
@@ -1416,7 +1572,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1416
1572
|
>>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
|
1417
1573
|
True
|
1418
1574
|
"""
|
1419
|
-
from
|
1575
|
+
from .scenario_join import ScenarioJoin
|
1420
1576
|
|
1421
1577
|
sj = ScenarioJoin(self, other)
|
1422
1578
|
return sj.left_join(by)
|
@@ -1440,7 +1596,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1440
1596
|
d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
|
1441
1597
|
|
1442
1598
|
if add_edsl_version:
|
1443
|
-
from
|
1599
|
+
from .. import __version__
|
1444
1600
|
|
1445
1601
|
d["edsl_version"] = __version__
|
1446
1602
|
d["edsl_class_name"] = self.__class__.__name__
|
@@ -1452,15 +1608,13 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1452
1608
|
:param survey: The Survey object to use for the Jobs object.
|
1453
1609
|
|
1454
1610
|
Example:
|
1455
|
-
>>> from edsl import Survey
|
1456
|
-
>>> from edsl.jobs.Jobs import Jobs
|
1457
|
-
>>> from edsl import ScenarioList
|
1611
|
+
>>> from edsl import Survey, Jobs, ScenarioList
|
1458
1612
|
>>> isinstance(ScenarioList.example().to(Survey.example()), Jobs)
|
1459
1613
|
True
|
1460
1614
|
"""
|
1461
|
-
from
|
1462
|
-
from
|
1463
|
-
from
|
1615
|
+
from ..surveys import Survey
|
1616
|
+
from ..questions import QuestionBase
|
1617
|
+
from ..jobs import Jobs
|
1464
1618
|
|
1465
1619
|
if isinstance(survey, QuestionBase):
|
1466
1620
|
return Survey([survey]).by(self)
|
@@ -1477,7 +1631,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1477
1631
|
ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
|
1478
1632
|
|
1479
1633
|
"""
|
1480
|
-
from
|
1634
|
+
from .scenario import Scenario
|
1481
1635
|
|
1482
1636
|
return cls([Scenario(s) for s in scenario_dicts_list])
|
1483
1637
|
|
@@ -1485,7 +1639,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1485
1639
|
@remove_edsl_version
|
1486
1640
|
def from_dict(cls, data) -> ScenarioList:
|
1487
1641
|
"""Create a `ScenarioList` from a dictionary."""
|
1488
|
-
from
|
1642
|
+
from .scenario import Scenario
|
1489
1643
|
|
1490
1644
|
return cls([Scenario.from_dict(s) for s in data["scenarios"]])
|
1491
1645
|
|
@@ -1512,8 +1666,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1512
1666
|
def code(self) -> str:
|
1513
1667
|
"""Create the Python code representation of a survey."""
|
1514
1668
|
header_lines = [
|
1515
|
-
"from edsl.scenarios
|
1516
|
-
"from edsl.scenarios
|
1669
|
+
"from edsl.scenarios import Scenario",
|
1670
|
+
"from edsl.scenarios import ScenarioList",
|
1517
1671
|
]
|
1518
1672
|
lines = ["\n".join(header_lines)]
|
1519
1673
|
names = []
|
@@ -1532,17 +1686,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1532
1686
|
"""
|
1533
1687
|
return cls([Scenario.example(randomize), Scenario.example(randomize)])
|
1534
1688
|
|
1535
|
-
# def rich_print(self) -> None:
|
1536
|
-
# """Display an object as a table."""
|
1537
|
-
# from rich.table import Table
|
1538
|
-
|
1539
|
-
# table = Table(title="ScenarioList")
|
1540
|
-
# table.add_column("Index", style="bold")
|
1541
|
-
# table.add_column("Scenario")
|
1542
|
-
# for i, s in enumerate(self):
|
1543
|
-
# table.add_row(str(i), s.rich_print())
|
1544
|
-
# return table
|
1545
|
-
|
1546
1689
|
def __getitem__(self, key: Union[int, slice]) -> Any:
|
1547
1690
|
"""Return the item at the given index.
|
1548
1691
|
|
@@ -1571,9 +1714,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1571
1714
|
>>> s.to_agent_list()
|
1572
1715
|
AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
|
1573
1716
|
"""
|
1574
|
-
from
|
1575
|
-
from edsl.agents.Agent import Agent
|
1576
|
-
import warnings
|
1717
|
+
from ..agents import AgentList, Agent
|
1577
1718
|
|
1578
1719
|
agents = []
|
1579
1720
|
for scenario in self:
|
@@ -1630,12 +1771,14 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1630
1771
|
new_scenarios.extend(replacement_scenarios)
|
1631
1772
|
return ScenarioList(new_scenarios)
|
1632
1773
|
|
1633
|
-
def collapse(self, field: str) -> ScenarioList:
|
1774
|
+
def collapse(self, field: str, separator: Optional[str] = None, add_count: bool = False) -> ScenarioList:
|
1634
1775
|
"""Collapse a ScenarioList by grouping on all fields except the specified one,
|
1635
1776
|
collecting the values of the specified field into a list.
|
1636
1777
|
|
1637
1778
|
Args:
|
1638
1779
|
field: The field to collapse (whose values will be collected into lists)
|
1780
|
+
separator: Optional string to join the values with instead of keeping as a list
|
1781
|
+
add_count: If True, adds a field showing the number of collapsed rows
|
1639
1782
|
|
1640
1783
|
Returns:
|
1641
1784
|
ScenarioList: A new ScenarioList with the specified field collapsed into lists
|
@@ -1643,12 +1786,11 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1643
1786
|
Example:
|
1644
1787
|
>>> s = ScenarioList([
|
1645
1788
|
... Scenario({'category': 'fruit', 'color': 'red', 'item': 'apple'}),
|
1646
|
-
... Scenario({'category': 'fruit', 'color': 'yellow', 'item': 'banana'}),
|
1647
1789
|
... Scenario({'category': 'fruit', 'color': 'red', 'item': 'cherry'}),
|
1648
1790
|
... Scenario({'category': 'vegetable', 'color': 'green', 'item': 'spinach'})
|
1649
1791
|
... ])
|
1650
|
-
>>> s.collapse('item')
|
1651
|
-
ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry']
|
1792
|
+
>>> s.collapse('item', add_count=True)
|
1793
|
+
ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry'], 'num_collapsed_rows': 2}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach'], 'num_collapsed_rows': 1})])
|
1652
1794
|
"""
|
1653
1795
|
if not self:
|
1654
1796
|
return ScenarioList([])
|
@@ -1668,7 +1810,12 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1668
1810
|
result = []
|
1669
1811
|
for key, values in grouped.items():
|
1670
1812
|
new_scenario = dict(zip(id_vars, key))
|
1671
|
-
|
1813
|
+
if separator:
|
1814
|
+
new_scenario[field] = separator.join(values)
|
1815
|
+
else:
|
1816
|
+
new_scenario[field] = values
|
1817
|
+
if add_count:
|
1818
|
+
new_scenario['num_collapsed_rows'] = len(values)
|
1672
1819
|
result.append(Scenario(new_scenario))
|
1673
1820
|
|
1674
1821
|
return ScenarioList(result)
|
@@ -1801,6 +1948,47 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1801
1948
|
# Convert the DataFrame to a ScenarioList
|
1802
1949
|
return cls.from_pandas(df)
|
1803
1950
|
|
1951
|
+
def replace_values(self, replacements:dict) -> "ScenarioList":
    """
    Build a new ScenarioList in which matching field values are swapped out.

    Matching is done on the string form of each value: ``str(value)`` is
    looked up in ``replacements``, so the keys of ``replacements`` must be
    strings (a key ``'1'`` matches the integer ``1``; an integer key ``1``
    never matches). Values without a match are carried over unchanged, and
    the scenarios in ``self`` are left untouched.

    Args:
        replacements (dict): Mapping of ``str(old_value)`` to the new value.

    Returns:
        ScenarioList: Newly constructed scenarios with the substitutions applied.

    Examples:
        >>> scenarios = ScenarioList([
        ...     Scenario({'a': 'nan', 'b': 1}),
        ...     Scenario({'a': 2, 'b': 'nan'})
        ... ])
        >>> replaced = scenarios.replace_values({'nan': None})
        >>> print(replaced)
        ScenarioList([Scenario({'a': None, 'b': 1}), Scenario({'a': 2, 'b': None})])
        >>> # Original scenarios remain unchanged
        >>> print(scenarios)
        ScenarioList([Scenario({'a': 'nan', 'b': 1}), Scenario({'a': 2, 'b': 'nan'})])
    """

    def _swap(value):
        # The lookup key is the value's string form, not the value itself.
        lookup = str(value)
        return replacements[lookup] if lookup in replacements else value

    return ScenarioList(
        [
            Scenario({key: _swap(value) for key, value in scenario.items()})
            for scenario in self
        ]
    )
|
1983
|
+
|
1984
|
+
@classmethod
def from_pdf(cls, filename_or_url, collapse_pages=False):
    """Create scenarios from a PDF by delegating to ``PdfTools.from_pdf``.

    Both arguments are forwarded positionally and the helper's result is
    returned as-is; ``cls`` itself is not used.

    Args:
        filename_or_url: Location of the PDF, forwarded unchanged.
            NOTE(review): the name suggests a local path or a URL is
            accepted -- confirm against ``PdfTools.from_pdf``.
        collapse_pages: Forwarded unchanged; defaults to ``False``.
            NOTE(review): presumably merges all pages into a single
            scenario -- confirm against ``PdfTools.from_pdf``.

    Returns:
        Whatever ``PdfTools.from_pdf`` returns.
        NOTE(review): presumably a ScenarioList -- confirm.
    """
    return PdfTools.from_pdf(filename_or_url, collapse_pages)
|
1987
|
+
|
1988
|
+
@classmethod
def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
    """Convert a PDF to images by delegating to ``PdfTools.from_pdf_to_image``.

    Both arguments are forwarded positionally and the helper's result is
    returned as-is; ``cls`` itself is not used.

    Args:
        pdf_path: Location of the PDF, forwarded unchanged.
        image_format: Forwarded unchanged; defaults to ``"jpeg"``.
            NOTE(review): the set of accepted formats is defined by
            ``PdfTools.from_pdf_to_image`` -- confirm there.

    Returns:
        Whatever ``PdfTools.from_pdf_to_image`` returns.
        NOTE(review): presumably scenarios holding the page images -- confirm.
    """
    return PdfTools.from_pdf_to_image(pdf_path, image_format)
|
1991
|
+
|
1804
1992
|
|
1805
1993
|
|
1806
1994
|
if __name__ == "__main__":
|