edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +430 -113
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/dataset/dataset_operations_mixin.py +1492 -0
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +321 -155
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +10 -16
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +420 -216
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/file_store.py +755 -0
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +20 -21
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -426
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/auto/AutoStudy.py +0 -130
- edsl/auto/StageBase.py +0 -243
- edsl/auto/StageGenerateSurvey.py +0 -178
- edsl/auto/StageLabelQuestions.py +0 -125
- edsl/auto/StagePersona.py +0 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
- edsl/auto/StagePersonaDimensionValues.py +0 -74
- edsl/auto/StagePersonaDimensions.py +0 -69
- edsl/auto/StageQuestions.py +0 -74
- edsl/auto/SurveyCreatorPipeline.py +0 -21
- edsl/auto/utilities.py +0 -218
- edsl/base/Base.py +0 -279
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -580
- edsl/data/CacheEntry.py +0 -230
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -539
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/DatasetExportMixin.py +0 -911
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/FileStore.py +0 -564
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1306
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.46.dist-info/RECORD +0 -366
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -1,911 +0,0 @@
|
|
1
|
-
"""Mixin class for exporting results."""
|
2
|
-
|
3
|
-
import io
|
4
|
-
import warnings
|
5
|
-
import textwrap
|
6
|
-
from typing import Optional, Tuple, Union, List
|
7
|
-
|
8
|
-
from edsl.results.file_exports import CSVExport, ExcelExport, JSONLExport, SQLiteExport
|
9
|
-
|
10
|
-
|
11
|
-
class DatasetExportMixin:
|
12
|
-
"""Mixin class for exporting Dataset objects."""
|
13
|
-
|
14
|
-
def relevant_columns(
|
15
|
-
self, data_type: Optional[str] = None, remove_prefix=False
|
16
|
-
) -> list:
|
17
|
-
"""Return the set of keys that are present in the dataset.
|
18
|
-
|
19
|
-
:param data_type: The data type to filter by.
|
20
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
21
|
-
|
22
|
-
>>> from edsl.results.Dataset import Dataset
|
23
|
-
>>> d = Dataset([{'a.b':[1,2,3,4]}])
|
24
|
-
>>> d.relevant_columns()
|
25
|
-
['a.b']
|
26
|
-
|
27
|
-
>>> d.relevant_columns(remove_prefix=True)
|
28
|
-
['b']
|
29
|
-
|
30
|
-
>>> d = Dataset([{'a':[1,2,3,4]}, {'b':[5,6,7,8]}])
|
31
|
-
>>> d.relevant_columns()
|
32
|
-
['a', 'b']
|
33
|
-
|
34
|
-
>>> from edsl.results import Results; Results.example().select('how_feeling', 'how_feeling_yesterday').relevant_columns()
|
35
|
-
['answer.how_feeling', 'answer.how_feeling_yesterday']
|
36
|
-
|
37
|
-
>>> from edsl.results import Results
|
38
|
-
>>> sorted(Results.example().select().relevant_columns(data_type = "model"))
|
39
|
-
['model.frequency_penalty', ...]
|
40
|
-
|
41
|
-
>>> Results.example().relevant_columns(data_type = "flimflam")
|
42
|
-
Traceback (most recent call last):
|
43
|
-
...
|
44
|
-
ValueError: No columns found for data type: flimflam. Available data types are: ...
|
45
|
-
"""
|
46
|
-
columns = [list(x.keys())[0] for x in self]
|
47
|
-
if remove_prefix:
|
48
|
-
columns = [column.split(".")[-1] for column in columns]
|
49
|
-
|
50
|
-
def get_data_type(column):
|
51
|
-
if "." in column:
|
52
|
-
return column.split(".")[0]
|
53
|
-
else:
|
54
|
-
return None
|
55
|
-
|
56
|
-
if data_type:
|
57
|
-
all_columns = columns[:]
|
58
|
-
columns = [
|
59
|
-
column for column in columns if get_data_type(column) == data_type
|
60
|
-
]
|
61
|
-
if len(columns) == 0:
|
62
|
-
all_data_types = sorted(
|
63
|
-
list(set(get_data_type(column) for column in all_columns))
|
64
|
-
)
|
65
|
-
raise ValueError(
|
66
|
-
f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
|
67
|
-
)
|
68
|
-
|
69
|
-
return columns
|
70
|
-
|
71
|
-
def num_observations(self):
|
72
|
-
"""Return the number of observations in the dataset.
|
73
|
-
|
74
|
-
>>> from edsl.results.Results import Results
|
75
|
-
>>> Results.example().num_observations()
|
76
|
-
4
|
77
|
-
"""
|
78
|
-
_num_observations = None
|
79
|
-
for entry in self:
|
80
|
-
key, values = list(entry.items())[0]
|
81
|
-
if _num_observations is None:
|
82
|
-
_num_observations = len(values)
|
83
|
-
else:
|
84
|
-
if len(values) != _num_observations:
|
85
|
-
raise ValueError(
|
86
|
-
f"The number of observations is not consistent across columns. "
|
87
|
-
f"Column '{key}' has {len(values)} observations, but previous columns had {_num_observations} observations."
|
88
|
-
)
|
89
|
-
|
90
|
-
return _num_observations
|
91
|
-
|
92
|
-
def _make_tabular(
|
93
|
-
self, remove_prefix: bool, pretty_labels: Optional[dict] = None
|
94
|
-
) -> tuple[list, List[list]]:
|
95
|
-
"""Turn the results into a tabular format.
|
96
|
-
|
97
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
98
|
-
|
99
|
-
>>> from edsl.results import Results
|
100
|
-
>>> r = Results.example()
|
101
|
-
>>> r.select('how_feeling')._make_tabular(remove_prefix = True)
|
102
|
-
(['how_feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
|
103
|
-
|
104
|
-
>>> r.select('how_feeling')._make_tabular(remove_prefix = True, pretty_labels = {'how_feeling': "How are you feeling"})
|
105
|
-
(['How are you feeling'], [['OK'], ['Great'], ['Terrible'], ['OK']])
|
106
|
-
"""
|
107
|
-
|
108
|
-
def create_dict_from_list_of_dicts(list_of_dicts):
|
109
|
-
for entry in list_of_dicts:
|
110
|
-
key, list_of_values = list(entry.items())[0]
|
111
|
-
yield key, list_of_values
|
112
|
-
|
113
|
-
tabular_repr = dict(create_dict_from_list_of_dicts(self.data))
|
114
|
-
|
115
|
-
full_header = [list(x.keys())[0] for x in self]
|
116
|
-
|
117
|
-
rows = []
|
118
|
-
for i in range(self.num_observations()):
|
119
|
-
row = [tabular_repr[h][i] for h in full_header]
|
120
|
-
rows.append(row)
|
121
|
-
|
122
|
-
if remove_prefix:
|
123
|
-
header = [h.split(".")[-1] for h in full_header]
|
124
|
-
else:
|
125
|
-
header = full_header
|
126
|
-
|
127
|
-
if pretty_labels is not None:
|
128
|
-
header = [pretty_labels.get(h, h) for h in header]
|
129
|
-
|
130
|
-
return header, rows
|
131
|
-
|
132
|
-
def print_long(self):
|
133
|
-
"""Print the results in a long format.
|
134
|
-
>>> from edsl.results import Results
|
135
|
-
>>> r = Results.example()
|
136
|
-
>>> r.select('how_feeling').print_long()
|
137
|
-
answer.how_feeling: OK
|
138
|
-
answer.how_feeling: Great
|
139
|
-
answer.how_feeling: Terrible
|
140
|
-
answer.how_feeling: OK
|
141
|
-
"""
|
142
|
-
for entry in self:
|
143
|
-
key, list_of_values = list(entry.items())[0]
|
144
|
-
for value in list_of_values:
|
145
|
-
print(f"{key}: {value}")
|
146
|
-
|
147
|
-
def _get_tabular_data(
|
148
|
-
self,
|
149
|
-
remove_prefix: bool = False,
|
150
|
-
pretty_labels: Optional[dict] = None,
|
151
|
-
) -> Tuple[List[str], List[List]]:
|
152
|
-
"""Internal method to get tabular data in a standard format.
|
153
|
-
|
154
|
-
Args:
|
155
|
-
remove_prefix: Whether to remove the prefix from column names
|
156
|
-
pretty_labels: Dictionary mapping original column names to pretty labels
|
157
|
-
|
158
|
-
Returns:
|
159
|
-
Tuple containing (header_row, data_rows)
|
160
|
-
"""
|
161
|
-
if pretty_labels is None:
|
162
|
-
pretty_labels = {}
|
163
|
-
|
164
|
-
return self._make_tabular(
|
165
|
-
remove_prefix=remove_prefix, pretty_labels=pretty_labels
|
166
|
-
)
|
167
|
-
|
168
|
-
def to_jsonl(self, filename: Optional[str] = None) -> Optional["FileStore"]:
|
169
|
-
"""Export the results to a FileStore instance containing JSONL data."""
|
170
|
-
exporter = JSONLExport(data=self, filename=filename)
|
171
|
-
return exporter.export()
|
172
|
-
|
173
|
-
def to_sqlite(
|
174
|
-
self,
|
175
|
-
filename: Optional[str] = None,
|
176
|
-
remove_prefix: bool = False,
|
177
|
-
pretty_labels: Optional[dict] = None,
|
178
|
-
table_name: str = "results",
|
179
|
-
if_exists: str = "replace",
|
180
|
-
) -> Optional["FileStore"]:
|
181
|
-
"""Export the results to a SQLite database file."""
|
182
|
-
exporter = SQLiteExport(
|
183
|
-
data=self,
|
184
|
-
filename=filename,
|
185
|
-
remove_prefix=remove_prefix,
|
186
|
-
pretty_labels=pretty_labels,
|
187
|
-
table_name=table_name,
|
188
|
-
if_exists=if_exists,
|
189
|
-
)
|
190
|
-
return exporter.export()
|
191
|
-
|
192
|
-
def to_csv(
|
193
|
-
self,
|
194
|
-
filename: Optional[str] = None,
|
195
|
-
remove_prefix: bool = False,
|
196
|
-
pretty_labels: Optional[dict] = None,
|
197
|
-
) -> Optional["FileStore"]:
|
198
|
-
"""Export the results to a FileStore instance containing CSV data."""
|
199
|
-
exporter = CSVExport(
|
200
|
-
data=self,
|
201
|
-
filename=filename,
|
202
|
-
remove_prefix=remove_prefix,
|
203
|
-
pretty_labels=pretty_labels,
|
204
|
-
)
|
205
|
-
return exporter.export()
|
206
|
-
|
207
|
-
def to_excel(
|
208
|
-
self,
|
209
|
-
filename: Optional[str] = None,
|
210
|
-
remove_prefix: bool = False,
|
211
|
-
pretty_labels: Optional[dict] = None,
|
212
|
-
sheet_name: Optional[str] = None,
|
213
|
-
) -> Optional["FileStore"]:
|
214
|
-
"""Export the results to a FileStore instance containing Excel data."""
|
215
|
-
exporter = ExcelExport(
|
216
|
-
data=self,
|
217
|
-
filename=filename,
|
218
|
-
remove_prefix=remove_prefix,
|
219
|
-
pretty_labels=pretty_labels,
|
220
|
-
sheet_name=sheet_name,
|
221
|
-
)
|
222
|
-
return exporter.export()
|
223
|
-
|
224
|
-
def _db(
|
225
|
-
self, remove_prefix: bool = True, shape: str = "wide"
|
226
|
-
) -> "sqlalchemy.engine.Engine":
|
227
|
-
"""Create a SQLite database in memory and return the connection.
|
228
|
-
|
229
|
-
Args:
|
230
|
-
remove_prefix: Whether to remove the prefix from the column names
|
231
|
-
shape: The shape of the data in the database ("wide" or "long")
|
232
|
-
|
233
|
-
Returns:
|
234
|
-
A database connection
|
235
|
-
>>> from sqlalchemy import text
|
236
|
-
>>> from edsl import Results
|
237
|
-
>>> engine = Results.example()._db()
|
238
|
-
>>> len(engine.execute(text("SELECT * FROM self")).fetchall())
|
239
|
-
4
|
240
|
-
>>> engine = Results.example()._db(shape = "long")
|
241
|
-
>>> len(engine.execute(text("SELECT * FROM self")).fetchall())
|
242
|
-
172
|
243
|
-
"""
|
244
|
-
from sqlalchemy import create_engine, text
|
245
|
-
|
246
|
-
engine = create_engine("sqlite:///:memory:")
|
247
|
-
if remove_prefix and shape == "wide":
|
248
|
-
df = self.remove_prefix().to_pandas(lists_as_strings=True)
|
249
|
-
else:
|
250
|
-
df = self.to_pandas(lists_as_strings=True)
|
251
|
-
|
252
|
-
if shape == "long":
|
253
|
-
# Melt the dataframe to convert it to long format
|
254
|
-
df = df.melt(var_name="key", value_name="value")
|
255
|
-
# Add a row number column for reference
|
256
|
-
df.insert(0, "row_number", range(1, len(df) + 1))
|
257
|
-
|
258
|
-
# Split the key into data_type and key
|
259
|
-
df["data_type"] = df["key"].apply(
|
260
|
-
lambda x: x.split(".")[0] if "." in x else None
|
261
|
-
)
|
262
|
-
df["key"] = df["key"].apply(
|
263
|
-
lambda x: ".".join(x.split(".")[1:]) if "." in x else x
|
264
|
-
)
|
265
|
-
|
266
|
-
df.to_sql(
|
267
|
-
"self",
|
268
|
-
engine,
|
269
|
-
index=False,
|
270
|
-
if_exists="replace",
|
271
|
-
)
|
272
|
-
return engine.connect()
|
273
|
-
|
274
|
-
def sql(
|
275
|
-
self,
|
276
|
-
query: str,
|
277
|
-
transpose: bool = None,
|
278
|
-
transpose_by: str = None,
|
279
|
-
remove_prefix: bool = True,
|
280
|
-
shape: str = "wide",
|
281
|
-
) -> Union["pd.DataFrame", str]:
|
282
|
-
"""Execute a SQL query and return the results as a DataFrame.
|
283
|
-
|
284
|
-
Args:
|
285
|
-
query: The SQL query to execute
|
286
|
-
shape: The shape of the data in the database (wide or long)
|
287
|
-
remove_prefix: Whether to remove the prefix from the column names
|
288
|
-
transpose: Whether to transpose the DataFrame
|
289
|
-
transpose_by: The column to use as the index when transposing
|
290
|
-
csv: Whether to return the DataFrame as a CSV string
|
291
|
-
to_list: Whether to return the results as a list
|
292
|
-
to_latex: Whether to return the results as LaTeX
|
293
|
-
filename: Optional filename to save the results to
|
294
|
-
|
295
|
-
Returns:
|
296
|
-
DataFrame, CSV string, list, or LaTeX string depending on parameters
|
297
|
-
|
298
|
-
Examples:
|
299
|
-
>>> from edsl import Results
|
300
|
-
>>> r = Results.example();
|
301
|
-
>>> len(r.sql("SELECT * FROM self", shape = "wide"))
|
302
|
-
4
|
303
|
-
>>> len(r.sql("SELECT * FROM self", shape = "long"))
|
304
|
-
172
|
305
|
-
"""
|
306
|
-
import pandas as pd
|
307
|
-
|
308
|
-
conn = self._db(remove_prefix=remove_prefix, shape=shape)
|
309
|
-
df = pd.read_sql_query(query, conn)
|
310
|
-
|
311
|
-
# Transpose the DataFrame if transpose is True
|
312
|
-
if transpose or transpose_by:
|
313
|
-
df = pd.DataFrame(df)
|
314
|
-
if transpose_by:
|
315
|
-
df = df.set_index(transpose_by)
|
316
|
-
else:
|
317
|
-
df = df.set_index(df.columns[0])
|
318
|
-
df = df.transpose()
|
319
|
-
from edsl.results.Dataset import Dataset
|
320
|
-
|
321
|
-
return Dataset.from_pandas_dataframe(df)
|
322
|
-
|
323
|
-
def to_pandas(
|
324
|
-
self, remove_prefix: bool = False, lists_as_strings=False
|
325
|
-
) -> "DataFrame":
|
326
|
-
"""Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
|
327
|
-
|
328
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
329
|
-
|
330
|
-
"""
|
331
|
-
return self._to_pandas_strings(remove_prefix)
|
332
|
-
|
333
|
-
def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
|
334
|
-
"""Convert the results to a pandas DataFrame.
|
335
|
-
|
336
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
337
|
-
|
338
|
-
>>> from edsl.results import Results
|
339
|
-
>>> r = Results.example()
|
340
|
-
>>> r.select('how_feeling').to_pandas()
|
341
|
-
answer.how_feeling
|
342
|
-
0 OK
|
343
|
-
1 Great
|
344
|
-
2 Terrible
|
345
|
-
3 OK
|
346
|
-
"""
|
347
|
-
|
348
|
-
import pandas as pd
|
349
|
-
|
350
|
-
csv_string = self.to_csv(remove_prefix=remove_prefix).text
|
351
|
-
csv_buffer = io.StringIO(csv_string)
|
352
|
-
df = pd.read_csv(csv_buffer)
|
353
|
-
# df_sorted = df.sort_index(axis=1) # Sort columns alphabetically
|
354
|
-
return df
|
355
|
-
|
356
|
-
def to_polars(
|
357
|
-
self, remove_prefix: bool = False, lists_as_strings=False
|
358
|
-
) -> "pl.DataFrame":
|
359
|
-
"""Convert the results to a Polars DataFrame.
|
360
|
-
|
361
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
362
|
-
"""
|
363
|
-
return self._to_polars_strings(remove_prefix)
|
364
|
-
|
365
|
-
def _to_polars_strings(self, remove_prefix: bool = False) -> "pl.DataFrame":
|
366
|
-
"""Convert the results to a Polars DataFrame.
|
367
|
-
|
368
|
-
:param remove_prefix: Whether to remove the prefix from the column names.
|
369
|
-
"""
|
370
|
-
import polars as pl
|
371
|
-
|
372
|
-
csv_string = self.to_csv(remove_prefix=remove_prefix).text
|
373
|
-
df = pl.read_csv(io.StringIO(csv_string))
|
374
|
-
return df
|
375
|
-
|
376
|
-
def to_scenario_list(self, remove_prefix: bool = True) -> list[dict]:
    """Convert the results to a ScenarioList, one Scenario per row.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_scenario_list()
    ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
    """
    from edsl.scenarios.ScenarioList import ScenarioList
    from edsl.scenarios.Scenario import Scenario

    rows = self.to_dicts(remove_prefix=remove_prefix)
    return ScenarioList([Scenario(row) for row in rows])
|
394
|
-
|
395
|
-
def to_agent_list(self, remove_prefix: bool = True):
    """Convert the results to an AgentList, one Agent per row.

    A row's ``name`` key is renamed to the ``agent_name`` trait and used as
    the agent's name; an ``agent_parameters`` dict supplies the name and
    instruction instead of appearing as a trait.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_agent_list()
    AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
    """
    from edsl.agents import Agent
    from edsl.agents.AgentList import AgentList

    list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
    agents = []
    for d in list_of_dicts:
        # BUGFIX: the original used two independent ifs, so a row with a
        # "name" key was appended twice (once in the name branch and once
        # more by the trailing else). if/elif/else appends exactly once.
        if "name" in d:
            d["agent_name"] = d.pop("name")
            agents.append(Agent(d, name=d["agent_name"]))
        elif "agent_parameters" in d:
            agent_parameters = d.pop("agent_parameters")
            agent_name = agent_parameters.get("name", None)
            instruction = agent_parameters.get("instruction", None)
            agents.append(Agent(d, name=agent_name, instruction=instruction))
        else:
            agents.append(Agent(d))
    return AgentList(agents)
|
422
|
-
|
423
|
-
def to_dicts(self, remove_prefix: bool = True) -> list[dict]:
    """Convert the results to a list of dictionaries, one per observation.

    :param remove_prefix: Whether to remove the prefix from the column names.

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').to_dicts()
    [{'how_feeling': 'OK'}, {'how_feeling': 'Great'}, {'how_feeling': 'Terrible'}, {'how_feeling': 'OK'}]

    """
    # Each entry holds a single {column_name: [values...]} pair.
    columns = [list(entry.items())[0] for entry in self]
    keys = [key for key, _ in columns]
    value_lists = [values for _, values in columns]

    if remove_prefix:
        keys = [key.split(".")[-1] for key in keys]

    # Transpose the column-major data into one dict per row.
    return [dict(zip(keys, row)) for row in zip(*value_lists)]
|
449
|
-
|
450
|
-
def to_list(self, flatten=False, remove_none=False, unzipped=False) -> list[list]:
    """Convert the results to a list of values (single column) or row tuples.

    :param flatten: Whether to flatten the list of lists.
    :param remove_none: Whether to remove None values from the list.

    >>> from edsl.results import Results
    >>> Results.example().select('how_feeling', 'how_feeling_yesterday')
    Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])

    >>> Results.example().select('how_feeling', 'how_feeling_yesterday').to_list()
    [('OK', 'Great'), ('Great', 'Good'), ('Terrible', 'OK'), ('OK', 'Terrible')]

    >>> r = Results.example()
    >>> r.select('how_feeling').to_list()
    ['OK', 'Great', 'Terrible', 'OK']

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}]).select('a.b').to_list(flatten = True)
    [1, 9, 2, 3, 4]

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
    Traceback (most recent call last):
    ...
    ValueError: Cannot flatten a list of lists when there are multiple columns selected.


    """
    columns = self.relevant_columns()

    # Flattening is only meaningful for a single selected column.
    if flatten and len(columns) > 1:
        raise ValueError(
            "Cannot flatten a list of lists when there are multiple columns selected."
        )

    if len(columns) == 1:
        # Single column: return its values directly.
        items = list(self[0].values())[0]
    else:
        # Multiple columns: one tuple per row, in column order.
        rows = self.to_dicts(remove_prefix=False)
        items = [tuple(row[key] for key in columns) for row in rows]

    if remove_none:
        items = [item for item in items if item is not None]

    if flatten:
        flattened = []
        for item in items:
            if isinstance(item, list):
                flattened.extend(item)
            else:
                flattened.append(item)
        items = flattened

    from edsl.utilities.PrettyList import PrettyList

    return PrettyList(items)
|
509
|
-
|
510
|
-
def html(
    self,
    filename: Optional[str] = None,
    cta: str = "Open in browser",
    return_link: bool = False,
):
    """Render the results as an HTML table and open or link to it.

    :param filename: Path to write the HTML to; when None, a temp file is
        created in the current working directory.
    :param cta: Link text ("call to action") shown in notebooks.
    :param return_link: If True, return the path of the written file.
    """
    import os
    import tempfile
    from edsl.utilities.utilities import is_notebook
    from IPython.display import HTML, display

    df = self.to_pandas()

    if filename is None:
        current_directory = os.getcwd()
        # delete=False: the file must outlive this call so the browser can open it.
        filename = tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".html", dir=current_directory
        ).name

    with open(filename, "w") as f:
        f.write(df.to_html())

    if is_notebook():
        # Jupyter serves files from the working directory under /files/.
        # NOTE(review): the extracted source had the interpolation garbled
        # ("(unknown)"); restored to the filename, which is the only value
        # that makes these strings meaningful.
        html_url = f"/files/{filename}"
        html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
        display(HTML(html_link))
    else:
        print(f"Saved to {filename}")
        import webbrowser

        webbrowser.open(f"file://{os.path.abspath(filename)}")

    if return_link:
        return filename
|
546
|
-
|
547
|
-
def report(self, *fields: Optional[str], top_n: Optional[int] = None,
           header_fields: Optional[List[str]] = None, divider: bool = True,
           return_string: bool = False) -> Optional[str]:
    """Takes the fields in order and returns a report of the results by iterating through rows.
    The row number is printed as # Observation: <row number>
    The name of the field is used as markdown header at level "##"
    The content of that field is then printed.
    Then the next field and so on.
    Once that row is done, a new line is printed and the next row is shown.
    If in a jupyter notebook, the report is displayed as markdown.

    Args:
        *fields: The fields to include in the report. If none provided, all fields are used.
        top_n: Optional limit on the number of observations to include.
        header_fields: Optional list of fields to include in the main header instead of as sections.
        divider: If True, adds a horizontal rule between observations for better visual separation.
        return_string: If True, returns the markdown string. If False (default in notebooks),
                      only displays the markdown without returning.

    Returns:
        A string containing the markdown report if return_string is True, otherwise None.

    Examples:
        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> report = r.select('how_feeling', 'how_feeling_yesterday').report(return_string=True)
        >>> "# Observation: 1" in report
        True
        >>> "## answer.how_feeling" in report
        True
        >>> report = r.select('how_feeling').report(header_fields=['answer.how_feeling'], return_string=True)
        >>> "# Observation: 1 (`how_feeling`: OK)" in report
        True
    """
    from edsl.utilities.utilities import is_notebook

    # If no fields specified, use all columns
    if not fields:
        fields = self.relevant_columns()

    # Initialize header_fields if not provided
    if header_fields is None:
        header_fields = []

    # Validate all fields (header fields are appended after the body fields,
    # skipping any that were already requested)
    all_fields = list(fields) + [f for f in header_fields if f not in fields]
    for field in all_fields:
        if field not in self.relevant_columns():
            raise ValueError(f"Field '{field}' not found in dataset")

    # Get data for each field: maps column name -> full list of values
    # (first entry holding the field wins)
    field_data = {}
    for field in all_fields:
        for entry in self:
            if field in entry:
                field_data[field] = entry[field]
                break

    # Number of observations to process
    num_obs = self.num_observations()
    if top_n is not None:
        num_obs = min(num_obs, top_n)

    # Build the report
    report_lines = []
    for i in range(num_obs):
        # Create header with observation number and any header fields
        header = f"# Observation: {i+1}"
        if header_fields:
            header_parts = []
            for field in header_fields:
                value = field_data[field][i]
                # Get the field name without prefix for cleaner display
                display_name = field.split('.')[-1] if '.' in field else field
                # Format with backticks for monospace
                header_parts.append(f"`{display_name}`: {value}")
            if header_parts:
                header += f" ({', '.join(header_parts)})"
        report_lines.append(header)

        # Add the remaining fields (header fields are excluded from the body)
        for field in fields:
            if field not in header_fields:
                report_lines.append(f"## {field}")
                value = field_data[field][i]
                if isinstance(value, list) or isinstance(value, dict):
                    # Structured values are rendered as a fenced JSON block
                    import json
                    report_lines.append(f"```\n{json.dumps(value, indent=2)}\n```")
                else:
                    report_lines.append(str(value))

        # Add divider between observations if requested
        if divider and i < num_obs - 1:
            report_lines.append("\n---\n")
        else:
            report_lines.append("")  # Empty line between observations

    report_text = "\n".join(report_lines)

    # In notebooks, display as markdown and optionally return
    is_nb = is_notebook()
    if is_nb:
        from IPython.display import Markdown, display
        display(Markdown(report_text))

    # Return the string if requested or if not in a notebook
    if return_string or not is_nb:
        return report_text
    return None
|
656
|
-
|
657
|
-
def tally(
    self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
) -> Union[dict, "Dataset"]:
    """Tally the values of a field or perform a cross-tab of multiple fields.

    :param fields: The field(s) to tally, multiple fields for cross-tabulation.
    :param top_n: If given, keep only the top_n most frequent values.
    :param output: "Dataset" (default) or "dict".

    >>> from edsl.results import Results
    >>> r = Results.example()
    >>> r.select('how_feeling').tally('answer.how_feeling', output = "dict")
    {'OK': 2, 'Great': 1, 'Terrible': 1}
    >>> from edsl.results.Dataset import Dataset
    >>> expected = Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible']}, {'count': [2, 1, 1]}])
    >>> r.select('how_feeling').tally('answer.how_feeling', output = "Dataset") == expected
    True
    """
    from collections import Counter

    if len(fields) == 0:
        fields = self.relevant_columns()

    # Accept both fully-qualified and unprefixed field names.
    relevant_columns_without_prefix = [
        column.split(".")[-1] for column in self.relevant_columns()
    ]

    if not all(
        f in self.relevant_columns() or f in relevant_columns_without_prefix
        for f in fields
    ):
        raise ValueError("One or more specified fields are not in the dataset.")

    if len(fields) == 1:
        field = fields[0]
        values = self._key_to_value(field)
    else:
        # Cross-tab: one tuple of values per observation.
        values = list(zip(*(self._key_to_value(field) for field in fields)))

    # BUGFIX: lists are unhashable, so Counter would raise on them. The
    # original loop only rebound its loop variable (a no-op); actually
    # replace list values with tuples here.
    values = [tuple(v) if isinstance(v, list) else v for v in values]

    tally = dict(Counter(values))
    # Most frequent first.
    sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
    if top_n is not None:
        sorted_tally = dict(list(sorted_tally.items())[:top_n])

    if output == "dict":
        # why did I do this?
        warnings.warn(
            textwrap.dedent(
                """\
                The default output from tally will change to Dataset in the future.
                Use output='Dataset' to get the Dataset object for now.
                """
            )
        )
        return sorted_tally
    elif output == "Dataset":
        # Imported lazily so the dict path has no edsl dependency.
        from edsl.results.Dataset import Dataset

        dataset = Dataset(
            [
                {"value": list(sorted_tally.keys())},
                {"count": list(sorted_tally.values())},
            ]
        )
        # Unpack the (possibly tuple-valued) keys back into named columns,
        # then move 'count' to the end.
        sl = dataset.to_scenario_list().unpack(
            "value",
            new_names=[fields] if isinstance(fields, str) else fields,
            keep_original=False,
        )
        keys = list(sl[0].keys())
        keys.remove("count")
        keys.append("count")
        return sl.reorder_keys(keys).to_dataset()
|
733
|
-
|
734
|
-
def flatten(self, field, keep_original=False):
    """
    Flatten a field containing a list of dictionaries into separate fields.

    >>> from edsl.results.Dataset import Dataset
    >>> Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5] }]).flatten('a')
    Dataset([{'c': [5]}, {'a.a': [1]}, {'a.b': [2]}])


    >>> Dataset([{'answer.example': [{'a': 1, 'b': 2}]}, {'c': [5] }]).flatten('answer.example')
    Dataset([{'c': [5]}, {'answer.example.a': [1]}, {'answer.example.b': [2]}])


    Args:
        field: The field to flatten
        keep_original: If True, keeps the original field in the dataset

    Returns:
        A new dataset with the flattened fields

    Raises:
        ValueError: If the field name matches more than one column.
    """
    from edsl.results.Dataset import Dataset

    # Ensure the dataset isn't empty
    if not self.data:
        return self.copy()

    # Find all columns that contain the field — either an exact match,
    # or a dotted column that ends with '.<field>' or starts with '<field>.'
    matching_entries = []
    for entry in self.data:
        col_name = next(iter(entry.keys()))
        if field == col_name or (
            '.' in col_name and
            (col_name.endswith('.' + field) or col_name.startswith(field + '.'))
        ):
            matching_entries.append(entry)

    # Check if the field is ambiguous
    if len(matching_entries) > 1:
        matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
        raise ValueError(
            f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
            f"Please specify the full column name to flatten."
        )

    # Get the number of observations
    num_observations = self.num_observations()

    # Find the column to flatten (exact key match only)
    field_entry = None
    for entry in self.data:
        if field in entry:
            field_entry = entry
            break

    # Missing field is a soft failure: warn and return an unchanged copy.
    if field_entry is None:
        warnings.warn(
            f"Field '{field}' not found in dataset, returning original dataset"
        )
        return self.copy()

    # Create new dictionary for flattened data
    flattened_data = []

    # Copy all existing columns except the one we're flattening (if keep_original is False)
    for entry in self.data:
        col_name = next(iter(entry.keys()))
        if col_name != field or keep_original:
            flattened_data.append(entry.copy())

    # Get field data and make sure it's valid: every non-None value must
    # be a dict, otherwise warn and return an unchanged copy.
    field_values = field_entry[field]
    if not all(isinstance(item, dict) for item in field_values if item is not None):
        warnings.warn(
            f"Field '{field}' contains non-dictionary values that cannot be flattened"
        )
        return self.copy()

    # Collect all unique keys across all dictionaries
    all_keys = set()
    for item in field_values:
        if isinstance(item, dict):
            all_keys.update(item.keys())

    # Create new columns for each key; rows whose dict lacks the key get None
    for key in sorted(all_keys):  # Sort for consistent output
        new_values = []
        for i in range(num_observations):
            value = None
            if i < len(field_values) and isinstance(field_values[i], dict):
                value = field_values[i].get(key, None)
            new_values.append(value)

        # Add this as a new column, namespaced under the original field
        flattened_data.append({f"{field}.{key}": new_values})

    # Return a new Dataset with the flattened data
    return Dataset(flattened_data)
|
831
|
-
|
832
|
-
def unpack_list(
    self,
    field: str,
    new_names: Optional[List[str]] = None,
    keep_original: bool = True,
) -> "Dataset":
    """Unpack list columns into separate columns with provided names or numeric suffixes.

    For example, if a dataset contains:
    [{'data': [[1, 2, 3], [4, 5, 6]], 'other': ['x', 'y']}]

    After d.unpack_list('data'), it should become:
    [{'other': ['x', 'y'], 'data_1': [1, 4], 'data_2': [2, 5], 'data_3': [3, 6]}]

    Args:
        field: The field containing lists to unpack
        new_names: Optional list of names for the unpacked fields. If None, uses numeric suffixes.
        keep_original: If True, keeps the original field in the dataset

    Returns:
        A new Dataset with unpacked columns

    Examples:
        >>> from edsl.results.Dataset import Dataset
        >>> d = Dataset([{'data': [[1, 2, 3], [4, 5, 6]]}])
        >>> d.unpack_list('data')
        Dataset([{'data': [[1, 2, 3], [4, 5, 6]]}, {'data_1': [1, 4]}, {'data_2': [2, 5]}, {'data_3': [3, 6]}])

        >>> d.unpack_list('data', new_names=['first', 'second', 'third'])
        Dataset([{'data': [[1, 2, 3], [4, 5, 6]]}, {'first': [1, 4]}, {'second': [2, 5]}, {'third': [3, 6]}])
    """
    from edsl.results.Dataset import Dataset

    # Work on a shallow copy so the original dataset is untouched.
    result = Dataset(self.data.copy())

    # Locate the entry holding the field.
    field_index = next(
        (i for i, entry in enumerate(result.data) if field in entry), None
    )
    if field_index is None:
        raise ValueError(f"Field '{field}' not found in dataset")

    field_data = result.data[field_index][field]

    # Every row of the column must itself be a list.
    if not all(isinstance(v, list) for v in field_data):
        raise ValueError(f"Field '{field}' does not contain lists in all entries")

    # Unpack up to the longest list; shorter lists pad with None.
    max_len = max(len(v) for v in field_data)

    for position in range(max_len):
        if new_names and position < len(new_names):
            column_name = new_names[position]
        else:
            column_name = f"{field}_{position+1}"

        column_values = [
            row[position] if position < len(row) else None for row in field_data
        ]
        result.data.append({column_name: column_values})

    # Drop the original column only on request.
    if not keep_original:
        result.data.pop(field_index)

    return result
|
906
|
-
|
907
|
-
|
908
|
-
if __name__ == "__main__":
    import doctest

    # ELLIPSIS lets doctests elide variable output (e.g. tracebacks) with "...".
    doctest.testmod(optionflags=doctest.ELLIPSIS)
|