edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +413 -332
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +57 -49
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +1071 -867
- edsl/agents/AgentList.py +551 -413
- edsl/agents/Invigilator.py +284 -233
- edsl/agents/InvigilatorBase.py +257 -270
- edsl/agents/PromptConstructor.py +272 -354
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +2 -3
- edsl/agents/descriptors.py +99 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -117
- edsl/auto/StageBase.py +243 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +74 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +218 -224
- edsl/base/Base.py +279 -279
- edsl/config.py +177 -157
- edsl/conversation/Conversation.py +290 -290
- edsl/conversation/car_buying.py +59 -58
- edsl/conversation/chips.py +95 -95
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +1106 -1028
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +573 -555
- edsl/data/CacheEntry.py +230 -233
- edsl/data/CacheHandler.py +168 -149
- edsl/data/RemoteCacheSync.py +186 -78
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +5 -4
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +74 -73
- edsl/enums.py +202 -175
- edsl/exceptions/BaseException.py +21 -21
- edsl/exceptions/__init__.py +54 -54
- edsl/exceptions/agents.py +54 -42
- edsl/exceptions/cache.py +5 -5
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +109 -91
- edsl/exceptions/results.py +29 -29
- edsl/exceptions/scenarios.py +29 -22
- edsl/exceptions/surveys.py +37 -37
- edsl/inference_services/AnthropicService.py +106 -87
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -120
- edsl/inference_services/AzureAI.py +215 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +143 -148
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +80 -147
- edsl/inference_services/InferenceServicesCollection.py +138 -97
- edsl/inference_services/MistralAIService.py +120 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +236 -224
- edsl/inference_services/PerplexityService.py +160 -163
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -89
- edsl/inference_services/TogetherAIService.py +172 -170
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +41 -41
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +43 -56
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +823 -898
- edsl/jobs/JobsChecks.py +172 -147
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -268
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +283 -251
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +396 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
- edsl/jobs/runners/JobsRunnerStatus.py +297 -330
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +470 -450
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +161 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +626 -668
- edsl/language_models/ModelList.py +164 -155
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +2 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +156 -156
- edsl/language_models/utilities.py +65 -64
- edsl/notebooks/Notebook.py +263 -258
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +352 -362
- edsl/prompts/__init__.py +2 -2
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -664
- edsl/questions/QuestionBasePromptsMixin.py +221 -217
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +180 -182
- edsl/questions/QuestionFreeText.py +113 -114
- edsl/questions/QuestionFunctional.py +166 -166
- edsl/questions/QuestionList.py +223 -231
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +330 -286
- edsl/questions/QuestionNumerical.py +151 -153
- edsl/questions/QuestionRank.py +314 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/SimpleAskMixin.py +74 -73
- edsl/questions/__init__.py +27 -26
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +90 -87
- edsl/questions/derived/QuestionTopK.py +93 -93
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +427 -413
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
- edsl/questions/question_registry.py +177 -177
- edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
- edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/CSSParameterizer.py +108 -108
- edsl/results/Dataset.py +587 -424
- edsl/results/DatasetExportMixin.py +594 -731
- edsl/results/DatasetTree.py +295 -275
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +557 -465
- edsl/results/Results.py +1183 -1165
- edsl/results/ResultsExportMixin.py +45 -43
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/TableDisplay.py +125 -198
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +2 -2
- edsl/results/file_exports.py +252 -0
- edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
- edsl/results/{Selector.py → results_selector.py} +145 -135
- edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +77 -77
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +511 -632
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +498 -601
- edsl/scenarios/ScenarioHtmlMixin.py +65 -64
- edsl/scenarios/ScenarioList.py +1458 -1287
- edsl/scenarios/ScenarioListExportMixin.py +45 -52
- edsl/scenarios/ScenarioListPdfMixin.py +239 -261
- edsl/scenarios/__init__.py +3 -4
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +38 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +521 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +327 -326
- edsl/surveys/RuleCollection.py +385 -387
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1280 -1801
- edsl/surveys/SurveyCSS.py +273 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +5 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +60 -56
- edsl/surveys/instructions/ChangeInstruction.py +48 -49
- edsl/surveys/instructions/Instruction.py +56 -65
- edsl/surveys/instructions/InstructionCollection.py +82 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +19 -19
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/interface.py +627 -627
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -263
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +436 -424
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/LICENSE +21 -21
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/METADATA +13 -11
- edsl-0.1.39.dev5.dist-info/RECORD +358 -0
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/WHEEL +1 -1
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/registry.py +0 -190
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.39.dev3.dist-info/RECORD +0 -277
@@ -0,0 +1,149 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import os
|
3
|
+
import tempfile
|
4
|
+
import sqlite3
|
5
|
+
|
6
|
+
|
7
|
+
class SQLiteMethods(FileMethods):
    """File handler for SQLite database files.

    Offers a plain-text dump of the database, viewers for the desktop and for
    Jupyter notebooks, and a factory for a small example database. Every
    method that touches a database reads its location from ``self.path``.
    """

    suffix = "db"  # or "sqlite", depending on your preference

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return *name* as a double-quoted SQL identifier.

        Embedded double quotes are doubled, so table names containing spaces,
        quotes or reserved words can be interpolated into a statement safely.
        """
        return '"' + name.replace('"', '""') + '"'

    def extract_text(self):
        """
        Extracts a text representation of the database schema and table contents.

        For every table listed in ``sqlite_master`` the result contains a
        ``Table:`` line, a ``Schema:`` line (the stored ``CREATE`` statement),
        a ``Columns:`` line, one line per row (the ``str`` of the row tuple)
        and a blank separator. Returns an empty string when there are no tables.
        """
        from contextlib import closing

        full_text = []

        # sqlite3's own context manager only commits or rolls back; closing()
        # is what actually releases the connection.
        with closing(sqlite3.connect(self.path)) as conn:
            cursor = conn.cursor()

            # Fetch each table's name together with its schema in one query.
            cursor.execute("SELECT name, sql FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()

            for table_name, schema in tables:
                full_text.append(f"Table: {table_name}")
                full_text.append(f"Schema: {schema}")

                # Identifiers cannot be bound as parameters, so quote instead.
                cursor.execute(f"SELECT * FROM {self._quote_identifier(table_name)};")
                rows = cursor.fetchall()

                column_names = [description[0] for description in cursor.description]
                full_text.append(f"Columns: {', '.join(column_names)}")

                full_text.extend(str(row) for row in rows)
                full_text.append("\n")

        return "\n".join(full_text)

    def view_system(self):
        """
        Opens the database with the system's default SQLite viewer if available.

        Best effort: a missing file or a failing viewer command is reported
        with ``print`` rather than raised.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("SQLite database file was not found.")
            return

        # os.name is "posix" on both macOS and Linux, so macOS has to be
        # detected through sys.platform for the Linux branch to be reachable.
        if sys.platform == "darwin":
            command = ["open", "-a", "DB Browser for SQLite", self.path]
        elif os.name == "nt":
            command = ["DB Browser for SQLite.exe", self.path]
        else:
            command = ["sqlitebrowser", self.path]

        try:
            subprocess.run(command, check=True)
        except Exception as e:
            print(f"Error opening SQLite database: {e}")

    def view_notebook(self):
        """
        Displays database contents in a Jupyter notebook.

        Each table is read into a pandas DataFrame and rendered as an HTML
        table under a heading with the table's name; all tables are wrapped
        in a single scrollable ``div``.
        """
        from contextlib import closing
        from html import escape

        import pandas as pd
        from IPython.display import HTML, display

        html_parts = []

        with closing(sqlite3.connect(self.path)) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()

            for (table_name,) in tables:
                df = pd.read_sql_query(
                    f"SELECT * FROM {self._quote_identifier(table_name)}", conn
                )

                # The table name is data, so escape it before embedding in HTML.
                table_html = f"""
                <div style="margin-bottom: 20px;">
                <h3>{escape(table_name)}</h3>
                {df.to_html(index=False)}
                </div>
                """
                html_parts.append(table_html)

        # Combine all tables into one scrollable div
        html = f"""
        <div style="width: 800px; height: 800px; padding: 20px;
        border: 1px solid #ccc; overflow-y: auto;">
        {''.join(html_parts)}
        </div>
        """
        display(HTML(html))

    def example(self):
        """
        Creates an example SQLite database for testing.

        Returns the path of a new temporary ``.db`` file holding one table,
        ``survey_responses``, with two rows. The file is not deleted
        automatically.
        """
        from contextlib import closing

        # Reserve a unique path and release the handle before SQLite opens it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".db") as tmp:
            db_path = tmp.name

        sample_data = [
            (1, "First Survey Question", "Response 1"),
            (2, "Second Survey Question", "Response 2"),
        ]

        # closing() guarantees the connection is released even if a statement fails.
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                CREATE TABLE survey_responses (
                    id INTEGER PRIMARY KEY,
                    question TEXT,
                    response TEXT
                )
                """
            )
            cursor.executemany(
                "INSERT INTO survey_responses (id, question, response) VALUES (?, ?, ?)",
                sample_data,
            )
            conn.commit()

        return db_path
|
143
|
+
|
144
|
+
|
145
|
+
if __name__ == "__main__":
    from edsl.scenarios.FileStore import FileStore

    # example() is declared as an instance method but never reads ``self``.
    # Calling it on the class with no argument raises TypeError, so a
    # placeholder is passed explicitly.
    sqlite_temp = SQLiteMethods.example(None)

    fs = FileStore(sqlite_temp)
|
@@ -0,0 +1,33 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import tempfile
|
3
|
+
|
4
|
+
|
5
|
+
class TxtMethods(FileMethods):
    """File handler for plain-text (``.txt``) files.

    The viewer methods read the file location from ``self.path``.
    """

    suffix = "txt"

    def view_system(self):
        """Open the text file with the operating system's default application.

        Best effort: a missing file or a failing opener is reported with
        ``print`` rather than raised.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("TXT file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so macOS has to be
            # detected through sys.platform for the xdg-open branch to run.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening TXT: {e}")

    def view_notebook(self):
        """Display a clickable link to the file in a Jupyter notebook."""
        from IPython.display import FileLink, display

        display(FileLink(self.path))

    def example(self):
        """Create a temporary ``.txt`` file containing ``Hello, World!``.

        Returns the path of the new file, which is not deleted automatically.
        """
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
            f.write(b"Hello, World!")
        return f.name
|
@@ -1,127 +1,131 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
from typing import Union, TYPE_CHECKING
|
3
|
-
|
4
|
-
|
5
|
-
from edsl.scenarios.ScenarioList import ScenarioList
|
6
|
-
from edsl.scenarios.Scenario import Scenario
|
7
|
-
|
8
|
-
|
9
|
-
class ScenarioJoin:
|
10
|
-
"""Handles join operations between two ScenarioLists.
|
11
|
-
|
12
|
-
This class encapsulates all join-related logic, making it easier to maintain
|
13
|
-
and extend with other join types (inner, right, full) in the future.
|
14
|
-
"""
|
15
|
-
|
16
|
-
def __init__(self, left: "ScenarioList", right: "ScenarioList"):
|
17
|
-
"""Initialize join operation with two ScenarioLists.
|
18
|
-
|
19
|
-
Args:
|
20
|
-
left: The left ScenarioList
|
21
|
-
right: The right ScenarioList
|
22
|
-
"""
|
23
|
-
self.left = left
|
24
|
-
self.right = right
|
25
|
-
|
26
|
-
def left_join(self, by: Union[str, list[str]]) -> ScenarioList:
|
27
|
-
"""Perform a left join between the two ScenarioLists.
|
28
|
-
|
29
|
-
Args:
|
30
|
-
by: String or list of strings representing the key(s) to join on. Cannot be empty.
|
31
|
-
|
32
|
-
Returns:
|
33
|
-
A new ScenarioList containing the joined scenarios
|
34
|
-
|
35
|
-
Raises:
|
36
|
-
ValueError: If by is empty or if any join keys don't exist in both ScenarioLists
|
37
|
-
"""
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
for scenario in self.
|
82
|
-
all_keys.update(scenario.keys())
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Union, TYPE_CHECKING
|
3
|
+
|
4
|
+
if TYPE_CHECKING:
|
5
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
6
|
+
from edsl.scenarios.Scenario import Scenario
|
7
|
+
|
8
|
+
|
9
|
+
class ScenarioJoin:
    """Handles join operations between two ScenarioLists.

    This class encapsulates all join-related logic, making it easier to maintain
    and extend with other join types (inner, right, full) in the future.
    """

    def __init__(self, left: "ScenarioList", right: "ScenarioList"):
        """Initialize join operation with two ScenarioLists.

        Args:
            left: The left ScenarioList
            right: The right ScenarioList
        """
        self.left = left
        self.right = right

    def left_join(self, by: Union[str, list[str]]) -> "ScenarioList":
        """Perform a left join between the two ScenarioLists.

        Every scenario of the left list appears exactly once in the result.
        Keys that exist only on the right are filled from the matching right
        scenario, or with ``None`` when there is no match.

        Args:
            by: String or list of strings representing the key(s) to join on. Cannot be empty.

        Returns:
            A new ScenarioList containing the joined scenarios

        Raises:
            ValueError: If by is empty or if any join keys don't exist in both ScenarioLists
        """
        # Imported here rather than at module level; the module only imports
        # ScenarioList under TYPE_CHECKING.
        from edsl.scenarios.ScenarioList import ScenarioList

        self._validate_join_keys(by)
        by_keys = [by] if isinstance(by, str) else by

        other_dict = self._create_lookup_dict(self.right, by_keys)
        all_keys = self._get_all_keys()

        return ScenarioList(
            self._create_joined_scenarios(by_keys, other_dict, all_keys)
        )

    def _validate_join_keys(self, by: Union[str, list[str]]) -> None:
        """Validate join keys exist in both ScenarioLists.

        Only the first scenario of each list is inspected; an empty list is
        treated as having no keys.

        Raises:
            ValueError: If ``by`` is empty, or a join key is missing from the
                first scenario of either list.
        """
        if not by:
            raise ValueError(
                "Join keys cannot be empty. Please specify at least one key to join on."
            )

        by_keys = [by] if isinstance(by, str) else by
        left_keys = set(next(iter(self.left)).keys()) if self.left else set()
        right_keys = set(next(iter(self.right)).keys()) if self.right else set()

        missing_left = set(by_keys) - left_keys
        missing_right = set(by_keys) - right_keys
        if missing_left or missing_right:
            missing = missing_left | missing_right
            raise ValueError(f"Join key(s) {missing} not found in both ScenarioLists")

    @staticmethod
    def _get_key_tuple(scenario: "Scenario", keys: list[str]) -> tuple:
        """Create a tuple of values for the join keys."""
        return tuple(scenario[k] for k in keys)

    def _create_lookup_dict(
        self, scenarios: "ScenarioList", by_keys: list[str]
    ) -> dict:
        """Create a lookup dictionary for the right scenarios.

        Maps each join-key tuple to its scenario. When several scenarios share
        a key tuple, the last one wins.
        """
        return {
            self._get_key_tuple(scenario, by_keys): scenario for scenario in scenarios
        }

    def _get_all_keys(self) -> set:
        """Get all unique keys from both ScenarioLists."""
        all_keys = set()
        for scenario in self.left:
            all_keys.update(scenario.keys())
        for scenario in self.right:
            all_keys.update(scenario.keys())
        return all_keys

    def _ordered_keys(self, all_keys: set) -> list:
        """Return ``all_keys`` as a list in a reproducible order.

        Keys are listed in the order they are first seen while walking the
        left list and then the right list. Iterating the set directly would
        give an order that changes between interpreter runs under string-hash
        randomisation.
        """
        ordered = {}
        for scenarios in (self.left, self.right):
            for scenario in scenarios:
                for key in scenario.keys():
                    if key in all_keys:
                        ordered.setdefault(key, None)
        # Keys the caller supplied that occur in neither list still get a slot.
        for key in sorted(all_keys.difference(ordered), key=repr):
            ordered[key] = None
        return list(ordered)

    def _create_joined_scenarios(
        self, by_keys: list[str], other_dict: dict, all_keys: set
    ) -> "list[Scenario]":
        """Create the joined scenarios.

        Args:
            by_keys: The join keys.
            other_dict: Lookup from join-key tuple to right scenario.
            all_keys: Every key that should be present in each joined scenario.

        Returns:
            One Scenario per left scenario, with missing keys set to ``None``.
        """
        from edsl.scenarios.Scenario import Scenario

        ordered_keys = self._ordered_keys(all_keys)
        new_scenarios = []

        for scenario in self.left:
            # Start from an all-None row so unmatched right-only keys stay None.
            new_scenario = dict.fromkeys(ordered_keys)
            new_scenario.update(scenario)

            key_tuple = self._get_key_tuple(scenario, by_keys)
            matching_scenario = other_dict.get(key_tuple)
            if matching_scenario is not None:
                self._handle_matching_scenario(
                    new_scenario, scenario, matching_scenario, by_keys
                )

            new_scenarios.append(Scenario(new_scenario))

        return new_scenarios

    def _handle_matching_scenario(
        self,
        new_scenario: dict,
        left_scenario: "Scenario",
        right_scenario: "Scenario",
        by_keys: list[str],
    ) -> None:
        """Handle merging of matching scenarios and conflict warnings.

        Updates ``new_scenario`` in place with the keys that exist only in
        ``right_scenario``. For non-join keys present on both sides with
        different values, the left value is kept and a warning is printed.
        """
        left_keys = set(left_scenario.keys())
        right_keys = set(right_scenario.keys())

        join_conditions = None  # built once, on the first conflict
        # Walk the left scenario's own key order so warnings are reproducible.
        for key in left_scenario.keys():
            if key not in right_keys or key in by_keys:
                continue
            if left_scenario[key] == right_scenario[key]:
                continue
            if join_conditions is None:
                join_conditions = [f"{k}='{left_scenario[k]}'" for k in by_keys]
            print(
                f"Warning: Conflicting values for key '{key}' where "
                f"{' AND '.join(join_conditions)}. "
                f"Keeping left value: {left_scenario[key]} "
                f"(discarding: {right_scenario[key]})"
            )

        # Only update with non-overlapping keys from matching scenario
        new_scenario.update(
            {k: right_scenario[k] for k in right_scenario.keys() if k not in left_keys}
        )
|
@@ -0,0 +1,156 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
|
4
|
+
class ScenarioSelector:
|
5
|
+
"""
|
6
|
+
A class for performing advanced field selection on ScenarioList objects,
|
7
|
+
including support for wildcard patterns.
|
8
|
+
|
9
|
+
Args:
|
10
|
+
scenario_list: The ScenarioList object to perform selections on
|
11
|
+
|
12
|
+
Examples:
|
13
|
+
>>> from edsl import Scenario, ScenarioList
|
14
|
+
>>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3}), Scenario({'test_1': 4, 'test_2': 5, 'other': 6})])
|
15
|
+
>>> selector = ScenarioSelector(scenarios)
|
16
|
+
>>> selector.select('test*')
|
17
|
+
ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
|
18
|
+
"""
|
19
|
+
|
20
|
+
def __init__(self, scenario_list: "ScenarioList"):
|
21
|
+
"""Initialize with a ScenarioList object."""
|
22
|
+
self.scenario_list = scenario_list
|
23
|
+
self.available_fields = (
|
24
|
+
list(scenario_list.data[0].keys()) if scenario_list.data else []
|
25
|
+
)
|
26
|
+
|
27
|
+
def _match_field_pattern(self, pattern: str, field: str) -> bool:
|
28
|
+
"""
|
29
|
+
Checks if a field name matches a pattern with wildcards.
|
30
|
+
Supports '*' as wildcard at start or end of pattern.
|
31
|
+
|
32
|
+
Args:
|
33
|
+
pattern: The pattern to match against, may contain '*' at start or end
|
34
|
+
field: The field name to check
|
35
|
+
|
36
|
+
Examples:
|
37
|
+
>>> from edsl.scenarios import ScenarioList, Scenario
|
38
|
+
>>> selector = ScenarioSelector(ScenarioList([]))
|
39
|
+
>>> selector._match_field_pattern('test*', 'test_field')
|
40
|
+
True
|
41
|
+
>>> selector._match_field_pattern('*field', 'test_field')
|
42
|
+
True
|
43
|
+
>>> selector._match_field_pattern('test', 'test')
|
44
|
+
True
|
45
|
+
>>> selector._match_field_pattern('*test*', 'my_test_field')
|
46
|
+
True
|
47
|
+
"""
|
48
|
+
if "*" not in pattern:
|
49
|
+
return pattern == field
|
50
|
+
|
51
|
+
if pattern.startswith("*") and pattern.endswith("*"):
|
52
|
+
return pattern[1:-1] in field
|
53
|
+
elif pattern.startswith("*"):
|
54
|
+
return field.endswith(pattern[1:])
|
55
|
+
elif pattern.endswith("*"):
|
56
|
+
return field.startswith(pattern[:-1])
|
57
|
+
return pattern == field
|
58
|
+
|
59
|
+
def _get_matching_fields(self, patterns: list[str]) -> list[str]:
    """
    Gets all fields that match any of the given patterns.

    Args:
        patterns: List of field patterns, may contain wildcards. A single
            bare string is treated as a one-element list.

    Returns:
        List of field names that match at least one pattern, de-duplicated
        and sorted alphabetically.

    Examples:
        >>> from edsl import Scenario, ScenarioList
        >>> scenarios = ScenarioList([
        ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3})
        ... ])
        >>> selector = ScenarioSelector(scenarios)
        >>> selector._get_matching_fields(['test*'])
        ['test_1', 'test_2']
        >>> selector._get_matching_fields('test*')
        ['test_1', 'test_2']
    """
    # A bare string would otherwise be iterated character by character, and
    # a lone '*' character would then match every available field.
    if isinstance(patterns, str):
        patterns = [patterns]

    # The set removes fields hit by more than one pattern.
    matching_fields = {
        field
        for pattern in patterns
        for field in self.available_fields
        if self._match_field_pattern(pattern, field)
    }
    return sorted(matching_fields)
|
87
|
+
|
88
|
+
def select(self, *fields) -> "ScenarioList":
    """
    Build a new ScenarioList that keeps only the requested fields.

    Each positional argument is a field name or a pattern; '*' at the
    start or end of a pattern acts as a wildcard.

    Args:
        *fields: Field names or patterns to select. Patterns may include '*' for wildcards.

    Returns:
        A new ScenarioList containing only the matched fields. An empty
        ScenarioList is returned when there are no scenarios to select from.

    Raises:
        ValueError: If no fields match the given patterns.

    Examples:
        >>> from edsl import Scenario, ScenarioList
        >>> scenarios = ScenarioList([
        ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3}),
        ...     Scenario({'test_1': 4, 'test_2': 5, 'other': 6})
        ... ])
        >>> selector = ScenarioSelector(scenarios)
        >>> selector.select('test*')  # Selects all fields starting with 'test'
        ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
        >>> selector.select('*_1')  # Selects all fields ending with '_1'
        ScenarioList([Scenario({'test_1': 1}), Scenario({'test_1': 4})])
        >>> selector.select('test_1', '*_2')  # Multiple patterns
        ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
    """
    source = self.scenario_list
    # Results are built with the same class as the wrapped list.
    list_cls = source.__class__

    # Nothing to select from: return an empty list without validating patterns.
    if not source.data:
        return list_cls([])

    # The varargs tuple becomes a list so it prints as one in the error below.
    patterns = [*fields]
    fields_to_select = self._get_matching_fields(patterns)

    if fields_to_select:
        return list_cls(
            [scenario.select(fields_to_select) for scenario in source.data]
        )

    # No pattern matched anything: report what was asked for and what exists.
    raise ValueError(
        f"No fields matched the given patterns: {patterns}. "
        f"Available fields are: {self.available_fields}"
    )
|
135
|
+
|
136
|
+
def get_available_fields(self) -> list[str]:
    """
    List the field names that can be selected, in sorted order.

    A sorted copy is returned; the selector's own list is left untouched.

    Returns:
        List of field names available for selection.

    Examples:
        >>> from edsl import Scenario, ScenarioList
        >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3})])
        >>> selector = ScenarioSelector(scenarios)
        >>> selector.get_available_fields()
        ['other', 'test_1', 'test_2']
    """
    ordered = list(self.available_fields)
    ordered.sort()
    return ordered
|
151
|
+
|
152
|
+
|
153
|
+
if __name__ == "__main__":
    # When run as a script, execute the doctests embedded in this module's
    # docstrings. ELLIPSIS lets expected output use "..." as a wildcard.
    import doctest

    doctest_flags = doctest.ELLIPSIS
    doctest.testmod(optionflags=doctest_flags)
|
edsl/shared.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
shared_globals = {}
|
1
|
+
# Module-level dictionary, created empty at import time.
# NOTE(review): nothing visible here shows who reads or writes it —
# presumably a registry shared across modules; confirm against importers.
shared_globals: dict = {}
|