edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344)
  1. edsl/Base.py +413 -332
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +57 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +1071 -867
  7. edsl/agents/AgentList.py +551 -413
  8. edsl/agents/Invigilator.py +284 -233
  9. edsl/agents/InvigilatorBase.py +257 -270
  10. edsl/agents/PromptConstructor.py +272 -354
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/__init__.py +2 -3
  14. edsl/agents/descriptors.py +99 -99
  15. edsl/agents/prompt_helpers.py +129 -129
  16. edsl/agents/question_option_processor.py +172 -0
  17. edsl/auto/AutoStudy.py +130 -117
  18. edsl/auto/StageBase.py +243 -230
  19. edsl/auto/StageGenerateSurvey.py +178 -178
  20. edsl/auto/StageLabelQuestions.py +125 -125
  21. edsl/auto/StagePersona.py +61 -61
  22. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  23. edsl/auto/StagePersonaDimensionValues.py +74 -74
  24. edsl/auto/StagePersonaDimensions.py +69 -69
  25. edsl/auto/StageQuestions.py +74 -73
  26. edsl/auto/SurveyCreatorPipeline.py +21 -21
  27. edsl/auto/utilities.py +218 -224
  28. edsl/base/Base.py +279 -279
  29. edsl/config.py +177 -157
  30. edsl/conversation/Conversation.py +290 -290
  31. edsl/conversation/car_buying.py +59 -58
  32. edsl/conversation/chips.py +95 -95
  33. edsl/conversation/mug_negotiation.py +81 -81
  34. edsl/conversation/next_speaker_utilities.py +93 -93
  35. edsl/coop/CoopFunctionsMixin.py +15 -0
  36. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  37. edsl/coop/PriceFetcher.py +54 -54
  38. edsl/coop/__init__.py +2 -2
  39. edsl/coop/coop.py +1106 -1028
  40. edsl/coop/utils.py +131 -131
  41. edsl/data/Cache.py +573 -555
  42. edsl/data/CacheEntry.py +230 -233
  43. edsl/data/CacheHandler.py +168 -149
  44. edsl/data/RemoteCacheSync.py +186 -78
  45. edsl/data/SQLiteDict.py +292 -292
  46. edsl/data/__init__.py +5 -4
  47. edsl/data/hack.py +10 -0
  48. edsl/data/orm.py +10 -10
  49. edsl/data_transfer_models.py +74 -73
  50. edsl/enums.py +202 -175
  51. edsl/exceptions/BaseException.py +21 -21
  52. edsl/exceptions/__init__.py +54 -54
  53. edsl/exceptions/agents.py +54 -42
  54. edsl/exceptions/cache.py +5 -5
  55. edsl/exceptions/configuration.py +16 -16
  56. edsl/exceptions/coop.py +10 -10
  57. edsl/exceptions/data.py +14 -14
  58. edsl/exceptions/general.py +34 -34
  59. edsl/exceptions/inference_services.py +5 -0
  60. edsl/exceptions/jobs.py +33 -33
  61. edsl/exceptions/language_models.py +63 -63
  62. edsl/exceptions/prompts.py +15 -15
  63. edsl/exceptions/questions.py +109 -91
  64. edsl/exceptions/results.py +29 -29
  65. edsl/exceptions/scenarios.py +29 -22
  66. edsl/exceptions/surveys.py +37 -37
  67. edsl/inference_services/AnthropicService.py +106 -87
  68. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  69. edsl/inference_services/AvailableModelFetcher.py +215 -0
  70. edsl/inference_services/AwsBedrock.py +118 -120
  71. edsl/inference_services/AzureAI.py +215 -217
  72. edsl/inference_services/DeepInfraService.py +18 -18
  73. edsl/inference_services/GoogleService.py +143 -148
  74. edsl/inference_services/GroqService.py +20 -20
  75. edsl/inference_services/InferenceServiceABC.py +80 -147
  76. edsl/inference_services/InferenceServicesCollection.py +138 -97
  77. edsl/inference_services/MistralAIService.py +120 -123
  78. edsl/inference_services/OllamaService.py +18 -18
  79. edsl/inference_services/OpenAIService.py +236 -224
  80. edsl/inference_services/PerplexityService.py +160 -163
  81. edsl/inference_services/ServiceAvailability.py +135 -0
  82. edsl/inference_services/TestService.py +90 -89
  83. edsl/inference_services/TogetherAIService.py +172 -170
  84. edsl/inference_services/data_structures.py +134 -0
  85. edsl/inference_services/models_available_cache.py +118 -118
  86. edsl/inference_services/rate_limits_cache.py +25 -25
  87. edsl/inference_services/registry.py +41 -41
  88. edsl/inference_services/write_available.py +10 -10
  89. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  90. edsl/jobs/Answers.py +43 -56
  91. edsl/jobs/FetchInvigilator.py +47 -0
  92. edsl/jobs/InterviewTaskManager.py +98 -0
  93. edsl/jobs/InterviewsConstructor.py +50 -0
  94. edsl/jobs/Jobs.py +823 -898
  95. edsl/jobs/JobsChecks.py +172 -147
  96. edsl/jobs/JobsComponentConstructor.py +189 -0
  97. edsl/jobs/JobsPrompts.py +270 -268
  98. edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
  99. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  100. edsl/jobs/RequestTokenEstimator.py +30 -0
  101. edsl/jobs/__init__.py +1 -1
  102. edsl/jobs/async_interview_runner.py +138 -0
  103. edsl/jobs/buckets/BucketCollection.py +104 -63
  104. edsl/jobs/buckets/ModelBuckets.py +65 -65
  105. edsl/jobs/buckets/TokenBucket.py +283 -251
  106. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  107. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  108. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  109. edsl/jobs/data_structures.py +120 -0
  110. edsl/jobs/decorators.py +35 -0
  111. edsl/jobs/interviews/Interview.py +396 -661
  112. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  113. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  114. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  115. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  116. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  117. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  118. edsl/jobs/interviews/ReportErrors.py +66 -66
  119. edsl/jobs/interviews/interview_status_enum.py +9 -9
  120. edsl/jobs/jobs_status_enums.py +9 -0
  121. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  122. edsl/jobs/results_exceptions_handler.py +98 -0
  123. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
  124. edsl/jobs/runners/JobsRunnerStatus.py +297 -330
  125. edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
  126. edsl/jobs/tasks/TaskCreators.py +64 -64
  127. edsl/jobs/tasks/TaskHistory.py +470 -450
  128. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  129. edsl/jobs/tasks/task_status_enum.py +161 -163
  130. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  131. edsl/jobs/tokens/TokenUsage.py +34 -34
  132. edsl/language_models/ComputeCost.py +63 -0
  133. edsl/language_models/LanguageModel.py +626 -668
  134. edsl/language_models/ModelList.py +164 -155
  135. edsl/language_models/PriceManager.py +127 -0
  136. edsl/language_models/RawResponseHandler.py +106 -0
  137. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  138. edsl/language_models/ServiceDataSources.py +0 -0
  139. edsl/language_models/__init__.py +2 -3
  140. edsl/language_models/fake_openai_call.py +15 -15
  141. edsl/language_models/fake_openai_service.py +61 -61
  142. edsl/language_models/key_management/KeyLookup.py +63 -0
  143. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  144. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  145. edsl/language_models/key_management/__init__.py +0 -0
  146. edsl/language_models/key_management/models.py +131 -0
  147. edsl/language_models/model.py +256 -0
  148. edsl/language_models/repair.py +156 -156
  149. edsl/language_models/utilities.py +65 -64
  150. edsl/notebooks/Notebook.py +263 -258
  151. edsl/notebooks/NotebookToLaTeX.py +142 -0
  152. edsl/notebooks/__init__.py +1 -1
  153. edsl/prompts/Prompt.py +352 -362
  154. edsl/prompts/__init__.py +2 -2
  155. edsl/questions/ExceptionExplainer.py +77 -0
  156. edsl/questions/HTMLQuestion.py +103 -0
  157. edsl/questions/QuestionBase.py +518 -664
  158. edsl/questions/QuestionBasePromptsMixin.py +221 -217
  159. edsl/questions/QuestionBudget.py +227 -227
  160. edsl/questions/QuestionCheckBox.py +359 -359
  161. edsl/questions/QuestionExtract.py +180 -182
  162. edsl/questions/QuestionFreeText.py +113 -114
  163. edsl/questions/QuestionFunctional.py +166 -166
  164. edsl/questions/QuestionList.py +223 -231
  165. edsl/questions/QuestionMatrix.py +265 -0
  166. edsl/questions/QuestionMultipleChoice.py +330 -286
  167. edsl/questions/QuestionNumerical.py +151 -153
  168. edsl/questions/QuestionRank.py +314 -324
  169. edsl/questions/Quick.py +41 -41
  170. edsl/questions/SimpleAskMixin.py +74 -73
  171. edsl/questions/__init__.py +27 -26
  172. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
  173. edsl/questions/compose_questions.py +98 -98
  174. edsl/questions/data_structures.py +20 -0
  175. edsl/questions/decorators.py +21 -21
  176. edsl/questions/derived/QuestionLikertFive.py +76 -76
  177. edsl/questions/derived/QuestionLinearScale.py +90 -87
  178. edsl/questions/derived/QuestionTopK.py +93 -93
  179. edsl/questions/derived/QuestionYesNo.py +82 -82
  180. edsl/questions/descriptors.py +427 -413
  181. edsl/questions/loop_processor.py +149 -0
  182. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  183. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  184. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  185. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  186. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  187. edsl/questions/prompt_templates/question_list.jinja +17 -17
  188. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  189. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  190. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
  191. edsl/questions/question_registry.py +177 -177
  192. edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
  193. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
  194. edsl/questions/response_validator_factory.py +34 -0
  195. edsl/questions/settings.py +12 -12
  196. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  197. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  198. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  199. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  200. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  201. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  202. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  203. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  204. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  205. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  206. edsl/questions/templates/list/question_presentation.jinja +5 -5
  207. edsl/questions/templates/matrix/__init__.py +1 -0
  208. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  209. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  210. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  211. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  212. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  213. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  214. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  215. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  216. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  217. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  218. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  219. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  220. edsl/results/CSSParameterizer.py +108 -108
  221. edsl/results/Dataset.py +587 -424
  222. edsl/results/DatasetExportMixin.py +594 -731
  223. edsl/results/DatasetTree.py +295 -275
  224. edsl/results/MarkdownToDocx.py +122 -0
  225. edsl/results/MarkdownToPDF.py +111 -0
  226. edsl/results/Result.py +557 -465
  227. edsl/results/Results.py +1183 -1165
  228. edsl/results/ResultsExportMixin.py +45 -43
  229. edsl/results/ResultsGGMixin.py +121 -121
  230. edsl/results/TableDisplay.py +125 -198
  231. edsl/results/TextEditor.py +50 -0
  232. edsl/results/__init__.py +2 -2
  233. edsl/results/file_exports.py +252 -0
  234. edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
  235. edsl/results/{Selector.py → results_selector.py} +145 -135
  236. edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
  237. edsl/results/smart_objects.py +96 -0
  238. edsl/results/table_data_class.py +12 -0
  239. edsl/results/table_display.css +77 -77
  240. edsl/results/table_renderers.py +118 -0
  241. edsl/results/tree_explore.py +115 -115
  242. edsl/scenarios/ConstructDownloadLink.py +109 -0
  243. edsl/scenarios/DocumentChunker.py +102 -0
  244. edsl/scenarios/DocxScenario.py +16 -0
  245. edsl/scenarios/FileStore.py +511 -632
  246. edsl/scenarios/PdfExtractor.py +40 -0
  247. edsl/scenarios/Scenario.py +498 -601
  248. edsl/scenarios/ScenarioHtmlMixin.py +65 -64
  249. edsl/scenarios/ScenarioList.py +1458 -1287
  250. edsl/scenarios/ScenarioListExportMixin.py +45 -52
  251. edsl/scenarios/ScenarioListPdfMixin.py +239 -261
  252. edsl/scenarios/__init__.py +3 -4
  253. edsl/scenarios/directory_scanner.py +96 -0
  254. edsl/scenarios/file_methods.py +85 -0
  255. edsl/scenarios/handlers/__init__.py +13 -0
  256. edsl/scenarios/handlers/csv.py +38 -0
  257. edsl/scenarios/handlers/docx.py +76 -0
  258. edsl/scenarios/handlers/html.py +37 -0
  259. edsl/scenarios/handlers/json.py +111 -0
  260. edsl/scenarios/handlers/latex.py +5 -0
  261. edsl/scenarios/handlers/md.py +51 -0
  262. edsl/scenarios/handlers/pdf.py +68 -0
  263. edsl/scenarios/handlers/png.py +39 -0
  264. edsl/scenarios/handlers/pptx.py +105 -0
  265. edsl/scenarios/handlers/py.py +294 -0
  266. edsl/scenarios/handlers/sql.py +313 -0
  267. edsl/scenarios/handlers/sqlite.py +149 -0
  268. edsl/scenarios/handlers/txt.py +33 -0
  269. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
  270. edsl/scenarios/scenario_selector.py +156 -0
  271. edsl/shared.py +1 -1
  272. edsl/study/ObjectEntry.py +173 -173
  273. edsl/study/ProofOfWork.py +113 -113
  274. edsl/study/SnapShot.py +80 -80
  275. edsl/study/Study.py +521 -528
  276. edsl/study/__init__.py +4 -4
  277. edsl/surveys/ConstructDAG.py +92 -0
  278. edsl/surveys/DAG.py +148 -148
  279. edsl/surveys/EditSurvey.py +221 -0
  280. edsl/surveys/InstructionHandler.py +100 -0
  281. edsl/surveys/Memory.py +31 -31
  282. edsl/surveys/MemoryManagement.py +72 -0
  283. edsl/surveys/MemoryPlan.py +244 -244
  284. edsl/surveys/Rule.py +327 -326
  285. edsl/surveys/RuleCollection.py +385 -387
  286. edsl/surveys/RuleManager.py +172 -0
  287. edsl/surveys/Simulator.py +75 -0
  288. edsl/surveys/Survey.py +1280 -1801
  289. edsl/surveys/SurveyCSS.py +273 -261
  290. edsl/surveys/SurveyExportMixin.py +259 -259
  291. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
  292. edsl/surveys/SurveyQualtricsImport.py +284 -284
  293. edsl/surveys/SurveyToApp.py +141 -0
  294. edsl/surveys/__init__.py +5 -3
  295. edsl/surveys/base.py +53 -53
  296. edsl/surveys/descriptors.py +60 -56
  297. edsl/surveys/instructions/ChangeInstruction.py +48 -49
  298. edsl/surveys/instructions/Instruction.py +56 -65
  299. edsl/surveys/instructions/InstructionCollection.py +82 -77
  300. edsl/templates/error_reporting/base.html +23 -23
  301. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  302. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  303. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  304. edsl/templates/error_reporting/interview_details.html +115 -115
  305. edsl/templates/error_reporting/interviews.html +19 -19
  306. edsl/templates/error_reporting/overview.html +4 -4
  307. edsl/templates/error_reporting/performance_plot.html +1 -1
  308. edsl/templates/error_reporting/report.css +73 -73
  309. edsl/templates/error_reporting/report.html +117 -117
  310. edsl/templates/error_reporting/report.js +25 -25
  311. edsl/test_h +1 -0
  312. edsl/tools/__init__.py +1 -1
  313. edsl/tools/clusters.py +192 -192
  314. edsl/tools/embeddings.py +27 -27
  315. edsl/tools/embeddings_plotting.py +118 -118
  316. edsl/tools/plotting.py +112 -112
  317. edsl/tools/summarize.py +18 -18
  318. edsl/utilities/PrettyList.py +56 -0
  319. edsl/utilities/SystemInfo.py +28 -28
  320. edsl/utilities/__init__.py +22 -22
  321. edsl/utilities/ast_utilities.py +25 -25
  322. edsl/utilities/data/Registry.py +6 -6
  323. edsl/utilities/data/__init__.py +1 -1
  324. edsl/utilities/data/scooter_results.json +1 -1
  325. edsl/utilities/decorators.py +77 -77
  326. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  327. edsl/utilities/gcp_bucket/example.py +50 -0
  328. edsl/utilities/interface.py +627 -627
  329. edsl/utilities/is_notebook.py +18 -0
  330. edsl/utilities/is_valid_variable_name.py +11 -0
  331. edsl/utilities/naming_utilities.py +263 -263
  332. edsl/utilities/remove_edsl_version.py +24 -0
  333. edsl/utilities/repair_functions.py +28 -28
  334. edsl/utilities/restricted_python.py +70 -70
  335. edsl/utilities/utilities.py +436 -424
  336. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/LICENSE +21 -21
  337. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/METADATA +13 -11
  338. edsl-0.1.39.dev4.dist-info/RECORD +361 -0
  339. edsl/language_models/KeyLookup.py +0 -30
  340. edsl/language_models/registry.py +0 -190
  341. edsl/language_models/unused/ReplicateBase.py +0 -83
  342. edsl/results/ResultsDBMixin.py +0 -238
  343. edsl-0.1.39.dev3.dist-info/RECORD +0 -277
  344. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/WHEEL +0 -0
@@ -0,0 +1,149 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import os
3
+ import tempfile
4
+ import sqlite3
5
+
6
+
7
class SQLiteMethods(FileMethods):
    """File handler for SQLite database files.

    Offers a plain-text dump of the database, viewers for the desktop and for
    Jupyter notebooks, and a small example database for testing. Every method
    that opens a database reads its location from ``self.path``.
    """

    suffix = "db"  # or "sqlite", depending on your preference

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` as a double-quoted SQL identifier.

        Embedded double quotes are doubled, so table names containing spaces,
        quotes or reserved words can be interpolated into a statement safely.
        """
        return '"' + name.replace('"', '""') + '"'

    def extract_text(self):
        """
        Extracts a text representation of the database schema and table contents.

        Returns:
            A newline-joined string holding, for each table, its name, its
            ``CREATE TABLE`` statement, its column names and one line per row.
        """
        from contextlib import closing

        full_text = []

        # sqlite3's own context manager only commits or rolls back; closing()
        # is what actually releases the database file when we are done.
        with closing(sqlite3.connect(self.path)) as conn:
            cursor = conn.cursor()

            # Fetch every table name together with its schema in one query
            cursor.execute("SELECT name, sql FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()

            for table_name, schema in tables:
                full_text.append(f"Table: {table_name}")
                full_text.append(f"Schema: {schema}")

                # Identifiers cannot be bound as parameters, so quote instead
                cursor.execute(
                    f"SELECT * FROM {self._quote_identifier(table_name)};"
                )
                rows = cursor.fetchall()

                column_names = [description[0] for description in cursor.description]
                full_text.append(f"Columns: {', '.join(column_names)}")

                full_text.extend(str(row) for row in rows)
                # Blank separator between tables
                full_text.append("\n")

        return "\n".join(full_text)

    def view_system(self):
        """
        Opens the database with the system's default SQLite viewer if available.

        Failures (missing file, missing viewer, non-zero exit) are reported
        with ``print`` rather than raised.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("SQLite database file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so macOS has to be
            # told apart through sys.platform; otherwise Linux would run the
            # macOS-only "open -a" command.
            if sys.platform == "darwin":
                # Try DB Browser for SQLite on macOS
                subprocess.run(
                    ["open", "-a", "DB Browser for SQLite", self.path], check=True
                )
            elif os.name == "nt":
                # Try DB Browser for SQLite on Windows
                subprocess.run(["DB Browser for SQLite.exe", self.path], check=True)
            else:
                # Try sqlitebrowser on Linux
                subprocess.run(["sqlitebrowser", self.path], check=True)
        except Exception as e:
            # Best-effort viewer: report the problem instead of crashing
            print(f"Error opening SQLite database: {e}")

    def view_notebook(self):
        """
        Displays database contents in a Jupyter notebook.

        Each table is rendered as an HTML table under a heading with its name,
        and all tables are placed inside one scrollable container.
        """
        from contextlib import closing
        from html import escape

        import pandas as pd
        from IPython.display import HTML, display

        html_parts = []
        with closing(sqlite3.connect(self.path)) as conn:
            # Get all table names
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()

            for (table_name,) in tables:
                # Read table into pandas DataFrame
                df = pd.read_sql_query(
                    f"SELECT * FROM {self._quote_identifier(table_name)}", conn
                )

                # Convert to HTML with styling; the name is escaped because it
                # comes from the database file and lands in markup.
                table_html = f"""
                <div style="margin-bottom: 20px;">
                <h3>{escape(table_name)}</h3>
                {df.to_html(index=False)}
                </div>
                """
                html_parts.append(table_html)

        # Combine all tables into one scrollable div
        page_html = f"""
        <div style="width: 800px; height: 800px; padding: 20px;
        border: 1px solid #ccc; overflow-y: auto;">
        {''.join(html_parts)}
        </div>
        """
        display(HTML(page_html))

    def example(self):
        """
        Creates an example SQLite database for testing.

        Returns:
            Path of a new temporary ``.db`` file containing a
            ``survey_responses`` table with two rows. The file is not deleted
            automatically.
        """
        from contextlib import closing

        # Reserve a file name, then close the handle before SQLite opens the
        # file so the two never hold it open at the same time.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".db") as tmp:
            db_path = tmp.name

        # closing() guarantees the connection is released even if a statement fails
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.cursor()

            # Create a sample table
            cursor.execute(
                """
                CREATE TABLE survey_responses (
                id INTEGER PRIMARY KEY,
                question TEXT,
                response TEXT
                )
                """
            )

            # Insert some sample data
            sample_data = [
                (1, "First Survey Question", "Response 1"),
                (2, "Second Survey Question", "Response 2"),
            ]
            cursor.executemany(
                "INSERT INTO survey_responses (id, question, response) VALUES (?, ?, ?)",
                sample_data,
            )

            conn.commit()

        return db_path
143
+
144
+
145
if __name__ == "__main__":
    # Manual smoke test: build the example database and wrap it in a FileStore.
    # `example` is declared as an instance method but never reads `self`, so it
    # is called on the class with an explicit placeholder; calling
    # `SQLiteMethods.example()` with no argument raises TypeError.
    sqlite_temp = SQLiteMethods.example(None)
    from edsl.scenarios.FileStore import FileStore

    fs = FileStore(sqlite_temp)
@@ -0,0 +1,33 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import tempfile
3
+
4
+
5
class TxtMethods(FileMethods):
    """File handler for plain-text (``.txt``) files.

    The viewer methods read the file location from ``self.path``.
    """

    suffix = "txt"

    def view_system(self):
        """Open the text file with the operating system's default application.

        Failures (missing file, missing opener, non-zero exit) are reported
        with ``print`` rather than raised.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("TXT file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so macOS has to be
            # told apart through sys.platform; otherwise the xdg-open branch
            # could never run and Linux would call the macOS "open" command.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            # Best-effort viewer: report the problem instead of crashing
            print(f"Error opening TXT: {e}")

    def view_notebook(self):
        """Display a clickable link to the file in a Jupyter notebook."""
        from IPython.display import FileLink, display

        display(FileLink(self.path))

    def example(self):
        """Create a temporary ``.txt`` file containing ``Hello, World!``.

        Returns:
            Path of the new file. It is not deleted automatically.
        """
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
            f.write(b"Hello, World!")
        return f.name
@@ -1,127 +1,131 @@
1
- from __future__ import annotations
2
- from typing import Union, TYPE_CHECKING
3
-
4
- # if TYPE_CHECKING:
5
- from edsl.scenarios.ScenarioList import ScenarioList
6
- from edsl.scenarios.Scenario import Scenario
7
-
8
-
9
class ScenarioJoin:
    """Join helper operating on a pair of ScenarioLists.

    Keeping the join logic in one place makes it straightforward to add
    further join types (inner, right, full) later; only the left join is
    implemented here.
    """

    def __init__(self, left: "ScenarioList", right: "ScenarioList"):
        """Remember the two ScenarioLists taking part in the join.

        Args:
            left: The left ScenarioList
            right: The right ScenarioList
        """
        self.left = left
        self.right = right

    def left_join(self, by: Union[str, list[str]]) -> "ScenarioList":
        """Left-join ``self.right`` onto ``self.left``.

        Args:
            by: A key name or list of key names to join on. Must not be empty.

        Returns:
            A new ScenarioList with one joined scenario per left scenario.

        Raises:
            ValueError: If ``by`` is empty or a join key is missing from the
                first scenario of either list.
        """
        self._validate_join_keys(by)
        join_keys = [by] if isinstance(by, str) else by

        right_lookup = self._create_lookup_dict(self.right, join_keys)
        every_key = self._get_all_keys()
        joined = self._create_joined_scenarios(join_keys, right_lookup, every_key)
        return ScenarioList(joined)

    def _validate_join_keys(self, by: Union[str, list[str]]) -> None:
        """Raise ValueError unless the join keys are usable on both lists."""
        if not by:
            raise ValueError(
                "Join keys cannot be empty. Please specify at least one key to join on."
            )

        requested = set([by] if isinstance(by, str) else by)

        # Only the first scenario of each list is inspected for its keys
        left_keys = set()
        if self.left:
            left_keys = set(next(iter(self.left)).keys())
        right_keys = set()
        if self.right:
            right_keys = set(next(iter(self.right)).keys())

        missing_left = requested - left_keys
        missing_right = requested - right_keys
        if not (missing_left or missing_right):
            return

        missing = missing_left | missing_right
        raise ValueError(f"Join key(s) {missing} not found in both ScenarioLists")

    @staticmethod
    def _get_key_tuple(scenario: "Scenario", keys: list[str]) -> tuple:
        """Return the scenario's values for ``keys``, in order, as a tuple."""
        values = [scenario[name] for name in keys]
        return tuple(values)

    def _create_lookup_dict(self, scenarios: "ScenarioList", by_keys: list[str]) -> dict:
        """Index ``scenarios`` by their join-key tuple (later entries win)."""
        lookup = {}
        for candidate in scenarios:
            lookup[self._get_key_tuple(candidate, by_keys)] = candidate
        return lookup

    def _get_all_keys(self) -> set:
        """Collect every key that appears in any scenario of either list."""
        collected = set()
        for source in (self.left, self.right):
            for scenario in source:
                collected.update(scenario.keys())
        return collected

    def _create_joined_scenarios(
        self, by_keys: list[str], other_dict: dict, all_keys: set
    ) -> "list[Scenario]":
        """Build one joined Scenario per scenario of the left list."""
        joined = []

        for left_scenario in self.left:
            # Start with every known key set to None, then overlay left values
            merged = dict.fromkeys(all_keys)
            merged.update(left_scenario)

            match = other_dict.get(self._get_key_tuple(left_scenario, by_keys))
            if match:
                self._handle_matching_scenario(merged, left_scenario, match, by_keys)

            joined.append(Scenario(merged))

        return joined

    def _handle_matching_scenario(
        self,
        new_scenario: dict,
        left_scenario: "Scenario",
        right_scenario: "Scenario",
        by_keys: list[str],
    ) -> None:
        """Copy right-only values into ``new_scenario`` and warn on conflicts."""
        left_names = set(left_scenario.keys())
        right_names = set(right_scenario.keys())

        for key in left_names & right_names:
            if key in by_keys:
                continue
            if left_scenario[key] != right_scenario[key]:
                join_conditions = [f"{k}='{left_scenario[k]}'" for k in by_keys]
                print(
                    f"Warning: Conflicting values for key '{key}' where "
                    f"{' AND '.join(join_conditions)}. "
                    f"Keeping left value: {left_scenario[key]} "
                    f"(discarding: {right_scenario[key]})"
                )

        # Left values always win, so only right-only keys are copied across
        for key in right_names - left_names:
            new_scenario[key] = right_scenario[key]
1
+ from __future__ import annotations
2
+ from typing import Union, TYPE_CHECKING
3
+
4
+ if TYPE_CHECKING:
5
+ from edsl.scenarios.ScenarioList import ScenarioList
6
+ from edsl.scenarios.Scenario import Scenario
7
+
8
+
9
class ScenarioJoin:
    """Performs joins between a left and a right ScenarioList.

    All join-related logic lives in this class so that additional join types
    (inner, right, full) can be added alongside the existing left join.
    """

    def __init__(self, left: "ScenarioList", right: "ScenarioList"):
        """Store the two ScenarioLists to be joined.

        Args:
            left: The left ScenarioList
            right: The right ScenarioList
        """
        self.left = left
        self.right = right

    def left_join(self, by: Union[str, list[str]]) -> "ScenarioList":
        """Join the right ScenarioList onto the left one.

        Args:
            by: A key name or list of key names to join on. Must not be empty.

        Returns:
            A new ScenarioList with one joined scenario per left scenario.

        Raises:
            ValueError: If ``by`` is empty or a join key is missing from the
                first scenario of either list.
        """
        # Imported at call time; the module only imports it for type checking
        from edsl.scenarios.ScenarioList import ScenarioList

        self._validate_join_keys(by)
        if isinstance(by, str):
            join_keys = [by]
        else:
            join_keys = by

        right_lookup = self._create_lookup_dict(self.right, join_keys)
        every_key = self._get_all_keys()
        joined = self._create_joined_scenarios(join_keys, right_lookup, every_key)
        return ScenarioList(joined)

    def _validate_join_keys(self, by: Union[str, list[str]]) -> None:
        """Check that join keys were given and exist on both sides."""
        if not by:
            raise ValueError(
                "Join keys cannot be empty. Please specify at least one key to join on."
            )

        requested = set([by]) if isinstance(by, str) else set(by)

        # Key names are taken from the first scenario of each list only
        left_keys = set(next(iter(self.left)).keys()) if self.left else set()
        right_keys = set(next(iter(self.right)).keys()) if self.right else set()

        missing_left = requested - left_keys
        missing_right = requested - right_keys
        if missing_left or missing_right:
            missing = missing_left | missing_right
            raise ValueError(f"Join key(s) {missing} not found in both ScenarioLists")

    @staticmethod
    def _get_key_tuple(scenario: "Scenario", keys: list[str]) -> tuple:
        """Return the values stored under ``keys`` as a tuple, in key order."""
        return tuple([scenario[name] for name in keys])

    def _create_lookup_dict(self, scenarios: "ScenarioList", by_keys: list[str]) -> dict:
        """Map each scenario's join-key tuple to the scenario (last one wins)."""
        lookup = {}
        for candidate in scenarios:
            key_tuple = self._get_key_tuple(candidate, by_keys)
            lookup[key_tuple] = candidate
        return lookup

    def _get_all_keys(self) -> set:
        """Return the union of the key names used by any scenario on either side."""
        collected = set()
        for source in (self.left, self.right):
            for scenario in source:
                collected.update(scenario.keys())
        return collected

    def _create_joined_scenarios(
        self, by_keys: list[str], other_dict: dict, all_keys: set
    ) -> "list[Scenario]":
        """Produce the joined Scenario for every scenario in the left list."""
        # Imported at call time; the module only imports it for type checking
        from edsl.scenarios.Scenario import Scenario

        results = []
        for left_scenario in self.left:
            # Every known key defaults to None before the left values are applied
            merged = dict.fromkeys(all_keys)
            merged.update(left_scenario)

            match = other_dict.get(self._get_key_tuple(left_scenario, by_keys))
            if match:
                self._handle_matching_scenario(merged, left_scenario, match, by_keys)

            results.append(Scenario(merged))
        return results

    def _handle_matching_scenario(
        self,
        new_scenario: dict,
        left_scenario: "Scenario",
        right_scenario: "Scenario",
        by_keys: list[str],
    ) -> None:
        """Merge a matched right scenario into ``new_scenario``.

        Prints a warning for each shared non-join key whose values differ;
        the left value is kept in that case.
        """
        left_names = set(left_scenario.keys())
        right_names = set(right_scenario.keys())

        for key in left_names & right_names:
            if key in by_keys:
                continue
            if left_scenario[key] != right_scenario[key]:
                join_conditions = [f"{k}='{left_scenario[k]}'" for k in by_keys]
                print(
                    f"Warning: Conflicting values for key '{key}' where "
                    f"{' AND '.join(join_conditions)}. "
                    f"Keeping left value: {left_scenario[key]} "
                    f"(discarding: {right_scenario[key]})"
                )

        # Keys present only on the right are the only ones copied over
        for key in right_names - left_names:
            new_scenario[key] = right_scenario[key]
@@ -0,0 +1,156 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+
4
class ScenarioSelector:
    """
    A class for performing advanced field selection on ScenarioList objects,
    including support for wildcard patterns.

    A pattern is either a literal field name or a name containing one or
    more '*' characters, each of which matches any run of characters
    (including none). The selectable fields are read from the first scenario
    of the list when the selector is created.

    Args:
        scenario_list: The ScenarioList object to perform selections on

    Examples:
        >>> from edsl import Scenario, ScenarioList
        >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3}), Scenario({'test_1': 4, 'test_2': 5, 'other': 6})])
        >>> selector = ScenarioSelector(scenarios)
        >>> selector.select('test*')
        ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
    """

    def __init__(self, scenario_list: "ScenarioList"):
        """Initialize with a ScenarioList object."""
        self.scenario_list = scenario_list
        # Field names come from the first scenario only; an empty list has
        # no selectable fields.
        self.available_fields = (
            list(scenario_list.data[0].keys()) if scenario_list.data else []
        )

    def _match_field_pattern(self, pattern: str, field: str) -> bool:
        """
        Checks if a field name matches a pattern with wildcards.
        Each '*' in the pattern matches any run of characters (including
        none); it may appear at the start, at the end, or in the middle.

        Args:
            pattern: The pattern to match against, may contain '*'
            field: The field name to check

        Returns:
            True if the field matches the pattern, False otherwise.

        Examples:
            >>> from edsl.scenarios import ScenarioList, Scenario
            >>> selector = ScenarioSelector(ScenarioList([]))
            >>> selector._match_field_pattern('test*', 'test_field')
            True
            >>> selector._match_field_pattern('*field', 'test_field')
            True
            >>> selector._match_field_pattern('test', 'test')
            True
            >>> selector._match_field_pattern('*test*', 'my_test_field')
            True
            >>> selector._match_field_pattern('q*_comment', 'q1_comment')
            True
            >>> selector._match_field_pattern('q*_comment', 'q1_answer')
            False
        """
        if "*" not in pattern:
            return pattern == field

        # With at least one '*', split() yields a (possibly empty) literal
        # prefix, zero or more interior literals, and a literal suffix.
        prefix, *interior, suffix = pattern.split("*")

        # The prefix and suffix must not overlap inside the field.
        if len(field) < len(prefix) + len(suffix):
            return False
        if not field.startswith(prefix) or not field.endswith(suffix):
            return False

        # Interior literals must occur left to right, strictly between the
        # prefix and the suffix; taking the leftmost hit each time leaves
        # the most room for the literals that follow.
        cursor = len(prefix)
        limit = len(field) - len(suffix)
        for literal in interior:
            found_at = field.find(literal, cursor, limit)
            if found_at == -1:
                return False
            cursor = found_at + len(literal)
        return True

    def _get_matching_fields(self, patterns: list[str]) -> list[str]:
        """
        Gets all fields that match any of the given patterns.

        Args:
            patterns: List of field patterns, may contain wildcards

        Returns:
            Alphabetically sorted list of the field names that match at
            least one pattern, without duplicates.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([
            ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3})
            ... ])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector._get_matching_fields(['test*'])
            ['test_1', 'test_2']
        """
        matching_fields = {
            field
            for pattern in patterns
            for field in self.available_fields
            if self._match_field_pattern(pattern, field)
        }
        return sorted(matching_fields)

    def select(self, *fields) -> "ScenarioList":
        """
        Selects scenarios with only the referenced fields.
        Supports wildcard patterns using '*' anywhere in a field name.

        Args:
            *fields: Field names or patterns to select. Patterns may include '*' for wildcards.

        Returns:
            A new ScenarioList containing only the matched fields. An empty
            ScenarioList yields a new empty list without any matching.

        Raises:
            ValueError: If the list is non-empty and no fields match the given patterns.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([
            ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3}),
            ...     Scenario({'test_1': 4, 'test_2': 5, 'other': 6})
            ... ])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector.select('test*')  # Selects all fields starting with 'test'
            ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
            >>> selector.select('*_1')  # Selects all fields ending with '_1'
            ScenarioList([Scenario({'test_1': 1}), Scenario({'test_1': 4})])
            >>> selector.select('test_1', '*_2')  # Multiple patterns
            ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
        """
        list_type = type(self.scenario_list)
        if not self.scenario_list.data:
            return list_type([])

        # *fields arrives as a tuple; use a list so the error message below
        # shows the patterns in list form.
        patterns = list(fields)

        # Get all fields that match the patterns
        fields_to_select = self._get_matching_fields(patterns)

        # If no fields match, raise an informative error
        if not fields_to_select:
            raise ValueError(
                f"No fields matched the given patterns: {patterns}. "
                f"Available fields are: {self.available_fields}"
            )

        return list_type(
            [scenario.select(fields_to_select) for scenario in self.scenario_list.data]
        )

    def get_available_fields(self) -> list[str]:
        """
        Returns a list of all available fields in the ScenarioList.

        Returns:
            Alphabetically sorted list of field names available for selection.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3})])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector.get_available_fields()
            ['other', 'test_1', 'test_2']
        """
        return sorted(self.available_fields)
151
+
152
+
153
if __name__ == "__main__":
    # Script entry point: run the doctests embedded in this module's
    # docstrings, letting "..." in expected output match any text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
edsl/shared.py CHANGED
@@ -1 +1 @@
1
- shared_globals = {}
1
+ shared_globals = {}