edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334):
  1. edsl/Base.py +332 -385
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +49 -57
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +867 -1079
  7. edsl/agents/AgentList.py +413 -551
  8. edsl/agents/Invigilator.py +233 -285
  9. edsl/agents/InvigilatorBase.py +270 -254
  10. edsl/agents/PromptConstructor.py +354 -252
  11. edsl/agents/__init__.py +3 -2
  12. edsl/agents/descriptors.py +99 -99
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +279 -279
  26. edsl/config.py +157 -177
  27. edsl/conversation/Conversation.py +290 -290
  28. edsl/conversation/car_buying.py +58 -59
  29. edsl/conversation/chips.py +95 -95
  30. edsl/conversation/mug_negotiation.py +81 -81
  31. edsl/conversation/next_speaker_utilities.py +93 -93
  32. edsl/coop/PriceFetcher.py +54 -54
  33. edsl/coop/__init__.py +2 -2
  34. edsl/coop/coop.py +1028 -1090
  35. edsl/coop/utils.py +131 -131
  36. edsl/data/Cache.py +555 -562
  37. edsl/data/CacheEntry.py +233 -230
  38. edsl/data/CacheHandler.py +149 -170
  39. edsl/data/RemoteCacheSync.py +78 -78
  40. edsl/data/SQLiteDict.py +292 -292
  41. edsl/data/__init__.py +4 -5
  42. edsl/data/orm.py +10 -10
  43. edsl/data_transfer_models.py +73 -74
  44. edsl/enums.py +175 -195
  45. edsl/exceptions/BaseException.py +21 -21
  46. edsl/exceptions/__init__.py +54 -54
  47. edsl/exceptions/agents.py +42 -54
  48. edsl/exceptions/cache.py +5 -5
  49. edsl/exceptions/configuration.py +16 -16
  50. edsl/exceptions/coop.py +10 -10
  51. edsl/exceptions/data.py +14 -14
  52. edsl/exceptions/general.py +34 -34
  53. edsl/exceptions/jobs.py +33 -33
  54. edsl/exceptions/language_models.py +63 -63
  55. edsl/exceptions/prompts.py +15 -15
  56. edsl/exceptions/questions.py +91 -109
  57. edsl/exceptions/results.py +29 -29
  58. edsl/exceptions/scenarios.py +22 -29
  59. edsl/exceptions/surveys.py +37 -37
  60. edsl/inference_services/AnthropicService.py +87 -84
  61. edsl/inference_services/AwsBedrock.py +120 -118
  62. edsl/inference_services/AzureAI.py +217 -215
  63. edsl/inference_services/DeepInfraService.py +18 -18
  64. edsl/inference_services/GoogleService.py +148 -139
  65. edsl/inference_services/GroqService.py +20 -20
  66. edsl/inference_services/InferenceServiceABC.py +147 -80
  67. edsl/inference_services/InferenceServicesCollection.py +97 -122
  68. edsl/inference_services/MistralAIService.py +123 -120
  69. edsl/inference_services/OllamaService.py +18 -18
  70. edsl/inference_services/OpenAIService.py +224 -221
  71. edsl/inference_services/PerplexityService.py +163 -160
  72. edsl/inference_services/TestService.py +89 -92
  73. edsl/inference_services/TogetherAIService.py +170 -170
  74. edsl/inference_services/models_available_cache.py +118 -118
  75. edsl/inference_services/rate_limits_cache.py +25 -25
  76. edsl/inference_services/registry.py +41 -41
  77. edsl/inference_services/write_available.py +10 -10
  78. edsl/jobs/Answers.py +56 -43
  79. edsl/jobs/Jobs.py +898 -757
  80. edsl/jobs/JobsChecks.py +147 -172
  81. edsl/jobs/JobsPrompts.py +268 -270
  82. edsl/jobs/JobsRemoteInferenceHandler.py +239 -287
  83. edsl/jobs/__init__.py +1 -1
  84. edsl/jobs/buckets/BucketCollection.py +63 -104
  85. edsl/jobs/buckets/ModelBuckets.py +65 -65
  86. edsl/jobs/buckets/TokenBucket.py +251 -283
  87. edsl/jobs/interviews/Interview.py +661 -358
  88. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  89. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  90. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  91. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  92. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  93. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  94. edsl/jobs/interviews/ReportErrors.py +66 -66
  95. edsl/jobs/interviews/interview_status_enum.py +9 -9
  96. edsl/jobs/runners/JobsRunnerAsyncio.py +466 -421
  97. edsl/jobs/runners/JobsRunnerStatus.py +330 -330
  98. edsl/jobs/tasks/QuestionTaskCreator.py +242 -244
  99. edsl/jobs/tasks/TaskCreators.py +64 -64
  100. edsl/jobs/tasks/TaskHistory.py +450 -449
  101. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  102. edsl/jobs/tasks/task_status_enum.py +163 -161
  103. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  104. edsl/jobs/tokens/TokenUsage.py +34 -34
  105. edsl/language_models/KeyLookup.py +30 -0
  106. edsl/language_models/LanguageModel.py +668 -571
  107. edsl/language_models/ModelList.py +155 -153
  108. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  109. edsl/language_models/__init__.py +3 -2
  110. edsl/language_models/fake_openai_call.py +15 -15
  111. edsl/language_models/fake_openai_service.py +61 -61
  112. edsl/language_models/registry.py +190 -180
  113. edsl/language_models/repair.py +156 -156
  114. edsl/language_models/unused/ReplicateBase.py +83 -0
  115. edsl/language_models/utilities.py +64 -65
  116. edsl/notebooks/Notebook.py +258 -263
  117. edsl/notebooks/__init__.py +1 -1
  118. edsl/prompts/Prompt.py +362 -352
  119. edsl/prompts/__init__.py +2 -2
  120. edsl/questions/AnswerValidatorMixin.py +289 -334
  121. edsl/questions/QuestionBase.py +664 -509
  122. edsl/questions/QuestionBaseGenMixin.py +161 -165
  123. edsl/questions/QuestionBasePromptsMixin.py +217 -221
  124. edsl/questions/QuestionBudget.py +227 -227
  125. edsl/questions/QuestionCheckBox.py +359 -359
  126. edsl/questions/QuestionExtract.py +182 -182
  127. edsl/questions/QuestionFreeText.py +114 -113
  128. edsl/questions/QuestionFunctional.py +166 -166
  129. edsl/questions/QuestionList.py +231 -229
  130. edsl/questions/QuestionMultipleChoice.py +286 -330
  131. edsl/questions/QuestionNumerical.py +153 -151
  132. edsl/questions/QuestionRank.py +324 -314
  133. edsl/questions/Quick.py +41 -41
  134. edsl/questions/RegisterQuestionsMeta.py +71 -71
  135. edsl/questions/ResponseValidatorABC.py +174 -200
  136. edsl/questions/SimpleAskMixin.py +73 -74
  137. edsl/questions/__init__.py +26 -27
  138. edsl/questions/compose_questions.py +98 -98
  139. edsl/questions/decorators.py +21 -21
  140. edsl/questions/derived/QuestionLikertFive.py +76 -76
  141. edsl/questions/derived/QuestionLinearScale.py +87 -90
  142. edsl/questions/derived/QuestionTopK.py +93 -93
  143. edsl/questions/derived/QuestionYesNo.py +82 -82
  144. edsl/questions/descriptors.py +413 -427
  145. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  146. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  147. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  148. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  149. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  150. edsl/questions/prompt_templates/question_list.jinja +17 -17
  151. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  152. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  153. edsl/questions/question_registry.py +177 -177
  154. edsl/questions/settings.py +12 -12
  155. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  156. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  157. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  158. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  159. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  160. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  161. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  162. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  163. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  164. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  165. edsl/questions/templates/list/question_presentation.jinja +5 -5
  166. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  167. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  168. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  169. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  170. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  171. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  172. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  173. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  174. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  175. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  176. edsl/results/CSSParameterizer.py +108 -108
  177. edsl/results/Dataset.py +424 -587
  178. edsl/results/DatasetExportMixin.py +731 -653
  179. edsl/results/DatasetTree.py +275 -295
  180. edsl/results/Result.py +465 -451
  181. edsl/results/Results.py +1165 -1172
  182. edsl/results/ResultsDBMixin.py +238 -0
  183. edsl/results/ResultsExportMixin.py +43 -45
  184. edsl/results/ResultsFetchMixin.py +33 -33
  185. edsl/results/ResultsGGMixin.py +121 -121
  186. edsl/results/ResultsToolsMixin.py +98 -98
  187. edsl/results/Selector.py +135 -145
  188. edsl/results/TableDisplay.py +198 -125
  189. edsl/results/__init__.py +2 -2
  190. edsl/results/table_display.css +77 -77
  191. edsl/results/tree_explore.py +115 -115
  192. edsl/scenarios/FileStore.py +632 -511
  193. edsl/scenarios/Scenario.py +601 -498
  194. edsl/scenarios/ScenarioHtmlMixin.py +64 -65
  195. edsl/scenarios/ScenarioJoin.py +127 -131
  196. edsl/scenarios/ScenarioList.py +1287 -1430
  197. edsl/scenarios/ScenarioListExportMixin.py +52 -45
  198. edsl/scenarios/ScenarioListPdfMixin.py +261 -239
  199. edsl/scenarios/__init__.py +4 -3
  200. edsl/shared.py +1 -1
  201. edsl/study/ObjectEntry.py +173 -173
  202. edsl/study/ProofOfWork.py +113 -113
  203. edsl/study/SnapShot.py +80 -80
  204. edsl/study/Study.py +528 -521
  205. edsl/study/__init__.py +4 -4
  206. edsl/surveys/DAG.py +148 -148
  207. edsl/surveys/Memory.py +31 -31
  208. edsl/surveys/MemoryPlan.py +244 -244
  209. edsl/surveys/Rule.py +326 -327
  210. edsl/surveys/RuleCollection.py +387 -385
  211. edsl/surveys/Survey.py +1801 -1229
  212. edsl/surveys/SurveyCSS.py +261 -273
  213. edsl/surveys/SurveyExportMixin.py +259 -259
  214. edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +179 -181
  215. edsl/surveys/SurveyQualtricsImport.py +284 -284
  216. edsl/surveys/__init__.py +3 -5
  217. edsl/surveys/base.py +53 -53
  218. edsl/surveys/descriptors.py +56 -60
  219. edsl/surveys/instructions/ChangeInstruction.py +49 -48
  220. edsl/surveys/instructions/Instruction.py +65 -56
  221. edsl/surveys/instructions/InstructionCollection.py +77 -82
  222. edsl/templates/error_reporting/base.html +23 -23
  223. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  224. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  225. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  226. edsl/templates/error_reporting/interview_details.html +115 -115
  227. edsl/templates/error_reporting/interviews.html +19 -19
  228. edsl/templates/error_reporting/overview.html +4 -4
  229. edsl/templates/error_reporting/performance_plot.html +1 -1
  230. edsl/templates/error_reporting/report.css +73 -73
  231. edsl/templates/error_reporting/report.html +117 -117
  232. edsl/templates/error_reporting/report.js +25 -25
  233. edsl/tools/__init__.py +1 -1
  234. edsl/tools/clusters.py +192 -192
  235. edsl/tools/embeddings.py +27 -27
  236. edsl/tools/embeddings_plotting.py +118 -118
  237. edsl/tools/plotting.py +112 -112
  238. edsl/tools/summarize.py +18 -18
  239. edsl/utilities/SystemInfo.py +28 -28
  240. edsl/utilities/__init__.py +22 -22
  241. edsl/utilities/ast_utilities.py +25 -25
  242. edsl/utilities/data/Registry.py +6 -6
  243. edsl/utilities/data/__init__.py +1 -1
  244. edsl/utilities/data/scooter_results.json +1 -1
  245. edsl/utilities/decorators.py +77 -77
  246. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  247. edsl/utilities/interface.py +627 -627
  248. edsl/utilities/naming_utilities.py +263 -263
  249. edsl/utilities/repair_functions.py +28 -28
  250. edsl/utilities/restricted_python.py +70 -70
  251. edsl/utilities/utilities.py +424 -436
  252. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/LICENSE +21 -21
  253. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/METADATA +10 -12
  254. edsl-0.1.39.dev3.dist-info/RECORD +277 -0
  255. edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
  256. edsl/agents/QuestionOptionProcessor.py +0 -172
  257. edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
  258. edsl/coop/CoopFunctionsMixin.py +0 -15
  259. edsl/coop/ExpectedParrotKeyHandler.py +0 -125
  260. edsl/exceptions/inference_services.py +0 -5
  261. edsl/inference_services/AvailableModelCacheHandler.py +0 -184
  262. edsl/inference_services/AvailableModelFetcher.py +0 -209
  263. edsl/inference_services/ServiceAvailability.py +0 -135
  264. edsl/inference_services/data_structures.py +0 -62
  265. edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -188
  266. edsl/jobs/FetchInvigilator.py +0 -40
  267. edsl/jobs/InterviewTaskManager.py +0 -98
  268. edsl/jobs/InterviewsConstructor.py +0 -48
  269. edsl/jobs/JobsComponentConstructor.py +0 -189
  270. edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
  271. edsl/jobs/RequestTokenEstimator.py +0 -30
  272. edsl/jobs/buckets/TokenBucketAPI.py +0 -211
  273. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  274. edsl/jobs/decorators.py +0 -35
  275. edsl/jobs/jobs_status_enums.py +0 -9
  276. edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
  277. edsl/language_models/ComputeCost.py +0 -63
  278. edsl/language_models/PriceManager.py +0 -127
  279. edsl/language_models/RawResponseHandler.py +0 -106
  280. edsl/language_models/ServiceDataSources.py +0 -0
  281. edsl/language_models/key_management/KeyLookup.py +0 -63
  282. edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
  283. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  284. edsl/language_models/key_management/__init__.py +0 -0
  285. edsl/language_models/key_management/models.py +0 -131
  286. edsl/notebooks/NotebookToLaTeX.py +0 -142
  287. edsl/questions/ExceptionExplainer.py +0 -77
  288. edsl/questions/HTMLQuestion.py +0 -103
  289. edsl/questions/LoopProcessor.py +0 -149
  290. edsl/questions/QuestionMatrix.py +0 -265
  291. edsl/questions/ResponseValidatorFactory.py +0 -28
  292. edsl/questions/templates/matrix/__init__.py +0 -1
  293. edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
  294. edsl/questions/templates/matrix/question_presentation.jinja +0 -20
  295. edsl/results/MarkdownToDocx.py +0 -122
  296. edsl/results/MarkdownToPDF.py +0 -111
  297. edsl/results/TextEditor.py +0 -50
  298. edsl/results/smart_objects.py +0 -96
  299. edsl/results/table_data_class.py +0 -12
  300. edsl/results/table_renderers.py +0 -118
  301. edsl/scenarios/ConstructDownloadLink.py +0 -109
  302. edsl/scenarios/DirectoryScanner.py +0 -96
  303. edsl/scenarios/DocumentChunker.py +0 -102
  304. edsl/scenarios/DocxScenario.py +0 -16
  305. edsl/scenarios/PdfExtractor.py +0 -40
  306. edsl/scenarios/ScenarioSelector.py +0 -156
  307. edsl/scenarios/file_methods.py +0 -85
  308. edsl/scenarios/handlers/__init__.py +0 -13
  309. edsl/scenarios/handlers/csv.py +0 -38
  310. edsl/scenarios/handlers/docx.py +0 -76
  311. edsl/scenarios/handlers/html.py +0 -37
  312. edsl/scenarios/handlers/json.py +0 -111
  313. edsl/scenarios/handlers/latex.py +0 -5
  314. edsl/scenarios/handlers/md.py +0 -51
  315. edsl/scenarios/handlers/pdf.py +0 -68
  316. edsl/scenarios/handlers/png.py +0 -39
  317. edsl/scenarios/handlers/pptx.py +0 -105
  318. edsl/scenarios/handlers/py.py +0 -294
  319. edsl/scenarios/handlers/sql.py +0 -313
  320. edsl/scenarios/handlers/sqlite.py +0 -149
  321. edsl/scenarios/handlers/txt.py +0 -33
  322. edsl/surveys/ConstructDAG.py +0 -92
  323. edsl/surveys/EditSurvey.py +0 -221
  324. edsl/surveys/InstructionHandler.py +0 -100
  325. edsl/surveys/MemoryManagement.py +0 -72
  326. edsl/surveys/RuleManager.py +0 -172
  327. edsl/surveys/Simulator.py +0 -75
  328. edsl/surveys/SurveyToApp.py +0 -141
  329. edsl/utilities/PrettyList.py +0 -56
  330. edsl/utilities/is_notebook.py +0 -18
  331. edsl/utilities/is_valid_variable_name.py +0 -11
  332. edsl/utilities/remove_edsl_version.py +0 -24
  333. edsl-0.1.39.dev2.dist-info/RECORD +0 -352
  334. {edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/WHEEL +0 -0
@@ -1,16 +0,0 @@
1
class DocxScenario:
    """Load a ``.docx`` file and expose its paragraph text as a dict.

    Args:
        docx_path: Path of the Word document to open.
    """

    def __init__(self, docx_path: str):
        # Function-scope import: ``docx`` is only imported when an instance
        # is actually created.
        from docx import Document

        self.doc = Document(docx_path)
        self.docx_path = docx_path

    def get_scenario_dict(self) -> dict:
        """Return the document path together with its full text.

        Returns:
            A dict with two keys: ``"file_path"`` (the path passed to the
            constructor) and ``"text"`` (the text of every paragraph in
            ``self.doc.paragraphs``, joined with newlines).
        """
        text = "\n".join(para.text for para in self.doc.paragraphs)
        return {"file_path": self.docx_path, "text": text}
@@ -1,40 +0,0 @@
1
- import os
2
-
3
-
4
class PdfExtractor:
    """Extract the text of a PDF and wrap it in the parent object's class.

    Args:
        pdf_path: Path of the PDF file to read.
        parent_object: Any object; its class is remembered and later called
            with the extracted dict by :meth:`get_object`.
    """

    def __init__(self, pdf_path: str, parent_object: object):
        self.pdf_path = pdf_path
        self.constructor = parent_object.__class__

    def get_object(self) -> object:
        """Return ``constructor(pdf_dict)`` built from the PDF's contents."""
        return self.constructor(self._get_pdf_dict())

    def _get_pdf_dict(self) -> dict:
        """Read the PDF and return ``{"filename": ..., "text": ...}``.

        Returns:
            A dict holding the file's base name and the text of every page,
            with the text blocks of each page ordered top-to-bottom and then
            left-to-right.

        Raises:
            FileNotFoundError: If ``self.pdf_path`` does not exist.
        """
        # Check the path before importing PyMuPDF so that a missing file is
        # reported as such even when the optional dependency is absent.
        if not os.path.exists(self.pdf_path):
            raise FileNotFoundError(f"The file {self.pdf_path} does not exist.")

        import fitz

        document = fitz.open(self.pdf_path)
        try:
            parts = []
            for page in document:
                blocks = page.get_text("blocks")
                # Sort by y0 first, then x0, to approximate reading order.
                blocks.sort(key=lambda b: (b[1], b[0]))
                # Index 4 of each block tuple is the block's text.
                parts.extend(block[4] + "\n" for block in blocks)
        finally:
            # Always release the file handle, even if extraction fails.
            document.close()

        return {
            "filename": os.path.basename(self.pdf_path),
            "text": "".join(parts),
        }
@@ -1,156 +0,0 @@
1
- from typing import TYPE_CHECKING
2
-
3
-
4
class ScenarioSelector:
    """
    A class for performing advanced field selection on ScenarioList objects,
    including support for wildcard patterns.

    The set of selectable fields is taken from the keys of the first scenario
    in ``scenario_list.data`` (or is empty when there are no scenarios).

    Args:
        scenario_list: The ScenarioList object to perform selections on

    Examples:
        >>> from edsl import Scenario, ScenarioList
        >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3}), Scenario({'test_1': 4, 'test_2': 5, 'other': 6})])
        >>> selector = ScenarioSelector(scenarios)
        >>> selector.select('test*')
        ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
    """

    def __init__(self, scenario_list: "ScenarioList"):
        """Initialize with a ScenarioList object."""
        self.scenario_list = scenario_list
        self.available_fields = (
            list(scenario_list.data[0].keys()) if scenario_list.data else []
        )

    def _match_field_pattern(self, pattern: str, field: str) -> bool:
        """
        Checks if a field name matches a pattern with wildcards.

        Each '*' in the pattern matches any run of characters (including an
        empty one); every other character is matched literally. A pattern
        without '*' must equal the field name exactly.

        Args:
            pattern: The pattern to match against, may contain '*' wildcards
            field: The field name to check

        Examples:
            >>> from edsl.scenarios import ScenarioList, Scenario
            >>> selector = ScenarioSelector(ScenarioList([]))
            >>> selector._match_field_pattern('test*', 'test_field')
            True
            >>> selector._match_field_pattern('*field', 'test_field')
            True
            >>> selector._match_field_pattern('test', 'test')
            True
            >>> selector._match_field_pattern('*test*', 'my_test_field')
            True
            >>> selector._match_field_pattern('te*eld', 'test_field')
            True
        """
        if "*" not in pattern:
            return pattern == field

        import re

        # Escape the literal pieces so only '*' acts as a wildcard, then
        # require the whole field name to match.
        regex = ".*".join(re.escape(piece) for piece in pattern.split("*"))
        return re.fullmatch(regex, field, flags=re.DOTALL) is not None

    def _get_matching_fields(self, patterns: list[str]) -> list[str]:
        """
        Gets all fields that match any of the given patterns.

        Args:
            patterns: List of field patterns, may contain wildcards

        Returns:
            Sorted list of field names that match at least one pattern

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([
            ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3})
            ... ])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector._get_matching_fields(['test*'])
            ['test_1', 'test_2']
        """
        matching_fields = {
            field
            for pattern in patterns
            for field in self.available_fields
            if self._match_field_pattern(pattern, field)
        }
        return sorted(matching_fields)

    def select(self, *fields) -> "ScenarioList":
        """
        Selects scenarios with only the referenced fields.
        Supports wildcard patterns using '*' in field names.

        Args:
            *fields: Field names or patterns to select. Patterns may include '*' for wildcards.

        Returns:
            A new ScenarioList containing only the matched fields.

        Raises:
            ValueError: If no fields match the given patterns.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([
            ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3}),
            ...     Scenario({'test_1': 4, 'test_2': 5, 'other': 6})
            ... ])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector.select('test*')  # Selects all fields starting with 'test'
            ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
            >>> selector.select('*_1')  # Selects all fields ending with '_1'
            ScenarioList([Scenario({'test_1': 1}), Scenario({'test_1': 4})])
            >>> selector.select('test_1', '*_2')  # Multiple patterns
            ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
        """
        # An empty list yields an empty list of the same class, whatever the
        # patterns are.
        if not self.scenario_list.data:
            return self.scenario_list.__class__([])

        patterns = list(fields)
        fields_to_select = self._get_matching_fields(patterns)

        # If no fields match, raise an informative error
        if not fields_to_select:
            raise ValueError(
                f"No fields matched the given patterns: {patterns}. "
                f"Available fields are: {self.available_fields}"
            )

        return self.scenario_list.__class__(
            [scenario.select(fields_to_select) for scenario in self.scenario_list.data]
        )

    def get_available_fields(self) -> list[str]:
        """
        Returns a sorted list of all available fields in the ScenarioList.

        Returns:
            List of field names available for selection.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3})])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector.get_available_fields()
            ['other', 'test_1', 'test_2']
        """
        return sorted(self.available_fields)


if __name__ == "__main__":
    import doctest

    doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -1,85 +0,0 @@
1
- from typing import Optional, Dict, Type
2
- from abc import ABC, abstractmethod
3
- import importlib.metadata
4
- import importlib.util
5
-
6
- from edsl.utilities.is_notebook import is_notebook
7
-
8
-
9
class FileMethods(ABC):
    """Abstract base class for per-file-type handlers.

    Every subclass that defines a ``suffix`` attribute is recorded in a
    registry shared through ``FileMethods._handlers``; the classmethods below
    look handlers up by suffix or by file path.

    Args:
        path: Path of the file the handler instance operates on (optional).
    """

    # Shared registry: lower-cased suffix -> handler class.
    _handlers: Dict[str, Type["FileMethods"]] = {}
    # True once load_plugins() has imported the built-in handlers. Tracked
    # separately from ``_handlers`` because importing a single handler module
    # directly makes the registry non-empty without loading the rest.
    _plugins_loaded: bool = False

    def __init__(self, path: Optional[str] = None):
        self.path = path

    def __init_subclass__(cls) -> None:
        """Register subclasses automatically when they're defined."""
        super().__init_subclass__()
        if hasattr(cls, "suffix"):
            # Lookups lower-case the requested suffix, so store the key the
            # same way.
            FileMethods._handlers[cls.suffix.lower()] = cls

    @classmethod
    def _ensure_plugins_loaded(cls) -> None:
        """Run :meth:`load_plugins` if it has not completed yet."""
        if not FileMethods._plugins_loaded:
            cls.load_plugins()

    @classmethod
    def get_handler(cls, suffix: str) -> Optional[Type["FileMethods"]]:
        """Get the appropriate handler class for a given suffix.

        Returns ``None`` when no handler is registered for ``suffix``
        (compared case-insensitively).
        """
        cls._ensure_plugins_loaded()
        return cls._handlers.get(suffix.lower())

    @classmethod
    def load_plugins(cls):
        """Load all file handler plugins including built-ins and external plugins.

        External plugins are discovered through the ``file_handlers`` entry
        point group. A plugin that fails to load is reported on stdout and
        skipped.
        """
        # Importing the package defines the built-in handler classes, which
        # register themselves via __init_subclass__.
        from edsl.scenarios import handlers  # noqa: F401

        FileMethods._plugins_loaded = True

        try:
            entries = importlib.metadata.entry_points(group="file_handlers")
        except TypeError:
            # Python < 3.10: entry_points() takes no selection keywords and
            # returns a mapping of group name -> entry points.
            entries = importlib.metadata.entry_points().get("file_handlers", [])

        for ep in entries:
            try:
                # Registration happens automatically via __init_subclass__
                ep.load()
            except Exception as e:
                print(f"Failed to load external handler {ep.name}: {e}")

    @classmethod
    def get_handler_for_path(cls, path: str) -> Optional[Type["FileMethods"]]:
        """Get the appropriate handler class for a file path.

        The suffix is the text after the last ``.`` in ``path``; a path
        without any ``.`` has an empty suffix.
        """
        suffix = path.split(".")[-1].lower() if "." in path else ""
        return cls.get_handler(suffix)

    @classmethod
    def create(cls, path: str) -> Optional["FileMethods"]:
        """Create an appropriate handler instance for the given path.

        Returns ``None`` when no handler is registered for the path's suffix.
        """
        handler_class = cls.get_handler_for_path(path)
        if handler_class:
            return handler_class(path)
        return None

    @classmethod
    def supported_file_types(cls):
        """Return the list of registered suffixes."""
        cls._ensure_plugins_loaded()
        return list(cls._handlers.keys())

    @abstractmethod
    def view_system(self):
        """Show the file outside a notebook; implemented by subclasses."""
        ...

    @abstractmethod
    def view_notebook(self):
        """Show the file inside a notebook; implemented by subclasses."""
        ...

    def view(self):
        """Dispatch to view_notebook() or view_system() based on is_notebook()."""
        if is_notebook():
            self.view_notebook()
        else:
            self.view_system()

    @abstractmethod
    def example(self):
        """Produce an example for this file type; implemented by subclasses."""
        ...
@@ -1,13 +0,0 @@
1
- from .pdf import PdfMethods
2
- from .docx import DocxMethods
3
- from .png import PngMethods
4
- from .txt import TxtMethods
5
- from .html import HtmlMethods
6
- from .md import MarkdownMethods
7
- from .csv import CsvMethods
8
- from .json import JsonMethods
9
- from .sql import SqlMethods
10
- from .pptx import PptxMethods
11
- from .latex import LaTeXMethods
12
- from .py import PyMethods
13
- from .sqlite import SQLiteMethods
@@ -1,38 +0,0 @@
1
- import tempfile
2
- from edsl.scenarios.file_methods import FileMethods
3
-
4
-
5
class CsvMethods(FileMethods):
    """File handler for ``.csv`` files."""

    suffix = "csv"

    def view_system(self):
        """Open the CSV with the operating system's default application.

        Prints a message instead of raising when the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("CSV file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform
            # must be told apart with sys.platform.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening CSV: {e}")

    def view_notebook(self):
        """Read the CSV into a pandas DataFrame and display it."""
        import pandas as pd
        from IPython.display import display

        df = pd.read_csv(self.path)
        display(df)

    def example(self):
        """Write a small example CSV to a temporary file and return its path.

        The file is created with ``delete=False``, so it is not removed
        automatically.
        """
        import pandas as pd

        df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as f:
            csv_path = f.name
        # Write only after the handle is closed: on Windows a file that is
        # still open as a NamedTemporaryFile cannot be reopened by name.
        df.to_csv(csv_path, index=False)
        return csv_path
@@ -1,76 +0,0 @@
1
- from edsl.scenarios.file_methods import FileMethods
2
- import os
3
- import tempfile
4
-
5
-
6
class DocxMethods(FileMethods):
    """File handler for ``.docx`` files."""

    suffix = "docx"

    def extract_text(self):
        """Return the text of all paragraphs, joined with newlines.

        The opened document is also stored on ``self.doc``.
        """
        from docx import Document

        self.doc = Document(self.path)
        return "\n".join(para.text for para in self.doc.paragraphs)

    def view_system(self):
        """Open the document with the operating system's default application.

        Prints a message instead of raising when the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("DOCX file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform
            # must be told apart with sys.platform.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening DOCX: {e}")

    def view_notebook(self):
        """Convert the document to HTML with mammoth and display it in a scrollable box."""
        import mammoth
        from IPython.display import HTML, display

        with open(self.path, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)
            html = f"""
            <div style="width: 800px; height: 800px; padding: 20px;
                       border: 1px solid #ccc; overflow-y: auto;">
                {result.value}
            </div>
            """
            display(HTML(html))

    def example(self):
        """Create an example document in a temporary file and return its path.

        The temporary file is created with ``delete=False``, so it is not
        removed automatically.
        """
        from docx import Document

        # NOTE(review): this also writes test_dir/test1.docx relative to the
        # current working directory. It looks like a leftover, but it is kept
        # because callers are not visible here -- confirm before removing.
        os.makedirs("test_dir", exist_ok=True)
        doc1 = Document()
        doc1.add_heading("First Survey")
        doc1.save("test_dir/test1.docx")

        doc2 = Document()
        doc2.add_heading("Second Survey")

        with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
            docx_path = tmp.name
        # Save only after the handle is closed: on Windows a file that is
        # still open as a NamedTemporaryFile cannot be reopened by name.
        doc2.save(docx_path)
        return docx_path


if __name__ == "__main__":
    # example() is an instance method, so it needs a handler instance.
    docx_temp = DocxMethods().example()
    from edsl.scenarios.FileStore import FileStore

    fs = FileStore(docx_temp)
@@ -1,37 +0,0 @@
1
- from edsl.scenarios.file_methods import FileMethods
2
- import tempfile
3
-
4
-
5
class HtmlMethods(FileMethods):
    """File handler for ``.html`` files."""

    suffix = "html"

    def view_system(self):
        """Open the HTML file in the default web browser."""
        import webbrowser
        from pathlib import Path

        # Build the file:// URI from an absolute path; plain string
        # concatenation yields an invalid URI for relative or Windows paths.
        webbrowser.open(Path(self.path).resolve().as_uri())

    def view_notebook(self):
        """Display the HTML file in an 800x800 IFrame."""
        from IPython.display import IFrame, display

        display(IFrame(self.path, width=800, height=800))

    def example(self):
        """Write a minimal HTML page to a temporary file and return its path.

        The file is created with ``delete=False``, so it is not removed
        automatically.
        """
        html_string = b"""
        <html>
        <head>
        <title>Test</title>
        </head>
        <body>
        <h1>Hello, World!</h1>
        </body>
        </html>
        """

        with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
            f.write(html_string)
        return f.name
@@ -1,111 +0,0 @@
1
- from edsl.scenarios.file_methods import FileMethods
2
- import tempfile
3
- import json
4
- from typing import Optional, Dict, Any
5
-
6
-
7
class JsonMethods(FileMethods):
    """File handler for JSON files: viewing, validation and formatting."""

    suffix = "json"

    def view_system(self):
        """Open the JSON file with the operating system's default application.

        Prints a message instead of raising when the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("JSON file was not found.")
            return

        try:
            # ``os.name`` is "posix" on both macOS and Linux, so it cannot
            # tell them apart; ``sys.platform`` can. Testing ``os.name`` first
            # sent Linux to the macOS-only ``open`` command and left the
            # ``xdg-open`` branch unreachable.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening JSON: {e}")

    def view_notebook(self):
        """Display the parsed JSON and a download link in a notebook.

        Parsing and read errors are printed rather than raised.
        """
        from IPython.display import FileLink, JSON, display
        import json

        # Read and parse the JSON file
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            # Display formatted JSON
            display(JSON(content))

            # Provide download link
            display(FileLink(self.path))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
        except Exception as e:
            print(f"Error reading file: {e}")

    def validate_json(self, schema: Optional[Dict[str, Any]] = None) -> bool:
        """
        Validate the JSON file against a schema if provided,
        or check if it's valid JSON if no schema is provided.

        Returns:
            True when the file parses (and matches ``schema`` when given);
            False otherwise, after printing the reason.
        """
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            if schema is not None:
                # Imported lazily so jsonschema is only needed when a schema
                # is actually supplied.
                from jsonschema import validate

                validate(instance=content, schema=schema)

            return True
        except json.JSONDecodeError as e:
            print(f"Invalid JSON format: {e}")
            return False
        except Exception as e:
            print(f"Validation error: {e}")
            return False

    def pretty_print(self):
        """Pretty print the JSON content with proper indentation."""
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            pretty_json = json.dumps(content, indent=2, sort_keys=True)
            print(pretty_json)
        except Exception as e:
            print(f"Error pretty printing JSON: {e}")

    def example(self):
        """Write a sample JSON document to a temporary file.

        Returns:
            str: Path of the temporary ``.json`` file. It is created with
                ``delete=False``, so the caller must remove it.
        """
        sample_json = {
            "person": {
                "name": "John Doe",
                "age": 30,
                "contact": {"email": "john@example.com", "phone": "+1-555-555-5555"},
                "interests": ["programming", "data science", "machine learning"],
                "active": True,
                "metadata": {"last_updated": "2024-01-01", "version": 1.0},
            }
        }

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".json", mode="w", encoding="utf-8"
        ) as f:
            json.dump(sample_json, f, indent=2)
            return f.name

    def format_file(self):
        """Read, format, and write back the JSON with consistent formatting.

        Returns:
            bool: True on success; False (after printing the error) otherwise.
        """
        try:
            # Read the current content
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            # Write back with consistent formatting
            with open(self.path, "w", encoding="utf-8") as f:
                json.dump(content, f, indent=2, sort_keys=True)

            return True
        except Exception as e:
            print(f"Error formatting JSON file: {e}")
            return False
@@ -1,5 +0,0 @@
1
- from edsl.scenarios.handlers.txt import TxtMethods
2
-
3
-
4
class LaTeXMethods(TxtMethods):
    """File handler for LaTeX files; adds nothing on top of ``TxtMethods``."""
@@ -1,51 +0,0 @@
1
- from edsl.scenarios.file_methods import FileMethods
2
- import tempfile
3
-
4
-
5
class MarkdownMethods(FileMethods):
    """File handler for Markdown files: viewing and example generation."""

    suffix = "md"

    def view_system(self):
        """Open the Markdown file with the system's default application.

        Prints a message instead of raising when the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("Markdown file was not found.")
            return

        try:
            # ``os.name`` is "posix" on both macOS and Linux, so it cannot
            # tell them apart; ``sys.platform`` can. Testing ``os.name`` first
            # sent Linux to the macOS-only ``open`` command and left the
            # ``xdg-open`` branch unreachable.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening Markdown: {e}")

    def view_notebook(self):
        """Render the Markdown content and a download link in a notebook."""
        from IPython.display import FileLink, Markdown, display

        # First display the content of the markdown file
        with open(self.path, "r", encoding="utf-8") as f:
            content = f.read()
        display(Markdown(content))

        # Then provide a download link
        display(FileLink(self.path))

    def example(self):
        """Write a sample Markdown document to a temporary file.

        Returns:
            str: Path of the temporary ``.md`` file. It is created with
                ``delete=False``, so the caller must remove it.
        """
        markdown_content = """# Sample Markdown

## Features
- **Bold text** demonstration
- *Italic text* demonstration
- Code block example:
```python
print("Hello, World!")
```
"""
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as f:
            f.write(markdown_content)
            return f.name