edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +413 -332
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +57 -49
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +1071 -867
- edsl/agents/AgentList.py +551 -413
- edsl/agents/Invigilator.py +284 -233
- edsl/agents/InvigilatorBase.py +257 -270
- edsl/agents/PromptConstructor.py +272 -354
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +2 -3
- edsl/agents/descriptors.py +99 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -117
- edsl/auto/StageBase.py +243 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +74 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +218 -224
- edsl/base/Base.py +279 -279
- edsl/config.py +177 -157
- edsl/conversation/Conversation.py +290 -290
- edsl/conversation/car_buying.py +59 -58
- edsl/conversation/chips.py +95 -95
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +1106 -1028
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +573 -555
- edsl/data/CacheEntry.py +230 -233
- edsl/data/CacheHandler.py +168 -149
- edsl/data/RemoteCacheSync.py +186 -78
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +5 -4
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +74 -73
- edsl/enums.py +202 -175
- edsl/exceptions/BaseException.py +21 -21
- edsl/exceptions/__init__.py +54 -54
- edsl/exceptions/agents.py +54 -42
- edsl/exceptions/cache.py +5 -5
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +109 -91
- edsl/exceptions/results.py +29 -29
- edsl/exceptions/scenarios.py +29 -22
- edsl/exceptions/surveys.py +37 -37
- edsl/inference_services/AnthropicService.py +106 -87
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -120
- edsl/inference_services/AzureAI.py +215 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +143 -148
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +80 -147
- edsl/inference_services/InferenceServicesCollection.py +138 -97
- edsl/inference_services/MistralAIService.py +120 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +236 -224
- edsl/inference_services/PerplexityService.py +160 -163
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -89
- edsl/inference_services/TogetherAIService.py +172 -170
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +41 -41
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +43 -56
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +823 -898
- edsl/jobs/JobsChecks.py +172 -147
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -268
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +283 -251
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +396 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
- edsl/jobs/runners/JobsRunnerStatus.py +297 -330
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +470 -450
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +161 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +626 -668
- edsl/language_models/ModelList.py +164 -155
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +2 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +156 -156
- edsl/language_models/utilities.py +65 -64
- edsl/notebooks/Notebook.py +263 -258
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +352 -362
- edsl/prompts/__init__.py +2 -2
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -664
- edsl/questions/QuestionBasePromptsMixin.py +221 -217
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +180 -182
- edsl/questions/QuestionFreeText.py +113 -114
- edsl/questions/QuestionFunctional.py +166 -166
- edsl/questions/QuestionList.py +223 -231
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +330 -286
- edsl/questions/QuestionNumerical.py +151 -153
- edsl/questions/QuestionRank.py +314 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/SimpleAskMixin.py +74 -73
- edsl/questions/__init__.py +27 -26
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +90 -87
- edsl/questions/derived/QuestionTopK.py +93 -93
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +427 -413
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
- edsl/questions/question_registry.py +177 -177
- edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
- edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/CSSParameterizer.py +108 -108
- edsl/results/Dataset.py +587 -424
- edsl/results/DatasetExportMixin.py +594 -731
- edsl/results/DatasetTree.py +295 -275
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +557 -465
- edsl/results/Results.py +1183 -1165
- edsl/results/ResultsExportMixin.py +45 -43
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/TableDisplay.py +125 -198
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +2 -2
- edsl/results/file_exports.py +252 -0
- edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
- edsl/results/{Selector.py → results_selector.py} +145 -135
- edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +77 -77
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +511 -632
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +498 -601
- edsl/scenarios/ScenarioHtmlMixin.py +65 -64
- edsl/scenarios/ScenarioList.py +1458 -1287
- edsl/scenarios/ScenarioListExportMixin.py +45 -52
- edsl/scenarios/ScenarioListPdfMixin.py +239 -261
- edsl/scenarios/__init__.py +3 -4
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +38 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +521 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +327 -326
- edsl/surveys/RuleCollection.py +385 -387
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1280 -1801
- edsl/surveys/SurveyCSS.py +273 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +5 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +60 -56
- edsl/surveys/instructions/ChangeInstruction.py +48 -49
- edsl/surveys/instructions/Instruction.py +56 -65
- edsl/surveys/instructions/InstructionCollection.py +82 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +19 -19
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/interface.py +627 -627
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -263
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +436 -424
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/LICENSE +21 -21
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/METADATA +13 -11
- edsl-0.1.39.dev5.dist-info/RECORD +358 -0
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/WHEEL +1 -1
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/registry.py +0 -190
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.39.dev3.dist-info/RECORD +0 -277
edsl/auto/StageBase.py
CHANGED
@@ -1,230 +1,243 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
|
3
|
-
from
|
4
|
-
import
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
self.
|
15
|
-
|
16
|
-
#
|
17
|
-
#
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
previous_stage:
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
return
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
return
|
45
|
-
|
46
|
-
def
|
47
|
-
"""
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
)
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
"""
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
def
|
102
|
-
stage_processor = StageProcessingClosure(
|
103
|
-
stage_func=lambda obj: obj.
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
if
|
155
|
-
raise
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
)
|
162
|
-
|
163
|
-
def
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
class
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
import json
|
3
|
+
from typing import Dict, List, Any, TypeVar, Generator, Dict, Callable
|
4
|
+
from dataclasses import dataclass, field, KW_ONLY, fields, asdict
|
5
|
+
import textwrap
|
6
|
+
|
7
|
+
|
8
|
+
class ExceptionPipesDoNotFit(Exception):
    """Raised when two stages cannot be chained together.

    ``StageBase._validate_connection`` raises this when the ``output`` of one
    stage is not equal to the ``input`` of the stage that follows it.
    """
|
10
|
+
|
11
|
+
|
12
|
+
class StageProcessingClosure:
    """Collects the result of applying a function to a series of objects.

    ``func`` is called once per object and stores ``stage_func(obj)`` in
    ``data``; calling the instance itself returns ``reduction_func(data)``.
    """

    def __init__(self, stage_func: Callable, reduction_func=lambda x: x):
        # The reduction function is applied to the collected results when the
        # instance is called. The default hands the list back unchanged; a
        # caller may instead fold the list into something else, e.g. a dict.
        self.stage_func = stage_func
        self.reduction_func = reduction_func
        self.data = []

    def func(self, obj: "FlowDataBase") -> None:
        "Apply the stage function to ``obj`` and record the outcome."
        outcome = self.stage_func(obj)
        self.data.append(outcome)

    def __call__(self):
        "Return the reduction of everything recorded so far."
        collected = self.data
        return self.reduction_func(collected)
|
27
|
+
|
28
|
+
|
29
|
+
@dataclass
|
30
|
+
class FlowDataBase:
|
31
|
+
"""Base class for dataclasses that are passed between stages."""
|
32
|
+
|
33
|
+
_: KW_ONLY
|
34
|
+
# previous_stage: Dict = field(default_factory=dict)
|
35
|
+
previous_stage: Any = None
|
36
|
+
sent_to_stage_name: str = field(default_factory=str)
|
37
|
+
came_from_stage_name: str = field(default_factory=str)
|
38
|
+
|
39
|
+
def to_dict(self):
|
40
|
+
return asdict(self)
|
41
|
+
|
42
|
+
@classmethod
|
43
|
+
def from_dict(cls, data: dict):
|
44
|
+
return cls(**data)
|
45
|
+
|
46
|
+
def __getitem__(self, key):
|
47
|
+
"""Allows dictionary-style getting."""
|
48
|
+
return getattr(self, key)
|
49
|
+
|
50
|
+
def __setitem__(self, key, value):
|
51
|
+
"""Allows dictionary-style setting."""
|
52
|
+
return setattr(self, key, value)
|
53
|
+
|
54
|
+
def current_values(self):
|
55
|
+
"""Returns a dictionary of the current values of the dataclass"""
|
56
|
+
to_exclude = ["sent_to_stage_name", "came_from_stage_name", "previous_stage"]
|
57
|
+
d = asdict(self)
|
58
|
+
[d.pop(key) for key in to_exclude]
|
59
|
+
return d
|
60
|
+
|
61
|
+
def stage_input_output(self):
|
62
|
+
return {
|
63
|
+
"came_from": self.came_from_stage_name,
|
64
|
+
"sent_to": self.sent_to_stage_name,
|
65
|
+
}
|
66
|
+
|
67
|
+
def _align_values_with_padding(
|
68
|
+
self, stages
|
69
|
+
) -> Generator[Dict[str, str], None, None]:
|
70
|
+
"Pads out the the names of the stages so they are aligned when printing"
|
71
|
+
|
72
|
+
def longest_value(stage):
|
73
|
+
return max([len(v) for v in stage.values()])
|
74
|
+
|
75
|
+
max_length = max([longest_value(stage) for stage in stages])
|
76
|
+
for stage in stages:
|
77
|
+
new_stage = {k: v.ljust(max_length) for k, v in stage.items()}
|
78
|
+
yield new_stage
|
79
|
+
|
80
|
+
def _reduce(self, stage_processor: StageProcessingClosure) -> Dict[str, dict]:
|
81
|
+
"""Applies some function defined in stage_processor to each stage in the chain, working from back to front
|
82
|
+
|
83
|
+
The stage_processor will record the results of the function applied to each stage in
|
84
|
+
an instance of the StageProcessingClosure class.
|
85
|
+
The results can be accessed by calling the StageProcessingClosure instance.
|
86
|
+
This somewhat convoluted approach is necessary because the stages are connected in a chain and
|
87
|
+
we want a way to access the results of the function applied to each stage in the chain without
|
88
|
+
writing the while-loop over and over again.
|
89
|
+
"""
|
90
|
+
stage_processor.func(self)
|
91
|
+
current_pipe = self
|
92
|
+
while True:
|
93
|
+
if current_pipe.previous_stage is None:
|
94
|
+
break
|
95
|
+
else:
|
96
|
+
current_pipe = current_pipe.previous_stage
|
97
|
+
stage_processor.func(
|
98
|
+
current_pipe
|
99
|
+
) # the result is getting stored in stage_processor.data
|
100
|
+
|
101
|
+
def combined_results(self) -> Dict[str, dict]:
|
102
|
+
stage_processor = StageProcessingClosure(
|
103
|
+
stage_func=lambda obj: obj.current_values(),
|
104
|
+
reduction_func=lambda x: {k: v for d in x for k, v in d.items()},
|
105
|
+
)
|
106
|
+
self._reduce(stage_processor)
|
107
|
+
return stage_processor()
|
108
|
+
|
109
|
+
def flow_history(self):
|
110
|
+
stage_processor = StageProcessingClosure(
|
111
|
+
stage_func=lambda obj: obj.stage_input_output()
|
112
|
+
)
|
113
|
+
self._reduce(stage_processor)
|
114
|
+
return stage_processor()
|
115
|
+
|
116
|
+
def visualize_flow(self) -> str:
|
117
|
+
"""Visualize the flow of data through the chain"""
|
118
|
+
stages = self.flow_history()
|
119
|
+
new_stages = list(self._align_values_with_padding(stages))
|
120
|
+
new_stages.reverse()
|
121
|
+
return tuple(new_stages)
|
122
|
+
|
123
|
+
|
124
|
+
class StageBase(ABC):
    """Abstract base class for one step of a chain of processing stages.

    A concrete subclass must set the class variables ``input`` and ``output``
    (checked in ``__init_subclass__``) and implement ``handle_data``. Stages
    are chained by passing ``next_stage=<stage instance>`` to the constructor;
    ``process`` then forwards its result to that stage.
    """

    # Both are used as classes elsewhere in this class: ``input`` is called
    # to build the input object in ``func`` and passed to ``fields``.
    input: "type[FlowDataBase]" = NotImplemented
    output: "type[FlowDataBase]" = NotImplemented

    def __init__(self, **kwargs):
        """Store every keyword argument as an attribute and check the link to ``next_stage``.

        Raises:
            ExceptionPipesDoNotFit: if a ``next_stage`` is given whose
                ``input`` does not equal this stage's ``output``.
        """
        for key, value in kwargs.items():
            setattr(self, key, value)

        # Treat a missing ``next_stage`` and an explicit ``next_stage=None``
        # alike: both mean this stage is the end of the chain.
        if getattr(self, "next_stage", None) is not None:
            self._validate_connection(self.next_stage)
        else:
            self.next_stage = None

    @classmethod
    def function_parameters(cls):
        """Return the dataclass fields of this stage's ``input`` class."""
        return fields(cls.input)

    @classmethod
    def func(cls, **kwargs):
        "This provides a shortcut for running a stage by passing keyword arguments to the input function."
        input_data = cls.input(**kwargs)
        return cls().process(input_data)

    @abstractmethod
    def handle_data(self, data):
        "This implements how the stage actually handles the passed in data"
        raise NotImplementedError

    def _validate_connection(self, stage):
        "Checks that the outputs of the first stage match the inputs of the second stage"
        if self.output != stage.input:
            raise ExceptionPipesDoNotFit(
                textwrap.dedent(
                    f"""\
                    Stage \"{self.__class__.__name__}\" cannot be connected to stage \"{stage.__class__.__name__}\".
                    The outputs of the first stage {self.output} do not match the inputs of the second stage, {stage.input}."""
                )
            )

    def __init_subclass__(cls, **kwargs):
        "Checks that the subclass has the required class variables of input & output"
        super().__init_subclass__(**kwargs)
        # The messages name the attributes exactly as they must be declared.
        if cls.input is NotImplemented:
            raise NotImplementedError(
                f"Class {cls.__name__} lacks required class variable 'input'"
            )
        if cls.output is NotImplemented:
            raise NotImplementedError(
                f"Class {cls.__name__} lacks required class variable 'output'"
            )

    def process(self, data):
        """Run this stage on ``data`` and pass the result down the chain.

        The stage name is recorded on both the incoming and the produced
        object, and the produced object is linked back to ``data`` through
        ``previous_stage``. Returns the output of the last stage in the chain.
        """
        print(f"Running stage: {self.__class__.__name__}")
        data.sent_to_stage_name = self.__class__.__name__
        processed_data = self.handle_data(data)
        processed_data.came_from_stage_name = self.__class__.__name__
        processed_data.previous_stage = data
        if self.next_stage is not None:
            return self.next_stage.process(processed_data)
        return processed_data
|
185
|
+
|
186
|
+
|
187
|
+
if __name__ == "__main__":
    pass
    # NOTE: running this module as a script currently does nothing. The
    # commented-out statements below are a disabled manual smoke test for
    # StageBase; they are kept for reference only.

    # Defining a stage without `input`/`output` should raise NotImplementedError.
    # try:

    #     class StageMissing(StageBase):
    #         def handle_data(self, data):
    #             return data

    # except NotImplementedError as e:
    #     print(e)
    # else:
    #     raise Exception("Should have raised NotImplementedError")

    # Defining a stage with `output` but no `input` should raise as well.
    # try:

    #     class StageMissingInput(StageBase):
    #         output = FlowDataBase

    # except NotImplementedError as e:
    #     print(e)

    # else:
    #     raise Exception("Should have raised NotImplementedError")

    # A well-formed stage, run on its own and then chained three times.
    # @dataclass
    # class MockInputOutput(FlowDataBase):
    #     text: str

    # class StageTest(StageBase):
    #     input = MockInputOutput
    #     output = MockInputOutput

    #     def handle_data(self, data):
    #         return self.output(text=data["text"] + "processed")

    # result = StageTest().process(MockInputOutput(text="Hello world!"))
    # print(result.text)

    # pipeline = StageTest(next_stage=StageTest(next_stage=StageTest()))
    # result = pipeline.process(MockInputOutput(text="Hello world!"))
    # print(result.text)

    # Chaining stages whose output/input classes differ should raise
    # ExceptionPipesDoNotFit.
    # class BadMockInput(FlowDataBase):
    #     text: str
    #     other: str

    # class StageBad(StageBase):
    #     input = BadMockInput
    #     output = BadMockInput

    #     def handle_data(self, data):
    #         return self.output(text=data["text"] + "processed")

    # try:
    #     pipeline = StageTest(next_stage=StageBad(next_stage=StageTest()))
    # except ExceptionPipesDoNotFit as e:
    #     print(e)
|