edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +430 -113
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/dataset/dataset_operations_mixin.py +1492 -0
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +321 -155
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +10 -16
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +420 -216
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/file_store.py +755 -0
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +20 -21
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -426
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/auto/AutoStudy.py +0 -130
- edsl/auto/StageBase.py +0 -243
- edsl/auto/StageGenerateSurvey.py +0 -178
- edsl/auto/StageLabelQuestions.py +0 -125
- edsl/auto/StagePersona.py +0 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
- edsl/auto/StagePersonaDimensionValues.py +0 -74
- edsl/auto/StagePersonaDimensions.py +0 -69
- edsl/auto/StageQuestions.py +0 -74
- edsl/auto/SurveyCreatorPipeline.py +0 -21
- edsl/auto/utilities.py +0 -218
- edsl/base/Base.py +0 -279
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -580
- edsl/data/CacheEntry.py +0 -230
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -539
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/DatasetExportMixin.py +0 -911
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/FileStore.py +0 -564
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1306
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.46.dist-info/RECORD +0 -366
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
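Most of this release is a repackaging: CamelCase module files (Jobs.py, ScenarioList.py, LanguageModel.py, ...) become snake_case modules grouped into new subpackages such as `base`, `buckets`, `caching`, `interviews`, `invigilators`, `key_management`, `tasks`, and `tokens`. The hunks below show the corresponding import and docstring changes in the `edsl/jobs` package. As a rough orientation, here is a minimal sketch that uses only import paths and calls appearing verbatim in the doctests further down; it is illustrative and not verified against the released wheel:

```python
# Import paths taken from the doctests in the diff below (0.1.48 layout).
from edsl.jobs import Jobs
from edsl.surveys import Survey

# Both constructions appear in the doctests shown further down.
job = Jobs(survey=Survey.example())
example_job = Jobs.example()

# prompts() returns a Dataset of prompts with per-question cost estimates.
print(example_job.prompts())
```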
edsl/jobs/{JobsChecks.py → jobs_checks.py}
@@ -1,8 +1,16 @@
+"""
+Checks a Jobs object for missing API keys and other requirements.
+"""
+
 import os
 from edsl.exceptions.general import MissingAPIKeyError


 class JobsChecks:
+    """
+    Checks a Jobs object for missing API keys and other requirements.
+    """
+
     def __init__(self, jobs):
         """Checks a Jobs object for missing API keys and other requirements."""
         self.jobs = jobs
@@ -42,9 +50,7 @@ class JobsChecks:

     def user_has_ep_api_key(self) -> bool:
         """
-
-
-        Otherwise, returns False.
+        Does the user have an EXPECTED_PARROT_API_KEY in their env?
         """

         coop_api_key = os.getenv("EXPECTED_PARROT_API_KEY")
@@ -54,9 +60,9 @@ class JobsChecks:
         else:
             return False

-    def user_has_all_model_keys(self):
+    def user_has_all_model_keys(self) -> bool:
         """
-
+        Does the user have all the model keys required to run their job?

         Otherwise, returns False.
         """
@@ -71,7 +77,7 @@ class JobsChecks:

     def needs_external_llms(self) -> bool:
         """
-
+        Does the job need external LLMs to run?

         Otherwise, returns False.
         """
@@ -99,8 +105,10 @@ class JobsChecks:
         else:
             return True

-    def needs_key_process(self):
+    def needs_key_process(self) -> bool:
         """
+        Determines if the user needs to go through the key process.
+
         A User needs the key process when:
         1. They don't have all the model keys
         2. They don't have the EP API
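The hunks above mainly add docstrings and `-> bool` annotations to the `JobsChecks` predicates. A minimal usage sketch, assuming `JobsChecks` is importable from the renamed module and reusing `Jobs.example()` from the doctests elsewhere in this diff (the import path is inferred from the rename, not verified):

```python
from edsl.jobs import Jobs
from edsl.jobs.jobs_checks import JobsChecks  # path assumed from the rename above

checks = JobsChecks(Jobs.example())

# Each predicate now advertises a bool return, per the added type hints.
print(checks.user_has_ep_api_key())      # EXPECTED_PARROT_API_KEY set in the env?
print(checks.user_has_all_model_keys())  # keys for every model the job uses?
print(checks.needs_external_llms())      # does the job need external LLMs at all?
print(checks.needs_key_process())        # missing model keys and no EP key?
```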
edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py}
@@ -1,11 +1,11 @@
 from typing import Union, Sequence, TYPE_CHECKING

 if TYPE_CHECKING:
-    from
-    from
-    from
-    from
-
+    from ..agents import Agent
+    from ..language_models import LanguageModel
+    from ..scenarios import Scenario
+    from .jobs import Jobs
+    from ..invigilators import InvigilatorBase

 class JobsComponentConstructor:
     "Handles the creation of Agents, Scenarios, and LanguageModels in a job."
@@ -50,7 +50,8 @@ class JobsComponentConstructor:
         - scenarios: traits of new scenarios are combined with traits of old existing. New scenarios will overwrite overlapping traits, and do not increase the number of scenarios in the instance
         - models: new models overwrite old models.
         """
-
+
+        from ..dataset.dataset import Dataset

         if isinstance(
             args[0], Dataset
@@ -107,9 +108,10 @@ class JobsComponentConstructor:
     def _get_current_objects_of_this_type(
         self, object: Union["Agent", "Scenario", "LanguageModel"]
     ) -> tuple[list, str]:
-
-        from
-        from
+
+        from ..agents import Agent
+        from ..scenarios import Scenario
+        from ..language_models import LanguageModel

         """Return the current objects of the same type as the first argument.

@@ -138,8 +140,8 @@ class JobsComponentConstructor:

     @staticmethod
     def _get_empty_container_object(object):
-        from edsl.agents
-        from edsl.scenarios
+        from edsl.agents import AgentList
+        from edsl.scenarios import ScenarioList

         return {"Agent": AgentList([]), "Scenario": ScenarioList([])}.get(
             object.__class__.__name__, []
@@ -153,7 +155,8 @@ class JobsComponentConstructor:
         For example, if the user passes in 3 agents,
         and there are 2 existing agents, this will create 6 new agents
         >>> from edsl.jobs import Jobs
-        >>>
+        >>> from edsl.surveys import Survey
+        >>> JobsComponentConstructor(Jobs(survey = Survey.example()))._merge_objects([1,2,3], [4,5,6])
         [5, 6, 7, 6, 7, 8, 7, 8, 9]
         """
         new_objects = JobsComponentConstructor._get_empty_container_object(
@@ -166,11 +169,11 @@ class JobsComponentConstructor:

     @staticmethod
     def _get_container_class(object):
-        from
-        from
-        from
-        from
-        from
+        from ..agents import AgentList
+        from ..agents import Agent
+        from ..scenarios import Scenario
+        from ..scenarios import ScenarioList
+        from ..language_models import ModelList

         if isinstance(object, Agent):
             return AgentList
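The restored doctest above pins down what `_merge_objects` does: each passed object is combined with each existing object, so 3 incoming agents against 2 existing agents yield 6, and for the integer example the combination is plain addition over the Cartesian product. A standalone sketch of that combination rule (not the edsl implementation, just the arithmetic the doctest implies):

```python
from itertools import product

def merge_objects(passed, existing):
    # Combine every passed object with every existing one; for the integer
    # doctest the "combination" reduces to addition.
    return [a + b for a, b in product(passed, existing)]

print(merge_objects([1, 2, 3], [4, 5, 6]))
# [5, 6, 7, 6, 7, 8, 7, 8, 9]  (matches the doctest output above)
```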
edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py}
@@ -2,8 +2,9 @@ from typing import Generator, TYPE_CHECKING
 from itertools import product

 if TYPE_CHECKING:
-    from
-
+    from ..interviews import Interview
+    from .jobs import Jobs
+    from ..caching import Cache

 class InterviewsConstructor:
     def __init__(self, jobs: "Jobs", cache: "Cache"):
@@ -12,14 +13,14 @@ class InterviewsConstructor:

     def create_interviews(self) -> Generator["Interview", None, None]:
         """
-
+        Generates interviews.

         Note that this sets the agents, model and scenarios if they have not been set. This is a side effect of the method.
         This is useful because a user can create a job without setting the agents, models, or scenarios, and the job will still run,
         with us filling in defaults.

         """
-        from
+        from ..interviews import Interview

         agent_index = {
             hash(agent): index for index, agent in enumerate(self.jobs.agents)
@@ -35,7 +36,7 @@ class InterviewsConstructor:
             self.jobs.agents, self.jobs.scenarios, self.jobs.models
         ):
             yield Interview(
-                survey=self.jobs.survey.draw(),
+                survey=self.jobs.survey.draw(),  # this draw is to support shuffling of question options
                 agent=agent,
                 scenario=scenario,
                 model=model,
@@ -48,3 +49,7 @@
                     "scenario": scenario_index[hash(scenario)],
                 },
             )
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
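`create_interviews` above walks `product(self.jobs.agents, self.jobs.scenarios, self.jobs.models)` and yields one `Interview` per combination, recording the index of the agent and scenario via hash lookups. A minimal sketch of that fan-out pattern using plain dictionaries instead of `Interview` objects (illustrative only; the real constructor also takes the survey, cache, and more):

```python
from itertools import product

agents = ["agent_a", "agent_b"]
scenarios = ["scenario_1", "scenario_2", "scenario_3"]
models = ["model_x"]

# Hash-based index lookups, mirroring the constructor above.
agent_index = {hash(a): i for i, a in enumerate(agents)}
scenario_index = {hash(s): i for i, s in enumerate(scenarios)}

interviews = [
    {
        "agent": agent,
        "scenario": scenario,
        "model": model,
        "indices": {
            "agent": agent_index[hash(agent)],
            "scenario": scenario_index[hash(scenario)],
        },
    }
    for agent, scenario, model in product(agents, scenarios, models)
]
print(len(interviews))  # 2 agents x 3 scenarios x 1 model = 6 combinations
```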
edsl/jobs/jobs_pricing_estimation.py (new file)
@@ -0,0 +1,347 @@
+import logging
+import math
+
+from typing import List, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .jobs import Jobs
+    from ..agents import AgentList
+    from ..scenarios import ScenarioList
+    from ..surveys import Survey
+    from .interviews.Interview import Interview
+
+from .fetch_invigilator import FetchInvigilator
+from ..caching import CacheEntry
+from ..dataset import Dataset
+
+logger = logging.getLogger(__name__)
+
+
+class PromptCostEstimator:
+
+    DEFAULT_INPUT_PRICE_PER_TOKEN = 0.000001
+    DEFAULT_OUTPUT_PRICE_PER_TOKEN = 0.000001
+    CHARS_PER_TOKEN = 4
+    OUTPUT_TOKENS_PER_INPUT_TOKEN = 0.75
+    PIPING_MULTIPLIER = 2
+
+    def __init__(self,
+                 system_prompt: str,
+                 user_prompt: str,
+                 price_lookup: dict,
+                 inference_service: str,
+                 model: str):
+        self.system_prompt = system_prompt
+        self.user_prompt = user_prompt
+        self.price_lookup = price_lookup
+        self.inference_service = inference_service
+        self.model = model
+
+    @staticmethod
+    def get_piping_multiplier(prompt: str):
+        """Returns 2 if a prompt includes Jinja braces, and 1 otherwise."""
+
+        if "{{" in prompt and "}}" in prompt:
+            return PromptCostEstimator.PIPING_MULTIPLIER
+        return 1
+
+    @property
+    def key(self):
+        return (self.inference_service, self.model)
+
+    @property
+    def relevant_prices(self):
+        try:
+            return self.price_lookup[self.key]
+        except KeyError:
+            return {}
+
+    def input_price_per_token(self):
+        try:
+            return self.relevant_prices["input"]["service_stated_token_price"] / self.relevant_prices["input"]["service_stated_token_qty"]
+        except KeyError:
+            import warnings
+            warnings.warn(
+                "Price data could not be retrieved. Using default estimates for input and output token prices. Input: $1.00 / 1M tokens; Output: $1.00 / 1M tokens"
+            )
+            return self.DEFAULT_INPUT_PRICE_PER_TOKEN
+
+    def output_price_per_token(self):
+        try:
+            return self.relevant_prices["output"]["service_stated_token_price"] / self.relevant_prices["output"]["service_stated_token_qty"]
+        except KeyError:
+            return self.DEFAULT_OUTPUT_PRICE_PER_TOKEN
+
+    def __call__(self):
+        user_prompt_chars = len(str(self.user_prompt)) * self.get_piping_multiplier(
+            str(self.user_prompt)
+        )
+        system_prompt_chars = len(str(self.system_prompt)) * self.get_piping_multiplier(
+            str(self.system_prompt)
+        )
+        # Convert into tokens (1 token approx. equals 4 characters)
+        input_tokens = (user_prompt_chars + system_prompt_chars) // self.CHARS_PER_TOKEN
+        output_tokens = math.ceil(self.OUTPUT_TOKENS_PER_INPUT_TOKEN * input_tokens)
+
+        cost = (
+            input_tokens * self.input_price_per_token()
+            + output_tokens * self.output_price_per_token()
+        )
+        return {
+            "input_tokens": input_tokens,
+            "output_tokens": output_tokens,
+            "cost_usd": cost,
+        }
+
+
+class JobsPrompts:
+
+    relevant_keys = ["user_prompt", "system_prompt", "interview_index", "question_name", "scenario_index", "agent_index", "model", "estimated_cost", "cache_keys"]
+
+    """This generates the prompts for a job for price estimation purposes.
+
+    It does *not* do the full job execution---that requires an LLM.
+    So assumptions are made about expansion of Jinja braces, etc.
+    """
+
+
+    @classmethod
+    def from_jobs(cls, jobs: "Jobs"):
+        """Construct a JobsPrompts object from a Jobs object."""
+        interviews = jobs.interviews()
+        agents = jobs.agents
+        scenarios = jobs.scenarios
+        survey = jobs.survey
+        return cls(
+            interviews=interviews,
+            agents=agents,
+            scenarios=scenarios,
+            survey=survey
+        )
+
+    def __init__(self, interviews: List['Interview'], agents: 'AgentList', scenarios: 'ScenarioList', survey: 'Survey'):
+        """Initialize with extracted components rather than a Jobs object."""
+        self.interviews = interviews
+        self.agents = agents
+        self.scenarios = scenarios
+        self.survey = survey
+        self._price_lookup = None
+
+        self._agent_lookup = {agent: idx for idx, agent in enumerate(self.agents)}
+        self._scenario_lookup = {
+            scenario: idx for idx, scenario in enumerate(self.scenarios)
+        }
+
+    @property
+    def price_lookup(self) -> dict:
+        """Fetches the price lookup from Coop if it is not already cached."""
+        if self._price_lookup is None:
+            from edsl.coop.coop import Coop
+
+            c = Coop()
+            self._price_lookup = c.fetch_prices()
+        return self._price_lookup
+
+    def _process_one_invigilator(self, invigilator: 'Invigilator', interview_index: int, iterations: int = 1) -> dict:
+        """Process a single invigilator and return a dictionary with all needed data fields."""
+        prompts = invigilator.get_prompts()
+        user_prompt = prompts["user_prompt"]
+        system_prompt = prompts["system_prompt"]
+
+        agent_index = self._agent_lookup[invigilator.agent]
+        scenario_index = self._scenario_lookup[invigilator.scenario]
+        model = invigilator.model.model
+        question_name = invigilator.question.question_name
+
+        # Calculate prompt cost
+        prompt_cost = self.estimate_prompt_cost(
+            system_prompt=system_prompt,
+            user_prompt=user_prompt,
+            price_lookup=self.price_lookup,
+            inference_service=invigilator.model._inference_service_,
+            model=model,
+        )
+        cost = prompt_cost["cost_usd"]
+
+        # Generate cache keys for each iteration
+        cache_keys = []
+        for iteration in range(iterations):
+            cache_key = CacheEntry.gen_key(
+                model=model,
+                parameters=invigilator.model.parameters,
+                system_prompt=system_prompt,
+                user_prompt=user_prompt,
+                iteration=iteration,
+            )
+            cache_keys.append(cache_key)
+
+        d = {
+            "user_prompt": user_prompt,
+            "system_prompt": system_prompt,
+            "interview_index": interview_index,
+            "question_name": question_name,
+            "scenario_index": scenario_index,
+            "agent_index": agent_index,
+            "model": model,
+            "estimated_cost": cost,
+            "cache_keys": cache_keys,
+        }
+        assert list(d.keys()) == self.relevant_keys
+        return d
+
+    def prompts(self, iterations=1) -> "Dataset":
+        """Return a Dataset of prompts that will be used.
+
+        >>> from edsl.jobs import Jobs
+        >>> Jobs.example().prompts()
+        Dataset(...)
+        """
+        dataset_of_prompts = {k: [] for k in self.relevant_keys}
+
+        interviews = self.interviews
+
+        # Process each interview and invigilator
+        for interview_index, interview in enumerate(interviews):
+            invigilators = [
+                FetchInvigilator(interview)(question)
+                for question in interview.survey.questions
+            ]
+
+            for invigilator in invigilators:
+                # Process the invigilator and get all data as a dictionary
+                data = self._process_one_invigilator(invigilator, interview_index, iterations)
+                for k in self.relevant_keys:
+                    dataset_of_prompts[k].append(data[k])
+
+        return Dataset([{k: dataset_of_prompts[k]} for k in self.relevant_keys])
+
+    @staticmethod
+    def estimate_prompt_cost(
+        system_prompt: str,
+        user_prompt: str,
+        price_lookup: dict,
+        inference_service: str,
+        model: str,
+    ) -> dict:
+        """Estimates the cost of a prompt, taking piping into account."""
+        return PromptCostEstimator(
+            system_prompt=system_prompt,
+            user_prompt=user_prompt,
+            price_lookup=price_lookup,
+            inference_service=inference_service,
+            model=model
+        )()
+
+    @staticmethod
+    def _extract_prompt_details(invigilator: FetchInvigilator) -> dict:
+        """Extracts the prompt details from the invigilator.
+
+        >>> from edsl.invigilators import InvigilatorAI
+        >>> invigilator = InvigilatorAI.example()
+        >>> JobsPrompts._extract_prompt_details(invigilator)
+        {'user_prompt': ...
+        """
+        prompts = invigilator.get_prompts()
+        user_prompt = prompts["user_prompt"]
+        system_prompt = prompts["system_prompt"]
+        inference_service = invigilator.model._inference_service_
+        model = invigilator.model.model
+        return {
+            "user_prompt": user_prompt,
+            "system_prompt": system_prompt,
+            "inference_service": inference_service,
+            "model": model,
+        }
+
+    def estimate_job_cost_from_external_prices(
+        self, price_lookup: dict, iterations: int = 1
+    ) -> dict:
+        """
+        Estimates the cost of a job.
+
+        :param price_lookup: An external pricing dictionary.
+        :param iterations: The number of times to iterate over the job.
+
+        Key assumptions:
+        - 1 token = 4 characters.
+        - For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
+        """
+        interviews = self.interviews
+        data = []
+        for interview in interviews:
+            invigilators = [
+                FetchInvigilator(interview)(question)
+                for question in self.survey.questions
+            ]
+            for invigilator in invigilators:
+                prompt_details = self._extract_prompt_details(invigilator)
+                prompt_cost = self.estimate_prompt_cost(**prompt_details, price_lookup=price_lookup)
+                price_estimates = {
+                    'estimated_input_tokens': prompt_cost['input_tokens'],
+                    'estimated_output_tokens': prompt_cost['output_tokens'],
+                    'estimated_cost_usd': prompt_cost['cost_usd']
+                }
+                data.append({**price_estimates, **prompt_details})
+
+        model_groups = {}
+        for item in data:
+            key = (item["inference_service"], item["model"])
+            if key not in model_groups:
+                model_groups[key] = {
+                    "inference_service": item["inference_service"],
+                    "model": item["model"],
+                    "estimated_cost_usd": 0,
+                    "estimated_input_tokens": 0,
+                    "estimated_output_tokens": 0
+                }
+
+            # Accumulate values
+            model_groups[key]["estimated_cost_usd"] += item["estimated_cost_usd"]
+            model_groups[key]["estimated_input_tokens"] += item["estimated_input_tokens"]
+            model_groups[key]["estimated_output_tokens"] += item["estimated_output_tokens"]
+
+        # Apply iterations and convert to list
+        estimated_costs_by_model = []
+        for group_data in model_groups.values():
+            group_data["estimated_cost_usd"] *= iterations
+            group_data["estimated_input_tokens"] *= iterations
+            group_data["estimated_output_tokens"] *= iterations
+            estimated_costs_by_model.append(group_data)
+
+        # Calculate totals
+        estimated_total_cost = sum(
+            model["estimated_cost_usd"] for model in estimated_costs_by_model
+        )
+        estimated_total_input_tokens = sum(
+            model["estimated_input_tokens"] for model in estimated_costs_by_model
+        )
+        estimated_total_output_tokens = sum(
+            model["estimated_output_tokens"] for model in estimated_costs_by_model
+        )
+
+        output = {
+            "estimated_total_cost_usd": estimated_total_cost,
+            "estimated_total_input_tokens": estimated_total_input_tokens,
+            "estimated_total_output_tokens": estimated_total_output_tokens,
+            "model_costs": estimated_costs_by_model,
+        }
+
+        return output
+
+    def estimate_job_cost(self, iterations: int = 1) -> dict:
+        """
+        Estimates the cost of a job according to the following assumptions:
+
+        - 1 token = 4 characters.
+        - For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
+
+        Fetches prices from Coop.
+        """
+        return self.estimate_job_cost_from_external_prices(
+            price_lookup=self.price_lookup, iterations=iterations
+        )
+
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(optionflags=doctest.ELLIPSIS)
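The new `PromptCostEstimator` boils down to fixed-ratio arithmetic: prompt characters are doubled when Jinja `{{ }}` braces are present (piping), divided by 4 to approximate input tokens, output tokens are estimated as 0.75 times input tokens rounded up, and each side is priced per token, falling back to $1.00 per 1M tokens when no price data is available. A worked example of those numbers, written independently of the class above:

```python
import math

user_prompt = "What is your favorite {{ item }}?"      # contains piping braces
system_prompt = "You are answering survey questions."  # no braces

user_chars = len(user_prompt) * 2      # piping multiplier 2 -> 33 * 2 = 66
system_chars = len(system_prompt) * 1  # 35
input_tokens = (user_chars + system_chars) // 4  # 101 // 4 = 25
output_tokens = math.ceil(0.75 * input_tokens)   # ceil(18.75) = 19

fallback_price = 0.000001  # the default $1.00 / 1M tokens used above
cost_usd = input_tokens * fallback_price + output_tokens * fallback_price
print(input_tokens, output_tokens, cost_usd)  # 25 19 ~4.4e-05
```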
edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py}
@@ -5,12 +5,13 @@ from abc import ABC, abstractmethod
 from typing import Optional, Union, Literal, TYPE_CHECKING, List, Dict
 from datetime import datetime
 from dataclasses import dataclass
-from edsl.exceptions.coop import CoopServerResponseError

-from
+from ..coop import CoopServerResponseError
+
+from .jobs_status_enums import JobsStatus

 if TYPE_CHECKING:
-    from
+    from ..results import Results


 @dataclass
|