edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +430 -113
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/dataset/dataset_operations_mixin.py +1492 -0
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +321 -155
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +10 -16
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +420 -216
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/file_store.py +755 -0
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +20 -21
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -426
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/auto/AutoStudy.py +0 -130
- edsl/auto/StageBase.py +0 -243
- edsl/auto/StageGenerateSurvey.py +0 -178
- edsl/auto/StageLabelQuestions.py +0 -125
- edsl/auto/StagePersona.py +0 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
- edsl/auto/StagePersonaDimensionValues.py +0 -74
- edsl/auto/StagePersonaDimensions.py +0 -69
- edsl/auto/StageQuestions.py +0 -74
- edsl/auto/SurveyCreatorPipeline.py +0 -21
- edsl/auto/utilities.py +0 -218
- edsl/base/Base.py +0 -279
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -580
- edsl/data/CacheEntry.py +0 -230
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -539
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/DatasetExportMixin.py +0 -911
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/FileStore.py +0 -564
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1306
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.46.dist-info/RECORD +0 -366
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
edsl/jobs/JobsPrompts.py
DELETED
@@ -1,354 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
import logging
|
3
|
-
from typing import List, TYPE_CHECKING
|
4
|
-
|
5
|
-
from edsl.results.Dataset import Dataset
|
6
|
-
|
7
|
-
if TYPE_CHECKING:
|
8
|
-
from edsl.jobs import Jobs
|
9
|
-
|
10
|
-
# from edsl.jobs.interviews.Interview import Interview
|
11
|
-
# from edsl.results.Dataset import Dataset
|
12
|
-
# from edsl.agents.AgentList import AgentList
|
13
|
-
# from edsl.scenarios.ScenarioList import ScenarioList
|
14
|
-
# from edsl.surveys.Survey import Survey
|
15
|
-
|
16
|
-
from edsl.jobs.FetchInvigilator import FetchInvigilator
|
17
|
-
from edsl.data.CacheEntry import CacheEntry
|
18
|
-
|
19
|
-
logger = logging.getLogger(__name__)
|
20
|
-
|
21
|
-
|
22
|
-
class JobsPrompts:
|
23
|
-
def __init__(self, jobs: "Jobs"):
|
24
|
-
self.interviews = jobs.interviews()
|
25
|
-
self.agents = jobs.agents
|
26
|
-
self.scenarios = jobs.scenarios
|
27
|
-
self.survey = jobs.survey
|
28
|
-
self._price_lookup = None
|
29
|
-
self._agent_lookup = {agent: idx for idx, agent in enumerate(self.agents)}
|
30
|
-
self._scenario_lookup = {
|
31
|
-
scenario: idx for idx, scenario in enumerate(self.scenarios)
|
32
|
-
}
|
33
|
-
|
34
|
-
@property
|
35
|
-
def price_lookup(self):
|
36
|
-
if self._price_lookup is None:
|
37
|
-
from edsl.coop.coop import Coop
|
38
|
-
|
39
|
-
c = Coop()
|
40
|
-
self._price_lookup = c.fetch_prices()
|
41
|
-
return self._price_lookup
|
42
|
-
|
43
|
-
def prompts(self, iterations=1) -> "Dataset":
|
44
|
-
"""Return a Dataset of prompts that will be used.
|
45
|
-
|
46
|
-
>>> from edsl.jobs import Jobs
|
47
|
-
>>> Jobs.example().prompts()
|
48
|
-
Dataset(...)
|
49
|
-
"""
|
50
|
-
interviews = self.interviews
|
51
|
-
interview_indices = []
|
52
|
-
question_names = []
|
53
|
-
user_prompts = []
|
54
|
-
system_prompts = []
|
55
|
-
scenario_indices = []
|
56
|
-
agent_indices = []
|
57
|
-
models = []
|
58
|
-
costs = []
|
59
|
-
cache_keys = []
|
60
|
-
|
61
|
-
for interview_index, interview in enumerate(interviews):
|
62
|
-
logger.info(f"Processing interview {interview_index} of {len(interviews)}")
|
63
|
-
interview_start = time.time()
|
64
|
-
|
65
|
-
# Fetch invigilators timing
|
66
|
-
invig_start = time.time()
|
67
|
-
invigilators = [
|
68
|
-
FetchInvigilator(interview)(question)
|
69
|
-
for question in interview.survey.questions
|
70
|
-
]
|
71
|
-
invig_end = time.time()
|
72
|
-
logger.debug(
|
73
|
-
f"Time taken to fetch invigilators: {invig_end - invig_start:.4f}s"
|
74
|
-
)
|
75
|
-
|
76
|
-
# Process prompts timing
|
77
|
-
prompts_start = time.time()
|
78
|
-
for _, invigilator in enumerate(invigilators):
|
79
|
-
# Get prompts timing
|
80
|
-
get_prompts_start = time.time()
|
81
|
-
prompts = invigilator.get_prompts()
|
82
|
-
get_prompts_end = time.time()
|
83
|
-
logger.debug(
|
84
|
-
f"Time taken to get prompts: {get_prompts_end - get_prompts_start:.4f}s"
|
85
|
-
)
|
86
|
-
|
87
|
-
user_prompt = prompts["user_prompt"]
|
88
|
-
system_prompt = prompts["system_prompt"]
|
89
|
-
user_prompts.append(user_prompt)
|
90
|
-
system_prompts.append(system_prompt)
|
91
|
-
|
92
|
-
# Index lookups timing
|
93
|
-
index_start = time.time()
|
94
|
-
agent_index = self._agent_lookup[invigilator.agent]
|
95
|
-
agent_indices.append(agent_index)
|
96
|
-
interview_indices.append(interview_index)
|
97
|
-
scenario_index = self._scenario_lookup[invigilator.scenario]
|
98
|
-
scenario_indices.append(scenario_index)
|
99
|
-
index_end = time.time()
|
100
|
-
logger.debug(
|
101
|
-
f"Time taken for index lookups: {index_end - index_start:.4f}s"
|
102
|
-
)
|
103
|
-
|
104
|
-
# Model and question name assignment timing
|
105
|
-
assign_start = time.time()
|
106
|
-
models.append(invigilator.model.model)
|
107
|
-
question_names.append(invigilator.question.question_name)
|
108
|
-
assign_end = time.time()
|
109
|
-
logger.debug(
|
110
|
-
f"Time taken for assignments: {assign_end - assign_start:.4f}s"
|
111
|
-
)
|
112
|
-
|
113
|
-
# Cost estimation timing
|
114
|
-
cost_start = time.time()
|
115
|
-
prompt_cost = self.estimate_prompt_cost(
|
116
|
-
system_prompt=system_prompt,
|
117
|
-
user_prompt=user_prompt,
|
118
|
-
price_lookup=self.price_lookup,
|
119
|
-
inference_service=invigilator.model._inference_service_,
|
120
|
-
model=invigilator.model.model,
|
121
|
-
)
|
122
|
-
cost_end = time.time()
|
123
|
-
logger.debug(
|
124
|
-
f"Time taken to estimate prompt cost: {cost_end - cost_start:.4f}s"
|
125
|
-
)
|
126
|
-
costs.append(prompt_cost["cost_usd"])
|
127
|
-
|
128
|
-
# Cache key generation timing
|
129
|
-
cache_key_gen_start = time.time()
|
130
|
-
for iteration in range(iterations):
|
131
|
-
cache_key = CacheEntry.gen_key(
|
132
|
-
model=invigilator.model.model,
|
133
|
-
parameters=invigilator.model.parameters,
|
134
|
-
system_prompt=system_prompt,
|
135
|
-
user_prompt=user_prompt,
|
136
|
-
iteration=iteration,
|
137
|
-
)
|
138
|
-
cache_keys.append(cache_key)
|
139
|
-
|
140
|
-
cache_key_gen_end = time.time()
|
141
|
-
logger.debug(
|
142
|
-
f"Time taken to generate cache key: {cache_key_gen_end - cache_key_gen_start:.4f}s"
|
143
|
-
)
|
144
|
-
logger.debug("-" * 50) # Separator between iterations
|
145
|
-
|
146
|
-
prompts_end = time.time()
|
147
|
-
logger.info(
|
148
|
-
f"Time taken to process prompts: {prompts_end - prompts_start:.4f}s"
|
149
|
-
)
|
150
|
-
|
151
|
-
interview_end = time.time()
|
152
|
-
logger.info(
|
153
|
-
f"Overall time taken for interview: {interview_end - interview_start:.4f}s"
|
154
|
-
)
|
155
|
-
logger.info("Time breakdown:")
|
156
|
-
logger.info(f" Invigilators: {invig_end - invig_start:.4f}s")
|
157
|
-
logger.info(f" Prompts processing: {prompts_end - prompts_start:.4f}s")
|
158
|
-
logger.info(
|
159
|
-
f" Other overhead: {(interview_end - interview_start) - ((invig_end - invig_start) + (prompts_end - prompts_start)):.4f}s"
|
160
|
-
)
|
161
|
-
|
162
|
-
d = Dataset(
|
163
|
-
[
|
164
|
-
{"user_prompt": user_prompts},
|
165
|
-
{"system_prompt": system_prompts},
|
166
|
-
{"interview_index": interview_indices},
|
167
|
-
{"question_name": question_names},
|
168
|
-
{"scenario_index": scenario_indices},
|
169
|
-
{"agent_index": agent_indices},
|
170
|
-
{"model": models},
|
171
|
-
{"estimated_cost": costs},
|
172
|
-
{"cache_key": cache_keys},
|
173
|
-
]
|
174
|
-
)
|
175
|
-
return d
|
176
|
-
|
177
|
-
@staticmethod
|
178
|
-
def estimate_prompt_cost(
|
179
|
-
system_prompt: str,
|
180
|
-
user_prompt: str,
|
181
|
-
price_lookup: dict,
|
182
|
-
inference_service: str,
|
183
|
-
model: str,
|
184
|
-
) -> dict:
|
185
|
-
"""Estimates the cost of a prompt. Takes piping into account."""
|
186
|
-
import math
|
187
|
-
|
188
|
-
def get_piping_multiplier(prompt: str):
|
189
|
-
"""Returns 2 if a prompt includes Jinja braces, and 1 otherwise."""
|
190
|
-
|
191
|
-
if "{{" in prompt and "}}" in prompt:
|
192
|
-
return 2
|
193
|
-
return 1
|
194
|
-
|
195
|
-
# Look up prices per token
|
196
|
-
key = (inference_service, model)
|
197
|
-
|
198
|
-
try:
|
199
|
-
relevant_prices = price_lookup[key]
|
200
|
-
|
201
|
-
service_input_token_price = float(
|
202
|
-
relevant_prices["input"]["service_stated_token_price"]
|
203
|
-
)
|
204
|
-
service_input_token_qty = float(
|
205
|
-
relevant_prices["input"]["service_stated_token_qty"]
|
206
|
-
)
|
207
|
-
input_price_per_token = service_input_token_price / service_input_token_qty
|
208
|
-
|
209
|
-
service_output_token_price = float(
|
210
|
-
relevant_prices["output"]["service_stated_token_price"]
|
211
|
-
)
|
212
|
-
service_output_token_qty = float(
|
213
|
-
relevant_prices["output"]["service_stated_token_qty"]
|
214
|
-
)
|
215
|
-
output_price_per_token = (
|
216
|
-
service_output_token_price / service_output_token_qty
|
217
|
-
)
|
218
|
-
|
219
|
-
except KeyError:
|
220
|
-
# A KeyError is likely to occur if we cannot retrieve prices (the price_lookup dict is empty)
|
221
|
-
# Use a sensible default
|
222
|
-
|
223
|
-
import warnings
|
224
|
-
|
225
|
-
warnings.warn(
|
226
|
-
"Price data could not be retrieved. Using default estimates for input and output token prices. Input: $1.00 / 1M tokens; Output: $1.00 / 1M tokens"
|
227
|
-
)
|
228
|
-
input_price_per_token = 0.000001 # $1.00 / 1M tokens
|
229
|
-
output_price_per_token = 0.000001 # $1.00 / 1M tokens
|
230
|
-
|
231
|
-
# Compute the number of characters (double if the question involves piping)
|
232
|
-
user_prompt_chars = len(str(user_prompt)) * get_piping_multiplier(
|
233
|
-
str(user_prompt)
|
234
|
-
)
|
235
|
-
system_prompt_chars = len(str(system_prompt)) * get_piping_multiplier(
|
236
|
-
str(system_prompt)
|
237
|
-
)
|
238
|
-
|
239
|
-
# Convert into tokens (1 token approx. equals 4 characters)
|
240
|
-
input_tokens = (user_prompt_chars + system_prompt_chars) // 4
|
241
|
-
|
242
|
-
output_tokens = math.ceil(0.75 * input_tokens)
|
243
|
-
|
244
|
-
cost = (
|
245
|
-
input_tokens * input_price_per_token
|
246
|
-
+ output_tokens * output_price_per_token
|
247
|
-
)
|
248
|
-
|
249
|
-
return {
|
250
|
-
"input_tokens": input_tokens,
|
251
|
-
"output_tokens": output_tokens,
|
252
|
-
"cost_usd": cost,
|
253
|
-
}
|
254
|
-
|
255
|
-
def estimate_job_cost_from_external_prices(
|
256
|
-
self, price_lookup: dict, iterations: int = 1
|
257
|
-
) -> dict:
|
258
|
-
"""
|
259
|
-
Estimates the cost of a job according to the following assumptions:
|
260
|
-
|
261
|
-
- 1 token = 4 characters.
|
262
|
-
- For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
|
263
|
-
|
264
|
-
price_lookup is an external pricing dictionary.
|
265
|
-
"""
|
266
|
-
|
267
|
-
import pandas as pd
|
268
|
-
|
269
|
-
interviews = self.interviews
|
270
|
-
data = []
|
271
|
-
for interview in interviews:
|
272
|
-
invigilators = [
|
273
|
-
FetchInvigilator(interview)(question)
|
274
|
-
for question in self.survey.questions
|
275
|
-
]
|
276
|
-
for invigilator in invigilators:
|
277
|
-
prompts = invigilator.get_prompts()
|
278
|
-
|
279
|
-
# By this point, agent and scenario data has already been added to the prompts
|
280
|
-
user_prompt = prompts["user_prompt"]
|
281
|
-
system_prompt = prompts["system_prompt"]
|
282
|
-
inference_service = invigilator.model._inference_service_
|
283
|
-
model = invigilator.model.model
|
284
|
-
|
285
|
-
prompt_cost = self.estimate_prompt_cost(
|
286
|
-
system_prompt=system_prompt,
|
287
|
-
user_prompt=user_prompt,
|
288
|
-
price_lookup=price_lookup,
|
289
|
-
inference_service=inference_service,
|
290
|
-
model=model,
|
291
|
-
)
|
292
|
-
|
293
|
-
data.append(
|
294
|
-
{
|
295
|
-
"user_prompt": user_prompt,
|
296
|
-
"system_prompt": system_prompt,
|
297
|
-
"estimated_input_tokens": prompt_cost["input_tokens"],
|
298
|
-
"estimated_output_tokens": prompt_cost["output_tokens"],
|
299
|
-
"estimated_cost_usd": prompt_cost["cost_usd"],
|
300
|
-
"inference_service": inference_service,
|
301
|
-
"model": model,
|
302
|
-
}
|
303
|
-
)
|
304
|
-
|
305
|
-
df = pd.DataFrame.from_records(data)
|
306
|
-
|
307
|
-
df = (
|
308
|
-
df.groupby(["inference_service", "model"])
|
309
|
-
.agg(
|
310
|
-
{
|
311
|
-
"estimated_cost_usd": "sum",
|
312
|
-
"estimated_input_tokens": "sum",
|
313
|
-
"estimated_output_tokens": "sum",
|
314
|
-
}
|
315
|
-
)
|
316
|
-
.reset_index()
|
317
|
-
)
|
318
|
-
df["estimated_cost_usd"] = df["estimated_cost_usd"] * iterations
|
319
|
-
df["estimated_input_tokens"] = df["estimated_input_tokens"] * iterations
|
320
|
-
df["estimated_output_tokens"] = df["estimated_output_tokens"] * iterations
|
321
|
-
|
322
|
-
estimated_costs_by_model = df.to_dict("records")
|
323
|
-
|
324
|
-
estimated_total_cost = sum(
|
325
|
-
model["estimated_cost_usd"] for model in estimated_costs_by_model
|
326
|
-
)
|
327
|
-
estimated_total_input_tokens = sum(
|
328
|
-
model["estimated_input_tokens"] for model in estimated_costs_by_model
|
329
|
-
)
|
330
|
-
estimated_total_output_tokens = sum(
|
331
|
-
model["estimated_output_tokens"] for model in estimated_costs_by_model
|
332
|
-
)
|
333
|
-
|
334
|
-
output = {
|
335
|
-
"estimated_total_cost_usd": estimated_total_cost,
|
336
|
-
"estimated_total_input_tokens": estimated_total_input_tokens,
|
337
|
-
"estimated_total_output_tokens": estimated_total_output_tokens,
|
338
|
-
"model_costs": estimated_costs_by_model,
|
339
|
-
}
|
340
|
-
|
341
|
-
return output
|
342
|
-
|
343
|
-
def estimate_job_cost(self, iterations: int = 1) -> dict:
|
344
|
-
"""
|
345
|
-
Estimates the cost of a job according to the following assumptions:
|
346
|
-
|
347
|
-
- 1 token = 4 characters.
|
348
|
-
- For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
|
349
|
-
|
350
|
-
Fetches prices from Coop.
|
351
|
-
"""
|
352
|
-
return self.estimate_job_cost_from_external_prices(
|
353
|
-
price_lookup=self.price_lookup, iterations=iterations
|
354
|
-
)
|
@@ -1,134 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
from collections import UserDict
|
3
|
-
from edsl.jobs.buckets.TokenBucket import TokenBucket
|
4
|
-
from edsl.jobs.buckets.ModelBuckets import ModelBuckets
|
5
|
-
|
6
|
-
# from functools import wraps
|
7
|
-
from threading import RLock
|
8
|
-
|
9
|
-
from edsl.jobs.decorators import synchronized_class
|
10
|
-
|
11
|
-
|
12
|
-
@synchronized_class
class BucketCollection(UserDict):
    """A collection of rate-limit buckets for all models used by a Jobs object.

    The keys are models (which are hashable), and the values are
    ModelBuckets objects. Models belonging to the same inference service
    share a single ModelBuckets instance, so per-service rate limits are
    enforced across all of that service's models.
    """

    def __init__(self, infinity_buckets: bool = False):
        """Create a new BucketCollection.

        :param infinity_buckets: if True, every bucket has infinite
            capacity and refill rate, i.e. no rate limiting is applied.
        """
        super().__init__()
        self.infinity_buckets = infinity_buckets
        self.models_to_services = {}  # model name -> service name
        self.services_to_buckets = {}  # service name -> ModelBuckets
        self._lock = RLock()

        import os

        # Optional remote token-bucket server. The env var may be unset,
        # or set to the literal string "None" (e.g. via a config template);
        # both are treated as "no remote bucket".
        url = os.environ.get("EDSL_REMOTE_TOKEN_BUCKET_URL", None)
        self.remote_url = None if url in (None, "None") else url

    @classmethod
    def from_models(
        cls, models_list: list, infinity_buckets: bool = False
    ) -> "BucketCollection":
        """Create a BucketCollection from a list of models."""
        bucket_collection = cls(infinity_buckets=infinity_buckets)
        for model in models_list:
            bucket_collection.add_model(model)
        return bucket_collection

    def get_tokens(
        self, model: "LanguageModel", bucket_type: str, num_tokens: int
    ) -> int:
        """Get the number of tokens remaining in the bucket.

        :param model: the model whose bucket to query.
        :param bucket_type: attribute name on ModelBuckets, e.g.
            "requests_bucket" or "tokens_bucket".
        :param num_tokens: number of tokens requested.
        """
        relevant_bucket = getattr(self[model], bucket_type)
        return relevant_bucket.get_tokens(num_tokens)

    def __repr__(self):
        return f"BucketCollection({self.data})"

    def add_model(self, model: "LanguageModel") -> None:
        """Adds a model to the bucket collection.

        This will create the token and request buckets for the model.
        Buckets are created once per inference service and shared by all
        models of that service.
        """

        # Convert per-minute limits (tpm/rpm) to per-second rates.
        if not self.infinity_buckets:
            TPS = model.tpm / 60.0
            RPS = model.rpm / 60.0
        else:
            TPS = float("inf")
            RPS = float("inf")

        if model.model not in self.models_to_services:
            service = model._inference_service_
            if service not in self.services_to_buckets:
                requests_bucket = TokenBucket(
                    bucket_name=service,
                    bucket_type="requests",
                    capacity=RPS,
                    refill_rate=RPS,
                    remote_url=self.remote_url,
                )
                tokens_bucket = TokenBucket(
                    bucket_name=service,
                    bucket_type="tokens",
                    capacity=TPS,
                    refill_rate=TPS,
                    remote_url=self.remote_url,
                )
                self.services_to_buckets[service] = ModelBuckets(
                    requests_bucket, tokens_bucket
                )
            self.models_to_services[model.model] = service
            self[model] = self.services_to_buckets[service]
        else:
            # Model already known: reuse its service's shared buckets.
            self[model] = self.services_to_buckets[self.models_to_services[model.model]]

    def update_from_key_lookup(self, key_lookup: "KeyLookup") -> None:
        """Updates the bucket collection rates based on model RPM/TPM from KeyLookup.

        Services not present in ``key_lookup`` (or with no rpm/tpm set)
        are left unchanged; no update happens when infinity buckets are on.
        """

        for model_name, service in self.models_to_services.items():
            if service in key_lookup and not self.infinity_buckets:

                if key_lookup[service].rpm is not None:
                    new_rps = key_lookup[service].rpm / 60.0
                    new_requests_bucket = TokenBucket(
                        bucket_name=service,
                        bucket_type="requests",
                        capacity=new_rps,
                        refill_rate=new_rps,
                        remote_url=self.remote_url,
                    )
                    self.services_to_buckets[service].requests_bucket = (
                        new_requests_bucket
                    )

                if key_lookup[service].tpm is not None:
                    new_tps = key_lookup[service].tpm / 60.0
                    new_tokens_bucket = TokenBucket(
                        bucket_name=service,
                        bucket_type="tokens",
                        capacity=new_tps,
                        refill_rate=new_tps,
                        remote_url=self.remote_url,
                    )
                    self.services_to_buckets[service].tokens_bucket = new_tokens_bucket

    def visualize(self) -> dict:
        """Visualize the token and request buckets for each model."""
        plots = {}
        for model in self:
            plots[model] = self[model].visualize()
        return plots
|
@@ -1,65 +0,0 @@
|
|
1
|
-
# from edsl.jobs.buckets.TokenBucket import TokenBucket
|
2
|
-
|
3
|
-
|
4
|
-
class ModelBuckets:
    """Pairs the request and token buckets that rate-limit one model.

    Most LLM services enforce both a requests-per-minute (RPM) and a
    tokens-per-minute (TPM) limit. A request is one call to the service;
    how many tokens it consumes depends on its parameters.
    """

    def __init__(self, requests_bucket: "TokenBucket", tokens_bucket: "TokenBucket"):
        """Store the two buckets.

        The requests bucket meters calls per unit of time; the tokens
        bucket meters language-model tokens.
        """
        self.requests_bucket = requests_bucket
        self.tokens_bucket = tokens_bucket

    def __add__(self, other: "ModelBuckets"):
        """Combine two model buckets pairwise into a new ModelBuckets."""
        combined_requests = self.requests_bucket + other.requests_bucket
        combined_tokens = self.tokens_bucket + other.tokens_bucket
        return ModelBuckets(
            requests_bucket=combined_requests,
            tokens_bucket=combined_tokens,
        )

    def turbo_mode_on(self):
        """Set the refill rate to infinity for both buckets."""
        for bucket in (self.requests_bucket, self.tokens_bucket):
            bucket.turbo_mode_on()

    def turbo_mode_off(self):
        """Restore the refill rate to its original value for both buckets."""
        for bucket in (self.requests_bucket, self.tokens_bucket):
            bucket.turbo_mode_off()

    @classmethod
    def infinity_bucket(cls, model_name: str = "not_specified") -> "ModelBuckets":
        """Create a bucket pair with infinite capacity and refill rate."""
        from edsl.jobs.buckets.TokenBucket import TokenBucket

        unlimited = float("inf")
        return cls(
            requests_bucket=TokenBucket(
                bucket_name=model_name,
                bucket_type="requests",
                capacity=unlimited,
                refill_rate=unlimited,
            ),
            tokens_bucket=TokenBucket(
                bucket_name=model_name,
                bucket_type="tokens",
                capacity=unlimited,
                refill_rate=unlimited,
            ),
        )

    def visualize(self):
        """Return the (requests, tokens) plots from the underlying buckets."""
        return self.requests_bucket.visualize(), self.tokens_bucket.visualize()

    def __repr__(self):
        return f"ModelBuckets(requests_bucket={self.requests_bucket}, tokens_bucket={self.tokens_bucket})"
|