edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +332 -332
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +49 -49
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +867 -867
- edsl/agents/AgentList.py +413 -413
- edsl/agents/Invigilator.py +233 -233
- edsl/agents/InvigilatorBase.py +270 -265
- edsl/agents/PromptConstructor.py +354 -354
- edsl/agents/__init__.py +3 -3
- edsl/agents/descriptors.py +99 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/auto/AutoStudy.py +117 -117
- edsl/auto/StageBase.py +230 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +73 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +224 -224
- edsl/base/Base.py +279 -279
- edsl/config.py +157 -157
- edsl/conversation/Conversation.py +290 -290
- edsl/conversation/car_buying.py +58 -58
- edsl/conversation/chips.py +95 -95
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +1028 -1028
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +555 -555
- edsl/data/CacheEntry.py +233 -233
- edsl/data/CacheHandler.py +149 -149
- edsl/data/RemoteCacheSync.py +78 -78
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +4 -4
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +73 -73
- edsl/enums.py +175 -175
- edsl/exceptions/BaseException.py +21 -21
- edsl/exceptions/__init__.py +54 -54
- edsl/exceptions/agents.py +42 -42
- edsl/exceptions/cache.py +5 -5
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +91 -91
- edsl/exceptions/results.py +29 -29
- edsl/exceptions/scenarios.py +22 -22
- edsl/exceptions/surveys.py +37 -37
- edsl/inference_services/AnthropicService.py +87 -87
- edsl/inference_services/AwsBedrock.py +120 -120
- edsl/inference_services/AzureAI.py +217 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +148 -148
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +147 -147
- edsl/inference_services/InferenceServicesCollection.py +97 -97
- edsl/inference_services/MistralAIService.py +123 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +224 -224
- edsl/inference_services/PerplexityService.py +163 -163
- edsl/inference_services/TestService.py +89 -89
- edsl/inference_services/TogetherAIService.py +170 -170
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +41 -41
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/Answers.py +56 -56
- edsl/jobs/Jobs.py +898 -898
- edsl/jobs/JobsChecks.py +147 -147
- edsl/jobs/JobsPrompts.py +268 -268
- edsl/jobs/JobsRemoteInferenceHandler.py +239 -239
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/buckets/BucketCollection.py +63 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +251 -251
- edsl/jobs/interviews/Interview.py +661 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/runners/JobsRunnerAsyncio.py +466 -466
- edsl/jobs/runners/JobsRunnerStatus.py +330 -330
- edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +450 -450
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +163 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/KeyLookup.py +30 -30
- edsl/language_models/LanguageModel.py +668 -668
- edsl/language_models/ModelList.py +155 -155
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/__init__.py +3 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/registry.py +190 -190
- edsl/language_models/repair.py +156 -156
- edsl/language_models/unused/ReplicateBase.py +83 -83
- edsl/language_models/utilities.py +64 -64
- edsl/notebooks/Notebook.py +258 -258
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +362 -362
- edsl/prompts/__init__.py +2 -2
- edsl/questions/AnswerValidatorMixin.py +289 -289
- edsl/questions/QuestionBase.py +664 -664
- edsl/questions/QuestionBaseGenMixin.py +161 -161
- edsl/questions/QuestionBasePromptsMixin.py +217 -217
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +182 -182
- edsl/questions/QuestionFreeText.py +114 -114
- edsl/questions/QuestionFunctional.py +166 -166
- edsl/questions/QuestionList.py +231 -231
- edsl/questions/QuestionMultipleChoice.py +286 -286
- edsl/questions/QuestionNumerical.py +153 -153
- edsl/questions/QuestionRank.py +324 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/RegisterQuestionsMeta.py +71 -71
- edsl/questions/ResponseValidatorABC.py +174 -174
- edsl/questions/SimpleAskMixin.py +73 -73
- edsl/questions/__init__.py +26 -26
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +87 -87
- edsl/questions/derived/QuestionTopK.py +93 -93
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +413 -413
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/question_registry.py +177 -177
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/CSSParameterizer.py +108 -108
- edsl/results/Dataset.py +424 -424
- edsl/results/DatasetExportMixin.py +731 -731
- edsl/results/DatasetTree.py +275 -275
- edsl/results/Result.py +465 -465
- edsl/results/Results.py +1165 -1165
- edsl/results/ResultsDBMixin.py +238 -238
- edsl/results/ResultsExportMixin.py +43 -43
- edsl/results/ResultsFetchMixin.py +33 -33
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/ResultsToolsMixin.py +98 -98
- edsl/results/Selector.py +135 -135
- edsl/results/TableDisplay.py +198 -198
- edsl/results/__init__.py +2 -2
- edsl/results/table_display.css +77 -77
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/FileStore.py +632 -632
- edsl/scenarios/Scenario.py +601 -601
- edsl/scenarios/ScenarioHtmlMixin.py +64 -64
- edsl/scenarios/ScenarioJoin.py +127 -127
- edsl/scenarios/ScenarioList.py +1287 -1287
- edsl/scenarios/ScenarioListExportMixin.py +52 -52
- edsl/scenarios/ScenarioListPdfMixin.py +261 -261
- edsl/scenarios/__init__.py +4 -4
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +528 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +326 -326
- edsl/surveys/RuleCollection.py +387 -387
- edsl/surveys/Survey.py +1801 -1801
- edsl/surveys/SurveyCSS.py +261 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/SurveyFlowVisualizationMixin.py +179 -179
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/__init__.py +3 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +56 -56
- edsl/surveys/instructions/ChangeInstruction.py +49 -49
- edsl/surveys/instructions/Instruction.py +65 -65
- edsl/surveys/instructions/InstructionCollection.py +77 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +19 -19
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/interface.py +627 -627
- edsl/utilities/naming_utilities.py +263 -263
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +424 -424
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev3.dist-info}/LICENSE +21 -21
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev3.dist-info}/METADATA +1 -1
- edsl-0.1.39.dev3.dist-info/RECORD +277 -0
- edsl-0.1.39.dev1.dist-info/RECORD +0 -277
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev3.dist-info}/WHEEL +0 -0
edsl/jobs/JobsPrompts.py
CHANGED
@@ -1,268 +1,268 @@
|
|
1
|
-
from typing import List, TYPE_CHECKING
|
2
|
-
|
3
|
-
from edsl.results.Dataset import Dataset
|
4
|
-
|
5
|
-
if TYPE_CHECKING:
|
6
|
-
from edsl.jobs import Jobs
|
7
|
-
|
8
|
-
# from edsl.jobs.interviews.Interview import Interview
|
9
|
-
# from edsl.results.Dataset import Dataset
|
10
|
-
# from edsl.agents.AgentList import AgentList
|
11
|
-
# from edsl.scenarios.ScenarioList import ScenarioList
|
12
|
-
# from edsl.surveys.Survey import Survey
|
13
|
-
|
14
|
-
|
15
|
-
class JobsPrompts:
|
16
|
-
def __init__(self, jobs: "Jobs"):
|
17
|
-
self.interviews = jobs.interviews()
|
18
|
-
self.agents = jobs.agents
|
19
|
-
self.scenarios = jobs.scenarios
|
20
|
-
self.survey = jobs.survey
|
21
|
-
self._price_lookup = None
|
22
|
-
|
23
|
-
@property
|
24
|
-
def price_lookup(self):
|
25
|
-
if self._price_lookup is None:
|
26
|
-
from edsl import Coop
|
27
|
-
|
28
|
-
c = Coop()
|
29
|
-
self._price_lookup = c.fetch_prices()
|
30
|
-
return self._price_lookup
|
31
|
-
|
32
|
-
def prompts(self) -> "Dataset":
|
33
|
-
"""Return a Dataset of prompts that will be used.
|
34
|
-
|
35
|
-
>>> from edsl.jobs import Jobs
|
36
|
-
>>> Jobs.example().prompts()
|
37
|
-
Dataset(...)
|
38
|
-
"""
|
39
|
-
interviews = self.interviews
|
40
|
-
interview_indices = []
|
41
|
-
question_names = []
|
42
|
-
user_prompts = []
|
43
|
-
system_prompts = []
|
44
|
-
scenario_indices = []
|
45
|
-
agent_indices = []
|
46
|
-
models = []
|
47
|
-
costs = []
|
48
|
-
|
49
|
-
for interview_index, interview in enumerate(interviews):
|
50
|
-
invigilators = [
|
51
|
-
interview._get_invigilator(question)
|
52
|
-
for question in self.survey.questions
|
53
|
-
]
|
54
|
-
for _, invigilator in enumerate(invigilators):
|
55
|
-
prompts = invigilator.get_prompts()
|
56
|
-
user_prompt = prompts["user_prompt"]
|
57
|
-
system_prompt = prompts["system_prompt"]
|
58
|
-
user_prompts.append(user_prompt)
|
59
|
-
system_prompts.append(system_prompt)
|
60
|
-
agent_index = self.agents.index(invigilator.agent)
|
61
|
-
agent_indices.append(agent_index)
|
62
|
-
interview_indices.append(interview_index)
|
63
|
-
scenario_index = self.scenarios.index(invigilator.scenario)
|
64
|
-
scenario_indices.append(scenario_index)
|
65
|
-
models.append(invigilator.model.model)
|
66
|
-
question_names.append(invigilator.question.question_name)
|
67
|
-
|
68
|
-
prompt_cost = self.estimate_prompt_cost(
|
69
|
-
system_prompt=system_prompt,
|
70
|
-
user_prompt=user_prompt,
|
71
|
-
price_lookup=self.price_lookup,
|
72
|
-
inference_service=invigilator.model._inference_service_,
|
73
|
-
model=invigilator.model.model,
|
74
|
-
)
|
75
|
-
costs.append(prompt_cost["cost_usd"])
|
76
|
-
|
77
|
-
d = Dataset(
|
78
|
-
[
|
79
|
-
{"user_prompt": user_prompts},
|
80
|
-
{"system_prompt": system_prompts},
|
81
|
-
{"interview_index": interview_indices},
|
82
|
-
{"question_name": question_names},
|
83
|
-
{"scenario_index": scenario_indices},
|
84
|
-
{"agent_index": agent_indices},
|
85
|
-
{"model": models},
|
86
|
-
{"estimated_cost": costs},
|
87
|
-
]
|
88
|
-
)
|
89
|
-
return d
|
90
|
-
|
91
|
-
@staticmethod
|
92
|
-
def estimate_prompt_cost(
|
93
|
-
system_prompt: str,
|
94
|
-
user_prompt: str,
|
95
|
-
price_lookup: dict,
|
96
|
-
inference_service: str,
|
97
|
-
model: str,
|
98
|
-
) -> dict:
|
99
|
-
"""Estimates the cost of a prompt. Takes piping into account."""
|
100
|
-
import math
|
101
|
-
|
102
|
-
def get_piping_multiplier(prompt: str):
|
103
|
-
"""Returns 2 if a prompt includes Jinja braces, and 1 otherwise."""
|
104
|
-
|
105
|
-
if "{{" in prompt and "}}" in prompt:
|
106
|
-
return 2
|
107
|
-
return 1
|
108
|
-
|
109
|
-
# Look up prices per token
|
110
|
-
key = (inference_service, model)
|
111
|
-
|
112
|
-
try:
|
113
|
-
relevant_prices = price_lookup[key]
|
114
|
-
|
115
|
-
service_input_token_price = float(
|
116
|
-
relevant_prices["input"]["service_stated_token_price"]
|
117
|
-
)
|
118
|
-
service_input_token_qty = float(
|
119
|
-
relevant_prices["input"]["service_stated_token_qty"]
|
120
|
-
)
|
121
|
-
input_price_per_token = service_input_token_price / service_input_token_qty
|
122
|
-
|
123
|
-
service_output_token_price = float(
|
124
|
-
relevant_prices["output"]["service_stated_token_price"]
|
125
|
-
)
|
126
|
-
service_output_token_qty = float(
|
127
|
-
relevant_prices["output"]["service_stated_token_qty"]
|
128
|
-
)
|
129
|
-
output_price_per_token = (
|
130
|
-
service_output_token_price / service_output_token_qty
|
131
|
-
)
|
132
|
-
|
133
|
-
except KeyError:
|
134
|
-
# A KeyError is likely to occur if we cannot retrieve prices (the price_lookup dict is empty)
|
135
|
-
# Use a sensible default
|
136
|
-
|
137
|
-
import warnings
|
138
|
-
|
139
|
-
warnings.warn(
|
140
|
-
"Price data could not be retrieved. Using default estimates for input and output token prices. Input: $0.15 / 1M tokens; Output: $0.60 / 1M tokens"
|
141
|
-
)
|
142
|
-
input_price_per_token = 0.00000015 # $0.15 / 1M tokens
|
143
|
-
output_price_per_token = 0.00000060 # $0.60 / 1M tokens
|
144
|
-
|
145
|
-
# Compute the number of characters (double if the question involves piping)
|
146
|
-
user_prompt_chars = len(str(user_prompt)) * get_piping_multiplier(
|
147
|
-
str(user_prompt)
|
148
|
-
)
|
149
|
-
system_prompt_chars = len(str(system_prompt)) * get_piping_multiplier(
|
150
|
-
str(system_prompt)
|
151
|
-
)
|
152
|
-
|
153
|
-
# Convert into tokens (1 token approx. equals 4 characters)
|
154
|
-
input_tokens = (user_prompt_chars + system_prompt_chars) // 4
|
155
|
-
|
156
|
-
output_tokens = math.ceil(0.75 * input_tokens)
|
157
|
-
|
158
|
-
cost = (
|
159
|
-
input_tokens * input_price_per_token
|
160
|
-
+ output_tokens * output_price_per_token
|
161
|
-
)
|
162
|
-
|
163
|
-
return {
|
164
|
-
"input_tokens": input_tokens,
|
165
|
-
"output_tokens": output_tokens,
|
166
|
-
"cost_usd": cost,
|
167
|
-
}
|
168
|
-
|
169
|
-
def estimate_job_cost_from_external_prices(
|
170
|
-
self, price_lookup: dict, iterations: int = 1
|
171
|
-
) -> dict:
|
172
|
-
"""
|
173
|
-
Estimates the cost of a job according to the following assumptions:
|
174
|
-
|
175
|
-
- 1 token = 4 characters.
|
176
|
-
- For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
|
177
|
-
|
178
|
-
price_lookup is an external pricing dictionary.
|
179
|
-
"""
|
180
|
-
|
181
|
-
import pandas as pd
|
182
|
-
|
183
|
-
interviews = self.interviews
|
184
|
-
data = []
|
185
|
-
for interview in interviews:
|
186
|
-
invigilators = [
|
187
|
-
interview._get_invigilator(question)
|
188
|
-
for question in self.survey.questions
|
189
|
-
]
|
190
|
-
for invigilator in invigilators:
|
191
|
-
prompts = invigilator.get_prompts()
|
192
|
-
|
193
|
-
# By this point, agent and scenario data has already been added to the prompts
|
194
|
-
user_prompt = prompts["user_prompt"]
|
195
|
-
system_prompt = prompts["system_prompt"]
|
196
|
-
inference_service = invigilator.model._inference_service_
|
197
|
-
model = invigilator.model.model
|
198
|
-
|
199
|
-
prompt_cost = self.estimate_prompt_cost(
|
200
|
-
system_prompt=system_prompt,
|
201
|
-
user_prompt=user_prompt,
|
202
|
-
price_lookup=price_lookup,
|
203
|
-
inference_service=inference_service,
|
204
|
-
model=model,
|
205
|
-
)
|
206
|
-
|
207
|
-
data.append(
|
208
|
-
{
|
209
|
-
"user_prompt": user_prompt,
|
210
|
-
"system_prompt": system_prompt,
|
211
|
-
"estimated_input_tokens": prompt_cost["input_tokens"],
|
212
|
-
"estimated_output_tokens": prompt_cost["output_tokens"],
|
213
|
-
"estimated_cost_usd": prompt_cost["cost_usd"],
|
214
|
-
"inference_service": inference_service,
|
215
|
-
"model": model,
|
216
|
-
}
|
217
|
-
)
|
218
|
-
|
219
|
-
df = pd.DataFrame.from_records(data)
|
220
|
-
|
221
|
-
df = (
|
222
|
-
df.groupby(["inference_service", "model"])
|
223
|
-
.agg(
|
224
|
-
{
|
225
|
-
"estimated_cost_usd": "sum",
|
226
|
-
"estimated_input_tokens": "sum",
|
227
|
-
"estimated_output_tokens": "sum",
|
228
|
-
}
|
229
|
-
)
|
230
|
-
.reset_index()
|
231
|
-
)
|
232
|
-
df["estimated_cost_usd"] = df["estimated_cost_usd"] * iterations
|
233
|
-
df["estimated_input_tokens"] = df["estimated_input_tokens"] * iterations
|
234
|
-
df["estimated_output_tokens"] = df["estimated_output_tokens"] * iterations
|
235
|
-
|
236
|
-
estimated_costs_by_model = df.to_dict("records")
|
237
|
-
|
238
|
-
estimated_total_cost = sum(
|
239
|
-
model["estimated_cost_usd"] for model in estimated_costs_by_model
|
240
|
-
)
|
241
|
-
estimated_total_input_tokens = sum(
|
242
|
-
model["estimated_input_tokens"] for model in estimated_costs_by_model
|
243
|
-
)
|
244
|
-
estimated_total_output_tokens = sum(
|
245
|
-
model["estimated_output_tokens"] for model in estimated_costs_by_model
|
246
|
-
)
|
247
|
-
|
248
|
-
output = {
|
249
|
-
"estimated_total_cost_usd": estimated_total_cost,
|
250
|
-
"estimated_total_input_tokens": estimated_total_input_tokens,
|
251
|
-
"estimated_total_output_tokens": estimated_total_output_tokens,
|
252
|
-
"model_costs": estimated_costs_by_model,
|
253
|
-
}
|
254
|
-
|
255
|
-
return output
|
256
|
-
|
257
|
-
def estimate_job_cost(self, iterations: int = 1) -> dict:
|
258
|
-
"""
|
259
|
-
Estimates the cost of a job according to the following assumptions:
|
260
|
-
|
261
|
-
- 1 token = 4 characters.
|
262
|
-
- For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
|
263
|
-
|
264
|
-
Fetches prices from Coop.
|
265
|
-
"""
|
266
|
-
return self.estimate_job_cost_from_external_prices(
|
267
|
-
price_lookup=self.price_lookup, iterations=iterations
|
268
|
-
)
|
1
|
+
from typing import List, TYPE_CHECKING
|
2
|
+
|
3
|
+
from edsl.results.Dataset import Dataset
|
4
|
+
|
5
|
+
if TYPE_CHECKING:
|
6
|
+
from edsl.jobs import Jobs
|
7
|
+
|
8
|
+
# from edsl.jobs.interviews.Interview import Interview
|
9
|
+
# from edsl.results.Dataset import Dataset
|
10
|
+
# from edsl.agents.AgentList import AgentList
|
11
|
+
# from edsl.scenarios.ScenarioList import ScenarioList
|
12
|
+
# from edsl.surveys.Survey import Survey
|
13
|
+
|
14
|
+
|
15
|
+
class JobsPrompts:
|
16
|
+
def __init__(self, jobs: "Jobs"):
|
17
|
+
self.interviews = jobs.interviews()
|
18
|
+
self.agents = jobs.agents
|
19
|
+
self.scenarios = jobs.scenarios
|
20
|
+
self.survey = jobs.survey
|
21
|
+
self._price_lookup = None
|
22
|
+
|
23
|
+
@property
|
24
|
+
def price_lookup(self):
|
25
|
+
if self._price_lookup is None:
|
26
|
+
from edsl import Coop
|
27
|
+
|
28
|
+
c = Coop()
|
29
|
+
self._price_lookup = c.fetch_prices()
|
30
|
+
return self._price_lookup
|
31
|
+
|
32
|
+
def prompts(self) -> "Dataset":
|
33
|
+
"""Return a Dataset of prompts that will be used.
|
34
|
+
|
35
|
+
>>> from edsl.jobs import Jobs
|
36
|
+
>>> Jobs.example().prompts()
|
37
|
+
Dataset(...)
|
38
|
+
"""
|
39
|
+
interviews = self.interviews
|
40
|
+
interview_indices = []
|
41
|
+
question_names = []
|
42
|
+
user_prompts = []
|
43
|
+
system_prompts = []
|
44
|
+
scenario_indices = []
|
45
|
+
agent_indices = []
|
46
|
+
models = []
|
47
|
+
costs = []
|
48
|
+
|
49
|
+
for interview_index, interview in enumerate(interviews):
|
50
|
+
invigilators = [
|
51
|
+
interview._get_invigilator(question)
|
52
|
+
for question in self.survey.questions
|
53
|
+
]
|
54
|
+
for _, invigilator in enumerate(invigilators):
|
55
|
+
prompts = invigilator.get_prompts()
|
56
|
+
user_prompt = prompts["user_prompt"]
|
57
|
+
system_prompt = prompts["system_prompt"]
|
58
|
+
user_prompts.append(user_prompt)
|
59
|
+
system_prompts.append(system_prompt)
|
60
|
+
agent_index = self.agents.index(invigilator.agent)
|
61
|
+
agent_indices.append(agent_index)
|
62
|
+
interview_indices.append(interview_index)
|
63
|
+
scenario_index = self.scenarios.index(invigilator.scenario)
|
64
|
+
scenario_indices.append(scenario_index)
|
65
|
+
models.append(invigilator.model.model)
|
66
|
+
question_names.append(invigilator.question.question_name)
|
67
|
+
|
68
|
+
prompt_cost = self.estimate_prompt_cost(
|
69
|
+
system_prompt=system_prompt,
|
70
|
+
user_prompt=user_prompt,
|
71
|
+
price_lookup=self.price_lookup,
|
72
|
+
inference_service=invigilator.model._inference_service_,
|
73
|
+
model=invigilator.model.model,
|
74
|
+
)
|
75
|
+
costs.append(prompt_cost["cost_usd"])
|
76
|
+
|
77
|
+
d = Dataset(
|
78
|
+
[
|
79
|
+
{"user_prompt": user_prompts},
|
80
|
+
{"system_prompt": system_prompts},
|
81
|
+
{"interview_index": interview_indices},
|
82
|
+
{"question_name": question_names},
|
83
|
+
{"scenario_index": scenario_indices},
|
84
|
+
{"agent_index": agent_indices},
|
85
|
+
{"model": models},
|
86
|
+
{"estimated_cost": costs},
|
87
|
+
]
|
88
|
+
)
|
89
|
+
return d
|
90
|
+
|
91
|
+
@staticmethod
|
92
|
+
def estimate_prompt_cost(
|
93
|
+
system_prompt: str,
|
94
|
+
user_prompt: str,
|
95
|
+
price_lookup: dict,
|
96
|
+
inference_service: str,
|
97
|
+
model: str,
|
98
|
+
) -> dict:
|
99
|
+
"""Estimates the cost of a prompt. Takes piping into account."""
|
100
|
+
import math
|
101
|
+
|
102
|
+
def get_piping_multiplier(prompt: str):
|
103
|
+
"""Returns 2 if a prompt includes Jinja braces, and 1 otherwise."""
|
104
|
+
|
105
|
+
if "{{" in prompt and "}}" in prompt:
|
106
|
+
return 2
|
107
|
+
return 1
|
108
|
+
|
109
|
+
# Look up prices per token
|
110
|
+
key = (inference_service, model)
|
111
|
+
|
112
|
+
try:
|
113
|
+
relevant_prices = price_lookup[key]
|
114
|
+
|
115
|
+
service_input_token_price = float(
|
116
|
+
relevant_prices["input"]["service_stated_token_price"]
|
117
|
+
)
|
118
|
+
service_input_token_qty = float(
|
119
|
+
relevant_prices["input"]["service_stated_token_qty"]
|
120
|
+
)
|
121
|
+
input_price_per_token = service_input_token_price / service_input_token_qty
|
122
|
+
|
123
|
+
service_output_token_price = float(
|
124
|
+
relevant_prices["output"]["service_stated_token_price"]
|
125
|
+
)
|
126
|
+
service_output_token_qty = float(
|
127
|
+
relevant_prices["output"]["service_stated_token_qty"]
|
128
|
+
)
|
129
|
+
output_price_per_token = (
|
130
|
+
service_output_token_price / service_output_token_qty
|
131
|
+
)
|
132
|
+
|
133
|
+
except KeyError:
|
134
|
+
# A KeyError is likely to occur if we cannot retrieve prices (the price_lookup dict is empty)
|
135
|
+
# Use a sensible default
|
136
|
+
|
137
|
+
import warnings
|
138
|
+
|
139
|
+
warnings.warn(
|
140
|
+
"Price data could not be retrieved. Using default estimates for input and output token prices. Input: $0.15 / 1M tokens; Output: $0.60 / 1M tokens"
|
141
|
+
)
|
142
|
+
input_price_per_token = 0.00000015 # $0.15 / 1M tokens
|
143
|
+
output_price_per_token = 0.00000060 # $0.60 / 1M tokens
|
144
|
+
|
145
|
+
# Compute the number of characters (double if the question involves piping)
|
146
|
+
user_prompt_chars = len(str(user_prompt)) * get_piping_multiplier(
|
147
|
+
str(user_prompt)
|
148
|
+
)
|
149
|
+
system_prompt_chars = len(str(system_prompt)) * get_piping_multiplier(
|
150
|
+
str(system_prompt)
|
151
|
+
)
|
152
|
+
|
153
|
+
# Convert into tokens (1 token approx. equals 4 characters)
|
154
|
+
input_tokens = (user_prompt_chars + system_prompt_chars) // 4
|
155
|
+
|
156
|
+
output_tokens = math.ceil(0.75 * input_tokens)
|
157
|
+
|
158
|
+
cost = (
|
159
|
+
input_tokens * input_price_per_token
|
160
|
+
+ output_tokens * output_price_per_token
|
161
|
+
)
|
162
|
+
|
163
|
+
return {
|
164
|
+
"input_tokens": input_tokens,
|
165
|
+
"output_tokens": output_tokens,
|
166
|
+
"cost_usd": cost,
|
167
|
+
}
|
168
|
+
|
169
|
+
def estimate_job_cost_from_external_prices(
|
170
|
+
self, price_lookup: dict, iterations: int = 1
|
171
|
+
) -> dict:
|
172
|
+
"""
|
173
|
+
Estimates the cost of a job according to the following assumptions:
|
174
|
+
|
175
|
+
- 1 token = 4 characters.
|
176
|
+
- For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
|
177
|
+
|
178
|
+
price_lookup is an external pricing dictionary.
|
179
|
+
"""
|
180
|
+
|
181
|
+
import pandas as pd
|
182
|
+
|
183
|
+
interviews = self.interviews
|
184
|
+
data = []
|
185
|
+
for interview in interviews:
|
186
|
+
invigilators = [
|
187
|
+
interview._get_invigilator(question)
|
188
|
+
for question in self.survey.questions
|
189
|
+
]
|
190
|
+
for invigilator in invigilators:
|
191
|
+
prompts = invigilator.get_prompts()
|
192
|
+
|
193
|
+
# By this point, agent and scenario data has already been added to the prompts
|
194
|
+
user_prompt = prompts["user_prompt"]
|
195
|
+
system_prompt = prompts["system_prompt"]
|
196
|
+
inference_service = invigilator.model._inference_service_
|
197
|
+
model = invigilator.model.model
|
198
|
+
|
199
|
+
prompt_cost = self.estimate_prompt_cost(
|
200
|
+
system_prompt=system_prompt,
|
201
|
+
user_prompt=user_prompt,
|
202
|
+
price_lookup=price_lookup,
|
203
|
+
inference_service=inference_service,
|
204
|
+
model=model,
|
205
|
+
)
|
206
|
+
|
207
|
+
data.append(
|
208
|
+
{
|
209
|
+
"user_prompt": user_prompt,
|
210
|
+
"system_prompt": system_prompt,
|
211
|
+
"estimated_input_tokens": prompt_cost["input_tokens"],
|
212
|
+
"estimated_output_tokens": prompt_cost["output_tokens"],
|
213
|
+
"estimated_cost_usd": prompt_cost["cost_usd"],
|
214
|
+
"inference_service": inference_service,
|
215
|
+
"model": model,
|
216
|
+
}
|
217
|
+
)
|
218
|
+
|
219
|
+
df = pd.DataFrame.from_records(data)
|
220
|
+
|
221
|
+
df = (
|
222
|
+
df.groupby(["inference_service", "model"])
|
223
|
+
.agg(
|
224
|
+
{
|
225
|
+
"estimated_cost_usd": "sum",
|
226
|
+
"estimated_input_tokens": "sum",
|
227
|
+
"estimated_output_tokens": "sum",
|
228
|
+
}
|
229
|
+
)
|
230
|
+
.reset_index()
|
231
|
+
)
|
232
|
+
df["estimated_cost_usd"] = df["estimated_cost_usd"] * iterations
|
233
|
+
df["estimated_input_tokens"] = df["estimated_input_tokens"] * iterations
|
234
|
+
df["estimated_output_tokens"] = df["estimated_output_tokens"] * iterations
|
235
|
+
|
236
|
+
estimated_costs_by_model = df.to_dict("records")
|
237
|
+
|
238
|
+
estimated_total_cost = sum(
|
239
|
+
model["estimated_cost_usd"] for model in estimated_costs_by_model
|
240
|
+
)
|
241
|
+
estimated_total_input_tokens = sum(
|
242
|
+
model["estimated_input_tokens"] for model in estimated_costs_by_model
|
243
|
+
)
|
244
|
+
estimated_total_output_tokens = sum(
|
245
|
+
model["estimated_output_tokens"] for model in estimated_costs_by_model
|
246
|
+
)
|
247
|
+
|
248
|
+
output = {
|
249
|
+
"estimated_total_cost_usd": estimated_total_cost,
|
250
|
+
"estimated_total_input_tokens": estimated_total_input_tokens,
|
251
|
+
"estimated_total_output_tokens": estimated_total_output_tokens,
|
252
|
+
"model_costs": estimated_costs_by_model,
|
253
|
+
}
|
254
|
+
|
255
|
+
return output
|
256
|
+
|
257
|
+
def estimate_job_cost(self, iterations: int = 1) -> dict:
|
258
|
+
"""
|
259
|
+
Estimates the cost of a job according to the following assumptions:
|
260
|
+
|
261
|
+
- 1 token = 4 characters.
|
262
|
+
- For each prompt, output tokens = input tokens * 0.75, rounded up to the nearest integer.
|
263
|
+
|
264
|
+
Fetches prices from Coop.
|
265
|
+
"""
|
266
|
+
return self.estimate_job_cost_from_external_prices(
|
267
|
+
price_lookup=self.price_lookup, iterations=iterations
|
268
|
+
)
|