edsl 0.1.53__py3-none-any.whl → 0.1.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/inference_services/services/test_service.py +11 -2
- edsl/invigilators/invigilators.py +1 -1
- edsl/jobs/jobs_pricing_estimation.py +127 -46
- edsl/language_models/language_model.py +16 -6
- edsl/language_models/utilities.py +2 -1
- edsl/questions/question_check_box.py +171 -149
- edsl/questions/question_dict.py +47 -40
- {edsl-0.1.53.dist-info → edsl-0.1.54.dist-info}/METADATA +2 -1
- {edsl-0.1.53.dist-info → edsl-0.1.54.dist-info}/RECORD +13 -13
- {edsl-0.1.53.dist-info → edsl-0.1.54.dist-info}/LICENSE +0 -0
- {edsl-0.1.53.dist-info → edsl-0.1.54.dist-info}/WHEEL +0 -0
- {edsl-0.1.53.dist-info → edsl-0.1.54.dist-info}/entry_points.txt +0 -0
edsl/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.53"
+__version__ = "0.1.54"
edsl/inference_services/services/test_service.py
CHANGED
@@ -53,7 +53,6 @@ class TestService(InferenceServiceABC):
     @property
     def _canned_response(self):
         if hasattr(self, "canned_response"):
-
             return self.canned_response
         else:
             return "Hello, world X"
@@ -64,6 +63,7 @@ class TestService(InferenceServiceABC):
         system_prompt: str,
         # func: Optional[callable] = None,
         files_list: Optional[List["File"]] = None,
+        question_name: Optional[str] = None,
     ) -> dict[str, Any]:
         await asyncio.sleep(0.1)

@@ -75,6 +75,7 @@ class TestService(InferenceServiceABC):

         if random.random() < p:
             from ..exceptions import InferenceServiceError
+
             raise InferenceServiceError("This is a test error")

         if hasattr(self, "func"):
@@ -85,8 +86,16 @@ class TestService(InferenceServiceABC):
                 "usage": {"prompt_tokens": 1, "completion_tokens": 1},
             }

+        response = self._canned_response
+        if isinstance(response, dict) and question_name:
+            canned_text = response.get(
+                question_name, f"No canned response for '{question_name}'"
+            )
+        else:
+            canned_text = response
+
         return {
-            "message": [{"text": f"{self._canned_response}"}],
+            "message": [{"text": f"{canned_text}"}],
             "usage": {"prompt_tokens": 1, "completion_tokens": 1},
         }

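Note: with this change the test service can return per-question canned responses. When canned_response is a dict and a question_name is supplied, the answer is looked up by question name (with a fallback message for missing names); a scalar canned_response behaves as before. A minimal usage sketch (the Model/Survey API shown follows standard edsl usage; treat it as illustrative rather than a verbatim recipe):

# Sketch: per-question canned responses with the "test" model.
from edsl import Model, QuestionFreeText, Survey

model = Model(
    "test",
    canned_response={
        "favorite_color": "blue",    # returned when question_name == "favorite_color"
        "favorite_number": "seven",  # returned when question_name == "favorite_number"
    },
)

survey = Survey(
    [
        QuestionFreeText(question_name="favorite_color", question_text="What is your favorite color?"),
        QuestionFreeText(question_name="favorite_number", question_text="What is your favorite number?"),
    ]
)

results = survey.by(model).run()
# Each question now gets its own scripted answer; a name missing from the dict
# yields the "No canned response for '<name>'" fallback text.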
edsl/invigilators/invigilators.py
CHANGED
@@ -393,7 +393,7 @@ class InvigilatorAI(InvigilatorBase):
         exception_occurred = None
         validated = False

-        if agent_response_dict.model_outputs.cache_used:
+        if agent_response_dict.model_outputs.cache_used and False:
             data = {
                 "answer": agent_response_dict.edsl_dict.answer
                 if type(agent_response_dict.edsl_dict.answer) is str
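Note: appending `and False` makes the guard unsatisfiable, so the branch that previously short-circuited on cached model outputs is now dead code and cached responses go through the same path as fresh ones; it reads like a temporary kill switch rather than an outright removal. A one-line illustration of why the branch can never run:

for cache_used in (True, False):
    assert (cache_used and False) is False  # the guard is always false, so the branch is never taken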
edsl/jobs/jobs_pricing_estimation.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 import math

-from typing import List, TYPE_CHECKING
+from typing import List, TYPE_CHECKING, Union, Literal

 if TYPE_CHECKING:
     from .jobs import Jobs
@@ -26,53 +26,104 @@ class PromptCostEstimator:
     OUTPUT_TOKENS_PER_INPUT_TOKEN = 0.75
     PIPING_MULTIPLIER = 2

-    def __init__(self,
+    def __init__(
+        self,
         system_prompt: str,
         user_prompt: str,
         price_lookup: dict,
         inference_service: str,
-        model: str):
+        model: str,
+    ):
         self.system_prompt = system_prompt
         self.user_prompt = user_prompt
         self.price_lookup = price_lookup
         self.inference_service = inference_service
         self.model = model

-    @staticmethod
+    @staticmethod
     def get_piping_multiplier(prompt: str):
         """Returns 2 if a prompt includes Jinja braces, and 1 otherwise."""

         if "{{" in prompt and "}}" in prompt:
             return PromptCostEstimator.PIPING_MULTIPLIER
         return 1
-
+
     @property
     def key(self):
         return (self.inference_service, self.model)
-
+
     @property
     def relevant_prices(self):
         try:
             return self.price_lookup[self.key]
         except KeyError:
             return {}
-
-    def input_price_per_token(self):
+
+    def _get_highest_price_for_service(self, price_type: str) -> Union[float, None]:
+        """Returns the highest price per token for a given service and price type (input/output).
+
+        Args:
+            price_type: Either "input" or "output"
+
+        Returns:
+            float | None: The highest price per token for the service, or None if not found
+        """
+        prices_for_service = [
+            prices[price_type]["service_stated_token_price"]
+            / prices[price_type]["service_stated_token_qty"]
+            for (service, _), prices in self.price_lookup.items()
+            if service == self.inference_service and price_type in prices
+        ]
+        return max(prices_for_service) if prices_for_service else None
+
+    def input_price_per_token(
+        self,
+    ) -> tuple[float, Literal["price_lookup", "highest_price_for_service", "default"]]:
         try:
-            return self.relevant_prices["input"]["service_stated_token_price"] / self.relevant_prices["input"]["service_stated_token_qty"]
+            return (
+                self.relevant_prices["input"]["service_stated_token_price"]
+                / self.relevant_prices["input"]["service_stated_token_qty"]
+            ), "price_lookup"
         except KeyError:
+            highest_price = self._get_highest_price_for_service("input")
+            if highest_price is not None:
+                import warnings
+
+                warnings.warn(
+                    f"Price data not found for {self.key}. Using highest available input price for {self.inference_service}: ${highest_price:.6f} per token"
+                )
+                return highest_price, "highest_price_for_service"
             import warnings
+
             warnings.warn(
-                "Price data …
+                f"Price data not found for {self.inference_service}. Using default estimate for input token price: $1.00 / 1M tokens"
             )
-            return self.DEFAULT_INPUT_PRICE_PER_TOKEN
+            return self.DEFAULT_INPUT_PRICE_PER_TOKEN, "default"

-    def output_price_per_token(self):
+    def output_price_per_token(
+        self,
+    ) -> tuple[float, Literal["price_lookup", "highest_price_for_service", "default"]]:
         try:
-            return self.relevant_prices["output"]["service_stated_token_price"] / self.relevant_prices["output"]["service_stated_token_qty"]
+            return (
+                self.relevant_prices["output"]["service_stated_token_price"]
+                / self.relevant_prices["output"]["service_stated_token_qty"]
+            ), "price_lookup"
         except KeyError:
-            return self.DEFAULT_OUTPUT_PRICE_PER_TOKEN
-
+            highest_price = self._get_highest_price_for_service("output")
+            if highest_price is not None:
+                import warnings
+
+                warnings.warn(
+                    f"Price data not found for {self.key}. Using highest available output price for {self.inference_service}: ${highest_price:.6f} per token"
+                )
+                return highest_price, "highest_price_for_service"
+            import warnings
+
+            warnings.warn(
+                f"Price data not found for {self.inference_service}. Using default estimate for output token price: $1.00 / 1M tokens"
+            )
+            return self.DEFAULT_OUTPUT_PRICE_PER_TOKEN, "default"
+
     def __call__(self):
         user_prompt_chars = len(str(self.user_prompt)) * self.get_piping_multiplier(
             str(self.user_prompt)
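Note: price lookup now degrades in three steps. If the (inference_service, model) key is missing, the estimator falls back to the highest known per-token price for that service (a deliberate over-estimate), and only then to the $1.00 / 1M tokens default; the chosen source is returned alongside the price. A standalone sketch of the same logic with hypothetical prices (the nested dict layout mirrors the price_lookup structure used above):

# Hypothetical price table; keys are (inference_service, model) pairs.
price_lookup = {
    ("openai", "gpt-4o"): {
        "input": {"service_stated_token_price": 2.50, "service_stated_token_qty": 1_000_000},
        "output": {"service_stated_token_price": 10.00, "service_stated_token_qty": 1_000_000},
    },
    ("openai", "gpt-4o-mini"): {
        "input": {"service_stated_token_price": 0.15, "service_stated_token_qty": 1_000_000},
        "output": {"service_stated_token_price": 0.60, "service_stated_token_qty": 1_000_000},
    },
}

def input_price(service: str, model: str) -> tuple[float, str]:
    # 1) exact match in the lookup
    if (service, model) in price_lookup:
        p = price_lookup[(service, model)]["input"]
        return p["service_stated_token_price"] / p["service_stated_token_qty"], "price_lookup"
    # 2) highest known input price for the same service (conservative over-estimate)
    candidates = [
        prices["input"]["service_stated_token_price"] / prices["input"]["service_stated_token_qty"]
        for (svc, _), prices in price_lookup.items()
        if svc == service and "input" in prices
    ]
    if candidates:
        return max(candidates), "highest_price_for_service"
    # 3) default of $1.00 per 1M tokens
    return 1.00 / 1_000_000, "default"

print(input_price("openai", "gpt-4o"))         # exact price from the lookup
print(input_price("openai", "unknown-model"))  # highest openai input price
print(input_price("acme", "model-x"))          # default estimate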
@@ -84,20 +135,37 @@ class PromptCostEstimator:
         input_tokens = (user_prompt_chars + system_prompt_chars) // self.CHARS_PER_TOKEN
         output_tokens = math.ceil(self.OUTPUT_TOKENS_PER_INPUT_TOKEN * input_tokens)

+        input_price_per_token, input_price_source = self.input_price_per_token()
+        output_price_per_token, output_price_source = self.output_price_per_token()
+
         cost = (
-            input_tokens * self.input_price_per_token()
-            + output_tokens * self.output_price_per_token()
+            input_tokens * input_price_per_token
+            + output_tokens * output_price_per_token
         )
         return {
+            "input_price_source": input_price_source,
+            "input_price_per_token": input_price_per_token,
             "input_tokens": input_tokens,
+            "output_price_source": output_price_source,
             "output_tokens": output_tokens,
+            "output_price_per_token": output_price_per_token,
             "cost_usd": cost,
         }


 class JobsPrompts:

-    relevant_keys = ["user_prompt", "system_prompt", "interview_index", "question_name", "scenario_index", "agent_index", "model", "estimated_cost", "cache_keys"]
+    relevant_keys = [
+        "user_prompt",
+        "system_prompt",
+        "interview_index",
+        "question_name",
+        "scenario_index",
+        "agent_index",
+        "model",
+        "estimated_cost",
+        "cache_keys",
+    ]

     """This generates the prompts for a job for price estimation purposes.

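Note: the estimate itself still converts characters to tokens, projects output tokens from input tokens, and prices both; the returned dict now additionally reports the per-token prices and where they came from. A worked example of the arithmetic (the 0.75 output ratio and the piping multiplier appear above; CHARS_PER_TOKEN = 4 and the per-token prices are assumptions for illustration):

import math

CHARS_PER_TOKEN = 4                  # assumed value of the class constant
OUTPUT_TOKENS_PER_INPUT_TOKEN = 0.75
PIPING_MULTIPLIER = 2                # applied when a prompt contains "{{ ... }}"

user_prompt = "u" * 380 + "{{ piping }}"   # 392 chars, contains Jinja braces
system_prompt = "s" * 200                  # 200 chars, no braces

user_chars = len(user_prompt) * PIPING_MULTIPLIER   # 784
system_chars = len(system_prompt)                   # 200

input_tokens = (user_chars + system_chars) // CHARS_PER_TOKEN            # 984 // 4 = 246
output_tokens = math.ceil(OUTPUT_TOKENS_PER_INPUT_TOKEN * input_tokens)  # ceil(184.5) = 185

input_price_per_token = 2.50 / 1_000_000    # hypothetical $2.50 per 1M input tokens
output_price_per_token = 10.00 / 1_000_000  # hypothetical $10.00 per 1M output tokens

cost_usd = input_tokens * input_price_per_token + output_tokens * output_price_per_token
print(input_tokens, output_tokens, round(cost_usd, 6))  # 246 185 0.002465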
@@ -105,7 +173,6 @@ class JobsPrompts:
     So assumptions are made about expansion of Jinja braces, etc.
     """

-
     @classmethod
     def from_jobs(cls, jobs: "Jobs"):
         """Construct a JobsPrompts object from a Jobs object."""
@@ -114,13 +181,16 @@ class JobsPrompts:
         scenarios = jobs.scenarios
         survey = jobs.survey
         return cls(
-            interviews=interviews,
-            agents=agents,
-            scenarios=scenarios,
-            survey=survey
+            interviews=interviews, agents=agents, scenarios=scenarios, survey=survey
         )
-
-    def __init__(self, interviews: List["Interview"], agents: "AgentList", scenarios: "ScenarioList", survey: "Survey"):
+
+    def __init__(
+        self,
+        interviews: List["Interview"],
+        agents: "AgentList",
+        scenarios: "ScenarioList",
+        survey: "Survey",
+    ):
         """Initialize with extracted components rather than a Jobs object."""
         self.interviews = interviews
         self.agents = agents
@@ -143,17 +213,19 @@ class JobsPrompts:
             self._price_lookup = c.fetch_prices()
         return self._price_lookup

-    def _process_one_invigilator(self, invigilator: "Invigilator", interview_index: int, iterations: int = 1) -> dict:
+    def _process_one_invigilator(
+        self, invigilator: "Invigilator", interview_index: int, iterations: int = 1
+    ) -> dict:
         """Process a single invigilator and return a dictionary with all needed data fields."""
         prompts = invigilator.get_prompts()
         user_prompt = prompts["user_prompt"]
         system_prompt = prompts["system_prompt"]
-
+
         agent_index = self._agent_lookup[invigilator.agent]
         scenario_index = self._scenario_lookup[invigilator.scenario]
         model = invigilator.model.model
         question_name = invigilator.question.question_name
-
+
         # Calculate prompt cost
         prompt_cost = self.estimate_prompt_cost(
             system_prompt=system_prompt,
@@ -163,7 +235,7 @@ class JobsPrompts:
             model=model,
         )
         cost = prompt_cost["cost_usd"]
-
+
         # Generate cache keys for each iteration
         cache_keys = []
         for iteration in range(iterations):
@@ -175,7 +247,7 @@ class JobsPrompts:
                 iteration=iteration,
             )
             cache_keys.append(cache_key)
-
+
         d = {
             "user_prompt": user_prompt,
             "system_prompt": system_prompt,
@@ -200,7 +272,7 @@ class JobsPrompts:
         dataset_of_prompts = {k: [] for k in self.relevant_keys}

         interviews = self.interviews
-
+
         # Process each interview and invigilator
         for interview_index, interview in enumerate(interviews):
             invigilators = [
@@ -210,11 +282,13 @@ class JobsPrompts:

             for invigilator in invigilators:
                 # Process the invigilator and get all data as a dictionary
-                data = self._process_one_invigilator(invigilator, interview_index, iterations)
+                data = self._process_one_invigilator(
+                    invigilator, interview_index, iterations
+                )
                 for k in self.relevant_keys:
                     dataset_of_prompts[k].append(data[k])
-
-        return Dataset([{k:dataset_of_prompts[k]} for k in self.relevant_keys])
+
+        return Dataset([{k: dataset_of_prompts[k]} for k in self.relevant_keys])

     @staticmethod
     def estimate_prompt_cost(
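Note: prompts() builds one column per entry in relevant_keys and returns them as a Dataset, with one value per (interview, invigilator) pair. Roughly, the returned structure is a list of single-column mappings (the values below are purely illustrative):

# Illustrative shape of the Dataset payload built by prompts().
[
    {"user_prompt": ["What is your favorite color?", "..."]},
    {"system_prompt": ["You are answering as an agent with persona ...", "..."]},
    {"interview_index": [0, 0]},
    {"question_name": ["favorite_color", "favorite_number"]},
    {"scenario_index": [0, 0]},
    {"agent_index": [0, 0]},
    {"model": ["test", "test"]},
    {"estimated_cost": [0.000332, 0.000198]},
    {"cache_keys": [["<key-iter-0>"], ["<key-iter-0>"]]},
]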
@@ -230,13 +304,13 @@ class JobsPrompts:
             user_prompt=user_prompt,
             price_lookup=price_lookup,
             inference_service=inference_service,
-            model=model
+            model=model,
         )()
-
+
     @staticmethod
     def _extract_prompt_details(invigilator: FetchInvigilator) -> dict:
         """Extracts the prompt details from the invigilator.
-
+
         >>> from edsl.invigilators import InvigilatorAI
         >>> invigilator = InvigilatorAI.example()
         >>> JobsPrompts._extract_prompt_details(invigilator)
@@ -276,11 +350,13 @@ class JobsPrompts:
         ]
         for invigilator in invigilators:
             prompt_details = self._extract_prompt_details(invigilator)
-            prompt_cost = self.estimate_prompt_cost(**prompt_details, price_lookup=price_lookup)
+            prompt_cost = self.estimate_prompt_cost(
+                **prompt_details, price_lookup=price_lookup
+            )
             price_estimates = {
-                "estimated_input_tokens": prompt_cost["input_tokens"],
-                "estimated_output_tokens": prompt_cost["output_tokens"],
-                "estimated_cost_usd": prompt_cost["cost_usd"],
+                "estimated_input_tokens": prompt_cost["input_tokens"],
+                "estimated_output_tokens": prompt_cost["output_tokens"],
+                "estimated_cost_usd": prompt_cost["cost_usd"],
             }
             data.append({**price_estimates, **prompt_details})
@@ -293,14 +369,18 @@ class JobsPrompts:
                 "model": item["model"],
                 "estimated_cost_usd": 0,
                 "estimated_input_tokens": 0,
-                "estimated_output_tokens": 0
+                "estimated_output_tokens": 0,
             }
-
+
             # Accumulate values
             model_groups[key]["estimated_cost_usd"] += item["estimated_cost_usd"]
-            model_groups[key]["estimated_input_tokens"] += item["estimated_input_tokens"]
-            model_groups[key]["estimated_output_tokens"] += item["estimated_output_tokens"]
-
+            model_groups[key]["estimated_input_tokens"] += item[
+                "estimated_input_tokens"
+            ]
+            model_groups[key]["estimated_output_tokens"] += item[
+                "estimated_output_tokens"
+            ]
+
         # Apply iterations and convert to list
         estimated_costs_by_model = []
         for group_data in model_groups.values():
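Note: the per-prompt rows are then summed per (inference_service, model) group and the group totals are scaled by the number of iterations. A compact standalone sketch of that aggregation (row values and the iteration count are hypothetical; field names follow the diff):

from collections import defaultdict

iterations = 2
rows = [
    {"inference_service": "openai", "model": "gpt-4o",
     "estimated_cost_usd": 0.001, "estimated_input_tokens": 200, "estimated_output_tokens": 150},
    {"inference_service": "openai", "model": "gpt-4o",
     "estimated_cost_usd": 0.002, "estimated_input_tokens": 400, "estimated_output_tokens": 300},
]

model_groups = defaultdict(
    lambda: {"estimated_cost_usd": 0, "estimated_input_tokens": 0, "estimated_output_tokens": 0}
)
for row in rows:
    key = (row["inference_service"], row["model"])
    model_groups[key]["estimated_cost_usd"] += row["estimated_cost_usd"]
    model_groups[key]["estimated_input_tokens"] += row["estimated_input_tokens"]
    model_groups[key]["estimated_output_tokens"] += row["estimated_output_tokens"]

# "Apply iterations": scale each group total by the number of iterations.
totals = {key: {k: v * iterations for k, v in agg.items()} for key, agg in model_groups.items()}
print(totals)  # ('openai', 'gpt-4o'): cost ~0.006, 1200 input tokens, 900 output tokens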
@@ -345,4 +425,5 @@ class JobsPrompts:

 if __name__ == "__main__":
     import doctest
+
     doctest.testmod(optionflags=doctest.ELLIPSIS)
edsl/language_models/language_model.py
CHANGED
@@ -509,7 +509,9 @@ class LanguageModel(
         return self.execute_model_call(user_prompt, system_prompt)

     @abstractmethod
-    async def async_execute_model_call(self, user_prompt: str, system_prompt: str):
+    async def async_execute_model_call(
+        self, user_prompt: str, system_prompt: str, question_name: Optional[str] = None
+    ):
         """Execute the model call asynchronously.

         This abstract method must be implemented by all model subclasses
@@ -518,6 +520,7 @@ class LanguageModel(
         Args:
             user_prompt: The user message or input prompt
             system_prompt: The system message or context
+            question_name: Optional name of the question being asked (primarily used for test models)

         Returns:
             Coroutine that resolves to the model response
@@ -529,7 +532,7 @@ class LanguageModel(
         pass

     async def remote_async_execute_model_call(
-        self, user_prompt: str, system_prompt: str
+        self, user_prompt: str, system_prompt: str, question_name: Optional[str] = None
     ):
         """Execute the model call remotely through the EDSL Coop service.

@@ -540,6 +543,7 @@ class LanguageModel(
         Args:
             user_prompt: The user message or input prompt
             system_prompt: The system message or context
+            question_name: Optional name of the question being asked (primarily used for test models)

         Returns:
             Coroutine that resolves to the model response from the remote service
@@ -563,6 +567,7 @@ class LanguageModel(
         Args:
             *args: Positional arguments to pass to async_execute_model_call
             **kwargs: Keyword arguments to pass to async_execute_model_call
+                Can include question_name for test models

         Returns:
             The model response
@@ -702,7 +707,9 @@ class LanguageModel(
             "system_prompt": system_prompt,
             "files_list": files_list,
         }
-
+        # Add question_name parameter for test models
+        if self.model == "test" and invigilator:
+            params["question_name"] = invigilator.question.question_name
         # Get timeout from configuration
         from ..config import CONFIG

@@ -710,7 +717,6 @@ class LanguageModel(

         # Execute the model call with timeout
         response = await asyncio.wait_for(f(**params), timeout=TIMEOUT)
-
         # Store the response in the cache
         new_cache_key = cache.store(
             **cache_call_params, response=response, service=self._inference_service_
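Note: together with the test_service.py change above, question_name now travels from the invigilator into the model-call parameters, but only for the test model, so real providers never receive the extra argument. A self-contained sketch of that threading (SimpleNamespace stands in for the real invigilator and question objects; not the actual classes):

from types import SimpleNamespace

def build_call_params(model_name, user_prompt, system_prompt, files_list, invigilator):
    params = {
        "user_prompt": user_prompt,
        "system_prompt": system_prompt,
        "files_list": files_list,
    }
    # Mirrors the guard added in the diff: only the "test" model gets question_name.
    if model_name == "test" and invigilator:
        params["question_name"] = invigilator.question.question_name
    return params

invigilator = SimpleNamespace(question=SimpleNamespace(question_name="favorite_color"))
print(build_call_params("test", "What is your favorite color?", "You are an agent.", None, invigilator))
print(build_call_params("gpt-4o", "What is your favorite color?", "You are an agent.", None, invigilator))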
@@ -801,7 +807,6 @@ class LanguageModel(

         # Create structured input record
         model_inputs = ModelInputs(user_prompt=user_prompt, system_prompt=system_prompt)
-
         # Get model response (using cache if available)
         model_outputs: ModelResponse = (
             await self._async_get_intended_model_call_outcome(**params)
@@ -1046,7 +1051,12 @@ class LanguageModel(
         ]

         # Define a new async_execute_model_call that only reads from cache
-        async def async_execute_model_call(self, user_prompt: str, system_prompt: str):
+        async def async_execute_model_call(
+            self,
+            user_prompt: str,
+            system_prompt: str,
+            question_name: Optional[str] = None,
+        ):
             """Only use cached responses, never making new API calls."""
             cache_call_params = {
                 "model": str(self.model),
edsl/language_models/utilities.py
CHANGED
@@ -5,6 +5,7 @@ from ..surveys import Survey

 from .language_model import LanguageModel

+
 def create_survey(num_questions: int, chained: bool = True, take_scenario=False):
     from ..questions import QuestionFreeText

@@ -28,7 +29,6 @@ def create_survey(num_questions: int, chained: bool = True, take_scenario=False)
 def create_language_model(
     exception: Exception, fail_at_number: int, never_ending=False
 ):
-
     class LanguageModelFromUtilities(LanguageModel):
         _model_ = "test"
         _parameters_ = {"temperature": 0.5}
@@ -45,6 +45,7 @@ def create_language_model(
             user_prompt: str,
             system_prompt: str,
             files_list: Optional[List[Any]] = None,
+            question_name: Optional[str] = None,
         ) -> dict[str, Any]:
             question_number = int(
                 user_prompt.split("XX")[1]