edsl 0.1.31.dev3__py3-none-any.whl → 0.1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/agents/Invigilator.py +7 -2
- edsl/agents/PromptConstructionMixin.py +35 -15
- edsl/config.py +15 -1
- edsl/conjure/Conjure.py +6 -0
- edsl/coop/coop.py +4 -0
- edsl/data/CacheHandler.py +3 -4
- edsl/enums.py +5 -0
- edsl/exceptions/general.py +10 -8
- edsl/inference_services/AwsBedrock.py +110 -0
- edsl/inference_services/AzureAI.py +197 -0
- edsl/inference_services/DeepInfraService.py +6 -91
- edsl/inference_services/GroqService.py +18 -0
- edsl/inference_services/InferenceServicesCollection.py +13 -8
- edsl/inference_services/OllamaService.py +18 -0
- edsl/inference_services/OpenAIService.py +68 -21
- edsl/inference_services/models_available_cache.py +31 -0
- edsl/inference_services/registry.py +14 -1
- edsl/jobs/Jobs.py +103 -21
- edsl/jobs/buckets/TokenBucket.py +12 -4
- edsl/jobs/interviews/Interview.py +31 -9
- edsl/jobs/interviews/InterviewExceptionEntry.py +101 -0
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +49 -33
- edsl/jobs/interviews/interview_exception_tracking.py +68 -10
- edsl/jobs/runners/JobsRunnerAsyncio.py +112 -81
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -237
- edsl/jobs/runners/JobsRunnerStatusMixin.py +291 -35
- edsl/jobs/tasks/TaskCreators.py +8 -2
- edsl/jobs/tasks/TaskHistory.py +145 -1
- edsl/language_models/LanguageModel.py +62 -41
- edsl/language_models/registry.py +4 -0
- edsl/questions/QuestionBudget.py +0 -1
- edsl/questions/QuestionCheckBox.py +0 -1
- edsl/questions/QuestionExtract.py +0 -1
- edsl/questions/QuestionFreeText.py +2 -9
- edsl/questions/QuestionList.py +0 -1
- edsl/questions/QuestionMultipleChoice.py +1 -2
- edsl/questions/QuestionNumerical.py +0 -1
- edsl/questions/QuestionRank.py +0 -1
- edsl/results/DatasetExportMixin.py +33 -3
- edsl/scenarios/Scenario.py +14 -0
- edsl/scenarios/ScenarioList.py +216 -13
- edsl/scenarios/ScenarioListExportMixin.py +15 -4
- edsl/scenarios/ScenarioListPdfMixin.py +3 -0
- edsl/surveys/Rule.py +5 -2
- edsl/surveys/Survey.py +84 -1
- edsl/surveys/SurveyQualtricsImport.py +213 -0
- edsl/utilities/utilities.py +31 -0
- {edsl-0.1.31.dev3.dist-info → edsl-0.1.32.dist-info}/METADATA +5 -1
- {edsl-0.1.31.dev3.dist-info → edsl-0.1.32.dist-info}/RECORD +52 -46
- {edsl-0.1.31.dev3.dist-info → edsl-0.1.32.dist-info}/LICENSE +0 -0
- {edsl-0.1.31.dev3.dist-info → edsl-0.1.32.dist-info}/WHEEL +0 -0
@@ -1,237 +0,0 @@
|
|
1
|
-
import asyncio
|
2
|
-
from enum import Enum
|
3
|
-
from typing import Literal, List, Type, DefaultDict
|
4
|
-
from collections import UserDict, defaultdict
|
5
|
-
|
6
|
-
from edsl.jobs.interviews.InterviewStatusDictionary import InterviewStatusDictionary
|
7
|
-
from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
|
8
|
-
|
9
|
-
# from edsl.enums import pricing, TokenPricing
|
10
|
-
from edsl.enums import get_token_pricing
|
11
|
-
from edsl.jobs.tasks.task_status_enum import TaskStatus
|
12
|
-
|
13
|
-
InterviewTokenUsageMapping = DefaultDict[str, InterviewTokenUsage]
|
14
|
-
|
15
|
-
from edsl.jobs.interviews.InterviewStatistic import InterviewStatistic
|
16
|
-
from edsl.jobs.interviews.InterviewStatisticsCollection import (
|
17
|
-
InterviewStatisticsCollection,
|
18
|
-
)
|
19
|
-
|
20
|
-
|
21
|
-
class JobsRunnerStatusData:
|
22
|
-
def status_dict(
|
23
|
-
self, interviews: List[Type["Interview"]]
|
24
|
-
) -> List[Type[InterviewStatusDictionary]]:
|
25
|
-
"""
|
26
|
-
>>> from edsl.jobs.interviews.Interview import Interview
|
27
|
-
>>> interviews = [Interview.example()]
|
28
|
-
>>> JobsRunnerStatusData().status_dict(interviews)
|
29
|
-
[InterviewStatusDictionary({<TaskStatus.NOT_STARTED: 1>: 0, <TaskStatus.WAITING_FOR_DEPENDENCIES: 2>: 0, <TaskStatus.CANCELLED: 3>: 0, <TaskStatus.PARENT_FAILED: 4>: 0, <TaskStatus.WAITING_FOR_REQUEST_CAPACITY: 5>: 0, <TaskStatus.WAITING_FOR_TOKEN_CAPACITY: 6>: 0, <TaskStatus.API_CALL_IN_PROGRESS: 7>: 0, <TaskStatus.SUCCESS: 8>: 0, <TaskStatus.FAILED: 9>: 0, 'number_from_cache': 0})]
|
30
|
-
"""
|
31
|
-
status = []
|
32
|
-
for interview in interviews:
|
33
|
-
status.append(interview.interview_status)
|
34
|
-
|
35
|
-
return status
|
36
|
-
|
37
|
-
def status_counts(self, interviews: List[Type["Interview"]]):
|
38
|
-
"""
|
39
|
-
Takes a collection of interviews and returns a dictionary of the counts of each status.
|
40
|
-
|
41
|
-
:param interviews: a collection of interviews.
|
42
|
-
|
43
|
-
This creates a dictionary of the counts of each status in the collection of interviews.
|
44
|
-
|
45
|
-
>>> from edsl.jobs.interviews.Interview import Interview
|
46
|
-
>>> interviews = [Interview.example() for _ in range(100)]
|
47
|
-
>>> jd = JobsRunnerStatusData()
|
48
|
-
>>> jd.status_counts(interviews)
|
49
|
-
dict_values([InterviewStatusDictionary({<TaskStatus.NOT_STARTED: 1>: 0, <TaskStatus.WAITING_FOR_DEPENDENCIES: 2>: 0, <TaskStatus.CANCELLED: 3>: 0, <TaskStatus.PARENT_FAILED: 4>: 0, <TaskStatus.WAITING_FOR_REQUEST_CAPACITY: 5>: 0, <TaskStatus.WAITING_FOR_TOKEN_CAPACITY: 6>: 0, <TaskStatus.API_CALL_IN_PROGRESS: 7>: 0, <TaskStatus.SUCCESS: 8>: 0, <TaskStatus.FAILED: 9>: 0, 'number_from_cache': 0})])
|
50
|
-
>>> len(jd.status_counts(interviews))
|
51
|
-
1
|
52
|
-
"""
|
53
|
-
model_to_status = defaultdict(InterviewStatusDictionary)
|
54
|
-
|
55
|
-
for interview in interviews:
|
56
|
-
model = interview.model # get the model for the interview
|
57
|
-
model_to_status[
|
58
|
-
model
|
59
|
-
] += (
|
60
|
-
interview.interview_status
|
61
|
-
) # InterviewStatusDictionary objects can be added together
|
62
|
-
|
63
|
-
return (
|
64
|
-
model_to_status.values()
|
65
|
-
) # return the values of the dictionary, which is a list of dictionaries
|
66
|
-
|
67
|
-
def generate_status_summary(
|
68
|
-
self,
|
69
|
-
completed_tasks: List[Type[asyncio.Task]],
|
70
|
-
elapsed_time: float,
|
71
|
-
interviews: List[Type["Interview"]],
|
72
|
-
) -> InterviewStatisticsCollection:
|
73
|
-
"""Generate a summary of the status of the job runner.
|
74
|
-
|
75
|
-
:param completed_tasks: list of completed tasks
|
76
|
-
:param elapsed_time: time elapsed since the start of the job
|
77
|
-
:param interviews: list of interviews to be conducted
|
78
|
-
|
79
|
-
>>> from edsl.jobs.interviews.Interview import Interview
|
80
|
-
>>> interviews = [Interview.example()]
|
81
|
-
>>> completed_tasks = []
|
82
|
-
>>> elapsed_time = 0
|
83
|
-
>>> JobsRunnerStatusData().generate_status_summary(completed_tasks, elapsed_time, interviews)
|
84
|
-
{'Elapsed time': '0.0 sec.', 'Total interviews requested': '1 ', 'Completed interviews': '0 ', 'Percent complete': '0 %', 'Average time per interview': 'NA', 'Task remaining': '1 ', 'Estimated time remaining': 'NA', 'model_queues': [{'model_name': '...', 'TPM_limit_k': ..., 'RPM_limit_k': ..., 'num_tasks_waiting': 0, 'token_usage_info': [{'cache_status': 'new_token_usage', 'details': [{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], 'cost': '$0.00000'}, {'cache_status': 'cached_token_usage', 'details': [{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], 'cost': '$0.00000'}]}]}
|
85
|
-
"""
|
86
|
-
|
87
|
-
models_to_tokens = defaultdict(InterviewTokenUsage)
|
88
|
-
model_to_status = defaultdict(InterviewStatusDictionary)
|
89
|
-
|
90
|
-
waiting_dict = defaultdict(int)
|
91
|
-
|
92
|
-
interview_statistics = InterviewStatisticsCollection()
|
93
|
-
|
94
|
-
for interview in interviews:
|
95
|
-
model = interview.model
|
96
|
-
models_to_tokens[model] += interview.token_usage
|
97
|
-
model_to_status[model] += interview.interview_status
|
98
|
-
waiting_dict[model] += interview.interview_status.waiting
|
99
|
-
|
100
|
-
interview_statistics.add_stat(
|
101
|
-
InterviewStatistic(
|
102
|
-
"elapsed_time", value=elapsed_time, digits=1, units="sec."
|
103
|
-
)
|
104
|
-
)
|
105
|
-
interview_statistics.add_stat(
|
106
|
-
InterviewStatistic(
|
107
|
-
"total_interviews_requested", value=len(interviews), units=""
|
108
|
-
)
|
109
|
-
)
|
110
|
-
interview_statistics.add_stat(
|
111
|
-
InterviewStatistic(
|
112
|
-
"completed_interviews", value=len(completed_tasks), units=""
|
113
|
-
)
|
114
|
-
)
|
115
|
-
interview_statistics.add_stat(
|
116
|
-
InterviewStatistic(
|
117
|
-
"percent_complete",
|
118
|
-
value=(
|
119
|
-
len(completed_tasks) / len(interviews) * 100
|
120
|
-
if len(interviews) > 0
|
121
|
-
else "NA"
|
122
|
-
),
|
123
|
-
digits=0,
|
124
|
-
units="%",
|
125
|
-
)
|
126
|
-
)
|
127
|
-
interview_statistics.add_stat(
|
128
|
-
InterviewStatistic(
|
129
|
-
"average_time_per_interview",
|
130
|
-
value=elapsed_time / len(completed_tasks) if completed_tasks else "NA",
|
131
|
-
digits=1,
|
132
|
-
units="sec.",
|
133
|
-
)
|
134
|
-
)
|
135
|
-
interview_statistics.add_stat(
|
136
|
-
InterviewStatistic(
|
137
|
-
"task_remaining", value=len(interviews) - len(completed_tasks), units=""
|
138
|
-
)
|
139
|
-
)
|
140
|
-
number_remaining = len(interviews) - len(completed_tasks)
|
141
|
-
time_per_task = (
|
142
|
-
elapsed_time / len(completed_tasks) if len(completed_tasks) > 0 else "NA"
|
143
|
-
)
|
144
|
-
estimated_time_remaining = (
|
145
|
-
number_remaining * time_per_task if time_per_task != "NA" else "NA"
|
146
|
-
)
|
147
|
-
|
148
|
-
interview_statistics.add_stat(
|
149
|
-
InterviewStatistic(
|
150
|
-
"estimated_time_remaining",
|
151
|
-
value=estimated_time_remaining,
|
152
|
-
digits=1,
|
153
|
-
units="sec.",
|
154
|
-
)
|
155
|
-
)
|
156
|
-
model_queues_info = []
|
157
|
-
for model, num_waiting in waiting_dict.items():
|
158
|
-
model_info = self._get_model_info(model, num_waiting, models_to_tokens)
|
159
|
-
model_queues_info.append(model_info)
|
160
|
-
|
161
|
-
interview_statistics["model_queues"] = model_queues_info
|
162
|
-
|
163
|
-
return interview_statistics
|
164
|
-
|
165
|
-
def _get_model_info(
|
166
|
-
self,
|
167
|
-
model: str,
|
168
|
-
num_waiting: int,
|
169
|
-
models_to_tokens: InterviewTokenUsageMapping,
|
170
|
-
) -> dict:
|
171
|
-
"""Get the status of a model.
|
172
|
-
|
173
|
-
>>> from edsl.jobs.interviews.Interview import Interview
|
174
|
-
>>> interviews = [Interview.example()]
|
175
|
-
>>> models_to_tokens = defaultdict(InterviewTokenUsage)
|
176
|
-
>>> model = interviews[0].model
|
177
|
-
>>> num_waiting = 0
|
178
|
-
>>> JobsRunnerStatusData()._get_model_info(model, num_waiting, models_to_tokens)
|
179
|
-
{'model_name': 'gpt-4-1106-preview', 'TPM_limit_k': ..., 'RPM_limit_k': ..., 'num_tasks_waiting': 0, 'token_usage_info': [{'cache_status': 'new_token_usage', 'details': [{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], 'cost': '$0.00000'}, {'cache_status': 'cached_token_usage', 'details': [{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], 'cost': '$0.00000'}]}
|
180
|
-
"""
|
181
|
-
|
182
|
-
prices = get_token_pricing(model.model)
|
183
|
-
|
184
|
-
model_info = {
|
185
|
-
"model_name": model.model,
|
186
|
-
"TPM_limit_k": model.TPM / 1000,
|
187
|
-
"RPM_limit_k": model.RPM / 1000,
|
188
|
-
"num_tasks_waiting": num_waiting,
|
189
|
-
"token_usage_info": [],
|
190
|
-
}
|
191
|
-
|
192
|
-
token_usage_types = ["new_token_usage", "cached_token_usage"]
|
193
|
-
for token_usage_type in token_usage_types:
|
194
|
-
cache_info = self._get_token_usage_info(
|
195
|
-
token_usage_type, models_to_tokens, model, prices
|
196
|
-
)
|
197
|
-
model_info["token_usage_info"].append(cache_info)
|
198
|
-
|
199
|
-
return model_info
|
200
|
-
|
201
|
-
def _get_token_usage_info(
|
202
|
-
self,
|
203
|
-
cache_status: Literal["new_token_usage", "cached_token_usage"],
|
204
|
-
models_to_tokens: InterviewTokenUsageMapping,
|
205
|
-
model: str,
|
206
|
-
prices: "TokenPricing",
|
207
|
-
) -> dict:
|
208
|
-
"""Get the token usage info for a model.
|
209
|
-
|
210
|
-
>>> from edsl.jobs.interviews.Interview import Interview
|
211
|
-
>>> interviews = [Interview.example()]
|
212
|
-
>>> models_to_tokens = defaultdict(InterviewTokenUsage)
|
213
|
-
>>> model = interviews[0].model
|
214
|
-
>>> prices = get_token_pricing(model.model)
|
215
|
-
>>> cache_status = "new_token_usage"
|
216
|
-
>>> JobsRunnerStatusData()._get_token_usage_info(cache_status, models_to_tokens, model, prices)
|
217
|
-
{'cache_status': 'new_token_usage', 'details': [{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], 'cost': '$0.00000'}
|
218
|
-
|
219
|
-
"""
|
220
|
-
cache_info = {"cache_status": cache_status, "details": []}
|
221
|
-
token_usage = getattr(models_to_tokens[model], cache_status)
|
222
|
-
for token_type in ["prompt_tokens", "completion_tokens"]:
|
223
|
-
tokens = getattr(token_usage, token_type)
|
224
|
-
cache_info["details"].append(
|
225
|
-
{
|
226
|
-
"type": token_type,
|
227
|
-
"tokens": tokens,
|
228
|
-
}
|
229
|
-
)
|
230
|
-
cache_info["cost"] = f"${token_usage.cost(prices):.5f}"
|
231
|
-
return cache_info
|
232
|
-
|
233
|
-
|
234
|
-
if __name__ == "__main__":
|
235
|
-
import doctest
|
236
|
-
|
237
|
-
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
@@ -1,27 +1,272 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
from typing import List, DefaultDict
|
3
3
|
import asyncio
|
4
|
+
from typing import Type
|
5
|
+
from collections import defaultdict
|
6
|
+
|
7
|
+
from typing import Literal, List, Type, DefaultDict
|
8
|
+
from collections import UserDict, defaultdict
|
9
|
+
|
10
|
+
from edsl.jobs.interviews.InterviewStatusDictionary import InterviewStatusDictionary
|
11
|
+
from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
|
12
|
+
from edsl.jobs.tokens.TokenUsage import TokenUsage
|
13
|
+
from edsl.enums import get_token_pricing
|
14
|
+
from edsl.jobs.tasks.task_status_enum import TaskStatus
|
15
|
+
|
16
|
+
InterviewTokenUsageMapping = DefaultDict[str, InterviewTokenUsage]
|
17
|
+
|
18
|
+
from edsl.jobs.interviews.InterviewStatistic import InterviewStatistic
|
19
|
+
from edsl.jobs.interviews.InterviewStatisticsCollection import (
|
20
|
+
InterviewStatisticsCollection,
|
21
|
+
)
|
22
|
+
from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
|
23
|
+
|
24
|
+
|
25
|
+
# return {"cache_status": token_usage_type, "details": details, "cost": f"${token_usage.cost(prices):.5f}"}
|
26
|
+
|
27
|
+
from dataclasses import dataclass, asdict
|
4
28
|
|
5
29
|
from rich.text import Text
|
6
30
|
from rich.box import SIMPLE
|
7
31
|
from rich.table import Table
|
8
32
|
|
9
|
-
from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
|
10
|
-
from edsl.jobs.runners.JobsRunnerStatusData import JobsRunnerStatusData
|
11
33
|
|
12
|
-
|
34
|
+
@dataclass
|
35
|
+
class ModelInfo:
|
36
|
+
model_name: str
|
37
|
+
TPM_limit_k: float
|
38
|
+
RPM_limit_k: float
|
39
|
+
num_tasks_waiting: int
|
40
|
+
token_usage_info: dict
|
41
|
+
|
42
|
+
|
43
|
+
@dataclass
|
44
|
+
class ModelTokenUsageStats:
|
45
|
+
token_usage_type: str
|
46
|
+
details: List[dict]
|
47
|
+
cost: str
|
48
|
+
|
49
|
+
|
50
|
+
class Stats:
|
51
|
+
def elapsed_time(self):
|
52
|
+
InterviewStatistic("elapsed_time", value=elapsed_time, digits=1, units="sec.")
|
53
|
+
|
54
|
+
|
55
|
+
class JobsRunnerStatusMixin:
|
56
|
+
# @staticmethod
|
57
|
+
# def status_dict(interviews: List[Type["Interview"]]) -> List[Type[InterviewStatusDictionary]]:
|
58
|
+
# """
|
59
|
+
# >>> from edsl.jobs.interviews.Interview import Interview
|
60
|
+
# >>> interviews = [Interview.example()]
|
61
|
+
# >>> JobsRunnerStatusMixin().status_dict(interviews)
|
62
|
+
# [InterviewStatusDictionary({<TaskStatus.NOT_STARTED: 1>: 0, <TaskStatus.WAITING_FOR_DEPENDENCIES: 2>: 0, <TaskStatus.CANCELLED: 3>: 0, <TaskStatus.PARENT_FAILED: 4>: 0, <TaskStatus.WAITING_FOR_REQUEST_CAPACITY: 5>: 0, <TaskStatus.WAITING_FOR_TOKEN_CAPACITY: 6>: 0, <TaskStatus.API_CALL_IN_PROGRESS: 7>: 0, <TaskStatus.SUCCESS: 8>: 0, <TaskStatus.FAILED: 9>: 0, 'number_from_cache': 0})]
|
63
|
+
# """
|
64
|
+
# return [interview.interview_status for interview in interviews]
|
13
65
|
|
66
|
+
def _compute_statistic(stat_name: str, completed_tasks, elapsed_time, interviews):
|
67
|
+
stat_definitions = {
|
68
|
+
"elapsed_time": lambda: InterviewStatistic(
|
69
|
+
"elapsed_time", value=elapsed_time, digits=1, units="sec."
|
70
|
+
),
|
71
|
+
"total_interviews_requested": lambda: InterviewStatistic(
|
72
|
+
"total_interviews_requested", value=len(interviews), units=""
|
73
|
+
),
|
74
|
+
"completed_interviews": lambda: InterviewStatistic(
|
75
|
+
"completed_interviews", value=len(completed_tasks), units=""
|
76
|
+
),
|
77
|
+
"percent_complete": lambda: InterviewStatistic(
|
78
|
+
"percent_complete",
|
79
|
+
value=(
|
80
|
+
len(completed_tasks) / len(interviews) * 100
|
81
|
+
if len(interviews) > 0
|
82
|
+
else "NA"
|
83
|
+
),
|
84
|
+
digits=0,
|
85
|
+
units="%",
|
86
|
+
),
|
87
|
+
"average_time_per_interview": lambda: InterviewStatistic(
|
88
|
+
"average_time_per_interview",
|
89
|
+
value=elapsed_time / len(completed_tasks) if completed_tasks else "NA",
|
90
|
+
digits=1,
|
91
|
+
units="sec.",
|
92
|
+
),
|
93
|
+
"task_remaining": lambda: InterviewStatistic(
|
94
|
+
"task_remaining", value=len(interviews) - len(completed_tasks), units=""
|
95
|
+
),
|
96
|
+
"estimated_time_remaining": lambda: InterviewStatistic(
|
97
|
+
"estimated_time_remaining",
|
98
|
+
value=(
|
99
|
+
(len(interviews) - len(completed_tasks))
|
100
|
+
* (elapsed_time / len(completed_tasks))
|
101
|
+
if len(completed_tasks) > 0
|
102
|
+
else "NA"
|
103
|
+
),
|
104
|
+
digits=1,
|
105
|
+
units="sec.",
|
106
|
+
),
|
107
|
+
}
|
108
|
+
if stat_name not in stat_definitions:
|
109
|
+
raise ValueError(
|
110
|
+
f"Invalid stat_name: {stat_name}. The valid stat_names are: {list(stat_definitions.keys())}"
|
111
|
+
)
|
112
|
+
return stat_definitions[stat_name]()
|
14
113
|
|
15
|
-
class JobsRunnerStatusPresentation:
|
16
114
|
@staticmethod
|
17
|
-
def
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
115
|
+
def _job_level_info(
|
116
|
+
completed_tasks: List[Type[asyncio.Task]],
|
117
|
+
elapsed_time: float,
|
118
|
+
interviews: List[Type["Interview"]],
|
119
|
+
) -> InterviewStatisticsCollection:
|
120
|
+
interview_statistics = InterviewStatisticsCollection()
|
121
|
+
|
122
|
+
default_statistics = [
|
123
|
+
"elapsed_time",
|
124
|
+
"total_interviews_requested",
|
125
|
+
"completed_interviews",
|
126
|
+
"percent_complete",
|
127
|
+
"average_time_per_interview",
|
128
|
+
"task_remaining",
|
129
|
+
"estimated_time_remaining",
|
130
|
+
]
|
131
|
+
for stat_name in default_statistics:
|
132
|
+
interview_statistics.add_stat(
|
133
|
+
JobsRunnerStatusMixin._compute_statistic(
|
134
|
+
stat_name, completed_tasks, elapsed_time, interviews
|
135
|
+
)
|
136
|
+
)
|
137
|
+
|
138
|
+
return interview_statistics
|
139
|
+
|
140
|
+
@staticmethod
|
141
|
+
def _get_model_queues_info(interviews):
|
142
|
+
models_to_tokens = defaultdict(InterviewTokenUsage)
|
143
|
+
model_to_status = defaultdict(InterviewStatusDictionary)
|
144
|
+
waiting_dict = defaultdict(int)
|
145
|
+
|
146
|
+
for interview in interviews:
|
147
|
+
models_to_tokens[interview.model] += interview.token_usage
|
148
|
+
model_to_status[interview.model] += interview.interview_status
|
149
|
+
waiting_dict[interview.model] += interview.interview_status.waiting
|
150
|
+
|
151
|
+
for model, num_waiting in waiting_dict.items():
|
152
|
+
yield JobsRunnerStatusMixin._get_model_info(
|
153
|
+
model, num_waiting, models_to_tokens
|
154
|
+
)
|
155
|
+
|
156
|
+
@staticmethod
|
157
|
+
def generate_status_summary(
|
158
|
+
completed_tasks: List[Type[asyncio.Task]],
|
159
|
+
elapsed_time: float,
|
160
|
+
interviews: List[Type["Interview"]],
|
161
|
+
include_model_queues=False,
|
162
|
+
) -> InterviewStatisticsCollection:
|
163
|
+
"""Generate a summary of the status of the job runner.
|
164
|
+
|
165
|
+
:param completed_tasks: list of completed tasks
|
166
|
+
:param elapsed_time: time elapsed since the start of the job
|
167
|
+
:param interviews: list of interviews to be conducted
|
168
|
+
|
169
|
+
>>> from edsl.jobs.interviews.Interview import Interview
|
170
|
+
>>> interviews = [Interview.example()]
|
171
|
+
>>> completed_tasks = []
|
172
|
+
>>> elapsed_time = 0
|
173
|
+
>>> JobsRunnerStatusMixin().generate_status_summary(completed_tasks, elapsed_time, interviews)
|
174
|
+
{'Elapsed time': '0.0 sec.', 'Total interviews requested': '1 ', 'Completed interviews': '0 ', 'Percent complete': '0 %', 'Average time per interview': 'NA', 'Task remaining': '1 ', 'Estimated time remaining': 'NA'}
|
175
|
+
"""
|
176
|
+
|
177
|
+
interview_status_summary: InterviewStatisticsCollection = (
|
178
|
+
JobsRunnerStatusMixin._job_level_info(
|
179
|
+
completed_tasks=completed_tasks,
|
180
|
+
elapsed_time=elapsed_time,
|
181
|
+
interviews=interviews,
|
182
|
+
)
|
23
183
|
)
|
24
|
-
|
184
|
+
if include_model_queues:
|
185
|
+
interview_status_summary.model_queues = list(
|
186
|
+
JobsRunnerStatusMixin._get_model_queues_info(interviews)
|
187
|
+
)
|
188
|
+
else:
|
189
|
+
interview_status_summary.model_queues = None
|
190
|
+
|
191
|
+
return interview_status_summary
|
192
|
+
|
193
|
+
@staticmethod
|
194
|
+
def _get_model_info(
|
195
|
+
model: str,
|
196
|
+
num_waiting: int,
|
197
|
+
models_to_tokens: InterviewTokenUsageMapping,
|
198
|
+
) -> dict:
|
199
|
+
"""Get the status of a model.
|
200
|
+
|
201
|
+
:param model: the model name
|
202
|
+
:param num_waiting: the number of tasks waiting for capacity
|
203
|
+
:param models_to_tokens: a mapping of models to token usage
|
204
|
+
|
205
|
+
>>> from edsl.jobs.interviews.Interview import Interview
|
206
|
+
>>> interviews = [Interview.example()]
|
207
|
+
>>> models_to_tokens = defaultdict(InterviewTokenUsage)
|
208
|
+
>>> model = interviews[0].model
|
209
|
+
>>> num_waiting = 0
|
210
|
+
>>> JobsRunnerStatusMixin()._get_model_info(model, num_waiting, models_to_tokens)
|
211
|
+
ModelInfo(model_name='gpt-4-1106-preview', TPM_limit_k=480.0, RPM_limit_k=4.0, num_tasks_waiting=0, token_usage_info=[ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000'), ModelTokenUsageStats(token_usage_type='cached_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')])
|
212
|
+
"""
|
213
|
+
|
214
|
+
## TODO: This should probably be a coop method
|
215
|
+
prices = get_token_pricing(model.model)
|
216
|
+
|
217
|
+
token_usage_info = []
|
218
|
+
for token_usage_type in ["new_token_usage", "cached_token_usage"]:
|
219
|
+
token_usage_info.append(
|
220
|
+
JobsRunnerStatusMixin._get_token_usage_info(
|
221
|
+
token_usage_type, models_to_tokens, model, prices
|
222
|
+
)
|
223
|
+
)
|
224
|
+
|
225
|
+
return ModelInfo(
|
226
|
+
**{
|
227
|
+
"model_name": model.model,
|
228
|
+
"TPM_limit_k": model.TPM / 1000,
|
229
|
+
"RPM_limit_k": model.RPM / 1000,
|
230
|
+
"num_tasks_waiting": num_waiting,
|
231
|
+
"token_usage_info": token_usage_info,
|
232
|
+
}
|
233
|
+
)
|
234
|
+
|
235
|
+
@staticmethod
|
236
|
+
def _get_token_usage_info(
|
237
|
+
token_usage_type: Literal["new_token_usage", "cached_token_usage"],
|
238
|
+
models_to_tokens: InterviewTokenUsageMapping,
|
239
|
+
model: str,
|
240
|
+
prices: "TokenPricing",
|
241
|
+
) -> ModelTokenUsageStats:
|
242
|
+
"""Get the token usage info for a model.
|
243
|
+
|
244
|
+
>>> from edsl.jobs.interviews.Interview import Interview
|
245
|
+
>>> interviews = [Interview.example()]
|
246
|
+
>>> models_to_tokens = defaultdict(InterviewTokenUsage)
|
247
|
+
>>> model = interviews[0].model
|
248
|
+
>>> prices = get_token_pricing(model.model)
|
249
|
+
>>> cache_status = "new_token_usage"
|
250
|
+
>>> JobsRunnerStatusMixin()._get_token_usage_info(cache_status, models_to_tokens, model, prices)
|
251
|
+
ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')
|
252
|
+
|
253
|
+
"""
|
254
|
+
all_token_usage: InterviewTokenUsage = models_to_tokens[model]
|
255
|
+
token_usage: TokenUsage = getattr(all_token_usage, token_usage_type)
|
256
|
+
|
257
|
+
details = [
|
258
|
+
{"type": token_type, "tokens": getattr(token_usage, token_type)}
|
259
|
+
for token_type in ["prompt_tokens", "completion_tokens"]
|
260
|
+
]
|
261
|
+
|
262
|
+
return ModelTokenUsageStats(
|
263
|
+
token_usage_type=token_usage_type,
|
264
|
+
details=details,
|
265
|
+
cost=f"${token_usage.cost(prices):.5f}",
|
266
|
+
)
|
267
|
+
|
268
|
+
@staticmethod
|
269
|
+
def _add_statistics_to_table(table, status_summary):
|
25
270
|
table.add_column("Statistic", style="dim", no_wrap=True, width=50)
|
26
271
|
table.add_column("Value", width=10)
|
27
272
|
|
@@ -29,49 +274,60 @@ class JobsRunnerStatusPresentation:
|
|
29
274
|
if key != "model_queues":
|
30
275
|
table.add_row(key, value)
|
31
276
|
|
277
|
+
@staticmethod
|
278
|
+
def display_status_table(status_summary: InterviewStatisticsCollection) -> "Table":
|
279
|
+
table = Table(
|
280
|
+
title="Job Status",
|
281
|
+
show_header=True,
|
282
|
+
header_style="bold magenta",
|
283
|
+
box=SIMPLE,
|
284
|
+
)
|
285
|
+
|
286
|
+
### Job-level statistics
|
287
|
+
JobsRunnerStatusMixin._add_statistics_to_table(table, status_summary)
|
288
|
+
|
289
|
+
## Model-level statistics
|
32
290
|
spacing = " "
|
33
|
-
|
291
|
+
|
292
|
+
if status_summary.model_queues is not None:
|
34
293
|
table.add_row(Text("Model Queues", style="bold red"), "")
|
35
|
-
for model_info in status_summary
|
36
|
-
model_name = model_info
|
37
|
-
tpm = "TPM (k)=
|
38
|
-
rpm = "RPM (k)=
|
294
|
+
for model_info in status_summary.model_queues:
|
295
|
+
model_name = model_info.model_name
|
296
|
+
tpm = f"TPM (k)={model_info.TPM_limit_k}"
|
297
|
+
rpm = f"RPM (k)= {model_info.RPM_limit_k}"
|
39
298
|
pretty_model_name = model_name + ";" + tpm + ";" + rpm
|
40
299
|
table.add_row(Text(pretty_model_name, style="blue"), "")
|
41
300
|
table.add_row(
|
42
301
|
"Number question tasks waiting for capacity",
|
43
|
-
str(model_info
|
302
|
+
str(model_info.num_tasks_waiting),
|
44
303
|
)
|
45
304
|
# Token usage and cost info
|
46
|
-
for
|
47
|
-
|
305
|
+
for token_usage_info in model_info.token_usage_info:
|
306
|
+
token_usage_type = token_usage_info.token_usage_type
|
48
307
|
table.add_row(
|
49
|
-
Text(
|
308
|
+
Text(
|
309
|
+
spacing + token_usage_type.replace("_", " "), style="bold"
|
310
|
+
),
|
311
|
+
"",
|
50
312
|
)
|
51
|
-
for detail in
|
313
|
+
for detail in token_usage_info.details:
|
52
314
|
token_type = detail["type"]
|
53
315
|
tokens = detail["tokens"]
|
54
|
-
# cost = detail["cost"]
|
55
316
|
table.add_row(spacing + f"{token_type}", f"{tokens:,}")
|
56
|
-
table.add_row(spacing + "cost", cache_info["cost"])
|
317
|
+
# table.add_row(spacing + "cost", cache_info["cost"])
|
57
318
|
|
58
319
|
return table
|
59
320
|
|
60
|
-
|
61
|
-
class JobsRunnerStatusMixin(JobsRunnerStatusData, JobsRunnerStatusPresentation):
|
62
|
-
def status_data(self, completed_tasks: List[asyncio.Task], elapsed_time: float):
|
63
|
-
# return self.generate_status_summary(
|
64
|
-
# completed_tasks=completed_tasks,
|
65
|
-
# elapsed_time=elapsed_time,
|
66
|
-
# interviews=self.total_interviews).rawplt.figure(figsize=(10, 6))
|
67
|
-
|
68
|
-
# return self.full_status(self.total_interviews)
|
69
|
-
return None
|
70
|
-
|
71
321
|
def status_table(self, completed_tasks: List[asyncio.Task], elapsed_time: float):
|
72
|
-
summary_data =
|
322
|
+
summary_data = JobsRunnerStatusMixin.generate_status_summary(
|
73
323
|
completed_tasks=completed_tasks,
|
74
324
|
elapsed_time=elapsed_time,
|
75
325
|
interviews=self.total_interviews,
|
76
326
|
)
|
77
327
|
return self.display_status_table(summary_data)
|
328
|
+
|
329
|
+
|
330
|
+
if __name__ == "__main__":
|
331
|
+
import doctest
|
332
|
+
|
333
|
+
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
edsl/jobs/tasks/TaskCreators.py
CHANGED
@@ -17,7 +17,13 @@ class TaskCreators(UserDict):
|
|
17
17
|
|
18
18
|
@property
|
19
19
|
def token_usage(self) -> InterviewTokenUsage:
|
20
|
-
"""Determines how many tokens were used for the interview.
|
20
|
+
"""Determines how many tokens were used for the interview.
|
21
|
+
|
22
|
+
This iterates through all tasks that make up an interview.
|
23
|
+
For each task, it determines how many tokens were used and whether they were cached or new.
|
24
|
+
It then sums the total number of cached and new tokens used for the interview.
|
25
|
+
|
26
|
+
"""
|
21
27
|
cached_tokens = TokenUsage(from_cache=True)
|
22
28
|
new_tokens = TokenUsage(from_cache=False)
|
23
29
|
for task_creator in self.values():
|
@@ -28,7 +34,7 @@ class TaskCreators(UserDict):
|
|
28
34
|
new_token_usage=new_tokens, cached_token_usage=cached_tokens
|
29
35
|
)
|
30
36
|
|
31
|
-
def print(self):
|
37
|
+
def print(self) -> None:
|
32
38
|
from rich import print
|
33
39
|
|
34
40
|
print({task.get_name(): task.task_status for task in self.values()})
|