edsl 0.1.41__py3-none-any.whl → 0.1.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/agents/Invigilator.py +4 -3
- edsl/agents/InvigilatorBase.py +2 -1
- edsl/agents/PromptConstructor.py +92 -21
- edsl/agents/QuestionInstructionPromptBuilder.py +68 -9
- edsl/agents/QuestionTemplateReplacementsBuilder.py +7 -2
- edsl/agents/prompt_helpers.py +2 -2
- edsl/coop/coop.py +97 -19
- edsl/enums.py +3 -1
- edsl/exceptions/coop.py +4 -0
- edsl/exceptions/jobs.py +1 -9
- edsl/exceptions/language_models.py +8 -4
- edsl/exceptions/questions.py +8 -11
- edsl/inference_services/AvailableModelFetcher.py +4 -1
- edsl/inference_services/DeepSeekService.py +18 -0
- edsl/inference_services/registry.py +2 -0
- edsl/jobs/Jobs.py +60 -34
- edsl/jobs/JobsPrompts.py +64 -3
- edsl/jobs/JobsRemoteInferenceHandler.py +42 -25
- edsl/jobs/JobsRemoteInferenceLogger.py +1 -1
- edsl/jobs/buckets/BucketCollection.py +30 -0
- edsl/jobs/data_structures.py +1 -0
- edsl/jobs/interviews/Interview.py +1 -1
- edsl/jobs/loggers/HTMLTableJobLogger.py +6 -1
- edsl/jobs/results_exceptions_handler.py +2 -7
- edsl/jobs/tasks/TaskHistory.py +49 -17
- edsl/language_models/LanguageModel.py +7 -4
- edsl/language_models/ModelList.py +1 -1
- edsl/language_models/key_management/KeyLookupBuilder.py +47 -20
- edsl/language_models/key_management/models.py +10 -4
- edsl/language_models/model.py +49 -0
- edsl/prompts/Prompt.py +124 -61
- edsl/questions/descriptors.py +37 -23
- edsl/questions/question_base_gen_mixin.py +1 -0
- edsl/results/DatasetExportMixin.py +35 -6
- edsl/results/Result.py +9 -3
- edsl/results/Results.py +180 -2
- edsl/results/ResultsGGMixin.py +117 -60
- edsl/scenarios/PdfExtractor.py +3 -6
- edsl/scenarios/Scenario.py +35 -1
- edsl/scenarios/ScenarioList.py +22 -3
- edsl/scenarios/ScenarioListPdfMixin.py +9 -3
- edsl/surveys/Survey.py +1 -1
- edsl/templates/error_reporting/base.html +2 -4
- edsl/templates/error_reporting/exceptions_table.html +35 -0
- edsl/templates/error_reporting/interview_details.html +67 -53
- edsl/templates/error_reporting/interviews.html +4 -17
- edsl/templates/error_reporting/overview.html +31 -5
- edsl/templates/error_reporting/performance_plot.html +1 -1
- {edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/METADATA +2 -3
- {edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/RECORD +53 -51
- {edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/LICENSE +0 -0
- {edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/WHEEL +0 -0
edsl/inference_services/AvailableModelFetcher.py
CHANGED
@@ -136,7 +136,10 @@ class AvailableModelFetcher:
         if not service_models:
             import warnings

-            warnings.warn(f"No models found for service {service_name}")
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")  # Ignores the warning
+                warnings.warn(f"No models found for service {service_name}")
+
             return [], service_name

         models_list = AvailableModels(
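The net effect of this change is worth noting: the `warnings.warn` call is now issued inside a `catch_warnings()` scope whose filter ignores everything, so the message is suppressed rather than shown. A minimal standalone sketch of the same standard-library pattern:

```python
import warnings

# Inside the scope, the "ignore" filter swallows the warning entirely.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    warnings.warn("No models found for service example")  # not displayed

warnings.warn("outside the scope")  # displayed as usual
```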
edsl/inference_services/DeepSeekService.py
ADDED
@@ -0,0 +1,18 @@
+import aiohttp
+import json
+import requests
+from typing import Any, List
+
+# from edsl.inference_services.InferenceServiceABC import InferenceServiceABC
+from edsl.language_models import LanguageModel
+
+from edsl.inference_services.OpenAIService import OpenAIService
+
+
+class DeepSeekService(OpenAIService):
+    """DeepInfra service class."""
+
+    _inference_service_ = "deepseek"
+    _env_key_name_ = "DEEPSEEK_API_KEY"
+    _base_url_ = "https://api.deepseek.com"
+    _models_list_cache: List[str] = []
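The new service is a thin subclass of `OpenAIService`: DeepSeek exposes an OpenAI-compatible API, so only the class attributes change. A hedged sketch of how another OpenAI-compatible endpoint could be added the same way (the service name, env var, and URL below are hypothetical):

```python
from typing import List

from edsl.inference_services.OpenAIService import OpenAIService


class ExampleService(OpenAIService):
    """Hypothetical OpenAI-compatible service, following the DeepSeek pattern."""

    _inference_service_ = "example"           # hypothetical registry key
    _env_key_name_ = "EXAMPLE_API_KEY"        # hypothetical API-key env var
    _base_url_ = "https://api.example.com"    # hypothetical base URL
    _models_list_cache: List[str] = []
```

Registering it would then be the same two-line change made to `registry.py` below.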
edsl/inference_services/registry.py
CHANGED
@@ -13,6 +13,7 @@ from edsl.inference_services.OllamaService import OllamaService
 from edsl.inference_services.TestService import TestService
 from edsl.inference_services.TogetherAIService import TogetherAIService
 from edsl.inference_services.PerplexityService import PerplexityService
+from edsl.inference_services.DeepSeekService import DeepSeekService

 try:
     from edsl.inference_services.MistralAIService import MistralAIService
@@ -33,6 +34,7 @@ services = [
     TestService,
     TogetherAIService,
     PerplexityService,
+    DeepSeekService,
 ]

 if mistral_available:
edsl/jobs/Jobs.py
CHANGED
@@ -38,6 +38,7 @@ if TYPE_CHECKING:
     from edsl.language_models.ModelList import ModelList
     from edsl.data.Cache import Cache
     from edsl.language_models.key_management.KeyLookup import KeyLookup
+    from edsl.jobs.JobsRemoteInferenceHandler import JobsRemoteInferenceHandler

 VisibilityType = Literal["private", "public", "unlisted"]

@@ -407,7 +408,13 @@ class Jobs(Base):
         >>> bc
         BucketCollection(...)
         """
-        return BucketCollection.from_models(self.models)
+        bc = BucketCollection.from_models(self.models)
+
+        if self.run_config.environment.key_lookup is not None:
+            bc.update_from_key_lookup(
+                self.run_config.environment.key_lookup
+            )
+        return bc

     def html(self):
         """Return the HTML representations for each scenario"""
@@ -465,22 +472,47 @@ class Jobs(Base):

         return False

+    def _start_remote_inference_job(
+        self, job_handler: Optional[JobsRemoteInferenceHandler] = None
+    ) -> Union["Results", None]:
+
+        if job_handler is None:
+            job_handler = self._create_remote_inference_handler()
+
+        job_info = job_handler.create_remote_inference_job(
+            iterations=self.run_config.parameters.n,
+            remote_inference_description=self.run_config.parameters.remote_inference_description,
+            remote_inference_results_visibility=self.run_config.parameters.remote_inference_results_visibility,
+        )
+        return job_info
+
+    def _create_remote_inference_handler(self) -> JobsRemoteInferenceHandler:
+
+        from edsl.jobs.JobsRemoteInferenceHandler import JobsRemoteInferenceHandler
+
+        return JobsRemoteInferenceHandler(
+            self, verbose=self.run_config.parameters.verbose
+        )
+
     def _remote_results(
         self,
+        config: RunConfig,
     ) -> Union["Results", None]:
         from edsl.jobs.JobsRemoteInferenceHandler import JobsRemoteInferenceHandler
+        from edsl.jobs.JobsRemoteInferenceHandler import RemoteJobInfo

-        jh = JobsRemoteInferenceHandler(
-            self, verbose=self.run_config.parameters.verbose
-        )
+        background = config.parameters.background
+
+        jh = self._create_remote_inference_handler()
         if jh.use_remote_inference(self.run_config.parameters.disable_remote_inference):
-            job_info = jh.create_remote_inference_job(
-                iterations=self.run_config.parameters.n,
-                remote_inference_description=self.run_config.parameters.remote_inference_description,
-                remote_inference_results_visibility=self.run_config.parameters.remote_inference_results_visibility,
-            )
-            results = jh.poll_remote_inference_job(job_info)
-            return results
+            job_info: RemoteJobInfo = self._start_remote_inference_job(jh)
+            if background:
+                from edsl.results.Results import Results
+                results = Results.from_job_info(job_info)
+                return results
+            else:
+                results = jh.poll_remote_inference_job(job_info)
+                return results
         else:
             return None

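With `background=True`, `_remote_results` skips polling and wraps the freshly created job info in a `Results` object via `Results.from_job_info`. A hedged usage sketch — `job` stands for any `Jobs` instance built elsewhere, and it assumes the `@with_config` decorator maps `run(...)` keyword arguments onto `RunParameters` fields:

```python
# Hedged sketch; `job` is any edsl Jobs instance built elsewhere.
handle = job.run(background=True)   # returns immediately via Results.from_job_info
results = job.run()                 # default path: polls the remote job to completion
```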
@@ -507,13 +539,6 @@ class Jobs(Base):

         assert isinstance(self.run_config.environment.cache, Cache)

-        # with RemoteCacheSync(
-        #     coop=Coop(),
-        #     cache=self.run_config.environment.cache,
-        #     output_func=self._output,
-        #     remote_cache=use_remote_cache,
-        #     remote_cache_description=self.run_config.parameters.remote_cache_description,
-        # ):
         runner = JobsRunnerAsyncio(self, environment=self.run_config.environment)
         if run_job_async:
             results = await runner.run_async(self.run_config.parameters)
@@ -521,17 +546,6 @@ class Jobs(Base):
             results = runner.run(self.run_config.parameters)
         return results

-    def _setup_and_check(self) -> Tuple[RunConfig, Optional[Results]]:
-        self._prepare_to_run()
-        self._check_if_remote_keys_ok()
-
-        # first try to run the job remotely
-        if results := self._remote_results():
-            return results
-
-        self._check_if_local_keys_ok()
-        return None
-
     @property
     def num_interviews(self):
         if self.run_config.parameters.n is None:
@@ -539,7 +553,7 @@ class Jobs(Base):
         else:
             return len(self) * self.run_config.parameters.n

-    def _run(self, config: RunConfig):
+    def _run(self, config: RunConfig) -> Union[None, "Results"]:
         "Shared code for run and run_async"
         if config.environment.cache is not None:
             self.run_config.environment.cache = config.environment.cache
@@ -561,7 +575,6 @@ class Jobs(Base):

         self.replace_missing_objects()

-        # try to run remotely first
         self._prepare_to_run()
         self._check_if_remote_keys_ok()

@@ -579,9 +592,9 @@ class Jobs(Base):
             self.run_config.environment.cache = Cache(immediate_write=False)

         # first try to run the job remotely
-        if results := self._remote_results():
+        if (results := self._remote_results(config)) is not None:
             return results
-
+
         self._check_if_local_keys_ok()

         if config.environment.bucket_collection is None:
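The switch from a bare walrus test to `is not None` is a real bug fix: a `Results` collection can be empty and therefore falsy, which the old truthiness check would have conflated with "remote inference not used". A self-contained illustration:

```python
class FakeResults(list):
    """Stand-in for a Results object; an empty one is falsy."""

def remote_results():
    return FakeResults()  # remote job ran and produced zero rows

# Old-style check: skips the return even though results exist.
if results := remote_results():
    print("never reached for empty results")

# New-style check: only None (remote path unused) falls through.
if (results := remote_results()) is not None:
    print("remote results returned:", results)
```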
@@ -589,6 +602,16 @@ class Jobs(Base):
                 self.create_bucket_collection()
             )

+        if (
+            self.run_config.environment.key_lookup is not None
+            and self.run_config.environment.bucket_collection is not None
+        ):
+            self.run_config.environment.bucket_collection.update_from_key_lookup(
+                self.run_config.environment.key_lookup
+            )
+
+        return None
+
     @with_config
     def run(self, *, config: RunConfig) -> "Results":
         """
@@ -608,7 +631,10 @@ class Jobs(Base):
         :param bucket_collection: A BucketCollection object to track API calls
         :param key_lookup: A KeyLookup object to manage API keys
         """
-        self._run(config)
+        potentially_completed_results = self._run(config)
+
+        if potentially_completed_results is not None:
+            return potentially_completed_results

         return asyncio.run(self._execute_with_remote_cache(run_job_async=False))

edsl/jobs/JobsPrompts.py
CHANGED
@@ -1,3 +1,5 @@
+import time
+import logging
 from typing import List, TYPE_CHECKING

 from edsl.results.Dataset import Dataset
@@ -12,7 +14,9 @@ if TYPE_CHECKING:
     # from edsl.surveys.Survey import Survey

 from edsl.jobs.FetchInvigilator import FetchInvigilator
+from edsl.data.CacheEntry import CacheEntry

+logger = logging.getLogger(__name__)

 class JobsPrompts:
     def __init__(self, jobs: "Jobs"):
@@ -21,6 +25,8 @@ class JobsPrompts:
         self.scenarios = jobs.scenarios
         self.survey = jobs.survey
         self._price_lookup = None
+        self._agent_lookup = {agent: idx for idx, agent in enumerate(self.agents)}
+        self._scenario_lookup = {scenario: idx for idx, scenario in enumerate(self.scenarios)}

     @property
     def price_lookup(self):
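Precomputing these dictionaries replaces what was presumably a per-prompt `list.index()` scan (the removed lines are truncated in this diff) with an O(1) hash lookup; with many interviews sharing the same agents and scenarios, that removes a quadratic factor. The pattern in miniature:

```python
agents = ["a0", "a1", "a2"]  # stand-ins for (hashable) Agent objects

# Per-lookup scan, O(n) each time:
idx = agents.index("a2")

# Build the reverse index once, O(1) per lookup afterwards:
agent_lookup = {agent: i for i, agent in enumerate(agents)}
idx = agent_lookup["a2"]
```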
@@ -47,26 +53,54 @@ class JobsPrompts:
         agent_indices = []
         models = []
         costs = []
-
+        cache_keys = []
+
         for interview_index, interview in enumerate(interviews):
+            logger.info(f"Processing interview {interview_index} of {len(interviews)}")
+            interview_start = time.time()
+
+            # Fetch invigilators timing
+            invig_start = time.time()
             invigilators = [
                 FetchInvigilator(interview)(question)
                 for question in interview.survey.questions
             ]
+            invig_end = time.time()
+            logger.debug(f"Time taken to fetch invigilators: {invig_end - invig_start:.4f}s")
+
+            # Process prompts timing
+            prompts_start = time.time()
             for _, invigilator in enumerate(invigilators):
+                # Get prompts timing
+                get_prompts_start = time.time()
                 prompts = invigilator.get_prompts()
+                get_prompts_end = time.time()
+                logger.debug(f"Time taken to get prompts: {get_prompts_end - get_prompts_start:.4f}s")
+
                 user_prompt = prompts["user_prompt"]
                 system_prompt = prompts["system_prompt"]
                 user_prompts.append(user_prompt)
                 system_prompts.append(system_prompt)
-
+
+                # Index lookups timing
+                index_start = time.time()
+                agent_index = self._agent_lookup[invigilator.agent]
                 agent_indices.append(agent_index)
                 interview_indices.append(interview_index)
-                scenario_index = self.
+                scenario_index = self._scenario_lookup[invigilator.scenario]
                 scenario_indices.append(scenario_index)
+                index_end = time.time()
+                logger.debug(f"Time taken for index lookups: {index_end - index_start:.4f}s")
+
+                # Model and question name assignment timing
+                assign_start = time.time()
                 models.append(invigilator.model.model)
                 question_names.append(invigilator.question.question_name)
+                assign_end = time.time()
+                logger.debug(f"Time taken for assignments: {assign_end - assign_start:.4f}s")

+                # Cost estimation timing
+                cost_start = time.time()
                 prompt_cost = self.estimate_prompt_cost(
                     system_prompt=system_prompt,
                     user_prompt=user_prompt,
@@ -74,8 +108,34 @@ class JobsPrompts:
                     inference_service=invigilator.model._inference_service_,
                     model=invigilator.model.model,
                 )
+                cost_end = time.time()
+                logger.debug(f"Time taken to estimate prompt cost: {cost_end - cost_start:.4f}s")
                 costs.append(prompt_cost["cost_usd"])

+                # Cache key generation timing
+                cache_key_gen_start = time.time()
+                cache_key = CacheEntry.gen_key(
+                    model=invigilator.model.model,
+                    parameters=invigilator.model.parameters,
+                    system_prompt=system_prompt,
+                    user_prompt=user_prompt,
+                    iteration=0,
+                )
+                cache_key_gen_end = time.time()
+                cache_keys.append(cache_key)
+                logger.debug(f"Time taken to generate cache key: {cache_key_gen_end - cache_key_gen_start:.4f}s")
+                logger.debug("-" * 50)  # Separator between iterations
+
+            prompts_end = time.time()
+            logger.info(f"Time taken to process prompts: {prompts_end - prompts_start:.4f}s")
+
+            interview_end = time.time()
+            logger.info(f"Overall time taken for interview: {interview_end - interview_start:.4f}s")
+            logger.info("Time breakdown:")
+            logger.info(f"  Invigilators: {invig_end - invig_start:.4f}s")
+            logger.info(f"  Prompts processing: {prompts_end - prompts_start:.4f}s")
+            logger.info(f"  Other overhead: {(interview_end - interview_start) - ((invig_end - invig_start) + (prompts_end - prompts_start)):.4f}s")
+
         d = Dataset(
             [
                 {"user_prompt": user_prompts},
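Each prompt's cache key is computed with `CacheEntry.gen_key`, which by construction must be deterministic in everything that affects a model call. As an illustration only (not edsl's actual implementation), such a key can be a hash over a canonical serialization of the inputs:

```python
import hashlib
import json

def gen_key(model: str, parameters: dict, system_prompt: str,
            user_prompt: str, iteration: int) -> str:
    """Deterministic key: same inputs always map to the same cache entry."""
    payload = json.dumps(
        {
            "model": model,
            "parameters": parameters,       # assumed JSON-serializable
            "system_prompt": system_prompt,
            "user_prompt": user_prompt,
            "iteration": iteration,         # lets repeated iterations miss the cache
        },
        sort_keys=True,                     # canonical: independent of dict order
    )
    return hashlib.md5(payload.encode()).hexdigest()
```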
@@ -86,6 +146,7 @@ class JobsPrompts:
                 {"agent_index": agent_indices},
                 {"model": models},
                 {"estimated_cost": costs},
+                {"cache_key": cache_keys},
             ]
         )
         return d
edsl/jobs/JobsRemoteInferenceHandler.py
CHANGED
@@ -219,6 +219,7 @@ class JobsRemoteInferenceHandler:
         job_info.logger.add_info("results_uuid", results_uuid)
         results = object_fetcher(results_uuid, expected_object_type="results")
         results_url = remote_job_data.get("results_url")
+        job_info.logger.add_info("results_url", results_url)
         job_info.logger.update(
             f"Job completed and Results stored on Coop: {results_url}",
             status=JobsStatus.COMPLETED,
@@ -227,6 +228,40 @@ class JobsRemoteInferenceHandler:
         results.results_uuid = results_uuid
         return results

+    def _attempt_fetch_job(
+        self,
+        job_info: RemoteJobInfo,
+        remote_job_data_fetcher: Callable,
+        object_fetcher: Callable,
+    ) -> Union[None, "Results", Literal["continue"]]:
+        """Makes one attempt to fetch and process a remote job's status and results."""
+        remote_job_data = remote_job_data_fetcher(job_info.job_uuid)
+        status = remote_job_data.get("status")
+
+        if status == "cancelled":
+            self._handle_cancelled_job(job_info)
+            return None
+
+        elif status == "failed" or status == "completed":
+            if status == "failed":
+                self._handle_failed_job(job_info, remote_job_data)
+
+            results_uuid = remote_job_data.get("results_uuid")
+            if results_uuid:
+                results = self._fetch_results_and_log(
+                    job_info=job_info,
+                    results_uuid=results_uuid,
+                    remote_job_data=remote_job_data,
+                    object_fetcher=object_fetcher,
+                )
+                return results
+            else:
+                return None
+
+        else:
+            self._sleep_for_a_bit(job_info, status)
+            return "continue"
+
     def poll_remote_inference_job(
         self,
         job_info: RemoteJobInfo,
@@ -241,31 +276,13 @@ class JobsRemoteInferenceHandler:

         job_in_queue = True
         while job_in_queue:
-            remote_job_data = remote_job_data_fetcher(job_info.job_uuid)
-            status = remote_job_data.get("status")
-
-            if status == "cancelled":
-                self._handle_cancelled_job(job_info)
-                return None
-
-            elif status == "failed" or status == "completed":
-                if status == "failed":
-                    self._handle_failed_job(job_info, remote_job_data)
-
-                results_uuid = remote_job_data.get("results_uuid")
-                if results_uuid:
-                    results = self._fetch_results_and_log(
-                        job_info=job_info,
-                        results_uuid=results_uuid,
-                        remote_job_data=remote_job_data,
-                        object_fetcher=object_fetcher,
-                    )
-                    return results
-                else:
-                    return None
-
-            else:
-                self._sleep_for_a_bit(job_info, status)
+            result = self._attempt_fetch_job(
+                job_info,
+                remote_job_data_fetcher,
+                object_fetcher
+            )
+            if result != "continue":
+                return result

     async def create_and_poll_remote_job(
         self,
edsl/jobs/JobsRemoteInferenceLogger.py
CHANGED
@@ -32,7 +32,7 @@ class JobsInfo:
     pretty_names = {
         "job_uuid": "Job UUID",
         "progress_bar_url": "Progress Bar URL",
-        "error_report_url": "
+        "error_report_url": "Exceptions Report URL",
         "results_uuid": "Results UUID",
         "results_url": "Results URL",
     }
edsl/jobs/buckets/BucketCollection.py
CHANGED
@@ -96,6 +96,36 @@ class BucketCollection(UserDict):
         else:
             self[model] = self.services_to_buckets[self.models_to_services[model.model]]

+    def update_from_key_lookup(self, key_lookup: "KeyLookup") -> None:
+        """Updates the bucket collection rates based on model RPM/TPM from KeyLookup"""
+
+        for model_name, service in self.models_to_services.items():
+            if service in key_lookup and not self.infinity_buckets:
+
+                if key_lookup[service].rpm is not None:
+                    new_rps = key_lookup[service].rpm / 60.0
+                    new_requests_bucket = TokenBucket(
+                        bucket_name=service,
+                        bucket_type="requests",
+                        capacity=new_rps,
+                        refill_rate=new_rps,
+                        remote_url=self.remote_url,
+                    )
+                    self.services_to_buckets[service].requests_bucket = (
+                        new_requests_bucket
+                    )
+
+                if key_lookup[service].tpm is not None:
+                    new_tps = key_lookup[service].tpm / 60.0
+                    new_tokens_bucket = TokenBucket(
+                        bucket_name=service,
+                        bucket_type="tokens",
+                        capacity=new_tps,
+                        refill_rate=new_tps,
+                        remote_url=self.remote_url,
+                    )
+                    self.services_to_buckets[service].tokens_bucket = new_tokens_bucket
+
     def visualize(self) -> dict:
         """Visualize the token and request buckets for each model."""
         plots = {}
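Key lookups report provider limits per minute (RPM/TPM) while the buckets operate per second, hence the division by 60; the same figure is used for both `capacity` (burst ceiling) and `refill_rate`. A minimal token-bucket stand-in (not edsl's `TokenBucket`) showing the conversion:

```python
import time

class MiniTokenBucket:
    """Minimal stand-in for a rate-limit bucket (illustration only)."""

    def __init__(self, capacity: float, refill_rate: float):
        self.capacity = capacity        # maximum burst size
        self.refill_rate = refill_rate  # tokens added per second
        self.tokens = capacity
        self.last = time.monotonic()

    def acquire(self, amount: float = 1.0) -> None:
        while True:
            now = time.monotonic()
            self.tokens = min(self.capacity,
                              self.tokens + (now - self.last) * self.refill_rate)
            self.last = now
            if self.tokens >= amount:
                self.tokens -= amount
                return
            time.sleep((amount - self.tokens) / self.refill_rate)

rpm = 600  # e.g. the rpm value a KeyLookup entry reports
bucket = MiniTokenBucket(capacity=rpm / 60.0, refill_rate=rpm / 60.0)
bucket.acquire()  # consume one request's worth of budget
```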
edsl/jobs/data_structures.py
CHANGED
@@ -32,6 +32,7 @@ class RunParameters(Base):
     remote_inference_results_visibility: Optional[VisibilityType] = "unlisted"
     skip_retry: bool = False
     raise_validation_errors: bool = False
+    background: bool = False
     disable_remote_cache: bool = False
     disable_remote_inference: bool = False
     job_uuid: Optional[str] = None
edsl/jobs/loggers/HTMLTableJobLogger.py
CHANGED
@@ -9,7 +9,8 @@ from edsl.jobs.jobs_status_enums import JobsStatus
 class HTMLTableJobLogger(JobLogger):
     def __init__(self, verbose=True, theme="auto", **kwargs):
         super().__init__(verbose=verbose)
-        self.display_handle = display(HTML(""), display_id=True)
+        self.display_handle = display(HTML(""), display_id=True) if verbose else None
+        #self.display_handle = display(HTML(""), display_id=True)
         self.current_message = None
         self.log_id = str(uuid.uuid4())
         self.is_expanded = True
@@ -22,6 +23,9 @@ class HTMLTableJobLogger(JobLogger):

     def _init_css(self):
         """Initialize the CSS styles with enhanced theme support"""
+        if not self.verbose:
+            return None
+
         css = """
         <style>
         /* Base theme variables */
@@ -217,6 +221,7 @@ class HTMLTableJobLogger(JobLogger):
         }});
         </script>
         """
+
         display(HTML(css + init_script))

edsl/jobs/results_exceptions_handler.py
CHANGED
@@ -66,9 +66,7 @@ class ResultsExceptionsHandler:

     def _generate_error_message(self, indices) -> str:
         """Generate appropriate error message based on number of exceptions."""
-        msg = f"Exceptions were raised
-        if len(indices) > 5:
-            msg += f"Exceptions were raised in the following interviews: {indices}.\n"
+        msg = f"Exceptions were raised.\n"
         return msg

     def handle_exceptions(self) -> None:
@@ -84,7 +82,6 @@ class ResultsExceptionsHandler:

         # Generate HTML report
         filepath = self.results.task_history.html(
-            cta="Open report to see details.",
             open_in_browser=self.open_in_browser,
             return_link=True,
         )
@@ -92,7 +89,5 @@ class ResultsExceptionsHandler:
         # Handle remote logging if enabled
         if self.remote_logging:
             filestore = HTMLFileStore(filepath)
-            coop_details = filestore.push(description="
+            coop_details = filestore.push(description="Exceptions Report")
             print(coop_details)
-
-        print("Also see: https://docs.expectedparrot.com/en/latest/exceptions.html")
edsl/jobs/tasks/TaskHistory.py
CHANGED
@@ -264,9 +264,27 @@ class TaskHistory(RepresentationMixin):
         js = env.joinpath("report.js").read_text()
         return js

+    @property
+    def exceptions_table(self) -> dict:
+        """Return a dictionary of exceptions organized by type, service, model, and question name."""
+        exceptions_table = {}
+        for interview in self.total_interviews:
+            for question_name, exceptions in interview.exceptions.items():
+                for exception in exceptions:
+                    key = (
+                        exception.exception.__class__.__name__,  # Exception type
+                        interview.model._inference_service_,  # Service
+                        interview.model.model,  # Model
+                        question_name  # Question name
+                    )
+                    if key not in exceptions_table:
+                        exceptions_table[key] = 0
+                    exceptions_table[key] += 1
+        return exceptions_table
+
     @property
     def exceptions_by_type(self) -> dict:
-        """Return a dictionary of exceptions by type."""
+        """Return a dictionary of exceptions tallied by type."""
         exceptions_by_type = {}
         for interview in self.total_interviews:
             for question_name, exceptions in interview.exceptions.items():
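The new property is a plain tally over a four-part tuple key. Equivalently (an alternative sketch, not what the diff ships), the counting idiom maps directly onto `collections.Counter`:

```python
from collections import Counter

def exceptions_table(total_interviews) -> Counter:
    """Tally exceptions by (type, service, model, question name)."""
    return Counter(
        (
            exception.exception.__class__.__name__,
            interview.model._inference_service_,
            interview.model.model,
            question_name,
        )
        for interview in total_interviews
        for question_name, exceptions in interview.exceptions.items()
        for exception in exceptions
    )
```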
@@ -324,6 +342,27 @@ class TaskHistory(RepresentationMixin):
         }
         return sorted_exceptions_by_question_name

+    # @property
+    # def exceptions_by_model(self) -> dict:
+    #     """Return a dictionary of exceptions tallied by model and question name."""
+    #     exceptions_by_model = {}
+    #     for interview in self.total_interviews:
+    #         model = interview.model.model
+    #         service = interview.model._inference_service_
+    #         if (service, model) not in exceptions_by_model:
+    #             exceptions_by_model[(service, model)] = 0
+    #         if interview.exceptions != {}:
+    #             exceptions_by_model[(service, model)] += len(interview.exceptions)

+    #     # sort the exceptions by model
+    #     sorted_exceptions_by_model = {
+    #         k: v
+    #         for k, v in sorted(
+    #             exceptions_by_model.items(), key=lambda item: item[1], reverse=True
+    #         )
+    #     }
+    #     return sorted_exceptions_by_model
+
     @property
     def exceptions_by_model(self) -> dict:
         """Return a dictionary of exceptions tallied by model and question name."""
@@ -331,19 +370,12 @@ class TaskHistory(RepresentationMixin):
         for interview in self.total_interviews:
             model = interview.model.model
             service = interview.model._inference_service_
-            if (service, model) not in exceptions_by_model:
-                exceptions_by_model[(service, model)] = 0
-            if interview.exceptions != {}:
-                exceptions_by_model[(service, model)] += len(interview.exceptions)
-
-        # sort the exceptions by model
-        sorted_exceptions_by_model = {
-            k: v
-            for k, v in sorted(
-                exceptions_by_model.items(), key=lambda item: item[1], reverse=True
-            )
-        }
-        return sorted_exceptions_by_model
+            for question_name, exceptions in interview.exceptions.items():
+                key = (service, model, question_name)
+                if key not in exceptions_by_model:
+                    exceptions_by_model[key] = 0
+                exceptions_by_model[key] += len(exceptions)
+        return exceptions_by_model

     def generate_html_report(self, css: Optional[str], include_plot=False):
         if include_plot:
@@ -372,6 +404,7 @@ class TaskHistory(RepresentationMixin):
             javascript=self.javascript(),
             num_exceptions=len(self.exceptions),
             performance_plot_html=performance_plot_html,
+            exceptions_table=self.exceptions_table,
             exceptions_by_type=self.exceptions_by_type,
             exceptions_by_question_name=self.exceptions_by_question_name,
             exceptions_by_model=self.exceptions_by_model,
@@ -386,11 +419,10 @@ class TaskHistory(RepresentationMixin):
         filename: Optional[str] = None,
         return_link=False,
         css=None,
-        cta="
+        cta="\nClick to open the report in a new tab\n",
         open_in_browser=False,
     ):
         """Return an HTML report."""
-
         from IPython.display import display, HTML
         import tempfile
         import os
@@ -419,7 +451,7 @@ class TaskHistory(RepresentationMixin):
             html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
             display(HTML(html_link))
             escaped_output = html.escape(output)
-            iframe = f"""
+            iframe = f"""
             <iframe srcdoc="{ escaped_output }" style="width: 800px; height: 600px;"></iframe>
             """
             display(HTML(iframe))