edsl 0.1.41__py3-none-any.whl → 0.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. edsl/__version__.py +1 -1
  2. edsl/agents/Invigilator.py +4 -3
  3. edsl/agents/InvigilatorBase.py +2 -1
  4. edsl/agents/PromptConstructor.py +92 -21
  5. edsl/agents/QuestionInstructionPromptBuilder.py +68 -9
  6. edsl/agents/QuestionTemplateReplacementsBuilder.py +7 -2
  7. edsl/agents/prompt_helpers.py +2 -2
  8. edsl/coop/coop.py +97 -19
  9. edsl/enums.py +3 -1
  10. edsl/exceptions/coop.py +4 -0
  11. edsl/exceptions/jobs.py +1 -9
  12. edsl/exceptions/language_models.py +8 -4
  13. edsl/exceptions/questions.py +8 -11
  14. edsl/inference_services/AvailableModelFetcher.py +4 -1
  15. edsl/inference_services/DeepSeekService.py +18 -0
  16. edsl/inference_services/registry.py +2 -0
  17. edsl/jobs/Jobs.py +60 -34
  18. edsl/jobs/JobsPrompts.py +64 -3
  19. edsl/jobs/JobsRemoteInferenceHandler.py +42 -25
  20. edsl/jobs/JobsRemoteInferenceLogger.py +1 -1
  21. edsl/jobs/buckets/BucketCollection.py +30 -0
  22. edsl/jobs/data_structures.py +1 -0
  23. edsl/jobs/interviews/Interview.py +1 -1
  24. edsl/jobs/loggers/HTMLTableJobLogger.py +6 -1
  25. edsl/jobs/results_exceptions_handler.py +2 -7
  26. edsl/jobs/tasks/TaskHistory.py +49 -17
  27. edsl/language_models/LanguageModel.py +7 -4
  28. edsl/language_models/ModelList.py +1 -1
  29. edsl/language_models/key_management/KeyLookupBuilder.py +47 -20
  30. edsl/language_models/key_management/models.py +10 -4
  31. edsl/language_models/model.py +49 -0
  32. edsl/prompts/Prompt.py +124 -61
  33. edsl/questions/descriptors.py +37 -23
  34. edsl/questions/question_base_gen_mixin.py +1 -0
  35. edsl/results/DatasetExportMixin.py +35 -6
  36. edsl/results/Result.py +9 -3
  37. edsl/results/Results.py +180 -2
  38. edsl/results/ResultsGGMixin.py +117 -60
  39. edsl/scenarios/PdfExtractor.py +3 -6
  40. edsl/scenarios/Scenario.py +35 -1
  41. edsl/scenarios/ScenarioList.py +22 -3
  42. edsl/scenarios/ScenarioListPdfMixin.py +9 -3
  43. edsl/surveys/Survey.py +1 -1
  44. edsl/templates/error_reporting/base.html +2 -4
  45. edsl/templates/error_reporting/exceptions_table.html +35 -0
  46. edsl/templates/error_reporting/interview_details.html +67 -53
  47. edsl/templates/error_reporting/interviews.html +4 -17
  48. edsl/templates/error_reporting/overview.html +31 -5
  49. edsl/templates/error_reporting/performance_plot.html +1 -1
  50. {edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/METADATA +2 -3
  51. {edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/RECORD +53 -51
  52. {edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/LICENSE +0 -0
  53. {edsl-0.1.41.dist-info → edsl-0.1.43.dist-info}/WHEEL +0 -0
edsl/inference_services/AvailableModelFetcher.py CHANGED
@@ -136,7 +136,10 @@ class AvailableModelFetcher:
         if not service_models:
             import warnings

-            warnings.warn(f"No models found for service {service_name}")
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")  # Ignores the warning
+                warnings.warn(f"No models found for service {service_name}")
+
             return [], service_name

         models_list = AvailableModels(
edsl/inference_services/DeepSeekService.py ADDED
@@ -0,0 +1,18 @@
+import aiohttp
+import json
+import requests
+from typing import Any, List
+
+# from edsl.inference_services.InferenceServiceABC import InferenceServiceABC
+from edsl.language_models import LanguageModel
+
+from edsl.inference_services.OpenAIService import OpenAIService
+
+
+class DeepSeekService(OpenAIService):
+    """DeepSeek service class."""
+
+    _inference_service_ = "deepseek"
+    _env_key_name_ = "DEEPSEEK_API_KEY"
+    _base_url_ = "https://api.deepseek.com"
+    _models_list_cache: List[str] = []
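Note: DeepSeekService only overrides the service name, API key variable, and base URL; everything else is inherited from OpenAIService, so DeepSeek's OpenAI-compatible endpoint is reached through the existing OpenAI plumbing. A minimal usage sketch (not part of the diff; the model name is illustrative and the key value is a placeholder):

import os

# DEEPSEEK_API_KEY is the env var named by _env_key_name_ above.
os.environ["DEEPSEEK_API_KEY"] = "sk-..."  # placeholder

from edsl import Model

# Illustrative model name; the real list is fetched from api.deepseek.com.
m = Model("deepseek-chat")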
edsl/inference_services/registry.py CHANGED
@@ -13,6 +13,7 @@ from edsl.inference_services.OllamaService import OllamaService
 from edsl.inference_services.TestService import TestService
 from edsl.inference_services.TogetherAIService import TogetherAIService
 from edsl.inference_services.PerplexityService import PerplexityService
+from edsl.inference_services.DeepSeekService import DeepSeekService

 try:
     from edsl.inference_services.MistralAIService import MistralAIService
@@ -33,6 +34,7 @@ services = [
     TestService,
     TogetherAIService,
     PerplexityService,
+    DeepSeekService,
 ]

 if mistral_available:
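With the import and list entry above, the service is registered at import time. A quick way to confirm (a sketch; assumes the Model.services() helper edsl provides for listing registered services):

from edsl import Model

# "deepseek" should now appear among the registered inference services.
print(Model.services())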
edsl/jobs/Jobs.py CHANGED
@@ -38,6 +38,7 @@ if TYPE_CHECKING:
     from edsl.language_models.ModelList import ModelList
     from edsl.data.Cache import Cache
     from edsl.language_models.key_management.KeyLookup import KeyLookup
+    from edsl.jobs.JobsRemoteInferenceHandler import JobsRemoteInferenceHandler

 VisibilityType = Literal["private", "public", "unlisted"]

@@ -407,7 +408,13 @@ class Jobs(Base):
         >>> bc
         BucketCollection(...)
         """
-        return BucketCollection.from_models(self.models)
+        bc = BucketCollection.from_models(self.models)
+
+        if self.run_config.environment.key_lookup is not None:
+            bc.update_from_key_lookup(
+                self.run_config.environment.key_lookup
+            )
+        return bc

     def html(self):
         """Return the HTML representations for each scenario"""
@@ -465,22 +472,47 @@ class Jobs(Base):

         return False

+    def _start_remote_inference_job(
+        self, job_handler: Optional[JobsRemoteInferenceHandler] = None
+    ) -> Union["Results", None]:
+
+        if job_handler is None:
+            job_handler = self._create_remote_inference_handler()
+
+        job_info = job_handler.create_remote_inference_job(
+            iterations=self.run_config.parameters.n,
+            remote_inference_description=self.run_config.parameters.remote_inference_description,
+            remote_inference_results_visibility=self.run_config.parameters.remote_inference_results_visibility,
+        )
+        return job_info
+
+    def _create_remote_inference_handler(self) -> JobsRemoteInferenceHandler:
+
+        from edsl.jobs.JobsRemoteInferenceHandler import JobsRemoteInferenceHandler
+
+        return JobsRemoteInferenceHandler(
+            self, verbose=self.run_config.parameters.verbose
+        )
+
     def _remote_results(
         self,
+        config: RunConfig,
     ) -> Union["Results", None]:
         from edsl.jobs.JobsRemoteInferenceHandler import JobsRemoteInferenceHandler
+        from edsl.jobs.JobsRemoteInferenceHandler import RemoteJobInfo

-        jh = JobsRemoteInferenceHandler(
-            self, verbose=self.run_config.parameters.verbose
-        )
+        background = config.parameters.background
+
+        jh = self._create_remote_inference_handler()
         if jh.use_remote_inference(self.run_config.parameters.disable_remote_inference):
-            job_info = jh.create_remote_inference_job(
-                iterations=self.run_config.parameters.n,
-                remote_inference_description=self.run_config.parameters.remote_inference_description,
-                remote_inference_results_visibility=self.run_config.parameters.remote_inference_results_visibility,
-            )
-            results = jh.poll_remote_inference_job(job_info)
-            return results
+            job_info: RemoteJobInfo = self._start_remote_inference_job(jh)
+            if background:
+                from edsl.results.Results import Results
+                results = Results.from_job_info(job_info)
+                return results
+            else:
+                results = jh.poll_remote_inference_job(job_info)
+                return results
         else:
             return None

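The effect of the new background flag: when RunParameters.background is True (added in edsl/jobs/data_structures.py below), _remote_results returns immediately with a Results object built via Results.from_job_info instead of blocking in poll_remote_inference_job. A sketch of the non-blocking call (hypothetical job; assumes run() forwards keyword arguments into RunParameters the way it does for the other parameters shown in this diff):

from edsl import Survey, Model

job = Survey.example().by(Model("test"))

# Returns without waiting for the remote job to finish; the Results
# object is constructed from the RemoteJobInfo and can be checked later.
results = job.run(background=True)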
@@ -507,13 +539,6 @@ class Jobs(Base):

         assert isinstance(self.run_config.environment.cache, Cache)

-        # with RemoteCacheSync(
-        #     coop=Coop(),
-        #     cache=self.run_config.environment.cache,
-        #     output_func=self._output,
-        #     remote_cache=use_remote_cache,
-        #     remote_cache_description=self.run_config.parameters.remote_cache_description,
-        # ):
         runner = JobsRunnerAsyncio(self, environment=self.run_config.environment)
         if run_job_async:
             results = await runner.run_async(self.run_config.parameters)
@@ -521,17 +546,6 @@ class Jobs(Base):
             results = runner.run(self.run_config.parameters)
         return results

-    def _setup_and_check(self) -> Tuple[RunConfig, Optional[Results]]:
-        self._prepare_to_run()
-        self._check_if_remote_keys_ok()
-
-        # first try to run the job remotely
-        if results := self._remote_results():
-            return results
-
-        self._check_if_local_keys_ok()
-        return None
-
     @property
     def num_interviews(self):
         if self.run_config.parameters.n is None:
@@ -539,7 +553,7 @@ class Jobs(Base):
         else:
             return len(self) * self.run_config.parameters.n

-    def _run(self, config: RunConfig):
+    def _run(self, config: RunConfig) -> Union[None, "Results"]:
         "Shared code for run and run_async"
         if config.environment.cache is not None:
             self.run_config.environment.cache = config.environment.cache
@@ -561,7 +575,6 @@ class Jobs(Base):

         self.replace_missing_objects()

-        # try to run remotely first
         self._prepare_to_run()
         self._check_if_remote_keys_ok()

@@ -579,9 +592,9 @@ class Jobs(Base):
             self.run_config.environment.cache = Cache(immediate_write=False)

         # first try to run the job remotely
-        if results := self._remote_results():
+        if (results := self._remote_results(config)) is not None:
             return results
-
+
         self._check_if_local_keys_ok()

         if config.environment.bucket_collection is None:
@@ -589,6 +602,16 @@ class Jobs(Base):
                 self.create_bucket_collection()
             )

+        if (
+            self.run_config.environment.key_lookup is not None
+            and self.run_config.environment.bucket_collection is not None
+        ):
+            self.run_config.environment.bucket_collection.update_from_key_lookup(
+                self.run_config.environment.key_lookup
+            )
+
+        return None
+
     @with_config
     def run(self, *, config: RunConfig) -> "Results":
         """
@@ -608,7 +631,10 @@ class Jobs(Base):
         :param bucket_collection: A BucketCollection object to track API calls
         :param key_lookup: A KeyLookup object to manage API keys
         """
-        self._run(config)
+        potentially_completed_results = self._run(config)
+
+        if potentially_completed_results is not None:
+            return potentially_completed_results

         return asyncio.run(self._execute_with_remote_cache(run_job_async=False))

edsl/jobs/JobsPrompts.py CHANGED
@@ -1,3 +1,5 @@
+import time
+import logging
 from typing import List, TYPE_CHECKING

 from edsl.results.Dataset import Dataset
@@ -12,7 +14,9 @@ if TYPE_CHECKING:
     # from edsl.surveys.Survey import Survey

 from edsl.jobs.FetchInvigilator import FetchInvigilator
+from edsl.data.CacheEntry import CacheEntry

+logger = logging.getLogger(__name__)

 class JobsPrompts:
     def __init__(self, jobs: "Jobs"):
@@ -21,6 +25,8 @@ class JobsPrompts:
         self.scenarios = jobs.scenarios
         self.survey = jobs.survey
         self._price_lookup = None
+        self._agent_lookup = {agent: idx for idx, agent in enumerate(self.agents)}
+        self._scenario_lookup = {scenario: idx for idx, scenario in enumerate(self.scenarios)}

     @property
     def price_lookup(self):
@@ -47,26 +53,54 @@ class JobsPrompts:
         agent_indices = []
         models = []
         costs = []
-
+        cache_keys = []
+
         for interview_index, interview in enumerate(interviews):
+            logger.info(f"Processing interview {interview_index} of {len(interviews)}")
+            interview_start = time.time()
+
+            # Fetch invigilators timing
+            invig_start = time.time()
             invigilators = [
                 FetchInvigilator(interview)(question)
                 for question in interview.survey.questions
             ]
+            invig_end = time.time()
+            logger.debug(f"Time taken to fetch invigilators: {invig_end - invig_start:.4f}s")
+
+            # Process prompts timing
+            prompts_start = time.time()
             for _, invigilator in enumerate(invigilators):
+                # Get prompts timing
+                get_prompts_start = time.time()
                 prompts = invigilator.get_prompts()
+                get_prompts_end = time.time()
+                logger.debug(f"Time taken to get prompts: {get_prompts_end - get_prompts_start:.4f}s")
+
                 user_prompt = prompts["user_prompt"]
                 system_prompt = prompts["system_prompt"]
                 user_prompts.append(user_prompt)
                 system_prompts.append(system_prompt)
-                agent_index = self.agents.index(invigilator.agent)
+
+                # Index lookups timing
+                index_start = time.time()
+                agent_index = self._agent_lookup[invigilator.agent]
                 agent_indices.append(agent_index)
                 interview_indices.append(interview_index)
-                scenario_index = self.scenarios.index(invigilator.scenario)
+                scenario_index = self._scenario_lookup[invigilator.scenario]
                 scenario_indices.append(scenario_index)
+                index_end = time.time()
+                logger.debug(f"Time taken for index lookups: {index_end - index_start:.4f}s")
+
+                # Model and question name assignment timing
+                assign_start = time.time()
                 models.append(invigilator.model.model)
                 question_names.append(invigilator.question.question_name)
+                assign_end = time.time()
+                logger.debug(f"Time taken for assignments: {assign_end - assign_start:.4f}s")

+                # Cost estimation timing
+                cost_start = time.time()
                 prompt_cost = self.estimate_prompt_cost(
                     system_prompt=system_prompt,
                     user_prompt=user_prompt,
@@ -74,8 +108,34 @@ class JobsPrompts:
                     inference_service=invigilator.model._inference_service_,
                     model=invigilator.model.model,
                 )
+                cost_end = time.time()
+                logger.debug(f"Time taken to estimate prompt cost: {cost_end - cost_start:.4f}s")
                 costs.append(prompt_cost["cost_usd"])

+                # Cache key generation timing
+                cache_key_gen_start = time.time()
+                cache_key = CacheEntry.gen_key(
+                    model=invigilator.model.model,
+                    parameters=invigilator.model.parameters,
+                    system_prompt=system_prompt,
+                    user_prompt=user_prompt,
+                    iteration=0,
+                )
+                cache_key_gen_end = time.time()
+                cache_keys.append(cache_key)
+                logger.debug(f"Time taken to generate cache key: {cache_key_gen_end - cache_key_gen_start:.4f}s")
+                logger.debug("-" * 50)  # Separator between iterations
+
+            prompts_end = time.time()
+            logger.info(f"Time taken to process prompts: {prompts_end - prompts_start:.4f}s")
+
+            interview_end = time.time()
+            logger.info(f"Overall time taken for interview: {interview_end - interview_start:.4f}s")
+            logger.info("Time breakdown:")
+            logger.info(f"  Invigilators: {invig_end - invig_start:.4f}s")
+            logger.info(f"  Prompts processing: {prompts_end - prompts_start:.4f}s")
+            logger.info(f"  Other overhead: {(interview_end - interview_start) - ((invig_end - invig_start) + (prompts_end - prompts_start)):.4f}s")
+
         d = Dataset(
             [
                 {"user_prompt": user_prompts},
@@ -86,6 +146,7 @@ class JobsPrompts:
                 {"agent_index": agent_indices},
                 {"model": models},
                 {"estimated_cost": costs},
+                {"cache_key": cache_keys},
             ]
         )
         return d
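The timing instrumentation above is invisible by default: per-step timings go to DEBUG and per-interview summaries to INFO on the module logger. Enabling it needs only the standard library:

import logging

# Surface the DEBUG/INFO timing lines emitted by edsl.jobs.JobsPrompts.
logging.basicConfig(level=logging.DEBUG)
logging.getLogger("edsl.jobs.JobsPrompts").setLevel(logging.DEBUG)

The new cache_key column uses the same CacheEntry.gen_key inputs the runner uses, so each prompt row can presumably be cross-referenced against existing cache entries before a job is submitted.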
edsl/jobs/JobsRemoteInferenceHandler.py CHANGED
@@ -219,6 +219,7 @@ class JobsRemoteInferenceHandler:
         job_info.logger.add_info("results_uuid", results_uuid)
         results = object_fetcher(results_uuid, expected_object_type="results")
         results_url = remote_job_data.get("results_url")
+        job_info.logger.add_info("results_url", results_url)
         job_info.logger.update(
             f"Job completed and Results stored on Coop: {results_url}",
             status=JobsStatus.COMPLETED,
@@ -227,6 +228,40 @@ class JobsRemoteInferenceHandler:
         results.results_uuid = results_uuid
         return results

+    def _attempt_fetch_job(
+        self,
+        job_info: RemoteJobInfo,
+        remote_job_data_fetcher: Callable,
+        object_fetcher: Callable,
+    ) -> Union[None, "Results", Literal["continue"]]:
+        """Makes one attempt to fetch and process a remote job's status and results."""
+        remote_job_data = remote_job_data_fetcher(job_info.job_uuid)
+        status = remote_job_data.get("status")
+
+        if status == "cancelled":
+            self._handle_cancelled_job(job_info)
+            return None
+
+        elif status == "failed" or status == "completed":
+            if status == "failed":
+                self._handle_failed_job(job_info, remote_job_data)
+
+            results_uuid = remote_job_data.get("results_uuid")
+            if results_uuid:
+                results = self._fetch_results_and_log(
+                    job_info=job_info,
+                    results_uuid=results_uuid,
+                    remote_job_data=remote_job_data,
+                    object_fetcher=object_fetcher,
+                )
+                return results
+            else:
+                return None
+
+        else:
+            self._sleep_for_a_bit(job_info, status)
+            return "continue"
+
     def poll_remote_inference_job(
         self,
         job_info: RemoteJobInfo,
@@ -241,31 +276,13 @@ class JobsRemoteInferenceHandler:

         job_in_queue = True
         while job_in_queue:
-            remote_job_data = remote_job_data_fetcher(job_info.job_uuid)
-            status = remote_job_data.get("status")
-
-            if status == "cancelled":
-                self._handle_cancelled_job(job_info)
-                return None
-
-            elif status == "failed" or status == "completed":
-                if status == "failed":
-                    self._handle_failed_job(job_info, remote_job_data)
-
-                results_uuid = remote_job_data.get("results_uuid")
-                if results_uuid:
-                    results = self._fetch_results_and_log(
-                        job_info=job_info,
-                        results_uuid=results_uuid,
-                        remote_job_data=remote_job_data,
-                        object_fetcher=object_fetcher,
-                    )
-                    return results
-                else:
-                    return None
-
-            else:
-                self._sleep_for_a_bit(job_info, status)
+            result = self._attempt_fetch_job(
+                job_info,
+                remote_job_data_fetcher,
+                object_fetcher
+            )
+            if result != "continue":
+                return result

     async def create_and_poll_remote_job(
         self,
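The polling loop now delegates each pass to _attempt_fetch_job, which uses a small sentinel contract: a Results object or None terminates the poll, while the string "continue" means try again. Schematically (a standalone sketch, not edsl code):

def poll(step):
    while True:
        result = step()             # returns Results, None, or "continue"
        if result != "continue":    # None also exits, matching the old
            return result           # cancelled/failed behavior

Note that None (a cancelled or failed job with no results_uuid) also fails the != "continue" test, so those states exit the loop exactly as the old inline version did.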
edsl/jobs/JobsRemoteInferenceLogger.py CHANGED
@@ -32,7 +32,7 @@ class JobsInfo:
     pretty_names = {
         "job_uuid": "Job UUID",
        "progress_bar_url": "Progress Bar URL",
-        "error_report_url": "Error Report URL",
+        "error_report_url": "Exceptions Report URL",
         "results_uuid": "Results UUID",
         "results_url": "Results URL",
     }
edsl/jobs/buckets/BucketCollection.py CHANGED
@@ -96,6 +96,36 @@ class BucketCollection(UserDict):
         else:
             self[model] = self.services_to_buckets[self.models_to_services[model.model]]

+    def update_from_key_lookup(self, key_lookup: "KeyLookup") -> None:
+        """Updates the bucket collection rates based on model RPM/TPM from KeyLookup"""
+
+        for model_name, service in self.models_to_services.items():
+            if service in key_lookup and not self.infinity_buckets:
+
+                if key_lookup[service].rpm is not None:
+                    new_rps = key_lookup[service].rpm / 60.0
+                    new_requests_bucket = TokenBucket(
+                        bucket_name=service,
+                        bucket_type="requests",
+                        capacity=new_rps,
+                        refill_rate=new_rps,
+                        remote_url=self.remote_url,
+                    )
+                    self.services_to_buckets[service].requests_bucket = (
+                        new_requests_bucket
+                    )
+
+                if key_lookup[service].tpm is not None:
+                    new_tps = key_lookup[service].tpm / 60.0
+                    new_tokens_bucket = TokenBucket(
+                        bucket_name=service,
+                        bucket_type="tokens",
+                        capacity=new_tps,
+                        refill_rate=new_tps,
+                        remote_url=self.remote_url,
+                    )
+                    self.services_to_buckets[service].tokens_bucket = new_tokens_bucket
+
     def visualize(self) -> dict:
         """Visualize the token and request buckets for each model."""
         plots = {}
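The conversion is the per-minute limit divided by 60, applied to both capacity and refill rate; with capacity == refill_rate each bucket allows at most one second's worth of burst at the sustained rate. Illustrative values (not edsl defaults):

rpm, tpm = 600, 150_000   # requests/min and tokens/min from the KeyLookup
new_rps = rpm / 60.0      # 10.0 requests per second
new_tps = tpm / 60.0      # 2500.0 tokens per second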
edsl/jobs/data_structures.py CHANGED
@@ -32,6 +32,7 @@ class RunParameters(Base):
     remote_inference_results_visibility: Optional[VisibilityType] = "unlisted"
     skip_retry: bool = False
     raise_validation_errors: bool = False
+    background: bool = False
     disable_remote_cache: bool = False
     disable_remote_inference: bool = False
     job_uuid: Optional[str] = None
edsl/jobs/interviews/Interview.py CHANGED
@@ -153,7 +153,7 @@ class Interview:

        >>> i = Interview.example()
        >>> hash(i)
-        193593189022259693
+        767745459362662063
        """
        d = {
            "agent": self.agent.to_dict(add_edsl_version=add_edsl_version),
edsl/jobs/loggers/HTMLTableJobLogger.py CHANGED
@@ -9,7 +9,8 @@ from edsl.jobs.jobs_status_enums import JobsStatus
 class HTMLTableJobLogger(JobLogger):
     def __init__(self, verbose=True, theme="auto", **kwargs):
         super().__init__(verbose=verbose)
-        self.display_handle = display(HTML(""), display_id=True)
+        self.display_handle = display(HTML(""), display_id=True) if verbose else None
+        #self.display_handle = display(HTML(""), display_id=True)
         self.current_message = None
         self.log_id = str(uuid.uuid4())
         self.is_expanded = True
@@ -22,6 +23,9 @@ class HTMLTableJobLogger(JobLogger):

     def _init_css(self):
         """Initialize the CSS styles with enhanced theme support"""
+        if not self.verbose:
+            return None
+
         css = """
         <style>
             /* Base theme variables */
@@ -217,6 +221,7 @@ class HTMLTableJobLogger(JobLogger):
             }});
         </script>
         """
+

         display(HTML(css + init_script))

edsl/jobs/results_exceptions_handler.py CHANGED
@@ -66,9 +66,7 @@ class ResultsExceptionsHandler:

     def _generate_error_message(self, indices) -> str:
         """Generate appropriate error message based on number of exceptions."""
-        msg = f"Exceptions were raised in {len(indices)} interviews.\n"
-        if len(indices) > 5:
-            msg += f"Exceptions were raised in the following interviews: {indices}.\n"
+        msg = f"Exceptions were raised.\n"
         return msg

     def handle_exceptions(self) -> None:
@@ -84,7 +82,6 @@ class ResultsExceptionsHandler:

         # Generate HTML report
         filepath = self.results.task_history.html(
-            cta="Open report to see details.",
             open_in_browser=self.open_in_browser,
             return_link=True,
         )
@@ -92,7 +89,5 @@ class ResultsExceptionsHandler:
         # Handle remote logging if enabled
         if self.remote_logging:
             filestore = HTMLFileStore(filepath)
-            coop_details = filestore.push(description="Error report")
+            coop_details = filestore.push(description="Exceptions Report")
             print(coop_details)
-
-            print("Also see: https://docs.expectedparrot.com/en/latest/exceptions.html")
edsl/jobs/tasks/TaskHistory.py CHANGED
@@ -264,9 +264,27 @@ class TaskHistory(RepresentationMixin):
         js = env.joinpath("report.js").read_text()
         return js

+    @property
+    def exceptions_table(self) -> dict:
+        """Return a dictionary of exceptions organized by type, service, model, and question name."""
+        exceptions_table = {}
+        for interview in self.total_interviews:
+            for question_name, exceptions in interview.exceptions.items():
+                for exception in exceptions:
+                    key = (
+                        exception.exception.__class__.__name__,  # Exception type
+                        interview.model._inference_service_,  # Service
+                        interview.model.model,  # Model
+                        question_name  # Question name
+                    )
+                    if key not in exceptions_table:
+                        exceptions_table[key] = 0
+                    exceptions_table[key] += 1
+        return exceptions_table
+
     @property
     def exceptions_by_type(self) -> dict:
-        """Return a dictionary of exceptions by type."""
+        """Return a dictionary of exceptions tallied by type."""
         exceptions_by_type = {}
         for interview in self.total_interviews:
             for question_name, exceptions in interview.exceptions.items():
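The new exceptions_table maps (exception type, service, model, question name) tuples to counts, presumably rendered by the new exceptions_table.html template listed above. Reading it directly (a sketch; th is a hypothetical TaskHistory instance):

for (exc_type, service, model, question), count in th.exceptions_table.items():
    print(f"{exc_type:<28} {service:<12} {model:<24} {question:<20} {count}")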
@@ -324,6 +342,27 @@ class TaskHistory(RepresentationMixin):
         }
         return sorted_exceptions_by_question_name

+    # @property
+    # def exceptions_by_model(self) -> dict:
+    #     """Return a dictionary of exceptions tallied by model and question name."""
+    #     exceptions_by_model = {}
+    #     for interview in self.total_interviews:
+    #         model = interview.model.model
+    #         service = interview.model._inference_service_
+    #         if (service, model) not in exceptions_by_model:
+    #             exceptions_by_model[(service, model)] = 0
+    #         if interview.exceptions != {}:
+    #             exceptions_by_model[(service, model)] += len(interview.exceptions)
+
+    #     # sort the exceptions by model
+    #     sorted_exceptions_by_model = {
+    #         k: v
+    #         for k, v in sorted(
+    #             exceptions_by_model.items(), key=lambda item: item[1], reverse=True
+    #         )
+    #     }
+    #     return sorted_exceptions_by_model
+
     @property
     def exceptions_by_model(self) -> dict:
         """Return a dictionary of exceptions tallied by model and question name."""
@@ -331,19 +370,12 @@ class TaskHistory(RepresentationMixin):
         for interview in self.total_interviews:
             model = interview.model.model
             service = interview.model._inference_service_
-            if (service, model) not in exceptions_by_model:
-                exceptions_by_model[(service, model)] = 0
-            if interview.exceptions != {}:
-                exceptions_by_model[(service, model)] += len(interview.exceptions)
-
-        # sort the exceptions by model
-        sorted_exceptions_by_model = {
-            k: v
-            for k, v in sorted(
-                exceptions_by_model.items(), key=lambda item: item[1], reverse=True
-            )
-        }
-        return sorted_exceptions_by_model
+            for question_name, exceptions in interview.exceptions.items():
+                key = (service, model, question_name)
+                if key not in exceptions_by_model:
+                    exceptions_by_model[key] = 0
+                exceptions_by_model[key] += len(exceptions)
+        return exceptions_by_model

     def generate_html_report(self, css: Optional[str], include_plot=False):
         if include_plot:
@@ -372,6 +404,7 @@ class TaskHistory(RepresentationMixin):
             javascript=self.javascript(),
             num_exceptions=len(self.exceptions),
             performance_plot_html=performance_plot_html,
+            exceptions_table=self.exceptions_table,
             exceptions_by_type=self.exceptions_by_type,
             exceptions_by_question_name=self.exceptions_by_question_name,
             exceptions_by_model=self.exceptions_by_model,
@@ -386,11 +419,10 @@ class TaskHistory(RepresentationMixin):
         filename: Optional[str] = None,
         return_link=False,
         css=None,
-        cta="Open Report in New Tab",
+        cta="\nClick to open the report in a new tab\n",
         open_in_browser=False,
     ):
         """Return an HTML report."""
-
         from IPython.display import display, HTML
         import tempfile
         import os
@@ -419,7 +451,7 @@ class TaskHistory(RepresentationMixin):
             html_link = f'<a href="{html_url}" target="_blank">{cta}</a>'
             display(HTML(html_link))
             escaped_output = html.escape(output)
-            iframe = f""""
+            iframe = f"""
             <iframe srcdoc="{ escaped_output }" style="width: 800px; height: 600px;"></iframe>
             """
             display(HTML(iframe))