crfm-helm 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic; consult the package registry's advisory page for more details.

Files changed (121) hide show
  1. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +3 -1
  2. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +117 -115
  3. helm/benchmark/adaptation/adapter_spec.py +5 -0
  4. helm/benchmark/metrics/bbq_metrics.py +12 -0
  5. helm/benchmark/metrics/evaluate_reference_metrics.py +12 -0
  6. helm/benchmark/metrics/safety_metrics.py +13 -1
  7. helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
  8. helm/benchmark/presentation/run_display.py +13 -3
  9. helm/benchmark/presentation/run_entry.py +2 -2
  10. helm/benchmark/run.py +1 -1
  11. helm/benchmark/run_specs/arabic_run_specs.py +6 -0
  12. helm/benchmark/run_specs/medhelm_run_specs.py +2 -2
  13. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
  14. helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
  15. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
  16. helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
  17. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
  18. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
  19. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
  20. helm/benchmark/scenarios/banking77_scenario.py +21 -0
  21. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  22. helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
  23. helm/benchmark/scenarios/commonsense_scenario.py +7 -1
  24. helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
  25. helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
  26. helm/benchmark/scenarios/financebench_scenario.py +21 -0
  27. helm/benchmark/scenarios/gsm_scenario.py +9 -3
  28. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
  29. helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
  30. helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
  31. helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
  32. helm/benchmark/scenarios/legalbench_scenario.py +6 -7
  33. helm/benchmark/scenarios/math_scenario.py +11 -4
  34. helm/benchmark/scenarios/med_qa_scenario.py +7 -1
  35. helm/benchmark/scenarios/medi_qa_scenario.py +2 -2
  36. helm/benchmark/scenarios/mmlu_scenario.py +8 -2
  37. helm/benchmark/scenarios/narrativeqa_scenario.py +3 -4
  38. helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
  39. helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
  40. helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
  41. helm/benchmark/scenarios/spider_scenario.py +18 -0
  42. helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
  43. helm/benchmark/scenarios/wmt_14_scenario.py +9 -2
  44. helm/benchmark/static/schema_long_context.yaml +12 -31
  45. helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
  46. helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
  47. helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
  48. helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
  49. helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
  50. helm/benchmark/static_build/index.html +5 -6
  51. helm/clients/ai21_client.py +2 -0
  52. helm/clients/aleph_alpha_client.py +2 -0
  53. helm/clients/anthropic_client.py +7 -1
  54. helm/clients/audio_language/diva_llama_client.py +2 -0
  55. helm/clients/audio_language/llama_omni_client.py +2 -1
  56. helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
  57. helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
  58. helm/clients/audio_language/qwen_audiolm_client.py +2 -1
  59. helm/clients/bedrock_client.py +2 -0
  60. helm/clients/cohere_client.py +3 -0
  61. helm/clients/google_client.py +2 -0
  62. helm/clients/http_model_client.py +2 -0
  63. helm/clients/huggingface_client.py +2 -1
  64. helm/clients/ibm_client.py +3 -1
  65. helm/clients/image_generation/adobe_vision_client.py +2 -0
  66. helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
  67. helm/clients/image_generation/cogview2_client.py +2 -1
  68. helm/clients/image_generation/dalle2_client.py +2 -0
  69. helm/clients/image_generation/dalle_mini_client.py +2 -1
  70. helm/clients/image_generation/deep_floyd_client.py +2 -0
  71. helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
  72. helm/clients/image_generation/lexica_client.py +2 -0
  73. helm/clients/image_generation/mindalle_client.py +2 -1
  74. helm/clients/image_generation/together_image_generation_client.py +2 -0
  75. helm/clients/megatron_client.py +2 -0
  76. helm/clients/mistral_client.py +2 -0
  77. helm/clients/moderation_api_client.py +2 -0
  78. helm/clients/openai_client.py +5 -1
  79. helm/clients/palmyra_client.py +2 -1
  80. helm/clients/reka_client.py +2 -1
  81. helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
  82. helm/clients/stanfordhealthcare_http_model_client.py +2 -0
  83. helm/clients/together_client.py +4 -0
  84. helm/clients/vertexai_client.py +4 -0
  85. helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
  86. helm/clients/vision_language/huggingface_vlm_client.py +2 -0
  87. helm/clients/vision_language/idefics_client.py +2 -1
  88. helm/clients/vision_language/open_flamingo_client.py +2 -1
  89. helm/clients/vision_language/paligemma_client.py +2 -1
  90. helm/clients/vision_language/palmyra_vision_client.py +2 -0
  91. helm/clients/vision_language/qwen2_vlm_client.py +2 -1
  92. helm/clients/vision_language/qwen_vlm_client.py +2 -1
  93. helm/clients/writer_client.py +2 -0
  94. helm/common/hierarchical_logger.py +20 -0
  95. helm/common/optional_dependencies.py +1 -1
  96. helm/common/test_general.py +4 -0
  97. helm/config/model_deployments.yaml +225 -0
  98. helm/config/model_metadata.yaml +232 -7
  99. helm/config/tokenizer_configs.yaml +74 -4
  100. helm/benchmark/static_build/assets/index-671a5e06.js +0 -10
  101. helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
  102. helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
  103. helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
  104. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
  105. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
  106. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
  107. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
  108. /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
  109. /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
  110. /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
  111. /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
  112. /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
  113. /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
  114. /helm/benchmark/static_build/assets/{index-9352595e.css → index-oIeiQW2g.css} +0 -0
  115. /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
  116. /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
  117. /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
  118. /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
  119. /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
  120. /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
  121. /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
@@ -7,14 +7,13 @@
7
7
  <title>Holistic Evaluation of Language Models (HELM)</title>
8
8
  <meta name="description" content="The Holistic Evaluation of Language Models (HELM) serves as a living benchmark for transparency in language models. Providing broad coverage and recognizing incompleteness, multi-metric measurements, and standardization. All data and analysis are freely accessible on the website for exploration and study." />
9
9
  <script type="text/javascript" src="./config.js"></script>
10
- <script type="module" crossorigin src="./assets/index-671a5e06.js"></script>
11
- <link rel="modulepreload" crossorigin href="./assets/react-f82877fd.js">
12
- <link rel="modulepreload" crossorigin href="./assets/recharts-4037aff0.js">
13
- <link rel="modulepreload" crossorigin href="./assets/tremor-38a10867.js">
14
- <link rel="stylesheet" href="./assets/index-9352595e.css">
10
+ <script type="module" crossorigin src="./assets/index-qOFpOyHb.js"></script>
11
+ <link rel="modulepreload" crossorigin href="./assets/react-BteFIppM.js">
12
+ <link rel="modulepreload" crossorigin href="./assets/recharts-DxuQtTOs.js">
13
+ <link rel="modulepreload" crossorigin href="./assets/tremor-DR4fE7ko.js">
14
+ <link rel="stylesheet" crossorigin href="./assets/index-oIeiQW2g.css">
15
15
  </head>
16
16
  <body class="block">
17
17
  <div id="root"></div>
18
-
19
18
  </body>
20
19
  </html>
@@ -2,6 +2,7 @@ from typing import Dict, List, Optional, TypedDict
2
2
  import requests
3
3
 
4
4
  from helm.common.cache import CacheConfig
5
+ from helm.common.hierarchical_logger import hexception
5
6
  from helm.common.optional_dependencies import handle_module_not_found_error
6
7
  from helm.common.request import (
7
8
  wrap_request_time,
@@ -76,6 +77,7 @@ class AI21Client(CachingClient):
76
77
  cache_key = CachingClient.make_cache_key({"engine": request.model_engine, **raw_request}, request)
77
78
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
78
79
  except AI21RequestError as e:
80
+ hexception(e)
79
81
  return RequestResult(success=False, cached=False, error=str(e), completions=[], embedding=[])
80
82
 
81
83
  def fix_text(x: str, first: bool) -> str:
@@ -1,6 +1,7 @@
1
1
  from typing import List
2
2
 
3
3
  from helm.common.cache import CacheConfig
4
+ from helm.common.hierarchical_logger import hexception
4
5
  from helm.common.media_object import TEXT_TYPE
5
6
  from helm.common.optional_dependencies import handle_module_not_found_error
6
7
  from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
@@ -76,6 +77,7 @@ class AlephAlphaClient(CachingClient):
76
77
  cache_key = CachingClient.make_cache_key({"model": model, "prompt": prompt_key, **parameters}, request)
77
78
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
78
79
  except Exception as e:
80
+ hexception(e)
79
81
  error: str = f"AlephAlphaClient error: {e}"
80
82
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
81
83
 
@@ -8,7 +8,7 @@ import time
8
8
  import urllib.parse
9
9
 
10
10
  from helm.common.cache import CacheConfig
11
- from helm.common.hierarchical_logger import htrack_block, hlog, hwarn
11
+ from helm.common.hierarchical_logger import hexception, htrack_block, hlog, hwarn
12
12
  from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
13
13
  from helm.common.optional_dependencies import handle_module_not_found_error
14
14
  from helm.common.request import (
@@ -184,6 +184,7 @@ class AnthropicClient(CachingClient):
184
184
  embedding=[],
185
185
  error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
186
186
  )
187
+ hexception(error)
187
188
  return RequestResult(success=False, cached=False, error=str(error), completions=[], embedding=[])
188
189
 
189
190
  # Post process the completion.
@@ -385,6 +386,10 @@ class AnthropicMessagesClient(CachingClient):
385
386
  # Avoid error:
386
387
  # `top_k` must be unset when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking # noqa: E501
387
388
  del raw_request["top_k"]
389
+ if raw_request["model"].startswith("claude-sonnet-4-5"):
390
+ # Avoid error:
391
+ # `temperature` and `top_p` cannot both be specified for this model. Please use only one.
392
+ del raw_request["top_p"]
388
393
 
389
394
  completions: List[GeneratedOutput] = []
390
395
 
@@ -696,6 +701,7 @@ class AnthropicLegacyClient(CachingClient):
696
701
  )
697
702
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
698
703
  except AnthropicRequestError as error:
704
+ hexception(error)
699
705
  return RequestResult(success=False, cached=False, error=str(error), completions=[], embedding=[])
700
706
 
701
707
  sequence_logprob: float = 0
@@ -6,6 +6,7 @@ from transformers import AutoModel, PreTrainedModel
6
6
 
7
7
  from helm.clients.client import CachingClient
8
8
  from helm.common.cache import CacheConfig
9
+ from helm.common.hierarchical_logger import hexception
9
10
  from helm.common.media_object import TEXT_TYPE
10
11
  from helm.common.request import (
11
12
  GeneratedOutput,
@@ -105,6 +106,7 @@ class DivaLlamaClient(CachingClient):
105
106
  cache_key = CachingClient.make_cache_key(raw_request, request)
106
107
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
107
108
  except Exception as e: # Do something if error is encountered.
109
+ hexception(e)
108
110
  error: str = f"HuggingFace error: {e}"
109
111
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
110
112
 
@@ -12,7 +12,7 @@ from helm.clients.audio_language.llama_omni.preprocess import tokenizer_speech_t
12
12
 
13
13
  from helm.common.cache import CacheConfig
14
14
  from helm.common.gpu_utils import get_torch_device_name
15
- from helm.common.hierarchical_logger import hlog, htrack_block
15
+ from helm.common.hierarchical_logger import hexception, hlog, htrack_block
16
16
  from helm.common.media_object import TEXT_TYPE
17
17
  from helm.common.request import Request, RequestResult, GeneratedOutput, Token
18
18
  from helm.common.request import wrap_request_time
@@ -170,6 +170,7 @@ class LlamaOmniAudioLMClient(CachingClient):
170
170
  )
171
171
  result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
172
172
  except RuntimeError as model_error:
173
+ hexception(model_error)
173
174
  return RequestResult(
174
175
  success=False, cached=False, error=str(model_error), completions=[], embedding=[]
175
176
  )
@@ -9,7 +9,7 @@ from helm.clients.audio_language.qwen_omni.qwen2_5_omni_utils.v2_5 import proces
9
9
 
10
10
  from helm.common.cache import CacheConfig
11
11
  from helm.common.gpu_utils import get_torch_device_name
12
- from helm.common.hierarchical_logger import hlog, htrack_block
12
+ from helm.common.hierarchical_logger import hexception, hlog, htrack_block
13
13
  from helm.common.media_object import TEXT_TYPE
14
14
  from helm.common.request import Request, RequestResult, GeneratedOutput, Token
15
15
  from helm.common.request import wrap_request_time
@@ -180,6 +180,7 @@ class Qwen2_5OmniAudioLMClient(CachingClient):
180
180
  )
181
181
  result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
182
182
  except RuntimeError as model_error:
183
+ hexception(model_error)
183
184
  return RequestResult(
184
185
  success=False, cached=False, error=str(model_error), completions=[], embedding=[]
185
186
  )
@@ -7,7 +7,7 @@ from transformers import Qwen2AudioForConditionalGeneration, AutoProcessor
7
7
 
8
8
  from helm.common.cache import CacheConfig
9
9
  from helm.common.gpu_utils import get_torch_device_name
10
- from helm.common.hierarchical_logger import hlog, htrack_block
10
+ from helm.common.hierarchical_logger import hexception, hlog, htrack_block
11
11
  from helm.common.media_object import TEXT_TYPE
12
12
  from helm.common.request import Request, RequestResult, GeneratedOutput, Token
13
13
  from helm.common.request import wrap_request_time
@@ -161,6 +161,7 @@ class Qwen2AudioLMClient(CachingClient):
161
161
  )
162
162
  result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
163
163
  except RuntimeError as model_error:
164
+ hexception(model_error)
164
165
  return RequestResult(
165
166
  success=False, cached=False, error=str(model_error), completions=[], embedding=[]
166
167
  )
@@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
6
6
 
7
7
  from helm.common.cache import CacheConfig
8
8
  from helm.common.gpu_utils import get_torch_device_name
9
- from helm.common.hierarchical_logger import hlog, htrack_block
9
+ from helm.common.hierarchical_logger import hexception, hlog, htrack_block
10
10
  from helm.common.media_object import TEXT_TYPE
11
11
  from helm.common.request import Request, RequestResult, GeneratedOutput, Token
12
12
  from helm.common.request import wrap_request_time
@@ -124,6 +124,7 @@ class QwenAudioLMClient(CachingClient):
124
124
  )
125
125
  result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
126
126
  except RuntimeError as model_error:
127
+ hexception(model_error)
127
128
  return RequestResult(
128
129
  success=False, cached=False, error=str(model_error), completions=[], embedding=[]
129
130
  )
@@ -7,6 +7,7 @@ from datetime import datetime
7
7
 
8
8
  from helm.common.cache import CacheConfig
9
9
  from helm.clients.client import CachingClient, truncate_and_tokenize_response_text
10
+ from helm.common.hierarchical_logger import hexception
10
11
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
11
12
  from helm.clients.bedrock_utils import get_bedrock_client, get_bedrock_client_v1
12
13
  from helm.tokenizers.tokenizer import Tokenizer
@@ -75,6 +76,7 @@ class BedrockClient(CachingClient):
75
76
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
76
77
 
77
78
  except Exception as error:
79
+ hexception(error)
78
80
  return RequestResult(
79
81
  success=False,
80
82
  cached=False,
@@ -3,6 +3,7 @@ import requests
3
3
  from typing import List, Optional, Sequence, TypedDict
4
4
 
5
5
  from helm.common.cache import CacheConfig
6
+ from helm.common.hierarchical_logger import hexception
6
7
  from helm.common.optional_dependencies import handle_module_not_found_error
7
8
  from helm.common.request import (
8
9
  wrap_request_time,
@@ -123,6 +124,7 @@ class CohereClient(CachingClient):
123
124
 
124
125
  response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
125
126
  except (requests.exceptions.RequestException, AssertionError) as e:
127
+ hexception(e)
126
128
  error: str = f"CohereClient error: {e}"
127
129
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
128
130
 
@@ -232,6 +234,7 @@ class CohereChatClient(CachingClient):
232
234
 
233
235
  response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
234
236
  except (requests.exceptions.RequestException, AssertionError) as e:
237
+ hexception(e)
235
238
  error: str = f"CohereClient error: {e}"
236
239
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
237
240
 
@@ -1,6 +1,7 @@
1
1
  from typing import List, Dict
2
2
 
3
3
  from helm.common.cache import CacheConfig
4
+ from helm.common.hierarchical_logger import hexception
4
5
  from helm.common.request import Request, RequestResult, GeneratedOutput, Token
5
6
  from helm.clients.client import CachingClient, truncate_sequence
6
7
 
@@ -44,6 +45,7 @@ class GoogleClient(CachingClient):
44
45
  # If results are not cached for a given query, fail fast
45
46
  response, cached = self.cache.get(cache_key, fail)
46
47
  except RuntimeError as e:
48
+ hexception(e)
47
49
  error: str = f"GoogleClient error: {e}"
48
50
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
49
51
 
@@ -3,6 +3,7 @@ from dataclasses import asdict
3
3
  from typing import Any, Dict
4
4
 
5
5
  from helm.common.cache import CacheConfig
6
+ from helm.common.hierarchical_logger import hexception
6
7
  from helm.common.request import (
7
8
  wrap_request_time,
8
9
  Request,
@@ -76,5 +77,6 @@ class HTTPModelClient(CachingClient):
76
77
  request_time=response["request_time"],
77
78
  )
78
79
  except requests.exceptions.RequestException as e:
80
+ hexception(e)
79
81
  error: str = f"Request error: {e}"
80
82
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
@@ -8,7 +8,7 @@ from transformers.generation.stopping_criteria import (
8
8
  from typing import Any, Dict, List, Optional, TypedDict
9
9
 
10
10
  from helm.common.cache import CacheConfig
11
- from helm.common.hierarchical_logger import htrack_block, hlog, hwarn
11
+ from helm.common.hierarchical_logger import hexception, htrack_block, hlog, hwarn
12
12
  from helm.common.optional_dependencies import handle_module_not_found_error
13
13
  from helm.common.request import (
14
14
  wrap_request_time,
@@ -345,6 +345,7 @@ class HuggingFaceClient(CachingClient):
345
345
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
346
346
  except Exception as e: # Do something if error is encountered.
347
347
  error: str = f"HuggingFace error: {e}"
348
+ hexception(e)
348
349
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
349
350
 
350
351
  completions = []
@@ -1,7 +1,7 @@
1
1
  from abc import ABC
2
2
  from abc import abstractmethod
3
3
 
4
- from helm.common.hierarchical_logger import hlog
4
+ from helm.common.hierarchical_logger import hexception, hlog
5
5
  from helm.common.cache import CacheConfig
6
6
  from helm.common.request import (
7
7
  Request,
@@ -249,6 +249,7 @@ class IbmChatClient(IbmClient):
249
249
  )
250
250
 
251
251
  except Exception as e:
252
+ hexception(e)
252
253
  error: str = f"IBM Chat client Model error: {e}"
253
254
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
254
255
 
@@ -263,5 +264,6 @@ class IbmTextClient(IbmClient):
263
264
  inference_handler=GenerateInferenceHandler(inference_engine=self.inference_engine), request=request
264
265
  )
265
266
  except Exception as e:
267
+ hexception(e)
266
268
  error: str = f"IBM Text client Model error: {e}"
267
269
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
@@ -1,6 +1,7 @@
1
1
  from typing import List, Dict
2
2
 
3
3
  from helm.common.cache import Cache, CacheConfig
4
+ from helm.common.hierarchical_logger import hexception
4
5
  from helm.common.request import Request, RequestResult, GeneratedOutput
5
6
  from helm.common.tokenization_request import (
6
7
  TokenizationRequest,
@@ -54,6 +55,7 @@ class AdobeVisionClient(Client):
54
55
 
55
56
  response, cached = self._cache.get(cache_key, fail)
56
57
  except RuntimeError as e:
58
+ hexception(e)
57
59
  error: str = f"Adobe Vision Client error: {e}"
58
60
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
59
61
 
@@ -1,6 +1,7 @@
1
1
  from typing import List, Dict
2
2
 
3
3
  from helm.common.cache import Cache, CacheConfig
4
+ from helm.common.hierarchical_logger import hexception
4
5
  from helm.common.request import Request, RequestResult, GeneratedOutput
5
6
  from helm.common.tokenization_request import (
6
7
  TokenizationRequest,
@@ -74,6 +75,7 @@ class AlephAlphaImageGenerationClient(Client):
74
75
 
75
76
  response, cached = self._cache.get(cache_key, fail)
76
77
  except RuntimeError as e:
78
+ hexception(e)
77
79
  error: str = f"AlephAlphaVisionClient error: {e}"
78
80
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
79
81
 
@@ -9,7 +9,7 @@ from torchvision.utils import save_image
9
9
 
10
10
  from helm.common.cache import CacheConfig, Cache
11
11
  from helm.common.file_caches.file_cache import FileCache
12
- from helm.common.hierarchical_logger import hlog, htrack_block
12
+ from helm.common.hierarchical_logger import hexception, hlog, htrack_block
13
13
  from helm.common.optional_dependencies import handle_module_not_found_error
14
14
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
15
15
  from helm.common.tokenization_request import (
@@ -167,6 +167,7 @@ class CogView2Client(Client):
167
167
  )
168
168
  results, cached = self._cache.get(cache_key, wrap_request_time(do_it))
169
169
  except RuntimeError as e:
170
+ hexception(e)
170
171
  error: str = f"CogView2Client error: {e}"
171
172
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
172
173
 
@@ -4,6 +4,7 @@ import base64
4
4
  from helm.common.cache import CacheConfig, Cache
5
5
  from helm.common.general import hlog
6
6
  from helm.common.file_caches.file_cache import FileCache
7
+ from helm.common.hierarchical_logger import hexception
7
8
  from helm.common.media_object import MultimediaObject
8
9
  from helm.common.optional_dependencies import handle_module_not_found_error
9
10
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
@@ -124,6 +125,7 @@ class DALLE2Client(Client):
124
125
  hlog(f"Failed safety check: {request.prompt}")
125
126
  return self.get_content_policy_violated_result(request)
126
127
  else:
128
+ hexception(error)
127
129
  return RequestResult(
128
130
  success=False, cached=False, error=f"DALL-E error: {error}", completions=[], embedding=[]
129
131
  )
@@ -5,7 +5,7 @@ from functools import partial
5
5
 
6
6
  from helm.common.cache import CacheConfig, Cache
7
7
  from helm.common.file_caches.file_cache import FileCache
8
- from helm.common.hierarchical_logger import hlog, htrack_block
8
+ from helm.common.hierarchical_logger import hexception, hlog, htrack_block
9
9
  from helm.common.optional_dependencies import handle_module_not_found_error
10
10
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
11
11
  from helm.common.tokenization_request import (
@@ -166,6 +166,7 @@ class DALLEMiniClient(Client):
166
166
  )
167
167
  results, cached = self._cache.get(cache_key, wrap_request_time(do_it))
168
168
  except RuntimeError as e:
169
+ hexception(e)
169
170
  error: str = f"DALLEMiniClient error: {e}"
170
171
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
171
172
 
@@ -1,6 +1,7 @@
1
1
  from typing import List, Dict
2
2
 
3
3
  from helm.common.cache import Cache, CacheConfig
4
+ from helm.common.hierarchical_logger import hexception
4
5
  from helm.common.request import Request, RequestResult, GeneratedOutput
5
6
  from helm.common.tokenization_request import (
6
7
  TokenizationRequest,
@@ -54,6 +55,7 @@ class DeepFloydClient(Client):
54
55
 
55
56
  response, cached = self._cache.get(cache_key, fail)
56
57
  except RuntimeError as e:
58
+ hexception(e)
57
59
  error: str = f"DeepFloyd Client error: {e}"
58
60
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
59
61
 
@@ -7,7 +7,7 @@ import torch
7
7
  from helm.common.cache import CacheConfig, Cache
8
8
  from helm.common.file_caches.file_cache import FileCache
9
9
  from helm.common.gpu_utils import get_torch_device_name, is_cuda_available
10
- from helm.common.hierarchical_logger import hlog, htrack_block
10
+ from helm.common.hierarchical_logger import hexception, hlog, htrack_block
11
11
  from helm.common.optional_dependencies import handle_module_not_found_error
12
12
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
13
13
  from helm.common.tokenization_request import (
@@ -178,6 +178,7 @@ class HuggingFaceDiffusersClient(Client):
178
178
  )
179
179
  results, cached = self._cache.get(cache_key, wrap_request_time(do_it))
180
180
  except RuntimeError as ex:
181
+ hexception(ex)
181
182
  error: str = f"HuggingFaceDiffusersClient error: {ex}"
182
183
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
183
184
 
@@ -5,6 +5,7 @@ import urllib.parse
5
5
 
6
6
  from helm.common.cache import CacheConfig, Cache
7
7
  from helm.common.file_caches.file_cache import FileCache
8
+ from helm.common.hierarchical_logger import hexception
8
9
  from helm.common.images_utils import encode_base64
9
10
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
10
11
  from helm.common.tokenization_request import (
@@ -62,6 +63,7 @@ class LexicaClient(Client):
62
63
 
63
64
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
64
65
  except RuntimeError as e:
66
+ hexception(e)
65
67
  error: str = f"LexicaClient error: {e}"
66
68
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
67
69
 
@@ -5,7 +5,7 @@ import numpy as np
5
5
  from helm.common.cache import CacheConfig, Cache
6
6
  from helm.common.file_caches.file_cache import FileCache
7
7
  from helm.common.gpu_utils import get_torch_device_name
8
- from helm.common.hierarchical_logger import hlog, htrack_block
8
+ from helm.common.hierarchical_logger import hexception, hlog, htrack_block
9
9
  from helm.common.optional_dependencies import handle_module_not_found_error
10
10
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
11
11
  from helm.common.tokenization_request import (
@@ -91,6 +91,7 @@ class MinDALLEClient(Client):
91
91
  )
92
92
  results, cached = self._cache.get(cache_key, wrap_request_time(do_it))
93
93
  except RuntimeError as ex:
94
+ hexception(ex)
94
95
  error: str = f"MinDALLEClient error: {ex}"
95
96
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
96
97
 
@@ -4,6 +4,7 @@ import requests
4
4
 
5
5
  from helm.common.cache import CacheConfig, Cache
6
6
  from helm.common.file_caches.file_cache import FileCache
7
+ from helm.common.hierarchical_logger import hexception
7
8
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
8
9
  from helm.common.tokenization_request import (
9
10
  TokenizationRequest,
@@ -84,6 +85,7 @@ class TogetherImageGenerationClient(Client):
84
85
 
85
86
  response, cached = self._cache.get(cache_key, wrap_request_time(do_it))
86
87
  except RuntimeError as e:
88
+ hexception(e)
87
89
  error: str = f"TogetherVisionClient error: {e}"
88
90
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
89
91
 
@@ -4,6 +4,7 @@ from typing import Any, Dict, List
4
4
  import traceback
5
5
  from helm.common.cache import CacheConfig
6
6
 
7
+ from helm.common.hierarchical_logger import hexception
7
8
  from helm.common.request import (
8
9
  wrap_request_time,
9
10
  EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
@@ -103,6 +104,7 @@ class MegatronClient(CachingClient):
103
104
  try:
104
105
  return self._make_request(request)
105
106
  except Exception as e:
107
+ hexception(e)
106
108
  return RequestResult(
107
109
  success=False,
108
110
  cached=False,
@@ -1,6 +1,7 @@
1
1
  import requests
2
2
  from typing import Any, Dict, List, Optional, TypedDict, Union
3
3
 
4
+ from helm.common.hierarchical_logger import hexception
4
5
  from helm.proxy.retry import NonRetriableException
5
6
  from helm.common.cache import CacheConfig
6
7
  from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
@@ -156,6 +157,7 @@ class MistralAIClient(CachingClient):
156
157
 
157
158
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
158
159
  except (requests.exceptions.RequestException, AssertionError) as e:
160
+ hexception(e)
159
161
  error: str = f"MistralClient error: {e}"
160
162
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
161
163
 
@@ -1,5 +1,6 @@
1
1
  from typing import Any, Dict
2
2
 
3
+ from helm.common.hierarchical_logger import hexception
3
4
  from helm.common.request import wrap_request_time
4
5
  from helm.common.cache import Cache, CacheConfig
5
6
  from helm.common.moderations_api_request import (
@@ -64,6 +65,7 @@ class ModerationAPIClient:
64
65
 
65
66
  response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
66
67
  except openai.OpenAIError as e:
68
+ hexception(e)
67
69
  error: str = f"Moderation API error: {e}"
68
70
  return ModerationAPIRequestResult(
69
71
  success=False, cached=False, error=error, flagged=None, flagged_results=None, scores=None
@@ -10,7 +10,7 @@ from helm.common import multimodal_request_utils
10
10
  from helm.common.cache import CacheConfig
11
11
  from helm.common.media_object import TEXT_TYPE, MultimediaObject, MediaObject
12
12
  from helm.common.request import ErrorFlags, Thinking, wrap_request_time, Request, RequestResult, GeneratedOutput, Token
13
- from helm.common.hierarchical_logger import hlog, hwarn
13
+ from helm.common.hierarchical_logger import hlog, hwarn, hexception
14
14
  from helm.common.object_spec import get_class_by_name
15
15
  from helm.common.optional_dependencies import handle_module_not_found_error
16
16
  from helm.common.tokenization_request import (
@@ -112,6 +112,7 @@ class OpenAIClientUtils:
112
112
  error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
113
113
  )
114
114
 
115
+ hexception(e)
115
116
  error: str = f"OpenAI error: {e}"
116
117
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
117
118
 
@@ -170,6 +171,7 @@ class OpenAIClient(CachingClient):
170
171
  cache_key = self._get_cache_key(raw_request, request)
171
172
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
172
173
  except openai.OpenAIError as e:
174
+ hexception(e)
173
175
  error: str = f"OpenAI error: {e}"
174
176
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
175
177
 
@@ -436,6 +438,7 @@ class OpenAIClient(CachingClient):
436
438
  cache_key = self._get_cache_key(raw_request, request)
437
439
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
438
440
  except openai.OpenAIError as e:
441
+ hexception(e)
439
442
  error: str = f"OpenAI error: {e}"
440
443
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
441
444
 
@@ -491,6 +494,7 @@ class OpenAIClient(CachingClient):
491
494
  cache_key = self._get_cache_key({"audio": audio_path, "model": model}, request)
492
495
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
493
496
  except openai.OpenAIError as e:
497
+ hexception(e)
494
498
  error: str = f"OpenAI error: {e}"
495
499
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
496
500
 
@@ -5,7 +5,7 @@ from typing import Any, Dict, List
5
5
 
6
6
  from helm.clients.openai_client import OpenAIClient
7
7
  from helm.common.cache import CacheConfig
8
- from helm.common.hierarchical_logger import hwarn
8
+ from helm.common.hierarchical_logger import hexception, hwarn
9
9
  from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token, ErrorFlags
10
10
  from helm.common.tokenization_request import (
11
11
  TokenizationRequest,
@@ -99,6 +99,7 @@ class PalmyraClient(CachingClient):
99
99
 
100
100
  response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
101
101
  except (requests.exceptions.RequestException, AssertionError) as e:
102
+ hexception(e)
102
103
  error: str = f"PalmyraClient error: {e}"
103
104
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
104
105
 
@@ -6,7 +6,7 @@ from helm.proxy.retry import NonRetriableException
6
6
  from helm.common.cache import CacheConfig
7
7
  from helm.common.media_object import TEXT_TYPE
8
8
  from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput
9
- from helm.common.hierarchical_logger import hwarn
9
+ from helm.common.hierarchical_logger import hexception, hwarn
10
10
  from helm.common.optional_dependencies import handle_module_not_found_error
11
11
  from helm.tokenizers.tokenizer import Tokenizer
12
12
  from helm.clients.client import CachingClient, truncate_and_tokenize_response_text
@@ -167,6 +167,7 @@ class RekaClient(CachingClient):
167
167
 
168
168
  response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
169
169
  except (requests.exceptions.RequestException, AssertionError) as e:
170
+ hexception(e)
170
171
  error: str = f"RekaClient error: {e}"
171
172
  return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
172
173
 
@@ -39,7 +39,7 @@ class StanfordHealthCareAzureOpenAIClient(AzureOpenAIClient):
39
39
  tokenizer=tokenizer,
40
40
  tokenizer_name=tokenizer_name,
41
41
  cache_config=cache_config,
42
- api_key="unused",
42
+ api_key=api_key,
43
43
  base_url=base_url,
44
44
  azure_openai_deployment_name=openai_model_name,
45
45
  api_version=api_version,
@@ -50,7 +50,7 @@ class StanfordHealthCareAzureOpenAIClient(AzureOpenAIClient):
50
50
  tokenizer=tokenizer,
51
51
  tokenizer_name=tokenizer_name,
52
52
  cache_config=cache_config,
53
- api_key="unused",
53
+ api_key=api_key,
54
54
  endpoint=endpoint,
55
55
  azure_openai_deployment_name=openai_model_name,
56
56
  api_version=api_version,
@@ -5,6 +5,7 @@ from dataclasses import asdict
5
5
  from typing import Any, Dict, List, Optional
6
6
 
7
7
  from helm.common.cache import CacheConfig
8
+ from helm.common.hierarchical_logger import hexception
8
9
  from helm.common.request import (
9
10
  wrap_request_time,
10
11
  Request,
@@ -82,6 +83,7 @@ class StanfordHealthCareHTTPModelClient(CachingClient, ABC):
82
83
  request_time=response["request_time"],
83
84
  )
84
85
  except requests.exceptions.RequestException as e:
86
+ hexception(e)
85
87
  return RequestResult(success=False, cached=False, error=f"Request error: {e}", completions=[], embedding=[])
86
88
 
87
89
  @abstractmethod