crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of crfm-helm might be problematic.

Files changed (184)
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +29 -55
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +146 -134
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  10. helm/benchmark/annotation/call_center_annotator.py +247 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +32 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +31 -44
  18. helm/benchmark/annotation/model_as_judge.py +45 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  20. helm/benchmark/annotation/xstest_annotator.py +110 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +57 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  30. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  31. helm/benchmark/model_metadata_registry.py +3 -3
  32. helm/benchmark/presentation/test_run_entry.py +1 -0
  33. helm/benchmark/run.py +15 -0
  34. helm/benchmark/run_expander.py +56 -30
  35. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  36. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  37. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  38. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  39. helm/benchmark/run_specs/finance_run_specs.py +78 -1
  40. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  41. helm/benchmark/run_specs/vlm_run_specs.py +92 -21
  42. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  43. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  44. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  45. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  46. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  47. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  48. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  49. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  50. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  51. helm/benchmark/scenarios/scenario.py +1 -1
  52. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  53. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  54. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  55. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  56. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  57. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  58. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  59. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  60. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  61. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  62. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  63. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  64. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  65. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  66. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  67. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  68. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  69. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  70. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  71. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  72. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  73. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  74. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  75. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  76. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  78. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  79. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  80. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  81. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  82. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  83. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  84. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  85. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  86. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  87. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  88. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  89. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  91. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  92. helm/benchmark/server.py +1 -6
  93. helm/benchmark/static/schema_air_bench.yaml +750 -750
  94. helm/benchmark/static/schema_bhasa.yaml +709 -0
  95. helm/benchmark/static/schema_call_center.yaml +232 -0
  96. helm/benchmark/static/schema_cleva.yaml +768 -0
  97. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  98. helm/benchmark/static/schema_ewok.yaml +367 -0
  99. helm/benchmark/static/schema_finance.yaml +55 -9
  100. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  101. helm/benchmark/static/schema_safety.yaml +247 -0
  102. helm/benchmark/static/schema_tables.yaml +124 -7
  103. helm/benchmark/static/schema_thai.yaml +21 -0
  104. helm/benchmark/static/schema_vhelm.yaml +96 -91
  105. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  106. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  107. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  108. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  109. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  110. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  111. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  112. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  113. helm/benchmark/static_build/index.html +2 -2
  114. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  115. helm/clients/ai21_client.py +71 -1
  116. helm/clients/anthropic_client.py +7 -19
  117. helm/clients/huggingface_client.py +38 -37
  118. helm/clients/nvidia_nim_client.py +35 -0
  119. helm/clients/openai_client.py +2 -3
  120. helm/clients/palmyra_client.py +25 -0
  121. helm/clients/perspective_api_client.py +11 -6
  122. helm/clients/test_client.py +4 -6
  123. helm/clients/vision_language/open_flamingo_client.py +1 -2
  124. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  125. helm/common/images_utils.py +6 -0
  126. helm/common/mongo_key_value_store.py +2 -1
  127. helm/common/request.py +16 -0
  128. helm/config/model_deployments.yaml +315 -332
  129. helm/config/model_metadata.yaml +384 -110
  130. helm/config/tokenizer_configs.yaml +116 -11
  131. helm/proxy/example_queries.py +14 -21
  132. helm/proxy/services/server_service.py +1 -2
  133. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  134. helm/tokenizers/ai21_tokenizer.py +51 -59
  135. helm/tokenizers/cohere_tokenizer.py +0 -75
  136. helm/tokenizers/huggingface_tokenizer.py +0 -1
  137. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  138. helm/benchmark/static/benchmarking.css +0 -156
  139. helm/benchmark/static/benchmarking.js +0 -1705
  140. helm/benchmark/static/config.js +0 -3
  141. helm/benchmark/static/general.js +0 -122
  142. helm/benchmark/static/images/crfm-logo.png +0 -0
  143. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  144. helm/benchmark/static/images/helm-logo.png +0 -0
  145. helm/benchmark/static/images/language-model-helm.png +0 -0
  146. helm/benchmark/static/images/organizations/ai21.png +0 -0
  147. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  148. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  149. helm/benchmark/static/images/organizations/cohere.png +0 -0
  150. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  151. helm/benchmark/static/images/organizations/google.png +0 -0
  152. helm/benchmark/static/images/organizations/meta.png +0 -0
  153. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  154. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  155. helm/benchmark/static/images/organizations/openai.png +0 -0
  156. helm/benchmark/static/images/organizations/together.png +0 -0
  157. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  158. helm/benchmark/static/images/organizations/yandex.png +0 -0
  159. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  160. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  161. helm/benchmark/static/index.html +0 -68
  162. helm/benchmark/static/info-icon.png +0 -0
  163. helm/benchmark/static/json-urls.js +0 -69
  164. helm/benchmark/static/plot-captions.js +0 -27
  165. helm/benchmark/static/utils.js +0 -285
  166. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  167. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  168. helm/benchmark/window_services/ai21_window_service.py +0 -247
  169. helm/benchmark/window_services/cohere_window_service.py +0 -101
  170. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  171. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  172. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  173. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  174. helm/tokenizers/ice_tokenizer.py +0 -30
  175. helm/tokenizers/test_ice_tokenizer.py +0 -57
  176. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  177. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  178. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  179. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  180. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  181. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  182. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  183. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  184. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
helm/clients/nvidia_nim_client.py ADDED
@@ -0,0 +1,35 @@
+ from typing import Optional
+
+ from helm.clients.openai_client import OpenAIClient
+ from helm.common.cache import CacheConfig
+ from helm.common.request import Request
+ from helm.tokenizers.tokenizer import Tokenizer
+
+
+ class NvidiaNimClient(OpenAIClient):
+
+     BASE_URL = "https://integrate.api.nvidia.com/v1"
+
+     def __init__(
+         self,
+         tokenizer: Tokenizer,
+         tokenizer_name: str,
+         cache_config: CacheConfig,
+         api_key: Optional[str] = None,
+     ):
+         self.tokenizer = tokenizer
+         self.tokenizer_name = tokenizer_name
+         super().__init__(
+             tokenizer=tokenizer,
+             tokenizer_name=tokenizer_name,
+             cache_config=cache_config,
+             api_key=api_key,
+             org_id=None,
+             base_url=NvidiaNimClient.BASE_URL,
+         )
+
+     def _get_model_for_request(self, request: Request) -> str:
+         return request.model
+
+     def _is_chat_model_engine(self, model_engine: str) -> bool:
+         return True
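Note: NvidiaNimClient reuses the OpenAI chat client unchanged and only overrides model lookup and chat detection, pointing the client at NVIDIA's OpenAI-compatible endpoint. For orientation, a rough standalone equivalent using the openai SDK directly (the API key and model id below are placeholders, not taken from this release):

from openai import OpenAI

client = OpenAI(api_key="NVIDIA_API_KEY", base_url="https://integrate.api.nvidia.com/v1")
response = client.chat.completions.create(
    model="meta/llama3-8b-instruct",  # placeholder model id
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)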
helm/clients/openai_client.py CHANGED
@@ -12,8 +12,8 @@ from helm.common.tokenization_request import (
      TokenizationRequest,
      TokenizationRequestResult,
  )
- from helm.tokenizers.tokenizer import Tokenizer
  from .client import CachingClient, truncate_sequence, generate_uid_for_multimodal_prompt
+ from helm.tokenizers.tokenizer import Tokenizer

  try:
      import openai
@@ -132,6 +132,7 @@ class OpenAIClient(CachingClient):
              content: Union[str, List[Union[str, Any]]]
              if request.multimodal_prompt is not None:
                  content = []
+                 request.validate()
                  for media_object in request.multimodal_prompt.media_objects:
                      if media_object.is_type("image") and media_object.location:
                          from helm.common.images_utils import encode_base64
@@ -140,8 +141,6 @@ class OpenAIClient(CachingClient):
                          image_object: Dict[str, str] = {"url": f"data:image/jpeg;base64,{base64_image}"}
                          content.append({"type": "image_url", "image_url": image_object})
                      elif media_object.is_type(TEXT_TYPE):
-                         if media_object.text is None:
-                             raise ValueError("MediaObject of text type has missing text field value")
                          content.append({"type": media_object.type, "text": media_object.text})
                      else:
                          raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
helm/clients/palmyra_client.py CHANGED
@@ -3,6 +3,7 @@ import json
  import requests
  from typing import Any, Dict, List

+ from helm.clients.openai_client import OpenAIClient
  from helm.common.cache import CacheConfig
  from helm.common.hierarchical_logger import hlog
  from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token, ErrorFlags
@@ -142,3 +143,27 @@ class PalmyraClient(CachingClient):
              completions=completions,
              embedding=[],
          )
+
+
+ class PalmyraChatClient(OpenAIClient):
+     """Sends request to a Palmyra model using a OpenAI-compatible Chat API."""
+
+     def __init__(
+         self,
+         tokenizer: Tokenizer,
+         tokenizer_name: str,
+         cache_config: CacheConfig,
+         api_key: str,
+         base_url: str,
+     ):
+         super().__init__(
+             tokenizer=tokenizer,
+             tokenizer_name=tokenizer_name,
+             cache_config=cache_config,
+             api_key=api_key,
+             org_id=None,
+             base_url=base_url,
+         )
+
+     def _is_chat_model_engine(self, model_engine: str) -> bool:
+         return True
helm/clients/perspective_api_client.py CHANGED
@@ -4,16 +4,21 @@ from dataclasses import asdict
  from typing import Any, List, Dict, Optional

  from dacite import from_dict
- from googleapiclient import discovery
- from googleapiclient.errors import BatchError, HttpError
- from googleapiclient.http import BatchHttpRequest
- from httplib2 import HttpLib2Error
+
  from helm.clients.toxicity_classifier_client import ToxicityClassifierClient
+ from helm.common.optional_dependencies import handle_module_not_found_error
  from helm.proxy.retry import NonRetriableException
-
  from helm.common.cache import Cache, CacheConfig
  from helm.common.perspective_api_request import ToxicityAttributes, PerspectiveAPIRequest, PerspectiveAPIRequestResult
- from google.auth.exceptions import DefaultCredentialsError
+
+ try:
+     from googleapiclient import discovery
+     from googleapiclient.errors import BatchError, HttpError
+     from googleapiclient.http import BatchHttpRequest
+     from httplib2 import HttpLib2Error
+     from google.auth.exceptions import DefaultCredentialsError
+ except ModuleNotFoundError as e:
+     handle_module_not_found_error(e, ["metrics"])


  class PerspectiveAPIClientCredentialsError(NonRetriableException):
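Note: the Perspective API imports now follow HELM's optional-dependency pattern, where third-party packages are imported inside a try block and a missing package is routed through handle_module_not_found_error. A minimal sketch of that pattern with a hypothetical dependency (the package name is a placeholder; "metrics" mirrors the extras group used above):

from helm.common.optional_dependencies import handle_module_not_found_error

try:
    import some_optional_package  # hypothetical third-party dependency
except ModuleNotFoundError as e:
    # Expected to re-raise with a hint to install the matching extras group,
    # e.g. pip install "crfm-helm[metrics]"
    handle_module_not_found_error(e, ["metrics"])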
helm/clients/test_client.py CHANGED
@@ -23,30 +23,28 @@ def test_truncate_sequence():
      # echo_prompt = True, nothing gets truncated
      truncate_sequence_helper(
          ["a", "b", "c"],
-         Request(
-             model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", prompt="abc", echo_prompt=True
-         ),
+         Request(model="openai/gpt2", model_deployment="huggingface/gpt2", prompt="abc", echo_prompt=True),
          ["a", "b", "c"],
      )

      # Nothing gets truncated
      truncate_sequence_helper(
          ["hello", " world"],
-         Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", stop_sequences=["#"]),
+         Request(model="openai/gpt2", model_deployment="huggingface/gpt2", stop_sequences=["#"]),
          ["hello", " world"],
      )

      # Truncate using stop sequences
      truncate_sequence_helper(
          ["hello", " world", "\n", "what"],
-         Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", stop_sequences=["\n"]),
+         Request(model="openai/gpt2", model_deployment="huggingface/gpt2", stop_sequences=["\n"]),
          ["hello", " world"],
      )

      # Truncate using max tokens
      truncate_sequence_helper(
          ["a", "b", "c"],
-         Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", max_tokens=2),
+         Request(model="openai/gpt2", model_deployment="huggingface/gpt2", max_tokens=2),
          ["a", "b"],
      )

helm/clients/vision_language/open_flamingo_client.py CHANGED
@@ -82,13 +82,12 @@ class OpenFlamingoClient(CachingClient):
          # Build the prompt
          prompt_text: str = ""
          images: List[Image.Image] = []
+         request.validate()
          for media_object in request.multimodal_prompt.media_objects:
              if media_object.is_type("image") and media_object.location:
                  images.append(open_image(media_object.location))
                  prompt_text += self.IMAGE_TOKEN
              elif media_object.is_type(TEXT_TYPE):
-                 if media_object.text is None:
-                     raise ValueError("MediaObject of text type has missing text field value")
                  prompt_text += media_object.text
              else:
                  raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
helm/clients/vision_language/palmyra_vision_client.py CHANGED
@@ -6,13 +6,19 @@ import requests
  from helm.common.cache import CacheConfig
  from helm.common.images_utils import encode_base64
  from helm.common.media_object import TEXT_TYPE
- from helm.common.request import Request, RequestResult, GeneratedOutput
+ from helm.common.request import Request, RequestResult, GeneratedOutput, ErrorFlags
  from helm.common.request import wrap_request_time
  from helm.clients.client import CachingClient, generate_uid_for_multimodal_prompt, truncate_and_tokenize_response_text
  from helm.tokenizers.tokenizer import Tokenizer


+ class PalmyraVisionContentBlockedError(Exception):
+     pass
+
+
  class PalmyraVisionClient(CachingClient):
+     CONTENT_BLOCKED_ERROR: str = "fail.input.content.moderation"
+
      def __init__(self, tokenizer: Tokenizer, tokenizer_name: str, endpoint: str, cache_config: CacheConfig):
          super().__init__(cache_config)
          self.tokenizer: Tokenizer = tokenizer
@@ -49,17 +55,19 @@ class PalmyraVisionClient(CachingClient):
              response = requests.post(
                  self.endpoint, headers={"Content-Type": "application/json"}, data=json.dumps({"parts": prompt})
              )
-             if response.status_code != 200:
-                 curl_command: str = (
-                     f"curl --location '{self.endpoint}' --header 'Content-Type: application/json' "
-                     f"--data '{json.dumps({'parts': prompt})}'"
-                 )
-                 assert False, f"Got status code {response.status_code}. Try {curl_command}"
-
              json_response = json.loads(response.text)
-             assert (
-                 "choices" in json_response and "errors" not in json_response
-             ), f"Invalid response: {response.text}"
+
+             # Check for content blocked error
+             if (
+                 "errors" in json_response
+                 and "tpe" in json_response
+                 and json_response["tpe"] == self.CONTENT_BLOCKED_ERROR
+             ):
+                 raise PalmyraVisionContentBlockedError(json_response["errors"])
+
+             # Hard fail if the `choices` is missing from the response
+             assert "choices" in json_response, f"Invalid response: {response.text}"
+
              return json_response

          cache_key = CachingClient.make_cache_key(
@@ -67,8 +75,15 @@ class PalmyraVisionClient(CachingClient):
              request=request,
          )
          result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
-     except RuntimeError as ex:
-         return RequestResult(success=False, cached=False, error=str(ex), completions=[], embedding=[])
+     except PalmyraVisionContentBlockedError as ex:
+         return RequestResult(
+             success=False,
+             cached=False,
+             error=f"Content blocked: {str(ex)}",
+             completions=[],
+             embedding=[],
+             error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+         )

          # The internal endpoint doesn't support any other parameters, so we have to truncate ourselves
          completions: List[GeneratedOutput] = [
helm/common/images_utils.py CHANGED
@@ -1,3 +1,4 @@
+ from hashlib import md5
  import base64
  import io
  import os
@@ -44,6 +45,11 @@ def encode_base64(image_location: str, format="JPEG") -> str:
      return base64.b64encode(image_file.getvalue()).decode("ascii")


+ def generate_hash(image: Image.Image) -> str:
+     """Generates a hash for the image."""
+     return md5(image.tobytes()).hexdigest()
+
+
  def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None) -> None:
      """
      Copies the image file from `src` path to `dest` path. If dimensions `width` and `height`
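Note: the new generate_hash helper fingerprints an image by its decoded pixel bytes, so identical pixels hash identically regardless of file name or on-disk encoding. A quick standalone illustration of the same idea using Pillow directly:

from hashlib import md5
from PIL import Image

# Two in-memory images with the same mode, size, and pixels produce the same digest.
red_a = Image.new("RGB", (8, 8), color=(255, 0, 0))
red_b = Image.new("RGB", (8, 8), color=(255, 0, 0))
assert md5(red_a.tobytes()).hexdigest() == md5(red_b.tobytes()).hexdigest()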
helm/common/mongo_key_value_store.py CHANGED
@@ -85,4 +85,5 @@ class MongoKeyValueStore(KeyValueStore):
          self._collection.bulk_write(operations)

      def remove(self, key: Dict) -> None:
-         self._collection.delete_one(key)
+         query = {self._REQUEST_KEY: self._canonicalize_key(key)}
+         self._collection.delete_one(query)
helm/common/request.py CHANGED
@@ -72,6 +72,22 @@ class Request:
      image_generation_parameters: Optional[ImageGenerationParameters] = None
      """Parameters for image generation."""

+     def validate(self):
+         if (
+             (self.messages and self.prompt)
+             or (self.messages and self.multimodal_prompt)
+             or (self.prompt and self.multimodal_prompt)
+         ):
+             raise ValueError("Exactly one of the messages, prompt, multimodal_prompt fields should be set")
+
+         if self.multimodal_prompt:
+             for media_object in self.multimodal_prompt.media_objects:
+                 if media_object.content_type == "text" and media_object.text is None:
+                     raise ValueError("Media object with text content type must have text set")
+
+                 if media_object.content_type == "image" and media_object.location is None:
+                     raise ValueError("Media object with image content type must have location set")
+
      @property
      def model_host(self) -> str:
          """Returns the model host (referring to the deployment.