nomic 3.0.41__tar.gz → 3.0.42__tar.gz

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nomic might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.0.41
3
+ Version: 3.0.42
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -4,7 +4,7 @@ import json
4
4
  import logging
5
5
  import multiprocessing as mp
6
6
  from pathlib import PosixPath
7
- from typing import List, Optional, Union
7
+ from typing import List, Optional, Tuple, Union
8
8
 
9
9
  import boto3
10
10
  import PIL
@@ -187,7 +187,22 @@ def embed_text(
187
187
  }
188
188
 
189
189
 
190
- def preprocess_image(images: List[Union[str, "PIL.Image.Image", bytes]]) -> List[bytes]:
190
+ # only way I could get sagemaker with multipart to work
191
+ def prepare_multipart_request(images: List[Tuple[str, bytes]]) -> Tuple[bytes, bytes]:
192
+ # Prepare the multipart body
193
+ boundary = b"---------------------------Boundary"
194
+ body = b""
195
+ for i, (name, img_bytes) in enumerate(images):
196
+ body += b"--" + boundary + b"\r\n"
197
+ body += f'Content-Disposition: form-data; name="{name}"; filename="image_{i}.jpg"\r\n'.encode("utf-8")
198
+ body += b"Content-Type: image/jpeg\r\n\r\n"
199
+ body += img_bytes + b"\r\n"
200
+ body += b"--" + boundary + b"--\r\n"
201
+
202
+ return body, boundary
203
+
204
+
205
+ def preprocess_image(images: List[Union[str, "PIL.Image.Image", bytes]]) -> Tuple[bytes, bytes]:
191
206
  """
192
207
  Preprocess a list of images for embedding using a sagemaker model.
193
208
 
@@ -210,17 +225,22 @@ def preprocess_image(images: List[Union[str, "PIL.Image.Image", bytes]]) -> List
210
225
  image = image.convert("RGB")
211
226
  buffered = io.BytesIO()
212
227
  image.save(buffered, format="JPEG")
213
- encoded_image = buffered.getvalue()
214
- encoded_images.append(encoded_image)
215
- return encoded_images
228
+ encoded_images.append(("image_data", buffered.getvalue()))
229
+
230
+ body, boundary = prepare_multipart_request(encoded_images)
231
+ return body, boundary
216
232
 
217
233
 
218
- def sagemaker_image_request(image: Union[str, bytes, "PIL.Image.Image"], sagemaker_endpoint: str, region_name: str):
219
- preprocessed_image = preprocess_image([image])
234
+ def sagemaker_image_request(
235
+ images: List[Union[str, bytes, "PIL.Image.Image"]], sagemaker_endpoint: str, region_name: str
236
+ ):
237
+ body, boundary = preprocess_image(images)
220
238
 
221
239
  client = boto3.client("sagemaker-runtime", region_name=region_name)
222
240
  response = client.invoke_endpoint(
223
- EndpointName=sagemaker_endpoint, Body=preprocessed_image[0], ContentType="image/jpeg"
241
+ EndpointName=sagemaker_endpoint,
242
+ Body=body,
243
+ ContentType=f'multipart/form-data; boundary={boundary.decode("utf-8")}',
224
244
  )
225
245
 
226
246
  return parse_sagemaker_response(response)
@@ -230,21 +250,18 @@ def embed_image(
230
250
  images: List[Union[str, "PIL.Image.Image", bytes]],
231
251
  sagemaker_endpoint: str,
232
252
  region_name: str,
233
- model_name="nomic-embed-vision-v1",
253
+ model_name="nomic-embed-vision-v1.5",
254
+ batch_size=16,
234
255
  ) -> dict:
235
256
  embeddings = []
236
257
 
237
- max_workers = mp.cpu_count()
238
258
  pbar = tqdm(total=len(images))
239
- with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
240
- futures = []
241
- for image in images:
242
- future = executor.submit(sagemaker_image_request, image, sagemaker_endpoint, region_name)
243
- future.add_done_callback(lambda p: pbar.update())
244
- futures.append(future)
245
-
246
- for future in concurrent.futures.as_completed(futures):
247
- embeddings.extend(future.result())
259
+ for i in range(0, len(images), batch_size):
260
+ batch = images[i : i + batch_size]
261
+ embeddings.extend(
262
+ sagemaker_image_request(batch, sagemaker_endpoint=sagemaker_endpoint, region_name=region_name)
263
+ )
264
+ pbar.update(len(batch))
248
265
 
249
266
  return {
250
267
  "embeddings": embeddings,
@@ -260,7 +277,7 @@ def batch_transform_image(
260
277
  arn: Optional[str] = None,
261
278
  role: Optional[str] = None,
262
279
  max_payload: Optional[int] = 6,
263
- instance_type: str = "ml.p3.2xlarge",
280
+ instance_type: str = "ml.g4dn.xlarge",
264
281
  n_instances: int = 1,
265
282
  wait: bool = True,
266
283
  logs: bool = True,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.0.41
3
+ Version: 3.0.42
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -8,7 +8,7 @@ description = "The official Nomic python client."
8
8
 
9
9
  setup(
10
10
  name="nomic",
11
- version="3.0.41",
11
+ version="3.0.42",
12
12
  url="https://github.com/nomic-ai/nomic",
13
13
  description=description,
14
14
  long_description=description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes