nomic 3.0.41__tar.gz → 3.0.42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nomic might be problematic. Click here for more details.
- {nomic-3.0.41 → nomic-3.0.42}/PKG-INFO +1 -1
- {nomic-3.0.41 → nomic-3.0.42}/nomic/aws/sagemaker.py +37 -20
- {nomic-3.0.41 → nomic-3.0.42}/nomic.egg-info/PKG-INFO +1 -1
- {nomic-3.0.41 → nomic-3.0.42}/setup.py +1 -1
- {nomic-3.0.41 → nomic-3.0.42}/README.md +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/__init__.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/atlas.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/aws/__init__.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/cli.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/data_inference.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/data_operations.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/dataset.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/embed.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/pl_callbacks/__init__.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/pl_callbacks/pl_callback.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/settings.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic/utils.py +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic.egg-info/SOURCES.txt +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic.egg-info/dependency_links.txt +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic.egg-info/entry_points.txt +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic.egg-info/requires.txt +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/nomic.egg-info/top_level.txt +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/pyproject.toml +0 -0
- {nomic-3.0.41 → nomic-3.0.42}/setup.cfg +0 -0
|
@@ -4,7 +4,7 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import multiprocessing as mp
|
|
6
6
|
from pathlib import PosixPath
|
|
7
|
-
from typing import List, Optional, Union
|
|
7
|
+
from typing import List, Optional, Tuple, Union
|
|
8
8
|
|
|
9
9
|
import boto3
|
|
10
10
|
import PIL
|
|
@@ -187,7 +187,22 @@ def embed_text(
|
|
|
187
187
|
}
|
|
188
188
|
|
|
189
189
|
|
|
190
|
-
|
|
190
|
+
# only way I could get sagemaker with multipart to work
|
|
191
|
+
def prepare_multipart_request(images: List[Tuple[str, bytes]]) -> Tuple[bytes, bytes]:
    """
    Build a multipart/form-data request body from named image payloads.

    Each entry becomes one part with a ``Content-Disposition: form-data``
    header (field name taken from the entry, filename ``image_<index>.jpg``)
    and a ``Content-Type: image/jpeg`` header, followed by the raw bytes.

    :param images: ``(form field name, raw image bytes)`` pairs, in the order
        they should appear in the body.
    :return: ``(body, boundary)`` where ``body`` is the encoded payload and
        ``boundary`` is the delimiter (without the leading ``--``) that the
        caller must advertise in the request's Content-Type header.
    """
    boundary = b"---------------------------Boundary"
    part_delimiter = b"--" + boundary + b"\r\n"

    # Collect the pieces and join once: repeated ``bytes +=`` would re-copy
    # the whole accumulated payload for every image in the batch.
    chunks = []
    for i, (name, img_bytes) in enumerate(images):
        chunks.append(part_delimiter)
        chunks.append(
            f'Content-Disposition: form-data; name="{name}"; filename="image_{i}.jpg"\r\n'.encode("utf-8")
        )
        chunks.append(b"Content-Type: image/jpeg\r\n\r\n")
        chunks.append(img_bytes)
        chunks.append(b"\r\n")

    # Closing delimiter terminates the multipart body.
    chunks.append(b"--" + boundary + b"--\r\n")

    return b"".join(chunks), boundary
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def preprocess_image(images: List[Union[str, "PIL.Image.Image", bytes]]) -> Tuple[bytes, bytes]:
|
|
191
206
|
"""
|
|
192
207
|
Preprocess a list of images for embedding using a sagemaker model.
|
|
193
208
|
|
|
@@ -210,17 +225,22 @@ def preprocess_image(images: List[Union[str, "PIL.Image.Image", bytes]]) -> List
|
|
|
210
225
|
image = image.convert("RGB")
|
|
211
226
|
buffered = io.BytesIO()
|
|
212
227
|
image.save(buffered, format="JPEG")
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
228
|
+
encoded_images.append(("image_data", buffered.getvalue()))
|
|
229
|
+
|
|
230
|
+
body, boundary = prepare_multipart_request(encoded_images)
|
|
231
|
+
return body, boundary
|
|
216
232
|
|
|
217
233
|
|
|
218
|
-
def sagemaker_image_request(
|
|
219
|
-
|
|
234
|
+
def sagemaker_image_request(
|
|
235
|
+
images: List[Union[str, bytes, "PIL.Image.Image"]], sagemaker_endpoint: str, region_name: str
|
|
236
|
+
):
|
|
237
|
+
body, boundary = preprocess_image(images)
|
|
220
238
|
|
|
221
239
|
client = boto3.client("sagemaker-runtime", region_name=region_name)
|
|
222
240
|
response = client.invoke_endpoint(
|
|
223
|
-
EndpointName=sagemaker_endpoint,
|
|
241
|
+
EndpointName=sagemaker_endpoint,
|
|
242
|
+
Body=body,
|
|
243
|
+
ContentType=f'multipart/form-data; boundary={boundary.decode("utf-8")}',
|
|
224
244
|
)
|
|
225
245
|
|
|
226
246
|
return parse_sagemaker_response(response)
|
|
@@ -230,21 +250,18 @@ def embed_image(
|
|
|
230
250
|
images: List[Union[str, "PIL.Image.Image", bytes]],
|
|
231
251
|
sagemaker_endpoint: str,
|
|
232
252
|
region_name: str,
|
|
233
|
-
model_name="nomic-embed-vision-v1",
|
|
253
|
+
model_name="nomic-embed-vision-v1.5",
|
|
254
|
+
batch_size=16,
|
|
234
255
|
) -> dict:
|
|
235
256
|
embeddings = []
|
|
236
257
|
|
|
237
|
-
max_workers = mp.cpu_count()
|
|
238
258
|
pbar = tqdm(total=len(images))
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
for future in concurrent.futures.as_completed(futures):
|
|
247
|
-
embeddings.extend(future.result())
|
|
259
|
+
for i in range(0, len(images), batch_size):
|
|
260
|
+
batch = images[i : i + batch_size]
|
|
261
|
+
embeddings.extend(
|
|
262
|
+
sagemaker_image_request(batch, sagemaker_endpoint=sagemaker_endpoint, region_name=region_name)
|
|
263
|
+
)
|
|
264
|
+
pbar.update(len(batch))
|
|
248
265
|
|
|
249
266
|
return {
|
|
250
267
|
"embeddings": embeddings,
|
|
@@ -260,7 +277,7 @@ def batch_transform_image(
|
|
|
260
277
|
arn: Optional[str] = None,
|
|
261
278
|
role: Optional[str] = None,
|
|
262
279
|
max_payload: Optional[int] = 6,
|
|
263
|
-
instance_type: str = "ml.
|
|
280
|
+
instance_type: str = "ml.g4dn.xlarge",
|
|
264
281
|
n_instances: int = 1,
|
|
265
282
|
wait: bool = True,
|
|
266
283
|
logs: bool = True,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|