nomic 3.1.1__tar.gz → 3.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nomic might be problematic. Click here for more details.
- {nomic-3.1.1 → nomic-3.1.3}/PKG-INFO +1 -1
- {nomic-3.1.1 → nomic-3.1.3}/nomic/aws/sagemaker.py +8 -21
- {nomic-3.1.1 → nomic-3.1.3}/nomic/dataset.py +3 -1
- {nomic-3.1.1 → nomic-3.1.3}/nomic.egg-info/PKG-INFO +1 -1
- {nomic-3.1.1 → nomic-3.1.3}/nomic.egg-info/requires.txt +1 -1
- {nomic-3.1.1 → nomic-3.1.3}/setup.py +2 -2
- {nomic-3.1.1 → nomic-3.1.3}/README.md +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/__init__.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/atlas.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/aws/__init__.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/cli.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/data_inference.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/data_operations.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/embed.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/pl_callbacks/__init__.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/pl_callbacks/pl_callback.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/settings.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic/utils.py +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic.egg-info/SOURCES.txt +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic.egg-info/dependency_links.txt +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic.egg-info/entry_points.txt +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/nomic.egg-info/top_level.txt +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/pyproject.toml +0 -0
- {nomic-3.1.1 → nomic-3.1.3}/setup.cfg +0 -0
|
@@ -38,26 +38,6 @@ def parse_sagemaker_response(response):
|
|
|
38
38
|
return resp["embeddings"]
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
def preprocess_texts(texts: List[str], task_type: str = "search_document"):
|
|
42
|
-
"""
|
|
43
|
-
Preprocess a list of texts for embedding using a sagemaker model.
|
|
44
|
-
|
|
45
|
-
Args:
|
|
46
|
-
texts: List of texts to be embedded.
|
|
47
|
-
task_type: The task type to use when embedding. One of `search_query`, `search_document`, `classification`, `clustering`
|
|
48
|
-
|
|
49
|
-
Returns:
|
|
50
|
-
List of texts formatted for sagemaker embedding.
|
|
51
|
-
"""
|
|
52
|
-
assert task_type in [
|
|
53
|
-
"search_query",
|
|
54
|
-
"search_document",
|
|
55
|
-
"classification",
|
|
56
|
-
"clustering",
|
|
57
|
-
], f"Invalid task type: {task_type}"
|
|
58
|
-
return [f"{task_type}: {text}" for text in texts]
|
|
59
|
-
|
|
60
|
-
|
|
61
41
|
def batch_transform_text(
|
|
62
42
|
s3_input_path: str,
|
|
63
43
|
s3_output_path: str,
|
|
@@ -157,7 +137,13 @@ def embed_text(
|
|
|
157
137
|
logger.warning("No texts to embed.")
|
|
158
138
|
return None
|
|
159
139
|
|
|
160
|
-
|
|
140
|
+
assert task_type in [
|
|
141
|
+
"search_query",
|
|
142
|
+
"search_document",
|
|
143
|
+
"classification",
|
|
144
|
+
"clustering",
|
|
145
|
+
], f"Invalid task type: {task_type}"
|
|
146
|
+
|
|
161
147
|
assert dimensionality in (
|
|
162
148
|
64,
|
|
163
149
|
128,
|
|
@@ -175,6 +161,7 @@ def embed_text(
|
|
|
175
161
|
"texts": texts[i : i + batch_size],
|
|
176
162
|
"binary": binary,
|
|
177
163
|
"dimensionality": dimensionality,
|
|
164
|
+
"task_type": task_type,
|
|
178
165
|
}
|
|
179
166
|
)
|
|
180
167
|
response = client.invoke_endpoint(EndpointName=sagemaker_endpoint, Body=batch, ContentType="application/json")
|
|
@@ -1409,9 +1409,10 @@ class AtlasDataset(AtlasClass):
|
|
|
1409
1409
|
# TODO: add support for other modalities
|
|
1410
1410
|
images = []
|
|
1411
1411
|
for uuid, blob in tqdm(zip(ids, blobs), total=len(ids), desc="Loading images"):
|
|
1412
|
-
if isinstance(blob, str) and os.path.exists(blob):
|
|
1412
|
+
if (isinstance(blob, str) or isinstance(blob, Path)) and os.path.exists(blob):
|
|
1413
1413
|
# Auto resize to max 512x512
|
|
1414
1414
|
image = Image.open(blob)
|
|
1415
|
+
image = image.convert("RGB")
|
|
1415
1416
|
if image.height > 512 or image.width > 512:
|
|
1416
1417
|
image = image.resize((512, 512))
|
|
1417
1418
|
buffered = BytesIO()
|
|
@@ -1420,6 +1421,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1420
1421
|
elif isinstance(blob, bytes):
|
|
1421
1422
|
images.append((uuid, blob))
|
|
1422
1423
|
elif isinstance(blob, Image.Image):
|
|
1424
|
+
blob = blob.convert("RGB") # type: ignore
|
|
1423
1425
|
if blob.height > 512 or blob.width > 512:
|
|
1424
1426
|
blob = blob.resize((512, 512))
|
|
1425
1427
|
buffered = BytesIO()
|
|
@@ -8,7 +8,7 @@ description = "The official Nomic python client."
|
|
|
8
8
|
|
|
9
9
|
setup(
|
|
10
10
|
name="nomic",
|
|
11
|
-
version="3.1.1",
|
|
11
|
+
version="3.1.3",
|
|
12
12
|
url="https://github.com/nomic-ai/nomic",
|
|
13
13
|
description=description,
|
|
14
14
|
long_description=description,
|
|
@@ -48,7 +48,7 @@ setup(
|
|
|
48
48
|
"pylint",
|
|
49
49
|
"pytest",
|
|
50
50
|
"isort",
|
|
51
|
-
"pyright",
|
|
51
|
+
"pyright<=1.1.377",
|
|
52
52
|
"myst-parser",
|
|
53
53
|
"mkdocs-material",
|
|
54
54
|
"mkautodoc",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|