nomic 3.3.4.tar.gz → 3.4.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of nomic might be problematic.
- {nomic-3.3.4 → nomic-3.4.1}/PKG-INFO +1 -1
- {nomic-3.3.4 → nomic-3.4.1}/nomic/atlas.py +5 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/dataset.py +11 -20
- {nomic-3.3.4 → nomic-3.4.1}/nomic/embed.py +22 -11
- {nomic-3.3.4 → nomic-3.4.1}/nomic.egg-info/PKG-INFO +1 -1
- {nomic-3.3.4 → nomic-3.4.1}/setup.py +1 -1
- {nomic-3.3.4 → nomic-3.4.1}/README.md +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/__init__.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/aws/__init__.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/aws/sagemaker.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/cli.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/data_inference.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/data_operations.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/pl_callbacks/__init__.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/pl_callbacks/pl_callback.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/settings.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic/utils.py +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic.egg-info/SOURCES.txt +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic.egg-info/dependency_links.txt +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic.egg-info/entry_points.txt +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic.egg-info/requires.txt +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/nomic.egg-info/top_level.txt +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/pyproject.toml +0 -0
- {nomic-3.3.4 → nomic-3.4.1}/setup.cfg +0 -0

{nomic-3.3.4 → nomic-3.4.1}/nomic/atlas.py +5 -0

@@ -44,6 +44,7 @@ def map_data(
         description: The description of your dataset
         id_field: Specify your data unique id field. This field can be up to 36 characters in length. If not specified, one will be created for you named `id_`.
         is_public: Should the dataset be accessible outside your Nomic Atlas organization.
+        indexed_field: The text field from the dataset that will be used to create embeddings, which determines the layout of the data map in Atlas. Required for text data but won't have an impact if uploading embeddings or image blobs.
         projection: Options to adjust Nomic Project - the dimensionality algorithm organizing your dataset.
         topic_model: Options to adjust Nomic Topic - the topic model organizing your dataset.
         duplicate_detection: Options to adjust Nomic Duplicates - the duplicate detection algorithm.
@@ -62,6 +63,10 @@ def map_data(
         modality = "text"
 
     if blobs is not None:
+        if embeddings is not None:
+            raise ValueError(
+                "You cannot pass both `blobs` and `embeddings` to map_data(). To create a map of images, include `blobs` and not `embeddings`. To create a map of embeddings with images as metadata, include your images as a field in your `data` parameter."
+            )
         # change this when we support other modalities
         modality = "image"
         indexed_field = "_blob_hash"
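
Taken together, the atlas.py changes document the `indexed_field` argument and reject calls that pass both `blobs` and `embeddings` to map_data(). A minimal sketch of the two valid call shapes, assuming the map_data parameters shown above (the file names, labels, and embedding matrix below are made up for illustration):

import numpy as np

from nomic import atlas

# Image map: pass blobs only; as of 3.4.1, also passing `embeddings` raises ValueError.
atlas.map_data(
    blobs=["cat.png", "dog.png"],                       # hypothetical image files
    data=[{"label": "cat"}, {"label": "dog"}],
)

# Embedding map with images as metadata: reference the images in `data` instead.
atlas.map_data(
    embeddings=np.random.rand(2, 768),                  # hypothetical [N, d] matrix
    data=[{"image": "cat.png"}, {"image": "dog.png"}],
)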

{nomic-3.3.4 → nomic-3.4.1}/nomic/dataset.py +11 -20

@@ -1367,26 +1367,15 @@ class AtlasDataset(AtlasClass):
         """
         Deletes the specified datapoints from the dataset.
 
+        .. deprecated:: 3.4.0
         Args:
             ids: A list of data ids to delete
 
         Returns:
             True if data deleted successfully.
-
         """
-        if not isinstance(ids, list):
-            raise ValueError("You must specify a list of ids when deleting datums.")
-
-        response = requests.post(
-            self.atlas_api_path + "/v1/project/data/delete",
-            headers=self.header,
-            json={"project_id": self.id, "datum_ids": ids},
-        )
 
-        if response.status_code == 200:
-            return True
-        else:
-            raise Exception(response.text)
+        raise DeprecationWarning(f"The function AtlasDataset.delete_data is deprecated.")
 
     def add_data(
         self,
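
In 3.4.1, delete_data() no longer posts to /v1/project/data/delete; it raises immediately. A short sketch of what a caller now sees, assuming the package's public AtlasDataset import (the dataset identifier and ids below are made up):

from nomic import AtlasDataset

dataset = AtlasDataset("my-org/my-dataset")  # hypothetical dataset identifier

try:
    dataset.delete_data(["datum-1", "datum-2"])
except DeprecationWarning as err:
    # DeprecationWarning is raised as an exception here, not emitted via warnings.warn()
    print(err)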
@@ -1728,11 +1717,12 @@ class AtlasDataset(AtlasClass):
         """
         Utility method to update a project's maps by adding the given data.
 
-
-
-
-
-
+        .. deprecated:: 3.3.1
+        Args:
+            data: An [N,] element list of dictionaries containing metadata for each embedding.
+            embeddings: An [N, d] matrix of embeddings for updating embedding dataset. Leave as None to update text dataset.
+            shard_size: Data is uploaded in parallel by many threads. Adjust the number of datums to upload by each worker.
+            num_workers: The number of workers to use when sending data.
 
         """
 
@@ -1745,8 +1735,9 @@ class AtlasDataset(AtlasClass):
         Rebuilds all maps in a dataset with the latest state dataset data state. Maps will not be rebuilt to
         reflect the additions, deletions or updates you have made to your data until this method is called.
 
-
-
+        .. deprecated:: 3.3.1
+        Args:
+            rebuild_topic_models: (Default False) - If true, will create new topic models when updating these indices.
         """
 
         raise DeprecationWarning(

{nomic-3.3.4 → nomic-3.4.1}/nomic/embed.py +22 -11

@@ -12,6 +12,7 @@ from urllib.parse import urlparse
 import PIL
 import PIL.Image
 import requests
+from tqdm import tqdm
 
 from .dataset import AtlasClass
 from .settings import *
@@ -286,18 +287,28 @@ def _text_embed4all(
         limits = {"cpu": 16, "kompute": 32, "metal": 1024}
         return n_tokens > limits[backend]
 
-
-
-
-
-
-
-
-
-
-
+    pb = tqdm(total=len(texts), desc="Embedding texts", unit="inputs")
+    output_embeddings = []
+    ntok = 0
+    batch_size = 64
+    for start in range(0, len(texts), batch_size):
+        end = min(len(texts), start + batch_size)
+        b = end - start
+        out = _embed4all.embed(
+            texts[start:end],
+            prefix=task_type,
+            dimensionality=dimensionality,
+            long_text_mode=long_text_mode,
+            return_dict=True,
+            atlas=True,
+            cancel_cb=cancel_cb if dynamic_mode else None,
+        )
+        ntok += out["n_prompt_tokens"]
+        output_embeddings.extend(out["embeddings"])
+        pb.update(b)
+    pb.close()
     usage = {"prompt_tokens": ntok, "total_tokens": ntok}
-    return {"embeddings":
+    return {"embeddings": output_embeddings, "usage": usage, "model": model, "inference_mode": "local"}
 
 
 def free_embedding_model() -> None:
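
For local inference, these embed.py changes replace a single embed call with 64-input batches and a tqdm progress bar. A sketch of how this code path is typically reached through the public embed.text() API; the model name, task_type, and inference_mode="local" below are assumptions drawn from the package's documented options, not from this diff:

from nomic import embed

texts = [f"document {i}" for i in range(200)]

# With local inference, embedding runs through _text_embed4all, which now
# processes inputs 64 at a time and reports progress via tqdm.
output = embed.text(
    texts=texts,
    model="nomic-embed-text-v1.5",
    task_type="search_document",
    inference_mode="local",
)
print(len(output["embeddings"]), output["usage"])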