nomic 3.3.4__tar.gz → 3.4.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nomic might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.3.4
3
+ Version: 3.4.0
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -44,6 +44,7 @@ def map_data(
44
44
  description: The description of your dataset
45
45
  id_field: Specify your data unique id field. This field can be up to 36 characters in length. If not specified, one will be created for you named `id_`.
46
46
  is_public: Should the dataset be accessible outside your Nomic Atlas organization.
47
+ indexed_field: The text field from the dataset that will be used to create embeddings, which determines the layout of the data map in Atlas. Required for text data but won't have an impact if uploading embeddings or image blobs.
47
48
  projection: Options to adjust Nomic Project - the dimensionality algorithm organizing your dataset.
48
49
  topic_model: Options to adjust Nomic Topic - the topic model organizing your dataset.
49
50
  duplicate_detection: Options to adjust Nomic Duplicates - the duplicate detection algorithm.
@@ -62,6 +63,10 @@ def map_data(
62
63
  modality = "text"
63
64
 
64
65
  if blobs is not None:
66
+ if embeddings is not None:
67
+ raise ValueError(
68
+ "You cannot pass both `blobs` and `embeddings` to map_data(). To create a map of images, include `blobs` and not `embeddings`. To create a map of embeddings with images as metadata, include your images as a field in your `data` parameter."
69
+ )
65
70
  # change this when we support other modalities
66
71
  modality = "image"
67
72
  indexed_field = "_blob_hash"
@@ -12,6 +12,7 @@ from urllib.parse import urlparse
12
12
  import PIL
13
13
  import PIL.Image
14
14
  import requests
15
+ from tqdm import tqdm
15
16
 
16
17
  from .dataset import AtlasClass
17
18
  from .settings import *
@@ -286,18 +287,28 @@ def _text_embed4all(
286
287
  limits = {"cpu": 16, "kompute": 32, "metal": 1024}
287
288
  return n_tokens > limits[backend]
288
289
 
289
- output = _embed4all.embed(
290
- texts,
291
- prefix=task_type,
292
- dimensionality=dimensionality,
293
- long_text_mode=long_text_mode,
294
- return_dict=True,
295
- atlas=True,
296
- cancel_cb=cancel_cb if dynamic_mode else None,
297
- )
298
- ntok = output["n_prompt_tokens"]
290
+ pb = tqdm(total=len(texts), desc="Embedding texts", unit="inputs")
291
+ output_embeddings = []
292
+ ntok = 0
293
+ batch_size = 64
294
+ for start in range(0, len(texts), batch_size):
295
+ end = min(len(texts), start + batch_size)
296
+ b = end - start
297
+ out = _embed4all.embed(
298
+ texts[start:end],
299
+ prefix=task_type,
300
+ dimensionality=dimensionality,
301
+ long_text_mode=long_text_mode,
302
+ return_dict=True,
303
+ atlas=True,
304
+ cancel_cb=cancel_cb if dynamic_mode else None,
305
+ )
306
+ ntok += out["n_prompt_tokens"]
307
+ output_embeddings.extend(out["embeddings"])
308
+ pb.update(b)
309
+ pb.close()
299
310
  usage = {"prompt_tokens": ntok, "total_tokens": ntok}
300
- return {"embeddings": output["embeddings"], "usage": usage, "model": model, "inference_mode": "local"}
311
+ return {"embeddings": output_embeddings, "usage": usage, "model": model, "inference_mode": "local"}
301
312
 
302
313
 
303
314
  def free_embedding_model() -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.3.4
3
+ Version: 3.4.0
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -23,7 +23,7 @@ with open("README.md") as f:
23
23
 
24
24
  setup(
25
25
  name="nomic",
26
- version="3.3.4",
26
+ version="3.4.0",
27
27
  url="https://github.com/nomic-ai/nomic",
28
28
  description=description,
29
29
  long_description=long_description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes