nomic 3.3.2__tar.gz → 3.3.4__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of nomic might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.3.2
3
+ Version: 3.3.4
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -1,10 +1,13 @@
1
1
  import base64
2
2
  import concurrent
3
3
  import concurrent.futures
4
+ import importlib.metadata
4
5
  import io
5
6
  import json
6
7
  import os
8
+ import re
7
9
  import time
10
+ import unicodedata
8
11
  from contextlib import contextmanager
9
12
  from datetime import datetime
10
13
  from io import BytesIO
@@ -12,6 +15,7 @@ from pathlib import Path
12
15
  from typing import Dict, List, Optional, Tuple, Union
13
16
 
14
17
  import numpy as np
18
+ import pandas as pd
15
19
  import pyarrow as pa
16
20
  import requests
17
21
  from loguru import logger
@@ -71,13 +75,20 @@ class AtlasClass(object):
71
75
  token = self.credentials["token"]
72
76
  self.token = token
73
77
 
74
- self.header = {"Authorization": f"Bearer {token}"}
78
+ try:
79
+ version = importlib.metadata.version("nomic")
80
+ except Exception:
81
+ version = "unknown"
82
+
83
+ self.header = {"Authorization": f"Bearer {token}", "User-Agent": f"py-nomic/{version}"}
75
84
 
76
85
  if self.token:
77
86
  response = requests.get(
78
87
  self.atlas_api_path + "/v1/user",
79
88
  headers=self.header,
80
89
  )
90
+ if "X-AtlasWarning" in response.headers:
91
+ logger.warning(response.headers["X-AtlasWarning"])
81
92
  response = validate_api_http_response(response)
82
93
  if not response.status_code == 200:
83
94
  logger.warning(str(response))
@@ -663,14 +674,22 @@ class AtlasProjection:
663
674
  sidecar_suffix = "feather"
664
675
  if sidecar_name != "":
665
676
  sidecar_suffix = f"{sidecar_name}.feather"
666
- for key in tqdm(self._manifest["key"].to_pylist()):
667
- sidecar_path = self.tile_destination / f"{key}.{sidecar_suffix}"
668
- sidecar_url = (
669
- self.dataset.atlas_api_path
670
- + f"/v1/project/{self.dataset.id}/index/projection/{self.id}/quadtree/{key}.{sidecar_suffix}"
671
- )
672
- download_feather(sidecar_url, sidecar_path, headers=self.dataset.header, overwrite=overwrite)
673
- downloaded_files.append(sidecar_path)
677
+ with concurrent.futures.ThreadPoolExecutor(4) as ex:
678
+ futures = []
679
+ for key in tqdm(self._manifest["key"].to_pylist()):
680
+ sidecar_path = self.tile_destination / f"{key}.{sidecar_suffix}"
681
+ sidecar_url = (
682
+ self.dataset.atlas_api_path
683
+ + f"/v1/project/{self.dataset.id}/index/projection/{self.id}/quadtree/{key}.{sidecar_suffix}"
684
+ )
685
+ futures.append(
686
+ ex.submit(
687
+ download_feather, sidecar_url, sidecar_path, headers=self.dataset.header, overwrite=overwrite
688
+ )
689
+ )
690
+ downloaded_files.append(sidecar_path)
691
+ for f in futures:
692
+ f.result()
674
693
  return downloaded_files
675
694
 
676
695
  @property
@@ -752,6 +771,15 @@ class AtlasDataset(AtlasClass):
752
771
  * **dataset_id** - An alternative way to load a dataset is by passing the dataset_id directly. This only works if a dataset exists.
753
772
  """
754
773
  assert identifier is not None or dataset_id is not None, "You must pass a dataset identifier"
774
+ # Normalize identifier.
775
+ if identifier is not None:
776
+ s = identifier.split("/", 1)
777
+ identifier = unicodedata.normalize("NFD", s[-1]) # normalize accents
778
+ identifier = identifier.lower().replace(" ", "-").replace("_", "-")
779
+ identifier = re.sub(r"[^a-z0-9-]", "", identifier)
780
+ identifier = re.sub(r"-+", "-", identifier)
781
+ if len(s) == 2:
782
+ identifier = f"{s[0]}/{identifier}"
755
783
 
756
784
  super().__init__()
757
785
 
@@ -760,6 +788,8 @@ class AtlasDataset(AtlasClass):
760
788
  f"Passing organization_name has been removed in Nomic Python client 3.0. Instead identify your dataset with `organization_name/project_name` (e.g. sterling-cooper/november-ads)."
761
789
  )
762
790
 
791
+ # Set this before possible early return.
792
+ self._schema = None
763
793
  if dataset_id is not None:
764
794
  self.meta = self._get_project_by_id(dataset_id)
765
795
  return
@@ -792,7 +822,6 @@ class AtlasDataset(AtlasClass):
792
822
  )
793
823
 
794
824
  self.meta = self._get_project_by_id(project_id=dataset_id)
795
- self._schema = None
796
825
 
797
826
  def delete(self):
798
827
  """
@@ -1073,6 +1102,7 @@ class AtlasDataset(AtlasClass):
1073
1102
  else:
1074
1103
  projection = NomicProjectOptions()
1075
1104
 
1105
+ topic_model_was_false = topic_model is False
1076
1106
  if isinstance(topic_model, Dict):
1077
1107
  topic_model = NomicTopicOptions(**topic_model)
1078
1108
  elif isinstance(topic_model, NomicTopicOptions):
@@ -1116,7 +1146,7 @@ class AtlasDataset(AtlasClass):
1116
1146
 
1117
1147
  build_template = {}
1118
1148
  if modality == "embedding":
1119
- if topic_model.topic_label_field is None:
1149
+ if (not topic_model_was_false) and topic_model.topic_label_field is None:
1120
1150
  logger.warning(
1121
1151
  "You did not specify the `topic_label_field` option in your topic_model, your dataset will not contain auto-labeled topics."
1122
1152
  )
@@ -1373,6 +1403,18 @@ class AtlasDataset(AtlasClass):
1373
1403
  blobs: A list of image paths, bytes, or PIL Images. Use if you want to create an AtlasDataset using image embeddings over your images. Note: Blobs are stored locally only.
1374
1404
  pbar: (Optional). A tqdm progress bar to update.
1375
1405
  """
1406
+ if isinstance(data, DataFrame):
1407
+ cols_before = set(data.columns)
1408
+ for col in cols_before:
1409
+ if col.startswith("_"):
1410
+ raise ValueError(
1411
+ f"You are attempting to upload a pandas dataframe with the column name {col}, but columns beginning with '_' are reserved for Atlas internal use. Please rename your column and try again."
1412
+ )
1413
+ data = pa.Table.from_pandas(data)
1414
+ for newcol in set(data.column_names).difference(cols_before):
1415
+ logger.warning(f"Dropping column {newcol} added in pandas conversion to pyarrow")
1416
+ data = data.drop([newcol])
1417
+
1376
1418
  if embeddings is not None:
1377
1419
  self._add_embeddings(data=data, embeddings=embeddings, pbar=pbar)
1378
1420
  elif isinstance(data, pa.Table) and "_embeddings" in data.column_names: # type: ignore
@@ -1607,6 +1649,7 @@ class AtlasDataset(AtlasClass):
1607
1649
  close_pbar = True
1608
1650
  pbar = tqdm(total=int(len(data)) // shard_size)
1609
1651
  failed = 0
1652
+ failed_reqs = 0
1610
1653
  succeeded = 0
1611
1654
  errors_504 = 0
1612
1655
  with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
@@ -1655,6 +1698,11 @@ class AtlasDataset(AtlasClass):
1655
1698
  failed += shard_size
1656
1699
  pbar.update(1)
1657
1700
  response.close()
1701
+ failed_reqs += 1
1702
+ if failed_reqs > 10:
1703
+ raise RuntimeError(
1704
+ f"{self.identifier}: Too many upload requests have failed at this time. Please try again later."
1705
+ )
1658
1706
  else:
1659
1707
  # A successful upload.
1660
1708
  succeeded += shard_size
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.3.2
3
+ Version: 3.3.4
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -23,7 +23,7 @@ with open("README.md") as f:
23
23
 
24
24
  setup(
25
25
  name="nomic",
26
- version="3.3.2",
26
+ version="3.3.4",
27
27
  url="https://github.com/nomic-ai/nomic",
28
28
  description=description,
29
29
  long_description=long_description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes