nomic 3.3.3__tar.gz → 3.3.4__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of nomic might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.3.3
3
+ Version: 3.3.4
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -1,10 +1,13 @@
1
1
  import base64
2
2
  import concurrent
3
3
  import concurrent.futures
4
+ import importlib.metadata
4
5
  import io
5
6
  import json
6
7
  import os
8
+ import re
7
9
  import time
10
+ import unicodedata
8
11
  from contextlib import contextmanager
9
12
  from datetime import datetime
10
13
  from io import BytesIO
@@ -72,13 +75,20 @@ class AtlasClass(object):
72
75
  token = self.credentials["token"]
73
76
  self.token = token
74
77
 
75
- self.header = {"Authorization": f"Bearer {token}"}
78
+ try:
79
+ version = importlib.metadata.version("nomic")
80
+ except Exception:
81
+ version = "unknown"
82
+
83
+ self.header = {"Authorization": f"Bearer {token}", "User-Agent": f"py-nomic/{version}"}
76
84
 
77
85
  if self.token:
78
86
  response = requests.get(
79
87
  self.atlas_api_path + "/v1/user",
80
88
  headers=self.header,
81
89
  )
90
+ if "X-AtlasWarning" in response.headers:
91
+ logger.warning(response.headers["X-AtlasWarning"])
82
92
  response = validate_api_http_response(response)
83
93
  if not response.status_code == 200:
84
94
  logger.warning(str(response))
@@ -664,14 +674,22 @@ class AtlasProjection:
664
674
  sidecar_suffix = "feather"
665
675
  if sidecar_name != "":
666
676
  sidecar_suffix = f"{sidecar_name}.feather"
667
- for key in tqdm(self._manifest["key"].to_pylist()):
668
- sidecar_path = self.tile_destination / f"{key}.{sidecar_suffix}"
669
- sidecar_url = (
670
- self.dataset.atlas_api_path
671
- + f"/v1/project/{self.dataset.id}/index/projection/{self.id}/quadtree/{key}.{sidecar_suffix}"
672
- )
673
- download_feather(sidecar_url, sidecar_path, headers=self.dataset.header, overwrite=overwrite)
674
- downloaded_files.append(sidecar_path)
677
+ with concurrent.futures.ThreadPoolExecutor(4) as ex:
678
+ futures = []
679
+ for key in tqdm(self._manifest["key"].to_pylist()):
680
+ sidecar_path = self.tile_destination / f"{key}.{sidecar_suffix}"
681
+ sidecar_url = (
682
+ self.dataset.atlas_api_path
683
+ + f"/v1/project/{self.dataset.id}/index/projection/{self.id}/quadtree/{key}.{sidecar_suffix}"
684
+ )
685
+ futures.append(
686
+ ex.submit(
687
+ download_feather, sidecar_url, sidecar_path, headers=self.dataset.header, overwrite=overwrite
688
+ )
689
+ )
690
+ downloaded_files.append(sidecar_path)
691
+ for f in futures:
692
+ f.result()
675
693
  return downloaded_files
676
694
 
677
695
  @property
@@ -753,6 +771,15 @@ class AtlasDataset(AtlasClass):
753
771
  * **dataset_id** - An alternative way to load a dataset is by passing the dataset_id directly. This only works if a dataset exists.
754
772
  """
755
773
  assert identifier is not None or dataset_id is not None, "You must pass a dataset identifier"
774
+ # Normalize identifier.
775
+ if identifier is not None:
776
+ s = identifier.split("/", 1)
777
+ identifier = unicodedata.normalize("NFD", s[-1]) # normalize accents
778
+ identifier = identifier.lower().replace(" ", "-").replace("_", "-")
779
+ identifier = re.sub(r"[^a-z0-9-]", "", identifier)
780
+ identifier = re.sub(r"-+", "-", identifier)
781
+ if len(s) == 2:
782
+ identifier = f"{s[0]}/{identifier}"
756
783
 
757
784
  super().__init__()
758
785
 
@@ -761,6 +788,8 @@ class AtlasDataset(AtlasClass):
761
788
  f"Passing organization_name has been removed in Nomic Python client 3.0. Instead identify your dataset with `organization_name/project_name` (e.g. sterling-cooper/november-ads)."
762
789
  )
763
790
 
791
+ # Set this before possible early return.
792
+ self._schema = None
764
793
  if dataset_id is not None:
765
794
  self.meta = self._get_project_by_id(dataset_id)
766
795
  return
@@ -793,7 +822,6 @@ class AtlasDataset(AtlasClass):
793
822
  )
794
823
 
795
824
  self.meta = self._get_project_by_id(project_id=dataset_id)
796
- self._schema = None
797
825
 
798
826
  def delete(self):
799
827
  """
@@ -1074,6 +1102,7 @@ class AtlasDataset(AtlasClass):
1074
1102
  else:
1075
1103
  projection = NomicProjectOptions()
1076
1104
 
1105
+ topic_model_was_false = topic_model is False
1077
1106
  if isinstance(topic_model, Dict):
1078
1107
  topic_model = NomicTopicOptions(**topic_model)
1079
1108
  elif isinstance(topic_model, NomicTopicOptions):
@@ -1117,7 +1146,7 @@ class AtlasDataset(AtlasClass):
1117
1146
 
1118
1147
  build_template = {}
1119
1148
  if modality == "embedding":
1120
- if topic_model.topic_label_field is None:
1149
+ if (not topic_model_was_false) and topic_model.topic_label_field is None:
1121
1150
  logger.warning(
1122
1151
  "You did not specify the `topic_label_field` option in your topic_model, your dataset will not contain auto-labeled topics."
1123
1152
  )
@@ -1620,6 +1649,7 @@ class AtlasDataset(AtlasClass):
1620
1649
  close_pbar = True
1621
1650
  pbar = tqdm(total=int(len(data)) // shard_size)
1622
1651
  failed = 0
1652
+ failed_reqs = 0
1623
1653
  succeeded = 0
1624
1654
  errors_504 = 0
1625
1655
  with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
@@ -1668,6 +1698,11 @@ class AtlasDataset(AtlasClass):
1668
1698
  failed += shard_size
1669
1699
  pbar.update(1)
1670
1700
  response.close()
1701
+ failed_reqs += 1
1702
+ if failed_reqs > 10:
1703
+ raise RuntimeError(
1704
+ f"{self.identifier}: Too many upload requests have failed at this time. Please try again later."
1705
+ )
1671
1706
  else:
1672
1707
  # A successful upload.
1673
1708
  succeeded += shard_size
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.3.3
3
+ Version: 3.3.4
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -23,7 +23,7 @@ with open("README.md") as f:
23
23
 
24
24
  setup(
25
25
  name="nomic",
26
- version="3.3.3",
26
+ version="3.3.4",
27
27
  url="https://github.com/nomic-ai/nomic",
28
28
  description=description,
29
29
  long_description=long_description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes