nomic 3.0.36__tar.gz → 3.0.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nomic might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.0.36
3
+ Version: 3.0.41
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -38,6 +38,7 @@ def map_data(
38
38
 
39
39
  Args:
40
40
  data: An ordered collection of the datapoints you are structuring. Can be a list of dictionaries, Pandas Dataframe or PyArrow Table.
41
+ blobs: A list of image paths, bytes, or PIL images to add to your image dataset that are stored locally.
41
42
  embeddings: An [N,d] numpy array containing the N embeddings to add.
42
43
  identifier: A name for your dataset that is used to generate the dataset identifier. A unique name will be chosen if not supplied.
43
44
  description: The description of your dataset
@@ -1041,7 +1041,7 @@ class AtlasDataset(AtlasClass):
1041
1041
  name: The name of the index and the map.
1042
1042
  indexed_field: For text datasets, name the data field corresponding to the text to be mapped.
1043
1043
  reuse_embeddings_from_index: the name of the index to reuse embeddings from.
1044
- modality: The data modality of this index. Currently, Atlas supports either `text` or `embedding` indices.
1044
+ modality: The data modality of this index. Currently, Atlas supports `text`, `image`, or `embedding` indices.
1045
1045
  projection: Options for configuring the 2D projection algorithm
1046
1046
  topic_model: Options for configuring the topic model
1047
1047
  duplicate_detection: Options for configuring semantic duplicate detection
@@ -1064,7 +1064,7 @@ class AtlasDataset(AtlasClass):
1064
1064
  elif isinstance(topic_model, NomicTopicOptions):
1065
1065
  pass
1066
1066
  elif topic_model:
1067
- topic_model = NomicTopicOptions()
1067
+ topic_model = NomicTopicOptions(topic_label_field=indexed_field)
1068
1068
  else:
1069
1069
  topic_model = NomicTopicOptions(build_topic_model=False)
1070
1070
 
@@ -1086,6 +1086,9 @@ class AtlasDataset(AtlasClass):
1086
1086
  else:
1087
1087
  embedding_model = NomicEmbedOptions()
1088
1088
 
1089
+ if modality is None:
1090
+ modality = self.meta["modality"]
1091
+
1089
1092
  colorable_fields = []
1090
1093
 
1091
1094
  for field in self.dataset_fields:
@@ -1093,7 +1096,7 @@ class AtlasDataset(AtlasClass):
1093
1096
  colorable_fields.append(field)
1094
1097
 
1095
1098
  build_template = {}
1096
- if self.modality == "embedding":
1099
+ if modality == "embedding":
1097
1100
  if topic_model.topic_label_field is None:
1098
1101
  logger.warning(
1099
1102
  "You did not specify the `topic_label_field` option in your topic_model, your dataset will not contain auto-labeled topics."
@@ -1135,7 +1138,7 @@ class AtlasDataset(AtlasClass):
1135
1138
  ),
1136
1139
  }
1137
1140
 
1138
- elif self.modality == "text" or self.modality == "image":
1141
+ elif modality == "text" or modality == "image":
1139
1142
  # find the index id of the index with name reuse_embeddings_from_index
1140
1143
  reuse_embedding_from_index_id = None
1141
1144
  indices = self.indices
@@ -1149,13 +1152,18 @@ class AtlasDataset(AtlasClass):
1149
1152
  f"Could not find the index '{reuse_embeddings_from_index}' to re-use from. Possible options are {[index.name for index in indices]}"
1150
1153
  )
1151
1154
 
1152
- if indexed_field is None:
1155
+ if indexed_field is None and modality == "text":
1153
1156
  raise Exception("You did not specify a field to index. Specify an 'indexed_field'.")
1154
1157
 
1158
+ if modality == "image":
1159
+ indexed_field = "_blob_hash"
1160
+ if indexed_field is not None:
1161
+ logger.warning("Ignoring indexed_field for image datasets. Only _blob_hash is supported.")
1162
+
1155
1163
  if indexed_field not in self.dataset_fields:
1156
1164
  raise Exception(f"Indexing on {indexed_field} not allowed. Valid options are: {self.dataset_fields}")
1157
1165
 
1158
- if self.modality == "image":
1166
+ if modality == "image":
1159
1167
  if topic_model.topic_label_field is None:
1160
1168
  print(
1161
1169
  "You did not specify the `topic_label_field` option in your topic_model, your dataset will not contain auto-labeled topics."
@@ -1163,7 +1171,9 @@ class AtlasDataset(AtlasClass):
1163
1171
  topic_field = None
1164
1172
  topic_model.build_topic_model = False
1165
1173
  else:
1166
- topic_field = topic_model.topic_label_field
1174
+ topic_field = (
1175
+ topic_model.topic_label_field if topic_model.topic_label_field != indexed_field else None
1176
+ )
1167
1177
  else:
1168
1178
  topic_field = topic_model.topic_label_field
1169
1179
 
@@ -1245,7 +1255,7 @@ class AtlasDataset(AtlasClass):
1245
1255
  logger.warning("Could not find a map being built for this dataset.")
1246
1256
  else:
1247
1257
  logger.info(
1248
- f"Created map `{atlas_projection.name}` in dataset `{self.identifier}`: {atlas_projection.map_link}"
1258
+ f"Created map `{atlas_projection.name}` in dataset `{self.identifier}`: {atlas_projection.dataset_link}"
1249
1259
  )
1250
1260
  return atlas_projection
1251
1261
 
@@ -1346,6 +1356,7 @@ class AtlasDataset(AtlasClass):
1346
1356
  Args:
1347
1357
  data: A pandas DataFrame, list of dictionaries, or pyarrow Table matching the dataset schema.
1348
1358
  embeddings: A numpy array of embeddings: each row corresponds to a row in the table. Use if you already have embeddings for your datapoints.
1359
+ blobs: A list of image paths, bytes, or PIL Images. Use if you want to create an AtlasDataset using image embeddings over your images. Note: Blobs are stored locally only.
1349
1360
  pbar: (Optional). A tqdm progress bar to update.
1350
1361
  """
1351
1362
  if embeddings is not None:
@@ -1364,6 +1375,7 @@ class AtlasDataset(AtlasClass):
1364
1375
  """
1365
1376
  Add data, with associated blobs, to the dataset.
1366
1377
  Uploads blobs to the server and associates them with the data.
1378
+ Blobs must reference objects stored locally.
1367
1379
  """
1368
1380
  if isinstance(data, DataFrame):
1369
1381
  data = pa.Table.from_pandas(data)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nomic
3
- Version: 3.0.36
3
+ Version: 3.0.41
4
4
  Summary: The official Nomic python client.
5
5
  Home-page: https://github.com/nomic-ai/nomic
6
6
  Author: nomic.ai
@@ -8,7 +8,7 @@ description = "The official Nomic python client."
8
8
 
9
9
  setup(
10
10
  name="nomic",
11
- version="3.0.36",
11
+ version="3.0.41",
12
12
  url="https://github.com/nomic-ai/nomic",
13
13
  description=description,
14
14
  long_description=description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes