nomic 3.0.36__tar.gz → 3.0.41__tar.gz
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nomic might be problematic; further details are available on the original diff page.
- {nomic-3.0.36 → nomic-3.0.41}/PKG-INFO +1 -1
- {nomic-3.0.36 → nomic-3.0.41}/nomic/atlas.py +1 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/dataset.py +20 -8
- {nomic-3.0.36 → nomic-3.0.41}/nomic.egg-info/PKG-INFO +1 -1
- {nomic-3.0.36 → nomic-3.0.41}/setup.py +1 -1
- {nomic-3.0.36 → nomic-3.0.41}/README.md +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/__init__.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/aws/__init__.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/aws/sagemaker.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/cli.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/data_inference.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/data_operations.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/embed.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/pl_callbacks/__init__.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/pl_callbacks/pl_callback.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/settings.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic/utils.py +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic.egg-info/SOURCES.txt +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic.egg-info/dependency_links.txt +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic.egg-info/entry_points.txt +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic.egg-info/requires.txt +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/nomic.egg-info/top_level.txt +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/pyproject.toml +0 -0
- {nomic-3.0.36 → nomic-3.0.41}/setup.cfg +0 -0
|
@@ -38,6 +38,7 @@ def map_data(
|
|
|
38
38
|
|
|
39
39
|
Args:
|
|
40
40
|
data: An ordered collection of the datapoints you are structuring. Can be a list of dictionaries, Pandas Dataframe or PyArrow Table.
|
|
41
|
+
blobs: A list of image paths, bytes, or PIL images to add to your image dataset that are stored locally.
|
|
41
42
|
embeddings: An [N,d] numpy array containing the N embeddings to add.
|
|
42
43
|
identifier: A name for your dataset that is used to generate the dataset identifier. A unique name will be chosen if not supplied.
|
|
43
44
|
description: The description of your dataset
|
|
@@ -1041,7 +1041,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1041
1041
|
name: The name of the index and the map.
|
|
1042
1042
|
indexed_field: For text datasets, name the data field corresponding to the text to be mapped.
|
|
1043
1043
|
reuse_embeddings_from_index: the name of the index to reuse embeddings from.
|
|
1044
|
-
modality: The data modality of this index. Currently, Atlas supports either `text` or `embedding` indices.
|
|
1044
|
+
modality: The data modality of this index. Currently, Atlas supports either `text`, `image`, or `embedding` indices.
|
|
1045
1045
|
projection: Options for configuring the 2D projection algorithm
|
|
1046
1046
|
topic_model: Options for configuring the topic model
|
|
1047
1047
|
duplicate_detection: Options for configuring semantic duplicate detection
|
|
@@ -1064,7 +1064,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1064
1064
|
elif isinstance(topic_model, NomicTopicOptions):
|
|
1065
1065
|
pass
|
|
1066
1066
|
elif topic_model:
|
|
1067
|
-
topic_model = NomicTopicOptions()
|
|
1067
|
+
topic_model = NomicTopicOptions(topic_label_field=indexed_field)
|
|
1068
1068
|
else:
|
|
1069
1069
|
topic_model = NomicTopicOptions(build_topic_model=False)
|
|
1070
1070
|
|
|
@@ -1086,6 +1086,9 @@ class AtlasDataset(AtlasClass):
|
|
|
1086
1086
|
else:
|
|
1087
1087
|
embedding_model = NomicEmbedOptions()
|
|
1088
1088
|
|
|
1089
|
+
if modality is None:
|
|
1090
|
+
modality = self.meta["modality"]
|
|
1091
|
+
|
|
1089
1092
|
colorable_fields = []
|
|
1090
1093
|
|
|
1091
1094
|
for field in self.dataset_fields:
|
|
@@ -1093,7 +1096,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1093
1096
|
colorable_fields.append(field)
|
|
1094
1097
|
|
|
1095
1098
|
build_template = {}
|
|
1096
|
-
if
|
|
1099
|
+
if modality == "embedding":
|
|
1097
1100
|
if topic_model.topic_label_field is None:
|
|
1098
1101
|
logger.warning(
|
|
1099
1102
|
"You did not specify the `topic_label_field` option in your topic_model, your dataset will not contain auto-labeled topics."
|
|
@@ -1135,7 +1138,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1135
1138
|
),
|
|
1136
1139
|
}
|
|
1137
1140
|
|
|
1138
|
-
elif
|
|
1141
|
+
elif modality == "text" or modality == "image":
|
|
1139
1142
|
# find the index id of the index with name reuse_embeddings_from_index
|
|
1140
1143
|
reuse_embedding_from_index_id = None
|
|
1141
1144
|
indices = self.indices
|
|
@@ -1149,13 +1152,18 @@ class AtlasDataset(AtlasClass):
|
|
|
1149
1152
|
f"Could not find the index '{reuse_embeddings_from_index}' to re-use from. Possible options are {[index.name for index in indices]}"
|
|
1150
1153
|
)
|
|
1151
1154
|
|
|
1152
|
-
if indexed_field is None:
|
|
1155
|
+
if indexed_field is None and modality == "text":
|
|
1153
1156
|
raise Exception("You did not specify a field to index. Specify an 'indexed_field'.")
|
|
1154
1157
|
|
|
1158
|
+
if modality == "image":
|
|
1159
|
+
indexed_field = "_blob_hash"
|
|
1160
|
+
if indexed_field is not None:
|
|
1161
|
+
logger.warning("Ignoring indexed_field for image datasets. Only _blob_hash is supported.")
|
|
1162
|
+
|
|
1155
1163
|
if indexed_field not in self.dataset_fields:
|
|
1156
1164
|
raise Exception(f"Indexing on {indexed_field} not allowed. Valid options are: {self.dataset_fields}")
|
|
1157
1165
|
|
|
1158
|
-
if
|
|
1166
|
+
if modality == "image":
|
|
1159
1167
|
if topic_model.topic_label_field is None:
|
|
1160
1168
|
print(
|
|
1161
1169
|
"You did not specify the `topic_label_field` option in your topic_model, your dataset will not contain auto-labeled topics."
|
|
@@ -1163,7 +1171,9 @@ class AtlasDataset(AtlasClass):
|
|
|
1163
1171
|
topic_field = None
|
|
1164
1172
|
topic_model.build_topic_model = False
|
|
1165
1173
|
else:
|
|
1166
|
-
topic_field =
|
|
1174
|
+
topic_field = (
|
|
1175
|
+
topic_model.topic_label_field if topic_model.topic_label_field != indexed_field else None
|
|
1176
|
+
)
|
|
1167
1177
|
else:
|
|
1168
1178
|
topic_field = topic_model.topic_label_field
|
|
1169
1179
|
|
|
@@ -1245,7 +1255,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1245
1255
|
logger.warning("Could not find a map being built for this dataset.")
|
|
1246
1256
|
else:
|
|
1247
1257
|
logger.info(
|
|
1248
|
-
f"Created map `{atlas_projection.name}` in dataset `{self.identifier}`: {atlas_projection.
|
|
1258
|
+
f"Created map `{atlas_projection.name}` in dataset `{self.identifier}`: {atlas_projection.dataset_link}"
|
|
1249
1259
|
)
|
|
1250
1260
|
return atlas_projection
|
|
1251
1261
|
|
|
@@ -1346,6 +1356,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1346
1356
|
Args:
|
|
1347
1357
|
data: A pandas DataFrame, list of dictionaries, or pyarrow Table matching the dataset schema.
|
|
1348
1358
|
embeddings: A numpy array of embeddings: each row corresponds to a row in the table. Use if you already have embeddings for your datapoints.
|
|
1359
|
+
blobs: A list of image paths, bytes, or PIL Images. Use if you want to create an AtlasDataset using image embeddings over your images. Note: Blobs are stored locally only.
|
|
1349
1360
|
pbar: (Optional). A tqdm progress bar to update.
|
|
1350
1361
|
"""
|
|
1351
1362
|
if embeddings is not None:
|
|
@@ -1364,6 +1375,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1364
1375
|
"""
|
|
1365
1376
|
Add data, with associated blobs, to the dataset.
|
|
1366
1377
|
Uploads blobs to the server and associates them with the data.
|
|
1378
|
+
Blobs must reference objects stored locally
|
|
1367
1379
|
"""
|
|
1368
1380
|
if isinstance(data, DataFrame):
|
|
1369
1381
|
data = pa.Table.from_pandas(data)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|