nomic 3.0.38__tar.gz → 3.0.41__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nomic might be problematic. Click here for more details.
- {nomic-3.0.38 → nomic-3.0.41}/PKG-INFO +1 -1
- {nomic-3.0.38 → nomic-3.0.41}/nomic/atlas.py +1 -1
- {nomic-3.0.38 → nomic-3.0.41}/nomic/dataset.py +14 -7
- {nomic-3.0.38 → nomic-3.0.41}/nomic.egg-info/PKG-INFO +1 -1
- {nomic-3.0.38 → nomic-3.0.41}/setup.py +1 -1
- {nomic-3.0.38 → nomic-3.0.41}/README.md +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/__init__.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/aws/__init__.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/aws/sagemaker.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/cli.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/data_inference.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/data_operations.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/embed.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/pl_callbacks/__init__.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/pl_callbacks/pl_callback.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/settings.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic/utils.py +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic.egg-info/SOURCES.txt +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic.egg-info/dependency_links.txt +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic.egg-info/entry_points.txt +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic.egg-info/requires.txt +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/nomic.egg-info/top_level.txt +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/pyproject.toml +0 -0
- {nomic-3.0.38 → nomic-3.0.41}/setup.cfg +0 -0
|
@@ -38,7 +38,7 @@ def map_data(
|
|
|
38
38
|
|
|
39
39
|
Args:
|
|
40
40
|
data: An ordered collection of the datapoints you are structuring. Can be a list of dictionaries, Pandas Dataframe or PyArrow Table.
|
|
41
|
-
blobs: A list of image paths, bytes, or PIL images to add to your image dataset.
|
|
41
|
+
blobs: A list of image paths, bytes, or PIL images to add to your image dataset that are stored locally.
|
|
42
42
|
embeddings: An [N,d] numpy array containing the N embeddings to add.
|
|
43
43
|
identifier: A name for your dataset that is used to generate the dataset identifier. A unique name will be chosen if not supplied.
|
|
44
44
|
description: The description of your dataset
|
|
@@ -1064,7 +1064,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1064
1064
|
elif isinstance(topic_model, NomicTopicOptions):
|
|
1065
1065
|
pass
|
|
1066
1066
|
elif topic_model:
|
|
1067
|
-
topic_model = NomicTopicOptions()
|
|
1067
|
+
topic_model = NomicTopicOptions(topic_label_field=indexed_field)
|
|
1068
1068
|
else:
|
|
1069
1069
|
topic_model = NomicTopicOptions(build_topic_model=False)
|
|
1070
1070
|
|
|
@@ -1086,6 +1086,9 @@ class AtlasDataset(AtlasClass):
|
|
|
1086
1086
|
else:
|
|
1087
1087
|
embedding_model = NomicEmbedOptions()
|
|
1088
1088
|
|
|
1089
|
+
if modality is None:
|
|
1090
|
+
modality = self.meta["modality"]
|
|
1091
|
+
|
|
1089
1092
|
colorable_fields = []
|
|
1090
1093
|
|
|
1091
1094
|
for field in self.dataset_fields:
|
|
@@ -1093,7 +1096,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1093
1096
|
colorable_fields.append(field)
|
|
1094
1097
|
|
|
1095
1098
|
build_template = {}
|
|
1096
|
-
if
|
|
1099
|
+
if modality == "embedding":
|
|
1097
1100
|
if topic_model.topic_label_field is None:
|
|
1098
1101
|
logger.warning(
|
|
1099
1102
|
"You did not specify the `topic_label_field` option in your topic_model, your dataset will not contain auto-labeled topics."
|
|
@@ -1135,7 +1138,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1135
1138
|
),
|
|
1136
1139
|
}
|
|
1137
1140
|
|
|
1138
|
-
elif
|
|
1141
|
+
elif modality == "text" or modality == "image":
|
|
1139
1142
|
# find the index id of the index with name reuse_embeddings_from_index
|
|
1140
1143
|
reuse_embedding_from_index_id = None
|
|
1141
1144
|
indices = self.indices
|
|
@@ -1149,10 +1152,10 @@ class AtlasDataset(AtlasClass):
|
|
|
1149
1152
|
f"Could not find the index '{reuse_embeddings_from_index}' to re-use from. Possible options are {[index.name for index in indices]}"
|
|
1150
1153
|
)
|
|
1151
1154
|
|
|
1152
|
-
if indexed_field is None and
|
|
1155
|
+
if indexed_field is None and modality == "text":
|
|
1153
1156
|
raise Exception("You did not specify a field to index. Specify an 'indexed_field'.")
|
|
1154
1157
|
|
|
1155
|
-
if
|
|
1158
|
+
if modality == "image":
|
|
1156
1159
|
indexed_field = "_blob_hash"
|
|
1157
1160
|
if indexed_field is not None:
|
|
1158
1161
|
logger.warning("Ignoring indexed_field for image datasets. Only _blob_hash is supported.")
|
|
@@ -1160,7 +1163,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1160
1163
|
if indexed_field not in self.dataset_fields:
|
|
1161
1164
|
raise Exception(f"Indexing on {indexed_field} not allowed. Valid options are: {self.dataset_fields}")
|
|
1162
1165
|
|
|
1163
|
-
if
|
|
1166
|
+
if modality == "image":
|
|
1164
1167
|
if topic_model.topic_label_field is None:
|
|
1165
1168
|
print(
|
|
1166
1169
|
"You did not specify the `topic_label_field` option in your topic_model, your dataset will not contain auto-labeled topics."
|
|
@@ -1168,7 +1171,9 @@ class AtlasDataset(AtlasClass):
|
|
|
1168
1171
|
topic_field = None
|
|
1169
1172
|
topic_model.build_topic_model = False
|
|
1170
1173
|
else:
|
|
1171
|
-
topic_field =
|
|
1174
|
+
topic_field = (
|
|
1175
|
+
topic_model.topic_label_field if topic_model.topic_label_field != indexed_field else None
|
|
1176
|
+
)
|
|
1172
1177
|
else:
|
|
1173
1178
|
topic_field = topic_model.topic_label_field
|
|
1174
1179
|
|
|
@@ -1351,6 +1356,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1351
1356
|
Args:
|
|
1352
1357
|
data: A pandas DataFrame, list of dictionaries, or pyarrow Table matching the dataset schema.
|
|
1353
1358
|
embeddings: A numpy array of embeddings: each row corresponds to a row in the table. Use if you already have embeddings for your datapoints.
|
|
1359
|
+
blobs: A list of image paths, bytes, or PIL Images. Use if you want to create an AtlasDataset using image embeddings over your images. Note: Blobs are stored locally only.
|
|
1354
1360
|
pbar: (Optional). A tqdm progress bar to update.
|
|
1355
1361
|
"""
|
|
1356
1362
|
if embeddings is not None:
|
|
@@ -1369,6 +1375,7 @@ class AtlasDataset(AtlasClass):
|
|
|
1369
1375
|
"""
|
|
1370
1376
|
Add data, with associated blobs, to the dataset.
|
|
1371
1377
|
Uploads blobs to the server and associates them with the data.
|
|
1378
|
+
Blobs must reference objects stored locally
|
|
1372
1379
|
"""
|
|
1373
1380
|
if isinstance(data, DataFrame):
|
|
1374
1381
|
data = pa.Table.from_pandas(data)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|