lamindb 0.59.4__py3-none-any.whl → 0.60.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -1,11 +1,6 @@
1
1
  """A data framework for biology.
2
2
 
3
- We assume that data is stored as files or in array formats like parquet, zarr,
4
- HDF5, TileDB or DuckDB.
5
-
6
- LaminDB helps you manage these data with registries for metadata.
7
-
8
- The two most important are:
3
+ LaminDB helps you manage data with registries for metadata:
9
4
 
10
5
  .. autosummary::
11
6
  :toctree: .
@@ -13,7 +8,7 @@ The two most important are:
13
8
  File
14
9
  Dataset
15
10
 
16
- Four registries track provenance of data batches:
11
+ Registries to track provenance:
17
12
 
18
13
  .. autosummary::
19
14
  :toctree: .
@@ -23,7 +18,7 @@ Four registries track provenance of data batches:
23
18
  User
24
19
  Storage
25
20
 
26
- Four registries validate & contextualize data batches:
21
+ Registries to validate & contextualize:
27
22
 
28
23
  .. autosummary::
29
24
  :toctree: .
@@ -33,7 +28,14 @@ Four registries validate & contextualize data batches:
33
28
  FeatureSet
34
29
  Modality
35
30
 
36
- Functional tools:
31
+ You can also access data directly via paths:
32
+
33
+ .. autosummary::
34
+ :toctree: .
35
+
36
+ UPath
37
+
38
+ Functions:
37
39
 
38
40
  .. autosummary::
39
41
  :toctree: .
@@ -42,7 +44,7 @@ Functional tools:
42
44
  view
43
45
  save
44
46
 
45
- Static classes & modules:
47
+ Modules & settings:
46
48
 
47
49
  .. autosummary::
48
50
  :toctree: .
@@ -53,7 +55,7 @@ Static classes & modules:
53
55
 
54
56
  """
55
57
 
56
- __version__ = "0.59.4" # denote a release candidate for 0.1.0 with 0.1rc1
58
+ __version__ = "0.60.0" # denote a release candidate for 0.1.0 with 0.1rc1
57
59
 
58
60
  import os as _os
59
61
 
@@ -64,6 +66,7 @@ from lamin_utils import py_version_warning as _py_version_warning
64
66
  from lamindb_setup import _check_instance_setup
65
67
  from lamindb_setup._check_instance_setup import _INSTANCE_NOT_SETUP_WARNING
66
68
  from lamindb_setup._init_instance import reload_schema_modules as _reload_schema_modules
69
+ from lamindb_setup.dev.upath import UPath
67
70
 
68
71
  _py_version_warning("3.8", "3.11")
69
72
 
lamindb/_dataset.py CHANGED
@@ -45,6 +45,7 @@ def __init__(
45
45
  data: Union[pd.DataFrame, ad.AnnData, File, Iterable[File]] = (
46
46
  kwargs.pop("data") if len(args) == 0 else args[0]
47
47
  )
48
+ meta: Optional[str] = kwargs.pop("meta") if "meta" in kwargs else None
48
49
  name: Optional[str] = kwargs.pop("name") if "name" in kwargs else None
49
50
  description: Optional[str] = (
50
51
  kwargs.pop("description") if "description" in kwargs else None
@@ -92,9 +93,26 @@ def __init__(
92
93
  )
93
94
 
94
95
  run = get_run(run)
96
+ data_init_complete = False
95
97
  # there are exactly 3 ways of creating a Dataset object right now
96
98
  # using exactly one file or using more than one file
97
99
  # init file
100
+ # init from directory or bucket
101
+ if isinstance(data, (str, Path, UPath)):
102
+ file = None
103
+ files = None
104
+ upath = UPath(data)
105
+ if not upath.is_dir:
106
+ raise ValueError(f"Can only pass buckets or directories, not {data}")
107
+ upath_str = upath.as_posix().rstrip("/")
108
+ region = get_storage_region(upath_str)
109
+ storage_settings = StorageSettings(upath_str, region)
110
+ storage = register_storage(storage_settings)
111
+ hash = None
112
+ data_init_complete = True
113
+ # now handle the metadata
114
+ if isinstance(meta, (pd.DataFrame, ad.AnnData, File)):
115
+ data = meta
98
116
  if isinstance(data, (pd.DataFrame, ad.AnnData, File)):
99
117
  files = None
100
118
  storage = None
@@ -126,28 +144,19 @@ def __init__(
126
144
  file.description = f"See dataset {provisional_uid}" # type: ignore
127
145
  file._feature_sets = feature_sets
128
146
  storage = None
129
- # init from directory or bucket
130
- elif isinstance(data, (str, Path, UPath)):
131
- file = None
132
- files = None
133
- upath = UPath(data)
134
- if not upath.is_dir:
135
- raise ValueError(f"Can only pass buckets or directories, not {data}")
136
- upath_str = upath.as_posix().rstrip("/")
137
- region = get_storage_region(upath_str)
138
- storage_settings = StorageSettings(upath_str, region)
139
- storage = register_storage(storage_settings)
140
- hash = None
141
- # init files
142
- else:
147
+ data_init_complete = True
148
+ if not data_init_complete:
143
149
  file = None
144
150
  storage = None
145
151
  if hasattr(data, "__getitem__"):
146
152
  assert isinstance(data[0], File) # type: ignore
147
153
  files = data
148
154
  hash, feature_sets = from_files(files) # type: ignore
155
+ data_init_complete = True
149
156
  else:
150
- raise ValueError("Only DataFrame, AnnData and iterable of File is allowed")
157
+ raise ValueError(
158
+ "Only DataFrame, AnnData, folder or list of File is allowed."
159
+ )
151
160
  # we ignore datasets in trash containing the same hash
152
161
  existing_dataset = Dataset.filter(hash=hash).one_or_none()
153
162
  if existing_dataset is not None:
lamindb/_file.py CHANGED
@@ -1,7 +1,5 @@
1
- from collections import defaultdict
2
- from itertools import islice
3
1
  from pathlib import Path, PurePath, PurePosixPath
4
- from typing import Any, List, Optional, Set, Tuple, Union
2
+ from typing import Any, List, Optional, Tuple, Union
5
3
 
6
4
  import anndata as ad
7
5
  import fsspec
@@ -14,7 +12,7 @@ from lamindb_setup._init_instance import register_storage
14
12
  from lamindb_setup.dev import StorageSettings
15
13
  from lamindb_setup.dev._docs import doc_args
16
14
  from lamindb_setup.dev._hub_utils import get_storage_region
17
- from lamindb_setup.dev.upath import create_path
15
+ from lamindb_setup.dev.upath import create_path, extract_suffix_from_path
18
16
  from lnschema_core import Feature, FeatureSet, File, Modality, Run, Storage
19
17
  from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, PathLike
20
18
 
@@ -36,7 +34,6 @@ from lamindb.dev.storage.file import (
36
34
  ProgressCallback,
37
35
  auto_storage_key_from_file,
38
36
  auto_storage_key_from_id_suffix,
39
- extract_suffix_from_path,
40
37
  filepath_from_file,
41
38
  )
42
39
  from lamindb.dev.versioning import get_ids_from_old_version, init_uid
@@ -928,7 +925,6 @@ def path(self) -> Union[Path, UPath]:
928
925
  return filepath_from_file(self)
929
926
 
930
927
 
931
- # adapted from: https://stackoverflow.com/questions/9727673
932
928
  @classmethod # type: ignore
933
929
  @doc_args(File.view_tree.__doc__)
934
930
  def view_tree(
@@ -940,82 +936,18 @@ def view_tree(
940
936
  length_limit: int = 1000,
941
937
  ) -> None:
942
938
  """{}"""
943
- space = " "
944
- branch = "│ "
945
- tee = "├── "
946
- last = "└── "
947
- max_files_per_dir_per_type = 7
948
-
949
- if path is None:
950
- dir_path = settings.storage
951
- else:
952
- dir_path = create_path(path) # returns Path for local
953
- n_files = 0
954
- n_directories = 0
955
-
956
- # by default only including registered files
957
- # need a flag and a proper implementation
958
- registered_paths: Set[Any] = set()
959
- registered_dirs: Set[Any] = set()
939
+ logger.warning("Deprecated: Please use UPath.view_tree()")
940
+ include_paths = None
960
941
  if path is None:
961
- registered_paths = {
942
+ path = settings.storage
943
+ include_paths = {
962
944
  file.path for file in cls.filter(storage_id=setup_settings.storage.id).all()
963
945
  }
964
- registered_dirs = {d for p in registered_paths for d in p.parents}
965
- suffixes = set()
966
-
967
- def inner(dir_path: Union[Path, UPath], prefix: str = "", level=-1):
968
- nonlocal n_files, n_directories, suffixes
969
- if not level:
970
- return # 0, stop iterating
971
- stripped_dir_path = dir_path.as_posix().rstrip("/")
972
- # do not iterate through zarr directories
973
- if stripped_dir_path.endswith((".zarr", ".zrad")):
974
- return
975
- # this is needed so that the passed folder is not listed
976
- contents = [
977
- i
978
- for i in dir_path.iterdir()
979
- if i.as_posix().rstrip("/") != stripped_dir_path
980
- ]
981
- if limit_to_directories:
982
- contents = [d for d in contents if d.is_dir()]
983
- pointers = [tee] * (len(contents) - 1) + [last]
984
- n_files_per_dir_per_type = defaultdict(lambda: 0) # type: ignore
985
- for pointer, path in zip(pointers, contents):
986
- if path.is_dir():
987
- if registered_dirs and path not in registered_dirs:
988
- continue
989
- yield prefix + pointer + path.name
990
- n_directories += 1
991
- n_files_per_dir_per_type = defaultdict(lambda: 0)
992
- extension = branch if pointer == tee else space
993
- yield from inner(path, prefix=prefix + extension, level=level - 1)
994
- elif not limit_to_directories:
995
- if registered_paths and path not in registered_paths:
996
- continue
997
- suffix = extract_suffix_from_path(path)
998
- suffixes.add(suffix)
999
- n_files_per_dir_per_type[suffix] += 1
1000
- n_files += 1
1001
- if n_files_per_dir_per_type[suffix] == max_files_per_dir_per_type:
1002
- yield prefix + "..."
1003
- elif n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type:
1004
- continue
1005
- else:
1006
- yield prefix + pointer + path.name
1007
-
1008
- folder_tree = ""
1009
- iterator = inner(dir_path, level=level)
1010
- for line in islice(iterator, length_limit):
1011
- folder_tree += f"\n{line}"
1012
- if next(iterator, None):
1013
- folder_tree += f"... length_limit, {length_limit}, reached, counted:"
1014
- directory_info = "directory" if n_directories == 1 else "directories"
1015
- display_suffixes = ", ".join([f"{suffix!r}" for suffix in suffixes])
1016
- logger.print(
1017
- f"{dir_path.name} ({n_directories} sub-{directory_info} & {n_files} files with"
1018
- f" suffixes {display_suffixes}): {folder_tree}"
946
+ UPath(path).view_tree(
947
+ level=level,
948
+ limit_to_directories=limit_to_directories,
949
+ length_limit=length_limit,
950
+ include_paths=include_paths,
1019
951
  )
1020
952
 
1021
953
 
lamindb/_view.py CHANGED
@@ -11,15 +11,15 @@ from lnschema_core import Registry
11
11
 
12
12
  def view(
13
13
  n: int = 7, schema: Optional[str] = None, registries: Optional[List[str]] = None
14
- ):
15
- """View data.
14
+ ) -> None:
15
+ """View latest metadata state.
16
16
 
17
17
  Args:
18
- n: Display the last `n` rows of a table.
18
+ n: Display the last `n` rows of a registry.
19
19
  schema: Schema module to view. Default's to
20
20
  `None` and displays all schema modules.
21
21
  registries: List of Registry names. Defaults to
22
- `None` and lists all ORMs.
22
+ `None` and lists all registries.
23
23
 
24
24
  Examples:
25
25
  >>> ln.view()
lamindb/dev/_data.py CHANGED
@@ -19,7 +19,10 @@ from lnschema_core.types import StrField
19
19
 
20
20
  from lamindb.dev._settings import settings
21
21
 
22
- from .._feature_set import dict_schema_name_to_model_name
22
+ from .._feature_set import (
23
+ dict_related_model_to_related_name,
24
+ dict_schema_name_to_model_name,
25
+ )
23
26
  from .._parents import view_flow
24
27
  from .._query_set import QuerySet
25
28
  from ._feature_manager import (
@@ -202,19 +205,29 @@ def get_labels(
202
205
  def add_labels(
203
206
  self,
204
207
  records: Union[Registry, List[Registry], QuerySet, Iterable],
205
- feature: Feature,
208
+ feature: Optional[Feature] = None,
206
209
  *,
207
210
  field: Optional[StrField] = None,
208
211
  ) -> None:
209
212
  """{}"""
213
+ if self._state.adding:
214
+ raise ValueError("Please save the file/dataset before adding a label!")
215
+
210
216
  if isinstance(records, (QuerySet, QuerySet.__base__)): # need to have both
211
217
  records = records.list()
212
218
  if isinstance(records, (str, Registry)):
213
219
  records = [records]
214
220
  if not isinstance(records, List): # avoids warning for pd Series
215
221
  records = list(records)
222
+ # create records from values
216
223
  if isinstance(records[0], str): # type: ignore
217
224
  records_validated = []
225
+ # feature is needed if we want to create records from values
226
+ if feature is None:
227
+ raise ValueError(
228
+ "Please pass a feature, e.g., via: label = ln.ULabel(name='my_label',"
229
+ " feature=ln.Feature(name='my_feature'))"
230
+ )
218
231
  if feature.registries is not None:
219
232
  orm_dict = dict_schema_name_to_model_name(File)
220
233
  for reg in feature.registries.split("|"):
@@ -231,80 +244,98 @@ def add_labels(
231
244
  )
232
245
  records = records_validated
233
246
 
234
- if self._state.adding:
235
- raise ValueError("Please save the file/dataset before adding a label!")
236
247
  for record in records:
237
248
  if record._state.adding:
238
249
  raise ValidationError(
239
250
  f"{record} not validated. If it looks correct: record.save()"
240
251
  )
241
- validate_feature(feature, records) # type:ignore
242
- records_by_registry = defaultdict(list)
243
- for record in records:
244
- records_by_registry[record.__class__.__get_name_with_schema__()].append(record)
245
- for registry_name, records in records_by_registry.items():
246
- getattr(self, self.features._accessor_by_orm[registry_name]).add(
247
- *records, through_defaults={"feature_id": feature.id}
248
- )
249
- feature_set_links = get_feature_set_links(self)
250
- feature_set_ids = [link.feature_set_id for link in feature_set_links.all()]
251
- # get all linked features of type Feature
252
- feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
253
- linked_features_by_slot = {
254
- feature_set_links.filter(feature_set_id=feature_set.id)
255
- .one()
256
- .slot: feature_set.features.all()
257
- for feature_set in feature_sets
258
- if "core.Feature" == feature_set.registry
259
- }
260
- for registry_name, records in records_by_registry.items():
261
- msg = ""
262
- if feature.registries is None or registry_name not in feature.registries:
252
+
253
+ if feature is None:
254
+ d = dict_related_model_to_related_name(self.__class__)
255
+ # strategy: group records by registry to reduce number of transactions
256
+ records_by_related_name: Dict = {}
257
+ for record in records:
258
+ related_name = d.get(record.__class__.__get_name_with_schema__())
259
+ if related_name is None:
260
+ raise ValueError(f"Can't add labels to {record.__class__} record!")
261
+ if related_name not in records_by_related_name:
262
+ records_by_related_name[related_name] = []
263
+ records_by_related_name[related_name].append(record)
264
+ for related_name, records in records_by_related_name.items():
265
+ print(related_name, records)
266
+ getattr(self, related_name).add(*records)
267
+ else:
268
+ validate_feature(feature, records) # type:ignore
269
+ records_by_registry = defaultdict(list)
270
+ for record in records:
271
+ records_by_registry[record.__class__.__get_name_with_schema__()].append(
272
+ record
273
+ )
274
+ for registry_name, records in records_by_registry.items():
275
+ getattr(self, self.features._accessor_by_orm[registry_name]).add(
276
+ *records, through_defaults={"feature_id": feature.id}
277
+ )
278
+ feature_set_links = get_feature_set_links(self)
279
+ feature_set_ids = [link.feature_set_id for link in feature_set_links.all()]
280
+ # get all linked features of type Feature
281
+ feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
282
+ linked_features_by_slot = {
283
+ feature_set_links.filter(feature_set_id=feature_set.id)
284
+ .one()
285
+ .slot: feature_set.features.all()
286
+ for feature_set in feature_sets
287
+ if "core.Feature" == feature_set.registry
288
+ }
289
+ for registry_name, records in records_by_registry.items():
290
+ msg = ""
291
+ if feature.registries is None or registry_name not in feature.registries:
292
+ if len(msg) > 0:
293
+ msg += ", "
294
+ msg += f"linked feature '{feature.name}' to registry '{registry_name}'"
295
+ if feature.registries is None:
296
+ feature.registries = registry_name
297
+ elif registry_name not in feature.registries:
298
+ feature.registries += f"|{registry_name}"
299
+ feature.save()
263
300
  if len(msg) > 0:
264
- msg += ", "
265
- msg += f"linked feature '{feature.name}' to registry '{registry_name}'"
266
- if feature.registries is None:
267
- feature.registries = registry_name
268
- elif registry_name not in feature.registries:
269
- feature.registries += f"|{registry_name}"
270
- feature.save()
271
- if len(msg) > 0:
272
- logger.save(msg)
273
- # check whether we have to update the feature set that manages labels
274
- # (Feature) to account for a new feature
275
- found_feature = False
276
- for _, linked_features in linked_features_by_slot.items():
277
- if feature in linked_features:
278
- found_feature = True
279
- if not found_feature:
280
- if "external" in linked_features_by_slot:
281
- feature_set = self.features._feature_set_by_slot["external"]
282
- features_list = feature_set.features.list()
283
- else:
284
- features_list = []
285
- features_list.append(feature)
286
- feature_set = FeatureSet(features_list, modality=priors.modalities.meta)
287
- feature_set.save()
288
- if "external" in linked_features_by_slot:
289
- old_feature_set_link = feature_set_links.filter(slot="external").one()
290
- old_feature_set_link.delete()
291
- remaining_links = self.feature_sets.through.objects.filter(
292
- feature_set_id=feature_set.id
293
- ).all()
294
- if len(remaining_links) == 0:
295
- old_feature_set = FeatureSet.filter(
296
- id=old_feature_set_link.feature_set_id
301
+ logger.save(msg)
302
+ # check whether we have to update the feature set that manages labels
303
+ # (Feature) to account for a new feature
304
+ found_feature = False
305
+ for _, linked_features in linked_features_by_slot.items():
306
+ if feature in linked_features:
307
+ found_feature = True
308
+ if not found_feature:
309
+ if "external" in linked_features_by_slot:
310
+ feature_set = self.features._feature_set_by_slot["external"]
311
+ features_list = feature_set.features.list()
312
+ else:
313
+ features_list = []
314
+ features_list.append(feature)
315
+ feature_set = FeatureSet(features_list, modality=priors.modalities.meta)
316
+ feature_set.save()
317
+ if "external" in linked_features_by_slot:
318
+ old_feature_set_link = feature_set_links.filter(
319
+ slot="external"
297
320
  ).one()
298
- logger.info(
299
- "no file links to it anymore, deleting feature set"
300
- f" {old_feature_set}"
301
- )
302
- old_feature_set.delete()
303
- self.features.add_feature_set(feature_set, slot="external")
304
- logger.save(
305
- f"linked new feature '{feature.name}' together with new feature set"
306
- f" {feature_set}"
307
- )
321
+ old_feature_set_link.delete()
322
+ remaining_links = self.feature_sets.through.objects.filter(
323
+ feature_set_id=feature_set.id
324
+ ).all()
325
+ if len(remaining_links) == 0:
326
+ old_feature_set = FeatureSet.filter(
327
+ id=old_feature_set_link.feature_set_id
328
+ ).one()
329
+ logger.info(
330
+ "no file links to it anymore, deleting feature set"
331
+ f" {old_feature_set}"
332
+ )
333
+ old_feature_set.delete()
334
+ self.features.add_feature_set(feature_set, slot="external")
335
+ logger.save(
336
+ f"linked new feature '{feature.name}' together with new feature set"
337
+ f" {feature_set}"
338
+ )
308
339
 
309
340
 
310
341
  def _track_run_input(
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Union
1
+ from typing import Dict, List, Optional, Union
2
2
 
3
3
  import numpy as np
4
4
  from lamin_utils import colors, logger
@@ -116,7 +116,7 @@ class LabelManager:
116
116
  def add(
117
117
  self,
118
118
  records: Union[Registry, List[Registry], QuerySet],
119
- feature: Feature,
119
+ feature: Optional[Feature] = None,
120
120
  ) -> None:
121
121
  """Add one or several labels and associate them with a feature.
122
122
 
@@ -326,9 +326,9 @@ def anndata_human_immune_cells(
326
326
  ln.save(
327
327
  lb.Gene.from_values(
328
328
  adata.var.index, field="ensembl_gene_id", organism="human"
329
- )[:-35]
329
+ )
330
330
  )
331
- ln.save(lb.CellType.from_values(adata.obs.cell_type, field="name")[:-2])
331
+ ln.save(lb.CellType.from_values(adata.obs.cell_type, field="name"))
332
332
  ln.save(lb.ExperimentalFactor.from_values(adata.obs.assay, field="name"))
333
333
  ln.save(lb.Tissue.from_values(adata.obs.tissue, field="name"))
334
334
  ln.Modality(name="rna", description="RNA measurements").save()
@@ -338,7 +338,9 @@ def anndata_human_immune_cells(
338
338
  ).save()
339
339
  ln.Feature(name="tissue", type="category", registries=[lb.Tissue]).save()
340
340
  ln.Feature(name="organism", type="category", registries=[lb.Organism]).save()
341
+ ln.Feature(name="donor", type="category", registries=[ln.ULabel]).save()
341
342
  lb.ExperimentalFactor.from_bionty(ontology_id="EFO:0008913").save()
343
+ ln.save([ln.ULabel(name=name) for name in adata.obs.donor.unique()])
342
344
  ln.settings.verbosity = verbosity
343
345
  lb.settings.auto_save_parents = auto_save_parents
344
346
  return adata
@@ -5,8 +5,6 @@
5
5
 
6
6
  AnnDataAccessor
7
7
  BackedAccessor
8
- UPath
9
-
10
8
  """
11
9
  from lamindb_setup.dev.upath import LocalPathClasses, UPath, infer_filesystem
12
10
 
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import shutil
3
3
  from pathlib import Path
4
- from typing import Literal, Optional, Union
4
+ from typing import Literal, Union
5
5
 
6
6
  import anndata as ad
7
7
  import fsspec
@@ -28,66 +28,6 @@ except ImportError:
28
28
  AUTO_KEY_PREFIX = ".lamindb/"
29
29
 
30
30
 
31
- # also see https://gist.github.com/securifera/e7eed730cbe1ce43d0c29d7cd2d582f4
32
- # ".gz" is not listed here as it typically occurs with another suffix
33
- KNOWN_SUFFIXES = {
34
- #
35
- # without readers
36
- #
37
- ".fasta",
38
- ".fastq",
39
- ".jpg",
40
- ".mtx",
41
- ".obo",
42
- ".pdf",
43
- ".png",
44
- ".tar",
45
- ".tiff",
46
- ".txt",
47
- ".tsv",
48
- ".zip",
49
- ".xml",
50
- #
51
- # with readers (see below)
52
- #
53
- ".h5ad",
54
- ".parquet",
55
- ".csv",
56
- ".fcs",
57
- ".xslx",
58
- ".zarr",
59
- ".zrad",
60
- }
61
-
62
-
63
- def extract_suffix_from_path(
64
- path: Union[UPath, Path], arg_name: Optional[str] = None
65
- ) -> str:
66
- if len(path.suffixes) <= 1:
67
- return path.suffix
68
- else:
69
- arg_name = "file" if arg_name is None else arg_name # for the warning
70
- msg = f"{arg_name} has more than one suffix (path.suffixes), "
71
- # first check the 2nd-to-last suffix because it might be followed by .gz
72
- # or another compression-related suffix
73
- # Alex thought about adding logic along the lines of path.suffixes[-1]
74
- # in COMPRESSION_SUFFIXES to detect something like .random.gz and then
75
- # add ".random.gz" but concluded it's too dangerous it's safer to just
76
- # use ".gz" in such a case
77
- if path.suffixes[-2] in KNOWN_SUFFIXES:
78
- suffix = "".join(path.suffixes[-2:])
79
- msg += f"inferring: '{suffix}'"
80
- else:
81
- suffix = path.suffixes[-1] # this is equivalent to path.suffix!!!
82
- msg += (
83
- f"using only last suffix: '{suffix}' - if you want your file format to"
84
- " be recognized, make an issue:"
85
- " https://github.com/laminlabs/lamindb/issues/new"
86
- )
87
- logger.warning(msg)
88
- return suffix
89
-
90
-
91
31
  # add type annotations back asap when re-organizing the module
92
32
  def auto_storage_key_from_file(file: File):
93
33
  if file.key is None or file.key_is_virtual:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.59.4
3
+ Version: 0.60.0
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -8,8 +8,8 @@ Description-Content-Type: text/markdown
8
8
  Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
- Requires-Dist: lnschema_core==0.54.0
12
- Requires-Dist: lamindb_setup==0.57.2
11
+ Requires-Dist: lnschema_core==0.54.1
12
+ Requires-Dist: lamindb_setup==0.58.0
13
13
  Requires-Dist: lamin_utils==0.11.7
14
14
  Requires-Dist: lamin_cli==0.1.2
15
15
  Requires-Dist: rapidfuzz
@@ -27,7 +27,7 @@ Requires-Dist: boto3==1.28.17 ; extra == "aws"
27
27
  Requires-Dist: aiobotocore==2.5.4 ; extra == "aws"
28
28
  Requires-Dist: fsspec[s3]==2023.9.0 ; extra == "aws"
29
29
  Requires-Dist: s3fs>=2023.1.0 ; extra == "aws"
30
- Requires-Dist: lnschema_bionty==0.34.0 ; extra == "bionty"
30
+ Requires-Dist: lnschema_bionty==0.34.2 ; extra == "bionty"
31
31
  Requires-Dist: pandas<2 ; extra == "dev"
32
32
  Requires-Dist: pre-commit ; extra == "dev"
33
33
  Requires-Dist: nox ; extra == "dev"
@@ -1,9 +1,9 @@
1
- lamindb/__init__.py,sha256=jc94aiPKUjYzRwyXU-8K-gHdGPNfbAhXzdeRbdeG0G4,2870
2
- lamindb/_dataset.py,sha256=Kqln9JKAIWtNmXq_Gmi9jruN4DWt4OcqDV-aB1-bPZk,15894
1
+ lamindb/__init__.py,sha256=Tyk98kfIcBkBTe9dlhge-0N1-vFC27Cq6n4O0zz9FAc,2816
2
+ lamindb/_dataset.py,sha256=uToS16g_68J5tPbe81auxkBYwtuBoceOPFf8b-a_0Vc,16251
3
3
  lamindb/_delete.py,sha256=wiYmYnvIEHrDdmw1NiXyfCY9mBt-FI5XNFi5jyR_mkA,1968
4
4
  lamindb/_feature.py,sha256=BTss2B534SRbv1uj1MFydcrOxL-RyTsX5qMkdlbUy30,5562
5
5
  lamindb/_feature_set.py,sha256=G63pwauDQ7jg4ydFCQLhu-lgO6tm56iQwUdRuNHeKHY,9233
6
- lamindb/_file.py,sha256=ceS4CKGsZwEgvUPPyjbJ0YgHWU5Jm7KNVhHy9eP7Gp8,38680
6
+ lamindb/_file.py,sha256=ucUrnDL2UhbhwUxLqEZ2aaeE6I7FICMFsBSoVx-peIw,35789
7
7
  lamindb/_filter.py,sha256=Xf6nAm8BXT8BgX6801KqOZfa73GVAx1oPFNb6rNymIo,1013
8
8
  lamindb/_from_values.py,sha256=GitpmKOqV6YHJggaCnJgGsRIHI_bnuLRVE2oo9W-SgE,11613
9
9
  lamindb/_parents.py,sha256=qUFg_5kAr2VzsEbHsSnr3i1PgzjMMr1h00dQ-ugkFyU,13819
@@ -17,11 +17,11 @@ lamindb/_transform.py,sha256=87yUTz0RndJ_C98tBt4t2SPw8fksRgqJKwCQG_H40Kk,2515
17
17
  lamindb/_ulabel.py,sha256=lEAENh_dluNkBi8xKUH_CjJNMXldOm2liy6Rg3IH1pE,1900
18
18
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
19
19
  lamindb/_validate.py,sha256=3powFmYcNop2R6ijt2v3I_vPn4TD9ET4DJkW8uzQt_U,13719
20
- lamindb/_view.py,sha256=bzx6e-Cif2CmDQkOu6jMrq_d5rsu6g7hhdaK_sYBv_Y,2150
20
+ lamindb/_view.py,sha256=gHtxY_Bp6zF7t0qFltzzmbf2cKbJY-iGnQiq-sAlW7k,2184
21
21
  lamindb/dev/__init__.py,sha256=Ja96dxb0t7raGsCr8QxqCabyEzIxeVGlL_IgmhxdsB8,1010
22
- lamindb/dev/_data.py,sha256=6TLM2tVWV7xMYzWNA14EsdyhSoRjK7IK6EU4VuQoC-g,15071
22
+ lamindb/dev/_data.py,sha256=8JIc18zmoffPK6SZQ2Kauusn4JpoEtYYiahcQpawbkE,16600
23
23
  lamindb/dev/_feature_manager.py,sha256=IojA1TPH3ZPlPghV_d1MIPIxdIcYO15RenI_o7YjmAM,8049
24
- lamindb/dev/_label_manager.py,sha256=5R2rZzdLgiZHEzXyilSjK3J7kHDHUOhneZJuSh--qQY,7339
24
+ lamindb/dev/_label_manager.py,sha256=EikCyLc0i80b9j0EOXznu42ERTu_KDv1dXRxeY3IiZU,7366
25
25
  lamindb/dev/_priors.py,sha256=eSZEEijmeFs3zcrU27r3T2sSGdsK-cvy7vl6ukDYaU8,785
26
26
  lamindb/dev/_run_context.py,sha256=chmyw5VfhuIKCYpdjQ1vfa8uvdSBwGzOZ5nrlHmUs4s,19138
27
27
  lamindb/dev/_settings.py,sha256=ldS81qBsCZCSvrt_DgGBXN5jEWifbgU_oYeysoeWGFU,3780
@@ -31,17 +31,17 @@ lamindb/dev/hashing.py,sha256=IlNrHy-a9NqB0vfqiwIh4sjt40CvaiZIvfK6gMnkxDo,1381
31
31
  lamindb/dev/types.py,sha256=svg5S_aynuGfbEOsbmqkR_gF9d9YMzfOkcvGN37Rzvg,232
32
32
  lamindb/dev/versioning.py,sha256=XF7X-Ngat_Ggca7FdtZa5ElOKlOgoxDtxwZlhsCTJZU,2788
33
33
  lamindb/dev/datasets/__init__.py,sha256=clbWOmg4K8Rh94OPFtJasNKdtUHHvR_Lx11jZWMqfok,1350
34
- lamindb/dev/datasets/_core.py,sha256=_K1HuMjN8cwf8YWpepIXwHoNlepIuH7kqe7wIGBdcx0,19011
34
+ lamindb/dev/datasets/_core.py,sha256=58VBoP0vQ6GD33war3LXwhuJFKq06y5pqFBGvbSN63I,19158
35
35
  lamindb/dev/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
36
- lamindb/dev/storage/__init__.py,sha256=mFvsMkAHHmO_xTM1UI-WGynDObnH0RCI2TXtFGhYfv8,392
36
+ lamindb/dev/storage/__init__.py,sha256=AlViEC4M7fDawYA-y8ORXMn-PhT8bGDSxsWm1tjSUOk,382
37
37
  lamindb/dev/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
38
38
  lamindb/dev/storage/_backed_access.py,sha256=JeEdrth4BOTo5Fa_bzHWaApJzbRhT_gbrhRRqLjZKto,22166
39
39
  lamindb/dev/storage/_zarr.py,sha256=7W1Jos1QOOF3f41uML_arQoDTNPZVpRyP2m3SLWaCAo,2766
40
- lamindb/dev/storage/file.py,sha256=UHwm4LeK22J4pDuMjwLSqORlFEIwm6WWxWN-IfDuocc,8005
40
+ lamindb/dev/storage/file.py,sha256=aJ1_GpjH1B1PSSt1LZOsQDFvlBrKvasVpLlqE1PTgQQ,6244
41
41
  lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
42
42
  lamindb/setup/__init__.py,sha256=8-0F2C4Glx23-b8-D_1CBGgRBM5PppVhazhoXZYOLsg,275
43
43
  lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
44
- lamindb-0.59.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
- lamindb-0.59.4.dist-info/WHEEL,sha256=rSgq_JpHF9fHR1lx53qwg_1-2LypZE_qmcuXbVUq948,81
46
- lamindb-0.59.4.dist-info/METADATA,sha256=Ry_O2gNFAJbPAbeQ7l2zX52kq6j2MPTI1YCxAABg8yI,3114
47
- lamindb-0.59.4.dist-info/RECORD,,
44
+ lamindb-0.60.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
+ lamindb-0.60.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
46
+ lamindb-0.60.0.dist-info/METADATA,sha256=2LN1Ww6NMQhJYq3_XiC-ULfVykv1kGIzr9K0j-L5mSE,3114
47
+ lamindb-0.60.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: flit 3.8.0
2
+ Generator: flit 3.9.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any