lamindb 0.55.0__py3-none-any.whl → 0.55.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_dataset.py +41 -4
- lamindb/_file.py +2 -2
- lamindb/_filter.py +2 -9
- lamindb/_query_manager.py +20 -9
- lamindb/_registry.py +7 -2
- lamindb/_storage.py +9 -0
- lamindb/_transform.py +7 -0
- lamindb/dev/_label_manager.py +4 -1
- {lamindb-0.55.0.dist-info → lamindb-0.55.1.dist-info}/METADATA +5 -5
- {lamindb-0.55.0.dist-info → lamindb-0.55.1.dist-info}/RECORD +14 -14
- {lamindb-0.55.0.dist-info → lamindb-0.55.1.dist-info}/LICENSE +0 -0
- {lamindb-0.55.0.dist-info → lamindb-0.55.1.dist-info}/WHEEL +0 -0
- {lamindb-0.55.0.dist-info → lamindb-0.55.1.dist-info}/entry_points.txt +0 -0
lamindb/__init__.py
CHANGED
lamindb/_dataset.py
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
from collections import defaultdict
|
2
|
+
from pathlib import Path
|
2
3
|
from typing import Dict, Iterable, Literal, Optional, Tuple, Union
|
3
4
|
|
4
5
|
import anndata as ad
|
5
6
|
import pandas as pd
|
6
7
|
from lamin_utils import logger
|
8
|
+
from lamindb_setup._init_instance import register_storage
|
9
|
+
from lamindb_setup.dev import StorageSettings
|
7
10
|
from lamindb_setup.dev._docs import doc_args
|
11
|
+
from lamindb_setup.dev._hub_utils import get_storage_region
|
12
|
+
from lamindb_setup.dev.upath import UPath
|
8
13
|
from lnschema_core import Modality
|
9
14
|
from lnschema_core.models import Dataset, Feature, FeatureSet
|
10
15
|
from lnschema_core.types import AnnDataLike, DataLike, FieldAttr
|
@@ -84,11 +89,12 @@ def __init__(
|
|
84
89
|
)
|
85
90
|
|
86
91
|
run = get_run(run)
|
87
|
-
# there are exactly
|
92
|
+
# there are exactly 3 ways of creating a Dataset object right now
|
88
93
|
# using exactly one file or using more than one file
|
89
94
|
# init file
|
90
95
|
if isinstance(data, (pd.DataFrame, ad.AnnData, File)):
|
91
96
|
files = None
|
97
|
+
storage = None
|
92
98
|
if isinstance(data, File):
|
93
99
|
file = data
|
94
100
|
if file._state.adding:
|
@@ -116,9 +122,23 @@ def __init__(
|
|
116
122
|
if file.description is None or file.description == "tmp":
|
117
123
|
file.description = f"See dataset {provisional_id}" # type: ignore
|
118
124
|
file._feature_sets = feature_sets
|
125
|
+
storage = None
|
126
|
+
# init from directory or bucket
|
127
|
+
elif isinstance(data, (str, Path, UPath)):
|
128
|
+
file = None
|
129
|
+
files = None
|
130
|
+
upath = UPath(data)
|
131
|
+
if not upath.is_dir:
|
132
|
+
raise ValueError(f"Can only pass buckets or directories, not {data}")
|
133
|
+
upath_str = upath.as_posix().rstrip("/")
|
134
|
+
region = get_storage_region(upath_str)
|
135
|
+
storage_settings = StorageSettings(upath_str, region)
|
136
|
+
storage = register_storage(storage_settings)
|
137
|
+
hash = None
|
119
138
|
# init files
|
120
139
|
else:
|
121
140
|
file = None
|
141
|
+
storage = None
|
122
142
|
if hasattr(data, "__getitem__"):
|
123
143
|
assert isinstance(data[0], File) # type: ignore
|
124
144
|
files = data
|
@@ -144,6 +164,7 @@ def __init__(
|
|
144
164
|
reference=reference,
|
145
165
|
reference_type=reference_type,
|
146
166
|
file=file,
|
167
|
+
storage=storage,
|
147
168
|
hash=hash,
|
148
169
|
run=run,
|
149
170
|
version=version,
|
@@ -276,8 +297,9 @@ def load(
|
|
276
297
|
is_run_input: Optional[bool] = None,
|
277
298
|
**kwargs,
|
278
299
|
) -> DataLike:
|
279
|
-
_track_run_input
|
300
|
+
# cannot call _track_run_input here, see comment further down
|
280
301
|
if self.file is not None:
|
302
|
+
_track_run_input(self, is_run_input)
|
281
303
|
return self.file.load()
|
282
304
|
else:
|
283
305
|
all_files = self.files.all()
|
@@ -291,9 +313,14 @@ def load(
|
|
291
313
|
objects = [file.load(is_run_input=False) for file in all_files]
|
292
314
|
file_ids = [file.id for file in all_files]
|
293
315
|
if isinstance(objects[0], pd.DataFrame):
|
294
|
-
|
316
|
+
concat_object = pd.concat(objects, join=join)
|
295
317
|
elif isinstance(objects[0], ad.AnnData):
|
296
|
-
|
318
|
+
concat_object = ad.concat(
|
319
|
+
objects, join=join, label="file_id", keys=file_ids
|
320
|
+
)
|
321
|
+
# only call it here because there might be errors during concat
|
322
|
+
_track_run_input(self, is_run_input)
|
323
|
+
return concat_object
|
297
324
|
|
298
325
|
|
299
326
|
# docstring handled through attach_func_to_class_method
|
@@ -316,6 +343,14 @@ def save(self, *args, **kwargs) -> None:
|
|
316
343
|
save_feature_set_links(self)
|
317
344
|
|
318
345
|
|
346
|
+
@property # type: ignore
|
347
|
+
@doc_args(Dataset.path.__doc__)
|
348
|
+
def path(self) -> Union[Path, UPath]:
|
349
|
+
"""{}"""
|
350
|
+
_track_run_input(self)
|
351
|
+
return self.storage.path
|
352
|
+
|
353
|
+
|
319
354
|
METHOD_NAMES = [
|
320
355
|
"__init__",
|
321
356
|
"from_anndata",
|
@@ -337,3 +372,5 @@ if _TESTING:
|
|
337
372
|
|
338
373
|
for name in METHOD_NAMES:
|
339
374
|
attach_func_to_class_method(name, Dataset, globals())
|
375
|
+
|
376
|
+
setattr(Dataset, "path", path)
|
lamindb/_file.py
CHANGED
@@ -85,9 +85,9 @@ def process_pathlike(
|
|
85
85
|
new_root_str = new_root.as_posix().rstrip("/")
|
86
86
|
region = get_storage_region(new_root_str)
|
87
87
|
storage_settings = StorageSettings(new_root_str, region)
|
88
|
-
register_storage(storage_settings)
|
88
|
+
storage_record = register_storage(storage_settings)
|
89
89
|
use_existing_storage_key = True
|
90
|
-
return
|
90
|
+
return storage_record, use_existing_storage_key
|
91
91
|
# if the filepath is local
|
92
92
|
else:
|
93
93
|
use_existing_storage_key = False
|
lamindb/_filter.py
CHANGED
@@ -3,10 +3,6 @@ from uuid import UUID
|
|
3
3
|
|
4
4
|
import dj_database_url
|
5
5
|
from django.db import connections
|
6
|
-
from django.db.backends.postgresql.base import (
|
7
|
-
DatabaseWrapper as DatabaseWrapperPostgres,
|
8
|
-
)
|
9
|
-
from django.db.backends.sqlite3.base import DatabaseWrapper as DatabaseWrapperSQLite
|
10
6
|
from lamindb_setup._init_instance import InstanceSettings
|
11
7
|
from lamindb_setup._load_instance import get_owner_name_from_identifier
|
12
8
|
from lamindb_setup.dev._hub_core import load_instance
|
@@ -22,11 +18,8 @@ def add_db_connection(isettings: InstanceSettings, using: str):
|
|
22
18
|
db_config["TIME_ZONE"] = "UTC"
|
23
19
|
db_config["OPTIONS"] = {}
|
24
20
|
db_config["AUTOCOMMIT"] = True
|
25
|
-
|
26
|
-
|
27
|
-
else:
|
28
|
-
db_wrapper = DatabaseWrapperPostgres(db_config, alias=using)
|
29
|
-
connections[using] = db_wrapper
|
21
|
+
|
22
|
+
connections.settings[using] = db_config
|
30
23
|
|
31
24
|
|
32
25
|
def filter(Registry: Type[Registry], using: str = None, **expressions) -> QuerySet:
|
lamindb/_query_manager.py
CHANGED
@@ -25,6 +25,16 @@ class QueryManager(models.Manager):
|
|
25
25
|
>>> manager.df()
|
26
26
|
"""
|
27
27
|
|
28
|
+
def _track_run_input_manager(self):
|
29
|
+
if hasattr(self, "source_field_name") and hasattr(self, "target_field_name"):
|
30
|
+
if self.source_field_name == "dataset" and self.target_field_name == "file":
|
31
|
+
from lamindb.dev._data import WARNING_RUN_TRANSFORM, _track_run_input
|
32
|
+
from lamindb.dev._run_context import run_context
|
33
|
+
|
34
|
+
if run_context.run is None:
|
35
|
+
logger.warning(WARNING_RUN_TRANSFORM)
|
36
|
+
_track_run_input(self.instance)
|
37
|
+
|
28
38
|
def list(self, field: Optional[str] = None):
|
29
39
|
"""Populate a list with the results.
|
30
40
|
|
@@ -41,6 +51,7 @@ class QueryManager(models.Manager):
|
|
41
51
|
>>> label.parents.list("name")
|
42
52
|
['ULabel1', 'ULabel2', 'ULabel3']
|
43
53
|
"""
|
54
|
+
self._track_run_input_manager()
|
44
55
|
if field is None:
|
45
56
|
return [item for item in self.all()]
|
46
57
|
else:
|
@@ -58,15 +69,8 @@ class QueryManager(models.Manager):
|
|
58
69
|
|
59
70
|
For `**kwargs`, see :meth:`lamindb.dev.QuerySet.df`.
|
60
71
|
"""
|
61
|
-
|
62
|
-
|
63
|
-
from lamindb.dev._data import WARNING_RUN_TRANSFORM, _track_run_input
|
64
|
-
from lamindb.dev._run_context import run_context
|
65
|
-
|
66
|
-
if run_context.run is None:
|
67
|
-
logger.warning(WARNING_RUN_TRANSFORM)
|
68
|
-
_track_run_input(self.instance)
|
69
|
-
return self.all()
|
72
|
+
self._track_run_input_manager()
|
73
|
+
return self.all_base_class()
|
70
74
|
|
71
75
|
def __getitem__(self, item: str):
|
72
76
|
try:
|
@@ -86,3 +90,10 @@ class QueryManager(models.Manager):
|
|
86
90
|
setattr(models.Manager, "list", QueryManager.list)
|
87
91
|
setattr(models.Manager, "df", QueryManager.df)
|
88
92
|
setattr(models.Manager, "__getitem__", QueryManager.__getitem__)
|
93
|
+
setattr(
|
94
|
+
models.Manager, "_track_run_input_manager", QueryManager._track_run_input_manager
|
95
|
+
)
|
96
|
+
# the two lines below would be easy if we could actually inherit; like this,
|
97
|
+
# they're suboptimal
|
98
|
+
setattr(models.Manager, "all_base_class", models.Manager.all)
|
99
|
+
setattr(models.Manager, "all", QueryManager.all)
|
lamindb/_registry.py
CHANGED
@@ -82,7 +82,12 @@ def __init__(orm: Registry, *args, **kwargs):
|
|
82
82
|
validate_required_fields(orm, kwargs)
|
83
83
|
from .dev._settings import settings
|
84
84
|
|
85
|
-
if
|
85
|
+
# do not search for names if an id is passed; this is important
|
86
|
+
# e.g. when synching ids from the notebook store to lamindb
|
87
|
+
has_consciously_provided_id = False
|
88
|
+
if "_has_consciously_provided_id" in kwargs:
|
89
|
+
has_consciously_provided_id = kwargs.pop("_has_consciously_provided_id")
|
90
|
+
if settings.upon_create_search_names and not has_consciously_provided_id:
|
86
91
|
result = suggest_objects_with_same_name(orm, kwargs)
|
87
92
|
if result == "object-with-same-name-exists":
|
88
93
|
if "version" in kwargs:
|
@@ -341,7 +346,7 @@ def transfer_to_default_db(record: Registry, save: bool = False):
|
|
341
346
|
record.transform_id = run_context.transform.id
|
342
347
|
else:
|
343
348
|
record.transform_id = None
|
344
|
-
if hasattr(record, "storage_id"):
|
349
|
+
if hasattr(record, "storage_id") and record.storage_id is not None:
|
345
350
|
record.storage.save()
|
346
351
|
record._state.db = "default"
|
347
352
|
if save:
|
lamindb/_storage.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
from lamindb_setup.dev._docs import doc_args
|
1
2
|
from lamindb_setup.dev.upath import UPath, create_path
|
2
3
|
from lnschema_core import Storage
|
3
4
|
|
@@ -6,4 +7,12 @@ def root_as_path(self) -> UPath:
|
|
6
7
|
return create_path(self.root)
|
7
8
|
|
8
9
|
|
10
|
+
@property # type: ignore
|
11
|
+
@doc_args(Storage.path.__doc__)
|
12
|
+
def path(self) -> UPath:
|
13
|
+
"""{}"""
|
14
|
+
return create_path(self.root)
|
15
|
+
|
16
|
+
|
9
17
|
setattr(Storage, "root_as_path", root_as_path)
|
18
|
+
setattr(Storage, "path", path)
|
lamindb/_transform.py
CHANGED
@@ -44,8 +44,14 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
44
44
|
)
|
45
45
|
if name is None:
|
46
46
|
name = is_new_version_of.name
|
47
|
+
|
48
|
+
# this is only because the user-facing constructor allows passing an id
|
49
|
+
# most others don't
|
47
50
|
if id is None:
|
51
|
+
has_consciously_provided_id = False
|
48
52
|
id = new_id
|
53
|
+
else:
|
54
|
+
has_consciously_provided_id = True
|
49
55
|
super(Transform, transform).__init__(
|
50
56
|
id=id,
|
51
57
|
name=name,
|
@@ -54,6 +60,7 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
54
60
|
version=version,
|
55
61
|
initial_version_id=initial_version_id,
|
56
62
|
reference=reference,
|
63
|
+
_has_consciously_provided_id=has_consciously_provided_id,
|
57
64
|
)
|
58
65
|
|
59
66
|
|
lamindb/dev/_label_manager.py
CHANGED
@@ -127,4 +127,7 @@ class LabelManager:
|
|
127
127
|
labels_list = labels.list()
|
128
128
|
for label in labels_list:
|
129
129
|
transfer_to_default_db(label, save=True)
|
130
|
-
|
130
|
+
# this should not occur as file and dataset should have the same attributes
|
131
|
+
# but this might not be true for custom schema
|
132
|
+
if hasattr(self._host, related_name):
|
133
|
+
getattr(self._host, related_name).add(*labels_list)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.55.
|
3
|
+
Version: 0.55.1
|
4
4
|
Summary: Open-source data platform for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -8,9 +8,9 @@ Description-Content-Type: text/markdown
|
|
8
8
|
Classifier: Programming Language :: Python :: 3.8
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
|
-
Requires-Dist: lnschema_core==0.
|
12
|
-
Requires-Dist: lamindb_setup==0.54.
|
13
|
-
Requires-Dist: lamin_utils==0.11.
|
11
|
+
Requires-Dist: lnschema_core==0.50.0
|
12
|
+
Requires-Dist: lamindb_setup==0.54.3
|
13
|
+
Requires-Dist: lamin_utils==0.11.4
|
14
14
|
Requires-Dist: rapidfuzz
|
15
15
|
Requires-Dist: pyarrow
|
16
16
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -35,7 +35,7 @@ Requires-Dist: pytest-cov ; extra == "dev"
|
|
35
35
|
Requires-Dist: nbproject_test>=0.3.0 ; extra == "dev"
|
36
36
|
Requires-Dist: faker-biology ; extra == "dev"
|
37
37
|
Requires-Dist: django-schema-graph ; extra == "erdiagram"
|
38
|
-
Requires-Dist: readfcs>=1.1.
|
38
|
+
Requires-Dist: readfcs>=1.1.7 ; extra == "fcs"
|
39
39
|
Requires-Dist: fsspec[gs]==2023.9.0 ; extra == "gcp"
|
40
40
|
Requires-Dist: nbproject==0.9.2 ; extra == "jupyter"
|
41
41
|
Requires-Dist: nbstripout ; extra == "jupyter"
|
@@ -1,19 +1,19 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/_dataset.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=ZCNDGjrECrHymRvihw53TRk4QYluCJ1URlRRhKLb4vA,2945
|
2
|
+
lamindb/_dataset.py,sha256=363qySdhwD9qO9op-fXRuTTflKmCdhGAOvf115gdjXo,13444
|
3
3
|
lamindb/_delete.py,sha256=wiYmYnvIEHrDdmw1NiXyfCY9mBt-FI5XNFi5jyR_mkA,1968
|
4
4
|
lamindb/_feature.py,sha256=5gsa7zsMVVtm1DID4dF3Vwo5llWyY1dH3Hg5hjaIrQk,5554
|
5
5
|
lamindb/_feature_set.py,sha256=YyRLc76piJdh8X6B917cFBiwJTdIxvQo0n5k2-HUGro,9373
|
6
|
-
lamindb/_file.py,sha256=
|
7
|
-
lamindb/_filter.py,sha256=
|
6
|
+
lamindb/_file.py,sha256=WotCwWGtVYLhyUARgJeInLgRnuKKRmx7E12IxS9Pe20,35548
|
7
|
+
lamindb/_filter.py,sha256=06hUpqZTTvXPOfywR0oJ47dd281KvnejRpTvaCAIV74,1515
|
8
8
|
lamindb/_from_values.py,sha256=OztkgQpyWy6CSrr5RcKwL1r_DTjCoYWyllR94LLd8qs,11595
|
9
9
|
lamindb/_parents.py,sha256=GZ6eUlIDtcAh4dCjBE2CC7KkY62WkKNFP3MMLtBjW44,13048
|
10
|
-
lamindb/_query_manager.py,sha256=
|
10
|
+
lamindb/_query_manager.py,sha256=MXueabWHqft7GWNkzmWbhfTqdk-0mKU7nWrhXG6wpYQ,3693
|
11
11
|
lamindb/_query_set.py,sha256=Lf7vLvOsEfUWRQ3iImSj4eQPmUK1KCgeoKS_m66Lp7o,10279
|
12
|
-
lamindb/_registry.py,sha256=
|
12
|
+
lamindb/_registry.py,sha256=N-CzfRokoWLl1ive5aYr5mBnUQuddQ1GCjElbeUFNsY,13404
|
13
13
|
lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
|
14
14
|
lamindb/_save.py,sha256=Lir8uDGS9XuYOaqogpT2EiDrWV_T_G9PbPvckbvUsoE,9474
|
15
|
-
lamindb/_storage.py,sha256=
|
16
|
-
lamindb/_transform.py,sha256=
|
15
|
+
lamindb/_storage.py,sha256=HUdXGj4839C606gvxWXo0tDITbtbuyJKOgUPhagYPTI,415
|
16
|
+
lamindb/_transform.py,sha256=O9JcOt0Khg9fF_1Rmid03XyxJms6OnxQp8rCAT3ZuDw,2499
|
17
17
|
lamindb/_ulabel.py,sha256=lEAENh_dluNkBi8xKUH_CjJNMXldOm2liy6Rg3IH1pE,1900
|
18
18
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
19
19
|
lamindb/_validate.py,sha256=6FQIxE8ZK4HwrrmrmFSjeCFKc-mxaHej5fuBh_4y2Tk,13656
|
@@ -21,7 +21,7 @@ lamindb/_view.py,sha256=bzx6e-Cif2CmDQkOu6jMrq_d5rsu6g7hhdaK_sYBv_Y,2150
|
|
21
21
|
lamindb/dev/__init__.py,sha256=Ja96dxb0t7raGsCr8QxqCabyEzIxeVGlL_IgmhxdsB8,1010
|
22
22
|
lamindb/dev/_data.py,sha256=1uOGPpa1OWIM1QcgpS_M_l35sDC8Fltk-fYf_5cKxUc,13540
|
23
23
|
lamindb/dev/_feature_manager.py,sha256=z7uWuSO5fWl0pCKdCsc86rRrcKnAmtx2wAuOTiZn5XE,5869
|
24
|
-
lamindb/dev/_label_manager.py,sha256=
|
24
|
+
lamindb/dev/_label_manager.py,sha256=yW0KgM1WMYe4NOpWNCO5ymw5CY2_UQl3t9tXZER4Gdc,4887
|
25
25
|
lamindb/dev/_priors.py,sha256=eSZEEijmeFs3zcrU27r3T2sSGdsK-cvy7vl6ukDYaU8,785
|
26
26
|
lamindb/dev/_run_context.py,sha256=XLlEknzOe0Cabi9fvzdQl5UeLoU02fAFkCdB7S2FLZM,19109
|
27
27
|
lamindb/dev/_settings.py,sha256=b0uq5eB6Dk7BQjBoK4a-9hy8y1DPsq0rErAQjZ4DibM,3398
|
@@ -41,8 +41,8 @@ lamindb/dev/storage/file.py,sha256=CrR8dhfqEWPj7jhNj7zujQE1WT4gWGCP0qKPIIta0bg,7
|
|
41
41
|
lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
|
42
42
|
lamindb/setup/__init__.py,sha256=8-0F2C4Glx23-b8-D_1CBGgRBM5PppVhazhoXZYOLsg,275
|
43
43
|
lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
|
44
|
-
lamindb-0.55.
|
45
|
-
lamindb-0.55.
|
46
|
-
lamindb-0.55.
|
47
|
-
lamindb-0.55.
|
48
|
-
lamindb-0.55.
|
44
|
+
lamindb-0.55.1.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
|
45
|
+
lamindb-0.55.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
46
|
+
lamindb-0.55.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
47
|
+
lamindb-0.55.1.dist-info/METADATA,sha256=E5NRemxlnVm8u55H11hKjH1ezGF6ZYnd4z0zHvpC-Z4,3022
|
48
|
+
lamindb-0.55.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|