lamindb 0.45.0__py3-none-any.whl → 0.46a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +30 -9
- lamindb/_context.py +11 -12
- lamindb/_dataset.py +142 -0
- lamindb/_delete.py +6 -6
- lamindb/_feature_set.py +138 -0
- lamindb/_file.py +322 -81
- lamindb/_from_values.py +57 -160
- lamindb/_orm.py +398 -0
- lamindb/_save.py +26 -10
- lamindb/_select.py +3 -3
- lamindb/_view.py +2 -2
- lamindb/dev/__init__.py +2 -2
- lamindb/dev/_settings.py +2 -1
- lamindb/dev/datasets/__init__.py +6 -0
- lamindb/dev/datasets/_core.py +30 -0
- lamindb/dev/hashing.py +4 -0
- lamindb/dev/storage/__init__.py +4 -3
- lamindb/dev/storage/_backed_access.py +3 -3
- lamindb/dev/storage/{_file.py → file.py} +48 -3
- lamindb/dev/storage/{_object.py → object.py} +1 -0
- lamindb/dev/utils.py +9 -0
- lamindb/types.py +9 -1
- {lamindb-0.45.0.dist-info → lamindb-0.46a1.dist-info}/METADATA +20 -17
- lamindb-0.46a1.dist-info/RECORD +36 -0
- lamindb/_baseorm_methods.py +0 -535
- lamindb/_featureset_methods.py +0 -73
- lamindb/_file_access.py +0 -48
- lamindb/_file_methods.py +0 -319
- lamindb-0.45.0.dist-info/RECORD +0 -36
- /lamindb/{_transform_methods.py → _transform.py} +0 -0
- {lamindb-0.45.0.dist-info → lamindb-0.46a1.dist-info}/LICENSE +0 -0
- {lamindb-0.45.0.dist-info → lamindb-0.46a1.dist-info}/WHEEL +0 -0
- {lamindb-0.45.0.dist-info → lamindb-0.46a1.dist-info}/entry_points.txt +0 -0
lamindb/_orm.py
ADDED
@@ -0,0 +1,398 @@
+import builtins
+from typing import Dict, Iterable, List, Literal, NamedTuple, Optional, Set, Union
+
+import pandas as pd
+from django.core.exceptions import FieldDoesNotExist
+from django.db.models import CharField, TextField
+from django.db.models.query_utils import DeferredAttribute as Field
+from lamin_logger import logger
+from lamin_logger._lookup import Lookup
+from lamin_logger._search import search as base_search
+from lamindb_setup.dev._docs import doc_args
+from lnschema_core import ORM
+from lnschema_core.types import ListLike, StrField
+
+from lamindb.dev.utils import attach_func_to_class_method
+
+from . import _TESTING
+from ._from_values import _has_species_field, get_or_create_records
+from .dev._settings import settings
+
+IPYTHON = getattr(builtins, "__IPYTHON__", False)
+
+
+class ValidationError(Exception):
+    pass
+
+
+def init_self_from_db(self: ORM, existing_record: ORM):
+    new_args = [
+        getattr(existing_record, field.attname) for field in self._meta.concrete_fields
+    ]
+    super(self.__class__, self).__init__(*new_args)
+    self._state.adding = False  # mimic from_db
+    self._state.db = "default"
+
+
+def validate_required_fields(orm: ORM, kwargs):
+    required_fields = {
+        k.name for k in orm._meta.fields if not k.null and k.default is None
+    }
+    required_fields_not_passed = {k: None for k in required_fields if k not in kwargs}
+    kwargs.update(required_fields_not_passed)
+    missing_fields = [
+        k for k, v in kwargs.items() if v is None and k in required_fields
+    ]
+    if missing_fields:
+        raise TypeError(f"{missing_fields} are required.")
+
+
+def suggest_objects_with_same_name(orm: ORM, kwargs) -> Optional[str]:
+    if kwargs.get("name") is None:
+        return None
+    else:
+        results = orm.search(kwargs["name"])
+        if results.shape[0] == 0:
+            return None
+
+        # subset results to those with at least 0.5 levensteihn distance
+        results = results.loc[results.__ratio__ >= 90]
+
+        # test for exact match
+        if len(results) > 0:
+            if results.index[0] == kwargs["name"]:
+                logger.warning("Object with exact same name exists, returning it")
+                return "object-with-same-name-exists"
+            else:
+                msg = "Entries with similar names exist:"
+                if IPYTHON:
+                    from IPython.display import display
+
+                    logger.warning(f"{msg}")
+                    display(results)
+                else:
+                    logger.warning(f"{msg}\n{results.name}")
+    return None
+
+
+def __init__(orm: ORM, *args, **kwargs):
+    if not args:
+        validate_required_fields(orm, kwargs)
+        if settings.upon_create_search_names:
+            result = suggest_objects_with_same_name(orm, kwargs)
+            if result == "object-with-same-name-exists":
+                existing_record = orm.select(name=kwargs["name"])[0]
+                init_self_from_db(orm, existing_record)
+                return None
+        super(ORM, orm).__init__(**kwargs)
+    elif len(args) != len(orm._meta.concrete_fields):
+        raise ValueError("Please provide keyword arguments, not plain arguments")
+    else:
+        # object is loaded from DB (**kwargs could be omitted below, I believe)
+        super(ORM, orm).__init__(*args, **kwargs)
+
+
+@classmethod  # type:ignore
+@doc_args(ORM.from_values.__doc__)
+def from_values(cls, identifiers: ListLike, field: StrField, **kwargs) -> List["ORM"]:
+    """{}"""
+    if isinstance(field, str):
+        field = getattr(cls, field)
+    if not isinstance(field, Field):  # field is DeferredAttribute
+        raise TypeError(
+            "field must be a string or an ORM field, e.g., `CellType.name`!"
+        )
+    from_bionty = True if cls.__module__.startswith("lnschema_bionty.") else False
+    return get_or_create_records(
+        iterable=identifiers, field=field, from_bionty=from_bionty, **kwargs
+    )
+
+
+@classmethod  # type: ignore
+@doc_args(ORM.search.__doc__)
+def search(
+    cls,
+    string: str,
+    *,
+    field: Optional[StrField] = None,
+    top_hit: bool = False,
+    case_sensitive: bool = True,
+    synonyms_field: Optional[Union[str, TextField, CharField]] = "synonyms",
+    synonyms_sep: str = "|",
+) -> Union["pd.DataFrame", "ORM"]:
+    """{}"""
+    if field is None:
+        field = get_default_str_field(cls)
+    if not isinstance(field, str):
+        field = field.field.name
+
+    records = cls.objects.all()
+    df = pd.DataFrame.from_records(records.values())
+
+    result = base_search(
+        df=df,
+        string=string,
+        field=field,
+        synonyms_field=str(synonyms_field),
+        case_sensitive=case_sensitive,
+        return_ranked_results=not top_hit,
+        synonyms_sep=synonyms_sep,
+        tuple_name=cls.__name__,
+    )
+
+    if not top_hit or result is None:
+        return result
+    else:
+        if isinstance(result, list):
+            return [records.get(id=r.id) for r in result]
+        else:
+            return records.get(id=result.id)
+
+
+@classmethod  # type: ignore
+@doc_args(ORM.lookup.__doc__)
+def lookup(cls, field: Optional[StrField] = None) -> NamedTuple:
+    """{}"""
+    if field is None:
+        field = get_default_str_field(cls)
+    if not isinstance(field, str):
+        field = field.field.name
+
+    records = cls.objects.all()
+
+    return Lookup(
+        records=records,
+        values=[i.get(field) for i in records.values()],
+        tuple_name=cls.__name__,
+        prefix="ln",
+    ).lookup()
+
+
+@classmethod  # type: ignore
+@doc_args(ORM.inspect.__doc__)
+def inspect(
+    cls,
+    identifiers: ListLike,
+    field: StrField,
+    *,
+    case_sensitive: bool = False,
+    inspect_synonyms: bool = True,
+    return_df: bool = False,
+    logging: bool = True,
+    **kwargs,
+) -> Union["pd.DataFrame", Dict[str, List[str]]]:
+    """{}"""
+    from lamin_logger._inspect import inspect
+
+    if not isinstance(field, str):
+        field = field.field.name
+
+    return inspect(
+        df=_filter_df_based_on_species(orm=cls, species=kwargs.get("species")),
+        identifiers=identifiers,
+        field=str(field),
+        case_sensitive=case_sensitive,
+        inspect_synonyms=inspect_synonyms,
+        return_df=return_df,
+        logging=logging,
+    )
+
+
+@classmethod  # type: ignore
+@doc_args(ORM.map_synonyms.__doc__)
+def map_synonyms(
+    cls,
+    synonyms: Iterable,
+    *,
+    return_mapper: bool = False,
+    case_sensitive: bool = False,
+    keep: Literal["first", "last", False] = "first",
+    synonyms_field: str = "synonyms",
+    synonyms_sep: str = "|",
+    field: Optional[str] = None,
+    **kwargs,
+) -> Union[List[str], Dict[str, str]]:
+    """{}"""
+    from lamin_logger._map_synonyms import map_synonyms
+
+    if field is None:
+        field = get_default_str_field(cls)
+    if not isinstance(field, str):
+        field = field.field.name
+
+    try:
+        cls._meta.get_field(synonyms_field)
+        df = _filter_df_based_on_species(orm=cls, species=kwargs.get("species"))
+    except FieldDoesNotExist:
+        df = pd.DataFrame()
+    return map_synonyms(
+        df=df,
+        identifiers=synonyms,
+        field=field,
+        return_mapper=return_mapper,
+        case_sensitive=case_sensitive,
+        keep=keep,
+        synonyms_field=synonyms_field,
+        sep=synonyms_sep,
+    )
+
+
+def _filter_df_based_on_species(orm: ORM, species: Optional[Union[str, ORM]] = None):
+    import pandas as pd
+
+    records = orm.objects.all()
+    if _has_species_field(orm):
+        # here, we can safely import lnschema_bionty
+        from lnschema_bionty._bionty import create_or_get_species_record
+
+        species_record = create_or_get_species_record(species=species, orm=orm)
+        if species_record is not None:
+            records = records.filter(species__name=species_record.name)
+
+    return pd.DataFrame.from_records(records.values())
+
+
+def get_default_str_field(orm: ORM) -> str:
+    """Get the 1st char or text field from the orm."""
+    model_field_names = [i.name for i in orm._meta.fields]
+
+    # set default field
+    if "name" in model_field_names:
+        # by default use the name field
+        field = orm._meta.get_field("name")
+    else:
+        # first char or text field that doesn't contain "id"
+        for i in orm._meta.fields:
+            if "id" in i.name:
+                continue
+            if i.get_internal_type() in {"CharField", "TextField"}:
+                field = i
+                break
+
+    # no default field can be found
+    if field is None:
+        raise ValueError("Please specify a field to search against!")
+
+    return field.name
+
+
+def _add_or_remove_synonyms(
+    synonym: Union[str, Iterable],
+    record: ORM,
+    action: Literal["add", "remove"],
+    force: bool = False,
+):
+    """Add or remove synonyms."""
+
+    def check_synonyms_in_all_records(synonyms: Set[str], record: ORM):
+        """Errors if input synonym is associated with other records in the DB."""
+        import pandas as pd
+        from IPython.display import display
+
+        syns_all = (
+            record.__class__.objects.exclude(synonyms="").exclude(synonyms=None).all()
+        )
+        if len(syns_all) == 0:
+            return
+        df = pd.DataFrame(syns_all.values())
+        df["synonyms"] = df["synonyms"].str.split("|")
+        df = df.explode("synonyms")
+        matches_df = df[(df["synonyms"].isin(synonyms)) & (df["id"] != record.id)]
+        if matches_df.shape[0] > 0:
+            records_df = pd.DataFrame(syns_all.filter(id__in=matches_df["id"]).values())
+            logger.error(
+                f"Input synonyms {matches_df['synonyms'].unique()} already associated"
+                " with the following records:\n(Pass `force=True` to ignore this error)"
+            )
+            display(records_df)
+            raise SystemExit(AssertionError)
+
+    # passed synonyms
+    if isinstance(synonym, str):
+        syn_new_set = set([synonym])
+    else:
+        syn_new_set = set(synonym)
+    # nothing happens when passing an empty string or list
+    if len(syn_new_set) == 0:
+        return
+    # because we use | as the separator
+    if any(["|" in i for i in syn_new_set]):
+        raise AssertionError("A synonym can't contain '|'!")
+
+    # existing synonyms
+    syns_exist = record.synonyms
+    if syns_exist is None or len(syns_exist) == 0:
+        syns_exist_set = set()
+    else:
+        syns_exist_set = set(syns_exist.split("|"))
+
+    if action == "add":
+        if not force:
+            check_synonyms_in_all_records(syn_new_set, record)
+        syns_exist_set.update(syn_new_set)
+    elif action == "remove":
+        syns_exist_set = syns_exist_set.difference(syn_new_set)
+
+    if len(syns_exist_set) == 0:
+        syns_str = None
+    else:
+        syns_str = "|".join(syns_exist_set)
+
+    record.synonyms = syns_str
+
+    # if record is already in DB, save the changes to DB
+    if not record._state.adding:
+        record.save()
+
+
+def _check_synonyms_field_exist(record: ORM):
+    try:
+        record.__getattribute__("synonyms")
+    except AttributeError:
+        raise NotImplementedError(
+            f"No synonyms field found in table {record.__class__.__name__}!"
+        )
+
+
+def add_synonym(self, synonym: Union[str, ListLike], force: bool = False):
+    _check_synonyms_field_exist(self)
+    _add_or_remove_synonyms(synonym=synonym, record=self, force=force, action="add")
+
+
+def remove_synonym(self, synonym: Union[str, ListLike]):
+    _check_synonyms_field_exist(self)
+    _add_or_remove_synonyms(synonym=synonym, record=self, action="remove")
+
+
+METHOD_NAMES = [
+    "__init__",
+    "search",
+    "lookup",
+    "map_synonyms",
+    "inspect",
+    "add_synonym",
+    "remove_synonym",
+    "from_values",
+]
+
+if _TESTING:
+    from inspect import signature
+
+    SIGS = {
+        name: signature(getattr(ORM, name))
+        for name in METHOD_NAMES
+        if not name.startswith("__")
+    }
+
+for name in METHOD_NAMES:
+    attach_func_to_class_method(name, ORM, globals())
+
+
+@classmethod  # type: ignore
+def __name_with_type__(cls) -> str:
+    schema_module_name = cls.__module__.split(".")[0]
+    schema_name = schema_module_name.replace("lnschema_", "")
+    return f"{schema_name}.{cls.__name__}"
+
+
+setattr(ORM, "__name_with_type__", __name_with_type__)
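These functions are attached to every ORM subclass via `attach_func_to_class_method`. A minimal usage sketch follows; `CellType` is borrowed from `lnschema_bionty` only because the `from_values` error message cites it, and the exact import path and registry contents are assumptions, not part of this diff.

import lamindb as ln
from lnschema_bionty import CellType  # assumed import path

# create validated records from values of the `name` field
records = CellType.from_values(["T cell", "B cell"], field=CellType.name)
ln.save(records)

CellType.search("T cel")                # ranked pd.DataFrame of fuzzy matches
CellType.search("T cel", top_hit=True)  # single best-matching record
lookup = CellType.lookup()              # auto-complete namedtuple over all records

record = CellType.select(name="T cell")[0]
record.add_synonym("T-cell")            # stored "|"-separated; saved if already in the DB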
lamindb/_save.py
CHANGED
@@ -5,11 +5,14 @@ from typing import Iterable, List, Optional, Tuple, Union, overload # noqa
 import lamindb_setup
 from django.db import transaction
 from lamin_logger import logger
-from lnschema_core.models import
+from lnschema_core.models import ORM, File

-from lamindb._file_access import auto_storage_key_from_file
 from lamindb.dev.storage import store_object
-from lamindb.dev.storage.
+from lamindb.dev.storage.file import (
+    auto_storage_key_from_file,
+    delete_storage_using_key,
+    print_hook,
+)

 try:
     from lamindb.dev.storage._zarr import write_adata_zarr
@@ -20,7 +23,7 @@ except ImportError:


 @overload
-def save(record:
+def save(record: ORM) -> ORM:
     ...


@@ -28,11 +31,11 @@ def save(record: BaseORM) -> BaseORM:
 # Overloaded function signature 2 will never be matched: signature 1's parameter
 # type(s) are the same or broader
 @overload
-def save(records: Iterable[
+def save(records: Iterable[ORM]) -> Iterable[ORM]:  # type: ignore
     ...


-def save(record: Union[
+def save(record: Union[ORM, Iterable[ORM]], **kwargs) -> None:  # type: ignore
     """Save to database & storage.

     Inserts a new :term:`record` if the corresponding row doesn't exist.
@@ -42,7 +45,7 @@ def save(record: Union[BaseORM, Iterable[BaseORM]], **fields) -> None:  # type:
     passing it to `save`.

     Args:
-        record: One or multiple `
+        record: One or multiple `ORM` objects.

     Returns:
         The record as returned from the database with a `created_at` timestamp.
@@ -68,18 +71,31 @@ def save(record: Union[BaseORM, Iterable[BaseORM]], **fields) -> None:  # type:
     """
     if isinstance(record, Iterable):
         records = set(record)
-    elif isinstance(record,
+    elif isinstance(record, ORM):
         records = {record}

+    def atomic_save(records: Iterable[ORM], **kwargs):
+        with transaction.atomic():
+            for record in records:
+                record.save(**kwargs)
+
     # we're distinguishing between files and non-files
     # because for files, we want to bulk-upload
     # rather than upload one-by-one
     files = {r for r in records if isinstance(r, File)}
     non_files = records.difference(files)
     if non_files:
-
+        non_files_with_parents = {r for r in non_files if hasattr(r, "_parents")}
+        if len(non_files_with_parents) < 2 or kwargs.get("parents") is False:
+            atomic_save(non_files)
+        else:
+            logger.warning("Recursing through parents will take a while...")
+            # first save all records without recursing parents
+            atomic_save(non_files, parents=False)
+            # save the record with parents one by one
             for record in non_files:
                 record.save()
+
     if files:
         with transaction.atomic():
             for record in files:
@@ -190,5 +206,5 @@ def upload_data_object(file) -> None:
     ):
         logger.hint(f"storing file {file.id} with key {file_storage_key}")
         storagepath = lamindb_setup.settings.storage.key_to_filepath(file_storage_key)
-        print_progress = partial(print_hook, filepath=file.
+        print_progress = partial(print_hook, filepath=file.key)
         write_adata_zarr(file._memory_rep, storagepath, callback=print_progress)
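Non-file records now go through the nested `atomic_save` helper: with fewer than two records carrying `_parents` (or with `parents=False` passed), everything is saved in a single transaction; otherwise parents are skipped first and each record is then saved individually. A hedged sketch of the resulting call pattern, reusing the hypothetical `CellType` records from the sketch above:

import lamindb as ln
from lnschema_bionty import CellType  # assumed import path

cell_types = CellType.from_values(["T cell", "B cell"], field=CellType.name)

# bulk save; parent recursion kicks in automatically for several bionty records
ln.save(cell_types)

# explicitly skip the (slow) parent recursion
ln.save(cell_types, parents=False)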
lamindb/_select.py
CHANGED
@@ -1,12 +1,12 @@
 from typing import Union

 from django.db.models import Manager
-from lnschema_core import
+from lnschema_core import ORM
 from lnschema_core._queryset import QuerySet


-def select(*ORM:
-    """Query
+def select(*ORM: ORM, **expressions) -> Union[QuerySet, Manager]:
+    """Query records.

     Guide: :doc:`/guide/select`.

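The new signature takes one or more ORM classes plus field expressions and returns a QuerySet or Manager. A sketch (the `suffix` filter value is illustrative):

import lamindb as ln

qs = ln.select(ln.File, suffix=".h5ad")  # QuerySet filtered by field values
first = qs.first()                       # Django-style QuerySet methods are assumed to apply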
lamindb/_view.py
CHANGED
@@ -6,7 +6,7 @@ from IPython.display import display
 from lamin_logger import colors
 from lamindb_setup import settings
 from lamindb_setup.dev._setup_schema import get_schema_module_name
-from lnschema_core import
+from lnschema_core import ORM

 from ._select import select

@@ -30,7 +30,7 @@ def view(n: int = 10, schema: Optional[str] = None):
     orms = [
         i
         for i in schema_module.__dict__.values()
-        if inspect.isclass(i) and issubclass(i,
+        if inspect.isclass(i) and issubclass(i, ORM) and i.__name__ != "ORM"
     ]
     if len(schema_names) > 1:
         section = f"* module: {colors.green(colors.bold(schema_name))} *"
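`view` now collects ORM subclasses from each schema module while excluding the base `ORM` class itself. Example calls per the signature in the hunk header (the schema name is illustrative):

import lamindb as ln

ln.view()                      # latest 10 records per registry
ln.view(n=5, schema="bionty")  # restrict to one schema module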
lamindb/dev/__init__.py
CHANGED
@@ -3,7 +3,7 @@
 .. autosummary::
    :toctree: .

-
+   ORM
    QuerySet
    datasets
    hashing
@@ -12,7 +12,7 @@
 """

 from lnschema_core._queryset import QuerySet
-from lnschema_core.models import
+from lnschema_core.models import ORM

 from . import datasets  # noqa
 from ._settings import Settings
lamindb/dev/_settings.py
CHANGED
@@ -15,6 +15,7 @@ class Settings:

     def __init__(self):
         self._verbosity: int = 2  # info-level logging
+        logger.set_verbosity(self._verbosity)

     upon_file_create_if_hash_exists: Literal[
         "warn_return_existing", "error", "warn_create_new"
@@ -70,7 +71,7 @@ class Settings:

     @property
     def verbosity(self) -> int:
-        """Verbosity (default
+        """Verbosity (default 3).

         - 0: only show 'error' messages
         - 1: also show 'warning' messages
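Verbosity is now pushed to the logger when `Settings` is initialized, with levels per the docstring (0 errors, 1 adds warnings, 2 adds info, 3 adds hints). Assuming the instance is exposed as `ln.settings`:

import lamindb as ln

ln.settings.verbosity = 1  # keep errors and warnings, silence info and hints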
lamindb/dev/datasets/__init__.py
CHANGED
@@ -11,6 +11,9 @@
    dir_scrnaseq_cellranger
    generate_cell_ranger_files
    df_iris
+   df_iris_in_meter
+   df_iris_in_meter_batch1
+   df_iris_in_meter_batch2
    anndata_mouse_sc_lymph_node
    anndata_human_immune_cells
    anndata_pbmc68k_reduced
@@ -26,6 +29,9 @@ from ._core import (
     anndata_pbmc68k_reduced,
     anndata_with_obs,
     df_iris,
+    df_iris_in_meter,
+    df_iris_in_meter_batch1,
+    df_iris_in_meter_batch2,
     dir_scrnaseq_cellranger,
     file_bam,
     file_fastq,
lamindb/dev/datasets/_core.py
CHANGED
@@ -169,6 +169,36 @@ def df_iris() -> pd.DataFrame:
     return pd.read_parquet(filepath)


+def df_iris_in_meter() -> pd.DataFrame:
+    """The iris dataset with lenghts in meter."""
+    df = df_iris()
+    # rename columns
+    df.rename(
+        columns={
+            "sepal length (cm)": "sepal_length",
+            "sepal width (cm)": "sepal_width",
+            "petal length (cm)": "petal_length",
+            "petal width (cm)": "petal_width",
+            "target": "iris_species_code",
+        },
+        inplace=True,
+    )
+    df[["sepal_length", "sepal_width", "petal_length", "petal_width"]] /= 100
+    return df
+
+
+def df_iris_in_meter_batch1() -> pd.DataFrame:
+    """The iris dataset with lenghts in meter."""
+    df_iris = df_iris_in_meter()
+    return df_iris.iloc[: len(df_iris) // 2]
+
+
+def df_iris_in_meter_batch2() -> pd.DataFrame:
+    """The iris dataset with lenghts in meter."""
+    df_iris = df_iris_in_meter()
+    return df_iris.iloc[len(df_iris) // 2 :]
+
+
 def generate_cell_ranger_files(
     sample_name: str, basedir: Union[str, Path] = "./", output_only: bool = True
 ):
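The three new helpers derive from `df_iris`: columns are renamed, the length columns are divided by 100, and the two batch variants split the rows in half. A quick sketch:

from lamindb.dev import datasets

df = datasets.df_iris_in_meter()
batch1 = datasets.df_iris_in_meter_batch1()  # first half of the rows
batch2 = datasets.df_iris_in_meter_batch2()  # remaining rows
assert len(batch1) + len(batch2) == len(df)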
lamindb/dev/hashing.py
CHANGED
@@ -19,6 +19,10 @@ def to_b64_str(bstr: bytes):
     return b64


+def b16_to_b64(s: str):
+    return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
+
+
 # a lot to read about this: lamin-notes/2022/hashing
 def hash_set(s: Set[str]) -> str:
     bstr = ":".join(sorted(s)).encode("utf-8")
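`b16_to_b64` converts a hex (base16) digest, optionally wrapped in quotes, into the package's base64 string form via the existing `to_b64_str`; the example digest below is illustrative only:

from lamindb.dev.hashing import b16_to_b64

b16_to_b64('"9e107d9d372bb6826bd81d3542a419d6"')  # quotes stripped, lowercase accepted (casefold=True)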
lamindb/dev/storage/__init__.py
CHANGED
@@ -4,6 +4,7 @@
    :toctree: .

    AnnDataAccessor
+   BackedAccessor
    UPath

 """
@@ -13,8 +14,8 @@ from lamindb_setup.dev.upath import infer_filesystem as _infer_filesystem
 from ._anndata_sizes import size_adata

 try:
-    from ._backed_access import AnnDataAccessor
+    from ._backed_access import AnnDataAccessor, BackedAccessor
 except ImportError:
     pass
-from .
-from .
+from .file import delete_storage, load_to_memory, store_object
+from .object import infer_suffix, write_to_file
lamindb/dev/storage/_backed_access.py
CHANGED
@@ -16,7 +16,7 @@ from fsspec.core import OpenFile
 from lamindb_setup.dev.upath import infer_filesystem
 from lnschema_core import File

-from lamindb.
+from lamindb.dev.storage.file import filepath_from_file

 ZARR_INSTALLED = False
 try:
@@ -430,9 +430,9 @@ if ZARR_INSTALLED:
     )

     if file.suffix in (".h5ad", ".zrad"):
-        return AnnDataAccessor(conn, storage, file.
+        return AnnDataAccessor(conn, storage, file.key)
     else:
         if get_spec(storage).encoding_type == "anndata":
-            return AnnDataAccessor(conn, storage, file.
+            return AnnDataAccessor(conn, storage, file.key)
         else:
             return BackedAccessor(conn, storage)
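Backed access now names the accessor by `file.key` and falls back to `BackedAccessor` for non-AnnData content; both accessor classes are re-exported from `lamindb.dev.storage` (see the `__init__.py` hunk above). A hedged sketch, assuming `File.backed()` is the public entry point (not shown in this diff):

import lamindb as ln

file = ln.select(ln.File, suffix=".h5ad").first()
backed = file.backed()  # AnnDataAccessor for .h5ad/.zrad, else BackedAccessor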
|