lamindb 0.45a1__py3-none-any.whl → 0.46a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,535 +0,0 @@
1
- import builtins
2
- from typing import (
3
- Dict,
4
- Iterable,
5
- List,
6
- Literal,
7
- NamedTuple,
8
- Optional,
9
- Set,
10
- Tuple,
11
- Union,
12
- )
13
-
14
- import pandas as pd
15
- from django.core.exceptions import FieldDoesNotExist
16
- from django.db.models import CharField, TextField
17
- from lamin_logger import logger
18
- from lamin_logger._lookup import Lookup
19
- from lnschema_core import BaseORM
20
-
21
- from ._from_values import Field, ListLike, get_or_create_records
22
- from .dev._settings import settings
23
-
24
- _is_ipython = getattr(builtins, "__IPYTHON__", False)
25
-
26
-
27
- class ValidationError(Exception):
28
- pass
29
-
30
-
31
- def validate_required_fields(orm: BaseORM, kwargs):
32
- required_fields = {
33
- k.name for k in orm._meta.fields if not k.null and k.default is None
34
- }
35
- required_fields_not_passed = {k: None for k in required_fields if k not in kwargs}
36
- kwargs.update(required_fields_not_passed)
37
- missing_fields = [
38
- k for k, v in kwargs.items() if v is None and k in required_fields
39
- ]
40
- if missing_fields:
41
- raise TypeError(f"{missing_fields} are required.")
42
-
43
-
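To illustrate the check above, a hedged sketch (the `lb.CellType` entity and its required `name` field are assumptions; behavior per the code in this diff): constructing a record without a required, non-nullable field raises a TypeError listing the missing field names.

import lnschema_bionty as lb

try:
    lb.CellType()  # assumes "name" is non-nullable with no default
except TypeError as err:
    print(err)  # e.g. "['name'] are required."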
44
- def suggest_objects_with_same_name(orm: BaseORM, kwargs) -> Optional[str]:
45
- if kwargs.get("name") is None:
46
- return None
47
- else:
48
- results = orm.search(kwargs["name"])
49
- if results.shape[0] == 0:
50
- return None
51
-
52
- # subset results to those with a similarity ratio of at least 90
53
- results = results.loc[results.__ratio__ >= 90]
54
-
55
- # test for exact match
56
- if len(results) > 0:
57
- if results.index[0] == kwargs["name"]:
58
- logger.warning("Object with exact same name exists, returning it")
59
- return "object-with-same-name-exists"
60
- else:
61
- msg = "Entries with similar names exist:"
62
- if _is_ipython:
63
- from IPython.display import display
64
-
65
- logger.warning(f"{msg}")
66
- display(results)
67
- else:
68
- logger.warning(f"{msg}\n{results.name}")
69
- return None
70
-
71
-
72
- def return_object_from_bionty(orm: BaseORM, *args, **kwargs) -> Dict:
73
- """Pass bionty search/lookup results."""
74
- from lnschema_bionty._bionty import (
75
- create_or_get_species_record,
76
- encode_id,
77
- get_bionty_source_record,
78
- )
79
-
80
- arg = args[0]
81
- if isinstance(arg, Tuple): # type:ignore
82
- bionty_kwargs = arg._asdict()
83
- else:
84
- bionty_kwargs = arg[0]._asdict()
85
-
86
- if len(bionty_kwargs) > 0:
87
- import bionty as bt
88
-
89
- # add species and bionty_source
90
- species_record = create_or_get_species_record(
91
- orm=orm, species=kwargs.get("species")
92
- )
93
- if species_record is not None:
94
- bionty_kwargs["species"] = species_record
95
- bionty_object = getattr(bt, orm.__class__.__name__)(
96
- species=species_record.name if species_record is not None else None
97
- )
98
- bionty_kwargs["bionty_source"] = get_bionty_source_record(bionty_object)
99
-
100
- model_field_names = {i.name for i in orm._meta.fields}
101
- bionty_kwargs = {
102
- k: v for k, v in bionty_kwargs.items() if k in model_field_names
103
- }
104
- return encode_id(orm=orm, kwargs=bionty_kwargs)
105
-
106
-
107
- def __init__(orm: BaseORM, *args, **kwargs):
108
- if not args:
109
- validate_required_fields(orm, kwargs)
110
- if settings.upon_create_search_names:
111
- result = suggest_objects_with_same_name(orm, kwargs)
112
- if result == "object-with-same-name-exists":
113
- existing_object = orm.select(name=kwargs["name"])[0]
114
- new_args = [
115
- getattr(existing_object, field.attname)
116
- for field in orm._meta.concrete_fields
117
- ]
118
- super(BaseORM, orm).__init__(*new_args)
119
- orm._state.adding = False # mimic from_db
120
- return None
121
- if orm.__module__.startswith("lnschema_bionty"):
122
- from lnschema_bionty._bionty import encode_id
123
-
124
- kwargs = encode_id(orm=orm, kwargs=kwargs)
125
- super(BaseORM, orm).__init__(**kwargs)
126
- elif (
127
- orm.__module__.startswith("lnschema_bionty")
128
- and args
129
- and len(args) == 1
130
- and isinstance(args[0], (Tuple, List)) # type:ignore
131
- and len(args[0]) > 0
132
- ):
133
- if isinstance(args[0], List) and len(args[0]) > 1:
134
- logger.warning(
135
- "Multiple lookup/search results are passed, only returning record from"
136
- " the first entry"
137
- )
138
- result = return_object_from_bionty(orm, *args, **kwargs) # type:ignore
139
- try:
140
- existing_object = orm.select(**result)[0]
141
- new_args = [
142
- getattr(existing_object, field.attname)
143
- for field in orm._meta.concrete_fields
144
- ]
145
- super(BaseORM, orm).__init__(*new_args)
146
- orm._state.adding = False # mimic from_db
147
- except IndexError:
148
- super(BaseORM, orm).__init__(**result)
149
- elif len(args) != len(orm._meta.concrete_fields):
150
- raise ValueError("Please provide keyword arguments, not plain arguments")
151
- else:
152
- # object is loaded from DB (**kwargs could be omitted below, I believe)
153
- super(BaseORM, orm).__init__(*args, **kwargs)
154
-
155
-
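A hedged usage sketch of the constructor behavior patched in above, assuming a configured instance in which a record named "T cell" already exists and `upon_create_search_names` is enabled (entity and name are illustrative):

import lnschema_bionty as lb

# constructing a record whose name exactly matches an existing entry
# returns that entry instead of a new, unsaved record
celltype = lb.CellType(name="T cell")
print(celltype._state.adding)  # False: the record was loaded from the DB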
156
- @classmethod # type:ignore
157
- def from_values(cls, values: ListLike, field: Union[Field, str], **kwargs):
158
- if isinstance(field, str):
159
- field = getattr(cls, field)
160
- if not isinstance(field, Field): # field is DeferredAttribute
161
- raise TypeError(
162
- "field must be a string or an ORM field, e.g., `CellType.name`!"
163
- )
164
- if cls.__name__ == "FeatureSet":
165
- from lamindb._featureset_methods import parse_features_from_iterable
166
-
167
- features = parse_features_from_iterable(
168
- iterable=values,
169
- field=field,
170
- species=kwargs.get("species"),
171
- )
172
- return features
173
-
174
- from_bionty = True if cls.__module__.startswith("lnschema_bionty.") else False
175
- return get_or_create_records(
176
- iterable=values, field=field, from_bionty=from_bionty, **kwargs
177
- )
178
-
179
-
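For illustration, a sketch of `from_values`, assuming lnschema_bionty is installed and an instance is set up; `lb.Gene.symbol` mirrors the field syntax used in the docstrings below:

import lnschema_bionty as lb

# create or retrieve Gene records from a list of symbols via Bionty
genes = lb.Gene.from_values(["TCF7", "BRCA1"], field=lb.Gene.symbol, species="human")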
180
- @classmethod # type: ignore
181
- def search(
182
- cls,
183
- string: str,
184
- *,
185
- field: Optional[Union[str, CharField, TextField]] = None,
186
- top_hit: bool = False,
187
- case_sensitive: bool = True,
188
- synonyms_field: Optional[Union[str, TextField, CharField]] = "synonyms",
189
- synonyms_sep: str = "|",
190
- ) -> Union[pd.DataFrame, BaseORM]:
191
- """Search the table.
192
-
193
- Args:
194
- string: `str` The input string to match against the field ontology values.
195
- field: `Optional[Union[str, CharField, TextField]] = None` The field
196
- against which the input string is matched.
197
- top_hit: `bool = False` If `True`, return only the top hit or hits (in
198
- case of equal scores).
199
- case_sensitive: `bool = True` Whether the match is case sensitive.
200
- synonyms_field: `Optional[Union[str, TextField, CharField]] = "synonyms"` Also search synonyms. If `None`, synonyms are not searched.
201
-
202
- Returns:
203
- A sorted `DataFrame` of search results with a score in column
204
- `__ratio__`. If `top_hit` is `True`, the best match.
205
- """
206
- import pandas as pd
207
- from lamin_logger._search import search
208
-
209
- if field is None:
210
- field = get_default_str_field(cls)
211
- if not isinstance(field, str):
212
- field = field.field.name
213
-
214
- records = cls.objects.all()
215
- df = pd.DataFrame.from_records(records.values())
216
-
217
- result = search(
218
- df=df,
219
- string=string,
220
- field=field,
221
- synonyms_field=str(synonyms_field),
222
- case_sensitive=case_sensitive,
223
- return_ranked_results=not top_hit,
224
- synonyms_sep=synonyms_sep,
225
- tuple_name=cls.__name__,
226
- )
227
-
228
- if not top_hit or result is None:
229
- return result
230
- else:
231
- if isinstance(result, list):
232
- return [records.get(id=r.id) for r in result]
233
- else:
234
- return records.get(id=result.id)
235
-
236
-
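A minimal usage sketch for `search` (entity and query string are illustrative; requires populated tables):

import lnschema_bionty as lb

# ranked results as a DataFrame with a __ratio__ score column
results = lb.CellType.search("T cell")

# or fetch the best-matching record(s) directly
best = lb.CellType.search("T cell", top_hit=True)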
237
- @classmethod # type: ignore
238
- def lookup(cls, field: Optional[Union[str, CharField, TextField]] = None) -> NamedTuple:
239
- """Return an auto-complete object for a field.
240
-
241
- Args:
242
- field: `Optional[Union[str, CharField, TextField]] = None` The field to
243
- look up the values for. Defaults to 'name'.
244
-
245
- Returns:
246
- A `NamedTuple` of lookup information of the field values with a
247
- dictionary converter.
248
-
249
- Examples:
250
- >>> import lnschema_bionty as lb
251
- >>> lookup = lb.Gene.lookup()
252
- >>> lookup.adgb_dt
253
- >>> lookup_dict = lookup.dict()
254
- >>> lookup_dict['ADGB-DT']
255
- """
256
- if field is None:
257
- field = get_default_str_field(cls)
258
- if not isinstance(field, str):
259
- field = field.field.name
260
-
261
- records = cls.objects.all()
262
-
263
- return Lookup(
264
- records=records,
265
- values=[i.get(field) for i in records.values()],
266
- tuple_name=cls.__name__,
267
- prefix="ln",
268
- ).lookup()
269
-
270
-
271
- lookup.__doc__ = Lookup.__doc__
272
-
273
-
274
- @classmethod # type: ignore
275
- def inspect(
276
- cls,
277
- identifiers: Iterable,
278
- field: Union[str, CharField, TextField],
279
- *,
280
- case_sensitive: bool = False,
281
- inspect_synonyms: bool = True,
282
- return_df: bool = False,
283
- logging: bool = True,
284
- **kwargs,
285
- ) -> Union[pd.DataFrame, Dict[str, List[str]]]:
286
- """Inspect if a list of identifiers are mappable to existing values of a field.
287
-
288
- Args:
289
- identifiers: Identifiers that will be checked against the field.
290
- field: `Union[str, CharField, TextField]` The field of identifiers.
291
- Examples are 'ontology_id' to map against the source ontology IDs
292
- or 'name' to map against the ontology names.
293
- case_sensitive: Whether the identifier inspection is case sensitive.
294
- inspect_synonyms: Whether to inspect synonyms.
295
- return_df: Whether to return a Pandas DataFrame.
296
-
297
- Returns:
298
- - A Dictionary of "mapped" and "unmapped" identifiers
299
- - If `return_df`: A DataFrame indexed by identifiers with a boolean `__mapped__`
300
- column that indicates whether each identifier is mapped.
301
-
302
- Examples:
303
- >>> import lnschema_bionty as lb
304
- >>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
305
- >>> lb.Gene.inspect(gene_symbols, field=lb.Gene.symbol)
306
- """
307
- from lamin_logger._inspect import inspect
308
-
309
- if not isinstance(field, str):
310
- field = field.field.name
311
-
312
- return inspect(
313
- df=_filter_df_based_on_species(orm=cls, species=kwargs.get("species")),
314
- identifiers=identifiers,
315
- field=str(field),
316
- case_sensitive=case_sensitive,
317
- inspect_synonyms=inspect_synonyms,
318
- return_df=return_df,
319
- logging=logging,
320
- )
321
-
322
-
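A small variation on the docstring example above, assuming human gene records exist in the instance and using `return_df=True`:

import lnschema_bionty as lb

gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
# DataFrame indexed by the identifiers, with a boolean __mapped__ column
df = lb.Gene.inspect(gene_symbols, field=lb.Gene.symbol, species="human", return_df=True)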
323
- @classmethod # type: ignore
324
- def map_synonyms(
325
- cls,
326
- synonyms: Iterable,
327
- *,
328
- return_mapper: bool = False,
329
- case_sensitive: bool = False,
330
- keep: Literal["first", "last", False] = "first",
331
- synonyms_field: str = "synonyms",
332
- synonyms_sep: str = "|",
333
- field: Optional[str] = None,
334
- **kwargs,
335
- ) -> Union[List[str], Dict[str, str]]:
336
- """Maps input synonyms to standardized names.
337
-
338
- Args:
339
- synonyms: `Iterable` Synonyms that will be standardized.
340
- return_mapper: `bool = False` If `True`, returns `{input_synonym1:
341
- standardized_name1}`.
342
- case_sensitive: `bool = False` Whether the mapping is case sensitive.
343
- species: `Optional[str]` Only map against entries of the given species.
344
- keep: `Literal["first", "last", False] = "first"` When a synonym maps to
345
- multiple names, determines which duplicates to mark as
346
- `pd.DataFrame.duplicated`
347
-
348
- - "first": returns the first mapped standardized name
349
- - "last": returns the last mapped standardized name
350
- - `False`: returns all mapped standardized names
351
- synonyms_field: `str = "synonyms"` A field containing the concatenated synonyms.
352
- synonyms_sep: `str = "|"` Which separator is used to separate synonyms.
353
- field: `Optional[str]` The field representing the standardized names.
354
-
355
- Returns:
356
- If `return_mapper` is `False`: a list of standardized names. Otherwise,
357
- a dictionary of mapped values with mappable synonyms as keys and
358
- standardized names as values.
359
-
360
- Examples:
361
- >>> import lnschema_bionty as lb
362
- >>> gene_synonyms = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
363
- >>> standardized_names = lb.Gene.map_synonyms(gene_synonyms, species="human")
364
- """
365
- from lamin_logger._map_synonyms import map_synonyms
366
-
367
- if field is None:
368
- field = get_default_str_field(cls)
369
- if not isinstance(field, str):
370
- field = field.field.name
371
-
372
- try:
373
- cls._meta.get_field(synonyms_field)
374
- df = _filter_df_based_on_species(orm=cls, species=kwargs.get("species"))
375
- except FieldDoesNotExist:
376
- df = pd.DataFrame()
377
- return map_synonyms(
378
- df=df,
379
- identifiers=synonyms,
380
- field=field,
381
- return_mapper=return_mapper,
382
- case_sensitive=case_sensitive,
383
- keep=keep,
384
- synonyms_field=synonyms_field,
385
- sep=synonyms_sep,
386
- )
387
-
388
-
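As a complement to the docstring example, a hedged sketch using `return_mapper=True` (assumes human gene records are present; the shown mapping is illustrative):

import lnschema_bionty as lb

gene_synonyms = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
mapper = lb.Gene.map_synonyms(gene_synonyms, species="human", return_mapper=True)
# e.g. {"FANCD1": "BRCA2"}: only mappable synonyms appear as keys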
389
- def _filter_df_based_on_species(
390
- orm: BaseORM, species: Union[str, BaseORM, None] = None
391
- ):
392
- import pandas as pd
393
-
394
- records = orm.objects.all()
395
- try:
396
- # if the orm has a species field, it's required
397
- records.model._meta.get_field("species")
398
- if species is None:
399
- raise AssertionError(
400
- f"{orm.__name__} table requires to specify a species name via"
401
- " `species=`!"
402
- )
403
- elif isinstance(species, BaseORM):
404
- species_name = species.name
405
- else:
406
- species_name = species
407
- records = records.filter(species__name=species_name)
408
- except FieldDoesNotExist:
409
- pass
410
-
411
- return pd.DataFrame.from_records(records.values())
412
-
413
-
414
- def get_default_str_field(orm: BaseORM) -> str:
415
- """Get the 1st char or text field from the orm."""
416
- model_field_names = [i.name for i in orm._meta.fields]
417
-
418
- # set default field
419
- if "name" in model_field_names:
420
- # by default use the name field
421
- field = orm._meta.get_field("name")
422
- else:
423
- # otherwise use the first CharField or TextField whose name doesn't contain "id"
424
- for i in orm._meta.fields:
425
- if "id" in i.name:
426
- continue
427
- if i.get_internal_type() in {"CharField", "TextField"}:
428
- field = i
429
- break
430
-
431
- # no default field can be found
432
- if field is None:
433
- raise ValueError("Please specify a field to search against!")
434
-
435
- return field.name
436
-
437
-
438
- def _add_or_remove_synonyms(
439
- synonym: Union[str, Iterable],
440
- record: BaseORM,
441
- action: Literal["add", "remove"],
442
- force: bool = False,
443
- ):
444
- """Add or remove synonyms."""
445
-
446
- def check_synonyms_in_all_records(synonyms: Set[str], record: BaseORM):
447
- """Errors if input synonyms are already associated with records in the DB."""
448
- import pandas as pd
449
- from IPython.display import display
450
-
451
- syns_all = (
452
- record.__class__.objects.exclude(synonyms="").exclude(synonyms=None).all()
453
- )
454
- if len(syns_all) == 0:
455
- return
456
- df = pd.DataFrame(syns_all.values())
457
- df["synonyms"] = df["synonyms"].str.split("|")
458
- df = df.explode("synonyms")
459
- matches_df = df[(df["synonyms"].isin(synonyms)) & (df["id"] != record.id)]
460
- if matches_df.shape[0] > 0:
461
- records_df = pd.DataFrame(syns_all.filter(id__in=matches_df["id"]).values())
462
- logger.error(
463
- f"Input synonyms {matches_df['synonyms'].unique()} already associated"
464
- " with the following records:\n(Pass `force=True` to ignore this error)"
465
- )
466
- display(records_df)
467
- raise SystemExit(AssertionError)
468
-
469
- # passed synonyms
470
- if isinstance(synonym, str):
471
- syn_new_set = set([synonym])
472
- else:
473
- syn_new_set = set(synonym)
474
- # nothing happens when passing an empty string or list
475
- if len(syn_new_set) == 0:
476
- return
477
- # because we use | as the separator
478
- if any(["|" in i for i in syn_new_set]):
479
- raise AssertionError("A synonym can't contain '|'!")
480
-
481
- # existing synonyms
482
- syns_exist = record.synonyms
483
- if syns_exist is None or len(syns_exist) == 0:
484
- syns_exist_set = set()
485
- else:
486
- syns_exist_set = set(syns_exist.split("|"))
487
-
488
- if action == "add":
489
- if not force:
490
- check_synonyms_in_all_records(syn_new_set, record)
491
- syns_exist_set.update(syn_new_set)
492
- elif action == "remove":
493
- syns_exist_set = syns_exist_set.difference(syn_new_set)
494
-
495
- if len(syns_exist_set) == 0:
496
- syns_str = None
497
- else:
498
- syns_str = "|".join(syns_exist_set)
499
-
500
- record.synonyms = syns_str
501
-
502
- # if the record already exists in the DB, save it
503
- if not record._state.adding:
504
- record.save()
505
-
506
-
507
- def _check_synonyms_field_exist(record: BaseORM):
508
- try:
509
- record.__getattribute__("synonyms")
510
- except AttributeError:
511
- raise NotImplementedError(
512
- f"No synonyms field found in table {record.__class__.__name__}!"
513
- )
514
-
515
-
516
- def add_synonym(self, synonym: Union[str, Iterable], force: bool = False):
517
- """Add synonyms to a record."""
518
- _check_synonyms_field_exist(self)
519
- _add_or_remove_synonyms(synonym=synonym, record=self, force=force, action="add")
520
-
521
-
522
- def remove_synonym(self, synonym: Union[str, Iterable]):
523
- """Remove synonyms from a record."""
524
- _check_synonyms_field_exist(self)
525
- _add_or_remove_synonyms(synonym=synonym, record=self, action="remove")
526
-
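A hedged usage sketch for the synonym helpers above (assumes the record's table has a `synonyms` field and that the record already exists in the DB):

import lnschema_bionty as lb

record = lb.Gene.from_values(["BRCA2"], field=lb.Gene.symbol, species="human")[0]
record.add_synonym("FANCD1")     # appends to the "|"-separated synonyms string (saved if the record is in the DB)
record.remove_synonym("FANCD1")  # removes it again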
527
-
528
- BaseORM.__init__ = __init__
529
- BaseORM.search = search
530
- BaseORM.lookup = lookup
531
- BaseORM.map_synonyms = map_synonyms
532
- BaseORM.inspect = inspect
533
- BaseORM.add_synonym = add_synonym
534
- BaseORM.remove_synonym = remove_synonym
535
- BaseORM.from_values = from_values
lamindb/_featureset_methods.py DELETED
@@ -1,73 +0,0 @@
1
- from typing import Dict, Optional
2
-
3
- from lamin_logger import logger
4
- from lnschema_core import FeatureSet
5
-
6
- from lamindb._select import select
7
- from lamindb.dev.hashing import hash_set
8
-
9
- from ._from_values import Field, ListLike, get_or_create_records, index_iterable
10
-
11
-
12
- # expose to user via ln.FeatureSet
13
- def parse_features_from_iterable(
14
- iterable: ListLike,
15
- field: Field,
16
- species: Optional[str] = None,
17
- ):
18
- # get related_name of the field class from FeatureSet class
19
- model = field.field.model
20
- related_name = [
21
- i.related_name
22
- for i in FeatureSet._meta.related_objects
23
- if i.related_model == model
24
- ]
25
- if len(related_name) == 0:
26
- raise AssertionError(
27
- f"Can't create featuresets from {model.__name__}! Check your schema!"
28
- )
29
- else:
30
- related_name = related_name[0]
31
-
32
- iterable_idx = index_iterable(iterable)
33
-
34
- features_hash = hash_set(set(iterable_idx))
35
-
36
- featureset = select(
37
- FeatureSet,
38
- id=features_hash,
39
- type=related_name,
40
- ).one_or_none()
41
- if featureset is not None:
42
- logger.info("Returning an existing featureset")
43
- else:
44
- records = get_or_create_records(
45
- iterable=iterable_idx, field=field, species=species, from_bionty=True
46
- )
47
- featureset = FeatureSet(
48
- id=features_hash, type=related_name, **{related_name: records}
49
- )
50
- return featureset
51
-
52
-
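As exposed to users via `ln.FeatureSet` (see the comment above and the `from_values` classmethod in the first file), a hedged sketch of creating a feature set from gene symbols:

import lamindb as ln
import lnschema_bionty as lb

# returns an existing FeatureSet if one with the same hash-based id exists,
# otherwise builds a new one holding the Gene records
featureset = ln.FeatureSet.from_values(["TCF7", "BRCA1"], field=lb.Gene.symbol, species="human")
featureset.save()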
53
- def __init__(featureset, *args, **kwargs): # type: ignore
54
- related_names = [i.related_name for i in featureset.__class__._meta.related_objects]
55
-
56
- relationships: Dict = {}
57
- for related_name in related_names:
58
- if related_name in kwargs:
59
- relationships[related_name] = kwargs.pop(related_name)
60
- featureset._relationships = relationships
61
-
62
- super(FeatureSet, featureset).__init__(*args, **kwargs)
63
-
64
-
65
- def save(featureset, *args, **kwargs):
66
- super(FeatureSet, featureset).save(*args, **kwargs)
67
- for key, records in featureset._relationships.items():
68
- [r.save() for r in records]
69
- getattr(featureset, key).set(records)
70
-
71
-
72
- FeatureSet.__init__ = __init__
73
- FeatureSet.save = save
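Putting the patched constructor and `save` together, a rough sketch of the intended flow; the `genes` related name is an assumption, and `hash_set` is the helper imported at the top of this file:

import lamindb as ln
import lnschema_bionty as lb
from lamindb.dev.hashing import hash_set

symbols = ["TCF7", "BRCA1"]
genes = lb.Gene.from_values(symbols, field=lb.Gene.symbol, species="human")
# related records are stashed in _relationships by __init__ and linked after save()
featureset = ln.FeatureSet(id=hash_set(set(symbols)), type="genes", genes=genes)
featureset.save()  # saves the row, then saves each gene record and sets the "genes" relation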
lamindb/_file_access.py DELETED
@@ -1,48 +0,0 @@
1
- from lamin_logger import logger
2
- from lamindb_setup import settings
3
- from lamindb_setup.dev import StorageSettings
4
- from lnschema_core.models import File, Storage
5
-
6
- AUTO_KEY_PREFIX = ".lamindb/"
7
-
8
-
9
- # add type annotations back asap when re-organizing the module
10
- def auto_storage_key_from_file(file: File):
11
- if file.key is None:
12
- return f"{AUTO_KEY_PREFIX}{file.id}{file.suffix}"
13
- else:
14
- return file.key
15
-
16
-
17
- def attempt_accessing_path(file: File, storage_key: str):
18
- if file.storage_id == settings.storage.id:
19
- path = settings.storage.key_to_filepath(storage_key)
20
- else:
21
- logger.warning(
22
- "file.path() is slower for files outside the currently configured storage"
23
- " location"
24
- )
25
- storage = Storage.select(id=file.storage_id).one()
26
- # find a better way than passing None to instance_settings in the future!
27
- storage_settings = StorageSettings(storage.root, instance_settings=None)
28
- path = storage_settings.key_to_filepath(storage_key)
29
- # the following is for backward compat
30
- if storage_key.startswith(AUTO_KEY_PREFIX) and not path.exists():
31
- logger.warning(
32
- "You have auto-keyed files in your storage root, please move them into"
33
- f" {AUTO_KEY_PREFIX} within your storage location"
34
- )
35
- # try legacy_storage_key in root
36
- for previous_prefix in ["", "lndb/"]:
37
- legacy_storage_key = storage_key.replace(AUTO_KEY_PREFIX, previous_prefix)
38
- path = settings.storage.key_to_filepath(legacy_storage_key)
39
- if path.exists():
40
- return path
41
- return path
42
-
43
-
44
- # add type annotations back asap when re-organizing the module
45
- def filepath_from_file(file: File):
46
- storage_key = auto_storage_key_from_file(file)
47
- path = attempt_accessing_path(file, storage_key)
48
- return path
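A brief, hedged sketch of how these helpers compose in 0.45 (the query key is illustrative; any tracked File record works):

from lnschema_core.models import File

from lamindb._file_access import auto_storage_key_from_file, filepath_from_file

file = File.select(key="raw/pbmc.h5ad").one()   # illustrative key
storage_key = auto_storage_key_from_file(file)  # falls back to ".lamindb/<id><suffix>" when key is None
path = filepath_from_file(file)                 # resolves storage_key against the file's storage location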