lamindb 0.76.9__py3-none-any.whl → 0.76.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -21,7 +21,8 @@ Key functionality.
21
21
  .. autosummary::
22
22
  :toctree: .
23
23
 
24
- context
24
+ track
25
+ finish
25
26
  connect
26
27
  Curator
27
28
  view
@@ -33,6 +34,7 @@ Modules and settings.
33
34
  :toctree: .
34
35
 
35
36
  integrations
37
+ context
36
38
  settings
37
39
  setup
38
40
  UPath
@@ -41,7 +43,7 @@ Modules and settings.
41
43
  """
42
44
 
43
45
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
44
- __version__ = "0.76.9"
46
+ __version__ = "0.76.11"
45
47
 
46
48
  import os as _os
47
49
 
@@ -58,7 +60,7 @@ def __getattr__(name):
58
60
  raise _InstanceNotSetupError()
59
61
 
60
62
 
61
- if _check_instance_setup(from_lamindb=True):
63
+ if _check_instance_setup(from_module="lnschema_core"):
62
64
  del _InstanceNotSetupError
63
65
  del __getattr__ # delete so that imports work out
64
66
  from lnschema_core.models import (
@@ -97,17 +99,16 @@ if _check_instance_setup(from_lamindb=True):
97
99
  from .core._context import context
98
100
  from .core._settings import settings
99
101
 
100
- # schema modules
101
- if not _os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
102
- from lamindb_setup._init_instance import (
103
- reload_schema_modules as _reload_schema_modules,
104
- )
105
-
106
- _reload_schema_modules(_lamindb_setup.settings.instance)
107
-
108
- track = context.track # backward compat
109
- finish = context.finish # backward compat
102
+ track = context.track # simple access because these are so common
103
+ finish = context.finish # simple access because these are so common
110
104
  Curate = Curator # backward compat
111
105
  settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
112
- context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
106
+ context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).
107
+
108
+ Note that you can access:
109
+
110
+ - `ln.context.track()` as `ln.track()`
111
+ - `ln.context.finish()` as `ln.finish()`
112
+
113
+ """
113
114
  from django.db.models import Q
lamindb/_artifact.py CHANGED
@@ -2,8 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import os
4
4
  import shutil
5
+ from collections.abc import Mapping
5
6
  from pathlib import Path, PurePath, PurePosixPath
6
- from typing import TYPE_CHECKING, Any, Mapping
7
+ from typing import TYPE_CHECKING, Any
7
8
 
8
9
  import fsspec
9
10
  import lamindb_setup as ln_setup
lamindb/_can_validate.py CHANGED
@@ -12,7 +12,7 @@ from lnschema_core import CanValidate, Record
12
12
 
13
13
  from lamindb._utils import attach_func_to_class_method
14
14
 
15
- from ._from_values import _has_organism_field, _print_values
15
+ from ._from_values import _has_organism_field, _print_values, get_or_create_records
16
16
  from ._record import _queryset, get_name_field
17
17
 
18
18
  if TYPE_CHECKING:
@@ -21,6 +21,33 @@ if TYPE_CHECKING:
21
21
  from lnschema_core.types import ListLike, StrField
22
22
 
23
23
 
24
+ # from_values doesn't apply for QuerySet or Manager
25
+ @classmethod # type:ignore
26
+ @doc_args(CanValidate.from_values.__doc__)
27
+ def from_values(
28
+ cls,
29
+ values: ListLike,
30
+ field: StrField | None = None,
31
+ create: bool = False,
32
+ organism: Record | str | None = None,
33
+ source: Record | None = None,
34
+ mute: bool = False,
35
+ ) -> list[Record]:
36
+ """{}""" # noqa: D415
37
+ from_source = True if cls.__module__.startswith("bionty.") else False
38
+
39
+ field_str = get_name_field(cls, field=field)
40
+ return get_or_create_records(
41
+ iterable=values,
42
+ field=getattr(cls, field_str),
43
+ create=create,
44
+ from_source=from_source,
45
+ organism=organism,
46
+ source=source,
47
+ mute=mute,
48
+ )
49
+
50
+
24
51
  @classmethod # type: ignore
25
52
  @doc_args(CanValidate.inspect.__doc__)
26
53
  def inspect(
@@ -349,16 +376,24 @@ def _standardize(
349
376
  from bionty._bionty import create_or_get_organism_record
350
377
 
351
378
  organism_record = create_or_get_organism_record(
352
- organism=organism, registry=registry
379
+ organism=organism, registry=registry, field=field
353
380
  )
354
381
  organism = (
355
382
  organism_record.name if organism_record is not None else organism_record
356
383
  )
357
384
 
385
+ # only perform synonym mapping if field is the name field
386
+ if hasattr(registry, "_name_field") and field != registry._name_field:
387
+ synonyms_field = None
388
+
358
389
  try:
359
390
  registry._meta.get_field(synonyms_field)
391
+ fields = {i for i in [field, return_field, synonyms_field] if i is not None}
360
392
  df = _filter_query_based_on_organism(
361
- queryset=queryset, field=field, organism=organism
393
+ queryset=queryset,
394
+ field=field,
395
+ organism=organism,
396
+ fields=list(fields),
362
397
  )
363
398
  except FieldDoesNotExist:
364
399
  df = pd.DataFrame()
@@ -525,6 +560,7 @@ def _filter_query_based_on_organism(
525
560
  field: str,
526
561
  organism: str | Record | None = None,
527
562
  values_list_field: str | None = None,
563
+ fields: list[str] | None = None,
528
564
  ):
529
565
  """Filter a queryset based on organism."""
530
566
  import pandas as pd
@@ -536,13 +572,18 @@ def _filter_query_based_on_organism(
536
572
  from bionty._bionty import create_or_get_organism_record
537
573
 
538
574
  organism_record = create_or_get_organism_record(
539
- organism=organism, registry=registry
575
+ organism=organism, registry=registry, field=field
540
576
  )
541
577
  if organism_record is not None:
542
578
  queryset = queryset.filter(organism__name=organism_record.name)
543
579
 
544
580
  if values_list_field is None:
581
+ if fields:
582
+ return pd.DataFrame.from_records(
583
+ queryset.values_list(*fields), columns=fields
584
+ )
545
585
  return pd.DataFrame.from_records(queryset.values())
586
+
546
587
  else:
547
588
  return queryset.values_list(values_list_field, flat=True)
548
589
 
@@ -564,6 +605,7 @@ METHOD_NAMES = [
564
605
  "add_synonym",
565
606
  "remove_synonym",
566
607
  "set_abbr",
608
+ "from_values",
567
609
  ]
568
610
 
569
611
  if ln_setup._TESTING: # type: ignore
lamindb/_collection.py CHANGED
@@ -4,7 +4,6 @@ from collections import defaultdict
4
4
  from typing import (
5
5
  TYPE_CHECKING,
6
6
  Any,
7
- Iterable,
8
7
  Literal,
9
8
  )
10
9
 
@@ -37,6 +36,8 @@ from .core._data import (
37
36
  from .core._settings import settings
38
37
 
39
38
  if TYPE_CHECKING:
39
+ from collections.abc import Iterable
40
+
40
41
  from lamindb.core.storage import UPath
41
42
 
42
43
  from ._query_set import QuerySet
lamindb/_curate.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import copy
4
- from typing import TYPE_CHECKING, Iterable
4
+ from typing import TYPE_CHECKING
5
5
 
6
6
  import anndata as ad
7
7
  import lamindb_setup as ln_setup
@@ -19,6 +19,8 @@ from lnschema_core import (
19
19
  from .core.exceptions import ValidationError
20
20
 
21
21
  if TYPE_CHECKING:
22
+ from collections.abc import Iterable
23
+
22
24
  from lamindb_setup.core.types import UPathStr
23
25
  from lnschema_core.types import FieldAttr
24
26
  from mudata import MuData
lamindb/_feature_set.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Iterable, Type
3
+ from typing import TYPE_CHECKING
4
4
 
5
5
  import lamindb_setup as ln_setup
6
6
  import numpy as np
@@ -21,6 +21,8 @@ from .core.schema import (
21
21
  )
22
22
 
23
23
  if TYPE_CHECKING:
24
+ from collections.abc import Iterable
25
+
24
26
  import pandas as pd
25
27
 
26
28
  from ._query_set import QuerySet
lamindb/_finish.py CHANGED
@@ -1,8 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- import os
4
3
  import re
5
- import shutil
6
4
  from datetime import datetime, timezone
7
5
  from typing import TYPE_CHECKING
8
6
 
@@ -37,6 +35,13 @@ def prepare_notebook(
37
35
  if strip_title:
38
36
  lines.pop(i)
39
37
  cell["source"] = "\n".join(lines)
38
+ # strip resaved finish error if present
39
+ # this is normally the last cell
40
+ if cell["cell_type"] == "code" and ".finish(" in cell["source"]:
41
+ for output in cell["outputs"]:
42
+ if output.get("ename", None) == "NotebookNotSaved":
43
+ cell["outputs"] = []
44
+ break
40
45
  return None
41
46
 
42
47
 
@@ -77,17 +82,6 @@ def notebook_to_script(
77
82
  script_path.write_text(py_content)
78
83
 
79
84
 
80
- def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
81
- import jupytext
82
-
83
- # get title back
84
- py_content = transform.source_code.replace(
85
- "# # transform.name", f"# # {transform.name}"
86
- )
87
- notebook = jupytext.reads(py_content, fmt="py:percent")
88
- jupytext.write(notebook, notebook_path)
89
-
90
-
91
85
  def save_context_core(
92
86
  *,
93
87
  run: Run,
@@ -97,6 +91,10 @@ def save_context_core(
97
91
  ignore_non_consecutive: bool | None = None,
98
92
  from_cli: bool = False,
99
93
  ) -> str | None:
94
+ from lnschema_core.models import (
95
+ format_field_value, # needs to come after lamindb was imported because of CLI use
96
+ )
97
+
100
98
  import lamindb as ln
101
99
 
102
100
  from .core._context import context, is_run_from_ipython
@@ -121,7 +119,7 @@ def save_context_core(
121
119
  notebook_content = read_notebook(filepath) # type: ignore
122
120
  if not ignore_non_consecutive: # ignore_non_consecutive is None or False
123
121
  is_consecutive = check_consecutiveness(
124
- notebook_content, calling_statement=".finish()"
122
+ notebook_content, calling_statement=".finish("
125
123
  )
126
124
  if not is_consecutive:
127
125
  response = "n" # ignore_non_consecutive == False
@@ -158,15 +156,13 @@ def save_context_core(
158
156
  if hash != ref_hash:
159
157
  response = input(
160
158
  f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
161
- f"Proceed? (y/n)"
159
+ f" Proceed? (y/n)"
162
160
  )
163
161
  if response == "y":
164
162
  transform.source_code = source_code_path.read_text()
165
163
  transform.hash = hash
166
164
  else:
167
- logger.warning(
168
- "Please re-run `ln.context.track()` to make a new version"
169
- )
165
+ logger.warning("Please re-run `ln.track()` to make a new version")
170
166
  return "rerun-the-notebook"
171
167
  else:
172
168
  logger.important("source code is already saved")
@@ -236,6 +232,11 @@ def save_context_core(
236
232
  transform.save()
237
233
 
238
234
  # finalize
235
+ if not from_cli:
236
+ run_time = run.finished_at - run.started_at
237
+ logger.important(
238
+ f"finished Run('{run.uid[:8]}') after {run_time} at {format_field_value(run.finished_at)}"
239
+ )
239
240
  if ln_setup.settings.instance.is_on_hub:
240
241
  identifier = ln_setup.settings.instance.slug
241
242
  logger.important(
lamindb/_from_values.py CHANGED
@@ -1,15 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Iterable
3
+ from typing import TYPE_CHECKING
4
4
 
5
5
  import pandas as pd
6
6
  from django.core.exceptions import FieldDoesNotExist
7
7
  from lamin_utils import colors, logger
8
- from lnschema_core.models import Feature, Record, ULabel
8
+ from lnschema_core.models import Feature, Field, Record, ULabel
9
9
 
10
10
  from .core._settings import settings
11
11
 
12
12
  if TYPE_CHECKING:
13
+ from collections.abc import Iterable
14
+
13
15
  from lnschema_core.types import ListLike, StrField
14
16
 
15
17
 
@@ -29,20 +31,17 @@ def get_or_create_records(
29
31
  if create:
30
32
  return [registry(**{field.field.name: value}) for value in iterable]
31
33
  creation_search_names = settings.creation.search_names
32
- feature: Feature = None
33
34
  organism = _get_organism_record(field, organism)
34
- kwargs: dict = {}
35
- if organism is not None:
36
- kwargs["organism"] = organism
37
- if source is not None:
38
- kwargs["source"] = source
39
35
  settings.creation.search_names = False
40
36
  try:
41
37
  iterable_idx = index_iterable(iterable)
42
38
 
43
39
  # returns existing records & non-existing values
44
40
  records, nonexist_values, msg = get_existing_records(
45
- iterable_idx=iterable_idx, field=field, mute=mute, **kwargs
41
+ iterable_idx=iterable_idx,
42
+ field=field,
43
+ organism=organism,
44
+ mute=mute,
46
45
  )
47
46
 
48
47
  # new records to be created based on new values
@@ -58,11 +57,10 @@ def get_or_create_records(
58
57
  ):
59
58
  source_record = records[0].source
60
59
  if not source_record and hasattr(registry, "public"):
61
- from bionty._bionty import get_source_record
60
+ if organism is None:
61
+ organism = _ensembl_prefix(nonexist_values[0], field, organism)
62
+ organism = _get_organism_record(field, organism, force=True)
62
63
 
63
- source_record = get_source_record(
64
- registry.public(organism=organism), registry
65
- )
66
64
  if source_record:
67
65
  from bionty.core._add_ontology import check_source_in_db
68
66
 
@@ -82,9 +80,10 @@ def get_or_create_records(
82
80
  records_bionty, unmapped_values = create_records_from_source(
83
81
  iterable_idx=nonexist_values,
84
82
  field=field,
83
+ organism=organism,
84
+ source=source_record,
85
85
  msg=msg,
86
86
  mute=mute,
87
- **kwargs,
88
87
  )
89
88
  if len(records_bionty) > 0:
90
89
  msg = ""
@@ -106,17 +105,17 @@ def get_or_create_records(
106
105
  f"{colors.red('did not create')} {name} record{s} for "
107
106
  f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}"
108
107
  )
109
- if registry.__get_schema_name__() == "bionty" or registry == ULabel:
110
- if isinstance(iterable, pd.Series):
111
- feature = iterable.name
112
- feature_name = None
113
- if isinstance(feature, str):
114
- feature_name = feature
115
- if feature_name is not None:
116
- if feature_name is not None:
117
- for record in records:
118
- record._feature = feature_name
119
- logger.debug(f"added default feature '{feature_name}'")
108
+ # if registry.__get_schema_name__() == "bionty" or registry == ULabel:
109
+ # if isinstance(iterable, pd.Series):
110
+ # feature = iterable.name
111
+ # feature_name = None
112
+ # if isinstance(feature, str):
113
+ # feature_name = feature
114
+ # if feature_name is not None:
115
+ # if feature_name is not None:
116
+ # for record in records:
117
+ # record._feature = feature_name
118
+ # logger.debug(f"added default feature '{feature_name}'")
120
119
  return records
121
120
  finally:
122
121
  settings.creation.search_names = creation_search_names
@@ -125,46 +124,27 @@ def get_or_create_records(
125
124
  def get_existing_records(
126
125
  iterable_idx: pd.Index,
127
126
  field: StrField,
127
+ organism: Record | None = None,
128
128
  mute: bool = False,
129
- **kwargs,
130
129
  ):
130
+ # NOTE: existing records matching is agnostic to the source
131
131
  model = field.field.model
132
- condition: dict = {} if len(kwargs) == 0 else kwargs.copy()
133
- # existing records matching is agnostic to the bionty source
134
- if "source" in condition:
135
- condition.pop("source")
132
+ if organism is None and field.field.name == "ensembl_gene_id":
133
+ if len(iterable_idx) > 0:
134
+ organism = _ensembl_prefix(iterable_idx[0], field, organism)
135
+ organism = _get_organism_record(field, organism, force=True)
136
136
 
137
137
  # standardize based on the DB reference
138
138
  # log synonyms mapped terms
139
- result = model.inspect(
139
+ syn_mapper = model.standardize(
140
140
  iterable_idx,
141
141
  field=field,
142
- organism=kwargs.get("organism"),
143
- source=kwargs.get("source"),
142
+ organism=organism,
144
143
  mute=True,
144
+ public_aware=False,
145
+ return_mapper=True,
145
146
  )
146
- syn_mapper = result.synonyms_mapper
147
-
148
- syn_msg = ""
149
- if len(syn_mapper) > 0:
150
- s = "" if len(syn_mapper) == 1 else "s"
151
- names = list(syn_mapper.keys())
152
- print_values = colors.green(_print_values(names))
153
- syn_msg = (
154
- "loaded"
155
- f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')}"
156
- f" matching {colors.italic('synonyms')}: {print_values}"
157
- )
158
- iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
159
-
160
- # get all existing records in the db
161
- # if necessary, create records for the values in kwargs
162
- # k:v -> k:v_record
163
- # kwargs is used to deal with organism
164
- condition.update({f"{field.field.name}__in": iterable_idx.values})
165
-
166
- query_set = model.filter(**condition)
167
- records = query_set.list()
147
+ iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
168
148
 
169
149
  # now we have to sort the list of queried records
170
150
  # preserved = Case(
@@ -177,16 +157,33 @@ def get_existing_records(
177
157
  # records = query_set.order_by(preserved).list()
178
158
 
179
159
  # log validated terms
180
- validated = result.validated
160
+ is_validated = model.validate(
161
+ iterable_idx, field=field, organism=organism, mute=True
162
+ )
163
+ if len(is_validated) > 0:
164
+ validated = iterable_idx[is_validated]
165
+ else:
166
+ validated = []
181
167
  msg = ""
182
- if len(validated) > 0:
183
- s = "" if len(validated) == 1 else "s"
184
- print_values = colors.green(_print_values(validated))
185
- msg = (
186
- "loaded"
187
- f" {colors.green(f'{len(validated)} {model.__name__} record{s}')}"
188
- f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
189
- )
168
+ syn_msg = ""
169
+ if not mute:
170
+ if len(validated) > 0:
171
+ s = "" if len(validated) == 1 else "s"
172
+ print_values = colors.green(_print_values(validated))
173
+ msg = (
174
+ "loaded"
175
+ f" {colors.green(f'{len(validated)} {model.__name__} record{s}')}"
176
+ f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
177
+ )
178
+ if len(syn_mapper) > 0:
179
+ s = "" if len(syn_mapper) == 1 else "s"
180
+ names = list(syn_mapper.keys())
181
+ print_values = colors.green(_print_values(names))
182
+ syn_msg = (
183
+ "loaded"
184
+ f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')}"
185
+ f" matching {colors.italic('synonyms')}: {print_values}"
186
+ )
190
187
 
191
188
  # no logging if all values are validated
192
189
  # logs if there are synonyms
@@ -197,20 +194,28 @@ def get_existing_records(
197
194
  logger.success(syn_msg)
198
195
  msg = ""
199
196
 
200
- existing_values = iterable_idx.intersection(
201
- query_set.values_list(field.field.name, flat=True)
202
- )
203
- nonexist_values = iterable_idx.difference(existing_values)
197
+ # get all existing records in the db
198
+ # if necessary, create records for the values in kwargs
199
+ # k:v -> k:v_record
200
+ query = {f"{field.field.name}__in": iterable_idx.values}
201
+ if organism is not None:
202
+ query["organism"] = organism
203
+ records = model.filter(**query).list()
204
204
 
205
- return records, nonexist_values, msg
205
+ if len(validated) == len(iterable_idx):
206
+ return records, [], msg
207
+ else:
208
+ nonval_values = iterable_idx.difference(validated)
209
+ return records, nonval_values, msg
206
210
 
207
211
 
208
212
  def create_records_from_source(
209
213
  iterable_idx: pd.Index,
210
214
  field: StrField,
215
+ organism: Record | None = None,
216
+ source: Record | None = None,
211
217
  msg: str = "",
212
218
  mute: bool = False,
213
- **kwargs,
214
219
  ):
215
220
  model = field.field.model
216
221
  records: list = []
@@ -221,19 +226,13 @@ def create_records_from_source(
221
226
  # create the corresponding bionty object from model
222
227
  try:
223
228
  # TODO: more generic
224
- organism = kwargs.get("organism")
225
- if field.field.name == "ensembl_gene_id":
226
- if iterable_idx[0].startswith("ENSG"):
227
- organism = "human"
228
- elif iterable_idx[0].startswith("ENSMUSG"):
229
- organism = "mouse"
230
- public_ontology = model.public(organism=organism, source=kwargs.get("source"))
229
+ public_ontology = model.public(organism=organism, source=source)
231
230
  except Exception:
232
231
  # for custom records that are not created from public sources
233
232
  return records, iterable_idx
234
- # add source record to the kwargs
235
- source_record = get_source_record(public_ontology, model)
236
- kwargs.update({"source": source_record})
233
+ # get the default source
234
+ if source is None:
235
+ source = get_source_record(public_ontology, model)
237
236
 
238
237
  # filter the columns in bionty df based on fields
239
238
  bionty_df = filter_bionty_df_columns(model=model, public_ontology=public_ontology)
@@ -264,15 +263,17 @@ def create_records_from_source(
264
263
  bionty_kwargs, multi_msg = _bulk_create_dicts_from_df(
265
264
  keys=mapped_values, column_name=field.field.name, df=bionty_df
266
265
  )
267
- organism_kwargs = {}
268
- if "organism" not in kwargs:
269
- organism_record = _get_organism_record(
270
- field, public_ontology.organism, force=True
271
- )
272
- if organism_record is not None:
273
- organism_kwargs["organism"] = organism_record
266
+
267
+ if hasattr(model, "organism_id") and organism is None:
268
+ organism = _get_organism_record(field, source.organism, force=True)
269
+
270
+ create_kwargs = (
271
+ {"organism": organism, "source": source}
272
+ if organism is not None
273
+ else {"source": source}
274
+ )
274
275
  for bk in bionty_kwargs:
275
- records.append(model(**bk, **kwargs, **organism_kwargs))
276
+ records.append(model(**bk, **create_kwargs))
276
277
 
277
278
  # number of records that matches field (not synonyms)
278
279
  validated = result.validated
@@ -364,6 +365,13 @@ def _has_organism_field(registry: type[Record]) -> bool:
364
365
  def _get_organism_record(
365
366
  field: StrField, organism: str | Record, force: bool = False
366
367
  ) -> Record:
368
+ """Get organism record.
369
+
370
+ Args:
371
+ field: the field to get the organism record for
372
+ organism: the organism to get the record for
373
+ force: whether to force fetching the organism record
374
+ """
367
375
  registry = field.field.model
368
376
  check = True
369
377
  if not force and hasattr(registry, "_ontology_id_field"):
@@ -375,8 +383,21 @@ def _get_organism_record(
375
383
  if _has_organism_field(registry) and check:
376
384
  from bionty._bionty import create_or_get_organism_record
377
385
 
386
+ if field and not isinstance(field, str):
387
+ field = field.field.name
388
+
378
389
  organism_record = create_or_get_organism_record(
379
- organism=organism, registry=registry
390
+ organism=organism, registry=registry, field=field
380
391
  )
381
392
  if organism_record is not None:
382
393
  return organism_record
394
+
395
+
396
+ def _ensembl_prefix(id: str, field: StrField, organism: Record | None) -> str | None:
397
+ if field.field.name == "ensembl_gene_id" and organism is None:
398
+ if id.startswith("ENSG"):
399
+ organism = "human"
400
+ elif id.startswith("ENSMUSG"):
401
+ organism = "mouse"
402
+
403
+ return organism
lamindb/_query_set.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections import UserList
4
- from typing import TYPE_CHECKING, Iterable, NamedTuple
4
+ from typing import TYPE_CHECKING, NamedTuple
5
5
 
6
6
  import pandas as pd
7
7
  from django.db import models
@@ -23,6 +23,8 @@ from lnschema_core.models import (
23
23
  from lamindb.core.exceptions import DoesNotExist
24
24
 
25
25
  if TYPE_CHECKING:
26
+ from collections.abc import Iterable
27
+
26
28
  from lnschema_core.types import ListLike, StrField
27
29
 
28
30