lamindb 0.54.4__py3-none-any.whl → 0.55.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -50,12 +50,11 @@ Static classes & modules:
50
50
 
51
51
  settings
52
52
  setup
53
- schema
54
53
  dev
55
54
 
56
55
  """
57
56
 
58
- __version__ = "0.54.4" # denote a release candidate for 0.1.0 with 0.1rc1
57
+ __version__ = "0.55.1" # denote a release candidate for 0.1.0 with 0.1rc1
59
58
 
60
59
  import os as _os
61
60
 
@@ -65,6 +64,7 @@ import lamindb_setup as _lamindb_setup
65
64
  from lamin_utils import py_version_warning as _py_version_warning
66
65
  from lamindb_setup import _check_instance_setup
67
66
  from lamindb_setup._check_instance_setup import _INSTANCE_NOT_SETUP_WARNING
67
+ from lamindb_setup._init_instance import reload_schema_modules as _reload_schema_modules
68
68
 
69
69
  _py_version_warning("3.8", "3.11")
70
70
 
@@ -114,15 +114,15 @@ if _INSTANCE_SETUP:
114
114
  from . import _ulabel # noqa
115
115
  from . import _validate # noqa
116
116
  from . import dev # noqa
117
- from . import schema # noqa
118
117
  from ._delete import delete # noqa
119
- from ._registry import select_backward as select # noqa
120
118
  from ._save import save # noqa
121
119
  from ._view import view # noqa
122
120
  from .dev import _priors # noqa
123
121
  from .dev._run_context import run_context # noqa
124
122
  from .dev._settings import settings
125
123
 
124
+ # schema modules
125
+ _reload_schema_modules(_lamindb_setup.settings.instance)
126
+
126
127
  track = run_context._track # noqa
127
- add = save # backward compat
128
128
  settings.__doc__ = """Global :class:`~lamindb.dev.Settings`."""
lamindb/_dataset.py CHANGED
@@ -1,10 +1,15 @@
1
1
  from collections import defaultdict
2
+ from pathlib import Path
2
3
  from typing import Dict, Iterable, Literal, Optional, Tuple, Union
3
4
 
4
5
  import anndata as ad
5
6
  import pandas as pd
6
7
  from lamin_utils import logger
8
+ from lamindb_setup._init_instance import register_storage
9
+ from lamindb_setup.dev import StorageSettings
7
10
  from lamindb_setup.dev._docs import doc_args
11
+ from lamindb_setup.dev._hub_utils import get_storage_region
12
+ from lamindb_setup.dev.upath import UPath
8
13
  from lnschema_core import Modality
9
14
  from lnschema_core.models import Dataset, Feature, FeatureSet
10
15
  from lnschema_core.types import AnnDataLike, DataLike, FieldAttr
@@ -21,7 +26,7 @@ from .dev._data import (
21
26
  add_transform_to_kwargs,
22
27
  get_run,
23
28
  save_feature_set_links,
24
- save_transform_run_feature_sets,
29
+ save_feature_sets,
25
30
  )
26
31
  from .dev.hashing import hash_set
27
32
 
@@ -84,11 +89,12 @@ def __init__(
84
89
  )
85
90
 
86
91
  run = get_run(run)
87
- # there are exactly two ways of creating a Dataset object right now
92
+ # there are exactly 3 ways of creating a Dataset object right now
88
93
  # using exactly one file, a directory or bucket, or more than one file
89
94
  # init file
90
95
  if isinstance(data, (pd.DataFrame, ad.AnnData, File)):
91
96
  files = None
97
+ storage = None
92
98
  if isinstance(data, File):
93
99
  file = data
94
100
  if file._state.adding:
@@ -116,9 +122,23 @@ def __init__(
116
122
  if file.description is None or file.description == "tmp":
117
123
  file.description = f"See dataset {provisional_id}" # type: ignore
118
124
  file._feature_sets = feature_sets
125
+ storage = None
126
+ # init from directory or bucket
127
+ elif isinstance(data, (str, Path, UPath)):
128
+ file = None
129
+ files = None
130
+ upath = UPath(data)
131
+ if not upath.is_dir:
132
+ raise ValueError(f"Can only pass buckets or directories, not {data}")
133
+ upath_str = upath.as_posix().rstrip("/")
134
+ region = get_storage_region(upath_str)
135
+ storage_settings = StorageSettings(upath_str, region)
136
+ storage = register_storage(storage_settings)
137
+ hash = None
119
138
  # init files
120
139
  else:
121
140
  file = None
141
+ storage = None
122
142
  if hasattr(data, "__getitem__"):
123
143
  assert isinstance(data[0], File) # type: ignore
124
144
  files = data
@@ -144,6 +164,7 @@ def __init__(
144
164
  reference=reference,
145
165
  reference_type=reference_type,
146
166
  file=file,
167
+ storage=storage,
147
168
  hash=hash,
148
169
  run=run,
149
170
  version=version,
@@ -276,8 +297,9 @@ def load(
276
297
  is_run_input: Optional[bool] = None,
277
298
  **kwargs,
278
299
  ) -> DataLike:
279
- _track_run_input(self, is_run_input)
300
+ # cannot call _track_run_input here, see comment further down
280
301
  if self.file is not None:
302
+ _track_run_input(self, is_run_input)
281
303
  return self.file.load()
282
304
  else:
283
305
  all_files = self.files.all()
@@ -291,9 +313,14 @@ def load(
291
313
  objects = [file.load(is_run_input=False) for file in all_files]
292
314
  file_ids = [file.id for file in all_files]
293
315
  if isinstance(objects[0], pd.DataFrame):
294
- return pd.concat(objects, join=join)
316
+ concat_object = pd.concat(objects, join=join)
295
317
  elif isinstance(objects[0], ad.AnnData):
296
- return ad.concat(objects, join=join, label="file_id", keys=file_ids)
318
+ concat_object = ad.concat(
319
+ objects, join=join, label="file_id", keys=file_ids
320
+ )
321
+ # only call it here because there might be errors during concat
322
+ _track_run_input(self, is_run_input)
323
+ return concat_object
297
324
 
298
325
 
299
326
  # docstring handled through attach_func_to_class_method
@@ -308,7 +335,7 @@ def save(self, *args, **kwargs) -> None:
308
335
  if self.file is not None:
309
336
  self.file.save()
310
337
  # we don't need to save feature sets again
311
- save_transform_run_feature_sets(self)
338
+ save_feature_sets(self)
312
339
  super(Dataset, self).save()
313
340
  if hasattr(self, "_files"):
314
341
  if self._files is not None and len(self._files) > 0:
@@ -316,6 +343,14 @@ def save(self, *args, **kwargs) -> None:
316
343
  save_feature_set_links(self)
317
344
 
318
345
 
346
+ @property # type: ignore
347
+ @doc_args(Dataset.path.__doc__)
348
+ def path(self) -> Union[Path, UPath]:
349
+ """{}"""
350
+ _track_run_input(self)
351
+ return self.storage.path
352
+
353
+
319
354
  METHOD_NAMES = [
320
355
  "__init__",
321
356
  "from_anndata",
@@ -337,3 +372,5 @@ if _TESTING:
337
372
 
338
373
  for name in METHOD_NAMES:
339
374
  attach_func_to_class_method(name, Dataset, globals())
375
+
376
+ setattr(Dataset, "path", path)
lamindb/_file.py CHANGED
@@ -47,7 +47,7 @@ from .dev._data import (
47
47
  add_transform_to_kwargs,
48
48
  get_run,
49
49
  save_feature_set_links,
50
- save_transform_run_feature_sets,
50
+ save_feature_sets,
51
51
  )
52
52
  from .dev._priors import priors
53
53
  from .dev.storage.file import AUTO_KEY_PREFIX
@@ -85,9 +85,9 @@ def process_pathlike(
85
85
  new_root_str = new_root.as_posix().rstrip("/")
86
86
  region = get_storage_region(new_root_str)
87
87
  storage_settings = StorageSettings(new_root_str, region)
88
- register_storage(storage_settings)
88
+ storage_record = register_storage(storage_settings)
89
89
  use_existing_storage_key = True
90
- return storage_settings.record, use_existing_storage_key
90
+ return storage_record, use_existing_storage_key
91
91
  # if the filepath is local
92
92
  else:
93
93
  use_existing_storage_key = False
@@ -833,7 +833,7 @@ def save(self, *args, **kwargs) -> None:
833
833
 
834
834
 
835
835
  def _save_skip_storage(file, *args, **kwargs) -> None:
836
- save_transform_run_feature_sets(file)
836
+ save_feature_sets(file)
837
837
  super(File, file).save(*args, **kwargs)
838
838
  save_feature_set_links(file)
839
839
 
lamindb/_filter.py CHANGED
@@ -1,13 +1,43 @@
1
1
  from typing import Type
2
+ from uuid import UUID
2
3
 
4
+ import dj_database_url
5
+ from django.db import connections
6
+ from lamindb_setup._init_instance import InstanceSettings
7
+ from lamindb_setup._load_instance import get_owner_name_from_identifier
8
+ from lamindb_setup.dev._hub_core import load_instance
3
9
  from lnschema_core import Registry
4
10
 
5
11
  from lamindb._query_set import QuerySet
6
12
 
7
13
 
8
- def filter(Registry: Type[Registry], **expressions) -> QuerySet:
14
def add_db_connection(isettings: InstanceSettings, using: str):
    """Register the database of another instance as a Django connection.

    Args:
        isettings: Settings of the instance to connect to; its ``db``
            attribute supplies the database URL.
        using: Alias under which the connection configuration is stored in
            ``connections.settings``.
    """
    # Parse the instance URL directly. ``dj_database_url.config(default=...)``
    # only uses ``default`` as a fallback and prefers the ``DATABASE_URL``
    # environment variable when it is set, which would make the alias point
    # at a database other than the requested instance.
    db_config = dj_database_url.parse(
        isettings.db, conn_max_age=600, conn_health_checks=True
    )
    db_config["TIME_ZONE"] = "UTC"
    db_config["OPTIONS"] = {}
    db_config["AUTOCOMMIT"] = True

    connections.settings[using] = db_config
23
+
24
+
25
+ def filter(Registry: Type[Registry], using: str = None, **expressions) -> QuerySet:
9
26
  """See :meth:`~lamindb.dev.Registry.filter`."""
10
- qs = QuerySet(model=Registry)
27
+ if using is not None:
28
+ owner, name = get_owner_name_from_identifier(using)
29
+ instance_result, storage_result = load_instance(owner=owner, name=name)
30
+ isettings = InstanceSettings(
31
+ owner=owner,
32
+ name=name,
33
+ storage_root=storage_result["root"],
34
+ storage_region=storage_result["region"],
35
+ db=instance_result["db"],
36
+ schema=instance_result["schema_str"],
37
+ id=UUID(instance_result["id"]),
38
+ )
39
+ add_db_connection(isettings, using)
40
+ qs = QuerySet(model=Registry, using=using)
11
41
  if len(expressions) > 0:
12
42
  return qs.filter(**expressions)
13
43
  else:
lamindb/_parents.py CHANGED
@@ -78,7 +78,7 @@ def view_flow(data: Union[File, Dataset], with_children: bool = True) -> None:
78
78
  df_values += _get_all_child_runs(data)
79
79
  df_edges = _df_edges_from_runs(df_values)
80
80
 
81
- data_label = _label_data_run_transform(data)
81
+ data_label = _record_label(data)
82
82
 
83
83
  def add_node(
84
84
  record: Union[Run, File, Dataset],
@@ -156,7 +156,7 @@ def _view_parents(
156
156
  else:
157
157
  df_edges = df_edges_children
158
158
 
159
- record_label = record.__getattribute__(field)
159
+ record_label = _record_label(record, field)
160
160
 
161
161
  u = graphviz.Digraph(
162
162
  record.id,
@@ -172,7 +172,7 @@ def _view_parents(
172
172
  )
173
173
  u.node(
174
174
  record.id,
175
- label=_label_data_run_transform(record)
175
+ label=_record_label(record)
176
176
  if record.__class__.__name__ == "Transform"
177
177
  else _add_emoji(record, record_label),
178
178
  fillcolor=LAMIN_GREEN_LIGHTER,
@@ -237,22 +237,57 @@ def _df_edges_from_parents(
237
237
  df_edges["source_record"] = df_edges["source"].apply(lambda x: all.get(id=x))
238
238
  df_edges["target_record"] = df_edges["target"].apply(lambda x: all.get(id=x))
239
239
  if record.__class__.__name__ == "Transform":
240
- df_edges["source_label"] = df_edges["source_record"].apply(
241
- _label_data_run_transform
242
- )
243
- df_edges["target_label"] = df_edges["target_record"].apply(
244
- _label_data_run_transform
245
- )
240
+ df_edges["source_label"] = df_edges["source_record"].apply(_record_label)
241
+ df_edges["target_label"] = df_edges["target_record"].apply(_record_label)
246
242
  else:
247
243
  df_edges["source_label"] = df_edges["source_record"].apply(
248
- lambda x: x.__getattribute__(field)
244
+ lambda x: _record_label(x, field)
249
245
  )
250
246
  df_edges["target_label"] = df_edges["target_record"].apply(
251
- lambda x: x.__getattribute__(field)
247
+ lambda x: _record_label(x, field)
252
248
  )
253
249
  return df_edges
254
250
 
255
251
 
252
def _record_label(record: Registry, field: Optional[str] = None):
    """Build the Graphviz HTML-like label for a record node.

    Args:
        record: The record to label. ``File``, ``Dataset``, ``Run`` and
            ``Transform`` get a dedicated layout; any other registry falls
            back to a generic layout.
        field: Name of the attribute used as display name in the generic
            fallback. Ignored for the four dedicated record types.

    Returns:
        A string wrapped in ``<...>`` so that Graphviz renders it as an
        HTML-like label: the display name on the first line, followed by a
        small grey monospace line with the record's id and type-specific
        details.
    """
    if isinstance(record, File):
        if record.description is None:
            name = record.key
        else:
            # "&" has to be written as an entity inside HTML-like labels;
            # same escaping as in the Dataset/Run/Transform branches
            name = record.description.replace("&", "&amp;")

        return (
            rf'<📄 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
            rf' FACE="Monospace">id={record.id}<BR/>suffix={record.suffix}</FONT>>'
        )
    elif isinstance(record, Dataset):
        name = record.name.replace("&", "&amp;")
        return (
            rf'<🍱 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
            rf' FACE="Monospace">id={record.id}<BR/>version={record.version}</FONT>>'
        )
    elif isinstance(record, Run):
        name = f'{record.transform.name.replace("&", "&amp;")}'
        return (
            rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'  # noqa
            rf' FACE="Monospace">id={record.id}<BR/>type={record.transform.type},'
            rf" user={record.created_by.name}<BR/>run_at={format_field_value(record.run_at)}</FONT>>"  # noqa
        )
    elif isinstance(record, Transform):
        name = f'{record.name.replace("&", "&amp;")}'
        return (
            rf'<{TRANSFORM_EMOJIS.get(str(record.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'  # noqa
            rf' FACE="Monospace">id={record.id}<BR/>type={record.type},'
            rf" user={record.created_by.name}<BR/>updated_at={format_field_value(record.updated_at)}</FONT>>"  # noqa
        )
    else:
        # generic registry: display the caller-selected field
        name = getattr(record, field)
        return (
            rf'<{name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
            rf' FACE="Monospace">id={record.id}</FONT>>'
        )
289
+
290
+
256
291
  def _add_emoji(record: Registry, label: str):
257
292
  if record.__class__.__name__ == "Transform":
258
293
  emoji = TRANSFORM_EMOJIS.get(record.type, "💫")
@@ -321,39 +356,6 @@ def _get_all_child_runs(data: Union[File, Dataset]) -> List:
321
356
  return run_inputs_outputs
322
357
 
323
358
 
324
- def _label_data_run_transform(record: Union[File, Run, Transform]):
325
- if isinstance(record, File):
326
- if record.description is None:
327
- name = record.key
328
- else:
329
- name = record.description.replace("&", "&amp;")
330
-
331
- return (
332
- rf'<📄 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
333
- rf' FACE="Monospace">id={record.id}<BR/>suffix={record.suffix}</FONT>>'
334
- )
335
- elif isinstance(record, Dataset):
336
- name = record.name.replace("&", "&amp;")
337
- return (
338
- rf'<🍱 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
339
- rf' FACE="Monospace">id={record.id}<BR/>version={record.version}</FONT>>'
340
- )
341
- elif isinstance(record, Run):
342
- name = f'{record.transform.name.replace("&", "&amp;")}'
343
- return (
344
- rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"' # noqa
345
- rf' FACE="Monospace">id={record.id}<BR/>type={record.transform.type},'
346
- rf" user={record.created_by.name}<BR/>run_at={format_field_value(record.run_at)}</FONT>>" # noqa
347
- )
348
- elif isinstance(record, Transform):
349
- name = f'{record.name.replace("&", "&amp;")}'
350
- return (
351
- rf'<{TRANSFORM_EMOJIS.get(str(record.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"' # noqa
352
- rf' FACE="Monospace">id={record.id}<BR/>type={record.type},'
353
- rf" user={record.created_by.name}<BR/>updated_at={format_field_value(record.updated_at)}</FONT>>" # noqa
354
- )
355
-
356
-
357
359
  def _df_edges_from_runs(df_values: List):
358
360
  import pandas as pd
359
361
 
@@ -363,8 +365,8 @@ def _df_edges_from_runs(df_values: List):
363
365
  df = df.drop_duplicates().dropna()
364
366
  df["source"] = [f"{i._meta.model_name}_{i.id}" for i in df["source_record"]]
365
367
  df["target"] = [f"{i._meta.model_name}_{i.id}" for i in df["target_record"]]
366
- df["source_label"] = df["source_record"].apply(_label_data_run_transform)
367
- df["target_label"] = df["target_record"].apply(_label_data_run_transform)
368
+ df["source_label"] = df["source_record"].apply(_record_label)
369
+ df["target_label"] = df["target_record"].apply(_record_label)
368
370
  return df
369
371
 
370
372
 
lamindb/_query_manager.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from typing import Optional
2
2
 
3
3
  from django.db import models
4
+ from lamin_utils import logger
4
5
 
5
6
  from .dev._feature_manager import get_feature_set_by_slot
6
7
 
@@ -24,6 +25,16 @@ class QueryManager(models.Manager):
24
25
  >>> manager.df()
25
26
  """
26
27
 
28
+ def _track_run_input_manager(self):
29
+ if hasattr(self, "source_field_name") and hasattr(self, "target_field_name"):
30
+ if self.source_field_name == "dataset" and self.target_field_name == "file":
31
+ from lamindb.dev._data import WARNING_RUN_TRANSFORM, _track_run_input
32
+ from lamindb.dev._run_context import run_context
33
+
34
+ if run_context.run is None:
35
+ logger.warning(WARNING_RUN_TRANSFORM)
36
+ _track_run_input(self.instance)
37
+
27
38
  def list(self, field: Optional[str] = None):
28
39
  """Populate a list with the results.
29
40
 
@@ -40,6 +51,7 @@ class QueryManager(models.Manager):
40
51
  >>> label.parents.list("name")
41
52
  ['ULabel1', 'ULabel2', 'ULabel3']
42
53
  """
54
+ self._track_run_input_manager()
43
55
  if field is None:
44
56
  return [item for item in self.all()]
45
57
  else:
@@ -52,6 +64,14 @@ class QueryManager(models.Manager):
52
64
  """
53
65
  return self.all().df(**kwargs)
54
66
 
67
def all(self):
    """Return the QuerySet of all records behind this manager.

    Calls ``_track_run_input_manager`` first and then delegates to
    ``all_base_class``.
    """
    self._track_run_input_manager()
    queryset = self.all_base_class()
    return queryset
74
+
55
75
  def __getitem__(self, item: str):
56
76
  try:
57
77
  source_field_name = self.source_field_name
@@ -70,3 +90,10 @@ class QueryManager(models.Manager):
70
90
  setattr(models.Manager, "list", QueryManager.list)
71
91
  setattr(models.Manager, "df", QueryManager.df)
72
92
  setattr(models.Manager, "__getitem__", QueryManager.__getitem__)
93
+ setattr(
94
+ models.Manager, "_track_run_input_manager", QueryManager._track_run_input_manager
95
+ )
96
+ # the two lines below would be easy if we could actually inherit; like this,
97
+ # they're suboptimal
98
+ setattr(models.Manager, "all_base_class", models.Manager.all)
99
+ setattr(models.Manager, "all", QueryManager.all)
lamindb/_registry.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import builtins
2
2
  from typing import Iterable, List, NamedTuple, Optional, Union
3
3
 
4
+ import lamindb_setup as ln_setup
4
5
  import pandas as pd
5
6
  from django.core.exceptions import FieldDoesNotExist
6
7
  from django.db.models import Manager, QuerySet
@@ -81,7 +82,12 @@ def __init__(orm: Registry, *args, **kwargs):
81
82
  validate_required_fields(orm, kwargs)
82
83
  from .dev._settings import settings
83
84
 
84
- if settings.upon_create_search_names:
85
+ # do not search for names if an id is passed; this is important
86
+ # e.g. when synching ids from the notebook store to lamindb
87
+ has_consciously_provided_id = False
88
+ if "_has_consciously_provided_id" in kwargs:
89
+ has_consciously_provided_id = kwargs.pop("_has_consciously_provided_id")
90
+ if settings.upon_create_search_names and not has_consciously_provided_id:
85
91
  result = suggest_objects_with_same_name(orm, kwargs)
86
92
  if result == "object-with-same-name-exists":
87
93
  if "version" in kwargs:
@@ -314,10 +320,59 @@ def _queryset(cls: Union[Registry, QuerySet, Manager]) -> QuerySet:
314
320
  return queryset
315
321
 
316
322
 
323
+ def transfer_to_default_db(record: Registry, save: bool = False):
324
+ db = record._state.db
325
+ if db is not None and db != "default":
326
+ logger.info(f"saving from instance {db} to default instance: {record}")
327
+ from lamindb.dev._data import WARNING_RUN_TRANSFORM
328
+ from lamindb.dev._run_context import run_context
329
+
330
+ logger.hint("saving to default instance")
331
+ if (
332
+ hasattr(record, "created_by_id")
333
+ and record.created_by_id != ln_setup.settings.user.id
334
+ ):
335
+ logger.info(f"updating created_by_id with {ln_setup.settings.user.id}")
336
+ record.created_by_id = ln_setup.settings.user.id
337
+ if hasattr(record, "run_id"):
338
+ if run_context.run is not None:
339
+ logger.info("updating run & transform to current run & transform")
340
+ record.run_id = run_context.run.id
341
+ else:
342
+ logger.warning(WARNING_RUN_TRANSFORM)
343
+ record.run_id = None
344
+ if hasattr(record, "transform_id"):
345
+ if run_context.transform is not None:
346
+ record.transform_id = run_context.transform.id
347
+ else:
348
+ record.transform_id = None
349
+ if hasattr(record, "storage_id") and record.storage_id is not None:
350
+ record.storage.save()
351
+ record._state.db = "default"
352
+ if save:
353
+ record.save()
354
+
355
+
356
+ # docstring handled through attach_func_to_class_method
357
+ def save(self, *args, **kwargs) -> None:
358
+ db = self._state.db
359
+ transfer_to_default_db(self)
360
+ super(Registry, self).save(*args, **kwargs)
361
+ if db is not None and db != "default":
362
+ if hasattr(self, "labels"):
363
+ logger.info("transfer labels")
364
+ from copy import copy
365
+
366
+ self_on_db = copy(self)
367
+ self_on_db._state.db = db
368
+ self.labels.add_from(self_on_db)
369
+
370
+
317
371
  METHOD_NAMES = [
318
372
  "__init__",
319
373
  "search",
320
374
  "lookup",
375
+ "save",
321
376
  "from_values",
322
377
  ]
323
378
 
@@ -347,16 +402,5 @@ def __get_name_with_schema__(cls) -> str:
347
402
  return f"{schema_name}.{cls.__name__}"
348
403
 
349
404
 
350
- def select_backward(cls, **expressions):
351
- logger.warning("select() is deprecated! please use: Registry.filter()")
352
- return cls.filter(**expressions)
353
-
354
-
355
- @classmethod # type: ignore
356
- def select(cls, **expressions):
357
- return select_backward(cls, **expressions)
358
-
359
-
360
405
  setattr(Registry, "__get_schema_name__", __get_schema_name__)
361
406
  setattr(Registry, "__get_name_with_schema__", __get_name_with_schema__)
362
- setattr(Registry, "select", select) # backward compat
lamindb/_storage.py CHANGED
@@ -1,3 +1,4 @@
1
+ from lamindb_setup.dev._docs import doc_args
1
2
  from lamindb_setup.dev.upath import UPath, create_path
2
3
  from lnschema_core import Storage
3
4
 
@@ -6,4 +7,12 @@ def root_as_path(self) -> UPath:
6
7
  return create_path(self.root)
7
8
 
8
9
 
10
+ @property # type: ignore
11
+ @doc_args(Storage.path.__doc__)
12
+ def path(self) -> UPath:
13
+ """{}"""
14
+ return create_path(self.root)
15
+
16
+
9
17
  setattr(Storage, "root_as_path", root_as_path)
18
+ setattr(Storage, "path", path)
lamindb/_transform.py CHANGED
@@ -44,8 +44,14 @@ def __init__(transform: Transform, *args, **kwargs):
44
44
  )
45
45
  if name is None:
46
46
  name = is_new_version_of.name
47
+
48
+ # this is only because the user-facing constructor allows passing an id
49
+ # most others don't
47
50
  if id is None:
51
+ has_consciously_provided_id = False
48
52
  id = new_id
53
+ else:
54
+ has_consciously_provided_id = True
49
55
  super(Transform, transform).__init__(
50
56
  id=id,
51
57
  name=name,
@@ -54,6 +60,7 @@ def __init__(transform: Transform, *args, **kwargs):
54
60
  version=version,
55
61
  initial_version_id=initial_version_id,
56
62
  reference=reference,
63
+ _has_consciously_provided_id=has_consciously_provided_id,
57
64
  )
58
65
 
59
66
 
lamindb/dev/_data.py CHANGED
@@ -34,15 +34,16 @@ from ._priors import priors
34
34
  from ._run_context import run_context
35
35
  from .exceptions import ValidationError
36
36
 
37
+ WARNING_RUN_TRANSFORM = (
38
+ "no run & transform get linked, consider passing a `run` or calling ln.track()"
39
+ )
40
+
37
41
 
38
42
  def get_run(run: Optional[Run]) -> Optional[Run]:
39
43
  if run is None:
40
44
  run = run_context.run
41
- if run is None:
42
- logger.warning(
43
- "no run & transform get linked, consider passing a `run` or calling"
44
- " ln.track()"
45
- )
45
+ if run is None and not settings.silence_file_run_transform_warning:
46
+ logger.warning(WARNING_RUN_TRANSFORM)
46
47
  return run
47
48
 
48
49
 
@@ -51,11 +52,7 @@ def add_transform_to_kwargs(kwargs: Dict[str, Any], run: Run):
51
52
  kwargs["transform"] = run.transform
52
53
 
53
54
 
54
- def save_transform_run_feature_sets(self: Union[File, Dataset]) -> None:
55
- if self.transform is not None:
56
- self.transform.save()
57
- if self.run is not None:
58
- self.run.save()
55
+ def save_feature_sets(self: Union[File, Dataset]) -> None:
59
56
  if hasattr(self, "_feature_sets"):
60
57
  saved_feature_sets = {}
61
58
  for key, feature_set in self._feature_sets.items():
@@ -6,7 +6,7 @@ from lnschema_core.models import Data, Dataset, Feature, File, Registry
6
6
  from .._feature_set import dict_related_model_to_related_name
7
7
  from .._from_values import _print_values
8
8
  from .._query_set import QuerySet
9
- from .._registry import get_default_str_field
9
+ from .._registry import get_default_str_field, transfer_to_default_db
10
10
 
11
11
 
12
12
  def get_labels_as_dict(self: Data):
@@ -14,7 +14,14 @@ def get_labels_as_dict(self: Data):
14
14
  for related_model, related_name in dict_related_model_to_related_name(
15
15
  self.__class__
16
16
  ).items():
17
- if related_name in {"feature_sets", "files", "input_of", "datasets"}:
17
+ if related_name in {
18
+ "feature_sets",
19
+ "files",
20
+ "input_of",
21
+ "datasets",
22
+ "source_of",
23
+ "report_of",
24
+ }:
18
25
  continue
19
26
  labels[related_name] = (related_model, self.__getattribute__(related_name))
20
27
  return labels
@@ -110,9 +117,17 @@ class LabelManager:
110
117
  labels = data.labels.get(
111
118
  getattr(features_lookup, row["name"]), mute=True
112
119
  )
120
+ for label in labels:
121
+ transfer_to_default_db(label, save=True)
113
122
  self._host.labels.add(
114
123
  labels, feature=getattr(features_lookup, row["name"])
115
124
  )
116
125
  # for now, have this be duplicated, need to disentangle above
117
126
  for related_name, (_, labels) in get_labels_as_dict(data).items():
118
- getattr(self._host, related_name).add(*labels.all())
127
+ labels_list = labels.list()
128
+ for label in labels_list:
129
+ transfer_to_default_db(label, save=True)
130
+ # this should not occur as file and dataset should have the same attributes
131
+ # but this might not be true for custom schema
132
+ if hasattr(self._host, related_name):
133
+ getattr(self._host, related_name).add(*labels_list)
lamindb/dev/_settings.py CHANGED
@@ -56,6 +56,8 @@ class Settings:
56
56
 
57
57
  FAQ: :doc:`/faq/track-run-inputs`
58
58
  """
59
+ silence_file_run_transform_warning: bool = False
60
+ """Silence warning about missing run & transform during file creation."""
59
61
 
60
62
  @property
61
63
  def storage(self) -> Union[Path, UPath]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.54.4
3
+ Version: 0.55.1
4
4
  Summary: Open-source data platform for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -8,9 +8,9 @@ Description-Content-Type: text/markdown
8
8
  Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
- Requires-Dist: lnschema_core==0.48.4
12
- Requires-Dist: lamindb_setup==0.53.2
13
- Requires-Dist: lamin_utils==0.11.3
11
+ Requires-Dist: lnschema_core==0.50.0
12
+ Requires-Dist: lamindb_setup==0.54.3
13
+ Requires-Dist: lamin_utils==0.11.4
14
14
  Requires-Dist: rapidfuzz
15
15
  Requires-Dist: pyarrow
16
16
  Requires-Dist: typing_extensions!=4.6.0
@@ -34,11 +34,14 @@ Requires-Dist: coverage ; extra == "dev"
34
34
  Requires-Dist: pytest-cov ; extra == "dev"
35
35
  Requires-Dist: nbproject_test>=0.3.0 ; extra == "dev"
36
36
  Requires-Dist: faker-biology ; extra == "dev"
37
- Requires-Dist: erdiagram>=0.1.3 ; extra == "erdiagram"
38
- Requires-Dist: readfcs>=1.1.5 ; extra == "fcs"
37
+ Requires-Dist: django-schema-graph ; extra == "erdiagram"
38
+ Requires-Dist: readfcs>=1.1.7 ; extra == "fcs"
39
39
  Requires-Dist: fsspec[gs]==2023.9.0 ; extra == "gcp"
40
40
  Requires-Dist: nbproject==0.9.2 ; extra == "jupyter"
41
+ Requires-Dist: nbstripout ; extra == "jupyter"
42
+ Requires-Dist: nbconvert ; extra == "jupyter"
41
43
  Requires-Dist: psycopg2-binary ; extra == "postgres"
44
+ Requires-Dist: lamin_vault ; extra == "postgres"
42
45
  Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
43
46
  Project-URL: Home, https://github.com/laminlabs/lamindb
44
47
  Provides-Extra: aws
@@ -1,30 +1,30 @@
1
- lamindb/__init__.py,sha256=dgz0a8-2DKOnshq03qxCrI9HH_ksnRxCz_WamVZ5gZ8,2911
2
- lamindb/_dataset.py,sha256=I6RiJ9HP1U-hi_2Xy9utDnWHw8L0N8aM-vB9_w4pRUE,12188
1
+ lamindb/__init__.py,sha256=ZCNDGjrECrHymRvihw53TRk4QYluCJ1URlRRhKLb4vA,2945
2
+ lamindb/_dataset.py,sha256=363qySdhwD9qO9op-fXRuTTflKmCdhGAOvf115gdjXo,13444
3
3
  lamindb/_delete.py,sha256=wiYmYnvIEHrDdmw1NiXyfCY9mBt-FI5XNFi5jyR_mkA,1968
4
4
  lamindb/_feature.py,sha256=5gsa7zsMVVtm1DID4dF3Vwo5llWyY1dH3Hg5hjaIrQk,5554
5
5
  lamindb/_feature_set.py,sha256=YyRLc76piJdh8X6B917cFBiwJTdIxvQo0n5k2-HUGro,9373
6
- lamindb/_file.py,sha256=yrjP-nAEJ8P8dC1mGpB7-LqYVbNdwz0KY68tFZgm4PU,35568
7
- lamindb/_filter.py,sha256=Rf5RSkglIhJhSdk3AIPfc83F5NIfZrdCZDpgcYNj5KY,351
6
+ lamindb/_file.py,sha256=WotCwWGtVYLhyUARgJeInLgRnuKKRmx7E12IxS9Pe20,35548
7
+ lamindb/_filter.py,sha256=06hUpqZTTvXPOfywR0oJ47dd281KvnejRpTvaCAIV74,1515
8
8
  lamindb/_from_values.py,sha256=OztkgQpyWy6CSrr5RcKwL1r_DTjCoYWyllR94LLd8qs,11595
9
- lamindb/_parents.py,sha256=-BMhDfOQ8ORC5jbh09u4WVh-1G4vm509Bv4cDYh54OE,12969
10
- lamindb/_query_manager.py,sha256=F3ACAjy6UF7Vvdf9rnHRCLx8XvZAgpvTQLCqWpCH6Xk,2592
9
+ lamindb/_parents.py,sha256=GZ6eUlIDtcAh4dCjBE2CC7KkY62WkKNFP3MMLtBjW44,13048
10
+ lamindb/_query_manager.py,sha256=MXueabWHqft7GWNkzmWbhfTqdk-0mKU7nWrhXG6wpYQ,3693
11
11
  lamindb/_query_set.py,sha256=Lf7vLvOsEfUWRQ3iImSj4eQPmUK1KCgeoKS_m66Lp7o,10279
12
- lamindb/_registry.py,sha256=HvgT1kDUbUOo513nhBlKexI_C3lH3DJEyig-FIyHq2A,11444
12
+ lamindb/_registry.py,sha256=N-CzfRokoWLl1ive5aYr5mBnUQuddQ1GCjElbeUFNsY,13404
13
13
  lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
14
14
  lamindb/_save.py,sha256=Lir8uDGS9XuYOaqogpT2EiDrWV_T_G9PbPvckbvUsoE,9474
15
- lamindb/_storage.py,sha256=vfd1yeGEalVVEJrZQBv57dlAZSHsY1kexRjPFqTxGNg,207
16
- lamindb/_transform.py,sha256=fWmlLBtBrplka_1SzFojdZ9EHCQ15j2yv4iB2jJAD4U,2235
15
+ lamindb/_storage.py,sha256=HUdXGj4839C606gvxWXo0tDITbtbuyJKOgUPhagYPTI,415
16
+ lamindb/_transform.py,sha256=O9JcOt0Khg9fF_1Rmid03XyxJms6OnxQp8rCAT3ZuDw,2499
17
17
  lamindb/_ulabel.py,sha256=lEAENh_dluNkBi8xKUH_CjJNMXldOm2liy6Rg3IH1pE,1900
18
18
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
19
19
  lamindb/_validate.py,sha256=6FQIxE8ZK4HwrrmrmFSjeCFKc-mxaHej5fuBh_4y2Tk,13656
20
20
  lamindb/_view.py,sha256=bzx6e-Cif2CmDQkOu6jMrq_d5rsu6g7hhdaK_sYBv_Y,2150
21
21
  lamindb/dev/__init__.py,sha256=Ja96dxb0t7raGsCr8QxqCabyEzIxeVGlL_IgmhxdsB8,1010
22
- lamindb/dev/_data.py,sha256=hNhs6YsnjwC6JI09EuMfgpsF4GT6_xxBafp5iJ7-5cs,13614
22
+ lamindb/dev/_data.py,sha256=1uOGPpa1OWIM1QcgpS_M_l35sDC8Fltk-fYf_5cKxUc,13540
23
23
  lamindb/dev/_feature_manager.py,sha256=z7uWuSO5fWl0pCKdCsc86rRrcKnAmtx2wAuOTiZn5XE,5869
24
- lamindb/dev/_label_manager.py,sha256=HCYZaUP1u4v8L0NemJjmm7kz5fA_Q-aISUJCMzqX4YU,4305
24
+ lamindb/dev/_label_manager.py,sha256=yW0KgM1WMYe4NOpWNCO5ymw5CY2_UQl3t9tXZER4Gdc,4887
25
25
  lamindb/dev/_priors.py,sha256=eSZEEijmeFs3zcrU27r3T2sSGdsK-cvy7vl6ukDYaU8,785
26
26
  lamindb/dev/_run_context.py,sha256=XLlEknzOe0Cabi9fvzdQl5UeLoU02fAFkCdB7S2FLZM,19109
27
- lamindb/dev/_settings.py,sha256=AqPrB3ij3uIzlXDmH7rl8HsqIgaDUNReafwxjJDQphw,3267
27
+ lamindb/dev/_settings.py,sha256=b0uq5eB6Dk7BQjBoK4a-9hy8y1DPsq0rErAQjZ4DibM,3398
28
28
  lamindb/dev/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
29
29
  lamindb/dev/fields.py,sha256=Yzdk2qF2ILNYktyswLTgHNrjCN8-McGsv5pqRdijIZ8,171
30
30
  lamindb/dev/hashing.py,sha256=IlNrHy-a9NqB0vfqiwIh4sjt40CvaiZIvfK6gMnkxDo,1381
@@ -39,12 +39,10 @@ lamindb/dev/storage/_backed_access.py,sha256=xbeXMFIXzDv7_fjR384omxdpSy3pF-T91rB
39
39
  lamindb/dev/storage/_zarr.py,sha256=7W1Jos1QOOF3f41uML_arQoDTNPZVpRyP2m3SLWaCAo,2766
40
40
  lamindb/dev/storage/file.py,sha256=CrR8dhfqEWPj7jhNj7zujQE1WT4gWGCP0qKPIIta0bg,7978
41
41
  lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
42
- lamindb/schema/__init__.py,sha256=dtMCkfxDdYe6FHdqvySRZApyqgf_D1pkqTytyOX1dvw,619
43
- lamindb/schema/_core.py,sha256=nWR3X_rNd1AbWw3naMiBi8ppAEpqIDyEYqM54feRB_s,766
44
42
  lamindb/setup/__init__.py,sha256=8-0F2C4Glx23-b8-D_1CBGgRBM5PppVhazhoXZYOLsg,275
45
43
  lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
46
- lamindb-0.54.4.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
47
- lamindb-0.54.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
- lamindb-0.54.4.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
49
- lamindb-0.54.4.dist-info/METADATA,sha256=fvMRsYeapWFpUlnv8wIyG07hT0chub9HQyx0EGuj240,2877
50
- lamindb-0.54.4.dist-info/RECORD,,
44
+ lamindb-0.55.1.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
45
+ lamindb-0.55.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
46
+ lamindb-0.55.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
47
+ lamindb-0.55.1.dist-info/METADATA,sha256=E5NRemxlnVm8u55H11hKjH1ezGF6ZYnd4z0zHvpC-Z4,3022
48
+ lamindb-0.55.1.dist-info/RECORD,,
@@ -1,28 +0,0 @@
1
- """Schema tools & overview.
2
-
3
- Guide: :doc:`/schemas`
4
-
5
- You can access mounted schema modules with domain-specific entities via
6
- available via `ln.schema.<module>.<entity>`.
7
-
8
- However, we recommend to import schema modules, e.g., like `import
9
- lnschema_bionty as bt`.
10
-
11
- .. autosummary::
12
- :toctree: .
13
-
14
- graph
15
- view
16
-
17
- """
18
- import importlib as _importlib
19
-
20
- from lamindb_setup import settings as _settings
21
- from lamindb_setup._init_instance import reload_schema_modules as _reload_schema_modules
22
-
23
- from .. import _INSTANCE_SETUP
24
-
25
- if _INSTANCE_SETUP:
26
- _reload_schema_modules(_settings.instance)
27
-
28
- from ._core import graph, view
lamindb/schema/_core.py DELETED
@@ -1,37 +0,0 @@
1
- import lamindb_setup as setup
2
-
3
-
4
- def graph():
5
- """Get diagram of entity relationships as `pydot.Dot` graph object.
6
-
7
- It uses `erdiagram.create_schema_graph`
8
- """
9
- import erdiagram
10
-
11
- metadata = get_db_metadata()
12
- return erdiagram.create_schema_graph(
13
- metadata=metadata,
14
- show_datatypes=False,
15
- show_indexes=False,
16
- rankdir="TB",
17
- concentrate=True,
18
- )
19
-
20
-
21
- def view():
22
- """View diagram of entity relationships.
23
-
24
- It displays :func:`~lamindb.schema.graph`.
25
- """
26
- import erdiagram
27
-
28
- erdiagram.view(graph())
29
-
30
-
31
- def get_db_metadata():
32
- import sqlalchemy as sa
33
-
34
- engine = sa.create_engine(setup.settings.instance.db)
35
- metadata = sa.MetaData(bind=engine)
36
- metadata.reflect()
37
- return metadata